mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
237 lines
4.5 KiB
Plaintext
237 lines
4.5 KiB
Plaintext
{
|
|
"metadata": {
|
|
"name": "",
|
|
"signature": "sha256:8cd9b925468d91a5445fbecd65918d54988bc825d74728cb9bb0d22c193e03ed"
|
|
},
|
|
"nbformat": 3,
|
|
"nbformat_minor": 0,
|
|
"worksheets": [
|
|
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Linux Commands"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Disk Usage"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Display human-readable (-h) free disk space:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"df -h"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Display human-readable (-h) disk usage statistics:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"du -h ./"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Display human-readable (-h) disk usage statistics, showing only the total usage (-s):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"du -sh ../"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Display the human-readable (-h) disk usage statistics, showing also the grand total for all file types (-c):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"du -csh ./"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Splitting Files"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Count number of lines in a file with wc:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"wc -l < file.txt"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Count the number of lines in a file with grep:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"grep -c \".\" file.txt"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Split a file into multiple files based on line count:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"split -l 20 file.txt new"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Split a file into multiple files based on line count, use suffix of length 1:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"split -l 802 -a 1 file.csv dir/part-user-csv.tbl-"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Grep"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Check number of files matching \u201c.txt\":"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ls -1 | grep .txt | wc -l"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Check number of MapReduce records processed, outputting the results to the terminal:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"cat * | grep -c \"foo\" folder/part*"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Compression"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Uncompress all tar.gz in current directory to another directory"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"for i in *.tar.gz; do echo working on $i; tar xvzf $i -C directory/ ; done"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": []
|
|
}
|
|
],
|
|
"metadata": {}
|
|
}
|
|
]
|
|
} |