Added Linux commands to count lines and split files into multiple parts based on line counts.

This commit is contained in:
Donne Martin 2015-02-28 12:32:08 -05:00
parent a709c709ce
commit 133cddb267

View File

@ -89,6 +89,81 @@
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Splitting Files"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Count the number of lines in a file with wc:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"wc -l < file.txt"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Count the number of non-empty lines in a file with grep:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"grep -c \".\" file.txt"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Split a file into multiple files based on line count:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"split -l 20 file.txt new"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Split a file into multiple files based on line count, using a suffix of length 1:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"split -l 802 -a 1 file.csv dir/part-user-csv.tbl-"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}