diff --git a/commands/linux.ipynb b/commands/linux.ipynb index f1fa12b..54a84da 100644 --- a/commands/linux.ipynb +++ b/commands/linux.ipynb @@ -89,6 +89,81 @@ "language": "python", "metadata": {}, "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Splitting Files" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Count the number of lines in a file with wc:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "wc -l < file.txt" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Count the number of non-empty lines in a file with grep:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "grep -c \".\" file.txt" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Split a file into multiple files based on line count:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "split -l 20 file.txt new" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Split a file into multiple files based on line count, using a suffix of length 1:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "split -l 802 -a 1 file.csv dir/part-user-csv.tbl-" + ], + "language": "python", + "metadata": {}, + "outputs": [] } ], "metadata": {}