{ "metadata": { "name": "", "signature": "sha256:c5c96220a48c13454e0e8b2b2085f4bfa95d7b1eb9a9de0771c4fb7b818c001a" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Linux Commands\n", "\n", "* Disk Usage\n", "* Splitting Files\n", "* Grep\n", "* Compression\n", "* Terminal Syntax Highlighting" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Disk Usage" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Display human-readable (-h) free disk space:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df -h" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Display human-readable (-h) disk usage statistics:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "du -h ./" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Display human-readable (-h) disk usage statistics, showing only the total usage (-s):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "du -sh ../" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Display human-readable (-h) disk usage statistics as a per-argument summary (-s), also showing the grand total (-c):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "du -csh ./" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Splitting Files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Count the number of lines in a file with wc:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "wc -l < file.txt" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Count the number of non-empty lines in a file with grep (the pattern \".\" matches any line containing at least one character):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "grep -c \".\" file.txt" ], 
"language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Split a file into multiple files based on line count:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "split -l 20 file.txt new" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Split a file into multiple files based on line count, using a suffix of length 1:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "split -l 802 -a 1 file.csv dir/part-user-csv.tbl-" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Grep" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the number of files matching \".txt\":" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ls -1 | grep .txt | wc -l" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the number of MapReduce records processed, outputting the results to the terminal:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cat * | grep -c \"foo\" folder/part*" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Uncompress all tar.gz files in the current directory to another directory:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "for i in *.tar.gz; do echo working on $i; tar xvzf $i -C directory/ ; done" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Terminal Syntax Highlighting" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Add the following to your ~/.bash_profile:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "export PS1='\\[\\033[01;32m\\]\\u@\\h\\[\\033[00m\\]:\\[\\033[01;34m\\]\\W\\[\\033[00m\\]\\$ '\n", "export CLICOLOR=1\n", 
"export LSCOLORS=ExFxBxDxCxegedabagacad\n", "alias ls='ls -GFh'" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Reload .bash_profile:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "source ~/.bash_profile" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }