From 2106accb6d2cfbb8dea0facf8cc06af55e00d0b0 Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Sun, 24 May 2015 10:49:21 -0400 Subject: [PATCH] Updated notebook to v3. --- spark/hdfs.ipynb | 468 +++++++++++++++++++++++++---------------------- 1 file changed, 245 insertions(+), 223 deletions(-) diff --git a/spark/hdfs.ipynb b/spark/hdfs.ipynb index 96c8af5..84dc00e 100644 --- a/spark/hdfs.ipynb +++ b/spark/hdfs.ipynb @@ -1,226 +1,248 @@ { - "metadata": { - "name": "", - "signature": "sha256:ff0a8cf7aae0fcbadcd718a2f9164affac4fd22893c629a3755198b4f003d63a" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ + "cells": [ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# HDFS" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run an HDFS command:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run a file system command on the file systems (FsShell):" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "List the user's home directory:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -ls" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "List the HDFS root directory:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -ls /" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copy a local file to the user's directory on HDFS:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -put file.txt file.txt" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Display the contents of the specified HDFS file:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -cat file.txt" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the last 10 lines of the file to the terminal:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -cat file.txt | tail -n 10" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "View a directory and all of its files:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -cat dir/* | less" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copy an HDFS file to local:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -get file.txt file.txt" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a directory on HDFS:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -mkdir dir" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Recursively delete the specified directory and all of its contents:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "!hdfs dfs -rm -r dir" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Specify HDFS file in Spark (paths are relative to the user's home HDFS directory):" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")" - ], - "language": "python", - "metadata": {}, - "outputs": [] - } - ], - "metadata": {} + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# HDFS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run an HDFS command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run a file system command on the file systems (FsShell):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "List the user's home directory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -ls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "List the HDFS root directory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -ls /" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy a local file to the user's directory on HDFS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -put file.txt file.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Display the contents of the specified HDFS file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -cat file.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the last 10 lines of the file to the terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -cat file.txt | tail -n 10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "View a directory and all of its files:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -cat dir/* | less" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copy an HDFS file to local:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -get file.txt file.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a directory on HDFS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -mkdir dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Recursively delete the specified directory and all of its contents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "!hdfs dfs -rm -r dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Specify HDFS file in Spark (paths are relative to the user's home HDFS directory):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}