{ "metadata": { "name": "", "signature": "sha256:ff0a8cf7aae0fcbadcd718a2f9164affac4fd22893c629a3755198b4f003d63a" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HDFS" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run an HDFS command:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run a file system command on the file systems (FsShell):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "List the user's home directory:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -ls" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "List the HDFS root directory:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -ls /" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Copy a local file to the user's directory on HDFS:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -put file.txt file.txt" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Display the contents of the specified HDFS file:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -cat file.txt" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Print the last 10 lines of the file to the terminal:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -cat file.txt | tail -n 10" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "View a directory and all 
of its files:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -cat dir/* | less" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Copy an HDFS file to the local file system:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -get file.txt file.txt" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a directory on HDFS:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -mkdir dir" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Recursively delete the specified directory and all of its contents:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!hdfs dfs -rm -r dir" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Specify an HDFS file in Spark (relative paths resolve against the user's HDFS home directory; a fully-qualified hdfs:// URI may also be used, as below):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = sc.textFile(\"hdfs://hdfs-host:port/path/file.txt\")" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }