Updated notebook to v3.

This commit is contained in:
Donne Martin 2015-05-24 10:49:21 -04:00
parent d4587d3771
commit 2106accb6d

View File

@ -1,226 +1,248 @@
{ {
"metadata": { "cells": [
"name": "",
"signature": "sha256:ff0a8cf7aae0fcbadcd718a2f9164affac4fd22893c629a3755198b4f003d63a"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{ {
"cells": [ "cell_type": "markdown",
{ "metadata": {},
"cell_type": "markdown", "source": [
"metadata": {}, "# HDFS"
"source": [ ]
"# HDFS" },
] {
}, "cell_type": "markdown",
{ "metadata": {},
"cell_type": "markdown", "source": [
"metadata": {}, "Run an HDFS command:"
"source": [ ]
"Run an HDFS command:" },
] {
}, "cell_type": "code",
{ "execution_count": null,
"cell_type": "code", "metadata": {
"collapsed": false, "collapsed": false
"input": [ },
"!hdfs" "outputs": [],
], "source": [
"language": "python", "!hdfs"
"metadata": {}, ]
"outputs": [] },
}, {
{ "cell_type": "markdown",
"cell_type": "markdown", "metadata": {},
"metadata": {}, "source": [
"source": [ "Run a file system command on the file systems supported in Hadoop (FsShell):"
"Run a file system command on the file systems supported in Hadoop (FsShell):" ]
] },
}, {
{ "cell_type": "code",
"cell_type": "code", "execution_count": null,
"collapsed": false, "metadata": {
"input": [ "collapsed": false
"!hdfs dfs" },
], "outputs": [],
"language": "python", "source": [
"metadata": {}, "!hdfs dfs"
"outputs": [] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"List the user's home directory:" "List the user's home directory:"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "execution_count": null,
"input": [ "metadata": {
"!hdfs dfs -ls" "collapsed": false
], },
"language": "python", "outputs": [],
"metadata": {}, "source": [
"outputs": [] "!hdfs dfs -ls"
}, ]
{ },
"cell_type": "markdown", {
"metadata": {}, "cell_type": "markdown",
"source": [ "metadata": {},
"List the HDFS root directory:" "source": [
] "List the HDFS root directory:"
}, ]
{ },
"cell_type": "code", {
"collapsed": false, "cell_type": "code",
"input": [ "execution_count": null,
"!hdfs dfs -ls /" "metadata": {
], "collapsed": false
"language": "python", },
"metadata": {}, "outputs": [],
"outputs": [] "source": [
}, "!hdfs dfs -ls /"
{ ]
"cell_type": "markdown", },
"metadata": {}, {
"source": [ "cell_type": "markdown",
"Copy a local file to the user's directory on HDFS:" "metadata": {},
] "source": [
}, "Copy a local file to the user's directory on HDFS:"
{ ]
"cell_type": "code", },
"collapsed": false, {
"input": [ "cell_type": "code",
"!hdfs dfs -put file.txt file.txt" "execution_count": null,
], "metadata": {
"language": "python", "collapsed": false
"metadata": {}, },
"outputs": [] "outputs": [],
}, "source": [
{ "!hdfs dfs -put file.txt file.txt"
"cell_type": "markdown", ]
"metadata": {}, },
"source": [ {
"Display the contents of the specified HDFS file:" "cell_type": "markdown",
] "metadata": {},
}, "source": [
{ "Display the contents of the specified HDFS file:"
"cell_type": "code", ]
"collapsed": false, },
"input": [ {
"!hdfs dfs -cat file.txt" "cell_type": "code",
], "execution_count": null,
"language": "python", "metadata": {
"metadata": {}, "collapsed": false
"outputs": [] },
}, "outputs": [],
{ "source": [
"cell_type": "markdown", "!hdfs dfs -cat file.txt"
"metadata": {}, ]
"source": [ },
"Print the last 10 lines of the file to the terminal:" {
] "cell_type": "markdown",
}, "metadata": {},
{ "source": [
"cell_type": "code", "Print the last 10 lines of the file to the terminal:"
"collapsed": false, ]
"input": [ },
"!hdfs dfs -cat file.txt | tail -n 10" {
], "cell_type": "code",
"language": "python", "execution_count": null,
"metadata": {}, "metadata": {
"outputs": [] "collapsed": false
}, },
{ "outputs": [],
"cell_type": "markdown", "source": [
"metadata": {}, "!hdfs dfs -cat file.txt | tail -n 10"
"source": [ ]
"View a directory and all of its files:" },
] {
}, "cell_type": "markdown",
{ "metadata": {},
"cell_type": "code", "source": [
"collapsed": false, "View a directory and all of its files:"
"input": [ ]
"!hdfs dfs -cat dir/* | less" },
], {
"language": "python", "cell_type": "code",
"metadata": {}, "execution_count": null,
"outputs": [] "metadata": {
}, "collapsed": false
{ },
"cell_type": "markdown", "outputs": [],
"metadata": {}, "source": [
"source": [ "!hdfs dfs -cat dir/* | less"
"Copy an HDFS file to local:" ]
] },
}, {
{ "cell_type": "markdown",
"cell_type": "code", "metadata": {},
"collapsed": false, "source": [
"input": [ "Copy an HDFS file to local:"
"!hdfs dfs -get file.txt file.txt" ]
], },
"language": "python", {
"metadata": {}, "cell_type": "code",
"outputs": [] "execution_count": null,
}, "metadata": {
{ "collapsed": false
"cell_type": "markdown", },
"metadata": {}, "outputs": [],
"source": [ "source": [
"Create a directory on HDFS:" "!hdfs dfs -get file.txt file.txt"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"collapsed": false, "metadata": {},
"input": [ "source": [
"!hdfs dfs -mkdir dir" "Create a directory on HDFS:"
], ]
"language": "python", },
"metadata": {}, {
"outputs": [] "cell_type": "code",
}, "execution_count": null,
{ "metadata": {
"cell_type": "markdown", "collapsed": false
"metadata": {}, },
"source": [ "outputs": [],
"Recursively delete the specified directory and all of its contents:" "source": [
] "!hdfs dfs -mkdir dir"
}, ]
{ },
"cell_type": "code", {
"collapsed": false, "cell_type": "markdown",
"input": [ "metadata": {},
"!hdfs dfs -rm -r dir" "source": [
], "Recursively delete the specified directory and all of its contents:"
"language": "python", ]
"metadata": {}, },
"outputs": [] {
}, "cell_type": "code",
{ "execution_count": null,
"cell_type": "markdown", "metadata": {
"metadata": {}, "collapsed": false
"source": [ },
"Specify HDFS file in Spark (paths are relative to the user's home HDFS directory):" "outputs": [],
] "source": [
}, "!hdfs dfs -rm -r dir"
{ ]
"cell_type": "code", },
"collapsed": false, {
"input": [ "cell_type": "markdown",
"data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")" "metadata": {},
], "source": [
"language": "python", "Specify HDFS file in Spark (paths are relative to the user's home HDFS directory):"
"metadata": {}, ]
"outputs": [] },
} {
], "cell_type": "code",
"metadata": {} "execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")"
]
} }
] ],
} "metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}