Updated notebook to v3.

This commit is contained in:
Donne Martin 2015-05-24 10:49:21 -04:00
parent d4587d3771
commit 2106accb6d

View File

@ -1,226 +1,248 @@
{
"metadata": {
"name": "",
"signature": "sha256:ff0a8cf7aae0fcbadcd718a2f9164affac4fd22893c629a3755198b4f003d63a"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
"cells": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# HDFS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run an HDFS command:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run a file system command on the file systems (FsShell):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the user's home directory:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -ls"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the HDFS root directory:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -ls /"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copy a local file to the user's directory on HDFS:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -put file.txt file.txt"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Display the contents of the specified HDFS file:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -cat file.txt"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Print the last 10 lines of the file to the terminal:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -cat file.txt | tail -n 10"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View a directory and all of its files:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -cat dir/* | less"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copy an HDFS file to local:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -get file.txt file.txt"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a directory on HDFS:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -mkdir dir"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Recursively delete the specified directory and all of its contents:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!hdfs dfs -rm -r dir"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify HDFS file in Spark (paths are relative to the user's home HDFS directory):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
"cell_type": "markdown",
"metadata": {},
"source": [
"# HDFS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run an HDFS command:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run a file system command on the file systems supported in Hadoop (FsShell):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the user's home directory:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -ls"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the HDFS root directory:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -ls /"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copy a local file to the user's directory on HDFS:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -put file.txt file.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Display the contents of the specified HDFS file:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -cat file.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Print the last 10 lines of the file to the terminal:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -cat file.txt | tail -n 10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View the contents of all files in a directory:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -cat dir/* | less"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copy an HDFS file to local:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -get file.txt file.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a directory on HDFS:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -mkdir dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Recursively delete the specified directory and all of its contents:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!hdfs dfs -rm -r dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify an HDFS file in Spark (relative paths are resolved against the user's HDFS home directory):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")"
]
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}