Added mrjob snippet to run a job on EMR or locally.

This commit is contained in:
Donne Martin 2015-02-23 11:48:03 -05:00
parent 7b15eb949b
commit 89a7bf4b93

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:2531311c9289bbab1a6c03f5be4cffdd2eee75ac64274fe4b532ab55316c066d"
"signature": "sha256:44b8b97435ef131ae163887a097426e7d9818e0394168d960e892917df05417c"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -333,6 +333,47 @@
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## mrjob"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run an mrjob on the given input (must be a flat file hierarchy), placing the results in the output (output directory must not exist):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"python mr-script.py -r emr s3://bucket-source/ --output-dir=s3://bucket-dest/"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run an mrjob locally on the specified input file, sending the results to the specified output file:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"python mrjob_script.py input_data.txt > output_data.txt"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}