diff --git a/commands/aws.ipynb b/commands/aws.ipynb index 5987e22..629a0a8 100644 --- a/commands/aws.ipynb +++ b/commands/aws.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:2531311c9289bbab1a6c03f5be4cffdd2eee75ac64274fe4b532ab55316c066d" + "signature": "sha256:44b8b97435ef131ae163887a097426e7d9818e0394168d960e892917df05417c" }, "nbformat": 3, "nbformat_minor": 0, @@ -333,6 +333,47 @@ "language": "python", "metadata": {}, "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mrjob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run an mrjob on EMR against the given input (must be a flat file hierarchy), writing the results to the given output directory (which must not already exist):" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "python mr-script.py -r emr s3://bucket-source/ --output-dir=s3://bucket-dest/" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run an mrjob locally on the specified input file, sending the results to the specified output file:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "python mrjob_script.py input_data.txt > output_data.txt" + ], + "language": "python", + "metadata": {}, + "outputs": [] } ], "metadata": {}