diff --git a/commands/aws.ipynb b/commands/aws.ipynb index ae5e836..8524577 100644 --- a/commands/aws.ipynb +++ b/commands/aws.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:cbe332e49ee1a97b7e73961a5b49d8f32b4b13da36145a34bb8ecb72d5843320" + "signature": "sha256:f59ffce0eaf78612d7d3a80cbeded6c9da4ff5cb53efa49c1d2ce8cba1663c8c" }, "nbformat": 3, "nbformat_minor": 0, @@ -129,7 +129,6 @@ "metadata": {}, "outputs": [] }, - { "cell_type": "markdown", "metadata": {}, @@ -230,6 +229,102 @@ "language": "python", "metadata": {}, "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up s3-parallel-put\n", + "\n", + "s3-parallel-put is a great tool for uploading multiple files to S3 in parallel." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install package dependencies:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sudo apt-get install boto\n", + "sudo apt-get install git" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Clone the s3-parallel-put repo:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "git clone https://github.com/twpayne/s3-parallel-put.git" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up AWS keys for s3-parallel-put:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "export AWS_ACCESS_KEY_ID=XXX\n", + "export AWS_SECRET_ACCESS_KEY=XXX" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sample usage:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "s3-parallel-put --bucket=bucket --prefix=PREFIX SOURCE" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Dry run of uploading the files in the current directory to S3 under the given prefix, without first checking whether they already exist:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "s3-parallel-put --bucket=bucket --host=s3.amazonaws.com --put=stupid --dry-run --prefix=prefix/ ./" + ], + "language": "python", + "metadata": {}, + "outputs": [] } ], "metadata": {}