diff --git a/spark/spark.ipynb b/spark/spark.ipynb index f4f0c87..fc9f00d 100644 --- a/spark/spark.ipynb +++ b/spark/spark.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:ecb4af31fb2838a9be26c4692a4c2619957209df829895e8486de7eb84b59fa3" + "signature": "sha256:0426fb2480e184a6d65b40b69e4601e1abb23c84cc1090d1fe0e2e98803c6220" }, "nbformat": 3, "nbformat_minor": 0, @@ -16,7 +16,8 @@ "\n", "* Python Shell\n", "* RDDs\n", - "* Pair RDDs" + "* Pair RDDs\n", + "* Running Spark on a Cluster" ] }, { @@ -351,6 +352,118 @@ "language": "python", "metadata": {}, "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running Spark on a Cluster" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Start the standalone cluster's Master and Worker daemons:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sudo service spark-master start\n", + "sudo service spark-worker start" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Stop the standalone cluster's Master and Worker daemons:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sudo service spark-master stop\n", + "sudo service spark-worker stop" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Restart the standalone cluster's Master and Worker daemons:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sudo service spark-master restart\n", + "sudo service spark-worker restart" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "View the Spark standalone cluster UI:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "http://localhost:18080/" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Start the Spark shell and connect to the cluster:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "MASTER=spark://localhost:7077 pyspark" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Confirm you are connected to the correct master:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sc.master" + ], + "language": "python", + "metadata": {}, + "outputs": [] } ], "metadata": {}