From ce605a6fdfc1b0b587027c6d264efb21014525ed Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Fri, 13 Mar 2015 08:25:50 -0400 Subject: [PATCH] Added snippets for configuring Spark applications. --- spark/spark.ipynb | 102 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/spark/spark.ipynb b/spark/spark.ipynb index ce1b437..1999b94 100644 --- a/spark/spark.ipynb +++ b/spark/spark.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:b92b25c3049cb5859029a2d6232ea44923a8ac687cfb4a5a2271cfae4b02c773" + "signature": "sha256:41d7aa3100998f60caf63db5380eee43e78d4de66c65af923e4877df81248ae7" }, "nbformat": 3, "nbformat_minor": 0, @@ -22,7 +22,8 @@ "* Working with Partitions\n", "* Caching RDDs\n", "* Checkpointing RDDs\n", - "* Writing and Running a Spark Application" + "* Writing and Running a Spark Application\n", + "* Configuring Spark Applications" ] }, { @@ -718,6 +719,103 @@ "language": "python", "metadata": {}, "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuring Spark Applications" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run a Spark app and set the configuration options in the command line:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!spark-submit --master spark://localhost:7077 --name 'App Name' script.py data/*" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Configure spark.conf:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "spark.app.name App Name\n", + "spark.ui.port 4141\n", + "spark.master spark://localhost:7077" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Run a Spark app and set the configuration options through spark.conf:" + ] + }, + { + "cell_type": "code", + 
"collapsed": false, + "input": [ + "!spark-submit --properties-file spark.conf script.py data/*" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the config options programmatically:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sconf = SparkConf() \\\n", + " .setAppName(\"Word Count\") \\\n", + " .set(\"spark.ui.port\",\"4141\")\n", + "sc = SparkContext(conf=sconf)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set logging levels in the following file, or place a copy in your working directory:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "$SPARK_HOME/conf/log4j.properties.template" + ], + "language": "python", + "metadata": {}, + "outputs": [] } ], "metadata": {}