Added snippets for configuring Spark applications.

This commit is contained in:
Donne Martin 2015-03-13 08:25:50 -04:00
parent 53789e0e3e
commit ce605a6fdf

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:b92b25c3049cb5859029a2d6232ea44923a8ac687cfb4a5a2271cfae4b02c773"
"signature": "sha256:41d7aa3100998f60caf63db5380eee43e78d4de66c65af923e4877df81248ae7"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -22,7 +22,8 @@
"* Working with Partitions\n",
"* Caching RDDs\n",
"* Checkpointing RDDs\n",
"* Writing and Running a Spark Application"
"* Writing and Running a Spark Application\n",
"* Configuring Spark Applications"
]
},
{
@ -718,6 +719,103 @@
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configuring Spark Applications"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run a Spark app and set the configuration options in the command line:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!spark-submit --master spark//localhost:7077 --name 'App Name' script.py data/*"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Configure spark.conf:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"spark.app.name App Name\n",
"spark.ui.port 4141\n",
"spark.master spark://localhost:7077"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run a Spark app and set the configuration options through spark.conf:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!spark-submit --properties-file spark.conf script.py data/*"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set the config options programmatically:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sconf = SparkConf() \\\n",
" .setAppName(\"Word Count\") \\\n",
" .set(\"spark.ui.port\",\"4141\")\n",
"sc = SparkContext(conf=sconf)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set logging levels located in the following file, or place a copy in your pwd:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"$SPARK_HOME/conf/log4j.properties.template"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}