mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
Added snippets for configuring Spark applications.
This commit is contained in:
parent
53789e0e3e
commit
ce605a6fdf
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"metadata": {
|
||||
"name": "",
|
||||
"signature": "sha256:b92b25c3049cb5859029a2d6232ea44923a8ac687cfb4a5a2271cfae4b02c773"
|
||||
"signature": "sha256:41d7aa3100998f60caf63db5380eee43e78d4de66c65af923e4877df81248ae7"
|
||||
},
|
||||
"nbformat": 3,
|
||||
"nbformat_minor": 0,
|
||||
|
@ -22,7 +22,8 @@
|
|||
"* Working with Partitions\n",
|
||||
"* Caching RDDs\n",
|
||||
"* Checkpointing RDDs\n",
|
||||
"* Writing and Running a Spark Application"
|
||||
"* Writing and Running a Spark Application\n",
|
||||
"* Configuring Spark Applications"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -718,6 +719,103 @@
|
|||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configuring Spark Applications"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run a Spark app and set the configuration options in the command line:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"!spark-submit --master spark://localhost:7077 --name 'App Name' script.py data/*"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Configure spark.conf:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"spark.app.name App Name\n",
|
||||
"spark.ui.port 4141\n",
|
||||
"spark.master spark://localhost:7077"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run a Spark app and set the configuration options through spark.conf:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"!spark-submit --properties-file spark.conf script.py data/*"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set the config options programmatically:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"sconf = SparkConf() \\\n",
|
||||
" .setAppName(\"Word Count\") \\\n",
|
||||
" .set(\"spark.ui.port\",\"4141\")\n",
|
||||
"sc = SparkContext(conf=sconf)"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set the logging levels in the following file, or place a copy of it in your working directory:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"$SPARK_HOME/conf/log4j.properties.template"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
}
|
||||
],
|
||||
"metadata": {}
|
||||
|
|
Loading…
Reference in New Issue
Block a user