mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
Added Random Forest Predicting section.
This commit is contained in:
parent
3fcbc8364f
commit
055cd52cd3
|
@ -2402,6 +2402,156 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"prompt_number": 39
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Random Forest: Predicting"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Read the test data:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"df_test = pd.read_csv('../data/titanic/test.csv')\n",
|
||||
"df_test.head(3)"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"html": [
|
||||
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>PassengerId</th>\n",
|
||||
" <th>Pclass</th>\n",
|
||||
" <th>Name</th>\n",
|
||||
" <th>Sex</th>\n",
|
||||
" <th>Age</th>\n",
|
||||
" <th>SibSp</th>\n",
|
||||
" <th>Parch</th>\n",
|
||||
" <th>Ticket</th>\n",
|
||||
" <th>Fare</th>\n",
|
||||
" <th>Cabin</th>\n",
|
||||
" <th>Embarked</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td> 892</td>\n",
|
||||
" <td> 3</td>\n",
|
||||
" <td> Kelly, Mr. James</td>\n",
|
||||
" <td> male</td>\n",
|
||||
" <td> 34.5</td>\n",
|
||||
" <td> 0</td>\n",
|
||||
" <td> 0</td>\n",
|
||||
" <td> 330911</td>\n",
|
||||
" <td> 7.8292</td>\n",
|
||||
" <td> NaN</td>\n",
|
||||
" <td> Q</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td> 893</td>\n",
|
||||
" <td> 3</td>\n",
|
||||
" <td> Wilkes, Mrs. James (Ellen Needs)</td>\n",
|
||||
" <td> female</td>\n",
|
||||
" <td> 47.0</td>\n",
|
||||
" <td> 1</td>\n",
|
||||
" <td> 0</td>\n",
|
||||
" <td> 363272</td>\n",
|
||||
" <td> 7.0000</td>\n",
|
||||
" <td> NaN</td>\n",
|
||||
" <td> S</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td> 894</td>\n",
|
||||
" <td> 2</td>\n",
|
||||
" <td> Myles, Mr. Thomas Francis</td>\n",
|
||||
" <td> male</td>\n",
|
||||
" <td> 62.0</td>\n",
|
||||
" <td> 0</td>\n",
|
||||
" <td> 0</td>\n",
|
||||
" <td> 240276</td>\n",
|
||||
" <td> 9.6875</td>\n",
|
||||
" <td> NaN</td>\n",
|
||||
" <td> Q</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"metadata": {},
|
||||
"output_type": "pyout",
|
||||
"prompt_number": 40,
|
||||
"text": [
|
||||
" PassengerId Pclass Name Sex Age SibSp \\\n",
|
||||
"0 892 3 Kelly, Mr. James male 34.5 0 \n",
|
||||
"1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 \n",
|
||||
"2 894 2 Myles, Mr. Thomas Francis male 62.0 0 \n",
|
||||
"\n",
|
||||
" Parch Ticket Fare Cabin Embarked \n",
|
||||
"0 0 330911 7.8292 NaN Q \n",
|
||||
"1 0 363272 7.0000 NaN S \n",
|
||||
"2 0 240276 9.6875 NaN Q "
|
||||
]
|
||||
}
|
||||
],
|
||||
"prompt_number": 40
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note the test data does not contain the column 'Survived', we'll use our trained model to predict these values."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"df_test = clean_data(df_test, drop_passenger_id=False)\n",
|
||||
"test_data = df_test.values"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"prompt_number": 41
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Take the decision trees and run it on the test data:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"# Test data (features), skip the first column 'PassengerId'\n",
|
||||
"test_input = test_data[0::, 1::]\n",
|
||||
"\n",
|
||||
"# Predict the Survival values for the test data\n",
|
||||
"output = forest.predict(test_input)"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"prompt_number": 42
|
||||
}
|
||||
],
|
||||
"metadata": {}
|
||||
|
|
Loading…
Reference in New Issue
Block a user