Added Random Forest Predicting section.

This commit is contained in:
Donne Martin 2015-03-20 11:35:25 -04:00
parent 3fcbc8364f
commit 055cd52cd3

View File

@ -2402,6 +2402,156 @@
"metadata": {},
"outputs": [],
"prompt_number": 39
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest: Predicting"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the test data:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_test = pd.read_csv('../data/titanic/test.csv')\n",
"df_test.head(3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 892</td>\n",
" <td> 3</td>\n",
" <td> Kelly, Mr. James</td>\n",
" <td> male</td>\n",
" <td> 34.5</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 330911</td>\n",
" <td> 7.8292</td>\n",
" <td> NaN</td>\n",
" <td> Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 893</td>\n",
" <td> 3</td>\n",
" <td> Wilkes, Mrs. James (Ellen Needs)</td>\n",
" <td> female</td>\n",
" <td> 47.0</td>\n",
" <td> 1</td>\n",
" <td> 0</td>\n",
" <td> 363272</td>\n",
" <td> 7.0000</td>\n",
" <td> NaN</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 894</td>\n",
" <td> 2</td>\n",
" <td> Myles, Mr. Thomas Francis</td>\n",
" <td> male</td>\n",
" <td> 62.0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 240276</td>\n",
" <td> 9.6875</td>\n",
" <td> NaN</td>\n",
" <td> Q</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 40,
"text": [
" PassengerId Pclass Name Sex Age SibSp \\\n",
"0 892 3 Kelly, Mr. James male 34.5 0 \n",
"1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 \n",
"2 894 2 Myles, Mr. Thomas Francis male 62.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 330911 7.8292 NaN Q \n",
"1 0 363272 7.0000 NaN S \n",
"2 0 240276 9.6875 NaN Q "
]
}
],
"prompt_number": 40
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note the test data does not contain the column 'Survived'; we'll use our trained model to predict these values."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_test = clean_data(df_test, drop_passenger_id=False)\n",
"test_data = df_test.values"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 41
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Take the decision trees and run them on the test data:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Test data (features), skip the first column 'PassengerId'\n",
"test_input = test_data[0::, 1::]\n",
"\n",
"# Predict the Survival values for the test data\n",
"output = forest.predict(test_input)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 42
}
],
"metadata": {}