diff --git a/kaggle/titanic.ipynb b/kaggle/titanic.ipynb
index 8f00a2e..aeb246e 100644
--- a/kaggle/titanic.ipynb
+++ b/kaggle/titanic.ipynb
@@ -2402,6 +2402,156 @@
"metadata": {},
"outputs": [],
"prompt_number": 39
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Random Forest: Predicting"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Read the test data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# Load the test set from CSV and preview its first three rows\n",
+ "df_test = pd.read_csv('../data/titanic/test.csv')\n",
+ "df_test.head(n=3)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
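+ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>PassengerId</th>\n",
+ "      <th>Pclass</th>\n",
+ "      <th>Name</th>\n",
+ "      <th>Sex</th>\n",
+ "      <th>Age</th>\n",
+ "      <th>SibSp</th>\n",
+ "      <th>Parch</th>\n",
+ "      <th>Ticket</th>\n",
+ "      <th>Fare</th>\n",
+ "      <th>Cabin</th>\n",
+ "      <th>Embarked</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>892</td>\n",
+ "      <td>3</td>\n",
+ "      <td>Kelly, Mr. James</td>\n",
+ "      <td>male</td>\n",
+ "      <td>34.5</td>\n",
+ "      <td>0</td>\n",
+ "      <td>0</td>\n",
+ "      <td>330911</td>\n",
+ "      <td>7.8292</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>Q</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>893</td>\n",
+ "      <td>3</td>\n",
+ "      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
+ "      <td>female</td>\n",
+ "      <td>47.0</td>\n",
+ "      <td>1</td>\n",
+ "      <td>0</td>\n",
+ "      <td>363272</td>\n",
+ "      <td>7.0000</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>S</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>894</td>\n",
+ "      <td>2</td>\n",
+ "      <td>Myles, Mr. Thomas Francis</td>\n",
+ "      <td>male</td>\n",
+ "      <td>62.0</td>\n",
+ "      <td>0</td>\n",
+ "      <td>0</td>\n",
+ "      <td>240276</td>\n",
+ "      <td>9.6875</td>\n",
+ "      <td>NaN</td>\n",
+ "      <td>Q</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"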
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 40,
+ "text": [
+ " PassengerId Pclass Name Sex Age SibSp \\\n",
+ "0 892 3 Kelly, Mr. James male 34.5 0 \n",
+ "1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 \n",
+ "2 894 2 Myles, Mr. Thomas Francis male 62.0 0 \n",
+ "\n",
+ " Parch Ticket Fare Cabin Embarked \n",
+ "0 0 330911 7.8292 NaN Q \n",
+ "1 0 363272 7.0000 NaN S \n",
+ "2 0 240276 9.6875 NaN Q "
+ ]
+ }
+ ],
+ "prompt_number": 40
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note that the test data does not contain the 'Survived' column; we'll use our trained model to predict these values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# Run the test set through clean_data (drop_passenger_id=False, so PassengerId should remain as a column)\n",
+ "df_test = clean_data(df_test, drop_passenger_id=False)\n",
+ "\n",
+ "# Take the cleaned frame's underlying array of values for the model\n",
+ "test_data = df_test.values"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 41
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Take the forest of decision trees and run it on the test data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "# Feature columns only: keep every row but skip the leading 'PassengerId' column\n",
+ "test_input = test_data[:, 1:]\n",
+ "\n",
+ "# Use the forest to predict the 'Survived' value for each row of the test data\n",
+ "output = forest.predict(test_input)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 42
}
],
"metadata": {}