From 055cd52cd3ba2d7646113c70318f2ff3727830b2 Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Fri, 20 Mar 2015 11:35:25 -0400 Subject: [PATCH] Added Random Forest Predicting section. --- kaggle/titanic.ipynb | 150 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/kaggle/titanic.ipynb b/kaggle/titanic.ipynb index 8f00a2e..aeb246e 100644 --- a/kaggle/titanic.ipynb +++ b/kaggle/titanic.ipynb @@ -2402,6 +2402,156 @@ "metadata": {}, "outputs": [], "prompt_number": 39 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forest: Predicting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read the test data:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_test = pd.read_csv('../data/titanic/test.csv')\n", + "df_test.head(3)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0 892 3 Kelly, Mr. James male 34.5 0 0 330911 7.8292 NaN Q
1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 0 363272 7.0000 NaN S
2 894 2 Myles, Mr. Thomas Francis male 62.0 0 0 240276 9.6875 NaN Q
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 40, + "text": [ + " PassengerId Pclass Name Sex Age SibSp \\\n", + "0 892 3 Kelly, Mr. James male 34.5 0 \n", + "1 893 3 Wilkes, Mrs. James (Ellen Needs) female 47.0 1 \n", + "2 894 2 Myles, Mr. Thomas Francis male 62.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 330911 7.8292 NaN Q \n", + "1 0 363272 7.0000 NaN S \n", + "2 0 240276 9.6875 NaN Q " + ] + } + ], + "prompt_number": 40 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note the test data does not contain the column 'Survived', we'll use our trained model to predict these values." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_test = clean_data(df_test, drop_passenger_id=False)\n", + "test_data = df_test.values" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 41 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Take the decision trees and run it on the test data:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# Test data (features), skip the first column 'PassengerId'\n", + "test_input = test_data[0::, 1::]\n", + "\n", + "# Predict the Survival values for the test data\n", + "output = forest.predict(test_input)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 42 } ], "metadata": {}