From 01d65fd232414f60c02164292da128517131ac6a Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Fri, 20 Mar 2015 11:36:41 -0400 Subject: [PATCH] Added Random Forest: Prepare for Kaggle Submission section. --- kaggle/titanic.ipynb | 132 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 131 insertions(+), 1 deletion(-) diff --git a/kaggle/titanic.ipynb b/kaggle/titanic.ipynb index aeb246e..3720515 100644 --- a/kaggle/titanic.ipynb +++ b/kaggle/titanic.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:d50d4742eadf392ff4590f3569ff98f508db3dacbf33e0a31bd799b6bf4826ac" + "signature": "sha256:fa67591f8fc6a26469c4ee41e525877faea06787800b22b7273babe8116a05c4" }, "nbformat": 3, "nbformat_minor": 0, @@ -29,6 +29,7 @@ "* Data Munging Summary\n", "* Random Forest: Training\n", "* Random Forest: Predicting\n", + "* Random Forest: Prepare for Kaggle Submission\n", "* Support Vector Machine: Training\n", "* Support Vector Machine: Predicting" ] @@ -2552,6 +2553,135 @@ "metadata": {}, "outputs": [], "prompt_number": 42 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forest: Prepare for Kaggle Submission" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a DataFrame by combining the index from the test data with the output of predictions:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "result = np.c_[test_data[:, 0].astype(int), output.astype(int)]\n", + "df_result = pd.DataFrame(result[:, 0:2], columns=['PassengerId', 'Survived'])\n", + "df_result.head(10)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvived
0 892 0
1 893 0
2 894 0
3 895 0
4 896 0
5 897 0
6 898 0
7 899 0
8 900 1
9 901 0
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 43, + "text": [ + " PassengerId Survived\n", + "0 892 0\n", + "1 893 0\n", + "2 894 0\n", + "3 895 0\n", + "4 896 0\n", + "5 897 0\n", + "6 898 0\n", + "7 899 0\n", + "8 900 1\n", + "9 901 0" + ] + } + ], + "prompt_number": 43 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Write the results to csv:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_result.to_csv('../data/titanic/results-rf.csv', index=False)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 44 } ], "metadata": {}