diff --git a/kaggle/titanic.ipynb b/kaggle/titanic.ipynb index aeb246e..3720515 100644 --- a/kaggle/titanic.ipynb +++ b/kaggle/titanic.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:d50d4742eadf392ff4590f3569ff98f508db3dacbf33e0a31bd799b6bf4826ac" + "signature": "sha256:fa67591f8fc6a26469c4ee41e525877faea06787800b22b7273babe8116a05c4" }, "nbformat": 3, "nbformat_minor": 0, @@ -29,6 +29,7 @@ "* Data Munging Summary\n", "* Random Forest: Training\n", "* Random Forest: Predicting\n", + "* Random Forest: Prepare for Kaggle Submission\n", "* Support Vector Machine: Training\n", "* Support Vector Machine: Predicting" ] @@ -2552,6 +2553,135 @@ "metadata": {}, "outputs": [], "prompt_number": 42 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Forest: Prepare for Kaggle Submission" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a DataFrame by combining the PassengerId column (the first column of the test data) with the predicted output:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "result = np.c_[test_data[:, 0].astype(int), output.astype(int)]\n", + "df_result = pd.DataFrame(result[:, 0:2], columns=['PassengerId', 'Survived'])\n", + "df_result.head(10)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + " | PassengerId | \n", + "Survived | \n", + "
---|---|---|
0 | \n", + "892 | \n", + "0 | \n", + "
1 | \n", + "893 | \n", + "0 | \n", + "
2 | \n", + "894 | \n", + "0 | \n", + "
3 | \n", + "895 | \n", + "0 | \n", + "
4 | \n", + "896 | \n", + "0 | \n", + "
5 | \n", + "897 | \n", + "0 | \n", + "
6 | \n", + "898 | \n", + "0 | \n", + "
7 | \n", + "899 | \n", + "0 | \n", + "
8 | \n", + "900 | \n", + "1 | \n", + "
9 | \n", + "901 | \n", + "0 | \n", + "