From 45634da9ff997a1f0fd28cc62419ba78b751b9bf Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Sat, 31 Jan 2015 07:46:16 -0500 Subject: [PATCH] Seeded random for more predictability between iterations. Added snippets for setting a fill value for indices that do not overlap for arithmetic operations. --- pandas/pandas.ipynb | 216 +++++++++++++++++++++++++++++++++----------- 1 file changed, 161 insertions(+), 55 deletions(-) diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb index 94a0c4b..1198a7c 100644 --- a/pandas/pandas.ipynb +++ b/pandas/pandas.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:f58e882e3019ecb243505ef140cb0aedee21ecd47d1210ca8e6b4e2281c1316e" + "signature": "sha256:56fea349850ffa0f72e6b8900647ed0f5ede765deae5932b1a26237f03166107" }, "nbformat": 3, "nbformat_minor": 0, @@ -3332,6 +3332,7 @@ "cell_type": "code", "collapsed": false, "input": [ + "np.random.seed(0)\n", "ser_6 = Series(np.random.randn(5),\n", " index=['a', 'b', 'c', 'd', 'e'])\n", "ser_6\n" @@ -3344,11 +3345,11 @@ "output_type": "pyout", "prompt_number": 64, "text": [ - "a -0.224292\n", - "b -1.166149\n", - "c -2.078194\n", - "d 2.060130\n", - "e 0.292102\n", + "a 1.764052\n", + "b 0.400157\n", + "c 0.978738\n", + "d 2.240893\n", + "e 1.867558\n", "dtype: float64" ] } @@ -3359,6 +3360,7 @@ "cell_type": "code", "collapsed": false, "input": [ + "np.random.seed(1)\n", "ser_7 = Series(np.random.randn(5),\n", " index=['a', 'c', 'e', 'f', 'g'])\n", "ser_7" @@ -3371,11 +3373,11 @@ "output_type": "pyout", "prompt_number": 65, "text": [ - "a -0.329560\n", - "c 1.009019\n", - "e -2.489898\n", - "f -0.731142\n", - "g -0.025979\n", + "a 1.624345\n", + "c -0.611756\n", + "e -0.528172\n", + "f -1.072969\n", + "g 0.865408\n", "dtype: float64" ] } @@ -3386,7 +3388,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Adding objects results in the union of index pairs if the pairs are not the same, resulting in NAs for indices that do not overlap:" + "Adding objects results in the union of index pairs if the pairs are not the same, resulting in NaN for indices that do not overlap:" ] }, { @@ -3403,11 +3405,11 @@ "output_type": "pyout", "prompt_number": 66, "text": [ - "a -0.553851\n", + "a 3.388398\n", "b NaN\n", - "c -1.069176\n", + "c 0.366982\n", "d NaN\n", - "e -2.197796\n", + "e 1.339386\n", "f NaN\n", "g NaN\n", "dtype: float64" @@ -3416,10 +3418,45 @@ ], "prompt_number": 66 }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set a fill value instead of NaN for indices that do not overlap:" + ] + }, { "cell_type": "code", "collapsed": false, "input": [ + "ser_6.add(ser_7, fill_value=0)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 67, + "text": [ + "a 3.388398\n", + "b 0.400157\n", + "c 0.366982\n", + "d 2.240893\n", + "e 1.339386\n", + "f -1.072969\n", + "g 0.865408\n", + "dtype: float64" + ] + } + ], + "prompt_number": 67 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "np.random.seed(0)\n", "frame_8 = DataFrame(np.random.rand(9).reshape((3, 3)),\n", " columns=['a', 'b', 'c'])\n", "frame_8" @@ -3442,21 +3479,21 @@ " \n", " \n", " 0\n", - " 0.529893\n", - " 0.251433\n", - " 0.111151\n", + " 0.548814\n", + " 0.715189\n", + " 0.602763\n", " \n", " \n", " 1\n", - " 0.088052\n", - " 0.595180\n", - " 0.128917\n", + " 0.544883\n", + " 0.423655\n", + " 0.645894\n", " \n", " \n", " 2\n", - " 0.237248\n", - " 0.515434\n", - " 0.720097\n", + " 0.437587\n", + " 0.891773\n", + " 0.963663\n", " \n", " \n", "\n", @@ -3464,21 +3501,22 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 67, + "prompt_number": 68, "text": [ " a b c\n", - "0 0.529893 0.251433 0.111151\n", - "1 0.088052 0.595180 0.128917\n", - "2 0.237248 0.515434 0.720097" + "0 0.548814 0.715189 0.602763\n", + "1 0.544883 0.423655 0.645894\n", + "2 0.437587 0.891773 0.963663" ] } ], - "prompt_number": 67 + "prompt_number": 68 }, { "cell_type": "code", "collapsed": false, "input": [ + "np.random.seed(1)\n", "frame_9 = DataFrame(np.random.rand(9).reshape((3, 3)),\n", " columns=['b', 'c', 'd'])\n", "frame_9" @@ -3501,21 +3539,21 @@ " \n", " \n", " 0\n", - " 0.924304\n", - " 0.058943\n", - " 0.434582\n", + " 0.417022\n", + " 0.720324\n", + " 0.000114\n", " \n", " \n", " 1\n", - " 0.730805\n", - " 0.545480\n", - " 0.299172\n", + " 0.302333\n", + " 0.146756\n", + " 0.092339\n", " \n", " \n", " 2\n", - " 0.280603\n", - " 0.703927\n", - " 0.790074\n", + " 0.186260\n", + " 0.345561\n", + " 0.396767\n", " \n", " \n", "\n", @@ -3523,22 +3561,22 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 68, + "prompt_number": 69, "text": [ " b c d\n", - "0 0.924304 0.058943 0.434582\n", - "1 0.730805 0.545480 0.299172\n", - "2 0.280603 0.703927 0.790074" + "0 0.417022 0.720324 0.000114\n", + "1 0.302333 0.146756 0.092339\n", + "2 0.186260 0.345561 0.396767" ] } ], - "prompt_number": 68 + "prompt_number": 69 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Adding objects results in the union of index pairs for rows and columns if the pairs are not the same, resulting in NAs for indices that do not overlap:" + "Adding objects results in the union of index pairs for rows and columns if the pairs are not the same, resulting in NaN for indices that do not overlap:" ] }, { @@ -3567,22 +3605,22 @@ " \n", " 0\n", " NaN\n", - " 1.175738\n", - " 0.170094\n", + " 1.132211\n", + " 1.323088\n", " NaN\n", " \n", " \n", " 1\n", " NaN\n", - " 1.325985\n", - " 0.674397\n", + " 0.725987\n", + " 0.792650\n", " NaN\n", " \n", " \n", " 2\n", " NaN\n", - " 0.796037\n", - " 1.424024\n", + " 1.078033\n", + " 1.309223\n", " NaN\n", " \n", " \n", @@ -3591,16 +3629,84 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 69, + "prompt_number": 70, "text": [ " a b c d\n", - "0 NaN 1.175738 0.170094 NaN\n", - "1 NaN 1.325985 0.674397 NaN\n", - "2 NaN 0.796037 1.424024 NaN" + "0 NaN 1.132211 1.323088 NaN\n", + "1 NaN 0.725987 0.792650 NaN\n", + "2 NaN 1.078033 1.309223 NaN" ] } ], - "prompt_number": 69 + "prompt_number": 70 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set a fill value instead of NaN for indices that do not overlap:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_8.add(frame_9, fill_value=0)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
0 0.548814 1.132211 1.323088 0.000114
1 0.544883 0.725987 0.792650 0.092339
2 0.437587 1.078033 1.309223 0.396767
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 71, + "text": [ + " a b c d\n", + "0 0.548814 1.132211 1.323088 0.000114\n", + "1 0.544883 0.725987 0.792650 0.092339\n", + "2 0.437587 1.078033 1.309223 0.396767" + ] + } + ], + "prompt_number": 71 } ], "metadata": {}