diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb index d0d722d..7dcd716 100644 --- a/pandas/pandas.ipynb +++ b/pandas/pandas.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:4c3e3bc5e2e66a53e95dce82d5f14b65d389cff01d434f1cf65ee91981716551" + "signature": "sha256:1d166238da27d666eaee4041e79ad7adcc22b6b5ef5d6098ce354d0160c79ba5" }, "nbformat": 3, "nbformat_minor": 0, @@ -3651,7 +3651,8 @@ "cell_type": "code", "collapsed": false, "input": [ - "df_8.add(df_9, fill_value=0)" + "df_10 = df_8.add(df_9, fill_value=0)\n", + "df_10" ], "language": "python", "metadata": {}, @@ -3707,6 +3708,326 @@ } ], "prompt_number": 71 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like NumPy, pandas supports arithmetic operations between DataFrames and Series.\n", + "\n", + "Match the index of the Series on the DataFrame's columns, broadcasting down the rows:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_8 = df_10.ix[0]\n", + "df_11 = df_10 - ser_8\n", + "df_11" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
0 0.000000 0.000000 0.000000 0.000000
1-0.003930-0.406224-0.530438 0.092224
2-0.111226-0.054178-0.013864 0.396653
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 72, + "text": [ + " a b c d\n", + "0 0.000000 0.000000 0.000000 0.000000\n", + "1 -0.003930 -0.406224 -0.530438 0.092224\n", + "2 -0.111226 -0.054178 -0.013864 0.396653" + ] + } + ], + "prompt_number": 72 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_9 = Series(range(3), index=['a', 'd', 'e'])\n", + "ser_9" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 73, + "text": [ + "a 0\n", + "d 1\n", + "e 2\n", + "dtype: int64" + ] + } + ], + "prompt_number": 73 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Match the index of the Series on the DataFrame's columns, broadcasting down the rows; labels present in only one of the two are unioned into the result and filled with NaN:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_11 - ser_9" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcde
0 0.000000NaNNaN-1.000000NaN
1-0.003930NaNNaN-0.907776NaN
2-0.111226NaNNaN-0.603347NaN
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 74, + "text": [ + " a b c d e\n", + "0 0.000000 NaN NaN -1.000000 NaN\n", + "1 -0.003930 NaN NaN -0.907776 NaN\n", + "2 -0.111226 NaN NaN -0.603347 NaN" + ] + } + ], + "prompt_number": 74 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_10" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
0 0.548814 1.132211 1.323088 0.000114
1 0.544883 0.725987 0.792650 0.092339
2 0.437587 1.078033 1.309223 0.396767
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 75, + "text": [ + " a b c d\n", + "0 0.548814 1.132211 1.323088 0.000114\n", + "1 0.544883 0.725987 0.792650 0.092339\n", + "2 0.437587 1.078033 1.309223 0.396767" + ] + } + ], + "prompt_number": 75 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_10 = Series([100, 200, 300])\n", + "print ser_10" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "0 100\n", + "1 200\n", + "2 300\n", + "dtype: int64\n" + ] + } + ], + "prompt_number": 76 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_10.sub(ser_10, axis=0)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcd
0 -99.451186 -98.867789 -98.676912 -99.999886
1-199.455117-199.274013-199.207350-199.907661
2-299.562413-298.921967-298.690777-299.603233
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 77, + "text": [ + " a b c d\n", + "0 -99.451186 -98.867789 -98.676912 -99.999886\n", + "1 -199.455117 -199.274013 -199.207350 -199.907661\n", + "2 -299.562413 -298.921967 -298.690777 -299.603233" + ] + } + ], + "prompt_number": 77 } ], "metadata": {}