Added Series ranking snippets. Tweaked the positions of some comments relative to the code. Made minor tweaks to some snippets.

2024-03-22 13:30:56 +08:00 · 2015-02-01 07:33:19 -05:00 · 2015-02-01 07:33:19 -05:00 · 3f5e508eb6
commit 3f5e508eb6
parent 91cdd02752
1 changed files with 250 additions and 115 deletions
--- a/pandas/pandas.ipynb
+++ b/pandas/pandas.ipynb
@ -1,7 +1,7 @@
 {
 "metadata": {
  "name": "",
-  "signature": "sha256:17491453ae73630f23f856b3c0724fbd00a52c4f53b239a07e340ec3113ea230"
+  "signature": "sha256:2b6aa402b58aa2da8c06d378f19732970903abe573b39f9a7490982d0e2ebcbc"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
@ -3331,6 +3331,13 @@
      "## Arithmetic and Data Alignment"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Adding Series objects whose indexes differ produces the union of the two indexes, with NaN for index labels that do not overlap:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3338,7 +3345,7 @@
      "np.random.seed(0)\n",
      "ser_6 = Series(np.random.randn(5),\n",
      "               index=['a', 'b', 'c', 'd', 'e'])\n",
-      "ser_6\n"
+      "ser_6"
     ],
     "language": "python",
     "metadata": {},
@ -3387,13 +3394,6 @@
     ],
     "prompt_number": 65
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Adding objects results in the union of index pairs if the pairs are not the same, resulting in NaN for indices that do not overlap:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3455,6 +3455,13 @@
     ],
     "prompt_number": 67
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Adding DataFrame objects whose row or column labels differ produces the union of both the row and the column indexes, with NaN wherever the labels do not overlap:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3575,13 +3582,6 @@
     ],
     "prompt_number": 69
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Adding objects results in the union of index pairs for rows and columns if the pairs are not the same, resulting in NaN for indices that do not overlap:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3784,6 +3784,13 @@
     ],
     "prompt_number": 72
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Match the index of the Series against the DataFrame's columns, broadcasting down the rows and taking the union of the indices that do not match:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3808,13 +3815,6 @@
     ],
     "prompt_number": 73
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Match the index of the Series on the DataFrame's columns, broadcasting down the rows and union the indices that do not match:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3880,6 +3880,13 @@
     ],
     "prompt_number": 74
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
@ -3946,37 +3953,30 @@
     "collapsed": false,
     "input": [
      "ser_10 = Series([100, 200, 300])\n",
-      "print ser_10"
+      "ser_10"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
-       "output_type": "stream",
+       "metadata": {},
-       "stream": "stdout",
+       "output_type": "pyout",
       "prompt_number": 76,
       "text": [
        "0    100\n",
        "1    200\n",
        "2    300\n",
-        "dtype: int64\n"
+        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 76
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_11 = df_10.sub(ser_10, axis=0)\n",
+      "df_10.sub(ser_10, axis=0)"
      "df_11"
     ],
     "language": "python",
     "metadata": {},
@ -4051,8 +4051,8 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_12 = np.abs(df_11)\n",
+      "df_11 = np.abs(df_11)\n",
-      "df_12"
+      "df_11"
     ],
     "language": "python",
     "metadata": {},
@ -4073,24 +4073,24 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-        "      <td>  99.451186</td>\n",
+        "      <td> 0.000000</td>\n",
-        "      <td>  98.867789</td>\n",
+        "      <td> 0.000000</td>\n",
-        "      <td>  98.676912</td>\n",
+        "      <td> 0.000000</td>\n",
-        "      <td>  99.999886</td>\n",
+        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-        "      <td> 199.455117</td>\n",
+        "      <td> 0.003930</td>\n",
-        "      <td> 199.274013</td>\n",
+        "      <td> 0.406224</td>\n",
-        "      <td> 199.207350</td>\n",
+        "      <td> 0.530438</td>\n",
-        "      <td> 199.907661</td>\n",
+        "      <td> 0.092224</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-        "      <td> 299.562413</td>\n",
+        "      <td> 0.111226</td>\n",
-        "      <td> 298.921967</td>\n",
+        "      <td> 0.054178</td>\n",
-        "      <td> 298.690777</td>\n",
+        "      <td> 0.013864</td>\n",
-        "      <td> 299.603233</td>\n",
+        "      <td> 0.396653</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@ -4100,10 +4100,10 @@
       "output_type": "pyout",
       "prompt_number": 78,
       "text": [
-        "            a           b           c           d\n",
+        "          a         b         c         d\n",
-        "0   99.451186   98.867789   98.676912   99.999886\n",
+        "0  0.000000  0.000000  0.000000  0.000000\n",
-        "1  199.455117  199.274013  199.207350  199.907661\n",
+        "1  0.003930  0.406224  0.530438  0.092224\n",
-        "2  299.562413  298.921967  298.690777  299.603233"
+        "2  0.111226  0.054178  0.013864  0.396653"
       ]
      }
     ],
@ -4121,7 +4121,7 @@
     "collapsed": false,
     "input": [
      "func_1 = lambda x: x.max() - x.min()\n",
-      "df_12.apply(func_1)"
+      "df_11.apply(func_1)"
     ],
     "language": "python",
     "metadata": {},
@ -4131,10 +4131,10 @@
       "output_type": "pyout",
       "prompt_number": 79,
       "text": [
-        "a    200.111226\n",
+        "a    0.111226\n",
-        "b    200.054178\n",
+        "b    0.406224\n",
-        "c    200.013864\n",
+        "c    0.530438\n",
-        "d    199.603347\n",
+        "d    0.396653\n",
        "dtype: float64"
       ]
      }
@ -4152,7 +4152,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_12.apply(func_1, axis=1)"
+      "df_11.apply(func_1, axis=1)"
     ],
     "language": "python",
     "metadata": {},
@ -4162,9 +4162,9 @@
       "output_type": "pyout",
       "prompt_number": 80,
       "text": [
-        "0    1.322973\n",
+        "0    0.000000\n",
-        "1    0.700311\n",
+        "1    0.526508\n",
-        "2    0.912456\n",
+        "2    0.382789\n",
        "dtype: float64"
       ]
      }
@ -4183,7 +4183,7 @@
     "collapsed": false,
     "input": [
      "func_2 = lambda x: Series([x.min(), x.max()], index=['min', 'max'])\n",
-      "df_12.apply(func_2)"
+      "df_11.apply(func_2)"
     ],
     "language": "python",
     "metadata": {},
@ -4204,17 +4204,17 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>min</th>\n",
-        "      <td>  99.451186</td>\n",
+        "      <td> 0.000000</td>\n",
-        "      <td>  98.867789</td>\n",
+        "      <td> 0.000000</td>\n",
-        "      <td>  98.676912</td>\n",
+        "      <td> 0.000000</td>\n",
-        "      <td>  99.999886</td>\n",
+        "      <td> 0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>max</th>\n",
-        "      <td> 299.562413</td>\n",
+        "      <td> 0.111226</td>\n",
-        "      <td> 298.921967</td>\n",
+        "      <td> 0.406224</td>\n",
-        "      <td> 298.690777</td>\n",
+        "      <td> 0.530438</td>\n",
-        "      <td> 299.603233</td>\n",
+        "      <td> 0.396653</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@ -4224,9 +4224,9 @@
       "output_type": "pyout",
       "prompt_number": 81,
       "text": [
-        "              a           b           c           d\n",
+        "            a         b         c         d\n",
-        "min   99.451186   98.867789   98.676912   99.999886\n",
+        "min  0.000000  0.000000  0.000000  0.000000\n",
-        "max  299.562413  298.921967  298.690777  299.603233"
+        "max  0.111226  0.406224  0.530438  0.396653"
       ]
      }
     ],
@ -4244,7 +4244,7 @@
     "collapsed": false,
     "input": [
      "func_3 = lambda x: '%.2f' %x\n",
-      "df_12.applymap(func_3)"
+      "df_11.applymap(func_3)"
     ],
     "language": "python",
     "metadata": {},
@ -4265,24 +4265,24 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-        "      <td>  99.45</td>\n",
+        "      <td> 0.00</td>\n",
-        "      <td>  98.87</td>\n",
+        "      <td> 0.00</td>\n",
-        "      <td>  98.68</td>\n",
+        "      <td> 0.00</td>\n",
-        "      <td> 100.00</td>\n",
+        "      <td> 0.00</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-        "      <td> 199.46</td>\n",
+        "      <td> 0.00</td>\n",
-        "      <td> 199.27</td>\n",
+        "      <td> 0.41</td>\n",
-        "      <td> 199.21</td>\n",
+        "      <td> 0.53</td>\n",
-        "      <td> 199.91</td>\n",
+        "      <td> 0.09</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-        "      <td> 299.56</td>\n",
+        "      <td> 0.11</td>\n",
-        "      <td> 298.92</td>\n",
+        "      <td> 0.05</td>\n",
-        "      <td> 298.69</td>\n",
+        "      <td> 0.01</td>\n",
-        "      <td> 299.60</td>\n",
+        "      <td> 0.40</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@ -4292,10 +4292,10 @@
       "output_type": "pyout",
       "prompt_number": 82,
       "text": [
-        "        a       b       c       d\n",
+        "      a     b     c     d\n",
-        "0   99.45   98.87   98.68  100.00\n",
+        "0  0.00  0.00  0.00  0.00\n",
-        "1  199.46  199.27  199.21  199.91\n",
+        "1  0.00  0.41  0.53  0.09\n",
-        "2  299.56  298.92  298.69  299.60"
+        "2  0.11  0.05  0.01  0.40"
       ]
      }
     ],
@ -4312,7 +4312,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_12['a'].map(func_3)"
+      "df_11['a'].map(func_3)"
     ],
     "language": "python",
     "metadata": {},
@ -4322,9 +4322,9 @@
       "output_type": "pyout",
       "prompt_number": 83,
       "text": [
-        "0     99.45\n",
+        "0    0.00\n",
-        "1    199.46\n",
+        "1    0.00\n",
-        "2    299.56\n",
+        "2    0.11\n",
        "Name: a, dtype: object"
       ]
      }
@ -4350,7 +4350,7 @@
      {
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 114,
+       "prompt_number": 84,
       "text": [
        "fo    100\n",
        "br    200\n",
@ -4360,7 +4360,7 @@
       ]
      }
     ],
-     "prompt_number": 114
+     "prompt_number": 84
    },
    {
     "cell_type": "markdown",
@ -4381,7 +4381,7 @@
      {
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 115,
+       "prompt_number": 85,
       "text": [
        "br    200\n",
        "bz    300\n",
@ -4391,7 +4391,7 @@
       ]
      }
     ],
-     "prompt_number": 115
+     "prompt_number": 85
    },
    {
     "cell_type": "markdown",
@ -4412,7 +4412,7 @@
      {
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 132,
+       "prompt_number": 86,
       "text": [
        "fo    100\n",
        "br    200\n",
@ -4422,16 +4422,16 @@
       ]
      }
     ],
-     "prompt_number": 132
+     "prompt_number": 86
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_13 = DataFrame(np.arange(12).reshape((3, 4)),\n",
+      "df_12 = DataFrame(np.arange(12).reshape((3, 4)),\n",
      "                  index=['three', 'one', 'two'],\n",
      "                  columns=['c', 'a', 'b', 'd'])\n",
-      "df_13"
+      "df_12"
     ],
     "language": "python",
     "metadata": {},
@ -4477,7 +4477,7 @@
       ],
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 128,
+       "prompt_number": 87,
       "text": [
        "       c  a   b   d\n",
        "three  0  1   2   3\n",
@ -4486,7 +4486,7 @@
       ]
      }
     ],
-     "prompt_number": 128
+     "prompt_number": 87
    },
    {
     "cell_type": "markdown",
@ -4499,7 +4499,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_13.sort_index()"
+      "df_12.sort_index()"
     ],
     "language": "python",
     "metadata": {},
@ -4545,7 +4545,7 @@
       ],
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 129,
+       "prompt_number": 88,
       "text": [
        "       c  a   b   d\n",
        "one    4  5   6   7\n",
@ -4554,7 +4554,7 @@
       ]
      }
     ],
-     "prompt_number": 129
+     "prompt_number": 88
    },
    {
     "cell_type": "markdown",
@ -4567,7 +4567,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_13.sort_index(axis=1, ascending=False)"
+      "df_12.sort_index(axis=1, ascending=False)"
     ],
     "language": "python",
     "metadata": {},
@ -4613,7 +4613,7 @@
       ],
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 131,
+       "prompt_number": 89,
       "text": [
        "        d  c   b  a\n",
        "three   3  0   2  1\n",
@ -4622,7 +4622,7 @@
       ]
      }
     ],
-     "prompt_number": 131
+     "prompt_number": 89
    },
    {
     "cell_type": "markdown",
@ -4635,7 +4635,7 @@
     "cell_type": "code",
     "collapsed": false,
     "input": [
-      "df_13.sort_index(by=['d', 'c'])"
+      "df_12.sort_index(by=['d', 'c'])"
     ],
     "language": "python",
     "metadata": {},
@ -4681,7 +4681,7 @@
       ],
       "metadata": {},
       "output_type": "pyout",
-       "prompt_number": 134,
+       "prompt_number": 90,
       "text": [
        "       c  a   b   d\n",
        "three  0  1   2   3\n",
@ -4690,7 +4690,142 @@
       ]
      }
     ],
-     "prompt_number": 134
+     "prompt_number": 90
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Ranking is similar to numpy.argsort except that ties are broken by assigning each group the mean rank:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_11 = Series([7, -5, 7, 4, 2, 0, 4, 7])\n",
      "ser_11 = ser_11.order()\n",
      "ser_11"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 91,
       "text": [
        "1   -5\n",
        "5    0\n",
        "4    2\n",
        "3    4\n",
        "6    4\n",
        "0    7\n",
        "2    7\n",
        "7    7\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 91
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_11.rank()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 92,
       "text": [
        "1    1.0\n",
        "5    2.0\n",
        "4    3.0\n",
        "3    4.5\n",
        "6    4.5\n",
        "0    7.0\n",
        "2    7.0\n",
        "7    7.0\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 92
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Rank a Series, breaking ties according to the order in which the values appear in the data:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_11.rank(method='first')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 93,
       "text": [
        "1    1\n",
        "5    2\n",
        "4    3\n",
        "3    4\n",
        "6    5\n",
        "0    6\n",
        "2    7\n",
        "7    8\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 93
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Rank a Series in descending order, assigning each group of tied values the maximum rank in that group:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_11.rank(ascending=False, method='max')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 94,
       "text": [
        "1    8\n",
        "5    7\n",
        "4    6\n",
        "3    5\n",
        "6    5\n",
        "0    3\n",
        "2    3\n",
        "7    3\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 94
    }
   ],
   "metadata": {}