Added Series ranking snippets. Tweaked the positions of some comments relative to the code. Minor tweaks to some snippets.

This commit is contained in:
Donne Martin 2015-02-01 07:33:19 -05:00
parent 91cdd02752
commit 3f5e508eb6

View File

@ -1,7 +1,7 @@
{ {
"metadata": { "metadata": {
"name": "", "name": "",
"signature": "sha256:17491453ae73630f23f856b3c0724fbd00a52c4f53b239a07e340ec3113ea230" "signature": "sha256:2b6aa402b58aa2da8c06d378f19732970903abe573b39f9a7490982d0e2ebcbc"
}, },
"nbformat": 3, "nbformat": 3,
"nbformat_minor": 0, "nbformat_minor": 0,
@ -3331,6 +3331,13 @@
"## Arithmetic and Data Alignment" "## Arithmetic and Data Alignment"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding Series objects aligns them on the union of their index labels; labels that do not appear in both Series produce NaN:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3338,7 +3345,7 @@
"np.random.seed(0)\n", "np.random.seed(0)\n",
"ser_6 = Series(np.random.randn(5),\n", "ser_6 = Series(np.random.randn(5),\n",
" index=['a', 'b', 'c', 'd', 'e'])\n", " index=['a', 'b', 'c', 'd', 'e'])\n",
"ser_6\n" "ser_6"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -3387,13 +3394,6 @@
], ],
"prompt_number": 65 "prompt_number": 65
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding objects results in the union of index pairs if the pairs are not the same, resulting in NaN for indices that do not overlap:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3455,6 +3455,13 @@
], ],
"prompt_number": 67 "prompt_number": 67
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding DataFrame objects aligns them on the union of both their row and column labels; labels that do not appear in both DataFrames produce NaN:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3575,13 +3582,6 @@
], ],
"prompt_number": 69 "prompt_number": 69
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding objects results in the union of index pairs for rows and columns if the pairs are not the same, resulting in NaN for indices that do not overlap:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3784,6 +3784,13 @@
], ],
"prompt_number": 72 "prompt_number": 72
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Match the index of the Series on the DataFrame's columns, broadcasting down the rows, and take the union of the indices that do not match:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3808,13 +3815,6 @@
], ],
"prompt_number": 73 "prompt_number": 73
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Match the index of the Series on the DataFrame's columns, broadcasting down the rows and union the indices that do not match:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3880,6 +3880,13 @@
], ],
"prompt_number": 74 "prompt_number": 74
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
@ -3946,37 +3953,30 @@
"collapsed": false, "collapsed": false,
"input": [ "input": [
"ser_10 = Series([100, 200, 300])\n", "ser_10 = Series([100, 200, 300])\n",
"print ser_10" "ser_10"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"output_type": "stream", "metadata": {},
"stream": "stdout", "output_type": "pyout",
"prompt_number": 76,
"text": [ "text": [
"0 100\n", "0 100\n",
"1 200\n", "1 200\n",
"2 300\n", "2 300\n",
"dtype: int64\n" "dtype: int64"
] ]
} }
], ],
"prompt_number": 76 "prompt_number": 76
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_11 = df_10.sub(ser_10, axis=0)\n", "df_10.sub(ser_10, axis=0)"
"df_11"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4051,8 +4051,8 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_12 = np.abs(df_11)\n", "df_11 = np.abs(df_11)\n",
"df_12" "df_11"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4073,24 +4073,24 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td> 99.451186</td>\n", " <td> 0.000000</td>\n",
" <td> 98.867789</td>\n", " <td> 0.000000</td>\n",
" <td> 98.676912</td>\n", " <td> 0.000000</td>\n",
" <td> 99.999886</td>\n", " <td> 0.000000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td> 199.455117</td>\n", " <td> 0.003930</td>\n",
" <td> 199.274013</td>\n", " <td> 0.406224</td>\n",
" <td> 199.207350</td>\n", " <td> 0.530438</td>\n",
" <td> 199.907661</td>\n", " <td> 0.092224</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td> 299.562413</td>\n", " <td> 0.111226</td>\n",
" <td> 298.921967</td>\n", " <td> 0.054178</td>\n",
" <td> 298.690777</td>\n", " <td> 0.013864</td>\n",
" <td> 299.603233</td>\n", " <td> 0.396653</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -4100,10 +4100,10 @@
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 78, "prompt_number": 78,
"text": [ "text": [
" a b c d\n", " a b c d\n",
"0 99.451186 98.867789 98.676912 99.999886\n", "0 0.000000 0.000000 0.000000 0.000000\n",
"1 199.455117 199.274013 199.207350 199.907661\n", "1 0.003930 0.406224 0.530438 0.092224\n",
"2 299.562413 298.921967 298.690777 299.603233" "2 0.111226 0.054178 0.013864 0.396653"
] ]
} }
], ],
@ -4121,7 +4121,7 @@
"collapsed": false, "collapsed": false,
"input": [ "input": [
"func_1 = lambda x: x.max() - x.min()\n", "func_1 = lambda x: x.max() - x.min()\n",
"df_12.apply(func_1)" "df_11.apply(func_1)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4131,10 +4131,10 @@
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 79, "prompt_number": 79,
"text": [ "text": [
"a 200.111226\n", "a 0.111226\n",
"b 200.054178\n", "b 0.406224\n",
"c 200.013864\n", "c 0.530438\n",
"d 199.603347\n", "d 0.396653\n",
"dtype: float64" "dtype: float64"
] ]
} }
@ -4152,7 +4152,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_12.apply(func_1, axis=1)" "df_11.apply(func_1, axis=1)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4162,9 +4162,9 @@
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 80, "prompt_number": 80,
"text": [ "text": [
"0 1.322973\n", "0 0.000000\n",
"1 0.700311\n", "1 0.526508\n",
"2 0.912456\n", "2 0.382789\n",
"dtype: float64" "dtype: float64"
] ]
} }
@ -4183,7 +4183,7 @@
"collapsed": false, "collapsed": false,
"input": [ "input": [
"func_2 = lambda x: Series([x.min(), x.max()], index=['min', 'max'])\n", "func_2 = lambda x: Series([x.min(), x.max()], index=['min', 'max'])\n",
"df_12.apply(func_2)" "df_11.apply(func_2)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4204,17 +4204,17 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>min</th>\n", " <th>min</th>\n",
" <td> 99.451186</td>\n", " <td> 0.000000</td>\n",
" <td> 98.867789</td>\n", " <td> 0.000000</td>\n",
" <td> 98.676912</td>\n", " <td> 0.000000</td>\n",
" <td> 99.999886</td>\n", " <td> 0.000000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>max</th>\n", " <th>max</th>\n",
" <td> 299.562413</td>\n", " <td> 0.111226</td>\n",
" <td> 298.921967</td>\n", " <td> 0.406224</td>\n",
" <td> 298.690777</td>\n", " <td> 0.530438</td>\n",
" <td> 299.603233</td>\n", " <td> 0.396653</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -4224,9 +4224,9 @@
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 81, "prompt_number": 81,
"text": [ "text": [
" a b c d\n", " a b c d\n",
"min 99.451186 98.867789 98.676912 99.999886\n", "min 0.000000 0.000000 0.000000 0.000000\n",
"max 299.562413 298.921967 298.690777 299.603233" "max 0.111226 0.406224 0.530438 0.396653"
] ]
} }
], ],
@ -4244,7 +4244,7 @@
"collapsed": false, "collapsed": false,
"input": [ "input": [
"func_3 = lambda x: '%.2f' %x\n", "func_3 = lambda x: '%.2f' %x\n",
"df_12.applymap(func_3)" "df_11.applymap(func_3)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4265,24 +4265,24 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td> 99.45</td>\n", " <td> 0.00</td>\n",
" <td> 98.87</td>\n", " <td> 0.00</td>\n",
" <td> 98.68</td>\n", " <td> 0.00</td>\n",
" <td> 100.00</td>\n", " <td> 0.00</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td> 199.46</td>\n", " <td> 0.00</td>\n",
" <td> 199.27</td>\n", " <td> 0.41</td>\n",
" <td> 199.21</td>\n", " <td> 0.53</td>\n",
" <td> 199.91</td>\n", " <td> 0.09</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td> 299.56</td>\n", " <td> 0.11</td>\n",
" <td> 298.92</td>\n", " <td> 0.05</td>\n",
" <td> 298.69</td>\n", " <td> 0.01</td>\n",
" <td> 299.60</td>\n", " <td> 0.40</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -4292,10 +4292,10 @@
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 82, "prompt_number": 82,
"text": [ "text": [
" a b c d\n", " a b c d\n",
"0 99.45 98.87 98.68 100.00\n", "0 0.00 0.00 0.00 0.00\n",
"1 199.46 199.27 199.21 199.91\n", "1 0.00 0.41 0.53 0.09\n",
"2 299.56 298.92 298.69 299.60" "2 0.11 0.05 0.01 0.40"
] ]
} }
], ],
@ -4312,7 +4312,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_12['a'].map(func_3)" "df_11['a'].map(func_3)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4322,9 +4322,9 @@
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 83, "prompt_number": 83,
"text": [ "text": [
"0 99.45\n", "0 0.00\n",
"1 199.46\n", "1 0.00\n",
"2 299.56\n", "2 0.11\n",
"Name: a, dtype: object" "Name: a, dtype: object"
] ]
} }
@ -4350,7 +4350,7 @@
{ {
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 114, "prompt_number": 84,
"text": [ "text": [
"fo 100\n", "fo 100\n",
"br 200\n", "br 200\n",
@ -4360,7 +4360,7 @@
] ]
} }
], ],
"prompt_number": 114 "prompt_number": 84
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -4381,7 +4381,7 @@
{ {
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 115, "prompt_number": 85,
"text": [ "text": [
"br 200\n", "br 200\n",
"bz 300\n", "bz 300\n",
@ -4391,7 +4391,7 @@
] ]
} }
], ],
"prompt_number": 115 "prompt_number": 85
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -4412,7 +4412,7 @@
{ {
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 132, "prompt_number": 86,
"text": [ "text": [
"fo 100\n", "fo 100\n",
"br 200\n", "br 200\n",
@ -4422,16 +4422,16 @@
] ]
} }
], ],
"prompt_number": 132 "prompt_number": 86
}, },
{ {
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_13 = DataFrame(np.arange(12).reshape((3, 4)),\n", "df_12 = DataFrame(np.arange(12).reshape((3, 4)),\n",
" index=['three', 'one', 'two'],\n", " index=['three', 'one', 'two'],\n",
" columns=['c', 'a', 'b', 'd'])\n", " columns=['c', 'a', 'b', 'd'])\n",
"df_13" "df_12"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4477,7 +4477,7 @@
], ],
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 128, "prompt_number": 87,
"text": [ "text": [
" c a b d\n", " c a b d\n",
"three 0 1 2 3\n", "three 0 1 2 3\n",
@ -4486,7 +4486,7 @@
] ]
} }
], ],
"prompt_number": 128 "prompt_number": 87
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -4499,7 +4499,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_13.sort_index()" "df_12.sort_index()"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4545,7 +4545,7 @@
], ],
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 129, "prompt_number": 88,
"text": [ "text": [
" c a b d\n", " c a b d\n",
"one 4 5 6 7\n", "one 4 5 6 7\n",
@ -4554,7 +4554,7 @@
] ]
} }
], ],
"prompt_number": 129 "prompt_number": 88
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -4567,7 +4567,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_13.sort_index(axis=1, ascending=False)" "df_12.sort_index(axis=1, ascending=False)"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4613,7 +4613,7 @@
], ],
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 131, "prompt_number": 89,
"text": [ "text": [
" d c b a\n", " d c b a\n",
"three 3 0 2 1\n", "three 3 0 2 1\n",
@ -4622,7 +4622,7 @@
] ]
} }
], ],
"prompt_number": 131 "prompt_number": 89
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -4635,7 +4635,7 @@
"cell_type": "code", "cell_type": "code",
"collapsed": false, "collapsed": false,
"input": [ "input": [
"df_13.sort_index(by=['d', 'c'])" "df_12.sort_index(by=['d', 'c'])"
], ],
"language": "python", "language": "python",
"metadata": {}, "metadata": {},
@ -4681,7 +4681,7 @@
], ],
"metadata": {}, "metadata": {},
"output_type": "pyout", "output_type": "pyout",
"prompt_number": 134, "prompt_number": 90,
"text": [ "text": [
" c a b d\n", " c a b d\n",
"three 0 1 2 3\n", "three 0 1 2 3\n",
@ -4690,7 +4690,142 @@
] ]
} }
], ],
"prompt_number": 134 "prompt_number": 90
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ranking is similar to numpy.argsort except that ties are broken by assigning each group the mean rank:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11 = Series([7, -5, 7, 4, 2, 0, 4, 7])\n",
"ser_11 = ser_11.order()\n",
"ser_11"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 91,
"text": [
"1 -5\n",
"5 0\n",
"4 2\n",
"3 4\n",
"6 4\n",
"0 7\n",
"2 7\n",
"7 7\n",
"dtype: int64"
]
}
],
"prompt_number": 91
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11.rank()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 92,
"text": [
"1 1.0\n",
"5 2.0\n",
"4 3.0\n",
"3 4.5\n",
"6 4.5\n",
"0 7.0\n",
"2 7.0\n",
"7 7.0\n",
"dtype: float64"
]
}
],
"prompt_number": 92
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rank a Series, breaking ties according to the order in which the values appear in the data:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11.rank(method='first')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 93,
"text": [
"1 1\n",
"5 2\n",
"4 3\n",
"3 4\n",
"6 5\n",
"0 6\n",
"2 7\n",
"7 8\n",
"dtype: float64"
]
}
],
"prompt_number": 93
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rank a Series in descending order, using the maximum rank for the group:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11.rank(ascending=False, method='max')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 94,
"text": [
"1 8\n",
"5 7\n",
"4 6\n",
"3 5\n",
"6 5\n",
"0 3\n",
"2 3\n",
"7 3\n",
"dtype: float64"
]
}
],
"prompt_number": 94
} }
], ],
"metadata": {} "metadata": {}