Added Series ranking snippets. Tweaked the positions of some comments relative to the code. Minor tweaks to some snippets.

This commit is contained in:
Donne Martin 2015-02-01 07:33:19 -05:00
parent 91cdd02752
commit 3f5e508eb6

View File

@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
"signature": "sha256:17491453ae73630f23f856b3c0724fbd00a52c4f53b239a07e340ec3113ea230"
"signature": "sha256:2b6aa402b58aa2da8c06d378f19732970903abe573b39f9a7490982d0e2ebcbc"
},
"nbformat": 3,
"nbformat_minor": 0,
@ -3331,6 +3331,13 @@
"## Arithmetic and Data Alignment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding Series objects aligns them by index label: the index of the result is the union of both indices, with NaN for labels that do not overlap:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3338,7 +3345,7 @@
"np.random.seed(0)\n",
"ser_6 = Series(np.random.randn(5),\n",
" index=['a', 'b', 'c', 'd', 'e'])\n",
"ser_6\n"
"ser_6"
],
"language": "python",
"metadata": {},
@ -3387,13 +3394,6 @@
],
"prompt_number": 65
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding objects results in the union of index pairs if the pairs are not the same, resulting in NaN for indices that do not overlap:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3455,6 +3455,13 @@
],
"prompt_number": 67
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding DataFrame objects aligns them on both rows and columns: the row and column indices of the result are the unions of the operands' indices, with NaN for labels that do not overlap:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3575,13 +3582,6 @@
],
"prompt_number": 69
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Adding objects results in the union of index pairs for rows and columns if the pairs are not the same, resulting in NaN for indices that do not overlap:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3784,6 +3784,13 @@
],
"prompt_number": 72
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Match the index of the Series on the DataFrame's columns, broadcasting down the rows; where the labels do not match, the result is reindexed to the union of both:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3808,13 +3815,6 @@
],
"prompt_number": 73
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Match the index of the Series on the DataFrame's columns, broadcasting down the rows and union the indices that do not match:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3880,6 +3880,13 @@
],
"prompt_number": 74
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
]
},
{
"cell_type": "code",
"collapsed": false,
@ -3946,37 +3953,30 @@
"collapsed": false,
"input": [
"ser_10 = Series([100, 200, 300])\n",
"print ser_10"
"ser_10"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"metadata": {},
"output_type": "pyout",
"prompt_number": 76,
"text": [
"0 100\n",
"1 200\n",
"2 300\n",
"dtype: int64\n"
"dtype: int64"
]
}
],
"prompt_number": 76
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_11 = df_10.sub(ser_10, axis=0)\n",
"df_11"
"df_10.sub(ser_10, axis=0)"
],
"language": "python",
"metadata": {},
@ -4051,8 +4051,8 @@
"cell_type": "code",
"collapsed": false,
"input": [
"df_12 = np.abs(df_11)\n",
"df_12"
"df_11 = np.abs(df_11)\n",
"df_11"
],
"language": "python",
"metadata": {},
@ -4073,24 +4073,24 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 99.451186</td>\n",
" <td> 98.867789</td>\n",
" <td> 98.676912</td>\n",
" <td> 99.999886</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 199.455117</td>\n",
" <td> 199.274013</td>\n",
" <td> 199.207350</td>\n",
" <td> 199.907661</td>\n",
" <td> 0.003930</td>\n",
" <td> 0.406224</td>\n",
" <td> 0.530438</td>\n",
" <td> 0.092224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 299.562413</td>\n",
" <td> 298.921967</td>\n",
" <td> 298.690777</td>\n",
" <td> 299.603233</td>\n",
" <td> 0.111226</td>\n",
" <td> 0.054178</td>\n",
" <td> 0.013864</td>\n",
" <td> 0.396653</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -4100,10 +4100,10 @@
"output_type": "pyout",
"prompt_number": 78,
"text": [
" a b c d\n",
"0 99.451186 98.867789 98.676912 99.999886\n",
"1 199.455117 199.274013 199.207350 199.907661\n",
"2 299.562413 298.921967 298.690777 299.603233"
" a b c d\n",
"0 0.000000 0.000000 0.000000 0.000000\n",
"1 0.003930 0.406224 0.530438 0.092224\n",
"2 0.111226 0.054178 0.013864 0.396653"
]
}
],
@ -4121,7 +4121,7 @@
"collapsed": false,
"input": [
"func_1 = lambda x: x.max() - x.min()\n",
"df_12.apply(func_1)"
"df_11.apply(func_1)"
],
"language": "python",
"metadata": {},
@ -4131,10 +4131,10 @@
"output_type": "pyout",
"prompt_number": 79,
"text": [
"a 200.111226\n",
"b 200.054178\n",
"c 200.013864\n",
"d 199.603347\n",
"a 0.111226\n",
"b 0.406224\n",
"c 0.530438\n",
"d 0.396653\n",
"dtype: float64"
]
}
@ -4152,7 +4152,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"df_12.apply(func_1, axis=1)"
"df_11.apply(func_1, axis=1)"
],
"language": "python",
"metadata": {},
@ -4162,9 +4162,9 @@
"output_type": "pyout",
"prompt_number": 80,
"text": [
"0 1.322973\n",
"1 0.700311\n",
"2 0.912456\n",
"0 0.000000\n",
"1 0.526508\n",
"2 0.382789\n",
"dtype: float64"
]
}
@ -4183,7 +4183,7 @@
"collapsed": false,
"input": [
"func_2 = lambda x: Series([x.min(), x.max()], index=['min', 'max'])\n",
"df_12.apply(func_2)"
"df_11.apply(func_2)"
],
"language": "python",
"metadata": {},
@ -4204,17 +4204,17 @@
" <tbody>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 99.451186</td>\n",
" <td> 98.867789</td>\n",
" <td> 98.676912</td>\n",
" <td> 99.999886</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 299.562413</td>\n",
" <td> 298.921967</td>\n",
" <td> 298.690777</td>\n",
" <td> 299.603233</td>\n",
" <td> 0.111226</td>\n",
" <td> 0.406224</td>\n",
" <td> 0.530438</td>\n",
" <td> 0.396653</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -4224,9 +4224,9 @@
"output_type": "pyout",
"prompt_number": 81,
"text": [
" a b c d\n",
"min 99.451186 98.867789 98.676912 99.999886\n",
"max 299.562413 298.921967 298.690777 299.603233"
" a b c d\n",
"min 0.000000 0.000000 0.000000 0.000000\n",
"max 0.111226 0.406224 0.530438 0.396653"
]
}
],
@ -4244,7 +4244,7 @@
"collapsed": false,
"input": [
"func_3 = lambda x: '%.2f' %x\n",
"df_12.applymap(func_3)"
"df_11.applymap(func_3)"
],
"language": "python",
"metadata": {},
@ -4265,24 +4265,24 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 99.45</td>\n",
" <td> 98.87</td>\n",
" <td> 98.68</td>\n",
" <td> 100.00</td>\n",
" <td> 0.00</td>\n",
" <td> 0.00</td>\n",
" <td> 0.00</td>\n",
" <td> 0.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 199.46</td>\n",
" <td> 199.27</td>\n",
" <td> 199.21</td>\n",
" <td> 199.91</td>\n",
" <td> 0.00</td>\n",
" <td> 0.41</td>\n",
" <td> 0.53</td>\n",
" <td> 0.09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 299.56</td>\n",
" <td> 298.92</td>\n",
" <td> 298.69</td>\n",
" <td> 299.60</td>\n",
" <td> 0.11</td>\n",
" <td> 0.05</td>\n",
" <td> 0.01</td>\n",
" <td> 0.40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -4292,10 +4292,10 @@
"output_type": "pyout",
"prompt_number": 82,
"text": [
" a b c d\n",
"0 99.45 98.87 98.68 100.00\n",
"1 199.46 199.27 199.21 199.91\n",
"2 299.56 298.92 298.69 299.60"
" a b c d\n",
"0 0.00 0.00 0.00 0.00\n",
"1 0.00 0.41 0.53 0.09\n",
"2 0.11 0.05 0.01 0.40"
]
}
],
@ -4312,7 +4312,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"df_12['a'].map(func_3)"
"df_11['a'].map(func_3)"
],
"language": "python",
"metadata": {},
@ -4322,9 +4322,9 @@
"output_type": "pyout",
"prompt_number": 83,
"text": [
"0 99.45\n",
"1 199.46\n",
"2 299.56\n",
"0 0.00\n",
"1 0.00\n",
"2 0.11\n",
"Name: a, dtype: object"
]
}
@ -4350,7 +4350,7 @@
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 114,
"prompt_number": 84,
"text": [
"fo 100\n",
"br 200\n",
@ -4360,7 +4360,7 @@
]
}
],
"prompt_number": 114
"prompt_number": 84
},
{
"cell_type": "markdown",
@ -4381,7 +4381,7 @@
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 115,
"prompt_number": 85,
"text": [
"br 200\n",
"bz 300\n",
@ -4391,7 +4391,7 @@
]
}
],
"prompt_number": 115
"prompt_number": 85
},
{
"cell_type": "markdown",
@ -4412,7 +4412,7 @@
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 132,
"prompt_number": 86,
"text": [
"fo 100\n",
"br 200\n",
@ -4422,16 +4422,16 @@
]
}
],
"prompt_number": 132
"prompt_number": 86
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_13 = DataFrame(np.arange(12).reshape((3, 4)),\n",
"df_12 = DataFrame(np.arange(12).reshape((3, 4)),\n",
" index=['three', 'one', 'two'],\n",
" columns=['c', 'a', 'b', 'd'])\n",
"df_13"
"df_12"
],
"language": "python",
"metadata": {},
@ -4477,7 +4477,7 @@
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 128,
"prompt_number": 87,
"text": [
" c a b d\n",
"three 0 1 2 3\n",
@ -4486,7 +4486,7 @@
]
}
],
"prompt_number": 128
"prompt_number": 87
},
{
"cell_type": "markdown",
@ -4499,7 +4499,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"df_13.sort_index()"
"df_12.sort_index()"
],
"language": "python",
"metadata": {},
@ -4545,7 +4545,7 @@
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 129,
"prompt_number": 88,
"text": [
" c a b d\n",
"one 4 5 6 7\n",
@ -4554,7 +4554,7 @@
]
}
],
"prompt_number": 129
"prompt_number": 88
},
{
"cell_type": "markdown",
@ -4567,7 +4567,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"df_13.sort_index(axis=1, ascending=False)"
"df_12.sort_index(axis=1, ascending=False)"
],
"language": "python",
"metadata": {},
@ -4613,7 +4613,7 @@
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 131,
"prompt_number": 89,
"text": [
" d c b a\n",
"three 3 0 2 1\n",
@ -4622,7 +4622,7 @@
]
}
],
"prompt_number": 131
"prompt_number": 89
},
{
"cell_type": "markdown",
@ -4635,7 +4635,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"df_13.sort_index(by=['d', 'c'])"
"df_12.sort_index(by=['d', 'c'])"
],
"language": "python",
"metadata": {},
@ -4681,7 +4681,7 @@
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 134,
"prompt_number": 90,
"text": [
" c a b d\n",
"three 0 1 2 3\n",
@ -4690,7 +4690,142 @@
]
}
],
"prompt_number": 134
"prompt_number": 90
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Ranking is similar to numpy.argsort except that ties are broken by assigning each group the mean rank:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11 = Series([7, -5, 7, 4, 2, 0, 4, 7])\n",
"ser_11 = ser_11.order()\n",
"ser_11"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 91,
"text": [
"1 -5\n",
"5 0\n",
"4 2\n",
"3 4\n",
"6 4\n",
"0 7\n",
"2 7\n",
"7 7\n",
"dtype: int64"
]
}
],
"prompt_number": 91
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11.rank()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 92,
"text": [
"1 1.0\n",
"5 2.0\n",
"4 3.0\n",
"3 4.5\n",
"6 4.5\n",
"0 7.0\n",
"2 7.0\n",
"7 7.0\n",
"dtype: float64"
]
}
],
"prompt_number": 92
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rank a Series, breaking ties according to the order in which the values appear in the data:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11.rank(method='first')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 93,
"text": [
"1 1\n",
"5 2\n",
"4 3\n",
"3 4\n",
"6 5\n",
"0 6\n",
"2 7\n",
"7 8\n",
"dtype: float64"
]
}
],
"prompt_number": 93
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rank a Series in descending order, assigning each group of tied values the maximum rank in the group:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ser_11.rank(ascending=False, method='max')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 94,
"text": [
"1 8\n",
"5 7\n",
"4 6\n",
"3 5\n",
"6 5\n",
"0 3\n",
"2 3\n",
"7 3\n",
"dtype: float64"
]
}
],
"prompt_number": 94
}
],
"metadata": {}