diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb
index ee247f8..6c8c4fd 100644
--- a/pandas/pandas.ipynb
+++ b/pandas/pandas.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:17491453ae73630f23f856b3c0724fbd00a52c4f53b239a07e340ec3113ea230"
+ "signature": "sha256:2b6aa402b58aa2da8c06d378f19732970903abe573b39f9a7490982d0e2ebcbc"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -3331,6 +3331,13 @@
"## Arithmetic and Data Alignment"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Adding Series objects aligns them on their index labels: the index of the result is the union of the two indexes, and any label that is not present in both Series yields NaN:"
+ ]
+ },
{
"cell_type": "code",
"collapsed": false,
@@ -3338,7 +3345,7 @@
"np.random.seed(0)\n",
"ser_6 = Series(np.random.randn(5),\n",
" index=['a', 'b', 'c', 'd', 'e'])\n",
- "ser_6\n"
+ "ser_6"
],
"language": "python",
"metadata": {},
@@ -3387,13 +3394,6 @@
],
"prompt_number": 65
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Adding objects results in the union of index pairs if the pairs are not the same, resulting in NaN for indices that do not overlap:"
- ]
- },
{
"cell_type": "code",
"collapsed": false,
@@ -3455,6 +3455,13 @@
],
"prompt_number": 67
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Adding DataFrame objects aligns them on both rows and columns: the index and columns of the result are the unions of those of the operands, and any row/column label pair that is not present in both DataFrames yields NaN:"
+ ]
+ },
{
"cell_type": "code",
"collapsed": false,
@@ -3575,13 +3582,6 @@
],
"prompt_number": 69
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Adding objects results in the union of index pairs for rows and columns if the pairs are not the same, resulting in NaN for indices that do not overlap:"
- ]
- },
{
"cell_type": "code",
"collapsed": false,
@@ -3784,6 +3784,13 @@
],
"prompt_number": 72
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Arithmetic between a DataFrame and a Series matches the index of the Series against the DataFrame's columns and broadcasts down the rows; labels that do not match are unioned into the result:"
+ ]
+ },
{
"cell_type": "code",
"collapsed": false,
@@ -3808,13 +3815,6 @@
],
"prompt_number": 73
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Match the index of the Series on the DataFrame's columns, broadcasting down the rows and union the indices that do not match:"
- ]
- },
{
"cell_type": "code",
"collapsed": false,
@@ -3880,6 +3880,13 @@
],
"prompt_number": 74
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To broadcast over the columns and match on the rows instead, use an arithmetic method such as sub with axis=0:"
+ ]
+ },
{
"cell_type": "code",
"collapsed": false,
@@ -3946,37 +3953,30 @@
"collapsed": false,
"input": [
"ser_10 = Series([100, 200, 300])\n",
- "print ser_10"
+ "ser_10"
],
"language": "python",
"metadata": {},
"outputs": [
{
- "output_type": "stream",
- "stream": "stdout",
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 76,
"text": [
"0 100\n",
"1 200\n",
"2 300\n",
- "dtype: int64\n"
+ "dtype: int64"
]
}
],
"prompt_number": 76
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Broadcast over the columns and match the rows (axis=0) by using an arithmetic method:"
- ]
- },
{
"cell_type": "code",
"collapsed": false,
"input": [
- "df_11 = df_10.sub(ser_10, axis=0)\n",
- "df_11"
+ "df_10.sub(ser_10, axis=0)"
],
"language": "python",
"metadata": {},
@@ -4051,8 +4051,8 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "df_12 = np.abs(df_11)\n",
- "df_12"
+ "df_11 = np.abs(df_11)\n",
+ "df_11"
],
"language": "python",
"metadata": {},
@@ -4073,24 +4073,24 @@
"
\n",
" \n",
" 0 | \n",
- " 99.451186 | \n",
- " 98.867789 | \n",
- " 98.676912 | \n",
- " 99.999886 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
" 1 | \n",
- " 199.455117 | \n",
- " 199.274013 | \n",
- " 199.207350 | \n",
- " 199.907661 | \n",
+ " 0.003930 | \n",
+ " 0.406224 | \n",
+ " 0.530438 | \n",
+ " 0.092224 | \n",
"
\n",
" \n",
" 2 | \n",
- " 299.562413 | \n",
- " 298.921967 | \n",
- " 298.690777 | \n",
- " 299.603233 | \n",
+ " 0.111226 | \n",
+ " 0.054178 | \n",
+ " 0.013864 | \n",
+ " 0.396653 | \n",
"
\n",
" \n",
"\n",
@@ -4100,10 +4100,10 @@
"output_type": "pyout",
"prompt_number": 78,
"text": [
- " a b c d\n",
- "0 99.451186 98.867789 98.676912 99.999886\n",
- "1 199.455117 199.274013 199.207350 199.907661\n",
- "2 299.562413 298.921967 298.690777 299.603233"
+ " a b c d\n",
+ "0 0.000000 0.000000 0.000000 0.000000\n",
+ "1 0.003930 0.406224 0.530438 0.092224\n",
+ "2 0.111226 0.054178 0.013864 0.396653"
]
}
],
@@ -4121,7 +4121,7 @@
"collapsed": false,
"input": [
"func_1 = lambda x: x.max() - x.min()\n",
- "df_12.apply(func_1)"
+ "df_11.apply(func_1)"
],
"language": "python",
"metadata": {},
@@ -4131,10 +4131,10 @@
"output_type": "pyout",
"prompt_number": 79,
"text": [
- "a 200.111226\n",
- "b 200.054178\n",
- "c 200.013864\n",
- "d 199.603347\n",
+ "a 0.111226\n",
+ "b 0.406224\n",
+ "c 0.530438\n",
+ "d 0.396653\n",
"dtype: float64"
]
}
@@ -4152,7 +4152,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "df_12.apply(func_1, axis=1)"
+ "df_11.apply(func_1, axis=1)"
],
"language": "python",
"metadata": {},
@@ -4162,9 +4162,9 @@
"output_type": "pyout",
"prompt_number": 80,
"text": [
- "0 1.322973\n",
- "1 0.700311\n",
- "2 0.912456\n",
+ "0 0.000000\n",
+ "1 0.526508\n",
+ "2 0.382789\n",
"dtype: float64"
]
}
@@ -4183,7 +4183,7 @@
"collapsed": false,
"input": [
"func_2 = lambda x: Series([x.min(), x.max()], index=['min', 'max'])\n",
- "df_12.apply(func_2)"
+ "df_11.apply(func_2)"
],
"language": "python",
"metadata": {},
@@ -4204,17 +4204,17 @@
" \n",
" \n",
" min | \n",
- " 99.451186 | \n",
- " 98.867789 | \n",
- " 98.676912 | \n",
- " 99.999886 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
"
\n",
" \n",
" max | \n",
- " 299.562413 | \n",
- " 298.921967 | \n",
- " 298.690777 | \n",
- " 299.603233 | \n",
+ " 0.111226 | \n",
+ " 0.406224 | \n",
+ " 0.530438 | \n",
+ " 0.396653 | \n",
"
\n",
" \n",
"\n",
@@ -4224,9 +4224,9 @@
"output_type": "pyout",
"prompt_number": 81,
"text": [
- " a b c d\n",
- "min 99.451186 98.867789 98.676912 99.999886\n",
- "max 299.562413 298.921967 298.690777 299.603233"
+ " a b c d\n",
+ "min 0.000000 0.000000 0.000000 0.000000\n",
+ "max 0.111226 0.406224 0.530438 0.396653"
]
}
],
@@ -4244,7 +4244,7 @@
"collapsed": false,
"input": [
"func_3 = lambda x: '%.2f' %x\n",
- "df_12.applymap(func_3)"
+ "df_11.applymap(func_3)"
],
"language": "python",
"metadata": {},
@@ -4265,24 +4265,24 @@
" \n",
" \n",
" 0 | \n",
- " 99.45 | \n",
- " 98.87 | \n",
- " 98.68 | \n",
- " 100.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
"
\n",
" \n",
" 1 | \n",
- " 199.46 | \n",
- " 199.27 | \n",
- " 199.21 | \n",
- " 199.91 | \n",
+ " 0.00 | \n",
+ " 0.41 | \n",
+ " 0.53 | \n",
+ " 0.09 | \n",
"
\n",
" \n",
" 2 | \n",
- " 299.56 | \n",
- " 298.92 | \n",
- " 298.69 | \n",
- " 299.60 | \n",
+ " 0.11 | \n",
+ " 0.05 | \n",
+ " 0.01 | \n",
+ " 0.40 | \n",
"
\n",
" \n",
"\n",
@@ -4292,10 +4292,10 @@
"output_type": "pyout",
"prompt_number": 82,
"text": [
- " a b c d\n",
- "0 99.45 98.87 98.68 100.00\n",
- "1 199.46 199.27 199.21 199.91\n",
- "2 299.56 298.92 298.69 299.60"
+ " a b c d\n",
+ "0 0.00 0.00 0.00 0.00\n",
+ "1 0.00 0.41 0.53 0.09\n",
+ "2 0.11 0.05 0.01 0.40"
]
}
],
@@ -4312,7 +4312,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "df_12['a'].map(func_3)"
+ "df_11['a'].map(func_3)"
],
"language": "python",
"metadata": {},
@@ -4322,9 +4322,9 @@
"output_type": "pyout",
"prompt_number": 83,
"text": [
- "0 99.45\n",
- "1 199.46\n",
- "2 299.56\n",
+ "0 0.00\n",
+ "1 0.00\n",
+ "2 0.11\n",
"Name: a, dtype: object"
]
}
@@ -4350,7 +4350,7 @@
{
"metadata": {},
"output_type": "pyout",
- "prompt_number": 114,
+ "prompt_number": 84,
"text": [
"fo 100\n",
"br 200\n",
@@ -4360,7 +4360,7 @@
]
}
],
- "prompt_number": 114
+ "prompt_number": 84
},
{
"cell_type": "markdown",
@@ -4381,7 +4381,7 @@
{
"metadata": {},
"output_type": "pyout",
- "prompt_number": 115,
+ "prompt_number": 85,
"text": [
"br 200\n",
"bz 300\n",
@@ -4391,7 +4391,7 @@
]
}
],
- "prompt_number": 115
+ "prompt_number": 85
},
{
"cell_type": "markdown",
@@ -4412,7 +4412,7 @@
{
"metadata": {},
"output_type": "pyout",
- "prompt_number": 132,
+ "prompt_number": 86,
"text": [
"fo 100\n",
"br 200\n",
@@ -4422,16 +4422,16 @@
]
}
],
- "prompt_number": 132
+ "prompt_number": 86
},
{
"cell_type": "code",
"collapsed": false,
"input": [
- "df_13 = DataFrame(np.arange(12).reshape((3, 4)),\n",
+ "df_12 = DataFrame(np.arange(12).reshape((3, 4)),\n",
" index=['three', 'one', 'two'],\n",
" columns=['c', 'a', 'b', 'd'])\n",
- "df_13"
+ "df_12"
],
"language": "python",
"metadata": {},
@@ -4477,7 +4477,7 @@
],
"metadata": {},
"output_type": "pyout",
- "prompt_number": 128,
+ "prompt_number": 87,
"text": [
" c a b d\n",
"three 0 1 2 3\n",
@@ -4486,7 +4486,7 @@
]
}
],
- "prompt_number": 128
+ "prompt_number": 87
},
{
"cell_type": "markdown",
@@ -4499,7 +4499,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "df_13.sort_index()"
+ "df_12.sort_index()"
],
"language": "python",
"metadata": {},
@@ -4545,7 +4545,7 @@
],
"metadata": {},
"output_type": "pyout",
- "prompt_number": 129,
+ "prompt_number": 88,
"text": [
" c a b d\n",
"one 4 5 6 7\n",
@@ -4554,7 +4554,7 @@
]
}
],
- "prompt_number": 129
+ "prompt_number": 88
},
{
"cell_type": "markdown",
@@ -4567,7 +4567,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "df_13.sort_index(axis=1, ascending=False)"
+ "df_12.sort_index(axis=1, ascending=False)"
],
"language": "python",
"metadata": {},
@@ -4613,7 +4613,7 @@
],
"metadata": {},
"output_type": "pyout",
- "prompt_number": 131,
+ "prompt_number": 89,
"text": [
" d c b a\n",
"three 3 0 2 1\n",
@@ -4622,7 +4622,7 @@
]
}
],
- "prompt_number": 131
+ "prompt_number": 89
},
{
"cell_type": "markdown",
@@ -4635,7 +4635,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "df_13.sort_index(by=['d', 'c'])"
+ "df_12.sort_index(by=['d', 'c'])"
],
"language": "python",
"metadata": {},
@@ -4681,7 +4681,7 @@
],
"metadata": {},
"output_type": "pyout",
- "prompt_number": 134,
+ "prompt_number": 90,
"text": [
" c a b d\n",
"three 0 1 2 3\n",
@@ -4690,7 +4690,142 @@
]
}
],
- "prompt_number": 134
+ "prompt_number": 90
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Ranking is similar to numpy.argsort except that ties are broken by assigning each group the mean rank:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_11 = Series([7, -5, 7, 4, 2, 0, 4, 7])\n",
+ "ser_11 = ser_11.order()\n",
+ "ser_11"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 91,
+ "text": [
+ "1 -5\n",
+ "5 0\n",
+ "4 2\n",
+ "3 4\n",
+ "6 4\n",
+ "0 7\n",
+ "2 7\n",
+ "7 7\n",
+ "dtype: int64"
+ ]
+ }
+ ],
+ "prompt_number": 91
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_11.rank()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 92,
+ "text": [
+ "1 1.0\n",
+ "5 2.0\n",
+ "4 3.0\n",
+ "3 4.5\n",
+ "6 4.5\n",
+ "0 7.0\n",
+ "2 7.0\n",
+ "7 7.0\n",
+ "dtype: float64"
+ ]
+ }
+ ],
+ "prompt_number": 92
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Rank the values of a Series, breaking ties according to the order in which the values appear in the data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_11.rank(method='first')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 93,
+ "text": [
+ "1 1\n",
+ "5 2\n",
+ "4 3\n",
+ "3 4\n",
+ "6 5\n",
+ "0 6\n",
+ "2 7\n",
+ "7 8\n",
+ "dtype: float64"
+ ]
+ }
+ ],
+ "prompt_number": 93
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Rank a Series in descending order, assigning each group of tied values the maximum rank of that group:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_11.rank(ascending=False, method='max')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 94,
+ "text": [
+ "1 8\n",
+ "5 7\n",
+ "4 6\n",
+ "3 5\n",
+ "6 5\n",
+ "0 3\n",
+ "2 3\n",
+ "7 3\n",
+ "dtype: float64"
+ ]
+ }
+ ],
+ "prompt_number": 94
}
],
"metadata": {}