diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb
index 2f0f8a5..47c53dd 100644
--- a/pandas/pandas.ipynb
+++ b/pandas/pandas.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:1d555e34f97d4a24383bba48a1c34b1526e08e18276d519d2c8afaf3ff0550f4"
+ "signature": "sha256:c354a8841cc70bd62479cc5ca49e30ec7138276bbd5c4b7cccfa0c622c5f0428"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -15,7 +15,8 @@
"# Pandas\n",
"\n",
"* Series\n",
- "* DataFrame"
+ "* DataFrame\n",
+ "* Reindexing"
]
},
{
@@ -1516,7 +1517,7 @@
],
"metadata": {},
"output_type": "pyout",
- "prompt_number": 37,
+ "prompt_number": 33,
"text": [
" MD VA\n",
"year \n",
@@ -1527,7 +1528,7 @@
]
}
],
- "prompt_number": 37
+ "prompt_number": 33
},
{
"cell_type": "markdown",
@@ -1580,7 +1581,7 @@
],
"metadata": {},
"output_type": "pyout",
- "prompt_number": 38,
+ "prompt_number": 34,
"text": [
"state MD VA\n",
"year \n",
@@ -1591,7 +1592,7 @@
]
}
],
- "prompt_number": 38
+ "prompt_number": 34
},
{
"cell_type": "markdown",
@@ -1612,14 +1613,14 @@
{
"metadata": {},
"output_type": "pyout",
- "prompt_number": 39,
+ "prompt_number": 35,
"text": [
"array([[ nan, 5.2],\n",
" [ 4.1, nan]])"
]
}
],
- "prompt_number": 39
+ "prompt_number": 35
},
{
"cell_type": "markdown",
@@ -1640,7 +1641,7 @@
{
"metadata": {},
"output_type": "pyout",
- "prompt_number": 41,
+ "prompt_number": 36,
"text": [
"array([[2012, 'VA', 5.0, nan],\n",
" [2013, 'VA', 5.1, nan],\n",
@@ -1650,7 +1651,595 @@
]
}
],
+ "prompt_number": 36
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Reindexing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Reindexing creates a new object with the data conformed to a new index. Labels that are not present in the original index are filled with NaN."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "frame_3"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " state | \n",
+ " pop | \n",
+ " unempl | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " VA | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " VA | \n",
+ " 5.1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2014 | \n",
+ " VA | \n",
+ " 5.2 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2014 | \n",
+ " MD | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2015 | \n",
+ " MD | \n",
+ " 4.1 | \n",
+ " 6.1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows \u00d7 4 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 37,
+ "text": [
+ " year state pop unempl\n",
+ "0 2012 VA 5.0 NaN\n",
+ "1 2013 VA 5.1 NaN\n",
+ "2 2014 VA 5.2 6.0\n",
+ "3 2014 MD 4.0 6.0\n",
+ "4 2015 MD 4.1 6.1\n",
+ "\n",
+ "[5 rows x 4 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 37
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Reindexing rows returns a new frame with the specified index:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "frame_3.reindex(list(reversed(range(0, 6))))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " state | \n",
+ " pop | \n",
+ " unempl | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 2015 | \n",
+ " MD | \n",
+ " 4.1 | \n",
+ " 6.1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2014 | \n",
+ " MD | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2014 | \n",
+ " VA | \n",
+ " 5.2 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2013 | \n",
+ " VA | \n",
+ " 5.1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " VA | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6 rows \u00d7 4 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 38,
+ "text": [
+ " year state pop unempl\n",
+ "5 NaN NaN NaN NaN\n",
+ "4 2015 MD 4.1 6.1\n",
+ "3 2014 MD 4.0 6.0\n",
+ "2 2014 VA 5.2 6.0\n",
+ "1 2013 VA 5.1 NaN\n",
+ "0 2012 VA 5.0 NaN\n",
+ "\n",
+ "[6 rows x 4 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 38
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Missing values can be set to something other than NaN:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "frame_3.reindex(list(reversed(range(0, 6))), fill_value=0)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " Index([], dtype='object') | \n",
+ " Empty DataFrame | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
0 rows \u00d7 4 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 39,
+ "text": [
+ "Empty DataFrame\n",
+ "Columns: [year, state, pop, unempl]\n",
+ "Index: []\n",
+ "\n",
+ "[0 rows x 4 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 39
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Fill gaps in ordered data such as a time series by propagating the last valid value forward (ffill) or the next valid value backward (bfill):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_5 = Series(['foo', 'bar', 'baz'], index=[0, 2, 4])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 40
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_5.reindex(range(5), method='ffill')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 41,
+ "text": [
+ "0 foo\n",
+ "1 foo\n",
+ "2 bar\n",
+ "3 bar\n",
+ "4 baz\n",
+ "dtype: object"
+ ]
+ }
+ ],
"prompt_number": 41
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "ser_5.reindex(range(5), method='bfill')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 42,
+ "text": [
+ "0 foo\n",
+ "1 bar\n",
+ "2 bar\n",
+ "3 baz\n",
+ "4 baz\n",
+ "dtype: object"
+ ]
+ }
+ ],
+ "prompt_number": 42
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Reindex columns:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "frame_3.reindex(columns=['state', 'pop', 'unempl', 'year'])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " state | \n",
+ " pop | \n",
+ " unempl | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " VA | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " VA | \n",
+ " 5.1 | \n",
+ " NaN | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VA | \n",
+ " 5.2 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MD | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MD | \n",
+ " 4.1 | \n",
+ " 6.1 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows \u00d7 4 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 43,
+ "text": [
+ " state pop unempl year\n",
+ "0 VA 5.0 NaN 2012\n",
+ "1 VA 5.1 NaN 2013\n",
+ "2 VA 5.2 6.0 2014\n",
+ "3 MD 4.0 6.0 2014\n",
+ "4 MD 4.1 6.1 2015\n",
+ "\n",
+ "[5 rows x 4 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 43
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
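+ "Reindex rows and columns in a single call while filling newly added rows (values that were already NaN in the original data are left unchanged):"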
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "frame_3.reindex(index=list(reversed(range(0, 6))),\n",
+ " fill_value=0,\n",
+ " columns=['state', 'pop', 'unempl', 'year'])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " state | \n",
+ " pop | \n",
+ " unempl | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MD | \n",
+ " 4.1 | \n",
+ " 6.1 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MD | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VA | \n",
+ " 5.2 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " VA | \n",
+ " 5.1 | \n",
+ " NaN | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " VA | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6 rows \u00d7 4 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 44,
+ "text": [
+ " state pop unempl year\n",
+ "5 0 0.0 0.0 0\n",
+ "4 MD 4.1 6.1 2015\n",
+ "3 MD 4.0 6.0 2014\n",
+ "2 VA 5.2 6.0 2014\n",
+ "1 VA 5.1 NaN 2013\n",
+ "0 VA 5.0 NaN 2012\n",
+ "\n",
+ "[6 rows x 4 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 44
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Reindex using ix (note: ix was deprecated in pandas 0.20 and removed in 1.0; use reindex in newer versions):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "frame_3.ix[range(0, 7), ['state', 'pop', 'unempl', 'year']]"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " state | \n",
+ " pop | \n",
+ " unempl | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " VA | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " VA | \n",
+ " 5.1 | \n",
+ " NaN | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VA | \n",
+ " 5.2 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MD | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MD | \n",
+ " 4.1 | \n",
+ " 6.1 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7 rows \u00d7 4 columns
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 45,
+ "text": [
+ " state pop unempl year\n",
+ "0 VA 5.0 NaN 2012\n",
+ "1 VA 5.1 NaN 2013\n",
+ "2 VA 5.2 6.0 2014\n",
+ "3 MD 4.0 6.0 2014\n",
+ "4 MD 4.1 6.1 2015\n",
+ "5 NaN NaN NaN NaN\n",
+ "6 NaN NaN NaN NaN\n",
+ "\n",
+ "[7 rows x 4 columns]"
+ ]
+ }
+ ],
+ "prompt_number": 45
}
],
"metadata": {}