From ea1743d906a3a73d7a8fe3dcfb0290037de01697 Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Thu, 29 Jan 2015 12:58:28 -0500 Subject: [PATCH] Added reindexing snippets. --- pandas/pandas.ipynb | 607 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 598 insertions(+), 9 deletions(-) diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb index 2f0f8a5..47c53dd 100644 --- a/pandas/pandas.ipynb +++ b/pandas/pandas.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:1d555e34f97d4a24383bba48a1c34b1526e08e18276d519d2c8afaf3ff0550f4" + "signature": "sha256:c354a8841cc70bd62479cc5ca49e30ec7138276bbd5c4b7cccfa0c622c5f0428" }, "nbformat": 3, "nbformat_minor": 0, @@ -15,7 +15,8 @@ "# Pandas\n", "\n", "* Series\n", - "* DataFrame" + "* DataFrame\n", + "* Reindexing" ] }, { @@ -1516,7 +1517,7 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 37, + "prompt_number": 33, "text": [ " MD VA\n", "year \n", @@ -1527,7 +1528,7 @@ ] } ], - "prompt_number": 37 + "prompt_number": 33 }, { "cell_type": "markdown", @@ -1580,7 +1581,7 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 38, + "prompt_number": 34, "text": [ "state MD VA\n", "year \n", @@ -1591,7 +1592,7 @@ ] } ], - "prompt_number": 38 + "prompt_number": 34 }, { "cell_type": "markdown", @@ -1612,14 +1613,14 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 39, + "prompt_number": 35, "text": [ "array([[ nan, 5.2],\n", " [ 4.1, nan]])" ] } ], - "prompt_number": 39 + "prompt_number": 35 }, { "cell_type": "markdown", @@ -1640,7 +1641,7 @@ { "metadata": {}, "output_type": "pyout", - "prompt_number": 41, + "prompt_number": 36, "text": [ "array([[2012, 'VA', 5.0, nan],\n", " [2013, 'VA', 5.1, nan],\n", @@ -1650,7 +1651,595 @@ ] } ], + "prompt_number": 36 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reindexing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a new object with the data conformed to a new index. Any missing values are set to NaN." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_3" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearstatepopunempl
0 2012 VA 5.0 NaN
1 2013 VA 5.1 NaN
2 2014 VA 5.2 6.0
3 2014 MD 4.0 6.0
4 2015 MD 4.1 6.1
\n", + "

5 rows \u00d7 4 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 37, + "text": [ + " year state pop unempl\n", + "0 2012 VA 5.0 NaN\n", + "1 2013 VA 5.1 NaN\n", + "2 2014 VA 5.2 6.0\n", + "3 2014 MD 4.0 6.0\n", + "4 2015 MD 4.1 6.1\n", + "\n", + "[5 rows x 4 columns]" + ] + } + ], + "prompt_number": 37 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reindexing rows returns a new frame with the specified index:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_3.reindex(list(reversed(range(0, 6))))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearstatepopunempl
5 NaN NaN NaN NaN
4 2015 MD 4.1 6.1
3 2014 MD 4.0 6.0
2 2014 VA 5.2 6.0
1 2013 VA 5.1 NaN
0 2012 VA 5.0 NaN
\n", + "

6 rows \u00d7 4 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 38, + "text": [ + " year state pop unempl\n", + "5 NaN NaN NaN NaN\n", + "4 2015 MD 4.1 6.1\n", + "3 2014 MD 4.0 6.0\n", + "2 2014 VA 5.2 6.0\n", + "1 2013 VA 5.1 NaN\n", + "0 2012 VA 5.0 NaN\n", + "\n", + "[6 rows x 4 columns]" + ] + } + ], + "prompt_number": 38 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Missing values can be set to something other than NaN:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_3.reindex(range(6, 0), fill_value=0)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Index([], dtype='object')Empty DataFrame
\n", + "

0 rows \u00d7 4 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 39, + "text": [ + "Empty DataFrame\n", + "Columns: [year, state, pop, unempl]\n", + "Index: []\n", + "\n", + "[0 rows x 4 columns]" + ] + } + ], + "prompt_number": 39 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Interpolate ordered data like a time series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_5 = Series(['foo', 'bar', 'baz'], index=[0, 2, 4])" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 40 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_5.reindex(range(5), method='ffill')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 41, + "text": [ + "0 foo\n", + "1 foo\n", + "2 bar\n", + "3 bar\n", + "4 baz\n", + "dtype: object" + ] + } + ], "prompt_number": 41 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_5.reindex(range(5), method='bfill')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 42, + "text": [ + "0 foo\n", + "1 bar\n", + "2 bar\n", + "3 baz\n", + "4 baz\n", + "dtype: object" + ] + } + ], + "prompt_number": 42 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reindex columns:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_3.reindex(columns=['state', 'pop', 'unempl', 'year'])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statepopunemplyear
0 VA 5.0 NaN 2012
1 VA 5.1 NaN 2013
2 VA 5.2 6.0 2014
3 MD 4.0 6.0 2014
4 MD 4.1 6.1 2015
\n", + "

5 rows \u00d7 4 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 43, + "text": [ + " state pop unempl year\n", + "0 VA 5.0 NaN 2012\n", + "1 VA 5.1 NaN 2013\n", + "2 VA 5.2 6.0 2014\n", + "3 MD 4.0 6.0 2014\n", + "4 MD 4.1 6.1 2015\n", + "\n", + "[5 rows x 4 columns]" + ] + } + ], + "prompt_number": 43 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reindex rows and columns while filling rows:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_3.reindex(index=list(reversed(range(0, 6))),\n", + " fill_value=0,\n", + " columns=['state', 'pop', 'unempl', 'year'])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statepopunemplyear
5 0 0.0 0.0 0
4 MD 4.1 6.1 2015
3 MD 4.0 6.0 2014
2 VA 5.2 6.0 2014
1 VA 5.1 NaN 2013
0 VA 5.0 NaN 2012
\n", + "

6 rows \u00d7 4 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 44, + "text": [ + " state pop unempl year\n", + "5 0 0.0 0.0 0\n", + "4 MD 4.1 6.1 2015\n", + "3 MD 4.0 6.0 2014\n", + "2 VA 5.2 6.0 2014\n", + "1 VA 5.1 NaN 2013\n", + "0 VA 5.0 NaN 2012\n", + "\n", + "[6 rows x 4 columns]" + ] + } + ], + "prompt_number": 44 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reindex using ix:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "frame_3.ix[range(0, 7), ['state', 'pop', 'unempl', 'year']]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
statepopunemplyear
0 VA 5.0 NaN 2012
1 VA 5.1 NaN 2013
2 VA 5.2 6.0 2014
3 MD 4.0 6.0 2014
4 MD 4.1 6.1 2015
5 NaN NaN NaN NaN
6 NaN NaN NaN NaN
\n", + "

7 rows \u00d7 4 columns

\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 45, + "text": [ + " state pop unempl year\n", + "0 VA 5.0 NaN 2012\n", + "1 VA 5.1 NaN 2013\n", + "2 VA 5.2 6.0 2014\n", + "3 MD 4.0 6.0 2014\n", + "4 MD 4.1 6.1 2015\n", + "5 NaN NaN NaN NaN\n", + "6 NaN NaN NaN NaN\n", + "\n", + "[7 rows x 4 columns]" + ] + } + ], + "prompt_number": 45 } ], "metadata": {}