diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb index e3bee24..c39a9af 100644 --- a/pandas/pandas.ipynb +++ b/pandas/pandas.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:0dc910ada47612859f282813d751ef152a62e4d6493b0027ad17a368b35379a0" + "signature": "sha256:207ce6e6163805a40ff6b55987e709194f2be605d9cc8f7276e2b6d50b096e89" }, "nbformat": 3, "nbformat_minor": 0, @@ -17,7 +17,8 @@ "* Series\n", "* DataFrame\n", "* Reindexing\n", - "* Dropping Entries" + "* Dropping Entries\n", + "* Indexing, Selecting, Filtering" ] }, { @@ -639,7 +640,6 @@ " \n", " \n", "\n", - "
5 rows \u00d7 3 columns
\n", "" ], "metadata": {}, @@ -651,9 +651,7 @@ "1 5.1 VA 2013\n", "2 5.2 VA 2014\n", "3 4.0 MD 2014\n", - "4 4.1 MD 2015\n", - "\n", - "[5 rows x 3 columns]" + "4 4.1 MD 2015" ] } ], @@ -721,7 +719,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 3 columns
\n", "" ], "metadata": {}, @@ -733,9 +730,7 @@ "1 2013 VA 5.1\n", "2 2014 VA 5.2\n", "3 2014 MD 4.0\n", - "4 2015 MD 4.1\n", - "\n", - "[5 rows x 3 columns]" + "4 2015 MD 4.1" ] } ], @@ -809,7 +804,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -821,9 +815,7 @@ "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 NaN\n", "3 2014 MD 4.0 NaN\n", - "4 2015 MD 4.1 NaN\n", - "\n", - "[5 rows x 4 columns]" + "4 2015 MD 4.1 NaN" ] } ], @@ -992,7 +984,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -1004,9 +995,7 @@ "1 2013 VA 5.1 1\n", "2 2014 VA 5.2 2\n", "3 2014 MD 4.0 3\n", - "4 2015 MD 4.1 4\n", - "\n", - "[5 rows x 4 columns]" + "4 2015 MD 4.1 4" ] } ], @@ -1081,7 +1070,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -1093,9 +1081,7 @@ "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 6.0\n", "3 2014 MD 4.0 6.0\n", - "4 2015 MD 4.1 6.1\n", - "\n", - "[5 rows x 4 columns]" + "4 2015 MD 4.1 6.1" ] } ], @@ -1175,7 +1161,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 5 columns
\n", "" ], "metadata": {}, @@ -1187,9 +1172,7 @@ "1 2013 VA 5.1 NaN VA\n", "2 2014 VA 5.2 6.0 VA\n", "3 2014 MD 4.0 6.0 MD\n", - "4 2015 MD 4.1 6.1 MD\n", - "\n", - "[5 rows x 5 columns]" + "4 2015 MD 4.1 6.1 MD" ] } ], @@ -1263,7 +1246,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -1275,9 +1257,7 @@ "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 6.0\n", "3 2014 MD 4.0 6.0\n", - "4 2015 MD 4.1 6.1\n", - "\n", - "[5 rows x 4 columns]" + "4 2015 MD 4.1 6.1" ] } ], @@ -1331,7 +1311,6 @@ " \n", " \n", "\n", - "3 rows \u00d7 2 columns
\n", "" ], "metadata": {}, @@ -1341,9 +1320,7 @@ " MD VA\n", "2013 NaN 5.1\n", "2014 4.0 5.2\n", - "2015 4.1 NaN\n", - "\n", - "[3 rows x 2 columns]" + "2015 4.1 NaN" ] } ], @@ -1392,7 +1369,6 @@ " \n", " \n", "\n", - "2 rows \u00d7 3 columns
\n", "" ], "metadata": {}, @@ -1401,9 +1377,7 @@ "text": [ " 2013 2014 2015\n", "MD NaN 4.0 4.1\n", - "VA 5.1 5.2 NaN\n", - "\n", - "[2 rows x 3 columns]" + "VA 5.1 5.2 NaN" ] } ], @@ -1452,7 +1426,6 @@ " \n", " \n", "\n", - "2 rows \u00d7 2 columns
\n", "" ], "metadata": {}, @@ -1461,9 +1434,7 @@ "text": [ " MD VA\n", "2014 NaN 5.2\n", - "2015 4.1 NaN\n", - "\n", - "[2 rows x 2 columns]" + "2015 4.1 NaN" ] } ], @@ -1515,7 +1486,6 @@ " \n", " \n", "\n", - "2 rows \u00d7 2 columns
\n", "" ], "metadata": {}, @@ -1525,9 +1495,7 @@ " MD VA\n", "year \n", "2014 NaN 5.2\n", - "2015 4.1 NaN\n", - "\n", - "[2 rows x 2 columns]" + "2015 4.1 NaN" ] } ], @@ -1579,7 +1547,6 @@ " \n", " \n", "\n", - "2 rows \u00d7 2 columns
\n", "" ], "metadata": {}, @@ -1589,9 +1556,7 @@ "state MD VA\n", "year \n", "2014 NaN 5.2\n", - "2015 4.1 NaN\n", - "\n", - "[2 rows x 2 columns]" + "2015 4.1 NaN" ] } ], @@ -1730,7 +1695,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -1742,9 +1706,7 @@ "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 6.0\n", "3 2014 MD 4.0 6.0\n", - "4 2015 MD 4.1 6.1\n", - "\n", - "[5 rows x 4 columns]" + "4 2015 MD 4.1 6.1" ] } ], @@ -1824,7 +1786,6 @@ " \n", " \n", "\n", - "6 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -1837,9 +1798,7 @@ "3 2014 MD 4.0 6.0\n", "2 2014 VA 5.2 6.0\n", "1 2013 VA 5.1 NaN\n", - "0 2012 VA 5.0 NaN\n", - "\n", - "[6 rows x 4 columns]" + "0 2012 VA 5.0 NaN" ] } ], @@ -1865,14 +1824,18 @@ "html": [ "Index([], dtype='object') | \n", - "Empty DataFrame | \n", + " \n", + "|||||
\n", + " | year | \n", + "state | \n", + "pop | \n", + "unempl | \n", "
---|
0 rows \u00d7 4 columns
\n", "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -2036,9 +1996,7 @@ "1 VA 5.1 NaN 2013\n", "2 VA 5.2 6.0 2014\n", "3 MD 4.0 6.0 2014\n", - "4 MD 4.1 6.1 2015\n", - "\n", - "[5 rows x 4 columns]" + "4 MD 4.1 6.1 2015" ] } ], @@ -2120,7 +2078,6 @@ " \n", " \n", "\n", - "6 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -2133,9 +2090,7 @@ "3 MD 4.0 6.0 2014\n", "2 VA 5.2 6.0 2014\n", "1 VA 5.1 NaN 2013\n", - "0 VA 5.0 NaN 2012\n", - "\n", - "[6 rows x 4 columns]" + "0 VA 5.0 NaN 2012" ] } ], @@ -2223,7 +2178,6 @@ " \n", " \n", "\n", - "7 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -2237,9 +2191,7 @@ "3 MD 4.0 6.0 2014\n", "4 MD 4.1 6.1 2015\n", "5 NaN NaN NaN NaN\n", - "6 NaN NaN NaN NaN\n", - "\n", - "[7 rows x 4 columns]" + "6 NaN NaN NaN NaN" ] } ], @@ -2320,7 +2272,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 4 columns
\n", "" ], "metadata": {}, @@ -2332,9 +2283,7 @@ "3 MD 4.0 6.0 2014\n", "4 MD 4.1 6.1 2015\n", "5 NaN NaN NaN NaN\n", - "6 NaN NaN NaN NaN\n", - "\n", - "[5 rows x 4 columns]" + "6 NaN NaN NaN NaN" ] } ], @@ -2402,7 +2351,6 @@ " \n", " \n", "\n", - "5 rows \u00d7 3 columns
\n", "" ], "metadata": {}, @@ -2414,15 +2362,230 @@ "3 MD 4.0 2014\n", "4 MD 4.1 2015\n", "5 NaN NaN NaN\n", - "6 NaN NaN NaN\n", - "\n", - "[5 rows x 3 columns]" + "6 NaN NaN NaN" ] } ], "prompt_number": 47 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Indexing, Selecting, Filtering" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Series indexing is similar to NumPy array indexing with the added bonus of being able to use the Series' index values." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 48, + "text": [ + "a 1\n", + "b 1\n", + "c 2\n", + "d -3\n", + "e -5\n", + "dtype: int64" + ] + } + ], + "prompt_number": 48 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select a value from a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2[0] == ser_2['a']" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 49, + "text": [ + "True" + ] + } + ], + "prompt_number": 49 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select a slice from a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2[1:4]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 50, + "text": [ + "b 1\n", + "c 2\n", + "d -3\n", + "dtype: int64" + ] + } + ], + "prompt_number": 50 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select specific values from a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2[['b', 'c', 'd']]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 51, + "text": [ + "b 1\n", + "c 2\n", + "d -3\n", + "dtype: int64" + ] + } + ], + "prompt_number": 51 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select from a Series based on a filter:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2[ser_2 > 0]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 52, + "text": [ + "a 1\n", + "b 1\n", + "c 2\n", + "dtype: int64" + ] + } + ], + "prompt_number": 52 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select a slice from a Series with labels (note the end point is inclusive):" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2['a':'b']" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 53, + "text": [ + "a 1\n", + "b 1\n", + "dtype: int64" + ] + } + ], + "prompt_number": 53 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Assign to a Series slice (note the end point is inclusive):" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2['a':'b'] = 0\n", + "ser_2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 54, + "text": [ + "a 0\n", + "b 0\n", + "c 2\n", + "d -3\n", + "e -5\n", + "dtype: int64" + ] + } + ], + "prompt_number": 54 } - ], "metadata": {} } ]