{
 "metadata": {
  "name": "",
  "signature": "sha256:1d555e34f97d4a24383bba48a1c34b1526e08e18276d519d2c8afaf3ff0550f4"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Pandas\n",
      "\n",
      "* Series\n",
      "* DataFrame"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from pandas import Series, DataFrame\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Series\n",
      "\n",
      "A Series is a one-dimensional array-like object containing an array of data and an associated array of data labels.  The data can be any NumPy data type and the labels are the Series' index."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Create a Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_1 = Series([1, 1, 2, -3, -5, 8, 13])\n",
      "ser_1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "0     1\n",
        "1     1\n",
        "2     2\n",
        "3    -3\n",
        "4    -5\n",
        "5     8\n",
        "6    13\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Get the array representation of a Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_1.values"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "array([ 1,  1,  2, -3, -5,  8, 13])"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Get the index of the Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_1.index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Create a Series with a custom index:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_2 = Series([1, 1, 2, -3, -5], index=['a', 'b', 'c', 'd', 'e'])\n",
      "ser_2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "a    1\n",
        "b    1\n",
        "c    2\n",
        "d   -3\n",
        "e   -5\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Get a value from a Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_2[4] == ser_2['e']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "True"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Get a set of values from a Series by passing in a list:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_2[['c', 'a', 'b']]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "c    2\n",
        "a    1\n",
        "b    1\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Get values great than 0:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_2[ser_2 > 0]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 8,
       "text": [
        "a    1\n",
        "b    1\n",
        "c    2\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Scalar multiply:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_2 * 2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 9,
       "text": [
        "a     2\n",
        "b     2\n",
        "c     4\n",
        "d    -6\n",
        "e   -10\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Apply a numpy math function:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "np.exp(ser_2)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 10,
       "text": [
        "a    2.718282\n",
        "b    2.718282\n",
        "c    7.389056\n",
        "d    0.049787\n",
        "e    0.006738\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 10
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "A Series is like a fixed-length, ordered dict.  \n",
      "\n",
      "Create a series by passing in a dict:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dict_1 = {'foo' : 100, 'bar' : 200, 'baz' : 300}\n",
      "ser_3 = Series(dict_1)\n",
      "ser_3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 11,
       "text": [
        "bar    200\n",
        "baz    300\n",
        "foo    100\n",
        "dtype: int64"
       ]
      }
     ],
     "prompt_number": 11
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Re-order a Series by passing in an index (indices not found are NaN):"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "index = ['foo', 'bar', 'baz', 'qux']\n",
      "ser_4 = Series(dict_1, index=index)\n",
      "ser_4"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 12,
       "text": [
        "foo    100\n",
        "bar    200\n",
        "baz    300\n",
        "qux    NaN\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Check for NaN with the pandas method:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pd.isnull(ser_4)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 13,
       "text": [
        "foo    False\n",
        "bar    False\n",
        "baz    False\n",
        "qux     True\n",
        "dtype: bool"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Check for NaN with the Series method:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_4.isnull()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 14,
       "text": [
        "foo    False\n",
        "bar    False\n",
        "baz    False\n",
        "qux     True\n",
        "dtype: bool"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Series automatically aligns differently indexed data in arithmetic operations:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_3 + ser_4"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 15,
       "text": [
        "bar    400\n",
        "baz    600\n",
        "foo    200\n",
        "qux    NaN\n",
        "dtype: float64"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Name a Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_4.name = 'foobarbazqux'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 16
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Name a Series index:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_4.index.name = 'label'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_4"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 18,
       "text": [
        "label\n",
        "foo      100\n",
        "bar      200\n",
        "baz      300\n",
        "qux      NaN\n",
        "Name: foobarbazqux, dtype: float64"
       ]
      }
     ],
     "prompt_number": 18
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Rename a Series' index in place:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ser_4.index = ['fo', 'br', 'bz', 'qx']\n",
      "ser_4"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 19,
       "text": [
        "fo    100\n",
        "br    200\n",
        "bz    300\n",
        "qx    NaN\n",
        "Name: foobarbazqux, dtype: float64"
       ]
      }
     ],
     "prompt_number": 19
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## DataFrame\n",
      "\n",
      "A DataFrame is a tabular data structure containing an ordered collection of columns.  Each column can have a different type.  DataFrames have both row and column indices and is analogous to a dict of Series.  Row and column operations are treated roughly symmetrically.  Columns returned when indexing a DataFrame are views of the underlying data, not a copy.  To obtain a copy, use the Series' copy method.\n",
      "\n",
      "Create a DataFrame:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data_1 = {'state': ['VA', 'VA', 'VA', 'MD', 'MD'],\n",
      "        'year': [2012, 2013, 2014, 2014, 2015],\n",
      "        'pop': [5.0, 5.1, 5.2, 4.0, 4.1]}\n",
      "frame_1 = DataFrame(data_1)\n",
      "frame_1"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>pop</th>\n",
        "      <th>state</th>\n",
        "      <th>year</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 5.0</td>\n",
        "      <td> VA</td>\n",
        "      <td> 2012</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 5.1</td>\n",
        "      <td> VA</td>\n",
        "      <td> 2013</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 5.2</td>\n",
        "      <td> VA</td>\n",
        "      <td> 2014</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 4.0</td>\n",
        "      <td> MD</td>\n",
        "      <td> 2014</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 4.1</td>\n",
        "      <td> MD</td>\n",
        "      <td> 2015</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 3 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 20,
       "text": [
        "   pop state  year\n",
        "0  5.0    VA  2012\n",
        "1  5.1    VA  2013\n",
        "2  5.2    VA  2014\n",
        "3  4.0    MD  2014\n",
        "4  4.1    MD  2015\n",
        "\n",
        "[5 rows x 3 columns]"
       ]
      }
     ],
     "prompt_number": 20
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Create a DataFrame specifying a sequence of columns:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_2 = DataFrame(data_1, columns=['year', 'state', 'pop'])\n",
      "frame_2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>year</th>\n",
        "      <th>state</th>\n",
        "      <th>pop</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2012</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2013</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2014</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2014</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2015</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.1</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 3 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 21,
       "text": [
        "   year state  pop\n",
        "0  2012    VA  5.0\n",
        "1  2013    VA  5.1\n",
        "2  2014    VA  5.2\n",
        "3  2014    MD  4.0\n",
        "4  2015    MD  4.1\n",
        "\n",
        "[5 rows x 3 columns]"
       ]
      }
     ],
     "prompt_number": 21
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Like Series, columns that are not present in the data are NaN:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3 = DataFrame(data_1, columns=['year', 'state', 'pop', 'unempl'])\n",
      "frame_3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>year</th>\n",
        "      <th>state</th>\n",
        "      <th>pop</th>\n",
        "      <th>unempl</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2012</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.0</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2013</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2014</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.2</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2014</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.0</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2015</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 22,
       "text": [
        "   year state  pop unempl\n",
        "0  2012    VA  5.0    NaN\n",
        "1  2013    VA  5.1    NaN\n",
        "2  2014    VA  5.2    NaN\n",
        "3  2014    MD  4.0    NaN\n",
        "4  2015    MD  4.1    NaN\n",
        "\n",
        "[5 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 22
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Retrieve a column by key, returning a Series:\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3['state']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 23,
       "text": [
        "0    VA\n",
        "1    VA\n",
        "2    VA\n",
        "3    MD\n",
        "4    MD\n",
        "Name: state, dtype: object"
       ]
      }
     ],
     "prompt_number": 23
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Retrive a column by attribute, returning a Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3.year"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 24,
       "text": [
        "0    2012\n",
        "1    2013\n",
        "2    2014\n",
        "3    2014\n",
        "4    2015\n",
        "Name: year, dtype: int64"
       ]
      }
     ],
     "prompt_number": 24
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Retrieve a row by position:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3.ix[0]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 25,
       "text": [
        "year      2012\n",
        "state       VA\n",
        "pop          5\n",
        "unempl     NaN\n",
        "Name: 0, dtype: object"
       ]
      }
     ],
     "prompt_number": 25
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Update a column by assignment:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3['unempl'] = np.arange(5)\n",
      "frame_3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>year</th>\n",
        "      <th>state</th>\n",
        "      <th>pop</th>\n",
        "      <th>unempl</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2012</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.0</td>\n",
        "      <td> 0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2013</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.1</td>\n",
        "      <td> 1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2014</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.2</td>\n",
        "      <td> 2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2014</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.0</td>\n",
        "      <td> 3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2015</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.1</td>\n",
        "      <td> 4</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 26,
       "text": [
        "   year state  pop  unempl\n",
        "0  2012    VA  5.0       0\n",
        "1  2013    VA  5.1       1\n",
        "2  2014    VA  5.2       2\n",
        "3  2014    MD  4.0       3\n",
        "4  2015    MD  4.1       4\n",
        "\n",
        "[5 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 26
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Assign a Series to a column (note if assigning a list or array, the length must match the DataFrame, unlike a Series):"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "unempl = Series([6.0, 6.0, 6.1], index=[2, 3, 4])\n",
      "frame_3['unempl'] = unempl\n",
      "frame_3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>year</th>\n",
        "      <th>state</th>\n",
        "      <th>pop</th>\n",
        "      <th>unempl</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2012</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.0</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2013</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2014</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.2</td>\n",
        "      <td> 6.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2014</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.0</td>\n",
        "      <td> 6.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2015</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.1</td>\n",
        "      <td> 6.1</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 27,
       "text": [
        "   year state  pop  unempl\n",
        "0  2012    VA  5.0     NaN\n",
        "1  2013    VA  5.1     NaN\n",
        "2  2014    VA  5.2     6.0\n",
        "3  2014    MD  4.0     6.0\n",
        "4  2015    MD  4.1     6.1\n",
        "\n",
        "[5 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 27
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Assign a new column that doesn't exist to create a new column:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3['state_dup'] = frame_3['state']\n",
      "frame_3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>year</th>\n",
        "      <th>state</th>\n",
        "      <th>pop</th>\n",
        "      <th>unempl</th>\n",
        "      <th>state_dup</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2012</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.0</td>\n",
        "      <td> NaN</td>\n",
        "      <td> VA</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2013</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.1</td>\n",
        "      <td> NaN</td>\n",
        "      <td> VA</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2014</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.2</td>\n",
        "      <td> 6.0</td>\n",
        "      <td> VA</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2014</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.0</td>\n",
        "      <td> 6.0</td>\n",
        "      <td> MD</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2015</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.1</td>\n",
        "      <td> 6.1</td>\n",
        "      <td> MD</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 5 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 28,
       "text": [
        "   year state  pop  unempl state_dup\n",
        "0  2012    VA  5.0     NaN        VA\n",
        "1  2013    VA  5.1     NaN        VA\n",
        "2  2014    VA  5.2     6.0        VA\n",
        "3  2014    MD  4.0     6.0        MD\n",
        "4  2015    MD  4.1     6.1        MD\n",
        "\n",
        "[5 rows x 5 columns]"
       ]
      }
     ],
     "prompt_number": 28
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Delete a column:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "del frame_3['state_dup']\n",
      "frame_3"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>year</th>\n",
        "      <th>state</th>\n",
        "      <th>pop</th>\n",
        "      <th>unempl</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 2012</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.0</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2013</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 2014</td>\n",
        "      <td> VA</td>\n",
        "      <td> 5.2</td>\n",
        "      <td> 6.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 2014</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.0</td>\n",
        "      <td> 6.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 2015</td>\n",
        "      <td> MD</td>\n",
        "      <td> 4.1</td>\n",
        "      <td> 6.1</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 29,
       "text": [
        "   year state  pop  unempl\n",
        "0  2012    VA  5.0     NaN\n",
        "1  2013    VA  5.1     NaN\n",
        "2  2014    VA  5.2     6.0\n",
        "3  2014    MD  4.0     6.0\n",
        "4  2015    MD  4.1     6.1\n",
        "\n",
        "[5 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 29
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Create a DataFrame from a nested dict of dicts (the keys in the inner dicts are unioned and sorted to form the index in the result, unless an explicit index is specified):"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pop = {'VA' : {2013 : 5.1, 2014 : 5.2},\n",
      "       'MD' : {2014 : 4.0, 2015 : 4.1}}\n",
      "frame_4 = DataFrame(pop)\n",
      "frame_4"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>MD</th>\n",
        "      <th>VA</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>2013</th>\n",
        "      <td> NaN</td>\n",
        "      <td> 5.1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2014</th>\n",
        "      <td> 4.0</td>\n",
        "      <td> 5.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2015</th>\n",
        "      <td> 4.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>3 rows \u00d7 2 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 30,
       "text": [
        "       MD   VA\n",
        "2013  NaN  5.1\n",
        "2014  4.0  5.2\n",
        "2015  4.1  NaN\n",
        "\n",
        "[3 rows x 2 columns]"
       ]
      }
     ],
     "prompt_number": 30
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Transpose the DataFrame:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_4.T"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>2013</th>\n",
        "      <th>2014</th>\n",
        "      <th>2015</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>MD</th>\n",
        "      <td> NaN</td>\n",
        "      <td> 4.0</td>\n",
        "      <td> 4.1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>VA</th>\n",
        "      <td> 5.1</td>\n",
        "      <td> 5.2</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 3 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 31,
       "text": [
        "    2013  2014  2015\n",
        "MD   NaN   4.0   4.1\n",
        "VA   5.1   5.2   NaN\n",
        "\n",
        "[2 rows x 3 columns]"
       ]
      }
     ],
     "prompt_number": 31
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Create a DataFrame from a dict of Series:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data_2 = {'VA' : frame_4['VA'][1:],\n",
      "          'MD' : frame_4['MD'][2:]}\n",
      "frame_5 = DataFrame(data_2)\n",
      "frame_5"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>MD</th>\n",
        "      <th>VA</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>2014</th>\n",
        "      <td> NaN</td>\n",
        "      <td> 5.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2015</th>\n",
        "      <td> 4.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 2 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 32,
       "text": [
        "       MD   VA\n",
        "2014  NaN  5.2\n",
        "2015  4.1  NaN\n",
        "\n",
        "[2 rows x 2 columns]"
       ]
      }
     ],
     "prompt_number": 32
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Set the DataFrame index name:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_5.index.name = 'year'\n",
      "frame_5"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>MD</th>\n",
        "      <th>VA</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>2014</th>\n",
        "      <td> NaN</td>\n",
        "      <td> 5.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2015</th>\n",
        "      <td> 4.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 2 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 37,
       "text": [
        "       MD   VA\n",
        "year          \n",
        "2014  NaN  5.2\n",
        "2015  4.1  NaN\n",
        "\n",
        "[2 rows x 2 columns]"
       ]
      }
     ],
     "prompt_number": 37
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Set the DataFrame columns name:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_5.columns.name = 'state'\n",
      "frame_5"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th>state</th>\n",
        "      <th>MD</th>\n",
        "      <th>VA</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>2014</th>\n",
        "      <td> NaN</td>\n",
        "      <td> 5.2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2015</th>\n",
        "      <td> 4.1</td>\n",
        "      <td> NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 2 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 38,
       "text": [
        "state   MD   VA\n",
        "year           \n",
        "2014   NaN  5.2\n",
        "2015   4.1  NaN\n",
        "\n",
        "[2 rows x 2 columns]"
       ]
      }
     ],
     "prompt_number": 38
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Return the data contained in a DataFrame as a 2D ndarray:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_5.values"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 39,
       "text": [
        "array([[ nan,  5.2],\n",
        "       [ 4.1,  nan]])"
       ]
      }
     ],
     "prompt_number": 39
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "If the columns are different dtypes, the 2D ndarray's dtype will accomodate all of the columns:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "frame_3.values"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 41,
       "text": [
        "array([[2012, 'VA', 5.0, nan],\n",
        "       [2013, 'VA', 5.1, nan],\n",
        "       [2014, 'VA', 5.2, 6.0],\n",
        "       [2014, 'MD', 4.0, 6.0],\n",
        "       [2015, 'MD', 4.1, 6.1]], dtype=object)"
       ]
      }
     ],
     "prompt_number": 41
    }
   ],
   "metadata": {}
  }
 ]
}