mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
3423 lines
83 KiB
Plaintext
3423 lines
83 KiB
Plaintext
{
|
|
"metadata": {
|
|
"name": "",
|
|
"signature": "sha256:f58e882e3019ecb243505ef140cb0aedee21ecd47d1210ca8e6b4e2281c1316e"
|
|
},
|
|
"nbformat": 3,
|
|
"nbformat_minor": 0,
|
|
"worksheets": [
|
|
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Pandas\n",
|
|
"\n",
|
|
"* Series\n",
|
|
"* DataFrame\n",
|
|
"* Reindexing\n",
|
|
"* Dropping Entries\n",
|
|
"* Indexing, Selecting, Filtering\n",
|
|
"* Arithmetic and Data Alignment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"from pandas import Series, DataFrame\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 1
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Series\n",
|
|
"\n",
|
|
"A Series is a one-dimensional array-like object containing an array of data and an associated array of data labels. The data can be any NumPy data type and the labels are the Series' index."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_1 = Series([1, 1, 2, -3, -5, 8, 13])\n",
|
|
"ser_1"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 2,
|
|
"text": [
|
|
"0 1\n",
|
|
"1 1\n",
|
|
"2 2\n",
|
|
"3 -3\n",
|
|
"4 -5\n",
|
|
"5 8\n",
|
|
"6 13\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 2
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Get the array representation of a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_1.values"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 3,
|
|
"text": [
|
|
"array([ 1, 1, 2, -3, -5, 8, 13])"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 3
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Index objects are immutable and hold the axis labels and metadata such as names and axis names.\n",
|
|
"\n",
|
|
"Get the index of the Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_1.index"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 4,
|
|
"text": [
|
|
"Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 4
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a Series with a custom index:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2 = Series([1, 1, 2, -3, -5], index=['a', 'b', 'c', 'd', 'e'])\n",
|
|
"ser_2"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 5,
|
|
"text": [
|
|
"a 1\n",
|
|
"b 1\n",
|
|
"c 2\n",
|
|
"d -3\n",
|
|
"e -5\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 5
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Get a value from a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[4] == ser_2['e']"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 6,
|
|
"text": [
|
|
"True"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 6
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Get a set of values from a Series by passing in a list:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[['c', 'a', 'b']]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 7,
|
|
"text": [
|
|
"c 2\n",
|
|
"a 1\n",
|
|
"b 1\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 7
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Get values great than 0:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[ser_2 > 0]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 8,
|
|
"text": [
|
|
"a 1\n",
|
|
"b 1\n",
|
|
"c 2\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 8
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Scalar multiply:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2 * 2"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 9,
|
|
"text": [
|
|
"a 2\n",
|
|
"b 2\n",
|
|
"c 4\n",
|
|
"d -6\n",
|
|
"e -10\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 9
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Apply a numpy math function:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"import numpy as np\n",
|
|
"np.exp(ser_2)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 10,
|
|
"text": [
|
|
"a 2.718282\n",
|
|
"b 2.718282\n",
|
|
"c 7.389056\n",
|
|
"d 0.049787\n",
|
|
"e 0.006738\n",
|
|
"dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 10
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"A Series is like a fixed-length, ordered dict. \n",
|
|
"\n",
|
|
"Create a series by passing in a dict:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"dict_1 = {'foo' : 100, 'bar' : 200, 'baz' : 300}\n",
|
|
"ser_3 = Series(dict_1)\n",
|
|
"ser_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 11,
|
|
"text": [
|
|
"bar 200\n",
|
|
"baz 300\n",
|
|
"foo 100\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 11
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Re-order a Series by passing in an index (indices not found are NaN):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"index = ['foo', 'bar', 'baz', 'qux']\n",
|
|
"ser_4 = Series(dict_1, index=index)\n",
|
|
"ser_4"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 12,
|
|
"text": [
|
|
"foo 100\n",
|
|
"bar 200\n",
|
|
"baz 300\n",
|
|
"qux NaN\n",
|
|
"dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 12
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Check for NaN with the pandas method:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"pd.isnull(ser_4)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 13,
|
|
"text": [
|
|
"foo False\n",
|
|
"bar False\n",
|
|
"baz False\n",
|
|
"qux True\n",
|
|
"dtype: bool"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 13
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Check for NaN with the Series method:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_4.isnull()"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 14,
|
|
"text": [
|
|
"foo False\n",
|
|
"bar False\n",
|
|
"baz False\n",
|
|
"qux True\n",
|
|
"dtype: bool"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 14
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Series automatically aligns differently indexed data in arithmetic operations:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_3 + ser_4"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 15,
|
|
"text": [
|
|
"bar 400\n",
|
|
"baz 600\n",
|
|
"foo 200\n",
|
|
"qux NaN\n",
|
|
"dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 15
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Name a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_4.name = 'foobarbazqux'"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 16
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Name a Series index:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_4.index.name = 'label'"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 17
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_4"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 18,
|
|
"text": [
|
|
"label\n",
|
|
"foo 100\n",
|
|
"bar 200\n",
|
|
"baz 300\n",
|
|
"qux NaN\n",
|
|
"Name: foobarbazqux, dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 18
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Rename a Series' index in place:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_4.index = ['fo', 'br', 'bz', 'qx']\n",
|
|
"ser_4"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 19,
|
|
"text": [
|
|
"fo 100\n",
|
|
"br 200\n",
|
|
"bz 300\n",
|
|
"qx NaN\n",
|
|
"Name: foobarbazqux, dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 19
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## DataFrame\n",
|
|
"\n",
|
|
"A DataFrame is a tabular data structure containing an ordered collection of columns. Each column can have a different type. DataFrames have both row and column indices and is analogous to a dict of Series. Row and column operations are treated roughly symmetrically. Columns returned when indexing a DataFrame are views of the underlying data, not a copy. To obtain a copy, use the Series' copy method.\n",
|
|
"\n",
|
|
"Create a DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"data_1 = {'state': ['VA', 'VA', 'VA', 'MD', 'MD'],\n",
|
|
" 'year': [2012, 2013, 2014, 2014, 2015],\n",
|
|
" 'pop': [5.0, 5.1, 5.2, 4.0, 4.1]}\n",
|
|
"frame_1 = DataFrame(data_1)\n",
|
|
"frame_1"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 20,
|
|
"text": [
|
|
" pop state year\n",
|
|
"0 5.0 VA 2012\n",
|
|
"1 5.1 VA 2013\n",
|
|
"2 5.2 VA 2014\n",
|
|
"3 4.0 MD 2014\n",
|
|
"4 4.1 MD 2015"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 20
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a DataFrame specifying a sequence of columns:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_2 = DataFrame(data_1, columns=['year', 'state', 'pop'])\n",
|
|
"frame_2"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 21,
|
|
"text": [
|
|
" year state pop\n",
|
|
"0 2012 VA 5.0\n",
|
|
"1 2013 VA 5.1\n",
|
|
"2 2014 VA 5.2\n",
|
|
"3 2014 MD 4.0\n",
|
|
"4 2015 MD 4.1"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 21
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Like Series, columns that are not present in the data are NaN:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3 = DataFrame(data_1, columns=['year', 'state', 'pop', 'unempl'])\n",
|
|
"frame_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 22,
|
|
"text": [
|
|
" year state pop unempl\n",
|
|
"0 2012 VA 5.0 NaN\n",
|
|
"1 2013 VA 5.1 NaN\n",
|
|
"2 2014 VA 5.2 NaN\n",
|
|
"3 2014 MD 4.0 NaN\n",
|
|
"4 2015 MD 4.1 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 22
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Retrieve a column by key, returning a Series:\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3['state']"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 23,
|
|
"text": [
|
|
"0 VA\n",
|
|
"1 VA\n",
|
|
"2 VA\n",
|
|
"3 MD\n",
|
|
"4 MD\n",
|
|
"Name: state, dtype: object"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 23
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Retrive a column by attribute, returning a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.year"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 24,
|
|
"text": [
|
|
"0 2012\n",
|
|
"1 2013\n",
|
|
"2 2014\n",
|
|
"3 2014\n",
|
|
"4 2015\n",
|
|
"Name: year, dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 24
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Retrieve a row by position:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.ix[0]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 25,
|
|
"text": [
|
|
"year 2012\n",
|
|
"state VA\n",
|
|
"pop 5\n",
|
|
"unempl NaN\n",
|
|
"Name: 0, dtype: object"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 25
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Update a column by assignment:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3['unempl'] = np.arange(5)\n",
|
|
"frame_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> 0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> 1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 4</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 26,
|
|
"text": [
|
|
" year state pop unempl\n",
|
|
"0 2012 VA 5.0 0\n",
|
|
"1 2013 VA 5.1 1\n",
|
|
"2 2014 VA 5.2 2\n",
|
|
"3 2014 MD 4.0 3\n",
|
|
"4 2015 MD 4.1 4"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 26
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Assign a Series to a column (note if assigning a list or array, the length must match the DataFrame, unlike a Series):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"unempl = Series([6.0, 6.0, 6.1], index=[2, 3, 4])\n",
|
|
"frame_3['unempl'] = unempl\n",
|
|
"frame_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 27,
|
|
"text": [
|
|
" year state pop unempl\n",
|
|
"0 2012 VA 5.0 NaN\n",
|
|
"1 2013 VA 5.1 NaN\n",
|
|
"2 2014 VA 5.2 6.0\n",
|
|
"3 2014 MD 4.0 6.0\n",
|
|
"4 2015 MD 4.1 6.1"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 27
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Assign a new column that doesn't exist to create a new column:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3['state_dup'] = frame_3['state']\n",
|
|
"frame_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>state_dup</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> VA</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> VA</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> VA</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> MD</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> MD</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 28,
|
|
"text": [
|
|
" year state pop unempl state_dup\n",
|
|
"0 2012 VA 5.0 NaN VA\n",
|
|
"1 2013 VA 5.1 NaN VA\n",
|
|
"2 2014 VA 5.2 6.0 VA\n",
|
|
"3 2014 MD 4.0 6.0 MD\n",
|
|
"4 2015 MD 4.1 6.1 MD"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 28
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Delete a column:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"del frame_3['state_dup']\n",
|
|
"frame_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 29,
|
|
"text": [
|
|
" year state pop unempl\n",
|
|
"0 2012 VA 5.0 NaN\n",
|
|
"1 2013 VA 5.1 NaN\n",
|
|
"2 2014 VA 5.2 6.0\n",
|
|
"3 2014 MD 4.0 6.0\n",
|
|
"4 2015 MD 4.1 6.1"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 29
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a DataFrame from a nested dict of dicts (the keys in the inner dicts are unioned and sorted to form the index in the result, unless an explicit index is specified):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"pop = {'VA' : {2013 : 5.1, 2014 : 5.2},\n",
|
|
" 'MD' : {2014 : 4.0, 2015 : 4.1}}\n",
|
|
"frame_4 = DataFrame(pop)\n",
|
|
"frame_4"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>MD</th>\n",
|
|
" <th>VA</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2013</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2014</th>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2015</th>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 30,
|
|
"text": [
|
|
" MD VA\n",
|
|
"2013 NaN 5.1\n",
|
|
"2014 4.0 5.2\n",
|
|
"2015 4.1 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 30
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Transpose the DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_4.T"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>2013</th>\n",
|
|
" <th>2014</th>\n",
|
|
" <th>2015</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>MD</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>VA</th>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 31,
|
|
"text": [
|
|
" 2013 2014 2015\n",
|
|
"MD NaN 4.0 4.1\n",
|
|
"VA 5.1 5.2 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 31
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a DataFrame from a dict of Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"data_2 = {'VA' : frame_4['VA'][1:],\n",
|
|
" 'MD' : frame_4['MD'][2:]}\n",
|
|
"frame_5 = DataFrame(data_2)\n",
|
|
"frame_5"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>MD</th>\n",
|
|
" <th>VA</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2014</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2015</th>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 32,
|
|
"text": [
|
|
" MD VA\n",
|
|
"2014 NaN 5.2\n",
|
|
"2015 4.1 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 32
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Set the DataFrame index name:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_5.index.name = 'year'\n",
|
|
"frame_5"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>MD</th>\n",
|
|
" <th>VA</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>year</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2014</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2015</th>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 33,
|
|
"text": [
|
|
" MD VA\n",
|
|
"year \n",
|
|
"2014 NaN 5.2\n",
|
|
"2015 4.1 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 33
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Set the DataFrame columns name:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_5.columns.name = 'state'\n",
|
|
"frame_5"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th>state</th>\n",
|
|
" <th>MD</th>\n",
|
|
" <th>VA</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>year</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2014</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2015</th>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 34,
|
|
"text": [
|
|
"state MD VA\n",
|
|
"year \n",
|
|
"2014 NaN 5.2\n",
|
|
"2015 4.1 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 34
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Return the data contained in a DataFrame as a 2D ndarray:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_5.values"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 35,
|
|
"text": [
|
|
"array([[ nan, 5.2],\n",
|
|
" [ 4.1, nan]])"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 35
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"If the columns have different dtypes, the 2D ndarray's dtype will be chosen to accommodate all of the columns:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.values"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 36,
|
|
"text": [
|
|
"array([[2012, 'VA', 5.0, nan],\n",
|
|
" [2013, 'VA', 5.1, nan],\n",
|
|
" [2014, 'VA', 5.2, 6.0],\n",
|
|
" [2014, 'MD', 4.0, 6.0],\n",
|
|
" [2015, 'MD', 4.1, 6.1]], dtype=object)"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 36
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Reindexing"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Create a new object with the data conformed to a new index. Any missing values are set to NaN."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 37,
|
|
"text": [
|
|
" year state pop unempl\n",
|
|
"0 2012 VA 5.0 NaN\n",
|
|
"1 2013 VA 5.1 NaN\n",
|
|
"2 2014 VA 5.2 6.0\n",
|
|
"3 2014 MD 4.0 6.0\n",
|
|
"4 2015 MD 4.1 6.1"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 37
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Reindexing rows returns a new frame with the specified index:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.reindex(list(reversed(range(0, 6))))"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 2015</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 2014</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 2013</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 2012</td>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 38,
|
|
"text": [
|
|
" year state pop unempl\n",
|
|
"5 NaN NaN NaN NaN\n",
|
|
"4 2015 MD 4.1 6.1\n",
|
|
"3 2014 MD 4.0 6.0\n",
|
|
"2 2014 VA 5.2 6.0\n",
|
|
"1 2013 VA 5.1 NaN\n",
|
|
"0 2012 VA 5.0 NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 38
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Missing values can be set to something other than NaN:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.reindex(range(6, 0), fill_value=0)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 39,
|
|
"text": [
|
|
"Empty DataFrame\n",
|
|
"Columns: [year, state, pop, unempl]\n",
|
|
"Index: []"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 39
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
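"Fill in missing values when reindexing ordered data such as a time series (`ffill` carries the previous value forward, `bfill` carries the next value backward):"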
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_5 = Series(['foo', 'bar', 'baz'], index=[0, 2, 4])"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 40
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_5.reindex(range(5), method='ffill')"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 41,
|
|
"text": [
|
|
"0 foo\n",
|
|
"1 foo\n",
|
|
"2 bar\n",
|
|
"3 bar\n",
|
|
"4 baz\n",
|
|
"dtype: object"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 41
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_5.reindex(range(5), method='bfill')"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 42,
|
|
"text": [
|
|
"0 foo\n",
|
|
"1 bar\n",
|
|
"2 bar\n",
|
|
"3 baz\n",
|
|
"4 baz\n",
|
|
"dtype: object"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 42
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Reindex columns:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.reindex(columns=['state', 'pop', 'unempl', 'year'])"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 43,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 VA 5.0 NaN 2012\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"4 MD 4.1 6.1 2015"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 43
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Reindex rows and columns, filling the newly introduced rows with a fill value:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_3.reindex(index=list(reversed(range(0, 6))),\n",
|
|
" fill_value=0,\n",
|
|
" columns=['state', 'pop', 'unempl', 'year'])"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> 0</td>\n",
|
|
" <td> 0.0</td>\n",
|
|
" <td> 0.0</td>\n",
|
|
" <td> 0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 44,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"5 0 0.0 0.0 0\n",
|
|
"4 MD 4.1 6.1 2015\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"0 VA 5.0 NaN 2012"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 44
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
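"Reindex using ix (note: `.ix` was deprecated in pandas 0.20 and removed in 1.0; use `.reindex` or `.loc` in newer versions):"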
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6 = frame_3.ix[range(0, 7), ['state', 'pop', 'unempl', 'year']]\n",
|
|
"frame_6"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 45,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 VA 5.0 NaN 2012\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"4 MD 4.1 6.1 2015\n",
|
|
"5 NaN NaN NaN NaN\n",
|
|
"6 NaN NaN NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 45
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Dropping Entries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Drop rows from a Series or DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_7 = frame_6.drop([0, 1])\n",
|
|
"frame_7"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 46,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"4 MD 4.1 6.1 2015\n",
|
|
"5 NaN NaN NaN NaN\n",
|
|
"6 NaN NaN NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 46
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Drop columns from a DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_7 = frame_7.drop('unempl', axis=1)\n",
|
|
"frame_7"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 47,
|
|
"text": [
|
|
" state pop year\n",
|
|
"2 VA 5.2 2014\n",
|
|
"3 MD 4.0 2014\n",
|
|
"4 MD 4.1 2015\n",
|
|
"5 NaN NaN NaN\n",
|
|
"6 NaN NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 47
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Indexing, Selecting, Filtering"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Series indexing is similar to NumPy array indexing with the added bonus of being able to use the Series' index values."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 48,
|
|
"text": [
|
|
"a 1\n",
|
|
"b 1\n",
|
|
"c 2\n",
|
|
"d -3\n",
|
|
"e -5\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 48
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select a value from a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[0] == ser_2['a']"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 49,
|
|
"text": [
|
|
"True"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 49
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select a slice from a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[1:4]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 50,
|
|
"text": [
|
|
"b 1\n",
|
|
"c 2\n",
|
|
"d -3\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 50
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select specific values from a Series:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[['b', 'c', 'd']]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 51,
|
|
"text": [
|
|
"b 1\n",
|
|
"c 2\n",
|
|
"d -3\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 51
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select from a Series based on a filter:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2[ser_2 > 0]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 52,
|
|
"text": [
|
|
"a 1\n",
|
|
"b 1\n",
|
|
"c 2\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 52
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select a slice from a Series with labels (note the end point is inclusive):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2['a':'b']"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 53,
|
|
"text": [
|
|
"a 1\n",
|
|
"b 1\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 53
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Assign to a Series slice (note the end point is inclusive):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_2['a':'b'] = 0\n",
|
|
"ser_2"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 54,
|
|
"text": [
|
|
"a 0\n",
|
|
"b 0\n",
|
|
"c 2\n",
|
|
"d -3\n",
|
|
"e -5\n",
|
|
"dtype: int64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 54
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Pandas supports indexing into a DataFrame."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 55,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 VA 5.0 NaN 2012\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"4 MD 4.1 6.1 2015\n",
|
|
"5 NaN NaN NaN NaN\n",
|
|
"6 NaN NaN NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 55
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select specified columns from a DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6[['pop', 'unempl']]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 56,
|
|
"text": [
|
|
" pop unempl\n",
|
|
"0 5.0 NaN\n",
|
|
"1 5.1 NaN\n",
|
|
"2 5.2 6.0\n",
|
|
"3 4.0 6.0\n",
|
|
"4 4.1 6.1\n",
|
|
"5 NaN NaN\n",
|
|
"6 NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 56
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select a slice of rows from a DataFrame by position (note the end point is exclusive):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6[:2]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 57,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 VA 5.0 NaN 2012\n",
|
|
"1 VA 5.1 NaN 2013"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 57
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select from a DataFrame based on a filter:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6[frame_6['pop'] > 5]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 58,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"2 VA 5.2 6 2014"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 58
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Perform a scalar comparison on a DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6 > 5"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> True</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> True</td>\n",
|
|
" <td> True</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> True</td>\n",
|
|
" <td> True</td>\n",
|
|
" <td> True</td>\n",
|
|
" <td> True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> True</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> True</td>\n",
|
|
" <td> True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> True</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> True</td>\n",
|
|
" <td> True</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" <td> False</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 59,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 True False False True\n",
|
|
"1 True True False True\n",
|
|
"2 True True True True\n",
|
|
"3 True False True True\n",
|
|
"4 True False True True\n",
|
|
"5 False False False False\n",
|
|
"6 False False False False"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 59
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Perform a scalar comparison on a DataFrame and retain the values that pass the filter (values that fail become NaN):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6[frame_6 > 5]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 60,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 VA NaN NaN 2012\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD NaN 6.0 2014\n",
|
|
"4 MD NaN 6.1 2015\n",
|
|
"5 NaN NaN NaN NaN\n",
|
|
"6 NaN NaN NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 60
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select a slice of rows from a DataFrame (note the end point is inclusive):"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6.ix[2:3]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 61,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"2 VA 5.2 6 2014\n",
|
|
"3 MD 4.0 6 2014"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 61
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select a slice of rows from a specific column of a DataFrame:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6.ix[0:2, 'pop']\n",
|
|
"frame_6"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.0</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2012</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.1</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> 2013</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" <td> NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 62,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"0 VA 5.0 NaN 2012\n",
|
|
"1 VA 5.1 NaN 2013\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"4 MD 4.1 6.1 2015\n",
|
|
"5 NaN NaN NaN NaN\n",
|
|
"6 NaN NaN NaN NaN"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 62
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Select rows based on a comparison on a specific column:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"frame_6.ix[frame_6.unempl > 5.0]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"html": [
|
|
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>pop</th>\n",
|
|
" <th>unempl</th>\n",
|
|
" <th>year</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td> VA</td>\n",
|
|
" <td> 5.2</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.0</td>\n",
|
|
" <td> 6.0</td>\n",
|
|
" <td> 2014</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td> MD</td>\n",
|
|
" <td> 4.1</td>\n",
|
|
" <td> 6.1</td>\n",
|
|
" <td> 2015</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 63,
|
|
"text": [
|
|
" state pop unempl year\n",
|
|
"2 VA 5.2 6.0 2014\n",
|
|
"3 MD 4.0 6.0 2014\n",
|
|
"4 MD 4.1 6.1 2015"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 63
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Arithmetic and Data Alignment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_6 = Series(np.random.randn(5),\n",
|
|
" index=['a', 'b', 'c', 'd', 'e'])\n",
|
|
"ser_6\n"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 64,
|
|
"text": [
|
|
"a -0.224292\n",
|
|
"b -1.166149\n",
|
|
"c -2.078194\n",
|
|
"d 2.060130\n",
|
|
"e 0.292102\n",
|
|
"dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 64
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_7 = Series(np.random.randn(5),\n",
|
|
" index=['a', 'c', 'e', 'f', 'g'])\n",
|
|
"ser_7"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 65,
|
|
"text": [
|
|
"a -0.329560\n",
|
|
"c 1.009019\n",
|
|
"e -2.489898\n",
|
|
"f -0.731142\n",
|
|
"g -0.025979\n",
|
|
"dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 65
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Adding two objects aligns them on their index labels: the result's index is the union of both indexes, with NaN for labels that are not present in both:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"ser_6 + ser_7"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 66,
|
|
"text": [
|
|
"a -0.553851\n",
|
|
"b NaN\n",
|
|
"c -1.069176\n",
|
|
"d NaN\n",
|
|
"e -2.197796\n",
|
|
"f NaN\n",
|
|
"g NaN\n",
|
|
"dtype: float64"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 66
|
|
}
|
|
],
|
|
"metadata": {}
|
|
}
|
|
]
|
|
} |