{ "metadata": { "name": "", "signature": "sha256:1d555e34f97d4a24383bba48a1c34b1526e08e18276d519d2c8afaf3ff0550f4" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pandas\n", "\n", "* Series\n", "* DataFrame" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from pandas import Series, DataFrame\n", "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Series\n", "\n", "A Series is a one-dimensional array-like object containing an array of data and an associated array of data labels. The data can be any NumPy data type and the labels are the Series' index." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_1 = Series([1, 1, 2, -3, -5, 8, 13])\n", "ser_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ "0 1\n", "1 1\n", "2 2\n", "3 -3\n", "4 -5\n", "5 8\n", "6 13\n", "dtype: int64" ] } ], "prompt_number": 2 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get the array representation of a Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_1.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "array([ 1, 1, 2, -3, -5, 8, 13])" ] } ], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get the index of the Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_1.index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a Series with a custom 
index:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_2 = Series([1, 1, 2, -3, -5], index=['a', 'b', 'c', 'd', 'e'])\n", "ser_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "a 1\n", "b 1\n", "c 2\n", "d -3\n", "e -5\n", "dtype: int64" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get a value from a Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_2[4] == ser_2['e']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "True" ] } ], "prompt_number": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get a set of values from a Series by passing in a list:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_2[['c', 'a', 'b']]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "c 2\n", "a 1\n", "b 1\n", "dtype: int64" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get values greater than 0:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_2[ser_2 > 0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "a 1\n", "b 1\n", "c 2\n", "dtype: int64" ] } ], "prompt_number": 8 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Scalar multiply:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_2 * 2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "a 2\n", "b 2\n", "c 4\n", "d -6\n", "e -10\n", "dtype: int64" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Apply a numpy math function:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "np.exp(ser_2)" ], 
"language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "a 2.718282\n", "b 2.718282\n", "c 7.389056\n", "d 0.049787\n", "e 0.006738\n", "dtype: float64" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "A Series is like a fixed-length, ordered dict. \n", "\n", "Create a series by passing in a dict:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1 = {'foo' : 100, 'bar' : 200, 'baz' : 300}\n", "ser_3 = Series(dict_1)\n", "ser_3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "bar 200\n", "baz 300\n", "foo 100\n", "dtype: int64" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Re-order a Series by passing in an index (indices not found are NaN):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "index = ['foo', 'bar', 'baz', 'qux']\n", "ser_4 = Series(dict_1, index=index)\n", "ser_4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "foo 100\n", "bar 200\n", "baz 300\n", "qux NaN\n", "dtype: float64" ] } ], "prompt_number": 12 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check for NaN with the pandas method:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.isnull(ser_4)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "foo False\n", "bar False\n", "baz False\n", "qux True\n", "dtype: bool" ] } ], "prompt_number": 13 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check for NaN with the Series method:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_4.isnull()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "foo False\n", "bar False\n", 
"baz False\n", "qux True\n", "dtype: bool" ] } ], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Series automatically aligns differently indexed data in arithmetic operations:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_3 + ser_4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 15, "text": [ "bar 400\n", "baz 600\n", "foo 200\n", "qux NaN\n", "dtype: float64" ] } ], "prompt_number": 15 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Name a Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_4.name = 'foobarbazqux'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Name a Series index:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_4.index.name = 'label'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "ser_4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 18, "text": [ "label\n", "foo 100\n", "bar 200\n", "baz 300\n", "qux NaN\n", "Name: foobarbazqux, dtype: float64" ] } ], "prompt_number": 18 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rename a Series' index in place:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser_4.index = ['fo', 'br', 'bz', 'qx']\n", "ser_4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ "fo 100\n", "br 200\n", "bz 300\n", "qx NaN\n", "Name: foobarbazqux, dtype: float64" ] } ], "prompt_number": 19 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DataFrame\n", "\n", "A DataFrame is a tabular data structure containing an ordered collection of columns. Each column can have a different type. 
DataFrames have both row and column indices and are analogous to a dict of Series. Row and column operations are treated roughly symmetrically. Columns returned when indexing a DataFrame are views of the underlying data, not a copy. To obtain a copy, use the Series' copy method.\n", "\n", "Create a DataFrame:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data_1 = {'state': ['VA', 'VA', 'VA', 'MD', 'MD'],\n", " 'year': [2012, 2013, 2014, 2014, 2015],\n", " 'pop': [5.0, 5.1, 5.2, 4.0, 4.1]}\n", "frame_1 = DataFrame(data_1)\n", "frame_1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
popstateyear
0 5.0 VA 2012
1 5.1 VA 2013
2 5.2 VA 2014
3 4.0 MD 2014
4 4.1 MD 2015
\n", "

5 rows \u00d7 3 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ " pop state year\n", "0 5.0 VA 2012\n", "1 5.1 VA 2013\n", "2 5.2 VA 2014\n", "3 4.0 MD 2014\n", "4 4.1 MD 2015\n", "\n", "[5 rows x 3 columns]" ] } ], "prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a DataFrame specifying a sequence of columns:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_2 = DataFrame(data_1, columns=['year', 'state', 'pop'])\n", "frame_2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepop
0 2012 VA 5.0
1 2013 VA 5.1
2 2014 VA 5.2
3 2014 MD 4.0
4 2015 MD 4.1
\n", "

5 rows \u00d7 3 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 21, "text": [ " year state pop\n", "0 2012 VA 5.0\n", "1 2013 VA 5.1\n", "2 2014 VA 5.2\n", "3 2014 MD 4.0\n", "4 2015 MD 4.1\n", "\n", "[5 rows x 3 columns]" ] } ], "prompt_number": 21 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Like Series, columns that are not present in the data are NaN:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3 = DataFrame(data_1, columns=['year', 'state', 'pop', 'unempl'])\n", "frame_3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopunempl
0 2012 VA 5.0 NaN
1 2013 VA 5.1 NaN
2 2014 VA 5.2 NaN
3 2014 MD 4.0 NaN
4 2015 MD 4.1 NaN
\n", "

5 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 22, "text": [ " year state pop unempl\n", "0 2012 VA 5.0 NaN\n", "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 NaN\n", "3 2014 MD 4.0 NaN\n", "4 2015 MD 4.1 NaN\n", "\n", "[5 rows x 4 columns]" ] } ], "prompt_number": 22 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Retrieve a column by key, returning a Series:\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3['state']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 23, "text": [ "0 VA\n", "1 VA\n", "2 VA\n", "3 MD\n", "4 MD\n", "Name: state, dtype: object" ] } ], "prompt_number": 23 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Retrieve a column by attribute, returning a Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3.year" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 24, "text": [ "0 2012\n", "1 2013\n", "2 2014\n", "3 2014\n", "4 2015\n", "Name: year, dtype: int64" ] } ], "prompt_number": 24 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Retrieve a row by position:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3.ix[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ "year 2012\n", "state VA\n", "pop 5\n", "unempl NaN\n", "Name: 0, dtype: object" ] } ], "prompt_number": 25 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Update a column by assignment:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3['unempl'] = np.arange(5)\n", "frame_3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopunempl
0 2012 VA 5.0 0
1 2013 VA 5.1 1
2 2014 VA 5.2 2
3 2014 MD 4.0 3
4 2015 MD 4.1 4
\n", "

5 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ " year state pop unempl\n", "0 2012 VA 5.0 0\n", "1 2013 VA 5.1 1\n", "2 2014 VA 5.2 2\n", "3 2014 MD 4.0 3\n", "4 2015 MD 4.1 4\n", "\n", "[5 rows x 4 columns]" ] } ], "prompt_number": 26 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Assign a Series to a column (note if assigning a list or array, the length must match the DataFrame, unlike a Series):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "unempl = Series([6.0, 6.0, 6.1], index=[2, 3, 4])\n", "frame_3['unempl'] = unempl\n", "frame_3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopunempl
0 2012 VA 5.0 NaN
1 2013 VA 5.1 NaN
2 2014 VA 5.2 6.0
3 2014 MD 4.0 6.0
4 2015 MD 4.1 6.1
\n", "

5 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 27, "text": [ " year state pop unempl\n", "0 2012 VA 5.0 NaN\n", "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 6.0\n", "3 2014 MD 4.0 6.0\n", "4 2015 MD 4.1 6.1\n", "\n", "[5 rows x 4 columns]" ] } ], "prompt_number": 27 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Assign to a column that doesn't exist to create a new column:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3['state_dup'] = frame_3['state']\n", "frame_3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopunemplstate_dup
0 2012 VA 5.0 NaN VA
1 2013 VA 5.1 NaN VA
2 2014 VA 5.2 6.0 VA
3 2014 MD 4.0 6.0 MD
4 2015 MD 4.1 6.1 MD
\n", "

5 rows \u00d7 5 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 28, "text": [ " year state pop unempl state_dup\n", "0 2012 VA 5.0 NaN VA\n", "1 2013 VA 5.1 NaN VA\n", "2 2014 VA 5.2 6.0 VA\n", "3 2014 MD 4.0 6.0 MD\n", "4 2015 MD 4.1 6.1 MD\n", "\n", "[5 rows x 5 columns]" ] } ], "prompt_number": 28 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Delete a column:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "del frame_3['state_dup']\n", "frame_3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopunempl
0 2012 VA 5.0 NaN
1 2013 VA 5.1 NaN
2 2014 VA 5.2 6.0
3 2014 MD 4.0 6.0
4 2015 MD 4.1 6.1
\n", "

5 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 29, "text": [ " year state pop unempl\n", "0 2012 VA 5.0 NaN\n", "1 2013 VA 5.1 NaN\n", "2 2014 VA 5.2 6.0\n", "3 2014 MD 4.0 6.0\n", "4 2015 MD 4.1 6.1\n", "\n", "[5 rows x 4 columns]" ] } ], "prompt_number": 29 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a DataFrame from a nested dict of dicts (the keys in the inner dicts are unioned and sorted to form the index in the result, unless an explicit index is specified):" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pop = {'VA' : {2013 : 5.1, 2014 : 5.2},\n", " 'MD' : {2014 : 4.0, 2015 : 4.1}}\n", "frame_4 = DataFrame(pop)\n", "frame_4" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MDVA
2013 NaN 5.1
2014 4.0 5.2
2015 4.1 NaN
\n", "

3 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ " MD VA\n", "2013 NaN 5.1\n", "2014 4.0 5.2\n", "2015 4.1 NaN\n", "\n", "[3 rows x 2 columns]" ] } ], "prompt_number": 30 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Transpose the DataFrame:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_4.T" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
201320142015
MD NaN 4.0 4.1
VA 5.1 5.2 NaN
\n", "

2 rows \u00d7 3 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 31, "text": [ " 2013 2014 2015\n", "MD NaN 4.0 4.1\n", "VA 5.1 5.2 NaN\n", "\n", "[2 rows x 3 columns]" ] } ], "prompt_number": 31 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a DataFrame from a dict of Series:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data_2 = {'VA' : frame_4['VA'][1:],\n", " 'MD' : frame_4['MD'][2:]}\n", "frame_5 = DataFrame(data_2)\n", "frame_5" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MDVA
2014 NaN 5.2
2015 4.1 NaN
\n", "

2 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 32, "text": [ " MD VA\n", "2014 NaN 5.2\n", "2015 4.1 NaN\n", "\n", "[2 rows x 2 columns]" ] } ], "prompt_number": 32 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set the DataFrame index name:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_5.index.name = 'year'\n", "frame_5" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MDVA
year
2014 NaN 5.2
2015 4.1 NaN
\n", "

2 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 37, "text": [ " MD VA\n", "year \n", "2014 NaN 5.2\n", "2015 4.1 NaN\n", "\n", "[2 rows x 2 columns]" ] } ], "prompt_number": 37 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set the DataFrame columns name:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_5.columns.name = 'state'\n", "frame_5" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateMDVA
year
2014 NaN 5.2
2015 4.1 NaN
\n", "

2 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 38, "text": [ "state MD VA\n", "year \n", "2014 NaN 5.2\n", "2015 4.1 NaN\n", "\n", "[2 rows x 2 columns]" ] } ], "prompt_number": 38 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Return the data contained in a DataFrame as a 2D ndarray:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_5.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 39, "text": [ "array([[ nan, 5.2],\n", " [ 4.1, nan]])" ] } ], "prompt_number": 39 }, { "cell_type": "markdown", "metadata": {}, "source": [ "If the columns have different dtypes, the 2D ndarray's dtype will accommodate all of the columns:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame_3.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 41, "text": [ "array([[2012, 'VA', 5.0, nan],\n", " [2013, 'VA', 5.1, nan],\n", " [2014, 'VA', 5.2, 6.0],\n", " [2014, 'MD', 4.0, 6.0],\n", " [2015, 'MD', 4.1, 6.1]], dtype=object)" ] } ], "prompt_number": 41 } ], "metadata": {} } ] }