From 689d029499dc2ca5a03ecee9affc7edda5cb68be Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Tue, 27 Jan 2015 19:22:37 -0500 Subject: [PATCH] Added Pandas Series snippets. --- README.md | 12 +- pandas/pandas.ipynb | 577 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 584 insertions(+), 5 deletions(-) create mode 100644 pandas/pandas.ipynb diff --git a/README.md b/README.md index 4c87b7a..314b1f1 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,11 @@ IPython Notebooks geared towards Python data analysis (core Python, NumPy, panda * [datetime](http://nbviewer.ipython.org/github/donnemartin/ipython-data-notebooks/blob/master/core/datetime.ipynb) * [unit tests](http://nbviewer.ipython.org/github/donnemartin/ipython-data-notebooks/blob/master/core/unit_tests.ipynb) -## numpy - -[Coming Soon] IPython Notebooks demonstrating NumPy functionality. - ## pandas -[Coming Soon] IPython Notebooks demonstrating pandas functionality. +[Active Development] IPython Notebooks demonstrating pandas functionality. + +* [pandas](http://nbviewer.ipython.org/github/donnemartin/ipython-data-notebooks/blob/master/pandas/pandas.ipynb) ## matplotlib @@ -31,6 +29,10 @@ IPython Notebooks geared towards Python data analysis (core Python, NumPy, panda [Coming Soon] IPython Notebooks demonstrating SciPy functionality. +## numpy + +[Coming Soon] IPython Notebooks demonstrating NumPy functionality. 
+ ##License Copyright 2014 Donne Martin diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb new file mode 100644 index 0000000..3a09034 --- /dev/null +++ b/pandas/pandas.ipynb @@ -0,0 +1,577 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:5af6c8db3042b9d07306a075e560855c3bd9a73234feb466482830d025b58068" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Basics" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from pandas import Series, DataFrame\n", + "import pandas as pd" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Series\n", + "\n", + "A Series is a one-dimensional array-like object containing an array of data and an associated array of data labels. The data can be any NumPy data type and the labels are the Series' index." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_1 = Series([1, 1, 2, -3, -5, 8, 13])\n", + "ser_1" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 2, + "text": [ + "0 1\n", + "1 1\n", + "2 2\n", + "3 -3\n", + "4 -5\n", + "5 8\n", + "6 13\n", + "dtype: int64" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the array representation of a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_1.values" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 3, + "text": [ + "array([ 1, 1, 2, -3, -5, 8, 13])" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the index of the Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_1.index" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 4, + "text": [ + "Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a Series with a custom index:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2 = Series([1, 1, 2, -3, -5], index=['a', 'b', 'c', 'd', 'e'])\n", + "ser_2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 5, + "text": [ + "a 1\n", + "b 1\n", + "c 2\n", + "d -3\n", + "e -5\n", + "dtype: int64" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a value from a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": 
false, + "input": [ + "ser_2[4] == ser_2['e']" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 6, + "text": [ + "True" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a set of values from a Series by passing in a list:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2[['c', 'a', 'b']]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 7, + "text": [ + "c 2\n", + "a 1\n", + "b 1\n", + "dtype: int64" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get values greater than 0:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2[ser_2 > 0]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 8, + "text": [ + "a 1\n", + "b 1\n", + "c 2\n", + "dtype: int64" + ] + } + ], + "prompt_number": 8 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scalar multiply:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_2 * 2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 9, + "text": [ + "a 2\n", + "b 2\n", + "c 4\n", + "d -6\n", + "e -10\n", + "dtype: int64" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apply a NumPy math function:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np\n", + "np.exp(ser_2)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 10, + "text": [ + "a 2.718282\n", + "b 2.718282\n", + "c 7.389056\n", + "d 0.049787\n", + "e 0.006738\n", + "dtype: 
float64" + ] + } + ], + "prompt_number": 10 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A Series is like a fixed-length, ordered dict. \n", + "\n", + "Create a Series by passing in a dict:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "dict_1 = {'foo' : 100, 'bar' : 200, 'baz' : 300}\n", + "ser_3 = Series(dict_1)\n", + "ser_3" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 11, + "text": [ + "bar 200\n", + "baz 300\n", + "foo 100\n", + "dtype: int64" + ] + } + ], + "prompt_number": 11 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Re-order a Series by passing in an index (indices not found are NaN):" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "index = ['foo', 'bar', 'baz', 'qux']\n", + "ser_4 = Series(dict_1, index=index)\n", + "ser_4" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 12, + "text": [ + "foo 100\n", + "bar 200\n", + "baz 300\n", + "qux NaN\n", + "dtype: float64" + ] + } + ], + "prompt_number": 12 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check for NaN with the pandas method:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "pd.isnull(ser_4)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 13, + "text": [ + "foo False\n", + "bar False\n", + "baz False\n", + "qux True\n", + "dtype: bool" + ] + } + ], + "prompt_number": 13 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check for NaN with the Series method:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_4.isnull()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 14, + 
"text": [ + "foo False\n", + "bar False\n", + "baz False\n", + "qux True\n", + "dtype: bool" + ] + } + ], + "prompt_number": 14 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Series automatically aligns differently indexed data in arithmetic operations:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_3 + ser_4" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 15, + "text": [ + "bar 400\n", + "baz 600\n", + "foo 200\n", + "qux NaN\n", + "dtype: float64" + ] + } + ], + "prompt_number": 15 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Name a Series:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_4.name = 'foobarbazqux'" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 16 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Name a Series index:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_4.index.name = 'label'" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 17 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_4" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 18, + "text": [ + "label\n", + "foo 100\n", + "bar 200\n", + "baz 300\n", + "qux NaN\n", + "Name: foobarbazqux, dtype: float64" + ] + } + ], + "prompt_number": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rename a Series' index in place:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "ser_4.index = ['fo', 'br', 'bz', 'qx']\n", + "ser_4" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 19, + "text": [ + "fo 100\n", + "br 200\n", + "bz 300\n", + "qx NaN\n", + "Name: 
foobarbazqux, dtype: float64" + ] + } + ], + "prompt_number": 19 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file