From 7563eeae21e3e030882f1f8b15bb59499a34f0fc Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Sun, 15 Feb 2015 17:48:13 -0500 Subject: [PATCH] Added code to read CSV data to Pandas, describe, list head, then write the CSV to another file --- pandas/pandas_io.ipynb | 325 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 pandas/pandas_io.ipynb diff --git a/pandas/pandas_io.ipynb b/pandas/pandas_io.ipynb new file mode 100644 index 0000000..0afa320 --- /dev/null +++ b/pandas/pandas_io.ipynb @@ -0,0 +1,325 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:da65d8daa07d931a0cc29a47c0b548fe12fdef8b547eb73d08edd31f44a3df38" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas I/O\n", + "* Reading\n", + "* Writing" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from pandas import Series, DataFrame\n", + "import pandas as pd" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read data from a CSV file into a DataFrame (use sep='\\t' for TSV):" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_1 = pd.read_csv(\"../data/ozone.csv\")" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a summary of the DataFrame:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_1.describe()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Ozone Solar.R Wind Temp Month Day
count 116.000000 146.000000 153.000000 153.000000 153.000000 153.000000
mean 42.129310 185.931507 9.957516 77.882353 6.993464 15.803922
std 32.987885 90.058422 3.523001 9.465270 1.416522 8.864520
min 1.000000 7.000000 1.700000 56.000000 5.000000 1.000000
25% 18.000000 115.750000 7.400000 72.000000 6.000000 8.000000
50% 31.500000 205.000000 9.700000 79.000000 7.000000 16.000000
75% 63.250000 258.750000 11.500000 85.000000 8.000000 23.000000
max 168.000000 334.000000 20.700000 97.000000 9.000000 31.000000
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 3, + "text": [ + " Ozone Solar.R Wind Temp Month Day\n", + "count 116.000000 146.000000 153.000000 153.000000 153.000000 153.000000\n", + "mean 42.129310 185.931507 9.957516 77.882353 6.993464 15.803922\n", + "std 32.987885 90.058422 3.523001 9.465270 1.416522 8.864520\n", + "min 1.000000 7.000000 1.700000 56.000000 5.000000 1.000000\n", + "25% 18.000000 115.750000 7.400000 72.000000 6.000000 8.000000\n", + "50% 31.500000 205.000000 9.700000 79.000000 7.000000 16.000000\n", + "75% 63.250000 258.750000 11.500000 85.000000 8.000000 23.000000\n", + "max 168.000000 334.000000 20.700000 97.000000 9.000000 31.000000" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "List the first five rows of the DataFrame:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_1.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Ozone Solar.R Wind Temp Month Day
0 41 190 7.4 67 5 1
1 36 118 8.0 72 5 2
2 12 149 12.6 74 5 3
3 18 313 11.5 62 5 4
4 NaN NaN 14.3 56 5 5
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 4, + "text": [ + " Ozone Solar.R Wind Temp Month Day\n", + "0 41 190 7.4 67 5 1\n", + "1 36 118 8.0 72 5 2\n", + "2 12 149 12.6 74 5 3\n", + "3 18 313 11.5 62 5 4\n", + "4 NaN NaN 14.3 56 5 5" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a copy of the CSV file, encoded in UTF-8 and hiding the index and header labels:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_1.to_csv('../data/ozone_copy.csv', \n", + " encoding='utf-8', \n", + " index=False, \n", + " header=False)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 5 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "View the data directory:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!ls -l ../data/" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "total 16\r\n", + "-rw-r--r--@ 1 dmartin 1443163707 2902 Dec 26 2012 ozone.csv\r\n", + "-rw-r--r-- 1 dmartin 1443163707 3324 Feb 14 06:40 ozone_copy.csv\r\n" + ] + } + ], + "prompt_number": 6 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file