data-science-ipython-notebooks/pandas/pandas_io.ipynb

325 lines
9.1 KiB
Plaintext
Raw Normal View History

{
"metadata": {
"name": "",
"signature": "sha256:da65d8daa07d931a0cc29a47c0b548fe12fdef8b547eb73d08edd31f44a3df38"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pandas I/O\n",
"* Reading\n",
"* Writing"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pandas import Series, DataFrame\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read data from a CSV file into a DataFrame (pass `sep='\\t'` to read a tab-separated file):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_1 = pd.read_csv(\"../data/ozone.csv\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get summary statistics (count, mean, standard deviation, min, quartiles, max) for each numeric column of the DataFrame:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_1.describe()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ozone</th>\n",
" <th>Solar.R</th>\n",
" <th>Wind</th>\n",
" <th>Temp</th>\n",
" <th>Month</th>\n",
" <th>Day</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td> 116.000000</td>\n",
" <td> 146.000000</td>\n",
" <td> 153.000000</td>\n",
" <td> 153.000000</td>\n",
" <td> 153.000000</td>\n",
" <td> 153.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 42.129310</td>\n",
" <td> 185.931507</td>\n",
" <td> 9.957516</td>\n",
" <td> 77.882353</td>\n",
" <td> 6.993464</td>\n",
" <td> 15.803922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 32.987885</td>\n",
" <td> 90.058422</td>\n",
" <td> 3.523001</td>\n",
" <td> 9.465270</td>\n",
" <td> 1.416522</td>\n",
" <td> 8.864520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 1.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 1.700000</td>\n",
" <td> 56.000000</td>\n",
" <td> 5.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 18.000000</td>\n",
" <td> 115.750000</td>\n",
" <td> 7.400000</td>\n",
" <td> 72.000000</td>\n",
" <td> 6.000000</td>\n",
" <td> 8.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 31.500000</td>\n",
" <td> 205.000000</td>\n",
" <td> 9.700000</td>\n",
" <td> 79.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 16.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 63.250000</td>\n",
" <td> 258.750000</td>\n",
" <td> 11.500000</td>\n",
" <td> 85.000000</td>\n",
" <td> 8.000000</td>\n",
" <td> 23.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 168.000000</td>\n",
" <td> 334.000000</td>\n",
" <td> 20.700000</td>\n",
" <td> 97.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 31.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
" Ozone Solar.R Wind Temp Month Day\n",
"count 116.000000 146.000000 153.000000 153.000000 153.000000 153.000000\n",
"mean 42.129310 185.931507 9.957516 77.882353 6.993464 15.803922\n",
"std 32.987885 90.058422 3.523001 9.465270 1.416522 8.864520\n",
"min 1.000000 7.000000 1.700000 56.000000 5.000000 1.000000\n",
"25% 18.000000 115.750000 7.400000 72.000000 6.000000 8.000000\n",
"50% 31.500000 205.000000 9.700000 79.000000 7.000000 16.000000\n",
"75% 63.250000 258.750000 11.500000 85.000000 8.000000 23.000000\n",
"max 168.000000 334.000000 20.700000 97.000000 9.000000 31.000000"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the first five rows of the DataFrame:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_1.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ozone</th>\n",
" <th>Solar.R</th>\n",
" <th>Wind</th>\n",
" <th>Temp</th>\n",
" <th>Month</th>\n",
" <th>Day</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 41</td>\n",
" <td> 190</td>\n",
" <td> 7.4</td>\n",
" <td> 67</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 36</td>\n",
" <td> 118</td>\n",
" <td> 8.0</td>\n",
" <td> 72</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 12</td>\n",
" <td> 149</td>\n",
" <td> 12.6</td>\n",
" <td> 74</td>\n",
" <td> 5</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 18</td>\n",
" <td> 313</td>\n",
" <td> 11.5</td>\n",
" <td> 62</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td> NaN</td>\n",
" <td> 14.3</td>\n",
" <td> 56</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
" Ozone Solar.R Wind Temp Month Day\n",
"0 41 190 7.4 67 5 1\n",
"1 36 118 8.0 72 5 2\n",
"2 12 149 12.6 74 5 3\n",
"3 18 313 11.5 62 5 4\n",
"4 NaN NaN 14.3 56 5 5"
]
}
],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write the DataFrame to a new CSV file, encoded in UTF-8 and omitting the index and the header row:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df_1.to_csv('../data/ozone_copy.csv', \n",
" encoding='utf-8', \n",
" index=False, \n",
" header=False)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the data directory to confirm that the new file was written:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!ls -l ../data/"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"total 16\r\n",
"-rw-r--r--@ 1 dmartin 1443163707 2902 Dec 26 2012 ozone.csv\r\n",
"-rw-r--r-- 1 dmartin 1443163707 3324 Feb 14 06:40 ozone_copy.csv\r\n"
]
}
],
"prompt_number": 6
}
],
"metadata": {}
}
]
}