Combined pandas notebooks until pandas I/O and pandas cleaning are further developed.

2024-03-22 13:30:56 +08:00 · 2015-06-12 20:51:00 -04:00 · 2015-06-12 20:51:00 -04:00 · 1bf68e0689
commit 1bf68e0689
parent 0f7fa880a5
4 changed files with 889 additions and 948 deletions
--- a/README.md
+++ b/README.md
@@ -125,8 +125,6 @@ IPython Notebook(s) demonstrating pandas functionality.
 | Notebook | Description |
 |--------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | [pandas](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/pandas.ipynb) | Software library written for data manipulation and analysis in Python. Offers data structures and operations for manipulating numerical tables and time series. |
-| [pandas io](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/pandas_io.ipynb) | Input and output operations. |
-| [pandas cleaning](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/pandas_clean.ipynb) | Data wrangling operations. |

 <br/>
 <p align="center">
--- a/pandas/pandas.ipynb
+++ b/pandas/pandas.ipynb
@@ -15,7 +15,9 @@
    "* Function Application and Mapping\n",
    "* Sorting and Ranking\n",
    "* Axis Indices with Duplicate Values\n",
-    "* Summarizing and Computing Descriptive Statistics"
+    "* Summarizing and Computing Descriptive Statistics\n",
+    "* Cleaning Data (Under Construction)\n",
+    "* Input and Output (Under Construction)"
   ]
  },
  {
@@ -5749,6 +5751,891 @@
   "source": [
    "df_6.sum(axis=1, skipna=False)"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleaning Data (Under Construction)\n",
+    "* Replace\n",
+    "* Drop\n",
+    "* Concatenate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from pandas import Series, DataFrame\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up a DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td> MD</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td> MD</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population state  year\n",
+       "0         5.0    VA  2012\n",
+       "1         5.1    VA  2013\n",
+       "2         5.2    VA  2014\n",
+       "3         4.0    MD  2014\n",
+       "4         4.1    MD  2015"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_1 = {'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],\n",
+    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
+    "          'population' : [5.0, 5.1, 5.2, 4.0, 4.1]}\n",
+    "df_1 = DataFrame(data_1)\n",
+    "df_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Replace"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Replace every value that exactly matches a string with another string, in place (no copy):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td>       MD</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td>       MD</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population     state  year\n",
+       "0         5.0  VIRGINIA  2012\n",
+       "1         5.1  VIRGINIA  2013\n",
+       "2         5.2  VIRGINIA  2014\n",
+       "3         4.0        MD  2014\n",
+       "4         4.1        MD  2015"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.replace('VA', 'VIRGINIA', inplace=True)\n",
+    "df_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In a specified column, replace every value that exactly matches a string with another string, in place (no copy):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population     state  year\n",
+       "0         5.0  VIRGINIA  2012\n",
+       "1         5.1  VIRGINIA  2013\n",
+       "2         5.2  VIRGINIA  2014\n",
+       "3         4.0  MARYLAND  2014\n",
+       "4         4.1  MARYLAND  2015"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.replace({'state' : { 'MD' : 'MARYLAND' }}, inplace=True)\n",
+    "df_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Drop"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Drop the 'population' column and return a copy of the DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      state  year\n",
+       "0  VIRGINIA  2012\n",
+       "1  VIRGINIA  2013\n",
+       "2  VIRGINIA  2014\n",
+       "3  MARYLAND  2014\n",
+       "4  MARYLAND  2015"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_2 = df_1.drop('population', axis=1)\n",
+    "df_2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Concatenate"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Concatenate two DataFrames:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 6.0</td>\n",
+       "      <td> NY</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 6.1</td>\n",
+       "      <td> NY</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 6.2</td>\n",
+       "      <td> NY</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 3.0</td>\n",
+       "      <td> FL</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 3.1</td>\n",
+       "      <td> FL</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population state  year\n",
+       "0         6.0    NY  2012\n",
+       "1         6.1    NY  2013\n",
+       "2         6.2    NY  2014\n",
+       "3         3.0    FL  2014\n",
+       "4         3.1    FL  2015"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_2 = {'state' : ['NY', 'NY', 'NY', 'FL', 'FL'],\n",
+    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
+    "          'population' : [6.0, 6.1, 6.2, 3.0, 3.1]}\n",
+    "df_3 = DataFrame(data_2)\n",
+    "df_3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 6.0</td>\n",
+       "      <td>       NY</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 6.1</td>\n",
+       "      <td>       NY</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 6.2</td>\n",
+       "      <td>       NY</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 3.0</td>\n",
+       "      <td>       FL</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 3.1</td>\n",
+       "      <td>       FL</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population     state  year\n",
+       "0         5.0  VIRGINIA  2012\n",
+       "1         5.1  VIRGINIA  2013\n",
+       "2         5.2  VIRGINIA  2014\n",
+       "3         4.0  MARYLAND  2014\n",
+       "4         4.1  MARYLAND  2015\n",
+       "0         6.0        NY  2012\n",
+       "1         6.1        NY  2013\n",
+       "2         6.2        NY  2014\n",
+       "3         3.0        FL  2014\n",
+       "4         3.1        FL  2015"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_4 = pd.concat([df_1, df_3])\n",
+    "df_4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Input and Output (Under Construction)\n",
+    "* Reading\n",
+    "* Writing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from pandas import Series, DataFrame\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Reading"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read data from a CSV file into a DataFrame (use sep='\\t' for TSV):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "df_1 = pd.read_csv(\"../data/ozone.csv\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Get a summary of the DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Ozone</th>\n",
+       "      <th>Solar.R</th>\n",
+       "      <th>Wind</th>\n",
+       "      <th>Temp</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>Day</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td> 116.000000</td>\n",
+       "      <td> 146.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>  42.129310</td>\n",
+       "      <td> 185.931507</td>\n",
+       "      <td>   9.957516</td>\n",
+       "      <td>  77.882353</td>\n",
+       "      <td>   6.993464</td>\n",
+       "      <td>  15.803922</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>  32.987885</td>\n",
+       "      <td>  90.058422</td>\n",
+       "      <td>   3.523001</td>\n",
+       "      <td>   9.465270</td>\n",
+       "      <td>   1.416522</td>\n",
+       "      <td>   8.864520</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>   1.000000</td>\n",
+       "      <td>   7.000000</td>\n",
+       "      <td>   1.700000</td>\n",
+       "      <td>  56.000000</td>\n",
+       "      <td>   5.000000</td>\n",
+       "      <td>   1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>  18.000000</td>\n",
+       "      <td> 115.750000</td>\n",
+       "      <td>   7.400000</td>\n",
+       "      <td>  72.000000</td>\n",
+       "      <td>   6.000000</td>\n",
+       "      <td>   8.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>  31.500000</td>\n",
+       "      <td> 205.000000</td>\n",
+       "      <td>   9.700000</td>\n",
+       "      <td>  79.000000</td>\n",
+       "      <td>   7.000000</td>\n",
+       "      <td>  16.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>  63.250000</td>\n",
+       "      <td> 258.750000</td>\n",
+       "      <td>  11.500000</td>\n",
+       "      <td>  85.000000</td>\n",
+       "      <td>   8.000000</td>\n",
+       "      <td>  23.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td> 168.000000</td>\n",
+       "      <td> 334.000000</td>\n",
+       "      <td>  20.700000</td>\n",
+       "      <td>  97.000000</td>\n",
+       "      <td>   9.000000</td>\n",
+       "      <td>  31.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            Ozone     Solar.R        Wind        Temp       Month         Day\n",
+       "count  116.000000  146.000000  153.000000  153.000000  153.000000  153.000000\n",
+       "mean    42.129310  185.931507    9.957516   77.882353    6.993464   15.803922\n",
+       "std     32.987885   90.058422    3.523001    9.465270    1.416522    8.864520\n",
+       "min      1.000000    7.000000    1.700000   56.000000    5.000000    1.000000\n",
+       "25%     18.000000  115.750000    7.400000   72.000000    6.000000    8.000000\n",
+       "50%     31.500000  205.000000    9.700000   79.000000    7.000000   16.000000\n",
+       "75%     63.250000  258.750000   11.500000   85.000000    8.000000   23.000000\n",
+       "max    168.000000  334.000000   20.700000   97.000000    9.000000   31.000000"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "List the first five rows of the DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Ozone</th>\n",
+       "      <th>Solar.R</th>\n",
+       "      <th>Wind</th>\n",
+       "      <th>Temp</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>Day</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 41</td>\n",
+       "      <td> 190</td>\n",
+       "      <td>  7.4</td>\n",
+       "      <td> 67</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 36</td>\n",
+       "      <td> 118</td>\n",
+       "      <td>  8.0</td>\n",
+       "      <td> 72</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 12</td>\n",
+       "      <td> 149</td>\n",
+       "      <td> 12.6</td>\n",
+       "      <td> 74</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 18</td>\n",
+       "      <td> 313</td>\n",
+       "      <td> 11.5</td>\n",
+       "      <td> 62</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td> NaN</td>\n",
+       "      <td> 14.3</td>\n",
+       "      <td> 56</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Ozone  Solar.R  Wind  Temp  Month  Day\n",
+       "0     41      190   7.4    67      5    1\n",
+       "1     36      118   8.0    72      5    2\n",
+       "2     12      149  12.6    74      5    3\n",
+       "3     18      313  11.5    62      5    4\n",
+       "4    NaN      NaN  14.3    56      5    5"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Writing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Write the DataFrame to a new CSV file, encoded in UTF-8 and omitting the index and header labels:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "df_1.to_csv('../data/ozone_copy.csv', \n",
+    "            encoding='utf-8', \n",
+    "            index=False, \n",
+    "            header=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "View the data directory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 16\r\n",
+      "-rw-r--r--@ 1 dmartin  1443163707  2902 Dec 26  2012 ozone.csv\r\n",
+      "-rw-r--r--  1 dmartin  1443163707  3324 Feb 14 06:40 ozone_copy.csv\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls -l ../data/"
+   ]
  }
 ],
 "metadata": {
@@ -5767,7 +6654,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
+   "version": "2.7.10"
  }
 },
 "nbformat": 4,
--- a/pandas/pandas_clean.ipynb
+++ b/pandas/pandas_clean.ipynb
@ -1,591 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Pandas Cleaning\n",
-    "* Replace\n",
-    "* Drop\n",
-    "* Concatenate"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "from pandas import Series, DataFrame\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Setup a DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td> MD</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td> MD</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population state  year\n",
-       "0         5.0    VA  2012\n",
-       "1         5.1    VA  2013\n",
-       "2         5.2    VA  2014\n",
-       "3         4.0    MD  2014\n",
-       "4         4.1    MD  2015"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data_1 = {'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],\n",
-    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
-    "          'population' : [5.0, 5.1, 5.2, 4.0, 4.1]}\n",
-    "df_1 = DataFrame(data_1)\n",
-    "df_1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Replace"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Replace all occurrences of a string with another string, in place (no copy):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td>       MD</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td>       MD</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population     state  year\n",
-       "0         5.0  VIRGINIA  2012\n",
-       "1         5.1  VIRGINIA  2013\n",
-       "2         5.2  VIRGINIA  2014\n",
-       "3         4.0        MD  2014\n",
-       "4         4.1        MD  2015"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.replace('VA', 'VIRGINIA', inplace=True)\n",
-    "df_1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In a specified column, replace all occurrences of a string with another string, in place (no copy):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population     state  year\n",
-       "0         5.0  VIRGINIA  2012\n",
-       "1         5.1  VIRGINIA  2013\n",
-       "2         5.2  VIRGINIA  2014\n",
-       "3         4.0  MARYLAND  2014\n",
-       "4         4.1  MARYLAND  2015"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.replace({'state' : { 'MD' : 'MARYLAND' }}, inplace=True)\n",
-    "df_1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Drop"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Drop the 'population' column and return a copy of the DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      state  year\n",
-       "0  VIRGINIA  2012\n",
-       "1  VIRGINIA  2013\n",
-       "2  VIRGINIA  2014\n",
-       "3  MARYLAND  2014\n",
-       "4  MARYLAND  2015"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_2 = df_1.drop('population', axis=1)\n",
-    "df_2"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Concatenate"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Concatenate two DataFrames:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 6.0</td>\n",
-       "      <td> NY</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 6.1</td>\n",
-       "      <td> NY</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 6.2</td>\n",
-       "      <td> NY</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 3.0</td>\n",
-       "      <td> FL</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 3.1</td>\n",
-       "      <td> FL</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population state  year\n",
-       "0         6.0    NY  2012\n",
-       "1         6.1    NY  2013\n",
-       "2         6.2    NY  2014\n",
-       "3         3.0    FL  2014\n",
-       "4         3.1    FL  2015"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data_2 = {'state' : ['NY', 'NY', 'NY', 'FL', 'FL'],\n",
-    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
-    "          'population' : [6.0, 6.1, 6.2, 3.0, 3.1]}\n",
-    "df_3 = DataFrame(data_2)\n",
-    "df_3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 6.0</td>\n",
-       "      <td>       NY</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 6.1</td>\n",
-       "      <td>       NY</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 6.2</td>\n",
-       "      <td>       NY</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 3.0</td>\n",
-       "      <td>       FL</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 3.1</td>\n",
-       "      <td>       FL</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population     state  year\n",
-       "0         5.0  VIRGINIA  2012\n",
-       "1         5.1  VIRGINIA  2013\n",
-       "2         5.2  VIRGINIA  2014\n",
-       "3         4.0  MARYLAND  2014\n",
-       "4         4.1  MARYLAND  2015\n",
-       "0         6.0        NY  2012\n",
-       "1         6.1        NY  2013\n",
-       "2         6.2        NY  2014\n",
-       "3         3.0        FL  2014\n",
-       "4         3.1        FL  2015"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_4 = pd.concat([df_1, df_3])\n",
-    "df_4"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
--- a/pandas/pandas_io.ipynb
+++ b/pandas/pandas_io.ipynb
@ -1,353 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Pandas I/O\n",
-    "* Reading\n",
-    "* Writing"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "from pandas import Series, DataFrame\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Reading"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Read data from a CSV file into a DataFrame (use sep='\\t' for TSV):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "df_1 = pd.read_csv(\"../data/ozone.csv\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Get a summary of the DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Ozone</th>\n",
-       "      <th>Solar.R</th>\n",
-       "      <th>Wind</th>\n",
-       "      <th>Temp</th>\n",
-       "      <th>Month</th>\n",
-       "      <th>Day</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>count</th>\n",
-       "      <td> 116.000000</td>\n",
-       "      <td> 146.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mean</th>\n",
-       "      <td>  42.129310</td>\n",
-       "      <td> 185.931507</td>\n",
-       "      <td>   9.957516</td>\n",
-       "      <td>  77.882353</td>\n",
-       "      <td>   6.993464</td>\n",
-       "      <td>  15.803922</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>std</th>\n",
-       "      <td>  32.987885</td>\n",
-       "      <td>  90.058422</td>\n",
-       "      <td>   3.523001</td>\n",
-       "      <td>   9.465270</td>\n",
-       "      <td>   1.416522</td>\n",
-       "      <td>   8.864520</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>min</th>\n",
-       "      <td>   1.000000</td>\n",
-       "      <td>   7.000000</td>\n",
-       "      <td>   1.700000</td>\n",
-       "      <td>  56.000000</td>\n",
-       "      <td>   5.000000</td>\n",
-       "      <td>   1.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25%</th>\n",
-       "      <td>  18.000000</td>\n",
-       "      <td> 115.750000</td>\n",
-       "      <td>   7.400000</td>\n",
-       "      <td>  72.000000</td>\n",
-       "      <td>   6.000000</td>\n",
-       "      <td>   8.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>50%</th>\n",
-       "      <td>  31.500000</td>\n",
-       "      <td> 205.000000</td>\n",
-       "      <td>   9.700000</td>\n",
-       "      <td>  79.000000</td>\n",
-       "      <td>   7.000000</td>\n",
-       "      <td>  16.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>75%</th>\n",
-       "      <td>  63.250000</td>\n",
-       "      <td> 258.750000</td>\n",
-       "      <td>  11.500000</td>\n",
-       "      <td>  85.000000</td>\n",
-       "      <td>   8.000000</td>\n",
-       "      <td>  23.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>max</th>\n",
-       "      <td> 168.000000</td>\n",
-       "      <td> 334.000000</td>\n",
-       "      <td>  20.700000</td>\n",
-       "      <td>  97.000000</td>\n",
-       "      <td>   9.000000</td>\n",
-       "      <td>  31.000000</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "            Ozone     Solar.R        Wind        Temp       Month         Day\n",
-       "count  116.000000  146.000000  153.000000  153.000000  153.000000  153.000000\n",
-       "mean    42.129310  185.931507    9.957516   77.882353    6.993464   15.803922\n",
-       "std     32.987885   90.058422    3.523001    9.465270    1.416522    8.864520\n",
-       "min      1.000000    7.000000    1.700000   56.000000    5.000000    1.000000\n",
-       "25%     18.000000  115.750000    7.400000   72.000000    6.000000    8.000000\n",
-       "50%     31.500000  205.000000    9.700000   79.000000    7.000000   16.000000\n",
-       "75%     63.250000  258.750000   11.500000   85.000000    8.000000   23.000000\n",
-       "max    168.000000  334.000000   20.700000   97.000000    9.000000   31.000000"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.describe()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "List the first five rows of the DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Ozone</th>\n",
-       "      <th>Solar.R</th>\n",
-       "      <th>Wind</th>\n",
-       "      <th>Temp</th>\n",
-       "      <th>Month</th>\n",
-       "      <th>Day</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 41</td>\n",
-       "      <td> 190</td>\n",
-       "      <td>  7.4</td>\n",
-       "      <td> 67</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 36</td>\n",
-       "      <td> 118</td>\n",
-       "      <td>  8.0</td>\n",
-       "      <td> 72</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 12</td>\n",
-       "      <td> 149</td>\n",
-       "      <td> 12.6</td>\n",
-       "      <td> 74</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 18</td>\n",
-       "      <td> 313</td>\n",
-       "      <td> 11.5</td>\n",
-       "      <td> 62</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td> NaN</td>\n",
-       "      <td> 14.3</td>\n",
-       "      <td> 56</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 5</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Ozone  Solar.R  Wind  Temp  Month  Day\n",
-       "0     41      190   7.4    67      5    1\n",
-       "1     36      118   8.0    72      5    2\n",
-       "2     12      149  12.6    74      5    3\n",
-       "3     18      313  11.5    62      5    4\n",
-       "4    NaN      NaN  14.3    56      5    5"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Writing"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Create a copy of the CSV file, encoded in UTF-8 and hiding the index and header labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "df_1.to_csv('../data/ozone_copy.csv', \n",
-    "            encoding='utf-8', \n",
-    "            index=False, \n",
-    "            header=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "View the data directory:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "total 16\r\n",
-      "-rw-r--r--@ 1 dmartin  1443163707  2902 Dec 26  2012 ozone.csv\r\n",
-      "-rw-r--r--  1 dmartin  1443163707  3324 Feb 14 06:40 ozone_copy.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!ls -l ../data/"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}