From 1bf68e068976591f34a48d42de9ba37ff1b51f92 Mon Sep 17 00:00:00 2001
From: Donne Martin <donne.martin@gmail.com>
Date: Fri, 12 Jun 2015 20:51:00 -0400
Subject: [PATCH] Combined pandas notebooks until pandas I/O and pandas
 cleaning are further developed.

---
 README.md                 |   2 -
 pandas/pandas.ipynb       | 891 +++++++++++++++++++++++++++++++++++++-
 pandas/pandas_clean.ipynb | 591 -------------------------
 pandas/pandas_io.ipynb    | 353 ---------------
 4 files changed, 889 insertions(+), 948 deletions(-)
 delete mode 100644 pandas/pandas_clean.ipynb
 delete mode 100644 pandas/pandas_io.ipynb
diff --git a/README.md b/README.md
index 4bf8f8f..c91a770 100644
--- a/README.md
+++ b/README.md
@@ -125,8 +125,6 @@ IPython Notebook(s) demonstrating pandas functionality.
 | Notebook | Description |
 |--------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | [pandas](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/pandas.ipynb) | Software library written for data manipulation and analysis in Python. Offers data structures and operations for manipulating numerical tables and time series. |
-| [pandas io](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/pandas_io.ipynb) | Input and output operations. |
-| [pandas cleaning](http://nbviewer.ipython.org/github/donnemartin/data-science-ipython-notebooks/blob/master/pandas/pandas_clean.ipynb) | Data wrangling operations. |
 
 <br/>
 <p align="center">
diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb
index b0ff2b5..a758c35 100644
--- a/pandas/pandas.ipynb
+++ b/pandas/pandas.ipynb
@@ -15,7 +15,9 @@
     "* Function Application and Mapping\n",
     "* Sorting and Ranking\n",
     "* Axis Indices with Duplicate Values\n",
-    "* Summarizing and Computing Descriptive Statistics"
+    "* Summarizing and Computing Descriptive Statistics\n",
+    "* Cleaning Data (Under Construction)\n",
+    "* Input and Output (Under Construction)"
    ]
   },
   {
@@ -5749,6 +5751,891 @@
    "source": [
     "df_6.sum(axis=1, skipna=False)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleaning Data (Under Construction)\n",
+    "* Replace\n",
+    "* Drop\n",
+    "* Concatenate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from pandas import Series, DataFrame\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up a DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td> MD</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td> MD</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population state  year\n",
+       "0         5.0    VA  2012\n",
+       "1         5.1    VA  2013\n",
+       "2         5.2    VA  2014\n",
+       "3         4.0    MD  2014\n",
+       "4         4.1    MD  2015"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_1 = {'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],\n",
+    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
+    "          'population' : [5.0, 5.1, 5.2, 4.0, 4.1]}\n",
+    "df_1 = DataFrame(data_1)\n",
+    "df_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Replace"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Replace all occurrences of a string with another string, in place (no copy):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td>       MD</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td>       MD</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population     state  year\n",
+       "0         5.0  VIRGINIA  2012\n",
+       "1         5.1  VIRGINIA  2013\n",
+       "2         5.2  VIRGINIA  2014\n",
+       "3         4.0        MD  2014\n",
+       "4         4.1        MD  2015"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.replace('VA', 'VIRGINIA', inplace=True)\n",
+    "df_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In a specified column, replace all occurrences of a string with another string, in place (no copy):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population     state  year\n",
+       "0         5.0  VIRGINIA  2012\n",
+       "1         5.1  VIRGINIA  2013\n",
+       "2         5.2  VIRGINIA  2014\n",
+       "3         4.0  MARYLAND  2014\n",
+       "4         4.1  MARYLAND  2015"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.replace({'state' : { 'MD' : 'MARYLAND' }}, inplace=True)\n",
+    "df_1"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Drop"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Drop the 'population' column and return a copy of the DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      state  year\n",
+       "0  VIRGINIA  2012\n",
+       "1  VIRGINIA  2013\n",
+       "2  VIRGINIA  2014\n",
+       "3  MARYLAND  2014\n",
+       "4  MARYLAND  2015"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_2 = df_1.drop('population', axis=1)\n",
+    "df_2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Concatenate"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Concatenate two DataFrames:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 6.0</td>\n",
+       "      <td> NY</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 6.1</td>\n",
+       "      <td> NY</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 6.2</td>\n",
+       "      <td> NY</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 3.0</td>\n",
+       "      <td> FL</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 3.1</td>\n",
+       "      <td> FL</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population state  year\n",
+       "0         6.0    NY  2012\n",
+       "1         6.1    NY  2013\n",
+       "2         6.2    NY  2014\n",
+       "3         3.0    FL  2014\n",
+       "4         3.1    FL  2015"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_2 = {'state' : ['NY', 'NY', 'NY', 'FL', 'FL'],\n",
+    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
+    "          'population' : [6.0, 6.1, 6.2, 3.0, 3.1]}\n",
+    "df_3 = DataFrame(data_2)\n",
+    "df_3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>population</th>\n",
+       "      <th>state</th>\n",
+       "      <th>year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 5.0</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 5.1</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 5.2</td>\n",
+       "      <td> VIRGINIA</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 4.0</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 4.1</td>\n",
+       "      <td> MARYLAND</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 6.0</td>\n",
+       "      <td>       NY</td>\n",
+       "      <td> 2012</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 6.1</td>\n",
+       "      <td>       NY</td>\n",
+       "      <td> 2013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 6.2</td>\n",
+       "      <td>       NY</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 3.0</td>\n",
+       "      <td>       FL</td>\n",
+       "      <td> 2014</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td> 3.1</td>\n",
+       "      <td>       FL</td>\n",
+       "      <td> 2015</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   population     state  year\n",
+       "0         5.0  VIRGINIA  2012\n",
+       "1         5.1  VIRGINIA  2013\n",
+       "2         5.2  VIRGINIA  2014\n",
+       "3         4.0  MARYLAND  2014\n",
+       "4         4.1  MARYLAND  2015\n",
+       "0         6.0        NY  2012\n",
+       "1         6.1        NY  2013\n",
+       "2         6.2        NY  2014\n",
+       "3         3.0        FL  2014\n",
+       "4         3.1        FL  2015"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_4 = pd.concat([df_1, df_3])\n",
+    "df_4"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Input and Output (Under Construction)\n",
+    "* Reading\n",
+    "* Writing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from pandas import Series, DataFrame\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Reading"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read data from a CSV file into a DataFrame (use sep='\\t' for TSV):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "df_1 = pd.read_csv(\"../data/ozone.csv\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Get summary statistics for each column of the DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Ozone</th>\n",
+       "      <th>Solar.R</th>\n",
+       "      <th>Wind</th>\n",
+       "      <th>Temp</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>Day</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td> 116.000000</td>\n",
+       "      <td> 146.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "      <td> 153.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>  42.129310</td>\n",
+       "      <td> 185.931507</td>\n",
+       "      <td>   9.957516</td>\n",
+       "      <td>  77.882353</td>\n",
+       "      <td>   6.993464</td>\n",
+       "      <td>  15.803922</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>  32.987885</td>\n",
+       "      <td>  90.058422</td>\n",
+       "      <td>   3.523001</td>\n",
+       "      <td>   9.465270</td>\n",
+       "      <td>   1.416522</td>\n",
+       "      <td>   8.864520</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>   1.000000</td>\n",
+       "      <td>   7.000000</td>\n",
+       "      <td>   1.700000</td>\n",
+       "      <td>  56.000000</td>\n",
+       "      <td>   5.000000</td>\n",
+       "      <td>   1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>  18.000000</td>\n",
+       "      <td> 115.750000</td>\n",
+       "      <td>   7.400000</td>\n",
+       "      <td>  72.000000</td>\n",
+       "      <td>   6.000000</td>\n",
+       "      <td>   8.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>  31.500000</td>\n",
+       "      <td> 205.000000</td>\n",
+       "      <td>   9.700000</td>\n",
+       "      <td>  79.000000</td>\n",
+       "      <td>   7.000000</td>\n",
+       "      <td>  16.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>  63.250000</td>\n",
+       "      <td> 258.750000</td>\n",
+       "      <td>  11.500000</td>\n",
+       "      <td>  85.000000</td>\n",
+       "      <td>   8.000000</td>\n",
+       "      <td>  23.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td> 168.000000</td>\n",
+       "      <td> 334.000000</td>\n",
+       "      <td>  20.700000</td>\n",
+       "      <td>  97.000000</td>\n",
+       "      <td>   9.000000</td>\n",
+       "      <td>  31.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            Ozone     Solar.R        Wind        Temp       Month         Day\n",
+       "count  116.000000  146.000000  153.000000  153.000000  153.000000  153.000000\n",
+       "mean    42.129310  185.931507    9.957516   77.882353    6.993464   15.803922\n",
+       "std     32.987885   90.058422    3.523001    9.465270    1.416522    8.864520\n",
+       "min      1.000000    7.000000    1.700000   56.000000    5.000000    1.000000\n",
+       "25%     18.000000  115.750000    7.400000   72.000000    6.000000    8.000000\n",
+       "50%     31.500000  205.000000    9.700000   79.000000    7.000000   16.000000\n",
+       "75%     63.250000  258.750000   11.500000   85.000000    8.000000   23.000000\n",
+       "max    168.000000  334.000000   20.700000   97.000000    9.000000   31.000000"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "List the first five rows of the DataFrame:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Ozone</th>\n",
+       "      <th>Solar.R</th>\n",
+       "      <th>Wind</th>\n",
+       "      <th>Temp</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>Day</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td> 41</td>\n",
+       "      <td> 190</td>\n",
+       "      <td>  7.4</td>\n",
+       "      <td> 67</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td> 36</td>\n",
+       "      <td> 118</td>\n",
+       "      <td>  8.0</td>\n",
+       "      <td> 72</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td> 12</td>\n",
+       "      <td> 149</td>\n",
+       "      <td> 12.6</td>\n",
+       "      <td> 74</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td> 18</td>\n",
+       "      <td> 313</td>\n",
+       "      <td> 11.5</td>\n",
+       "      <td> 62</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td> NaN</td>\n",
+       "      <td> 14.3</td>\n",
+       "      <td> 56</td>\n",
+       "      <td> 5</td>\n",
+       "      <td> 5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Ozone  Solar.R  Wind  Temp  Month  Day\n",
+       "0     41      190   7.4    67      5    1\n",
+       "1     36      118   8.0    72      5    2\n",
+       "2     12      149  12.6    74      5    3\n",
+       "3     18      313  11.5    62      5    4\n",
+       "4    NaN      NaN  14.3    56      5    5"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_1.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Writing"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Write the DataFrame to a new CSV file, encoded in UTF-8 and omitting the index and header labels:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "df_1.to_csv('../data/ozone_copy.csv', \n",
+    "            encoding='utf-8', \n",
+    "            index=False, \n",
+    "            header=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "View the data directory:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "total 16\r\n",
+      "-rw-r--r--@ 1 dmartin  1443163707  2902 Dec 26  2012 ozone.csv\r\n",
+      "-rw-r--r--  1 dmartin  1443163707  3324 Feb 14 06:40 ozone_copy.csv\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls -l ../data/"
+   ]
   }
  ],
  "metadata": {
@@ -5767,7 +6654,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.9"
+   "version": "2.7.10"
   }
  },
  "nbformat": 4,
diff --git a/pandas/pandas_clean.ipynb b/pandas/pandas_clean.ipynb
deleted file mode 100644
index fec0430..0000000
--- a/pandas/pandas_clean.ipynb
+++ /dev/null
@@ -1,591 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Pandas Cleaning\n",
-    "* Replace\n",
-    "* Drop\n",
-    "* Concatenate"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "from pandas import Series, DataFrame\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Setup a DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td> MD</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td> MD</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population state  year\n",
-       "0         5.0    VA  2012\n",
-       "1         5.1    VA  2013\n",
-       "2         5.2    VA  2014\n",
-       "3         4.0    MD  2014\n",
-       "4         4.1    MD  2015"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data_1 = {'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],\n",
-    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
-    "          'population' : [5.0, 5.1, 5.2, 4.0, 4.1]}\n",
-    "df_1 = DataFrame(data_1)\n",
-    "df_1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Replace"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Replace all occurrences of a string with another string, in place (no copy):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td>       MD</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td>       MD</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population     state  year\n",
-       "0         5.0  VIRGINIA  2012\n",
-       "1         5.1  VIRGINIA  2013\n",
-       "2         5.2  VIRGINIA  2014\n",
-       "3         4.0        MD  2014\n",
-       "4         4.1        MD  2015"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.replace('VA', 'VIRGINIA', inplace=True)\n",
-    "df_1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "In a specified column, replace all occurrences of a string with another string, in place (no copy):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population     state  year\n",
-       "0         5.0  VIRGINIA  2012\n",
-       "1         5.1  VIRGINIA  2013\n",
-       "2         5.2  VIRGINIA  2014\n",
-       "3         4.0  MARYLAND  2014\n",
-       "4         4.1  MARYLAND  2015"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.replace({'state' : { 'MD' : 'MARYLAND' }}, inplace=True)\n",
-    "df_1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Drop"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Drop the 'population' column and return a copy of the DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      state  year\n",
-       "0  VIRGINIA  2012\n",
-       "1  VIRGINIA  2013\n",
-       "2  VIRGINIA  2014\n",
-       "3  MARYLAND  2014\n",
-       "4  MARYLAND  2015"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_2 = df_1.drop('population', axis=1)\n",
-    "df_2"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Concatenate"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Concatenate two DataFrames:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 6.0</td>\n",
-       "      <td> NY</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 6.1</td>\n",
-       "      <td> NY</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 6.2</td>\n",
-       "      <td> NY</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 3.0</td>\n",
-       "      <td> FL</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 3.1</td>\n",
-       "      <td> FL</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population state  year\n",
-       "0         6.0    NY  2012\n",
-       "1         6.1    NY  2013\n",
-       "2         6.2    NY  2014\n",
-       "3         3.0    FL  2014\n",
-       "4         3.1    FL  2015"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data_2 = {'state' : ['NY', 'NY', 'NY', 'FL', 'FL'],\n",
-    "          'year' : [2012, 2013, 2014, 2014, 2015],\n",
-    "          'population' : [6.0, 6.1, 6.2, 3.0, 3.1]}\n",
-    "df_3 = DataFrame(data_2)\n",
-    "df_3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>population</th>\n",
-       "      <th>state</th>\n",
-       "      <th>year</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 5.0</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 5.1</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 5.2</td>\n",
-       "      <td> VIRGINIA</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 4.0</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 4.1</td>\n",
-       "      <td> MARYLAND</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 6.0</td>\n",
-       "      <td>       NY</td>\n",
-       "      <td> 2012</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 6.1</td>\n",
-       "      <td>       NY</td>\n",
-       "      <td> 2013</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 6.2</td>\n",
-       "      <td>       NY</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 3.0</td>\n",
-       "      <td>       FL</td>\n",
-       "      <td> 2014</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td> 3.1</td>\n",
-       "      <td>       FL</td>\n",
-       "      <td> 2015</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   population     state  year\n",
-       "0         5.0  VIRGINIA  2012\n",
-       "1         5.1  VIRGINIA  2013\n",
-       "2         5.2  VIRGINIA  2014\n",
-       "3         4.0  MARYLAND  2014\n",
-       "4         4.1  MARYLAND  2015\n",
-       "0         6.0        NY  2012\n",
-       "1         6.1        NY  2013\n",
-       "2         6.2        NY  2014\n",
-       "3         3.0        FL  2014\n",
-       "4         3.1        FL  2015"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_4 = pd.concat([df_1, df_3])\n",
-    "df_4"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/pandas/pandas_io.ipynb b/pandas/pandas_io.ipynb
deleted file mode 100644
index b9fa5cb..0000000
--- a/pandas/pandas_io.ipynb
+++ /dev/null
@@ -1,353 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Pandas I/O\n",
-    "* Reading\n",
-    "* Writing"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "from pandas import Series, DataFrame\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Reading"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Read data from a CSV file into a DataFrame (use sep='\\t' for TSV):"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "df_1 = pd.read_csv(\"../data/ozone.csv\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Get a summary of the DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Ozone</th>\n",
-       "      <th>Solar.R</th>\n",
-       "      <th>Wind</th>\n",
-       "      <th>Temp</th>\n",
-       "      <th>Month</th>\n",
-       "      <th>Day</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>count</th>\n",
-       "      <td> 116.000000</td>\n",
-       "      <td> 146.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "      <td> 153.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mean</th>\n",
-       "      <td>  42.129310</td>\n",
-       "      <td> 185.931507</td>\n",
-       "      <td>   9.957516</td>\n",
-       "      <td>  77.882353</td>\n",
-       "      <td>   6.993464</td>\n",
-       "      <td>  15.803922</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>std</th>\n",
-       "      <td>  32.987885</td>\n",
-       "      <td>  90.058422</td>\n",
-       "      <td>   3.523001</td>\n",
-       "      <td>   9.465270</td>\n",
-       "      <td>   1.416522</td>\n",
-       "      <td>   8.864520</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>min</th>\n",
-       "      <td>   1.000000</td>\n",
-       "      <td>   7.000000</td>\n",
-       "      <td>   1.700000</td>\n",
-       "      <td>  56.000000</td>\n",
-       "      <td>   5.000000</td>\n",
-       "      <td>   1.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25%</th>\n",
-       "      <td>  18.000000</td>\n",
-       "      <td> 115.750000</td>\n",
-       "      <td>   7.400000</td>\n",
-       "      <td>  72.000000</td>\n",
-       "      <td>   6.000000</td>\n",
-       "      <td>   8.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>50%</th>\n",
-       "      <td>  31.500000</td>\n",
-       "      <td> 205.000000</td>\n",
-       "      <td>   9.700000</td>\n",
-       "      <td>  79.000000</td>\n",
-       "      <td>   7.000000</td>\n",
-       "      <td>  16.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>75%</th>\n",
-       "      <td>  63.250000</td>\n",
-       "      <td> 258.750000</td>\n",
-       "      <td>  11.500000</td>\n",
-       "      <td>  85.000000</td>\n",
-       "      <td>   8.000000</td>\n",
-       "      <td>  23.000000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>max</th>\n",
-       "      <td> 168.000000</td>\n",
-       "      <td> 334.000000</td>\n",
-       "      <td>  20.700000</td>\n",
-       "      <td>  97.000000</td>\n",
-       "      <td>   9.000000</td>\n",
-       "      <td>  31.000000</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "            Ozone     Solar.R        Wind        Temp       Month         Day\n",
-       "count  116.000000  146.000000  153.000000  153.000000  153.000000  153.000000\n",
-       "mean    42.129310  185.931507    9.957516   77.882353    6.993464   15.803922\n",
-       "std     32.987885   90.058422    3.523001    9.465270    1.416522    8.864520\n",
-       "min      1.000000    7.000000    1.700000   56.000000    5.000000    1.000000\n",
-       "25%     18.000000  115.750000    7.400000   72.000000    6.000000    8.000000\n",
-       "50%     31.500000  205.000000    9.700000   79.000000    7.000000   16.000000\n",
-       "75%     63.250000  258.750000   11.500000   85.000000    8.000000   23.000000\n",
-       "max    168.000000  334.000000   20.700000   97.000000    9.000000   31.000000"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.describe()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "List the first five rows of the DataFrame:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Ozone</th>\n",
-       "      <th>Solar.R</th>\n",
-       "      <th>Wind</th>\n",
-       "      <th>Temp</th>\n",
-       "      <th>Month</th>\n",
-       "      <th>Day</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td> 41</td>\n",
-       "      <td> 190</td>\n",
-       "      <td>  7.4</td>\n",
-       "      <td> 67</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td> 36</td>\n",
-       "      <td> 118</td>\n",
-       "      <td>  8.0</td>\n",
-       "      <td> 72</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td> 12</td>\n",
-       "      <td> 149</td>\n",
-       "      <td> 12.6</td>\n",
-       "      <td> 74</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td> 18</td>\n",
-       "      <td> 313</td>\n",
-       "      <td> 11.5</td>\n",
-       "      <td> 62</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td> NaN</td>\n",
-       "      <td> 14.3</td>\n",
-       "      <td> 56</td>\n",
-       "      <td> 5</td>\n",
-       "      <td> 5</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Ozone  Solar.R  Wind  Temp  Month  Day\n",
-       "0     41      190   7.4    67      5    1\n",
-       "1     36      118   8.0    72      5    2\n",
-       "2     12      149  12.6    74      5    3\n",
-       "3     18      313  11.5    62      5    4\n",
-       "4    NaN      NaN  14.3    56      5    5"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df_1.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Writing"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Create a copy of the CSV file, encoded in UTF-8 and hiding the index and header labels:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "df_1.to_csv('../data/ozone_copy.csv', \n",
-    "            encoding='utf-8', \n",
-    "            index=False, \n",
-    "            header=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "View the data directory:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "total 16\r\n",
-      "-rw-r--r--@ 1 dmartin  1443163707  2902 Dec 26  2012 ozone.csv\r\n",
-      "-rw-r--r--  1 dmartin  1443163707  3324 Feb 14 06:40 ozone_copy.csv\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!ls -l ../data/"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}

	population	state	year
0	5.0	VIRGINIA	2012
1	5.1	VIRGINIA	2013
2	5.2	VIRGINIA	2014
3	4.0	MD	2014
4	4.1	MD	2015
	Ozone	Solar.R	Wind	Temp	Month	Day
count	116.000000	146.000000	153.000000	153.000000	153.000000	153.000000
mean	42.129310	185.931507	9.957516	77.882353	6.993464	15.803922
std	32.987885	90.058422	3.523001	9.465270	1.416522	8.864520
min	1.000000	7.000000	1.700000	56.000000	5.000000	1.000000
25%	18.000000	115.750000	7.400000	72.000000	6.000000	8.000000
50%	31.500000	205.000000	9.700000	79.000000	7.000000	16.000000
75%	63.250000	258.750000	11.500000	85.000000	8.000000	23.000000
max	168.000000	334.000000	20.700000	97.000000	9.000000	31.000000
	Ozone	Solar.R	Wind	Temp	Month	Day
0	41	190	7.4	67	5	1
1	36	118	8.0	72	5	2
2	12	149	12.6	74	5	3
3	18	313	11.5	62	5	4
4	NaN	NaN	14.3	56	5	5