diff --git a/pandas/pandas.ipynb b/pandas/pandas.ipynb
index 46f32ac..c0c9169 100644
--- a/pandas/pandas.ipynb
+++ b/pandas/pandas.ipynb
@@ -1,7 +1,7 @@
{
"metadata": {
"name": "",
- "signature": "sha256:a0f056a23acdf795f7de8660469dcb0323a659e710786e7d33a158a6b72c6c03"
+ "signature": "sha256:364642bdaf4f304a679c7bbde135fea5a7e31eb8da19fb9b24192091e3e34cb0"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -22,7 +22,8 @@
"* Arithmetic and Data Alignment\n",
"* Function Application and Mapping\n",
"* Sorting and Ranking\n",
- "* Axis Indices with Duplicate Values"
+ "* Axis Indices with Duplicate Values\n",
+ "* Summarizing and Computing Descriptive Statistics"
]
},
{
@@ -5354,6 +5355,238 @@
}
],
"prompt_number": 102
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summarizing and Computing Descriptive Statistics"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Unlike NumPy arrays, Pandas descriptive statistics automatically exclude missing data. NaN values are excluded unless the entire row or column is NA."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_6"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " state | \n",
+ " pop | \n",
+ " unempl | \n",
+ " year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " VA | \n",
+ " 5.0 | \n",
+ " NaN | \n",
+ " 2012 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " VA | \n",
+ " 5.1 | \n",
+ " NaN | \n",
+ " 2013 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " VA | \n",
+ " 5.2 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " MD | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " MD | \n",
+ " 4.1 | \n",
+ " 6.1 | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 120,
+ "text": [
+ " state pop unempl year\n",
+ "0 VA 5.0 NaN 2012\n",
+ "1 VA 5.1 NaN 2013\n",
+ "2 VA 5.2 6.0 2014\n",
+ "3 MD 4.0 6.0 2014\n",
+ "4 MD 4.1 6.1 2015\n",
+ "5 NaN NaN NaN NaN\n",
+ "6 NaN NaN NaN NaN"
+ ]
+ }
+ ],
+ "prompt_number": 120
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_6.sum()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 112,
+ "text": [
+ "pop 23.4\n",
+ "unempl 18.1\n",
+ "year 10068.0\n",
+ "dtype: float64"
+ ]
+ }
+ ],
+ "prompt_number": 112
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Sum over the rows:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_6.sum(axis=1)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 113,
+ "text": [
+ "0 2017.0\n",
+ "1 2018.1\n",
+ "2 2025.2\n",
+ "3 2024.0\n",
+ "4 2025.2\n",
+ "5 NaN\n",
+ "6 NaN\n",
+ "dtype: float64"
+ ]
+ }
+ ],
+ "prompt_number": 113
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Account for NaNs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_6.sum(axis=1, skipna=False)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "metadata": {},
+ "output_type": "pyout",
+ "prompt_number": 115,
+ "text": [
+ "0 NaN\n",
+ "1 NaN\n",
+ "2 2025.2\n",
+ "3 2024.0\n",
+ "4 2025.2\n",
+ "5 NaN\n",
+ "6 NaN\n",
+ "dtype: float64"
+ ]
+ }
+ ],
+ "prompt_number": 115
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "df_6.idxmax()"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "could not convert string to float: MD",
+ "output_type": "pyerr",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_6\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0midxmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;32m/Users/dmartin/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36midxmax\u001b[0;34m(self, axis, skipna)\u001b[0m\n\u001b[1;32m 4197\u001b[0m \"\"\"\n\u001b[1;32m 4198\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis_number\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4199\u001b[0;31m \u001b[0mindices\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnanops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnanargmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4200\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4201\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mNA\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/Users/dmartin/anaconda/lib/python2.7/site-packages/pandas/core/nanops.pyc\u001b[0m in \u001b[0;36mnanargmax\u001b[0;34m(values, axis, skipna)\u001b[0m\n\u001b[1;32m 432\u001b[0m \"\"\"\n\u001b[1;32m 433\u001b[0m values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf',\n\u001b[0;32m--> 434\u001b[0;31m isfinite=True)\n\u001b[0m\u001b[1;32m 435\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 436\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_maybe_arg_null_out\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/Users/dmartin/anaconda/lib/python2.7/site-packages/pandas/core/nanops.pyc\u001b[0m in \u001b[0;36m_get_values\u001b[0;34m(values, skipna, fill_value, fill_value_typ, isfinite, copy)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_values_from_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misfinite\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 159\u001b[0;31m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_isfinite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 160\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[0mmask\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0misnull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/Users/dmartin/anaconda/lib/python2.7/site-packages/pandas/core/nanops.pyc\u001b[0m in \u001b[0;36m_isfinite\u001b[0;34m(values)\u001b[0m\n\u001b[1;32m 200\u001b[0m is_integer_dtype(values) or is_bool_dtype(values)):\n\u001b[1;32m 201\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m~\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfinite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 202\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;34m~\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfinite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'float64'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 203\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 204\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mValueError\u001b[0m: could not convert string to float: MD"
+ ]
+ }
+ ],
+ "prompt_number": 117
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
}
],
"metadata": {}