diff --git a/pandas/pandas_clean.ipynb b/pandas/pandas_clean.ipynb index 42b5484..221366f 100644 --- a/pandas/pandas_clean.ipynb +++ b/pandas/pandas_clean.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:1c8b7cab9b55eb5888612d0b5149649565c258456e73e61b039225439aa11502" + "signature": "sha256:b619f1fd1f2d4495d6a2fe9d048c09b7319b119d4e10a5b2348f0ac6f380a27c" }, "nbformat": 3, "nbformat_minor": 0, @@ -45,7 +45,7 @@ "input": [ "data_1 = {'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],\n", " 'year' : [2012, 2013, 2014, 2014, 2015],\n", - " 'pop' : [5.0, 5.1, 5.2, 4.0, 4.1]}\n", + " 'population' : [5.0, 5.1, 5.2, 4.0, 4.1]}\n", "df_1 = DataFrame(data_1)\n", "df_1" ], @@ -59,7 +59,7 @@ " \n", " \n", " \n", - " pop\n", + " population\n", " state\n", " year\n", " \n", @@ -103,12 +103,12 @@ "output_type": "pyout", "prompt_number": 2, "text": [ - " pop state year\n", - "0 5.0 VA 2012\n", - "1 5.1 VA 2013\n", - "2 5.2 VA 2014\n", - "3 4.0 MD 2014\n", - "4 4.1 MD 2015" + " population state year\n", + "0 5.0 VA 2012\n", + "1 5.1 VA 2013\n", + "2 5.2 VA 2014\n", + "3 4.0 MD 2014\n", + "4 4.1 MD 2015" ] } ], @@ -130,7 +130,7 @@ " \n", " \n", " \n", - " pop\n", + " population\n", " state\n", " year\n", " \n", @@ -162,10 +162,10 @@ "output_type": "pyout", "prompt_number": 3, "text": [ - " pop state year\n", - "0 5.0 VA 2012\n", - "1 5.1 VA 2013\n", - "2 5.2 VA 2014" + " population state year\n", + "0 5.0 VA 2012\n", + "1 5.1 VA 2013\n", + "2 5.2 VA 2014" ] } ], @@ -195,7 +195,7 @@ " \n", " \n", " \n", - " pop\n", + " population\n", " state\n", " year\n", " \n", @@ -239,12 +239,12 @@ "output_type": "pyout", "prompt_number": 4, "text": [ - " pop state year\n", - "0 5.0 VIRGINIA 2012\n", - "1 5.1 VIRGINIA 2013\n", - "2 5.2 VIRGINIA 2014\n", - "3 4.0 MD 2014\n", - "4 4.1 MD 2015" + " population state year\n", + "0 5.0 VIRGINIA 2012\n", + "1 5.1 VIRGINIA 2013\n", + "2 5.2 VIRGINIA 2014\n", + "3 4.0 MD 2014\n", + "4 4.1 MD 2015" ] } ], @@ -274,7 +274,7 @@ " \n", " \n", " \n", - " pop\n", + " population\n", " state\n", " year\n", " \n", @@ -318,17 +318,90 @@ "output_type": "pyout", "prompt_number": 5, "text": [ - " pop state year\n", - "0 5.0 VIRGINIA 2012\n", - "1 5.1 VIRGINIA 2013\n", - "2 5.2 VIRGINIA 2014\n", - "3 4.0 MARYLAND 2014\n", - "4 4.1 MARYLAND 2015" + " population state year\n", + "0 5.0 VIRGINIA 2012\n", + "1 5.1 VIRGINIA 2013\n", + "2 5.2 VIRGINIA 2014\n", + "3 4.0 MARYLAND 2014\n", + "4 4.1 MARYLAND 2015" ] } ], "prompt_number": 5 }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Drop the 'population' column and return a copy of the DataFrame:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_2 = df_1.drop('population', axis=1)\n", + "df_2" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateyear
0 VIRGINIA 2012
1 VIRGINIA 2013
2 VIRGINIA 2014
3 MARYLAND 2014
4 MARYLAND 2015
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 6, + "text": [ + " state year\n", + "0 VIRGINIA 2012\n", + "1 VIRGINIA 2013\n", + "2 VIRGINIA 2014\n", + "3 MARYLAND 2014\n", + "4 MARYLAND 2015" + ] + } + ], + "prompt_number": 6 + }, { "cell_type": "markdown", "metadata": {}, @@ -342,9 +415,9 @@ "input": [ "data_2 = {'state' : ['NY', 'NY', 'NY', 'FL', 'FL'],\n", " 'year' : [2012, 2013, 2014, 2014, 2015],\n", - " 'pop' : [6.0, 6.1, 6.2, 3.0, 3.1]}\n", - "df_2 = DataFrame(data_2)\n", - "df_2" + " 'population' : [6.0, 6.1, 6.2, 3.0, 3.1]}\n", + "df_3 = DataFrame(data_2)\n", + "df_3" ], "language": "python", "metadata": {}, @@ -356,7 +429,7 @@ " \n", " \n", " \n", - " pop\n", + " population\n", " state\n", " year\n", " \n", @@ -398,25 +471,25 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 6, + "prompt_number": 7, "text": [ - " pop state year\n", - "0 6.0 NY 2012\n", - "1 6.1 NY 2013\n", - "2 6.2 NY 2014\n", - "3 3.0 FL 2014\n", - "4 3.1 FL 2015" + " population state year\n", + "0 6.0 NY 2012\n", + "1 6.1 NY 2013\n", + "2 6.2 NY 2014\n", + "3 3.0 FL 2014\n", + "4 3.1 FL 2015" ] } ], - "prompt_number": 6 + "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ - "df_3 = pd.concat([df_1, df_2])\n", - "df_3" + "df_4 = pd.concat([df_1, df_3])\n", + "df_4" ], "language": "python", "metadata": {}, @@ -428,7 +501,7 @@ " \n", " \n", " \n", - " pop\n", + " population\n", " state\n", " year\n", " \n", @@ -500,23 +573,23 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 7, + "prompt_number": 8, "text": [ - " pop state year\n", - "0 5.0 VIRGINIA 2012\n", - "1 5.1 VIRGINIA 2013\n", - "2 5.2 VIRGINIA 2014\n", - "3 4.0 MARYLAND 2014\n", - "4 4.1 MARYLAND 2015\n", - "0 6.0 NY 2012\n", - "1 6.1 NY 2013\n", - "2 6.2 NY 2014\n", - "3 3.0 FL 2014\n", - "4 3.1 FL 2015" + " population state year\n", + "0 5.0 VIRGINIA 2012\n", + "1 5.1 VIRGINIA 2013\n", + "2 5.2 VIRGINIA 2014\n", + "3 4.0 MARYLAND 2014\n", + "4 4.1 MARYLAND 2015\n", + "0 6.0 NY 2012\n", + "1 6.1 NY 2013\n", + "2 6.2 NY 2014\n", + "3 3.0 FL 2014\n", + "4 3.1 FL 2015" ] } ], - "prompt_number": 7 + "prompt_number": 8 }, { "cell_type": "code", @@ -525,7 +598,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 7 + "prompt_number": 8 } ], "metadata": {}