diff --git a/pandas/pandas_clean.ipynb b/pandas/pandas_clean.ipynb index ff7d3cc..69d8c37 100644 --- a/pandas/pandas_clean.ipynb +++ b/pandas/pandas_clean.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:a3874e8de1273dccf0e0d39f584d1345a1c1d55f2b6c1ad780fedce8f9421bcf" + "signature": "sha256:82be3772f18b2c96a1486b76adf812e774890e366b350dfd5b1257ab4b086b8e" }, "nbformat": 3, "nbformat_minor": 0, @@ -16,7 +16,8 @@ "* Clean\n", "* Transform\n", "* Merge\n", - "* Reshape" + "* Reshape\n", + "* Concatenate" ] }, { @@ -35,7 +36,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Replace all occurrences of a string with another string, in place (no copy):" + "Check for matching values in a specific column for replacement:" ] }, { @@ -113,6 +114,70 @@ ], "prompt_number": 2 }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "df_1[df_1['state'] == 'VA']" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
popstateyear
0 5.0 VA 2012
1 5.1 VA 2013
2 5.2 VA 2014
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 3, + "text": [ + " pop state year\n", + "0 5.0 VA 2012\n", + "1 5.1 VA 2013\n", + "2 5.2 VA 2014" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Replace all occurrences of a string with another string, in place (no copy):" + ] + }, { "cell_type": "code", "collapsed": false, @@ -172,7 +237,7 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 3, + "prompt_number": 4, "text": [ " pop state year\n", "0 5.0 VIRGINIA 2012\n", @@ -183,7 +248,7 @@ ] } ], - "prompt_number": 3 + "prompt_number": 4 }, { "cell_type": "markdown", @@ -196,7 +261,8 @@ "cell_type": "code", "collapsed": false, "input": [ - "df_1.replace({'state' : { 'MD' : 'MARYLAND' }})" + "df_1.replace({'state' : { 'MD' : 'MARYLAND' }}, inplace=True)\n", + "df_1" ], "language": "python", "metadata": {}, @@ -250,7 +316,7 @@ ], "metadata": {}, "output_type": "pyout", - "prompt_number": 4, + "prompt_number": 5, "text": [ " pop state year\n", "0 5.0 VIRGINIA 2012\n", @@ -261,7 +327,7 @@ ] } ], - "prompt_number": 4 + "prompt_number": 5 } ], "metadata": {}