diff --git a/README.md b/README.md index 5e6a131..1e4af19 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ IPython Notebooks geared towards Python data analysis (core Python, NumPy, panda * [data structures](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/structs.ipynb) * [data structure utilities](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/structs_utils.ipynb) +* [functions](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/functions.ipynb) * [datetime](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/datetime.ipynb) * [unit tests](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/unit_tests.ipynb) diff --git a/core/functions.ipynb b/core/functions.ipynb new file mode 100644 index 0000000..8165934 --- /dev/null +++ b/core/functions.ipynb @@ -0,0 +1,159 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:8c91cadf8bbcbcdd5a60fc0a89e964b846a6f5328eaa57d57afbaedda06ad3ca" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Functions as Objects\n", + "* Lambdas\n", + "* Closures\n", + "* \\*args, \\*\\*kwargs\n", + "* Currying\n", + "* Generators\n", + "* Generator Expressions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Functions as Objects" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Python treats functions as objects which can simplify data cleaning" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file transform_util.py\n", + "import re\n", + "\n", + "\n", + "class TransformUtil:\n", + "\n", + " @classmethod\n", + " def remove_punctuation(cls, value):\n", + " \"\"\"Removes !, #, and ?.\n", + " \"\"\" \n", + " return re.sub('[!#?]', '', value) \n", + "\n", + " @classmethod\n", + " def clean_strings(cls, strings, ops): \n", + " \"\"\"General purpose method to clean strings.\n", + "\n", + " Pass in a sequence of strings and the operations to perform.\n", + " \"\"\" \n", + " result = [] \n", + " for value in strings: \n", + " for function in ops: \n", + " value = function(value) \n", + " result.append(value) \n", + " return result" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting transform_util.py\n" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file tests/test_transform_util.py\n", + "from nose.tools import assert_equal\n", + "from ..transform_util import TransformUtil\n", + "\n", + "\n", + "class TestTransformUtil():\n", + "\n", + " states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', \\\n", + " 'FlOrIda', 'south carolina##', 'West virginia?']\n", + " \n", + " expected_output = ['Alabama',\n", + " 'Georgia',\n", + " 'Georgia',\n", + " 'Georgia',\n", + " 'Florida',\n", + " 'South Carolina',\n", + " 'West Virginia']\n", + " \n", + " def test_remove_punctuation(self):\n", + " assert_equal(TransformUtil.remove_punctuation('!#?'), '')\n", + "\n", + " def test_clean_strings(self):\n", + " clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title] \n", + " output = TransformUtil.clean_strings(self.states, clean_ops)\n", + " assert_equal(output, self.expected_output)\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting tests/test_transform_util.py\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!nosetests tests/test_transform_util.py -v" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "core.tests.test_transform_util.TestTransformUtil.test_clean_strings ... ok\r\n", + "core.tests.test_transform_util.TestTransformUtil.test_remove_punctuation ... ok\r\n", + "\r\n", + "----------------------------------------------------------------------\r\n", + "Ran 2 tests in 0.001s\r\n", + "\r\n", + "OK\r\n" + ] + } + ], + "prompt_number": 3 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/core/tests/test_transform_util.py b/core/tests/test_transform_util.py new file mode 100644 index 0000000..b21bd2d --- /dev/null +++ b/core/tests/test_transform_util.py @@ -0,0 +1,24 @@ +from nose.tools import assert_equal +from ..transform_util import TransformUtil + + +class TestTransformUtil(): + + states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', \ + 'FlOrIda', 'south carolina##', 'West virginia?'] + + expected_output = ['Alabama', + 'Georgia', + 'Georgia', + 'Georgia', + 'Florida', + 'South Carolina', + 'West Virginia'] + + def test_remove_punctuation(self): + assert_equal(TransformUtil.remove_punctuation('!#?'), '') + + def test_clean_strings(self): + clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title] + output = TransformUtil.clean_strings(self.states, clean_ops) + assert_equal(output, self.expected_output) \ No newline at end of file diff --git a/core/transform_util.py b/core/transform_util.py new file mode 100644 index 0000000..76a734c --- /dev/null +++ b/core/transform_util.py @@ -0,0 +1,23 @@ +import re + + +class TransformUtil: + + @classmethod + def remove_punctuation(cls, value): + """Removes !, #, and ?. + """ + return re.sub('[!#?]', '', value) + + @classmethod + def clean_strings(cls, strings, ops): + """General purpose method to clean strings. + + Pass in a sequence of strings and the operations to perform. + """ + result = [] + for value in strings: + for function in ops: + value = function(value) + result.append(value) + return result \ No newline at end of file