data-science-ipython-notebooks/core/functions.ipynb

{
 "metadata": {
  "name": "",
  "signature": "sha256:a3a2ee34a40ca6d18902bc7ba52393ce71aa5dda1937d48cdd0db3044dc235bf"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Functions"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "* Functions as Objects\n",
      "* Lambda Functions\n",
      "* Closures\n",
      "* \\*args, \\*\\*kwargs\n",
      "* Currying\n",
      "* Generators\n",
      "* Generator Expressions"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Functions as Objects"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Python treats functions as objects, which can simplify data cleaning."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%file transform_util.py\n",
      "import re\n",
      "\n",
      "\n",
      "class TransformUtil:\n",
      "\n",
      "    @classmethod\n",
      "    def remove_punctuation(cls, value):\n",
      "        \"\"\"Removes !, #, and ?.\n",
      "        \"\"\"        \n",
      "        return re.sub('[!#?]', '', value) \n",
      "\n",
      "    @classmethod\n",
      "    def clean_strings(cls, strings, ops): \n",
      "        \"\"\"General purpose method to clean strings.\n",
      "\n",
      "        Pass in a sequence of strings and the operations to perform.\n",
      "        \"\"\"        \n",
      "        result = [] \n",
      "        for value in strings: \n",
      "            for function in ops: \n",
      "                value = function(value) \n",
      "            result.append(value) \n",
      "        return result"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Overwriting transform_util.py\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%file tests/test_transform_util.py\n",
      "from nose.tools import assert_equal\n",
      "from ..transform_util import TransformUtil\n",
      "\n",
      "\n",
      "class TestTransformUtil():\n",
      "    \"\"\"nose tests for the TransformUtil string-cleaning helpers.\"\"\"\n",
      "\n",
      "    # Raw inputs with stray whitespace, inconsistent casing and punctuation.\n",
      "    states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia',\n",
      "              'FlOrIda', 'south carolina##', 'West virginia?']\n",
      "\n",
      "    # `states` after strip -> remove_punctuation -> title, in that order.\n",
      "    expected_output = ['Alabama',\n",
      "                       'Georgia',\n",
      "                       'Georgia',\n",
      "                       'Georgia',\n",
      "                       'Florida',\n",
      "                       'South Carolina',\n",
      "                       'West Virginia']\n",
      "\n",
      "    # NOTE: the test methods use comments instead of docstrings on purpose;\n",
      "    # nose prints a docstring in place of the test name in verbose mode.\n",
      "\n",
      "    def test_remove_punctuation(self):\n",
      "        # A string made up only of the removed characters becomes empty.\n",
      "        assert_equal(TransformUtil.remove_punctuation('!#?'), '')\n",
      "\n",
      "    def test_map_remove_punctuation(self):\n",
      "        # Map applies a function to a collection; list() materializes the\n",
      "        # result so it can be inspected more than once even where map()\n",
      "        # returns a lazy iterator.\n",
      "        output = list(map(TransformUtil.remove_punctuation, self.states))\n",
      "        assert_equal(len(output), len(self.states))\n",
      "        # Look inside every cleaned string: none of '!', '#', '?' may remain.\n",
      "        leftovers = [value for value in output\n",
      "                     if any(char in value for char in '!#?')]\n",
      "        assert_equal(leftovers, [])\n",
      "\n",
      "    def test_clean_strings(self):\n",
      "        # The operations are applied to each input in the order listed.\n",
      "        clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title]\n",
      "        output = TransformUtil.clean_strings(self.states, clean_ops)\n",
      "        assert_equal(output, self.expected_output)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Overwriting tests/test_transform_util.py\n"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "!nosetests tests/test_transform_util.py -v"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "core.tests.test_transform_util.TestTransformUtil.test_clean_strings ... ok\r\n",
        "core.tests.test_transform_util.TestTransformUtil.test_map_remove_punctuation ... ok\r\n",
        "core.tests.test_transform_util.TestTransformUtil.test_remove_punctuation ... ok\r\n",
        "\r\n",
        "----------------------------------------------------------------------\r\n",
        "Ran 3 tests in 0.001s\r\n",
        "\r\n",
        "OK\r\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Lambda Functions"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Lambda functions are anonymous functions and are convenient for data analysis, as data transformation functions take functions as arguments."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Sort a sequence of strings by the number of letters"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "strings = ['foo', 'bar,', 'baz', 'f', 'fo', 'b', 'ba']\n",
      "strings.sort(key=lambda x: len(list(x)))\n",
      "strings"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "['f', 'b', 'fo', 'ba', 'foo', 'baz', 'bar,']"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Closures"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Closures are dynamically-generated functions returned by another function.  The returned function has access to the variables in the local namespace where it was created."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def make_closure(x):\n",
      "    def closure():\n",
      "        print('Secret value is: %s' % x)\n",
      "    return closure\n",
      "\n",
      "closure = make_closure(7)\n",
      "closure()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Secret value is: 7\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The following function returns a function that keeps track of arguments it has seen."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def make_watcher():\n",
      "    dict_seen = {}\n",
      "    \n",
      "    def watcher(x):\n",
      "        if x in dict_seen:\n",
      "            return True\n",
      "        else:\n",
      "            dict_seen[x] = True\n",
      "            return False\n",
      "        \n",
      "    return watcher"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Build a fresh watcher, then ask it about each element of seq in order.\n",
      "watcher = make_watcher()\n",
      "seq = [1, 1, 2, 3, 5, 8, 13, 2, 5, 13]\n",
      "list(map(watcher, seq))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "[False, True, False, False, False, False, False, True, True, True]"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## \\*args, \\*\\*kwargs"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "\\*args and \\*\\*kwargs are useful when you don't know how many arguments might be passed to your function or to handle named arguments that you have not defined in advance."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def foo(func, arg, *args, **kwargs):\n",
      "    print('arg: %s', arg)\n",
      "    print('args: %s', args)\n",
      "    print('kwargs: %s', kwargs)\n",
      "    \n",
      "    print('func result: %s', func(args))\n",
      "\n",
      "foo(sum, \"foo\", 1, 2, 3, 4, 5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "('arg: %s', 'foo')\n",
        "('args: %s', (1, 2, 3, 4, 5))\n",
        "('kwargs: %s', {})\n",
        "('func result: %s', 15)\n"
       ]
      }
     ],
     "prompt_number": 8
    }
   ],
   "metadata": {}
  }
 ]
}