mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
340 lines
8.7 KiB
Plaintext
340 lines
8.7 KiB
Plaintext
{
|
|
"metadata": {
|
|
"name": "",
|
|
"signature": "sha256:a3a2ee34a40ca6d18902bc7ba52393ce71aa5dda1937d48cdd0db3044dc235bf"
|
|
},
|
|
"nbformat": 3,
|
|
"nbformat_minor": 0,
|
|
"worksheets": [
|
|
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"* Functions as Objects\n",
|
|
"* Lambda Functions\n",
|
|
"* Closures\n",
|
|
"* \\*args, \\*\\*kwargs\n",
|
|
"* Currying\n",
|
|
"* Generators\n",
|
|
"* Generator Expressions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Functions as Objects"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Python treats functions as objects, which can simplify data cleaning."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"%%file transform_util.py\n",
|
|
"import re\n",
|
|
"\n",
|
|
"\n",
|
|
"class TransformUtil:\n",
|
|
"\n",
|
|
" @classmethod\n",
|
|
" def remove_punctuation(cls, value):\n",
|
|
" \"\"\"Removes !, #, and ?.\n",
|
|
" \"\"\" \n",
|
|
" return re.sub('[!#?]', '', value) \n",
|
|
"\n",
|
|
" @classmethod\n",
|
|
" def clean_strings(cls, strings, ops): \n",
|
|
" \"\"\"General purpose method to clean strings.\n",
|
|
"\n",
|
|
" Pass in a sequence of strings and the operations to perform.\n",
|
|
" \"\"\" \n",
|
|
" result = [] \n",
|
|
" for value in strings: \n",
|
|
" for function in ops: \n",
|
|
" value = function(value) \n",
|
|
" result.append(value) \n",
|
|
" return result"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"Overwriting transform_util.py\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 1
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"%%file tests/test_transform_util.py\n",
|
|
"from nose.tools import assert_equal\n",
|
|
"from ..transform_util import TransformUtil\n",
|
|
"\n",
|
|
"\n",
|
|
"class TestTransformUtil():\n",
|
|
"\n",
|
|
" states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', \\\n",
|
|
" 'FlOrIda', 'south carolina##', 'West virginia?']\n",
|
|
" \n",
|
|
" expected_output = ['Alabama',\n",
|
|
" 'Georgia',\n",
|
|
" 'Georgia',\n",
|
|
" 'Georgia',\n",
|
|
" 'Florida',\n",
|
|
" 'South Carolina',\n",
|
|
" 'West Virginia']\n",
|
|
" \n",
|
|
" def test_remove_punctuation(self):\n",
|
|
" assert_equal(TransformUtil.remove_punctuation('!#?'), '')\n",
|
|
" \n",
|
|
" def test_map_remove_punctuation(self):\n",
|
|
" # Map applies a function to a collection\n",
|
|
" output = map(TransformUtil.remove_punctuation, self.states)\n",
|
|
" assert_equal('!#?' not in output, True)\n",
|
|
"\n",
|
|
" def test_clean_strings(self):\n",
|
|
" clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title] \n",
|
|
" output = TransformUtil.clean_strings(self.states, clean_ops)\n",
|
|
" assert_equal(output, self.expected_output)\n"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"Overwriting tests/test_transform_util.py\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 2
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"!nosetests tests/test_transform_util.py -v"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"core.tests.test_transform_util.TestTransformUtil.test_clean_strings ... ok\r\n",
|
|
"core.tests.test_transform_util.TestTransformUtil.test_map_remove_punctuation ... ok\r\n",
|
|
"core.tests.test_transform_util.TestTransformUtil.test_remove_punctuation ... ok\r\n",
|
|
"\r\n",
|
|
"----------------------------------------------------------------------\r\n",
|
|
"Ran 3 tests in 0.001s\r\n",
|
|
"\r\n",
|
|
"OK\r\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 3
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Lambda Functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Lambda functions are anonymous functions and are convenient for data analysis, as data transformation functions take functions as arguments."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Sort a sequence of strings by length (the number of characters in each string)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"strings = ['foo', 'bar,', 'baz', 'f', 'fo', 'b', 'ba']\n",
|
|
"strings.sort(key=lambda x: len(list(x)))\n",
|
|
"strings"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 4,
|
|
"text": [
|
|
"['f', 'b', 'fo', 'ba', 'foo', 'baz', 'bar,']"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 4
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Closures"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Closures are dynamically generated functions returned by another function. The returned function has access to the variables in the local namespace where it was created."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"def make_closure(x):\n",
|
|
" def closure():\n",
|
|
" print('Secret value is: %s' % x)\n",
|
|
" return closure\n",
|
|
"\n",
|
|
"closure = make_closure(7)\n",
|
|
"closure()"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"Secret value is: 7\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 5
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"The following function returns a function that keeps track of arguments it has seen."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"def make_watcher():\n",
|
|
" dict_seen = {}\n",
|
|
" \n",
|
|
" def watcher(x):\n",
|
|
" if x in dict_seen:\n",
|
|
" return True\n",
|
|
" else:\n",
|
|
" dict_seen[x] = True\n",
|
|
" return False\n",
|
|
" \n",
|
|
" return watcher"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"prompt_number": 6
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"watcher = make_watcher()\n",
|
|
"seq = [1, 1, 2, 3, 5, 8, 13, 2, 5, 13]\n",
|
|
"[watcher(x) for x in seq]"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"metadata": {},
|
|
"output_type": "pyout",
|
|
"prompt_number": 7,
|
|
"text": [
|
|
"[False, True, False, False, False, False, False, True, True, True]"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 7
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## \\*args, \\*\\*kwargs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"\\*args and \\*\\*kwargs are useful when you don't know how many arguments might be passed to your function, or when you need to handle named arguments that you have not defined in advance."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"collapsed": false,
|
|
"input": [
|
|
"def foo(func, arg, *args, **kwargs):\n",
|
|
" print('arg: %s', arg)\n",
|
|
" print('args: %s', args)\n",
|
|
" print('kwargs: %s', kwargs)\n",
|
|
" \n",
|
|
" print('func result: %s', func(args))\n",
|
|
"\n",
|
|
"foo(sum, \"foo\", 1, 2, 3, 4, 5)"
|
|
],
|
|
"language": "python",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"stream": "stdout",
|
|
"text": [
|
|
"('arg: %s', 'foo')\n",
|
|
"('args: %s', (1, 2, 3, 4, 5))\n",
|
|
"('kwargs: %s', {})\n",
|
|
"('func result: %s', 15)\n"
|
|
]
|
|
}
|
|
],
|
|
"prompt_number": 8
|
|
}
|
|
],
|
|
"metadata": {}
|
|
}
|
|
]
|
|
} |