data-science-ipython-notebooks/core/functions.ipynb
2015-01-26 15:43:27 -05:00

208 lines
5.5 KiB
Plaintext

{
"metadata": {
"name": "",
"signature": "sha256:a37a234cd37895919493dabe7ab645bb2b5e24d9b304ab564e18b59eac4f7a0a"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Functions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Functions as Objects\n",
"* Lambda Functions\n",
"* Closures\n",
"* \\*args, \\*\\*kwargs\n",
"* Currying\n",
"* Generators\n",
"* Generator Expressions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions as Objects"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Python treats functions as objects, which can simplify data cleaning: cleaning operations can be collected in a list and applied to each value in turn."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%file transform_util.py\n",
"import re\n",
"\n",
"\n",
"class TransformUtil:\n",
"    \"\"\"String-cleaning helpers, grouped as classmethods.\"\"\"\n",
"\n",
"    @classmethod\n",
"    def remove_punctuation(cls, value):\n",
"        \"\"\"Return ``value`` with every '!', '#' and '?' removed.\"\"\"\n",
"        return re.sub('[!#?]', '', value)\n",
"\n",
"    @classmethod\n",
"    def clean_strings(cls, strings, ops):\n",
"        \"\"\"Apply a pipeline of operations to each string.\n",
"\n",
"        Args:\n",
"            strings: Iterable of strings to clean.\n",
"            ops: Iterable of one-argument callables, applied in order;\n",
"                each receives the previous callable's result.\n",
"\n",
"        Returns:\n",
"            A new list holding one cleaned value per input string.\n",
"        \"\"\"\n",
"        # Materialize once: a one-shot iterator would otherwise be\n",
"        # exhausted after the first string, leaving the rest uncleaned.\n",
"        ops = list(ops)\n",
"        result = []\n",
"        for value in strings:\n",
"            for function in ops:\n",
"                value = function(value)\n",
"            result.append(value)\n",
"        return result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Overwriting transform_util.py\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%file tests/test_transform_util.py\n",
"from nose.tools import assert_equal\n",
"from ..transform_util import TransformUtil\n",
"\n",
"\n",
"class TestTransformUtil(object):\n",
"    \"\"\"Nose tests for the TransformUtil string-cleaning helpers.\"\"\"\n",
"\n",
"    # Raw inputs with stray whitespace, mixed casing and punctuation.\n",
"    states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia',\n",
"              'FlOrIda', 'south carolina##', 'West virginia?']\n",
"\n",
"    # `states` after strip -> remove_punctuation -> title, in the same order.\n",
"    expected_output = ['Alabama',\n",
"                       'Georgia',\n",
"                       'Georgia',\n",
"                       'Georgia',\n",
"                       'Florida',\n",
"                       'South Carolina',\n",
"                       'West Virginia']\n",
"\n",
"    def test_remove_punctuation(self):\n",
"        \"\"\"A string made only of the stripped characters becomes empty.\"\"\"\n",
"        assert_equal(TransformUtil.remove_punctuation('!#?'), '')\n",
"\n",
"    def test_map_remove_punctuation(self):\n",
"        \"\"\"map() applies remove_punctuation to every element of a collection.\"\"\"\n",
"        # list() because map() is lazy on Python 3 and the result is scanned below.\n",
"        output = list(map(TransformUtil.remove_punctuation, self.states))\n",
"        assert_equal(len(output), len(self.states))\n",
"        # Check each character in each string; testing `'!#?' not in output`\n",
"        # would only look for an element equal to the literal '!#?'.\n",
"        leftover = [value for value in output\n",
"                    if any(char in value for char in '!#?')]\n",
"        assert_equal(leftover, [])\n",
"\n",
"    def test_clean_strings(self):\n",
"        \"\"\"Functions stored in a list are applied in order to each string.\"\"\"\n",
"        clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title]\n",
"        output = TransformUtil.clean_strings(self.states, clean_ops)\n",
"        assert_equal(output, self.expected_output)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Overwriting tests/test_transform_util.py\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!nosetests tests/test_transform_util.py -v"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"core.tests.test_transform_util.TestTransformUtil.test_clean_strings ... ok\r\n",
"core.tests.test_transform_util.TestTransformUtil.test_map_remove_punctuation ... ok\r\n",
"core.tests.test_transform_util.TestTransformUtil.test_remove_punctuation ... ok\r\n",
"\r\n",
"----------------------------------------------------------------------\r\n",
"Ran 3 tests in 0.002s\r\n",
"\r\n",
"OK\r\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lambda Functions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Lambda functions are anonymous functions. They are convenient for data analysis because many data transformation functions take other functions as arguments."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sort a sequence of strings by length (the number of characters in each string):"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"strings = ['foo', 'bar,', 'baz', 'f', 'fo', 'b', 'ba']\n",
"# len() works on a string directly, so no intermediate list is needed.\n",
"# list.sort() is stable: strings of equal length keep their original order.\n",
"strings.sort(key=lambda x: len(x))\n",
"strings"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"['f', 'b', 'fo', 'ba', 'foo', 'baz', 'bar,']"
]
}
],
"prompt_number": 7
}
],
"metadata": {}
}
]
}