mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
Added functions snippet IPython Notebook.
This commit is contained in:
parent
6fa1d5f20e
commit
42f6f9f6b5
|
@ -7,6 +7,7 @@ IPython Notebooks geared towards Python data analysis (core Python, NumPy, panda
|
||||||
|
|
||||||
* [data structures](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/structs.ipynb)
|
* [data structures](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/structs.ipynb)
|
||||||
* [data structure utilities](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/structs_utils.ipynb)
|
* [data structure utilities](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/structs_utils.ipynb)
|
||||||
|
* [functions](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/functions.ipynb)
|
||||||
* [datetime](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/datetime.ipynb)
|
* [datetime](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/datetime.ipynb)
|
||||||
* [unit tests](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/unit_tests.ipynb)
|
* [unit tests](http://nbviewer.ipython.org/github/donnemartin/pydatanotebooks/blob/master/core/unit_tests.ipynb)
|
||||||
|
|
||||||
|
|
159
core/functions.ipynb
Normal file
159
core/functions.ipynb
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"name": "",
|
||||||
|
"signature": "sha256:8c91cadf8bbcbcdd5a60fc0a89e964b846a6f5328eaa57d57afbaedda06ad3ca"
|
||||||
|
},
|
||||||
|
"nbformat": 3,
|
||||||
|
"nbformat_minor": 0,
|
||||||
|
"worksheets": [
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Functions"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"* Functions as Objects\n",
|
||||||
|
"* Lambdas\n",
|
||||||
|
"* Closures\n",
|
||||||
|
"* \\*args, \\*\\*kwargs\n",
|
||||||
|
"* Currying\n",
|
||||||
|
"* Generators\n",
|
||||||
|
"* Generator Expressions"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Functions as Objects"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Python treats functions as objects, which can simplify data cleaning."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"collapsed": false,
|
||||||
|
"input": [
|
||||||
|
"%%file transform_util.py\n",
|
||||||
|
"import re\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"class TransformUtil:\n",
|
||||||
|
"\n",
|
||||||
|
" @classmethod\n",
|
||||||
|
" def remove_punctuation(cls, value):\n",
|
||||||
|
" \"\"\"Removes !, #, and ?.\n",
|
||||||
|
" \"\"\" \n",
|
||||||
|
" return re.sub('[!#?]', '', value) \n",
|
||||||
|
"\n",
|
||||||
|
" @classmethod\n",
|
||||||
|
" def clean_strings(cls, strings, ops): \n",
|
||||||
|
" \"\"\"General purpose method to clean strings.\n",
|
||||||
|
"\n",
|
||||||
|
" Pass in a sequence of strings and the operations to perform.\n",
|
||||||
|
" \"\"\" \n",
|
||||||
|
" result = [] \n",
|
||||||
|
" for value in strings: \n",
|
||||||
|
" for function in ops: \n",
|
||||||
|
" value = function(value) \n",
|
||||||
|
" result.append(value) \n",
|
||||||
|
" return result"
|
||||||
|
],
|
||||||
|
"language": "python",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"stream": "stdout",
|
||||||
|
"text": [
|
||||||
|
"Overwriting transform_util.py\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"prompt_number": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"collapsed": false,
|
||||||
|
"input": [
|
||||||
|
"%%file tests/test_transform_util.py\n",
|
||||||
|
"from nose.tools import assert_equal\n",
|
||||||
|
"from ..transform_util import TransformUtil\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"class TestTransformUtil():\n",
|
||||||
|
"\n",
|
||||||
|
" states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', \\\n",
|
||||||
|
" 'FlOrIda', 'south carolina##', 'West virginia?']\n",
|
||||||
|
" \n",
|
||||||
|
" expected_output = ['Alabama',\n",
|
||||||
|
" 'Georgia',\n",
|
||||||
|
" 'Georgia',\n",
|
||||||
|
" 'Georgia',\n",
|
||||||
|
" 'Florida',\n",
|
||||||
|
" 'South Carolina',\n",
|
||||||
|
" 'West Virginia']\n",
|
||||||
|
" \n",
|
||||||
|
" def test_remove_punctuation(self):\n",
|
||||||
|
" assert_equal(TransformUtil.remove_punctuation('!#?'), '')\n",
|
||||||
|
"\n",
|
||||||
|
" def test_clean_strings(self):\n",
|
||||||
|
" clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title] \n",
|
||||||
|
" output = TransformUtil.clean_strings(self.states, clean_ops)\n",
|
||||||
|
" assert_equal(output, self.expected_output)\n"
|
||||||
|
],
|
||||||
|
"language": "python",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"stream": "stdout",
|
||||||
|
"text": [
|
||||||
|
"Overwriting tests/test_transform_util.py\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"prompt_number": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"collapsed": false,
|
||||||
|
"input": [
|
||||||
|
"!nosetests tests/test_transform_util.py -v"
|
||||||
|
],
|
||||||
|
"language": "python",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"stream": "stdout",
|
||||||
|
"text": [
|
||||||
|
"core.tests.test_transform_util.TestTransformUtil.test_clean_strings ... ok\r\n",
|
||||||
|
"core.tests.test_transform_util.TestTransformUtil.test_remove_punctuation ... ok\r\n",
|
||||||
|
"\r\n",
|
||||||
|
"----------------------------------------------------------------------\r\n",
|
||||||
|
"Ran 2 tests in 0.001s\r\n",
|
||||||
|
"\r\n",
|
||||||
|
"OK\r\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"prompt_number": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
24
core/tests/test_transform_util.py
Normal file
24
core/tests/test_transform_util.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
from nose.tools import assert_equal
|
||||||
|
from ..transform_util import TransformUtil
|
||||||
|
|
||||||
|
|
||||||
|
class TestTransformUtil():
    """Nose test case exercising the TransformUtil string helpers."""

    # Raw inputs: state names carrying stray whitespace, '!', '#', '?'
    # characters and inconsistent capitalisation.
    states = [
        ' Alabama ',
        'Georgia!',
        'Georgia',
        'georgia',
        'FlOrIda',
        'south carolina##',
        'West virginia?',
    ]

    # The same names after strip -> remove_punctuation -> title.
    expected_output = [
        'Alabama',
        'Georgia',
        'Georgia',
        'Georgia',
        'Florida',
        'South Carolina',
        'West Virginia',
    ]

    def test_remove_punctuation(self):
        """A string made only of '!', '#' and '?' collapses to ''."""
        cleaned = TransformUtil.remove_punctuation('!#?')
        assert_equal(cleaned, '')

    def test_clean_strings(self):
        """Applying the three cleaning steps in order normalises `states`."""
        pipeline = [str.strip, TransformUtil.remove_punctuation, str.title]
        actual = TransformUtil.clean_strings(self.states, pipeline)
        assert_equal(actual, self.expected_output)
|
23
core/transform_util.py
Normal file
23
core/transform_util.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class TransformUtil:
    """Namespace of class-level helpers for cleaning up strings."""

    @classmethod
    def remove_punctuation(cls, value):
        """Return `value` with every '!', '#' and '?' character deleted."""
        stripped = re.sub('[!#?]', '', value)
        return stripped

    @classmethod
    def clean_strings(cls, strings, ops):
        """Run each string through a pipeline of cleaning callables.

        `strings` is an iterable of strings and `ops` an iterable of
        one-argument callables.  For every string the callables are applied
        in the order given, each one receiving the previous one's result.
        Returns a new list holding the final value for each input string.
        """
        def run_pipeline(text):
            # Thread the value through every operation, left to right.
            for op in ops:
                text = op(text)
            return text

        return [run_pipeline(item) for item in strings]
|
Loading…
Reference in New Issue
Block a user