data-science-ipython-notebooks/numpy/numpy.ipynb

632 lines
53 KiB
Plaintext
Raw Normal View History

{
"metadata": {
"name": "",
"signature": "sha256:cb8fc4454a69123dcb745c323968d06c15444cee91494edb720893b06e98c249"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# NumPy\n",
"\n",
"* NumPy Arrays, dtypes, and shapes\n",
"* Common Array Operations\n",
"* Reshaping and In-Place Updating\n",
"* Combining Arrays\n",
"* Creating Fake Data and Adding Noise"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NumPy Arrays, dtypes, and shapes"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = np.array([1, 2, 3])\n",
"print(a)\n",
"print(a.shape)\n",
"print(a.dtype)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[1 2 3]\n",
"(3,)\n",
"int64\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"b = np.array([[0, 2, 4], [1, 3, 5]])\n",
"print(b)\n",
"print(b.shape)\n",
"print(b.dtype)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[0 2 4]\n",
" [1 3 5]]\n",
"(2, 3)\n",
"int64\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.zeros(5)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"array([ 0., 0., 0., 0., 0.])"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.ones(shape=(3, 4), dtype=np.int32)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"array([[1, 1, 1, 1],\n",
" [1, 1, 1, 1],\n",
" [1, 1, 1, 1]], dtype=int32)"
]
}
],
"prompt_number": 5
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Common Array Operations"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = b * 0.5\n",
"print(c)\n",
"print(c.shape)\n",
"print(c.dtype)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 0. 1. 2. ]\n",
" [ 0.5 1.5 2.5]]\n",
"(2, 3)\n",
"float64\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d = a + c\n",
"print(d)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 1. 3. 5. ]\n",
" [ 1.5 3.5 5.5]]\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d[0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"array([ 1., 3., 5.])"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d[0, 0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"1.0"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d[:, 0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"array([ 1. , 1.5])"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.sum()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"19.5"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.mean()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"3.25"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.sum(axis=0)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"array([ 2.5, 6.5, 10.5])"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d.mean(axis=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"array([ 3. , 3.5])"
]
}
],
"prompt_number": 14
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Reshaping and In-Place Updating"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"e = np.arange(12)\n",
"print(e)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[ 0 1 2 3 4 5 6 7 8 9 10 11]\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# f is a view onto e's data; reshape makes no copy here\n",
"f = e.reshape(3, 4)\n",
"print(f)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 0 1 2 3]\n",
" [ 4 5 6 7]\n",
" [ 8 9 10 11]]\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Set last five values of e to zero\n",
"e[5:] = 0\n",
"print(e)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[0 1 2 3 4 0 0 0 0 0 0 0]\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# f is also updated\n",
"f"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 18,
"text": [
"array([[0, 1, 2, 3],\n",
" [4, 0, 0, 0],\n",
" [0, 0, 0, 0]])"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# OWNDATA shows f does not own its data\n",
"f.flags"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 19,
"text": [
" C_CONTIGUOUS : True\n",
" F_CONTIGUOUS : False\n",
" OWNDATA : False\n",
" WRITEABLE : True\n",
" ALIGNED : True\n",
" UPDATEIFCOPY : False"
]
}
],
"prompt_number": 19
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Combining Arrays"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
"array([1, 2, 3])"
]
}
],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"b"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 21,
"text": [
"array([[0, 2, 4],\n",
" [1, 3, 5]])"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 22,
"text": [
"array([[ 1. , 3. , 5. ],\n",
" [ 1.5, 3.5, 5.5]])"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.concatenate([a, a, a])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 23,
"text": [
"array([1, 2, 3, 1, 2, 3, 1, 2, 3])"
]
}
],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# vstack treats the 1-D array a as a single row of shape (1, 3)\n",
"# and upcasts all inputs to a common dtype (float64 here)\n",
"np.vstack([a, b, d])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
"array([[ 1. , 2. , 3. ],\n",
" [ 0. , 2. , 4. ],\n",
" [ 1. , 3. , 5. ],\n",
" [ 1. , 3. , 5. ],\n",
" [ 1.5, 3.5, 5.5]])"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# In machine learning, hstack is useful for enriching a dataset:\n",
"# it appends new feature columns alongside the existing ones\n",
"np.hstack([b, d])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
"array([[ 0. , 2. , 4. , 1. , 3. , 5. ],\n",
" [ 1. , 3. , 5. , 1.5, 3.5, 5.5]])"
]
}
],
"prompt_number": 25
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating Fake Data and Adding Noise"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pylab as plt\n",
"import seaborn\n",
"\n",
"seaborn.set()\n",
"\n",
"x = np.random.uniform(1, 100, 1000)\n",
"y = np.log(x) + np.random.normal(0, .3, 1000)\n",
"\n",
"plt.scatter(x, y)\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAeQAAAFVCAYAAAA+OJwpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXtcVHX+/58II5OKuZKt1YZMpdhFcYu4bF7aTE1JgrzE\nz0LIdLut2VVt6UIZbphiai0lXkC6oKYYgoiapVaAUV/JaiOtAVfLLmMEhQMj8fvjc87MOXNBIFCU\nz/Px8FEznDnn8znnzLzP5315vb0aGxuRSCQSiURyeulyugcgkUgkEolEGmSJRCKRSDoE0iBLJBKJ\nRNIBkAZZIpFIJJIOgDTIEolEIpF0AKRBlkgkEomkA+DTmg8FBQU9DowHDMBL5eXlmW06KolEIpFI\nOhktXiEHBQVdD0SUl5f/DbgeuKSNxySRSCQSSaejNSvk0cD+oKCgTUBP4LG2HZJEIpFIJJ2P1hjk\nPsDFwM2I1XEuMLAtByWRSCQSSWejNQb5J+C/5eXlJ4CvgoKCrEFBQeeVl5f/5LxhY2Njo5eX1x8e\npEQikUgkZxCtMnytMcjvA7OA1KCgoAuB7oDF7Yi8vPjxx5rWjOusoE8fPzl/Of/TPYzTQmeeO8j5\ny/n7tepzLU7qKi8vzwf+LygoaC/CXX1feXm57FAhkUgkEskfoFVlT+Xl5XPaeiASiUQikXRmpDCI\nRCKRSCQdAGmQJRKJRCLpAEiDLJFIJBJJB0AaZIlEIpFIOgDSIEskEolE0gGQBlkikUgkkg6ANMgS\niUQikXQApEGWSCQSiaQDIA2yRCKRSCQdAGmQJRKJRCLpAEiDLJFIJBJJB0AaZIlEIpFIOgDSIEsk\nEolE0gGQBlkikUgkkg6ANMgSiUQikXQApEGWSCQSiaQDIA2yRCKRSCQdAGmQJRKJRCLpAPic7gFI\nJBKJpPVYrVays3cDEBs7HKPReJpHJGkt0iBLJBLJGYrVauW223IoKroTgJyc1axdGyON8hmKdFlL\nJBLJGUp29m7FGBsAA0VFCfbVsuTMQxpkiUQikUg6ANIgSyQSyRlKbOxwIiJWA/VAPRERGcTGDj9l\nx7darWRkbCMjYxtWq/WUHfdsRcaQJRKJ5AzFaDSydm0M2dmbAYiNPXXxYxm/bnukQZZIJJIzGKPR\nSELC6HY9hrtMbn38GiV+vbndx3I2Iw2yRCKRnKGcipInTythSdsjY8gSiURyBqIaytmzo5g9O4rb\nbstpVhy3pXFfT5nczvHrsLB0bDabjCf/AeQKWSKRSM5AWuMybsu4rzZ+bbPVs3mzN4mJEwDIz88i\nK2u8jCe3ELlClkgkkk5Ca+qWm8rkVuPXBkNXiovvsu931647ZD10K5ArZIlEIjkDiY0dTk7OaoqK\nEgAUQ9n2sd3Tmcnd2fBqbGxsz/03/vhjTXvuv0PTp48fcv5y/p2Rzjx3OHXzb2lSl8NlnQAII94W\npUrO+x0x4rVO7bLu08fPqzWfkwa5HZE/SnL+nXX+nXnu0LHn316Z2dr9zpwZSU2NrU32eybSWoMs\nXdYSiUTSiWivumXtfo1GY6c2yK1FJnVJJBKJRNIBkCtkiURy2pC9fM8s5PVqX6RBlkgkp4XOroXc\nEY1bU2Pq7NfrVNAqgxwUFPQJ8Ivy8pvy8vK72m5IEomkM9CZtZBPpXFrruE/2Zg68/U6VbTYIAcF\nBRkBysvL/972w5FIJJKzD2ejeKqMW0sMvzS4p5/WJHUFA92CgoIKg4KC3gkKCgpr60FJJJKzn9Pd\ny/dU4U5zura21mU7m63ts5Jbo8zliejoUEymJ4FNQPVZe71OJ60xyL8BL5SXl48B7gFeDwoKktna\nEomkRagKUAsWbGbBgs1nZTzSarUya9YrLkbxk08OAJmoDyOwBnCvCdHSZhCtxfkBKTx8FTZbPRkZ\n26iqqiI+vgCzeR4QicmURmbmWJfrpY71lVfyZYOJVt
BiYZCgoKCuQJfy8nKr8roEuLW8vPyIm83b\nVXVEIpFIOipWq5WbbnqDXbv8gXGormCoJzY2lezs+wF1tTqMtLQ93HNPpId9xAEwYkQWW7dOafaD\ni+Pzd4ijDMtk0qTzMBi6kpAw0oNBfQebzcbatYf54IOLAbjssg85ePAJj+O1Wq2kp29l6dK9HDz4\nOGBs8VjPMk6ZMMidwGDg/qCgoAuBnsB3njbuqGo1p4KOrNZzKpDz77zz/yNz74jZxy2lTx8/li3L\nVwzpb8DTQChwAxER2Tz77O1UVr6pk7CMjIxxOWcZGduUfQhjvmvXHSxb1vy4rtVqZfToc+jdO4Xg\n4EAKCup54IGbAVizZjWZmWPZtGkv4DjXEyYMJz09jw8+MCIeJODgwfeBtUC8sudMjh3rzo8/1jjF\nqSOBLGBKi8d6NtGnj1+rPtcag7wSWB0UFKQ+Kt1ZXl7+e6uOLpFIJBrOvtIaK7AReBzYSbduj7F8\n+aP06tWr3Rs2OJ/LfftSMZvvw5G0FcuYMWmYzQ8D+nNdWnoQmItjVR8G3KJ5PRWbbS3gmgwGdwDb\ngVFtOp/OQItjv+Xl5SfKy8vjysvLhyv/ittjYBKJpPPR0iSkUxVfbc1xY2OHYzIlA5OA9cA4amtf\nIipqI1ar1S41mZAwGqPR6HaffyTxzflcms0PATs1W+xUjLHruQ4J6e+0N9e1W05OERkZ27DZ6t0c\n3SaTvlqBFAaRSCRnJH9kNf1H3OLNPa7RaGT69CEkJu4EHG5ns/khl3Iixz7/H7CTtLRFFBbe36KV\ntNVqJStrByUl5co7vwNRum1MpiLM5rHK/xdjNutj1ipxcSN5++109u6drrzzGfADkKC8XkBZWQpl\nZUZCQ9MJC1tOSckMZb+LeeihAURHn8mejdOD7PbUjnTmGCLI+Xfm+bd27i1pD5iRsY3Zs6PQJkst\nWHDymKWzQY2IaJlbvDnHVedvtVoZMWKRkp3seXuxz9GIlbRI4DKZUtm1a1qzHzAmTVpHSYkvjjjv\nSvz8vqOmJlGZZ4YuZhwdHUp8fIHHc52enkdiYjfEuu064HXgIuBz4AFAjZPWk5S0lm7dugHiAefi\ni/t02nsfZLcniURyFqCWQrVnbPVUCmAYjUYKC+9nzJhUxWWM4sqNcbP1yVfSnsjO3k1JyV/QZ3NP\no6Yml5iYFCIirrCfS+3+mjrXBkNXzf6siAinamf016SsrIJXX33wpOOUNI00yBKJpEPR3PaAsbHD\nyclZrVvhRUePJSNjm/3vrTHmJ3NnuzuuewMr6NWrF7t2TWvyISM2djhpaYswm8e1aoye8SEi4gqP\n59P5XGv3Gx0dqpnnVmA6wjiPQdRQRwHpgJkBA67WnXfH6lnSEqTLuh3pzC5LkPPvjPNXf9D9/IxE\nRoa2ewzR2YAIF6xwRZtMqRQWTqFXr14un3G4xa2YTPOZPn0IcXEjsVqtjBnzhj3z2JM7Wz2uSGjy\nwmAw6Iy3p2vflLGvqqpSji1W0uHhqxg//lwMhq66bd253DMzxxIXl6u4rKcqe8wgLMzG+vWTm+32\ndrffTZv2UlT0BTk52qzrQ8CrQBIAXbrM4/ffZwK9iIhYzc6dUzt1P+TWuqylQW5HOuMPshY5/+bN\n/2you4U/HpttzfGysnZQWnpQyQpuJDFxItpYrcn0FLt2PeLWoGZl7WDFikN24xsYuJDff/+RQ4dS\naE5c2nm+4eEr7QZ05sxIF4PUnPPjMPQ2cnMtlJT8w2VbTzHs2NjhuqSusLAg4uJuBGjW/eXYbwNi\nRfw5UVF1vPTSTKxWKxER6VgsTyhb3w/8RzcGSEWUStWTlradCRM6b4a1jCFLJGcgp6Putr0eAJob\nm/V0/JaMS5/ENJecHGFQRazTYN/ObA4nO3u3yxiMRiMGQ1dN2Q9UVDwCvNDq+RYXT6O4uAAYR3r6\niyQk/IW4uBtb1S
2ptPQAJSVzm7Wtdk4zZtzMjBk3685Ty+4vK/AGMA2IJDc3lW+/zcLb2xeLZRBQ\ngDAbFwFblP8fiXMFbXvocncG
"text": [
"<matplotlib.figure.Figure at 0x108053c50>"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
}
],
"metadata": {}
}
]
}