mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
279 lines
212 KiB
Plaintext
279 lines
212 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# scikit-learn-k-means"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Credits: Forked from [PyCon 2015 Scikit-learn Tutorial](https://github.com/jakevdp/sklearn_pycon2015) by Jake VanderPlas"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%matplotlib inline\n",
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import seaborn; \n",
|
||
|
"from sklearn.linear_model import LinearRegression\n",
|
||
|
"from scipy import stats\n",
|
||
|
"import pylab as pl\n",
|
||
|
"\n",
|
||
|
"seaborn.set()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## K-Means Clustering"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"('Reduced dataset shape:', (150, 2))\n",
|
||
|
"Meaning of the 2 components:\n",
|
||
|
"0.362 x sepal length (cm) + -0.082 x sepal width (cm) + 0.857 x petal length (cm) + 0.359 x petal width (cm)\n",
|
||
|
"-0.657 x sepal length (cm) + -0.730 x sepal width (cm) + 0.176 x petal length (cm) + 0.075 x petal width (cm)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAFVCAYAAADLxheZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XdAVFf68PHvnaFKR5oKgiCMiNh77yYauyaxxahpZlPd\n3nez7/52N7vp3ZJoEltiNCb2GmOw9z42UBBEpHem3PePMSAqisAU8Pn85Z17597nMMgz99xznqOo\nqooQQgghHI/G3gEIIYQQ4s4kSQshhBAOSpK0EEII4aAkSQshhBAOSpK0EEII4aAkSQshhBAOyqk2\nb9bpdN2Af+v1+gG3vP4qMAvIuPHSs3q9/mxtriWEEEI8aGqcpHU63W+AqUDBHXZ3BKbp9frDNT2/\nEEII8aCrTXf3eWAcoNxhXyfgDzqdbqdOp/tdLa4hhBBCPLBqnKT1ev1KwFjF7qXAs8BAoLdOpxtR\n0+sIIYQQD6paPZO+i3f0en0egE6nWwt0ANZWdbCqqqqi3OmGXAghhGiQqpX06jxJ63Q6H+CYTqdr\nDRRhuZtecLf3KIpCRkZ+XYdic4GBXvW+HQ2hDdAw2tEQ2gDSDkfSENoADaMdgYFe1TquLpK0CqDT\n6SYBnnq9ft6N59DbgVJgi16v31AH1xFCCCEeKLVK0nq9PgnoeePfS296fSmW59JCCCGEqCEpZiKE\nEEI4KEnSQgghhIOSJC2EEEI4KEnSQgghhIOSJC2EEEI4KEnSQgghhIOSJC2EEEI4KEnSQgghhIOS\nJC2EEEI4KEnSQgghhIOSJC2EEEI4KEnSQgghhIOSJC2EEEI4KEnSQgghhIOqi/WkhXhgXL16haNH\nt+HsrCUgMIa28Z3tHZIQogGTJC1ENRUUFHD06LdMmdoDgIQEPXq9KzpdvJ0jE0I0VNLdLUQ1nTlz\njGEPxZZv9+qlIzn5tB0jEkI0dJKkhaimgIAQEhOvl28XFhaj0bjaMSIhREMn3d1CVFNERCTbth0l\n/epBPDxdOKsvZMyYZ+0dlhCiAZMkLcR9GDhwLMXFxZSVlRLfxtfe4QghGjhJ0kLcJ3d3d9zd3e0d\nRq2pqsrKlWvJzMpj6JBeRESE2zskIcQtJEkL8QBSVZU//fkd9h1qhEbjzoZNX/La38bRJi723m8W\nQtiMDBwT4gGUkZHBvgNFaDSWHoHC4uasXLnTzlEJIW4lSVqIB5BGo0FRzJVeUzSqnaIRQlRFkrQQ\nD6CAgAD692mMaspFVU14eyQydfIwe4clhLiFPJMW4gH1hz/MpscPO0lLy2DI4OcJDAywd0hCiFtI\nkhbiATagfx97hyCEuAvp7hZCCCEclCRpIYQQwkFJkhZCCCEclCRpIYQQwkFJkhZCCCEclCRpIYQQ\nwkFJkhZCCCEclCRpIYQQwkFJkhZCCCEcVK0qjul0um7Av/V6/YBbXh8J/BkwAp/q9fr5tbmOELZw\n7NgB0q+lEBUZR2RktL3DEUKImt9J63S63wDzANdbXncG3gSGAP2AZ3Q6XVBtghTCWq5cuczGjQuZ\nP/8vBIckM2lSc8oMBzhyZI+9QxNCiFp1d58HxgHKLa/HAuf1en2uXq83AD8BfWtxHSGsorS0lKPH\nvmPK1La0im1MXFw4AL1768jMPG3n6IQQohZJWq/Xr8TSnX0rbyD3pu18wKem1xHCWpKTL9GlSzN7\nhyGEEFWyxipYuYDXTdteQPa93hQY6HWvQ+qFhtCOhtAGuHc7nJ2j2LM3gdjYcFxcnDl3LoXo6FD2\n7j1PRIv2DvFzcIQY6oK0w3E0hDZAw2nHvVgjSZ8BonU6nR9QiKWr+7/3elNGRr4VQrGtwECvet+O\nhtAGqG47tKjmSJYv24ObuxNLlhwkKrKMmJj2REbG2P3n8GB9Fo6vIbSjIbQBGkY7qvsloy6StAqg\n0+kmAZ56vX6eTqebA2zE0p2+QK/Xp9XBdYSo
c50790NV+2I2m+nVU2vvcIQQopJaJWm9Xp8E9Lzx\n76U3vb4GWFOryBqIA9t/4Oii5YBK/BOP0nXgQHuHJG6hKAparSRoIYTjsUZ3t7ghUa/n4Mt/JfJq\nIQBHD56m8fImRLWOtXNkQggh6gOpOGZFh7fvoMXVgvLtiPRCju7YYceIhBBC1CeSpK0oVBdDpltF\nN+p1Nw1No1vaMSIhhBD1iSRpK+o6oD/Or0zlZAtfTkb44vzSJLoPHmzvsIQQQtQT8kzayibMeQn1\n1RcBywAlIYQQorokSduAJGchhBA1Id3dQgghhIOSJC2EEEI4KEnSQtRT586dYtOmVSQlXbR3KEII\nK5EkLUQ9lLBrE65up5k0uTklpXs5dCjB3iEJIaxAkrQQ9VBZaRIdO7ZAURR699aRna23d0hCCCuQ\nJC1EPXTrjAFF/icL0SDJf20h6qUQ9PorABw5kkQj93A7x2M9qqpiMpnsHYYQdiHzpIWoh/r3H8mR\nI3s5fCiJZs1a0r17W3uHZBVfr1jHsq/2UVamEB/nzT9ee0lWLBMPFEnSQtRT7dt3s3cIVrN1WwJr\n1+4jYfdpGnm0wcXVj32HSli46BtmzXzU3uEJYTOSpEWDlpFxjX37vsPTS0N+HgwePBk3Nzd7hyXu\nYs+eA7z5zi6MphB8/fuQnXUIrZM7Wq0b168X3PsEQjQgkqRFg7Znzzc8OaMriqJgNBpZ/OVyRoyY\nbu+wxF3s2n0CoymkfNvTK5qSolQ8vbzp3KmLHSMTwvYkSYsGzcdXWz4S2snJCQ8P1c4RVc/169fR\naMoAF3uHYnP+/h6YzTloNJYeD6Mhg9axCg8/FMugQb3tHJ0QtiVJWjRoeXkVo4JNJhOFhY6dpFVV\nZfXqBehauZJ+TSUpEUaMeMLeYdnUtKnjOHPmHY6duIpWa+TxafHMmDHB3mEJYReSpEWD1qnjSBYu\nXIuPtxM5OWYGDHjc3iHdUVpaCsdP7CQlOYlHRraidWvLlKqoqOvs359Aly697Byh7Wi1Wv79rzkU\nFRXh7OyMs7OzvUMSwm4kSYsGrUmTUB4Z8ay9w7ir69czOH36eyZP7gbo+Oqr7YSE+OHv702TJn78\ntDPV3iHaRaNGjewdghB2J8VMhLCh4uJitm5bw/bt6zAYDAAcOvwTEx/tWn7MhAn92LXrBACrVh2i\nffuet53n8OFdbN26jO3bv8NsNtsmeCGEzcmdtBA2UlRUxIYNc3lyRg+MRhNffP4xo0fPxtXFnby8\nInx8PADIzi7gyOHrGAzniYocRkBAYKXz7Nq1meiYYoYMjSEvr4ivv/qU0aOfqlYMBoOBbdu+wdnF\nCKonAwaMvq3EqBDCcUiSFsJGEnZtYuasnjg5OeHi4syUqR3YvGk7ffoMZdnST+jdJwSz2czuXdd5\n9tm/EhTkTUZG/m3nKStLIza2DQDe3o1oHGBCVdVqJdv16z9n0uTWuLu7kpWVz/p1yxk2zDGf0wsh\nJEkLYTuqGY2m4gmTRqPBbDah0WgYN+45Ll48j6IojB0bddeEW1ZWuXu7tMRU7bthX18z7u6uAPj7\ne+HmnliDhgghbEWeSQthIz16DOazTxMwmUyUlRn4fNF+evQYCFhWtYqKiiYysuU9E25MTB+WLdvL\npUtX2bL5GF5esdWOoaSk8hS04iJZuEIIRyZ30kLYiKenN4MGzWD58q1oFA0jRz6Hi8v9FyuJiIgi\nKGg6Fy+eIzS0KwEBAdV+b3h4DxYv/pGwMG8uXsyhXduR9319IYTtSJIWwoY8Pb0ZOmRsrc/TqFEj\n2rRpd9/v0+niadmyNbm5OcS28neYQWMGgwGNRiMrXAlxC0nSQjxgtFot/v6N7R0GYKmw9vd/fMCB\ng1lotSZGj2zDzBkT7R2WEA5DnkkLIexm+fLvSNjjgsHUgpKyliz7OhH92XP2DksIhyF30lZW3akx\nQtyqsLCQ
776bi68fFBSYiWs9hPDwKHuHVafSM3LRaNzLt81mXy5eSEIXE33H4zdv3s6qbzfRsWMs\nT816sGqaiweTJGkr2bV
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x10c2dabd0>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn import neighbors, datasets\n",
|
||
|
"\n",
|
||
|
"iris = datasets.load_iris()\n",
|
||
|
"\n",
|
||
|
"X, y = iris.data, iris.target\n",
|
||
|
"from sklearn.decomposition import PCA\n",
|
||
|
"pca = PCA(n_components=2)\n",
|
||
|
"pca.fit(X)\n",
|
||
|
"X_reduced = pca.transform(X)\n",
|
||
|
"print(\"Reduced dataset shape:\", X_reduced.shape)\n",
|
||
|
"\n",
|
||
|
"import pylab as pl\n",
|
||
|
"pl.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y,\n",
|
||
|
" cmap='RdYlBu')\n",
|
||
|
"\n",
|
||
|
"print(\"Meaning of the 2 components:\")\n",
|
||
|
"for component in pca.components_:\n",
|
||
|
" print(\" + \".join(\"%.3f x %s\" % (value, name)\n",
|
||
|
" for value, name in zip(component,\n",
|
||
|
" iris.feature_names)))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAFVCAYAAADLxheZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3WdAVFfawPH/nRl674qiqOiAvRessbfYe0sxGtNMzO6m\n7242u8mb7KYXU01To8beYu8ttoiK6KCCiiCIivQyzNz3wxgQCyIwDODz+8Sde+49z2FgnrnnnnuO\noqoqQgghhKh8NLYOQAghhBB3JklaCCGEqKQkSQshhBCVlCRpIYQQopKSJC2EEEJUUpKkhRBCiEpK\nV5aD9Xp9B+Bdg8Hw0C2vzwKmAsk3XnrSYDBEl6UuIYQQ4kFT6iSt1+tfAiYBGXfY3RqYbDAYjpT2\n/EIIIcSDrizd3WeAEYByh31tgNf0ev0uvV7/ShnqEEIIIR5YpU7SBoNhGZB/l90LgCeBnkAXvV4/\nqLT1CCGEEA+qMt2TLsYnBoMhDUCv168FWgFr71ZYVVVVUe50QS6EEEJUSyVKeuWepPV6vQdwTK/X\nNwaysFxNzynuGEVRSE5OL+9QKpyfn1uVb0d1aANUj3ZUhzaAtKMyqQ5tgOrRDj8/txKVK48krQLo\n9frxgKvBYPj2xn3obUAusNlgMKwvh3qEEEKIB0qZkrTBYDgHhN/4ecFNry/Acl9aCCGEEKUkk5kI\nIYQQlZQkaSGEEKKSkiQthBBCVFKSpIUQQohKSpK0EEIIUUlJkhZCCCEqKUnSQgghRCUlSVoIIYSo\npCRJCyGEEJWUJGkhhBCikpIkLYQQQlRSkqSFEEKISkqStBBCCFFJSZIWQgghKqnyWE9aiAdGwvnz\nbPz0SzR5+TQeMZi2D/WwdUhCiGpMkrQQJZSRkcGix56laeQlACK27Mfhh/dp1qGDjSMTQlRX0t0t\nRAkdP3CAupEXC7brXskmatM2G0YkhKjuJEkLUUI16tQhxdWuYDsHMw6+3jaMSAhR3UmSFqKE6oWE\n4DVzClEBTpz20HF+eDgPT5tq67CEENWY3JMW4j6MeOFZsp+cSl5eLh4enrYORwhRzUmSFuI+OTk5\n4eTkZOswykxVVTYu/JWM5GTaDR5Infr1bR2SEOIWkqSFeACpqso3z/+VgEXbcVcVVs9dQZ85H9Ko\neXNbhyaEuInckxbiAZScnAyrd+KiKgA0On+d339eaOOohBC3kiQtxANIo9Fg1mpufdE2wQgh7kr+\nK4V4APn6+uI0ph/X7MCEyolGfnR/8jFbhyWEuIXckxbiAfXoO2+xr2c3rsQnMHngAHz8/GwdkhDi\nFpKkhXiAderd29YhCCGKId3dQgghRCUlSVoIIYSopCRJCyGEEJWUJGkhhBCikpIkLYQQQlRSkqSF\nEEKISkqStBBCCFFJSZIWQgghKilJ0kIIIUQlVaYZx/R6fQfgXYPB8NAtrz8M/B3IB743GAzflaUe\nISrCoZ07iTtpoFmXzoQ0aWzrcIQQovRJWq/XvwRMAjJued0O+BBoC2QBe/R6/SqDwXC5LIEKYQ0X\nYmLY9L9PiYmMpFFMCoFGhe3+c0l+/3U69e9n6/CEEA+4snR3nwFGAMotr4cBZwwGQ6rBYDACu4Fu\nZahHCKvIzc1l6fRZ1Fm6G1fDRQKNlj/lupezOP7zYhtHJ4QQZUjSBoNhGZbu7Fu5A6k3bacDHqWt\nRwhruXD+HN7Hz9k6DCGEuCtrrIKVCrjdtO0GpNzrID8/t3sVqRKqQzuqQxvg3u2wswthU6AXxGeg\nAy6RS00cuBjgSudnJlWK30NliKE8SDsqj+rQBqg+7bgXayTpU0BDvV7vBWRi6er+370OSk5Ot0Io\nFcvPz63Kt6M6tAFK2g4tjV+fyZGPv8U+w4UzDWrg3LMHnbt1pVGzpjb/PTxY70XlVx3aUR3aANWj\nHSX9klEeSVoF0Ov14wFXg8HwrV6vfxHYgKU7
fY7BYLhUDvUIUe56jBpB95HDMZvNaLVaW4cjhBBF\nlClJGwyGc0D4jZ8X3PT6GmBNmSKrJo4dP0Ry8gkA/HzDaN68vY0jErdSFEUStBCiUrJGd7e44fz5\nGDTKKcaObQbAjh0niY31pl69EBtHJoQQoiqQGcesyBB9lO49wgq2u3cP48yZYzaMSAghRFUiSdqK\n/P0CiYlJKtiOiUnEx6emDSMSQghRlUh3txW1bNmBLVuWceSP/aAo5Bv96N17oK3DEkIIUUVIkray\nXr1GoKoqYBmgJIQQQpSUJOkKIMlZCCFEacg9aSGEEKKSkiQthBBCVFKSpIWook5GHGHFnO+JMZyy\ndShCCCuRJC1EFbTh57nsH/00zq9+wdYR09mzZq2tQxJCWIEkaSGqoNNzl1Ir1YiCQt3kLCJ/XGTr\nkIQQViBJWoiqyKQW3TabbROHEMKqJEkLUQXVGTWQy86WRUESPB0IGTPExhFZj6qqmEwmW4chhE3I\nc9JCVEFDnn6S/fqGXDxxklbt29C8Y0dbh2QVm+cv4PjnP6DNzsO+a2umffw/WbFMPFAkSQtRRXXo\n1ZMOvXraOgyr2L9xIxELlhOzeTutcu3xwY6cRdtZFfotw5+ZYevwhKgwkqRFtZZ06RLL3/g36qVk\n7PX1mPh/b+Ho6GjrsEQxInbv4cTMt6h/LYf6uLCfdJzR4oSG5EtJ9z6BENWIJGlRrS1+8TUabTmG\ngoLpUAwL7HQ89t93bB2WKMapbTsJupZTsB2KExfJxc3VjZAu1bNbX4i7kSQtqjVzbDwKlrnTtSgY\nz8bZOKKSuXLlChpNHmBv61AqnKO/LzmoON5435J0ZrI7t0I/aiid+vezcXRCVCxJ0qJaU4ICIOYa\nAGZUtEGVez1vVVX5ZtZLqKt3oGo0OI/pxyNv/8vWYVWoh594nG+OncC07SD5DnbUnzaBIU9Pt3VY\nQtiEJGlRrQ3/379Z+fq/ISEZbaO6TPrP320d0h3FnzvPjh/mEmswUGvrMWphB5hJ+XEte7t3Ibxv\nH1uHWGG0Wi1PffExWVlZ2NnZYWdnZ+uQhLAZSdKiWqsdHMwz8+fYOoxiXU5KZPHkGTQxJOMH/E4O\nHmhwRYuHUSU5Pt7WIdqEs7OzrUMQwuYkSQtRgbKzs9myZAkanY4+o0ZhZ2fH7mUraWy4DDfuwbbH\nlQgyaY0rhoZ+jBvY/7bz7P1tHef2HsClVk0efvIJNBqZl0iI6kiStBAVJCsri9ljH6HJ/hhMqHyx\neiPP/vwtzp6eZKPifCNJZykqWZ1bcL1pGEMmTcQ/oEaR82ycv4DkNz7GP9NENirfRZ9h+kf/LVEM\nRqORJe9/RG7cJTzCGjHs2RkoilLubRVClA9J0kJUkA0/z6Pp/hi0KOhQaLD5KNtWrqTv2NHM3r4L\n19V7UIGc4T148/MP8fd3Jzk5/bbzxG3YTp1MyzSZTihk7ziIqqolSrY//u1VAn/Zij0aMpWdLExP\nZ/xrL5V3U4UQ5USStBAVRDWbuTmNKoDZZEKj0fDMV59x9i/RKIpC/ZCGxSZc1bHoY1lmZ8cSXw3n\nHD6J/Y0p+11UhcsHj91vM4QQFUhuZAlRQfpMnsDx1nUwo5KPiqFrGA8NGwaAoiiENNLToGGjeybc\nnrOe5kRYAMkYOe3vRIvnHi9xDIqXW9EXPFzvux1CiIojV9JCVBA3N3eeXPwTm35ZiFan49lJE7G3\nv//JSuqHhTF93WLOnDpJYJ26+Pr6lvjY7q/OYtMrb+Ecf4WsBoGMeO0v912/EKLiSJIWogK5ubkz\n4smyT8zh7OxM89Zt7vu4Zp060HjrKlJTr+Pl5V1pBo0ZjUY0Go2scCXELSRJC/GA0Wq1eHv72DoM\nwDLD2py/vUbWxj2Y7XXUe2wsQ5950tZhCVFpyD1pIYTNrJ87D9+5GwlLzKLJhTSufPA9Z0+dsnVY\nQlQakqSt
TFVVW4cgqqjMzEy+nPYMn3UZwGcjJnI2KsrWIZW7tPhEXNTCLnffjDzizpy5a/ld69fz\n1qNTWfjllxURnhA2J0n
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x105f65390>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn.cluster import KMeans\n",
|
||
|
"k_means = KMeans(n_clusters=3, random_state=0) # Fixing the RNG in kmeans\n",
|
||
|
"k_means.fit(X)\n",
|
||
|
"y_pred = k_means.predict(X)\n",
|
||
|
"\n",
|
||
|
"pl.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y_pred,\n",
|
||
|
" cmap='RdYlBu');"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"K Means is an algorithm for **unsupervised clustering**: that is, finding clusters in data based on the data attributes alone (not the labels).\n",
|
||
|
"\n",
|
||
|
"K Means is a relatively easy-to-understand algorithm. It searches for cluster centers which are the mean of the points within them, such that every point is closest to the cluster center it is assigned to.\n",
|
||
|
"\n",
|
||
|
"Let's look at how KMeans operates on the simple clusters we looked at previously. To emphasize that this is unsupervised, we'll not plot the colors of the clusters:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAFVCAYAAAA30zxTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt0VfWd//9nciACVrFWvFCoqMj+WjtYKaKC3KTqKAEL\nBshFZmypGEdRJ9LO2Pmt/jqzpnOpYuyAgoDV1nALNKESmeKvEkwKHURB0+nYTYsFkUhLv46xFBA4\nnN8f+5zkXPY+131uO6/HWiySc9n78zkn57z35/25lQQCAURERCS3SvNdABERkd5IAVhERCQPFIBF\nRETyQAFYREQkDxSARURE8kABWEREJA/6JPMgwzCuB/7NNM3JhmEMB14AzgD/DTxgmqbmMomIiKQg\nYQvYMIxvAiuAs4I3PQl8yzTNCUAJcGf2iiciIuJNyaSgfwvMxAq2AKNM02wL/vyfwJezUTAREREv\nSxiATdNsAk6H3VQS9vNRYKDbhRIREfG6pPqAo5wJ+/kc4KNETwgEAoGSkpJEDxMREfGKhEEvnQC8\nxzCMiaZpvgbcDryasBQlJRw58qc0TlX4Bg06x7N1A9Wv2Kl+xcvLdYPeUb9EUgnAoZHOjwIrDMMo\nA/4H2JB60URERHq3pAKwaZr7gbHBn38DTMpekURERLxPC3GIiIjkgQKwiIhIHigAi4iI5IECsIiI\nSB4oAIuIiOSBArCIiEgeKACLiIjkgQKwiIhIHigAi4iI5IECsIiISB6ksxmDSMb8fj9r1rTx85+f\nBOCmm8qoqpqAz+fLc8lERHJDAVhyzu/3M3/+ejZtmk1oO+mmpi5aWxtZvnyWgrCI9ApKQUvOrVnT\nFhF8LQPZtGk2a9e256tYIiI5pQAsOWelnQfa3DOQ9vZPcl0cEZG8UAAWERHJAwVgybmbbioDumzu\n6WL8+LNyXRwRkbxQAJacq6qawLRpjUQG4S6mTWuksnJ8voolIpJTGgUtOefz+Vi+fBZr17Z19/mO\nH38WlZUaAS0ivYcCsOSFz+ejpmYSNTX5LomISH4oBS0iIpIHCsAiIiJ5oBS0eJ6WvRSRQqQALJ6W\naNlLEZF8UQpaPE3LXopIoVILWAqWG6njRMtePvKIO2UVEUmVArAUJO2YJCJepwAsBSl+6riNmppJ\nSR3nppvKaGrqIrYVnP6yl6GWeXv7cfbv30dJyblceumFjB/fT4O7RCRpCsASl9/vp6GhNe00cLpp\n5ESp42QX8KiqmkBra2NUMA8te5n6IKyelvldwEvAA8BAdu+G5uYutm5dx6RJg9ixww9oxLWIOFMA\nFkd+v5/KyhfZsGEG6aSBCyGN7Paylz0t851Az+tiGUhLyxxaWrYF71PaXEScaRS0OFqzpi0i+FqS\nH0GcyQhkN3dMCi17uWzZbSxbdhs1NZPSDoY9LfPjOLXQo3/XiGsRsaMALI4SpYGz+Xz7HZM+ZOTI\nx2lrO05t7RYaGlrx+/0Jy5F/yb1eItK7KAUtBSk6dRwI+Nm3bx8dHd+go8O9dHYyfdzh/dj79x8G\nNgJnYV0cxA7uggFplUVEehcFYHGU6QjiTJ8fvmNSQ0Mrzc0PkOmo6HDJ9HHb9WNbQXYN8AFwV9Tt\n64B5adU3Xjm1lKaI9ygFLY6qqiZQUdFMZBo4NIJ4fFLPj00jJ//8cJmmw+0k08ft1I8NVVx77QeM\nGvUMo0Y9zYwZjSxa1Ep5eQA4GvbY9OobEroAqKubSFNTBU1NFdTVTWT+/PVFkn4XESdqAedZIbdu\nfD4fa9fOZfHizWmNIHZ7BLLbkpnqFO8xl11msGzZbRG3Vlf7Xa2vW/OhRaTwKADnUSFM00nE5/NR\nWTmeQMC6SLD6Y9uSvkgITyNnIl46u0+fd6mt3dL9uHxewLhV3xC35kOLSOFRCjqPimGjgEJJgTql\nsy+5ZBGNjV9Pq2zJTHVyczqUiEg4BeA8yka/
ptuef/7VgrhICKWz6+vbmDlzAzNnbmD27Gf54INH\ngPPTKlsyfdxu9mOnQxcAIt6lFLTE1drqvOBErlOg0eldK+18vs0j7ctm19++alU148ZtceyzzXc/\ntttLaYpI4VAAzqNsbBTQmwQCfuAVrFWpAPoDU4DYwOjU375jx2oWL/4KNTXOwdTtfl078QbjFfJA\nNhFJnwJwHhVD62by5P6sXp2/iwSnwASwb99vgQeJnIf7IjA9pmxO/e0bNsxg3Dh3RxOnOrI9mcF4\n2b4AEJHcUwDOI7vWzbhxfQgELuCBB34GuDuqN50pT1/96hReeukHeblIiBeYJk68gI6O8OBL8OcZ\njBz5OJWVCyOOFa+/va3tGIFA+js+JVtmp5Htmmok0jspAOdZeOsmm9OS0j12PlOg8QLToUMNOAXU\nK64YGVO2QOCMw1n87Ny5l+bmh3DjNU8nmGqqkUjvlFYANgyjFFgJjADOAPeapmm6WbDeaNWq1qy1\nhDJpZeUrBRovMB054jzNqKQkdnB/3777sV+7eROdnQ/h1muuYCoiyUp3GtKtwNmmad4E/BPwXfeK\n1Dv5/X6efPJdsjUtKVtTnkKbGdTWbonZoSjefck8P57PfOZjrE0RfhL89wrgx6lv+tSpS4HYKUeQ\nvdc8WZpqJNI7pZuCPg4MNAyjBOvb66R7Reqd1qxpo7Pzc/kuRkripbWXLp3J/fc3xdz3xBOLqau7\nmpqayQBx0+LOo8Q/5MyZEmBy2H1dwHOUlweorJwTU9aSEh8wF9gKHAveOgC4zJXXIiSdke3FMBhP\nRNyXbgDeDvQDfg18BpjmWol6qZ4WanZGHGdjylO8tPaAAc+yadN9Mfd1di5g4cJtvPbaeiZOvCBu\nWtwpMI0c+SQdHd+IeR7MYfLkVoCYLQbHjvXR1HQUuCWqFhtx8zV3KvPgwYtpa7uSQKA1ZoCXphqJ\n9FKBQCDlfyNGjPjWiBEjvhv8eciIESP2jhgxoizOc8TB6dOnAytWbAlceuniAPw4AP8agI8CEAj+\n+ygwdOg/B06fPp3xeSoqno85dkXF82kfu7p6Y9ixIv9Z9bG/DzYG4KPA9dcvdXxMdfXG7nKvXPlK\noLp6Y6C6emNg5cpXAlVVTY7Pq6pqsq3nXXc9F7jrrueSvj2T1yVU5qqqpsDQoYuC9T3tyrFFpGgk\njKXptoDPBj4O/vy/QF/sVj8Ic+TIn9I8VWEbNOictOsWmcK9NXjrh8CTwBcJddE/8sjVfPjhMfuD\npGDx4q8wblx0K+srcY9tV7/QdKb29gNY/a+xC2CcOeM06jhkIJ2dzj0XJ06c6j7v9Ok3MH16z32b\nN29xfJ5pHmL37rnApwhfpOPHPz6fxx/3M3ZsK6tWvc+RI2cYNOjP3HrrF5k6dQw33ZTa65LI9Ok3\n8PHHraxZMw+7uceLF2/Oydzjiy8+L+PPXiHv2JXJ56/Qeblu0Dvql0i6Afhx4HnDMNqxgu9jpmke\nT/AciWKfwj0fqANeB8YwbVoj1dXu9AO6MZo58qIhFBVDC2DMxQrCXYwde4J16+xTu1bfK1xwwVEO\nHoyf/rX78rfSyU7H/hNW8H0RCN/rt4v6+v9g1KgLgwF6IAcPwn33WX2ty5fPirsaVjpyNSI6Xl98\nc/PXsnbsQtmxS6RYpRWATdP8COvbTTIQ7wt66NB3qKs7UXD9gM4b1M/AGuA0hvLydSxaNI9jx2L7\nQq2RyHOBLu6++/MMGWI/+GjWrJn86Ec/48knf0Vn5wLCv/zLy9dSXr6OlpY5Mc/r0+dydu9+lcjg\na5Wxs/MhOju3xdxe7AtexOuLf+GFrUyffkNWjl3Mr5lIIdBCHAXquusGF+SXW7yLBjCxRhgHugcW\nrV7dyqJF+4IjvM/DCr5HmTatkTlzZhIItHHoUANHjvi54IKj3H3355kzJzSCehCwgOgv/5aWShYt\namXKlNhB
S2vWtNHcbNc6xuE26/Z0WqR2rfPZs8fR2Lidn//8JPv3H8Ya5DWNyB4ad6cXxXtPtm49\nFpG+d/PYmtcskhkF4Dz
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x10c5b1d10>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn.datasets import make_blobs\n",
|
||
|
"X, y = make_blobs(n_samples=300, centers=4,\n",
|
||
|
" random_state=0, cluster_std=0.60)\n",
|
||
|
"plt.scatter(X[:, 0], X[:, 1], s=50);"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"By eye, it is relatively easy to pick out the four clusters. If you were to perform an exhaustive search for the different segmentations of the data, however, the search space would be exponential in the number of points. Fortunately, there is a well-known *Expectation Maximization (EM)* procedure which scikit-learn implements, so that KMeans can be solved relatively quickly."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAFVCAYAAAA30zxTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3WdAVFf+8PHvnWGG3kGpooKM2LtIVbH3GI3Gkl432Wz7\nb8tuspvkSTabbJJNNtlsuokxxsResIIoYseGgGPDioA0Yagzc+/zAkRwBkREsJzPKzy3nYPAb865\n5/yOpCgKgiAIgiC0LVV7V0AQBEEQ7kciAAuCIAhCOxABWBAEQRDagQjAgiAIgtAORAAWBEEQhHYg\nArAgCIIgtAOb5pyk0+mGAm/r9foROp0uBFgAyMBR4AW9Xi/WMgmCIAjCTbhhD1in0/0B+AKwrS16\nH3hZr9fHABIw9fZVTxAEQRDuTc0Zgj4JTKcm2AIM0Ov122u/Xg+Muh0VEwRBEIR72Q0DsF6vXw6Y\n6hVJ9b42AK6tXSlBEARBuNc16x3wdeR6XzsDxTe6QFEURZKkG50mCIIgCPeKGwa9lgTggzqdLlav\n128DxgMJN6yFJHH5cmkLHnXn8/Z2vmfbBqJ9dzvRvrvXvdw2uD/adyM3E4CvznT+HfCFTqfTAhnA\n0puvmiAIgiDc35oVgPV6/RkgovbrE8Dw21clQRAEQbj3iUQcgiAIgtAORAAWBEEQhHYgArAgCIIg\ntAMRgAVBEAShHYgALAiCIAjtQARgQRAEQWgHIgALgiAIQjsQAVgQBEEQ2oEIwIIgCILQDkQAFgRB\nEIR20JLNGAShVVRUVLAsOYkiYzUDAwKJ6DugvaskCILQZkQAFtpFStoh3j2RSWF0FCo7O5aeP0+v\nhV/z3sw52NnZtXf1BEEQbjsxBC20OaPRyPv6dIpHj0JVG2ylwECOThzH++vXtHPtBEEQ2oYIwEKb\nW5uynbzICItySaPhIHI71EgQBKHtiQAstLmC8jLUTk5Wj5VLbVwZQRCEdiICsNDmhvfojeroUavH\nOosOsCAI9wkRgIU2F9K5CxHnLiIbDA3KbY8cYVZwaDvVShAEoW2JWdBCu3h95hw+W7+G3VUVGNQq\nAkwys0LDiOjdt72rJgiC0CZEABbahUql4vmJU3m+vSsiCILQTsQQtCAIgiC0AxGAhftCVVUVJ04c\np6iosL2rIgiCAIghaOEepygKH8evJsFsJC8oEIcDJ+h7uYhXxk/CzdWtvasnCMJ9TARg4Z725ab1\nLO2pQ/L0RAuYunZlv6Lw57Ur+XTOY+1dPUEQ7mNiCFq4pyUariB5ejYokySJzB7dOZSZ3k61EgRB\nED1g4Q5mMpn4MXEzByrKUCToqbHlkbixaLXaZl1vNBop0GqsHwwO5lDybkbHhLdijQVBEJpPBGDh\njmQ2m/ntD99yaEwcKkdHAFIrK9n743d8PPuRZgVhjUaDR7WRHGsHs7LoHdT5lutpNBopLi7Gzc0N\njaaRYC8IgmCFGIIW7kgrk7dyaGRsXfAFUNnZcWz8WBZt3dzs+8TaOyIXFzcoUxQFXVoGA3v2bnH9\njp8+xaz33ybup++Ynp7KQ+tW8P6a5ciyyKUpCELziB6wcEMHMo6y/2wWHR2dmRQZjVqtvqnrFUVh\n95GD6C9eZGBIKL1DdTe8JrW0BJWrq0W5ys6OI1UVzX72c+MnU7F2BYkqhaKQEOxyc+mVncsr4yff\nVBvq+9+GtXxxSo/Dw7PqNpUoAlYaDFSvWsaLo8exZucOVCoVkyOicXBwaPGzBEG4d4kALDSqsrKS\nx/67kH3dukFUOHJJCT8s/YG/DBxKn5Dm5Wy+lJvLKwnrOdG/L1J0ON+eOEmvHxbw9tSZONbr3V6v\nqaEZFc3fMkmSJH47eTrPlpWhP30Sf10fOsb4NPv666WfPMFirRopKMhiRyeVkxMbZCNJm9ZQERuL\noih8nxDPPFdPZsWMaPEzBUG4N4khaKFR/1q3
ir1xcRAcDIDKxYXcCeN4O3V3s4daX9+6kZOTJyIF\nBACgdAvhyIRxvBm/qsnrhnl4IefnW5SbS0sZYN944G6Mo6MjA3r3pWPHlgdfgDWZaVS5OKPt0sXq\n8eruOoqDgpC0WlS2tpQOj+VLDaSdOH5LzxUE4d4jArBglaIopEpmJCsTiy4MHcKW3TtveI+TWac5\n1qWzRbmkVnPQ0Z6ysrJGr50QEU3krn0NgnDlsWPYfvc9W8pLeXHlz/ywZSOKojSrPa3FKEloOnbE\nmJ1t/Xh2NpoOHRqW9erFysy0tqieIAh3ETEELVglyzJlNtZnGkseHlzKPHnDe5zNzcHk74u1N8Zl\nbq6UlFxpdBhakiTeeng+63cmszMtg6KiQo452GN64Xmyas85UlTEqWVLeGXG7Ga2ypKiKCQfSGXP\npQtoZIUZg4fi7+NrcZ7ZbOZwxlHcSwxgMmHKy0Mxm5HqvQ9XzGZMeXmowy2XNhlU4rOuIAgNiQAs\nWKVWqwk0mjhl5ZjmyBFim7Ft4ABddxz3plAZYRmQOuTk4T0kpsnrJUliQmQME4A/rvgJ86iG71FV\n7u4kBfgwK+s0oV263rA+1zOZTDz96afs7NULKSYCRVFYu28fj2ZqmTdidN156/ak8N3F81wM06H0\nCUPZuBF1166UrF2LtnNnbENCqMrMxHXvfpwenW/xHMVsxl9p/ntraw5mprPq+DFKVRJ+isIjEbF4\ne3nd0j0FQWhf4mO50KjpgZ3QnDjRoEwuLyf8Yg6dAwJveL27uwcxZRXIpaUND+TmMs7JFRub5n/+\nO9nIT6q5Z082px9p9n3q+3LjOlJiY+veT0uShHHIEL6Vq7lwqWaI+cjxY3xoqiR3dBw2AQFogoLQ\nzp2LQ2UVYSaZTidOEfLDj3zi4cd3jz+L5559Fs/xSkjkkejYFtUR4Mdtify+tICkkTGkDo9mdWwU\nz+xIJDPL2scjQRDuFqIH3M7Kysr4LnEzJxUzdorCCN8ARg25M7IzTRwSgbv+MN9tTuCiWo2z2Uy4\n1o4XZjzc7Hv8adoMnNevIbmqgmIbNR1MZsa4evDImPE3VReNbP1dr2IyYXeTy6Ku2m+sRmVvb1Fe\nPXgwS7ft5NeTp7E0PY3qUcMtzpGGx9J3azJ/nPpgg/J/9BnA55sTOaYCSVEIk+GFIVG4ubm3qI7l\n5eX8UFKIacC1AC6pVBTFjeDzzYl80CW4RfcVBKH9iQDcjgqLCvll/EoujBuLVJvZaceFCxxevYzf\nTXnwBle3jalRUUTo+nLiTBbrjx5GAs5fym5WDxhApVLxy4lT+SU171Fvdg3xVX1Qcem6d64ADrt2\n8WDECC7lXOJ8bg49gkNwcnJu1j2rG+lVS5JEVe2xKzbW6ytJEkVqyxuEdQnmgy7ByLKMJElI0q0N\nPcfvTKZ4WLjVoapMGxUmk+mmRhIEQbhziCHodvTfrVu4MHlSXfAFkAICiPf25HjW6XasWUP/WrWU\nZ7OzWB4bydKYCJ7KOsbH65peRmRNS4MvwK/GTCBk9VrMtVmtFEVBu28fs23s+PuGNczVH+E39moe\nSk7gH8t/atYyqa5m6+VKdjbhHf0B8DRbP0mRZbyaeIZKpbrl4NscbfEMQRBuDxGA29ExlWL1D6i5\nV0/ijx5uhxpZWrt9O2u6d0PuXZO2UZIkTP36sTTQl5RDB9qsHo6Ojnw+93F+m3WB0dtTmLptB191\n78vu/DwOThiHPGAAGl9fyqMiWR8+iI/Wrb7hPR8bMgzv7dsblMlVVfTds5+YgYMAmN13IA779ltc\n67Ijhfnh0a3TuCZMiIjGbdduq8fCTPItfagRBKF9ibGrO9Ud0rNZf/480rBhlge6dGFT4nYi+w1o\ns7qo1WoeiBnBA7X/Tj9xnGO6UIsPMSpnZ3YYK3lJllHVW/5TWFTIf5MSyFQpoEB3ReLdoQP5ZksS\np9WglRUG
qLU88/AjdfcM7dKVP14pZsHGzZzy9UGSZUJz83i2Z186Xrfe93ZwcHBgjosHX2ZkYOrR\nA6jpfXskJvFs/8G3/fm
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x10c597fd0>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn.cluster import KMeans\n",
|
||
|
"est = KMeans(4) # 4 clusters\n",
|
||
|
"est.fit(X)\n",
|
||
|
"y_kmeans = est.predict(X)\n",
|
||
|
"plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, s=50, cmap='rainbow');"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"The algorithm identifies the four clusters of points in a manner very similar to what we would do by eye!"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## The K-Means Algorithm: Expectation Maximization\n",
|
||
|
"\n",
|
||
|
"K-Means is an example of an algorithm which uses an *Expectation-Maximization* approach to arrive at the solution.\n",
|
||
|
"*Expectation-Maximization* is a two-step approach which works as follows:\n",
|
||
|
"\n",
|
||
|
"1. Guess some cluster centers\n",
|
||
|
"2. Repeat until converged\n",
|
||
|
"   - **A.** (E-step) Assign each point to the nearest cluster center\n",
|
||
|
"   - **B.** (M-step) Set each cluster center to the mean of the points assigned to it\n",
|
||
|
" \n",
|
||
|
"Let's quickly visualize this process:"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAFVCAYAAAA30zxTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XdAlEfewPHvdmDpHUUEUbFg7733rrGbxJhe3rSL5pLz\nLpfkLpdcersUY0yMib1j7yJ27G1BQVHpsPSyy+7z/oGim10UUFnA+eSfME+bQZPfM/PMzE8mSRKC\nIAiCIFQvub0rIAiCIAgPIxGABUEQBMEORAAWBEEQBDsQAVgQBEEQ7EAEYEEQBEGwAxGABUEQBMEO\nlBU5KSwsrAvwoU6n6xcWFtYY+AUwA2eAF3U6nVjLJAiCIAiVcNcecFhY2BxgHqC5UfQZ8LZOp+sN\nyIAxD656giAIglA3VWQI+iIwntJgC9Bep9PtvfHvm4CBD6JigiAIglCX3TUA63S6VUDJbUWy2/49\nD3C735USBEEQhLquQt+A/8R827+7AFl3u0CSJEkmk93tNEEQBEGoK+4a9KoSgI+HhYX10el0e4Bh\nwI671kImIy0ttwqPqvl8fFzqbNtAtK+2E+2rvepy2+DhaN/dVCYA35zp/BdgXlhYmBo4B6yofNUE\nQRAE4eFWoQCs0+kuA91v/Hss0PfBVUkQBEEQ6j6xEYcgCIIg2IEIwIIgCIJgByIAC4IgCIIdiAAs\nCIIgCHYgArAgCIIg2IEIwIIgCIJgByIAC4IgCIIdiAAsCIIgCHYgArAgCIIg2IEIwIIgCIJgB1VJ\nxiAI90VhYSGbFkeRn15Cs27+dOrV1t5VEgRBqDYiAAt2cWTvGdb99TreF0egwpEd6ji29VvC6/PG\n4uDgYO/qCYIgPHBiCFqodkajkfVzEwi4+AgqHAFwNTTCZctMFv5rm51rJwiCUD1EABaq3fbVUXhc\nGG5VrkBFUqTGDjUSBEGofiIAC9UuO70QDbaTVRvzFNVcG0EQBPsQAViodl0GNSfd9YjNY27Niqu5\nNoIgCPYhArBQ7UKaBOM++izF5FiUZ3gfpM9TgXaqlSAIQvUSs6AFu3jpk3EsDtrC5e0SxhwVzo0K\nGTYrhI69W9u7aoIgCNVCBGDBLuRyOdNfHQqv2rsmgiAI9iGGoAVBEATBDkQAFh4KxcXFxMbGotdn\n2rsqgiAIgBiCFuo4SZJY9MkWLq1SI78URolnHG69d/PUfwfg5u5m7+oJgvAQEwFYqNOWf7ud1M/6\n4GvyLS3IbIa0RuLb/AW8/ftE+1ZOEISHmhiCFuq082tLcLwZfG+QIUOK7MbpY+ftVCtBEATRAxZq\nsJKSEtYt3E3CvhKQZAR0khj3VH/UanWFrjcajRQnOdk85lHUnAvHltF/SOf7WWVBEIQKEwFYqJFM\nJhMfP7Mch4hpaHAGIHlDIR/tXcSbC8dXKAirVCo0/gWQan0syyGGHm2D7rmeRqORrKws3N3dUalU\n93w/QRAeHmIIWqiRNi+PRB3xSFnwBVDhiOvOGaz9eXeF79NkpJxCWYZFmYSE1D2KNh1bVrl+l3Rx\nvDb8K95ouZEvOyTyrz57WfDvDZjN5irfUxCEh4voAQt3deroOc4eSMCzvpaBY7qjUFQuYYIkSUQf\nOEn8uSTCO4fSvHXTu15zOaoIRzytylU4cv2wBM9V7NlTXxnML/kbiV/tjENCawxuV9H2vMhzH/ev\nVBtut+TLbWz9+BqtDC/fSipxEbK+zGG+IYKps/uyffUBFHI5A8Z1x8nJ9jC4IAgPNxGAhXIVFRXx\n9oRVFGzsjEfRRBLRs/9/G5nySTOat21SoXskJ6by08uRKA/2wdXQi1VOZ1H2Xc5L3w5Hq9WWe51c\nDlI5x2SVGLeRyWQ88bcR5L+aT+yFSwQE+uHnV/XtLi+cjuXMFz64GZysMjppcEW32IF/r9tPwPXR\ngMSHX2yhy8tKRjzaq8rPFAShbhJD0EK5Fry3
BeWqaXgUNQfAEQ/8Tk1j6ZvnKjzUuuCNKDz3zsTV\nEAKAR0FLtBsf56e3tt7xuqb9XciXJ1uVF5FDw56Vf2/UarW07dAaPz+/Sl97u6gVsSjyPfDCdi/e\nPastmuvNUaJGiQb/K6M59r4P50/F3tNzBUGoe0QAFmySJInEPRoUWE8scj4xmL1bDt71HnGx8Rj3\nt7Iql6Mgfa83+fn55V7bf3R3lJM3WgThZE5xPuhzzq8y8cmMzaz9ZSeSVF4/+cEwFylwoR7ZJNg8\nnk0CzgRYlHlldWLvHzHVUT1BEGoRMQQt2GQ2mzHlONg85ij5kH49x+ax2127nIJTQQebx+SZvuTk\nZJc7DC2TyXj1i0fYOWA/uh056PUZFEV70y7hHW7Gvovb0/let5bn/zO2Yo2yQZIkDu05zrk9KSg0\nZgbNaE9AfX+r80wmE6ePn0Xmn4GEkTySMGNCzq3v4WZM5JJICH2trjdmVWzplCAIDw8RgAWbFAoF\nLk0KIMX6WLr7QUYNDL/rPVp1DGOHfzROyYOsjslCLuPjM+CO18tkMgaM7sGA0fDVCxvwSLPcucrR\n7E368uZcmhlPaFjIXevzZyUlJcyd+itFq/vgauiNhMR3v+ymw2wdY2b1KTtv55pDRH6tR3WmI5LM\njwTXFbjktOAMS/CkCT60IFV+nIyg3TS//JrVc8yYcG1krHT9bnc6+jxRi+MxZClxCTYw6tluePt4\n3dM9BUGwLzEELZSr6+PeZLuesSgzkI/HCB1BwYF3vd7Dw4N6o1MpItuiPE91nZaTFCiVFX//yzyt\nsVnuldORAxsuVPg+t1v27XZYOqns+7QMGf4Z/Yj+WEvitSQAzp2MIeotZ3xOT8RdCsHD3Jg2OS8i\nc87Hu0M+5lbHSR7wORPXyXh/3XSyQrdbPSe12UpGP9O9SnUEiFi4l7VTlJgXTka5bgL5X03m6zEn\niD0bV+V7CoJgf6IHbGf5+fmsnreHzDMK5E4m2ozwplcN2Z2p35jOuHucZdu3S8mN06D2KKHhQDPT\n/1LxId+n3xvJ725bidskozhVg2NgIeETHBj39MBK1UWusT3py0QJKkdZpe5109W9MlywXiLkl9GX\n7b8v47E3A9i9MAavjKlW5wTnjUDTeQlPvTvSonz6TyFEfLaYjGOOSDIJn46FzJrTFnd39yrVsaCg\ngCPfmPDLbl9WJkeO38WxrP90Ma//3KhK9xUEwf5EALYjfaaez2fswPvodJSUfiOMWhXPxecieGLu\nyLtcXT0GT+hKu94tiYu5zL4155HJ4PrVpAr1gAHkcjmPzh4Ks0u/o1Z2DfFN/l2KMZyy/OYKkFpv\nC49O7UpyUjKJCck0bRmKs7NLOXexZCq0PQAkQ0bJjWOGNA22vt7KkFGcZt0rb9qyEa/Pb4TZbEYm\nkyGTVe3l4KYda/fjeXmYzWMZ0U6UlJRUaiRBEISaQwxB29HSTyLxOzqzLPgCuBpCuLqgKZd08Xas\nmaX5769n0fBC8j6ZRM7HE/l5sJ5Fn2yu9H2qGnwBpr/dn4zeP1NA6a5WEhIp3rtp+2Ix817fwXe9\nU9gyKoSPe53kx3+sq9AyKY+WxTbLc1RXaNbLBwCHerbPMWPGMcD2MSh98bjX4FsR1fEMQRAeDBGA\n7Sj9mAYZ1v8D9c7twL5VNSNTz7a1+0n/oQfeOaXD4jJk+GZ1J+Gr1hzee7za6qHVapm7dCLh3x5E\n/dQKnF5ZytObA9DtzsIp4nH8snviRiB+14eR9/1IFn189xeEES+0I6P5eosyI0UwZAvd+pcO+Q56\noiXp/rutrk1ruJERz3S5L227kwFjupMZbHvNtFeHgnt6qREEwb7E2FUNVVN6NkdWpuJisJ5A5F4U\nRvSaE3Tu3a7a6qJQKBg6sTfcmAx9/nQs5n2drV5iHHDj0gYF5tlm5PJb75j6TD3LPtlHerQGCRne\n7Yp4fEEr
Iv63GP1ZDQoHM4E9TUx++ZGy339oWAiDv8hhx5eLMRxvDHITDh3jGT87FD8/nwfeZicn\nJzq9pOD4+9F4ZZcu6TJ
|
||
|
"text/plain": [
|
||
|
"<matplotlib.figure.Figure at 0x10d30d2d0>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from fig_code import plot_kmeans_interactive\n",
|
||
|
"plot_kmeans_interactive();"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"This algorithm will (often) converge to the optimal cluster centers."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### KMeans Caveats\n",
|
||
|
"\n",
|
||
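|
"* Convergence to the globally optimal clustering is not guaranteed: the result depends on the initial cluster centers and may be only a local optimum. For that reason, scikit-learn by default runs the algorithm from several random initializations (the `n_init` parameter) and keeps the best result.\n",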
|
||
|
"\n",
|
||
|
"* The number of clusters must be set beforehand. There are other clustering algorithms for which this requirement may be lifted."
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 2",
|
||
|
"language": "python",
|
||
|
"name": "python2"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 2
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython2",
|
||
|
"version": "2.7.9"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 0
|
||
|
}
|