2015-04-11 14:01:49 -04:00
{
2015-04-14 14:24:23 -04:00
" cells " : [
2015-04-11 14:01:49 -04:00
{
2015-04-14 14:24:23 -04:00
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
2015-04-15 12:35:19 -04:00
" # scikit-learn-intro "
2015-04-14 14:24:23 -04:00
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
2015-05-31 09:46:07 -04:00
" Credits: Forked from [PyCon 2015 Scikit-learn Tutorial](https://github.com/jakevdp/sklearn_pycon2015) by Jake VanderPlas \n " ,
" \n " ,
2015-04-14 14:24:23 -04:00
" * Machine Learning Models Cheat Sheet \n " ,
" * Estimators \n " ,
" * Introduction: Iris Dataset \n " ,
2015-04-15 12:35:19 -04:00
" * K-Nearest Neighbors Classifier "
2015-04-14 14:24:23 -04:00
]
} ,
{
" cell_type " : " code " ,
" execution_count " : 1 ,
" metadata " : {
" collapsed " : false
} ,
" outputs " : [ ] ,
" source " : [
" % matplotlib inline \n " ,
" import numpy as np \n " ,
" import matplotlib.pyplot as plt \n " ,
" import seaborn; \n " ,
" from sklearn.linear_model import LinearRegression \n " ,
" from scipy import stats \n " ,
" import pylab as pl \n " ,
" \n " ,
" seaborn.set() "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" ## Machine Learning Models Cheat Sheet "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : 2 ,
" metadata " : {
" collapsed " : false
} ,
" outputs " : [
2015-04-11 16:33:28 -04:00
{
2015-04-14 14:24:23 -04:00
" data " : {
" image/png " : " iVBORw0KGgoAAAANSUhEUgAACEoAAAUrCAYAAAA3g9BsAAAAAXNSR0IArs4c6QAAAAlwSFlzAAAG \n bgAABm4BRLh3LwAAAAd0SU1FB90EEw8XG1Hi8acAAAAGYktHRAD/AP8A/6C9p5MAC5xvSURBVHja \n 7N0HfBR1+sfxv3eo2M7ee+9nOT0PxcKJZzlUVFA5sYsKdk9PTz3RS68EAklIQhopEAgJkEKAAAkh \n BFJJQJoo0gWVTrb//vuMbm53szUkkIQPr9f79dJkZnZ2ZrKZ7PPd5/k/pVQLAAAAAAAAAAAAAADA \n keD/OAgAAAAAAAAAAAAAAICgBAAAAAAAAAAAAAAAAEEJAAAAAAAAAAAAAAAAghIAAAAAAAAAAAAA \n AAAEJQAAAAAAAAAAAAAAAAhKAAAAAAAAAAAAAAAAEJQAAAAAAAAAAAAAAAAgKAEAAAAAAAAAAAAA \n AEBQAgAAAAAAAAAAAAAAgKAEAAAAAAAAAAAAAAAAQQkAAAAAAAAAAAAAAEBQAgAAAAAAAAAAAAAA \n gKAEAAAAAAAAAAAAAAAAQQkAAAAAAAAAAAAAAACCEgAAAAAAAAAAAAAAAAQlAAAAAAAAAAAAAAAA \n CEoAwKFnsVhaDAZDy/79+3W7du3S/fTTT7qtW7fqN2zYoP/+++9brV+/3qUffvjBJVnflY0bN7ba \n vHmz/scff9QeUx573759uhbrP6PRqO0X5wcAAAAAAAAAAAAgKAHgCCehhp07d+o2bdqkX716tb6h \n ocFQXV1tqKysNM6fP984e/ZsY1FRkWn69OmmqVOnmnJycswTJ040p6SkmMePH28eO3asJSYmxhIR \n EWEJDg5W//3vf7uswMBAFRoaqmRfo6OjLWPGjLGMGzfOkpCQYElOTjanpqZqzy07O9s8efJkU15e \n nva8CwsLTSUlJaY5c+YYFyxYYKyqqjLW1dUZmpubDWvWrNECHlu3btX98ssvWjhDghlcWwAAAAAA \n AAAAACAoAQCHqKODFOuls8J3332nl2K+BB/KysqMM2bM0IIOEyZMMEtIQEIDXTnY0J1JKEMCGaNH \n j7bEx8db5JhnZmaac3NztfCFBC/knEggpaamxrBs2TLDypUr9XLOpDPGjh07dHv27NHp9Xq6YQAA \n AAAAAAAAAICgBIAjlxTPZXSFdDSQbg/S5SE9Pd0sxfjIyEhLQEAAQYUeSEIt0gVDOmAkJSWZMzIy \n zJMmTTJPmzbNJB0/pNtFRUWFUUIx0hVkxYoVhm+//VYbTyKhGRlFcuDAAZ3ZbObnCAAAAAAAAAAA \n AAQlAHQt0kVgy5Yt+qamJoOMepAREImJiWY6QKAjyIgUCdXExsZaZHyKjByRcSNynUnoYu7cuVqn \n i9raWm28yNq1a/UbN25s7XIh41r4OQUAAAAAAAAAAABBCQB+kU/2//TTT7rVq1frq6qqjIWFhVpn \n COkaQDEf3WG0iC1sIR0uJk6cqI0VkfEupaWlxvLycq27RWNjo2HVqlV66YKydetW3c6dO3UtLS06 \n xokAAAAAAAAAAAAQlADQg+3bt0+3cuVKvYxHyMnJMcv4BCk0d9cieZB138NCQlR0eLiKjYpSCTEx \n Kjk2VqXHx6us8eNV7oQJKj8tTRVOnKhmZWeruZMnq/KpU1XltGmdqsL6GPNyc9WcSZNUSVaW9vgF \n 6ekqLyVFTU5OVtmJiSrDuo+p48apJOv+JowercZFR6vRkZHacwm3PqfgoCDF+JJDIywsTMXExFhk \n bIytq4WMEikuLjaVlZUZFy1aZJQRM8uXL9fGiGzatEkv4aK9e/fqjEYjry0AAAAAAAAAAAAEJQB0 \n BfJJ+R9//FEnBd6CggLT2LFju1yHiNDgYC3gkDJunBYgKMzMVPOnTFE1M2aoFaWlat38+WpDebna \n smiR2lFdrXbV1Kh99fVKv2yZsixfrtSKFT2e2fo8Ddbne6ChQe2tq1M7rcdAjsWWykr1/YIFavXc \n uaqppETVzpypFuXnq/m5uVooZEZGhpoyYYIWGJHjK2EMCWJEhIaq7hyO6ZKBnaAgFRUVZZGfseTk \n ZHNmZqZ5ypQpppkzZ5pmz55trKioMC5ZssR6GpcZpHPL+vXr9du2bdPt2rVLZ/3XQlcLAAAAAAAA \n AAAAghIA2sFgMLR89913einKZmVlmcPDww95MEI6IESFhWmdHSYmJKhpqamqNDtb67LQUFSk1syd \n qzZXVmqBB2NT0xERdOiqTM3Nan99vfpl6VK1rapKC6SsLStTy0tLVX1hoaouKNC6b8zOydE6Ysi5 \n zElKUmlxcSpxzBg1NjpaO9fS+YKwxMH/3ERERLQGLeTnVzpalJSUmBYsWKCFLJqamgxr167Vuln8 \n 8ssvjA0BAAAAAAAAAAAgKAEceXbv3q2TUQBSTE1KSjIfqi4BMu4iPiZG6wAhoywkACFFdul2cKR0 \n eYAjOe8tjY1qd22t2r54sdq0cKHWDWTl7NmqsbhYLZ0xQy3My1Nl1uulOCtLFaSladePhGlkVErc \n qFFqVESENkaFcSP+BSwiIyO1gMWECRO0kSH5+fmmWbNmGcvLy41Lly41yKidzZs36/fs2aMzm828 \n dgIAAAAAAAAAAIISALoPGaMhnyyfOnWqKSYmptO7RcRERGiFbClsL5k+XX07b57WfYAwBDqbrrFR \n 6zoinS5+KC/XxossKynRAhcVeXlal4uZEydqo0Uyfwtb2DpcBDFWxGOwIjo62iLBqpycHHNhYaFJ \n AhX19fVatwoZB7J//34dr7cAAAAAAAAAAICgBIDDQtrqS5v9OXPmGGNjYzslGBEeEqKSx47VPuEv \n BegVpaVacdrAWAx0YzLWRTqc7Kiu1rpbSMhHru26wkJVlZ+v5uXmagGgfOt1n5OYqFLj4rRxMRIO \n Cg0OPuIDFUFBQWr06NGWlJQUc25urjYCpLKy0rhs2TKDjPfZsWOHzvqP12kAAAAAAAAAAEBQAkDH \n hCPWr1+vl8JkR3eNCA4KUmlxcdr4gzVlZWp/fT1FdcDdKJGGBrVz6VK1ddEitX7BArVqzhxtjIh0 \n VymfOlWV5uSoGRkZKnfCBJURH6+SYmNVbFSUiggNVYFHyPiQUOtzTUhIsEyePNk0e/ZsY01NjdaZ \n 4ueff9aZTCZe0wEAAAAAAAAAAEEJAK5JQVGKizNnzjRFRkZ2WDgiOjxcG01QPX262rxwoTI3N1ME \n Bw4Rw7Jlak9dndq+eLHaWFGh1paVqeZZs1TtzJmqcto0NXfyZFWUmanyUlNV1vjx2vgQCVqEh4b2 \n mDEf0pUiIyPDPGPGDNPChQuNy5cvN2zevFl/4MABxnsAAAAAAAAAAACCEsCRxmAwtKxcuVKfn59v \n Cg8Pt3REUXL86NFa4bWppETtrKmhWA10444WBxoa1E9LlmijQ6QDzDLrz7WEnuZPmaKNDZGARWZC \n gtbJYkxkpAoLCelWQQp53UtMTDRPmTLFJOOF6urqDOvWrdPv3LlTZzab+T0BAAAAAAAAAAAISgA9 \n gU6na2lubjZIYTDkIIuaocHBamJCglowZYpaN2+e0jU2UmAGjnDm5cvVvvp6taO6Wm2oqFCr585V \n dYWF2uvEzIkTVXZiohaoigwL69IhisDAQBUXF2eR18r58+drnSi2b9/OOA8AAAAAAAAAAEBQAugO \n pLDX1NRkyM7ONgcFBR1U8TBxzBi1MC9PbV20SPvEOYVhAO1lam5WO5cuVRvKy9WK0lJVXVCgZufk \n qGmpqSotLk4bAxJ8kK9ZnRWgyM3NbQ1Q/PjjjwQoAAAAAAAAAAAAQQmgK9i/f7+uoqLCGB0dfVBj \n NSaMHauq8vO1gibFXQCHWktDg/px8WKtc01DUZGqyMvTRvzkJidrYz/Cu8DIDwIUAAAAAAAAAACA \n oARwGG3btk03Y8YMU3BwcLsKfgEBASo9Pl4tnTFD7amtpVALoMs70NCgtlRWquWlpapy2jQ1MyND \n ex0bHRmpvaYRoAAAAAA
" text/plain " : [
" <IPython.core.display.Image object> "
]
} ,
" execution_count " : 2 ,
" metadata " : {
" image/png " : {
" width " : 800
2015-04-11 16:33:28 -04:00
}
2015-04-14 14:24:23 -04:00
} ,
" output_type " : " execute_result "
}
] ,
" source " : [
" from IPython.display import Image \n " ,
" Image( \" http://scikit-learn.org/dev/_static/ml_map.png \" , width=800) "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" ## Estimators "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" Given a scikit-learn *estimator* object named `model`, the following methods are available: \n " ,
" \n " ,
" - Available in **all Estimators** \n " ,
" + `model.fit()` : fit training data. For supervised learning applications, \n " ,
" this accepts two arguments: the data `X` and the labels `y` (e.g. `model.fit(X, y)`). \n " ,
" For unsupervised learning applications, this accepts only a single argument, \n " ,
" the data `X` (e.g. `model.fit(X)`). \n " ,
" - Available in **supervised estimators** \n " ,
" + `model.predict()` : given a trained model, predict the label of a new set of data. \n " ,
" This method accepts one argument, the new data `X_new` (e.g. `model.predict(X_new)`), \n " ,
" and returns the learned label for each object in the array. \n " ,
" + `model.predict_proba()` : For classification problems, some estimators also provide \n " ,
" this method, which returns the probability that a new observation has each categorical label. \n " ,
" In this case, the label with the highest probability is returned by `model.predict()`. \n " ,
" + `model.score()` : for classification or regression problems, most (all?) estimators implement \n " ,
" a score method. Scores are between 0 and 1, with a larger score indicating a better fit. \n " ,
" - Available in **unsupervised estimators** \n " ,
" + `model.predict()` : predict labels in clustering algorithms. \n " ,
" + `model.transform()` : given an unsupervised model, transform new data into the new basis. \n " ,
" This also accepts one argument `X_new`, and returns the new representation of the data based \n " ,
" on the unsupervised model. \n " ,
" + `model.fit_transform()` : some estimators implement this method, \n " ,
" which more efficiently performs a fit and a transform on the same input data. "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" ## Introduction: Iris Dataset "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : 3 ,
" metadata " : {
" collapsed " : false
} ,
" outputs " : [
2015-04-11 16:33:28 -04:00
{
2015-04-14 14:24:23 -04:00
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" [ ' target_names ' , ' data ' , ' target ' , ' DESCR ' , ' feature_names ' ] \n " ,
" (150, 4) \n " ,
" (150, 4) \n " ,
" (150,) \n " ,
" [ ' setosa ' ' versicolor ' ' virginica ' ] \n " ,
" [ ' sepal length (cm) ' , ' sepal width (cm) ' , ' petal length (cm) ' , ' petal width (cm) ' ] \n "
2015-04-11 16:33:28 -04:00
]
2015-04-14 14:24:23 -04:00
}
] ,
" source " : [
" from sklearn.datasets import load_iris \n " ,
" iris = load_iris() \n " ,
" \n " ,
" n_samples, n_features = iris.data.shape \n " ,
" print(iris.keys()) \n " ,
" print((n_samples, n_features)) \n " ,
" print(iris.data.shape) \n " ,
" print(iris.target.shape) \n " ,
" print(iris.target_names) \n " ,
" print(iris.feature_names) "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : 4 ,
" metadata " : {
" collapsed " : false
} ,
" outputs " : [
2015-04-11 16:33:28 -04:00
{
2015-04-14 14:24:23 -04:00
" data " : {
" image/png " : " iVBORw0KGgoAAAANSUhEUgAAAeYAAAFkCAYAAAD165gcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz \n AAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd8VFXawPHfnZn0TgihBEKTQw9NmggoNoqoWFcFG4pd \n dxXL7rvuru66urq2VXTXBoJYEUWKoii9KL2FAwSQDiEhvc7Mff+YEMpMkiHJJJPh+frhY+7k3HOe \n k/bMPefecwzTNBFCCCGEf7DUdwBCCCGEOEkSsxBCCOFHJDELIYQQfkQSsxBCCOFHJDELIYQQfkQS \n sxBCCOFHbL6qWCllAd4DOgBO4G6ttfZVe0IIIUQg8OUV82VAhNZ6EPAs8A8ftiWEEEIEBF8m5kIg \n RillADFAiQ/bEkIIIQKCz4aygWVAKLANiAeu9GFbQgghREAwfLUkp1Lqj7iGsv+klEoCfgK6aq09 \n XjmbpmkahuGTWIQQQpw1n/1B7tVvUrUTz9pV9wd8ovDlFXMEkFP28XEgCLBWVNgwDNLTc30YTt1J \n SIiSvvgh6Yt/kr74p4SEqPoO4Zzly8T8EvChUmoJrqT8tNa60IftCSGEEA2ezxKz1joLuMZX9Qsh \n hBCBSBYYEUIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyI \n JGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkII \n IfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hi \n FkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDCj0hiFkIIIfyIJGYhhBDC \n j0hiFkIIIfyIzZeVK6VuA24vOwwDUoBErXWOL9sVQgghGiqfJmat9RRgCoBS6k3gPUnKQgghRMXq \n ZChbKdUH6KK1fq8u2hNCCCEaKp9eMZ/ij8BfqyqUkBDl+0jqiPTFP0lf/JP0RYiTfJ6YlVKxQAet \n 9aKqyqan5/o6nDqRkBB1TvVl27btrF23mT69u9OhQ/s6iuzsnWvfl4ZC+uKf5A1G/amLK+bBwII6 \n aEfUg29mzed/762j1NGU6Z9+yX0T+jJyxMX1HZYQQjRYdTHH3AFIq4N2RD34etY67M7mGIaFUkcL \n vv5mdX2HJIQQDZrPr5i11i/7ug1Rf5xO84zjegpECCEChCwwImpk2NDzMMgEwEIGwy5W9RyREEI0 \n bHV1V7YIUOPGXUty8nK2pu6mW9cBDBrUr75DEkKIBk0Ss6ixIUMGMmTIwPoOQwghAoIMZQshhBB+ \n RK6Yhd9ZuuwXvvt+DTYr3H77SFont6rvkASwctUaZs/5BasFxo29gnbt2tR3SEJUSil1OdBKa/2u \n F2UTgWe01g9U8PkUYLTW+rlaDtONJGbhV9au28QL//qZUkdzALbpybz3v98TGSmLHdSnLVu38fwL \n P1Bc2gKArdum8t9JD9KoUaN6jkyIimmtvz+LskcAj0m57PMbgA21EVdVJDELv7J4ydrypAyQnpnI \n ihW/cumlsmhJfVq4cHV5UgbIykli8eKVXH31iHqMSojTKaVmAK9rrReX7dGwAJgEvAPMBo4Bc4FF \n wJtALnAUKMK1bPSnWusBSqmNwEKgO2ACVwG9gAla698ppe4C7gWswCyt9V+VUg8C1wARZe1co7Uu \n rU4/ZI5Z+JVGcZE4HEXlxxYjh5YtW1RyhqgLCY2jMZ0nvy8G2SQny/dF+J13gdvKPr4D1z4NJyQC \n l2qtX8KVqG/TWg/DtQDW6QsyQBQwXWs9FDgADD9RRimVADwJDNJa9wKClVJRQCPgEq11f1wXvedX \n txOSmIVfueXmq+mdkg3OPVgtuxhzVQs6dqzZs9G7du3hD4+9wg03Pcsrr36AU1ZBOWvXXTeKvr3z \n wbkHi5HGqOHx9OyZUt9hCXGm+UBfpVQcMAgoPOVzu7XW9rKPm2mtU8s+XgIYHupaV/b/fUDoKa+3 \n BTZrrYsBtNZ/1FrnAqXAJ0qp94AkajAiLUPZwq9YrVZefOFxcnKyCQoKJiwsrEb1mabJ3577iMPp \n rhuVtu8sJCLiMybc87vaCPecYbFY+Ptzj5KTk43NFkR4eHh9hySEG621Uyn1Ba4r4pmA45RPn/qO \n fJ9SqlNZch5QQXVnXkWfkAZ0VEoFa61LlFKfAW8BV2mt+yulwoHV1ODCVxKz8EvR0TG1Uk9ubg5H \n jlrK3w9brGHs2p1ZK3Wfi2rr+yKED30I7AQmAhdxMsGemmjvBz5QSuUBJcB+D2VOVV6H1vqYUupF \n YJFSygRmAb8C+Uqpxbjml9cCzarbAUnMIqBFRUUTG2PneI7r2OksJbFJza7ChRD+S2u9DwgpO5xy \n yqdOXQWpL3BlWZJ9DijWWv92oozWuvxZQK3106ect6jstSln1A0wrHZ6IHPMIsAZhsHEx8aQ1Gw/ \n 8XH76dsrlwcfuLW+wxJC1K8jwPyyK9wUXEPRfkOumEXA6927O++/2z2gNrEXQlSf1noGMKO+46iI \n XDELIYQQfkQSsxDCK6ZpcuTIYTIyMuo7FCECmgxlCyGq5HA4eOqPr7JufSkWq4NLL27GxMfvqu+w \n hAhIcsUshKjSZ599w/pNMViDkjAsycxfkMuvv66p77CECEiSmIUQVTqeXYDFcsriR0YUBw4eqb+A \n hAhgkphFg2WaJj8uWMisWfMoKiqq+gRRbZcO609oyL7y4/jY/Vw0dGAlZwghqkvmmEWDZJomE598 \n mfUbI8AI4quvX+Ct/0wkIiKivkMLSB06tOfZZ67im2+XYbGY3Db2LmJiYus7LCECkiRm0SAtW7aS \n dRtCsQW59mk+eKQNU6d9zb0TbqnnyAJXSkoXUlK61HcYQgQ8GcoWDVJxcTGnv680sNtl1yghRMMn \n iVk0SIMHX0Db5HRMpwPTNImN2sX1111W32EJIUSNyVC2aJCCgoL4zxtP8PH0rykpcTDmmvtITEys \n 77CEEKLGJDGLBis0NJS77rypvsMQQohaJUPZQgghhB+RK2bhd1asWM1389dgtcAdt4+kZcukGtWX \n n5/PW5M+obTUoGvXllw1OvDnok3T5P33P2fP3uMktYjmnrtvwmKR9+FCNASSmIVf2bBhC8+/+CMl \n 9hYAbN32Pu/99w9ERkZVu86JT77Gzt1JGIaFRctSsZfaufbaEbUVsl96+d/vM/8nJxZLBKtW53Ms \n 47/83x/vq++whBBekLfQwq8sXLS6PCkDpGc0ZcXK6q/JnJeXy67dTgzD9aNuGI34dc2emobp97am \n ZmKxhAFgsYSybVtWPUckhPCWT6+YlVJPA1cCQcCbWuspvmxPNHyxMeE4HNlYrSEAWI0cklo0rXZ9 \n YWHhhIU6KCh2HZumSXhYbUTq38LDjUqPhRD+y2dXzEqpocAArfVAYCjQ1ldticBx661j6NEtE9O5 \n F6uxm6tGNaNTp47Vrs9qtXLH7YMIC07DdB4gqdkeHnygZndym6bJG29M4b4HXuHxJ15jz569FZad \n MWMuDzz0Kg898grLl/9ao3bPxn0TRtEoZhcO+35io3dx7z0j66xtIUTNGKZp+qRipdTzgAl0AaKB \n iVrrysYkzfT0XJ/EUtcSEqKQvlSfaZocP55JcHAIkZGRtVJnUVERwcFODCMMw6jZ1eO7733K51/l \n lA8VN03YzeQP/uxW7+IlK3n+xeWYJAAQFryXdybdRdOm1R8BOMGb74vdbicjI4P4+HhsNv+9nUR+ \n X/xTQkKUz4ZZevWbVO3Es3bV/QE//OPLOeYEoDdwHXAv8LEP2xIBxDAMGjWKr7WkDK5nnhMTE2uc \n lAHSdmWUJ2WAw0cs5ObmuJVbv357eVIGyCtM5Ndf19e4fW/ZbDYSExP9OikLIdz58jf2GJCqtbYD \n 25VSRUqpxlrrYxWdkJB
" text/plain " : [
2015-04-15 12:35:19 -04:00
" <matplotlib.figure.Figure at 0x10da12b90> "
2015-04-14 14:24:23 -04:00
]
} ,
2015-04-11 16:33:28 -04:00
" metadata " : { } ,
2015-04-14 14:24:23 -04:00
" output_type " : " display_data "
}
] ,
" source " : [
" import numpy as np \n " ,
" import matplotlib.pyplot as plt \n " ,
" \n " ,
" # ' sepal width (cm) ' \n " ,
" x_index = 1 \n " ,
" # ' petal length (cm) ' \n " ,
" y_index = 2 \n " ,
" \n " ,
" # this formatter will label the colorbar with the correct target names \n " ,
" formatter = plt.FuncFormatter(lambda i, *args: iris.target_names[int(i)]) \n " ,
" \n " ,
" plt.scatter(iris.data[:, x_index], iris.data[:, y_index], \n " ,
" c=iris.target, cmap=plt.cm.get_cmap( ' RdYlBu ' , 3)) \n " ,
" plt.colorbar(ticks=[0, 1, 2], format=formatter) \n " ,
" plt.clim(-0.5, 2.5) \n " ,
" plt.xlabel(iris.feature_names[x_index]) \n " ,
" plt.ylabel(iris.feature_names[y_index]); "
]
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" ## K-Nearest Neighbors Classifier \n " ,
" \n " ,
" The K-Nearest Neighbors (KNN) algorithm is a method used for algorithm used for **classification** or for **regression**. In both cases, the input consists of the k closest training examples in the feature space. Given a new, unknown observation, look up which points have the closest features and assign the predominant class. "
]
} ,
{
" cell_type " : " code " ,
" execution_count " : 5 ,
" metadata " : {
" collapsed " : false
} ,
" outputs " : [
2015-04-11 16:33:28 -04:00
{
2015-04-14 14:24:23 -04:00
" name " : " stdout " ,
" output_type " : " stream " ,
" text " : [
" [ ' versicolor ' ] \n " ,
" [ ' setosa ' ' versicolor ' ' virginica ' ] \n " ,
" [[ 0. 0.8 0.2]] \n "
2015-04-11 16:33:28 -04:00
]
} ,
{
2015-04-14 14:24:23 -04:00
" data " : {
" image/png " : " iVBORw0KGgoAAAANSUhEUgAAAfAAAAFgCAYAAABEyiulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz \n AAALEgAACxIB0t1+/AAAIABJREFUeJzs3Xd4VFX6wPHvJJPeSELovV2qCCKgSAcLIChVVNTVXXVd \n XXV3rdtcd127a10Lrj8UKQJKV5AuRemhcwIJISQEEkhvM5ny+2NCCGSSDExP3s/z8Dw5M2fufe8w \n yTvn3HPfq7NarQghhBDCvwR4OwAhhBBCXDlJ4EIIIYQfkgQuhBBC+CFJ4EIIIYQfkgQuhBBC+CFJ \n 4EIIIYQf0ns7gCuRnV0o17wJIYRoMBISonQ1PScjcCGEEMIPSQIXQggh/JAkcCGEEMIPSQIXQggh \n /JAkcCGEEMIPSQIXQggh/JAkcCGEEMIPSQIXQggh/JAkcCGEEMIPub0Sm6ZpTYDdwEilVFKVx58G \n HgKyKx56pOrzQgghhKiZWxO4pmlBwKdAsZ2n+wIzlFJ73RmDEEIIUR+5ewr9TeBjINPOc9cBL2qa \n tlnTtOfdHIcQQghRr7htBK5p2gNAtlLqR03TXgAuL8g+D/gIKAQWa5o2Vim10l3xXC5h4ypP7UoI \n 4UcWMsXbIQg/NmxYocf25c4R+K+A0ZqmbQCuBb6sOB9+wXtKqRylVDmwEujjxliEEEKIesVtI3Cl \n 1NALP1ck8UeUUlkV7Rhgv6Zp3YESYATwP3fFIoQQQtQ3nrwfuE7TtOlApFJqZsV57w2AAVirlJI5 \n bSGEEMJBHkngSqnhF36s8tg8bOfBhRBCCHGFpJCLEEII4YckgQshhBB+yJPnwIUQwufIZWPCX8kI \n XAghhPBDMgIXwsMS9+8neelS9OXlBF1/PWNuv93bIQkh/JAkcCE86HxhIWc++YRJ584BkJqSwtb4 \n eAbdeKOXIxNC+BuZQhfCg5KSk+lbkbwB2hmNnE9O9mJEQgh/JQlcCA/q2K4dB2JjK9unAwOJbtPG \n ixEJIfyVTKEL4UFNGjUi8qGH+HbZMoLKyzH37cudQ4fW/ULhMrLqXNQXksCF8LAB/fszoH9/b4ch \n hPBzMoUuhBBC+CFJ4EIIIYQfkgQuhBBC+CE5By6EqLdkwZqoz2QELoQQQvghSeBCCCGEH5IELoQQ \n QvghSeBCCCGEH5IELoQQQvghSeBCCCGEH5IELoQQQvghSeBCCCGEH5IELoQQQvghSeBCCCGEH5IE \n LoQQQvghqYUuRC3OnDvHjl9+ITI2luE33ohOp/N2SEIIAUgCF6JGqenpHHjzTW7PzCRHp2PugQPc \n 8+ij3g5LCCEAmUIXokaJq1dze2YmOiDeaqXj1q1k5uZ6OywhhAAkgQvhMKtMnwshfIgkcCFqcO0t \n t7C0RQusQLZOR8qgQTSPjfV2WEIIAcg5cCFq1K5VK8L++ldWbN9OZKNG3H3DDd4OSQghKkkCF6IW \n TePjuX3MGG+HIYQQ1cgUuhBCCOGHZAQuhKg3FjLF2yEI4TEyAhdCCCH8kCRwIYQQwg/JFLrwaZt/ \n +onc/fsxhodz87RpREdEeDsk4UNkylw0ZJLAhc/asmULTWbOZLDBgAX4Kj2d+//6V6lHLoQQyBS6 \n 8GG5+/ejGQyA7YPa9fhxsgsKvBuUEEL4CBmBC59lDA/HDARWtLOiorg2LMybIQkvkylzIS6SBC58 \n 1i3TpvFVejqdjx3jXGQk8VOmEBoc7O2whBDCJ0gCFz4rMiyMB/78Z3KLi4kMDSVYLx/XhkZG3ELU \n TP4iCp+m0+mIi4z0dhhCCOFzZBGbEEII4YckgQshhBB+yO1T6JqmNQF2AyOVUklVHr8d+CtgAr5Q \n Sn3u7liEEEKI+sKtI3BN04KAT4FiO4+/A4wGhgIPVyR6IYQQQjjA3SPwN4GPgRcue7wbcFwplQ+g \n adoWYAiwyM3xCOE3rFYr3379NcH792MMCaH9xIlc17evt8MSQvgIt43ANU17AMhWSv1Y8VDV+pfR \n QH6VdiEQ465YhPBHP65ezYgVKxh/8iSTk5I487//kV9S4u2whBA+wp1T6L8CRmuatgG4FviyyjR5 \n PhBVpW8UkOvGWITwO4bTp4mzWivbXbOzOZWV5cWIhBC+xG1T6EqpoRd+rkjijyilLvz1OQp01jQt \n Ftv58SHYptuFEBWi2rcnIzCQlmYzAAdatGBUs2ZejkoI4Ss8WchFp2nadCBSKTVT07Q/AKuxzQL8 \n TymV6cFYhPB5w4cPZ0VeHnsSEzGGhtJt4kQiQ0O9HZYQwkforFWm6Hxddnahy4JN2LjKVZsSQriJ \n lFIV/mbYsEKXbi8hIarG+ydLKVUhhM+QhC2E46QSmxBCCOGHZAQuhPAaGXELcfVkBC6EEEL4IUng \n okHarhRr9uzBZDJ5OxQhhLgqMoUuGpxXnn2WkamptAT+GRPDcx98QLhcniWE8DMyAhcNyrdr1zIp \n NZWBQHfghfx8PnjvPW+HJYQQV0wSuGhQzpw5Q/Mq7VAgoLTUW+EIIcRVkyl00aBMGz+e/1u9micN \n BnTA4oAABo4Z4+2wGgxZdS6E60gCFw1K4+hobn/tNd744AOCLRZ6TZjA4P79vR2WEEJcMUngosHp \n 2LIlz732mrfDEEIIp8g5cCGEEMIPSQIXQggh/JAkcCGEEMIPyTlwIYTbyKpzIdxHErjwuBNZWbz3 \n r38RYLEw9be/ZWCPHt4OyaMM5eX8uGoVVoOB6wYPpmXTpt4OSQi/l5KSxO7dikaNghk5cjQBAc5P \n MBsMJfzww1rMZhg+fCBxcU1cEKnrSAIXHnUmN5evn3iCt61WAoAP//EPAl56if7du3s7NI8wmc3M \n efNN7k1MJBhYumUL1ueeo1Xz5nW+Vghh36FD+3n//Xxyc28H8jl2bA6PPTbDqW2Wlxv497/nc+TI \n fUAgP/+8iBdfHOpTSVzOgQuPeuPDD/mj1UogoAMeB+Z/8omXo/KcfUlJjK5I3gATTp9m94YNXo1J \n CH+3ceMJcnMHV7Ri2LmzFSUl+U5tc8+enzlyZCK2ca6OtLTJbNjws7OhupQkcOFRwaGhlFRpmwBz \n YKC3wvG40JAQSqpM7VkAawM6fiHcITDQfElbrzcQGBjk1DZDQkLgkr9WZgIDdU5t09UkgQuPevnJ \n J3ktKIgzQB7wj4AAXvjb37wdlsd0b9+eXcOGcSoggEJgdpcujBg3ztthucxCplzyTwhPGD/+Olq1 \n +g4oITj4MKNHGwkJCXdqm717D2TQoDVAJpBP9+6zGTNmtCvCdRmd1Wr1dgwOy84udFmwCRtXuWpT \n 4goZjUb+9cknGMrKeO6xx4iLjPR2SB5ltVrZdfAghYWF3NCvH2HBwXW/yE9I0hbeUlSUR2LiLpo1 \n a06nTq5ZGGu1Wtm37xcMBgN9+95AUFBIna8ZNqzQJfu+ICEhqsZhvyRwIYTLSAIXDZ0nE7isQhdC \n XDVJ2EJ4j5wDF0IIIfyQJHAhhBDCD0kCF0IIIfyQnAMXHndIKdT33xNosdB86FD69+vn9DZLDAZW \n zJ5NaE4O5tatGT9tGoE1lFJUyckcWr6cQLOZJoMGccPAgU7vXwghPE0SuPCoM+fPc+qDD5iYlQXA \n 9iNHOPr883Tt1Mmp7X730UdM/+UXAoHCXbtYYjYz6d57q/U7n59P0vvvMzEzE4A9hw6xPyqKaxpY \n PXYhhP+TBC48ak9iIjdXJG+AAQUFLNu3z+kEHn3yJBfqmUUBwSdO2O2379AhhlUkb4C+RUUsO3hQ \n ErgDZMW5EL5FzoELj2rTpg0qNLSyfSYggJhmzZzebmlMTOXPVqA0Otr+/lu3Jin8YoWm8zod4QkJ \n Tu9fCCE8TUbgwqN6du7MD5MmkbR2LYFmM4aBA5kyaJDT2+1z333M++ILos+fJ7dlS262M30O0Kl1 \n a9ZMncp3q1YRZDJR3K8f04YPd3r/QgjhaVKJTXiF1WrFarW65J69VZktlhoXr12+f4vV6lDfhkqm \n zIW4clKJTdR7Op0Onc7
" text/plain " : [
2015-04-15 12:35:19 -04:00
" <matplotlib.figure.Figure at 0x10ddfad10> "
2015-04-14 14:24:23 -04:00
]
} ,
2015-04-11 16:33:28 -04:00
" metadata " : { } ,
2015-04-14 14:24:23 -04:00
" output_type " : " display_data "
}
] ,
" source " : [
" from sklearn import neighbors, datasets \n " ,
" \n " ,
" iris = datasets.load_iris() \n " ,
" X, y = iris.data, iris.target \n " ,
" \n " ,
" # create the model \n " ,
" knn = neighbors.KNeighborsClassifier(n_neighbors=5, weights= ' uniform ' ) \n " ,
" \n " ,
" # fit the model \n " ,
" knn.fit(X, y) \n " ,
" \n " ,
" # What kind of iris has 3cm x 5cm sepal and 4cm x 2cm petal? \n " ,
" X_pred = [3, 5, 4, 2] \n " ,
" result = knn.predict([X_pred, ]) \n " ,
" \n " ,
" print(iris.target_names[result]) \n " ,
" print(iris.target_names) \n " ,
" print(knn.predict_proba([X_pred, ])) \n " ,
" \n " ,
" from fig_code import plot_iris_knn \n " ,
" plot_iris_knn() "
]
2015-04-22 13:29:07 -04:00
} ,
{
" cell_type " : " markdown " ,
" metadata " : { } ,
" source " : [
" Note we see overfitting in the K-Nearest Neighbors model above. We ' ll be addressing overfitting and model validation in a later notebook. "
]
2015-04-14 14:24:23 -04:00
}
] ,
" metadata " : {
" kernelspec " : {
" display_name " : " Python 2 " ,
" language " : " python " ,
" name " : " python2 "
} ,
" language_info " : {
" codemirror_mode " : {
" name " : " ipython " ,
" version " : 2
} ,
" file_extension " : " .py " ,
" mimetype " : " text/x-python " ,
" name " : " python " ,
" nbconvert_exporter " : " python " ,
" pygments_lexer " : " ipython2 " ,
" version " : " 2.7.9 "
2015-04-11 14:01:49 -04:00
}
2015-04-14 14:24:23 -04:00
} ,
" nbformat " : 4 ,
" nbformat_minor " : 0
}