mirror of https://github.com/donnemartin/data-science-ipython-notebooks.git (synced 2024-03-22 13:30:56 +08:00)
"""
Small helpers for code that is not shown in the notebooks.
"""

from sklearn import neighbors, datasets, linear_model
import pylab as pl
import numpy as np
from matplotlib.colors import ListedColormap

# Create color maps for a 3-class classification problem, as with iris:
# light colors fill the decision regions, bold colors mark the points.
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])


def plot_iris_knn():
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features; we could
                          # avoid this ugly slicing by using a 2-D dataset
    y = iris.target

    # Fit a 5-nearest-neighbors classifier on the two features.
    knn = neighbors.KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)

    # Predict the class over a dense 100x100 grid spanning the data range.
    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    pl.figure()
    pl.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    pl.xlabel('sepal length (cm)')
    pl.ylabel('sepal width (cm)')
    pl.axis('tight')
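
# A minimal usage sketch (an addition, not from the original file): the
# notebooks import this module and call the helper, relying on inline
# plotting; outside a notebook you would finish with pl.show().
# Hypothetical example, assuming the module is saved as helpers.py:
#
#     import helpers
#     helpers.plot_iris_knn()
#     pl.show()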


def plot_polynomial_regression():
    # Draw 100 points uniformly on [-1, 1] and generate noisy targets
    # from a degree-9 polynomial.
    rng = np.random.RandomState(0)
    x = 2 * rng.rand(100) - 1

    f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t**5 - .5 * t**9
    y = f(x) + .4 * rng.normal(size=100)

    x_test = np.linspace(-1, 1, 100)

    pl.figure()
    pl.scatter(x, y, s=4)

    # Fit a 4th order polynomial: the design matrix holds the powers
    # x**0 .. x**4, so ordinary least squares fits the coefficients.
    X = np.array([x**i for i in range(5)]).T
    X_test = np.array([x_test**i for i in range(5)]).T
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    pl.plot(x_test, regr.predict(X_test), label='4th order')

    # Fit a 9th order polynomial the same way, with powers x**0 .. x**9.
    X = np.array([x**i for i in range(10)]).T
    X_test = np.array([x_test**i for i in range(10)]).T
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    pl.plot(x_test, regr.predict(X_test), label='9th order')

    pl.legend(loc='best')
    pl.axis('tight')
    pl.title('Fitting a 4th and a 9th order polynomial')

    # Show the noiseless generating function next to the data.
    pl.figure()
    pl.scatter(x, y, s=4)
    pl.plot(x_test, f(x_test), label="truth")
    pl.axis('tight')
    pl.title('Ground truth (9th order polynomial)')
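

# Smoke test (an addition, not part of the original helpers): running the
# module directly renders both figures, assuming an interactive matplotlib
# backend is available.
if __name__ == '__main__':
    plot_iris_knn()
    plot_polynomial_regression()
    pl.show()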