mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
Added fig_code from https://github.com/jakevdp/sklearn_pycon2015, which is used in the scikit-learn notebooks.
This commit is contained in:
parent
815accffa0
commit
0ebd99c4e8
135
scikit-learn/fig_code/ML_flow_chart.py
Normal file
135
scikit-learn/fig_code/ML_flow_chart.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
"""
|
||||
Tutorial Diagrams
|
||||
-----------------
|
||||
|
||||
This script plots the flow-charts used in the scikit-learn tutorials.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pylab as pl
|
||||
from matplotlib.patches import Circle, Rectangle, Polygon, Arrow, FancyArrow
|
||||
|
||||
def create_base(box_bg='#CCCCCC',
                arrow1='#88CCFF',
                arrow2='#88FF88',
                supervised=True):
    """Draw the shared skeleton of the tutorial flow chart on a new figure.

    Parameters
    ----------
    box_bg : color
        Face color used for every box, the circle and the diamond.
    arrow1 : color
        Face color of the arrows on the training path (top of the chart).
    arrow2 : color
        Face color of the arrows on the prediction path (bottom of the chart).
    supervised : bool
        If True, add the "Labels" boxes, their arrow, the small
        "Expected Label" box and the "Supervised Learning Model" title.
        Otherwise draw the tall output box and the
        "Unsupervised Learning Model" title.
    """
    pl.figure(figsize=(9, 6), facecolor='w')
    # One frameless axes covering the whole figure, in a 9 x 6 coordinate
    # system matching the figure size in inches.
    ax = pl.axes((0, 0, 1, 1),
                 xticks=[], yticks=[], frameon=False)
    ax.set_xlim(0, 9)
    ax.set_ylim(0, 6)

    # Geometry shared by every arrow in the chart.
    arrow_style = dict(width=0.25, head_width=0.5, head_length=0.2)

    patches = [
        # stacked training-input boxes
        Rectangle((0.3, 3.6), 1.5, 1.8, zorder=1, fc=box_bg),
        Rectangle((0.5, 3.8), 1.5, 1.8, zorder=2, fc=box_bg),
        Rectangle((0.7, 4.0), 1.5, 1.8, zorder=3, fc=box_bg),

        # stacked thin boxes (labelled "Feature Vectors" below)
        Rectangle((2.9, 3.6), 0.2, 1.8, fc=box_bg),
        Rectangle((3.1, 3.8), 0.2, 1.8, fc=box_bg),
        Rectangle((3.3, 4.0), 0.2, 1.8, fc=box_bg),

        # new-input box and its single thin box
        Rectangle((0.3, 0.2), 1.5, 1.8, fc=box_bg),
        Rectangle((2.9, 0.2), 0.2, 1.8, fc=box_bg),

        # circle (labelled "Machine Learning Algorithm" below)
        Circle((5.5, 3.5), 1.0, fc=box_bg),

        # diamond (labelled "Predictive Model" below)
        Polygon([[5.5, 1.7],
                 [6.1, 1.1],
                 [5.5, 0.5],
                 [4.9, 1.1]], fc=box_bg),

        # training-path arrows
        FancyArrow(2.3, 4.6, 0.35, 0, fc=arrow1, **arrow_style),
        FancyArrow(3.75, 4.2, 0.5, -0.2, fc=arrow1, **arrow_style),
        FancyArrow(5.5, 2.4, 0, -0.4, fc=arrow1, **arrow_style),

        # prediction-path arrows
        FancyArrow(2.0, 1.1, 0.5, 0, fc=arrow2, **arrow_style),
        FancyArrow(3.3, 1.1, 1.3, 0, fc=arrow2, **arrow_style),
        FancyArrow(6.2, 1.1, 0.8, 0, fc=arrow2, **arrow_style),
    ]

    if supervised:
        patches += [
            Rectangle((0.3, 2.4), 1.5, 0.5, zorder=1, fc=box_bg),
            Rectangle((0.5, 2.6), 1.5, 0.5, zorder=2, fc=box_bg),
            Rectangle((0.7, 2.8), 1.5, 0.5, zorder=3, fc=box_bg),
            FancyArrow(2.3, 2.9, 2.0, 0, fc=arrow1, **arrow_style),
            Rectangle((7.3, 0.85), 1.5, 0.5, fc=box_bg),
        ]
    else:
        patches += [Rectangle((7.3, 0.2), 1.5, 1.8, fc=box_bg)]

    for p in patches:
        ax.add_patch(p)

    pl.text(1.45, 4.9, "Training\nText,\nDocuments,\nImages,\netc.",
            ha='center', va='center', fontsize=14)

    pl.text(3.6, 4.9, "Feature\nVectors",
            ha='left', va='center', fontsize=14)

    pl.text(5.5, 3.5, "Machine\nLearning\nAlgorithm",
            ha='center', va='center', fontsize=14)

    pl.text(1.05, 1.1, "New Text,\nDocument,\nImage,\netc.",
            ha='center', va='center', fontsize=14)

    pl.text(3.3, 1.7, "Feature\nVector",
            ha='left', va='center', fontsize=14)

    pl.text(5.5, 1.1, "Predictive\nModel",
            ha='center', va='center', fontsize=12)

    if supervised:
        pl.text(1.45, 3.05, "Labels",
                ha='center', va='center', fontsize=14)

        pl.text(8.05, 1.1, "Expected\nLabel",
                ha='center', va='center', fontsize=14)
        pl.text(8.8, 5.8, "Supervised Learning Model",
                ha='right', va='top', fontsize=18)
    else:
        pl.text(8.05, 1.1,
                "Likelihood\nor Cluster ID\nor Better\nRepresentation",
                ha='center', va='center', fontsize=12)
        pl.text(8.8, 5.8, "Unsupervised Learning Model",
                ha='right', va='top', fontsize=18)
|
||||
|
||||
|
||||
|
||||
def plot_supervised_chart(annotate=False):
    """Plot the supervised-learning flow chart.

    Parameters
    ----------
    annotate : bool
        If True, overlay red, rotated code snippets next to the arrows.
    """
    create_base(supervised=True)
    if not annotate:
        return

    fontdict = dict(color='r', weight='bold', size=14)
    # (x, y, snippet) for each annotation, in drawing order.
    snippets = [
        (1.9, 4.55, 'X = vec.fit_transform(input)'),
        (3.7, 3.2, 'clf.fit(X, y)'),
        (1.7, 1.5, 'X_new = vec.transform(input)'),
        (6.1, 1.5, 'y_new = clf.predict(X_new)'),
    ]
    for x, y, code in snippets:
        pl.text(x, y, code,
                fontdict=fontdict,
                rotation=20, ha='left', va='bottom')
|
||||
|
||||
def plot_unsupervised_chart():
    """Plot the unsupervised-learning variant of the flow chart."""
    create_base(supervised=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: draw all three chart variants, then display them.
    plot_supervised_chart(False)
    plot_supervised_chart(True)
    plot_unsupervised_chart()
    pl.show()
|
||||
|
||||
|
6
scikit-learn/fig_code/__init__.py
Normal file
6
scikit-learn/fig_code/__init__.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
from .data import *
|
||||
from .figures import *
|
||||
|
||||
from .sgd_separator import plot_sgd_separator
|
||||
from .linear_regression import plot_linear_regression
|
||||
from .helpers import plot_iris_knn
|
4
scikit-learn/fig_code/__init__.py~
Normal file
4
scikit-learn/fig_code/__init__.py~
Normal file
|
@ -0,0 +1,4 @@
|
|||
from .sgd_separator import plot_sgd_separator
|
||||
from .linear_regression import plot_linear_regression
|
||||
from .ML_flow_chart import plot_supervised_chart, plot_unsupervised_chart
|
||||
from .helpers import plot_iris_knn
|
47
scikit-learn/fig_code/data.py
Normal file
47
scikit-learn/fig_code/data.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def linear_data_sample(N=40, rseed=0, m=3, b=-2):
    """Generate noisy samples of the line ``y = m * x + b``.

    Parameters
    ----------
    N : int
        Number of points.
    rseed : int
        Seed for the private ``RandomState``; equal seeds give equal data.
    m, b : float
        Slope and intercept of the underlying line.

    Returns
    -------
    (x, y, dy) : tuple of ndarray
        ``x`` is uniform in [0, 10), ``dy`` is the per-point noise scale
        in [m/2, m), and ``y`` is the line plus Gaussian noise scaled by
        ``dy``.
    """
    rng = np.random.RandomState(rseed)

    x = 10 * rng.rand(N)
    # 2.0 forces true division so an integer ``m`` is not truncated under
    # Python 2 (this module has no ``from __future__ import division``).
    dy = m / 2.0 * (1 + rng.rand(N))
    y = m * x + b + dy * rng.randn(N)

    return (x, y, dy)
|
||||
|
||||
|
||||
def linear_data_sample_big_errs(N=40, rseed=0, m=3, b=-2):
    """Generate noisy samples of ``y = m * x + b`` with a few large errors.

    Same recipe as a plain noisy line, except that the noise scale of
    points 20 through 24 is multiplied by 10 before the noise is drawn.
    When ``N <= 20`` that slice is empty and no point is inflated.

    Parameters
    ----------
    N : int
        Number of points.
    rseed : int
        Seed for the private ``RandomState``.
    m, b : float
        Slope and intercept of the underlying line.

    Returns
    -------
    (x, y, dy) : tuple of ndarray
    """
    rng = np.random.RandomState(rseed)

    x = 10 * rng.rand(N)
    # 2.0 forces true division so an integer ``m`` is not truncated under
    # Python 2 (this module has no ``from __future__ import division``).
    dy = m / 2.0 * (1 + rng.rand(N))
    dy[20:25] *= 10
    y = m * x + b + dy * rng.randn(N)

    return (x, y, dy)
|
||||
|
||||
|
||||
def sample_light_curve(phased=True):
    """Return one light curve from astroML's LINEAR sample (id 18525697).

    Parameters
    ----------
    phased : bool
        If True, divide the first column by the hard-coded value
        ``P_best`` (presumably the best-fit period -- TODO confirm).

    Returns
    -------
    (t, y, dy) : tuple of ndarray
        The three columns of the selected record.  Presumably time,
        magnitude and magnitude error -- verify against astroML.
    """
    from astroML.datasets import fetch_LINEAR_sample
    data = fetch_LINEAR_sample()
    t, y, dy = data[18525697].T

    if phased:
        P_best = 0.580313015651
        # Build a new array rather than dividing in place, so the array
        # obtained from the dataset object is left untouched.
        t = t / P_best

    return (t, y, dy)
|
||||
|
||||
|
||||
def sample_light_curve_2(phased=True):
    """Return one light curve from astroML's LINEAR sample (id 10022663).

    Parameters
    ----------
    phased : bool
        If True, divide the first column by the hard-coded value
        ``P_best`` (presumably the best-fit period -- TODO confirm).

    Returns
    -------
    (t, y, dy) : tuple of ndarray
        The three columns of the selected record.  Presumably time,
        magnitude and magnitude error -- verify against astroML.
    """
    from astroML.datasets import fetch_LINEAR_sample
    data = fetch_LINEAR_sample()
    t, y, dy = data[10022663].T

    if phased:
        P_best = 0.61596079804
        # Build a new array rather than dividing in place, so the array
        # obtained from the dataset object is left untouched.
        t = t / P_best

    return (t, y, dy)
|
||||
|
233
scikit-learn/fig_code/figures.py
Normal file
233
scikit-learn/fig_code/figures.py
Normal file
|
@ -0,0 +1,233 @@
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import warnings
|
||||
|
||||
|
||||
def plot_venn_diagram():
    """Draw two overlapping circles labelled x and y inside a box labelled I."""
    fig, ax = plt.subplots(subplot_kw=dict(frameon=False,
                                           xticks=[], yticks=[]))

    # The two translucent, overlapping circles.
    ax.add_patch(plt.Circle((0.3, 0.3), 0.3, fc='red', alpha=0.5))
    ax.add_patch(plt.Circle((0.6, 0.3), 0.3, fc='blue', alpha=0.5))
    # The enclosing, unfilled rectangle.
    ax.add_patch(plt.Rectangle((-0.1, -0.1), 1.1, 0.8, fc='none', ec='black'))

    ax.text(0.2, 0.3, '$x$', size=30, ha='center', va='center')
    ax.text(0.7, 0.3, '$y$', size=30, ha='center', va='center')
    ax.text(0.0, 0.6, '$I$', size=30)
    ax.axis('equal')
|
||||
|
||||
|
||||
def plot_example_decision_tree():
    """Draw a hand-laid-out example decision tree for classifying animals."""
    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_axes([0, 0, 0.8, 1], frameon=False, xticks=[], yticks=[])
    ax.set_title('Example Decision Tree: Animal Classification', size=24)

    def text(ax, x, y, t, size=20, **kwargs):
        """Place centred text in a rounded white box at (x, y)."""
        ax.text(x, y, t,
                ha='center', va='center', size=size,
                bbox=dict(boxstyle='round', ec='k', fc='w'), **kwargs)

    # Question nodes: (x, y, label, font size).
    questions = [
        (0.5, 0.9, "How big is\nthe animal?", 20),
        (0.3, 0.6, "Does the animal\nhave horns?", 18),
        (0.7, 0.6, "Does the animal\nhave two legs?", 18),
        (0.12, 0.3, "Are the horns\nlonger than 10cm?", 14),
        (0.38, 0.3, "Is the animal\nwearing a collar?", 14),
        (0.62, 0.3, "Does the animal\nhave wings?", 14),
        (0.88, 0.3, "Does the animal\nhave a tail?", 14),
    ]
    for x, y, label, size in questions:
        text(ax, x, y, label, size)

    # Faded answer labels placed on the branches: (x, y, label).
    answers = [
        (0.4, 0.75, "> 1m"),
        (0.6, 0.75, "< 1m"),
        (0.21, 0.45, "yes"),
        (0.34, 0.45, "no"),
        (0.66, 0.45, "yes"),
        (0.79, 0.45, "no"),
    ]
    for x, y, label in answers:
        text(ax, x, y, label, 12, alpha=0.4)

    # Solid lines join the drawn nodes; dashed lines run from the bottom
    # row of nodes down to the lower edge of the axes.
    branches = [
        ([0.3, 0.5, 0.7], [0.6, 0.9, 0.6], '-k'),
        ([0.12, 0.3, 0.38], [0.3, 0.6, 0.3], '-k'),
        ([0.62, 0.7, 0.88], [0.3, 0.6, 0.3], '-k'),
        ([0.0, 0.12, 0.20], [0.0, 0.3, 0.0], '--k'),
        ([0.28, 0.38, 0.48], [0.0, 0.3, 0.0], '--k'),
        ([0.52, 0.62, 0.72], [0.0, 0.3, 0.0], '--k'),
        ([0.8, 0.88, 1.0], [0.0, 0.3, 0.0], '--k'),
    ]
    for xs, ys, style in branches:
        ax.plot(xs, ys, style)

    ax.axis([0, 1, 0, 1])
|
||||
|
||||
|
||||
def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None):
    """Fit ``estimator`` and plot its predictions over a 2-D grid.

    Parameters
    ----------
    estimator : object
        Must provide ``fit`` and ``predict``; when ``boundaries`` is True
        it must also expose a fitted ``tree_`` attribute with ``feature``,
        ``threshold``, ``children_left`` and ``children_right``.
    X : ndarray
        Training inputs; only columns 0 and 1 are used for plotting.
    y : ndarray
        Training targets, also used to set the color limits.
    boundaries : bool
        If True, draw each split threshold as a black line.
    xlim, ylim : pair of float, optional
        Plot extents; default to the data range padded by 0.1.
    """
    estimator.fit(X, y)

    if xlim is None:
        xlim = (X[:, 0].min() - 0.1, X[:, 0].max() + 0.1)
    if ylim is None:
        ylim = (X[:, 1].min() - 0.1, X[:, 1].max() + 0.1)

    x_min, x_max = xlim
    y_min, y_max = ylim
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, alpha=0.2, cmap='rainbow')
    # Color limits for the mesh just drawn.
    plt.clim(y.min(), y.max())

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
    plt.axis('off')

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # Second call: the scatter is now the current mappable, so this sets
    # its limits to match the mesh.
    plt.clim(y.min(), y.max())

    # Plot the decision boundaries
    def plot_boundaries(i, xlim, ylim):
        """Recursively draw the split of node ``i`` within (xlim, ylim)."""
        if i < 0:
            # Negative child index: nothing below this point.
            return

        tree = estimator.tree_

        if tree.feature[i] == 0:
            # Split on column 0: vertical line, then recurse left/right.
            plt.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k')
            plot_boundaries(tree.children_left[i],
                            [xlim[0], tree.threshold[i]], ylim)
            plot_boundaries(tree.children_right[i],
                            [tree.threshold[i], xlim[1]], ylim)

        elif tree.feature[i] == 1:
            # Split on column 1: horizontal line, then recurse below/above.
            plt.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k')
            plot_boundaries(tree.children_left[i], xlim,
                            [ylim[0], tree.threshold[i]])
            plot_boundaries(tree.children_right[i], xlim,
                            [tree.threshold[i], ylim[1]])

    if boundaries:
        plot_boundaries(0, plt.xlim(), plt.ylim())
|
||||
|
||||
|
||||
def plot_tree_interactive(X, y):
    """Show an interactive decision-tree plot with a depth control.

    Parameters
    ----------
    X, y : ndarray
        Training data passed to ``visualize_tree`` on every widget change.

    Returns
    -------
    The value returned by ``interact``.
    """
    from sklearn.tree import DecisionTreeClassifier

    def interactive_tree(depth=1):
        clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
        visualize_tree(clf, X, y)

    # The widgets moved out of IPython into the separate ``ipywidgets``
    # package; fall back to the old location for old installations.
    try:
        from ipywidgets import interact
    except ImportError:
        from IPython.html.widgets import interact

    # A (min, max) tuple requests an integer slider.  A two-element list
    # would be read by ipywidgets as a dropdown offering only 1 and 5.
    return interact(interactive_tree, depth=(1, 5))
|
||||
|
||||
|
||||
def plot_kmeans_interactive(min_clusters=1, max_clusters=6):
    """Step through k-means on a fixed blob dataset with interactive widgets.

    Each group of three frames shows one iteration: the current centers,
    the reassignment of points to the nearest center, and the update of
    the centers to the cluster means.

    Parameters
    ----------
    min_clusters, max_clusters : int
        Range of the ``n_clusters`` slider.

    Returns
    -------
    The value returned by ``interact``.
    """
    # The widgets moved out of IPython into the separate ``ipywidgets``
    # package; fall back to the old location for old installations.
    try:
        from ipywidgets import interact
    except ImportError:
        from IPython.html.widgets import interact
    from sklearn.metrics.pairwise import euclidean_distances
    # ``sklearn.datasets.samples_generator`` no longer exists in current
    # scikit-learn; ``make_blobs`` is exported from ``sklearn.datasets``.
    from sklearn.datasets import make_blobs

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')

        X, y = make_blobs(n_samples=300, centers=4,
                          random_state=0, cluster_std=0.60)

    def _kmeans_step(frame=0, n_clusters=4):
        # Re-seeded on every call so each frame replays the same run.
        rng = np.random.RandomState(2)
        labels = np.zeros(X.shape[0])
        centers = rng.randn(n_clusters, 2)

        nsteps = frame // 3

        for i in range(nsteps + 1):
            old_centers = centers
            # Full iterations always reassign; the final partial
            # iteration reassigns only from its second frame on.
            if i < nsteps or frame % 3 > 0:
                dist = euclidean_distances(X, centers)
                labels = dist.argmin(1)

            # Likewise, centers move only from the third frame on.
            if i < nsteps or frame % 3 > 1:
                centers = np.array([X[labels == j].mean(0)
                                    for j in range(n_clusters)])
                # An empty cluster has a NaN mean; keep its old center.
                nans = np.isnan(centers)
                centers[nans] = old_centers[nans]

        # plot the data and cluster centers
        plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='rainbow',
                    vmin=0, vmax=n_clusters - 1)
        plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
                    c=np.arange(n_clusters),
                    s=200, cmap='rainbow')
        plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
                    c='black', s=50)

        # plot new centers if third frame
        if frame % 3 == 2:
            for i in range(n_clusters):
                plt.annotate('', centers[i], old_centers[i],
                             arrowprops=dict(arrowstyle='->', linewidth=1))
            plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                        c=np.arange(n_clusters),
                        s=200, cmap='rainbow')
            plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                        c='black', s=50)

        plt.xlim(-4, 4)
        plt.ylim(-2, 10)

        if frame % 3 == 1:
            plt.text(3.8, 9.5, "1. Reassign points to nearest centroid",
                     ha='right', va='top', size=14)
        elif frame % 3 == 2:
            plt.text(3.8, 9.5, "2. Update centroids to cluster means",
                     ha='right', va='top', size=14)

    # (min, max) tuples request sliders; two-element lists would be read
    # by ipywidgets as dropdowns offering only the two listed values.
    return interact(_kmeans_step, frame=(0, 50),
                    n_clusters=(min_clusters, max_clusters))
|
||||
|
||||
|
||||
def plot_image_components(x, coefficients=None, mean=0, components=None,
                          imshape=(8, 8), n_components=6, fontsize=12):
    """Show an image being rebuilt as a mean plus weighted components.

    The figure shows the true image on the left, then the mean, then for
    each component the component itself (top row) and the running
    approximation after adding it (bottom row), and finally the full
    approximation on the right.

    Parameters
    ----------
    x : ndarray
        Flattened image; must reshape to ``imshape``.
    coefficients : sequence, optional
        Weight of each component; defaults to ``x`` itself.
    mean : scalar or ndarray
        Added to zeros shaped like ``x`` to form the starting image.
    components : ndarray, optional
        One flattened component per row; defaults to an identity-like
        matrix of shape ``(len(coefficients), len(x))``.
    imshape : tuple
        Shape used to display every flattened image.
    n_components : int
        Number of components to display; ``coefficients`` and
        ``components`` must hold at least this many entries.
    fontsize : int
        Font size of the panel titles and the '+' signs.
    """
    if coefficients is None:
        coefficients = x

    if components is None:
        components = np.eye(len(coefficients), len(x))

    mean = np.zeros_like(x) + mean

    fig = plt.figure(figsize=(1.2 * (5 + n_components), 1.2 * 2))
    # Two rows; 2 columns for the true image, 1 for the mean, one per
    # component, and 2 for the final approximation.
    g = plt.GridSpec(2, 5 + n_components, hspace=0.3)

    def show(i, j, x, title=None):
        """Display flattened image ``x`` in grid cell(s) ``g[i, j]``."""
        ax = fig.add_subplot(g[i, j], xticks=[], yticks=[])
        ax.imshow(x.reshape(imshape), interpolation='nearest')
        if title:
            ax.set_title(title, fontsize=fontsize)

    show(slice(2), slice(2), x, "True")

    approx = mean.copy()
    show(0, 2, np.zeros_like(x) + mean, r'$\mu$')
    show(1, 2, approx, r'$1 \cdot \mu$')

    for i in range(0, n_components):
        approx = approx + coefficients[i] * components[i]
        show(0, i + 3, components[i], r'$c_{0}$'.format(i + 1))
        show(1, i + 3, approx,
             r"${0:.2f} \cdot c_{1}$".format(coefficients[i], i + 1))
        # '+' sign just outside the top-left corner of the panel last drawn.
        plt.gca().text(0, 1.05, '$+$', ha='right', va='bottom',
                       transform=plt.gca().transAxes, fontsize=fontsize)

    show(slice(2), slice(-2, None), approx, "Approx")
|
||||
|
||||
|
||||
def plot_pca_interactive(data, n_components=6):
    """Interactively show the PCA reconstruction of each row of ``data``.

    Parameters
    ----------
    data : ndarray
        One flattened image per row.  Each row must reshape to the
        default ``imshape`` of ``plot_image_components``.
    n_components : int
        Number of principal components to fit and to display.
    """
    from sklearn.decomposition import PCA
    # The widgets moved out of IPython into the separate ``ipywidgets``
    # package; fall back to the old location for old installations.
    try:
        from ipywidgets import interact
    except ImportError:
        from IPython.html.widgets import interact

    pca = PCA(n_components=n_components)
    Xproj = pca.fit_transform(data)

    def show_decomp(i=0):
        # Forward n_components so the plot shows exactly the fitted
        # components.  Relying on the plotting default of 6 indexed past
        # the end of Xproj[i] whenever fewer than 6 were fitted.
        plot_image_components(data[i], Xproj[i],
                              pca.mean_, pca.components_,
                              n_components=n_components)

    interact(show_decomp, i=(0, data.shape[0] - 1))
|
75
scikit-learn/fig_code/helpers.py
Normal file
75
scikit-learn/fig_code/helpers.py
Normal file
|
@ -0,0 +1,75 @@
|
|||
"""
|
||||
Small helpers for code that is not shown in the notebooks
|
||||
"""
|
||||
|
||||
from sklearn import neighbors, datasets, linear_model
|
||||
import pylab as pl
|
||||
import numpy as np
|
||||
from matplotlib.colors import ListedColormap
|
||||
|
||||
# Create color maps for 3-class classification problem, as with iris
|
||||
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
|
||||
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
|
||||
|
||||
def plot_iris_knn():
    """Plot the decision regions of a 5-nearest-neighbors classifier on iris.

    Only the first two iris features are used, so the regions can be
    drawn in the plane.
    """
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features. We could
                          # avoid this ugly slicing by using a two-dim dataset
    y = iris.target

    knn = neighbors.KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, y)

    # Evaluate the classifier on a 100 x 100 grid padded by 0.1.
    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    pl.figure()
    pl.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    pl.xlabel('sepal length (cm)')
    pl.ylabel('sepal width (cm)')
    pl.axis('tight')
|
||||
|
||||
|
||||
def plot_polynomial_regression():
    """Compare 4th- and 9th-order polynomial fits with the ground truth.

    Draws two figures: the noisy samples with both fitted curves, and the
    same samples with the true generating polynomial.
    """
    rng = np.random.RandomState(0)
    x = 2*rng.rand(100) - 1

    f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9
    y = f(x) + .4 * rng.normal(size=100)

    x_test = np.linspace(-1, 1, 100)

    pl.figure()
    pl.scatter(x, y, s=4)

    # Powers 0..4 of x as columns: a 4th-order polynomial design matrix.
    X = np.array([x**i for i in range(5)]).T
    X_test = np.array([x_test**i for i in range(5)]).T
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    pl.plot(x_test, regr.predict(X_test), label='4th order')

    # Powers 0..9 of x as columns: a 9th-order polynomial design matrix.
    X = np.array([x**i for i in range(10)]).T
    X_test = np.array([x_test**i for i in range(10)]).T
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    pl.plot(x_test, regr.predict(X_test), label='9th order')

    pl.legend(loc='best')
    pl.axis('tight')
    pl.title('Fitting a 4th and a 9th order polynomial')

    pl.figure()
    pl.scatter(x, y, s=4)
    pl.plot(x_test, f(x_test), label="truth")
    pl.axis('tight')
    pl.title('Ground truth (9th order polynomial)')
|
||||
|
||||
|
75
scikit-learn/fig_code/helpers.py~
Normal file
75
scikit-learn/fig_code/helpers.py~
Normal file
|
@ -0,0 +1,75 @@
|
|||
"""
|
||||
Small helpers for code that is not shown in the notebooks
|
||||
"""
|
||||
|
||||
from sklearn import neighbors, datasets, linear_model
|
||||
import pylab as pl
|
||||
import numpy as np
|
||||
from matplotlib.colors import ListedColormap
|
||||
|
||||
# Create color maps for 3-class classification problem, as with iris
|
||||
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
|
||||
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
|
||||
|
||||
def plot_iris_knn():
    """Plot the decision regions of a 3-nearest-neighbors classifier on iris.

    Only the first two iris features are used, so the regions can be
    drawn in the plane.
    """
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features. We could
                          # avoid this ugly slicing by using a two-dim dataset
    y = iris.target

    knn = neighbors.KNeighborsClassifier(n_neighbors=3)
    knn.fit(X, y)

    # Evaluate the classifier on a 100 x 100 grid padded by 0.1.
    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    pl.figure()
    pl.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    pl.xlabel('sepal length (cm)')
    pl.ylabel('sepal width (cm)')
    pl.axis('tight')
|
||||
|
||||
|
||||
def plot_polynomial_regression():
    """Compare 4th- and 9th-order polynomial fits with the ground truth.

    Draws two figures: the noisy samples with both fitted curves, and the
    same samples with the true generating polynomial.
    """
    rng = np.random.RandomState(0)
    x = 2*rng.rand(100) - 1

    f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9
    y = f(x) + .4 * rng.normal(size=100)

    x_test = np.linspace(-1, 1, 100)

    pl.figure()
    pl.scatter(x, y, s=4)

    # Powers 0..4 of x as columns: a 4th-order polynomial design matrix.
    X = np.array([x**i for i in range(5)]).T
    X_test = np.array([x_test**i for i in range(5)]).T
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    pl.plot(x_test, regr.predict(X_test), label='4th order')

    # Powers 0..9 of x as columns: a 9th-order polynomial design matrix.
    X = np.array([x**i for i in range(10)]).T
    X_test = np.array([x_test**i for i in range(10)]).T
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    pl.plot(x_test, regr.predict(X_test), label='9th order')

    pl.legend(loc='best')
    pl.axis('tight')
    pl.title('Fitting a 4th and a 9th order polynomial')

    pl.figure()
    pl.scatter(x, y, s=4)
    pl.plot(x_test, f(x_test), label="truth")
    pl.axis('tight')
    pl.title('Ground truth (9th order polynomial)')
|
||||
|
||||
|
37
scikit-learn/fig_code/linear_regression.py
Normal file
37
scikit-learn/fig_code/linear_regression.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
|
||||
def plot_linear_regression():
    """Fit and plot a straight line through 20 noisy random points.

    The data are drawn from the unseeded global NumPy random state, so
    every call produces a different plot.
    """
    a = 0.5
    b = 1.0

    # x uniformly distributed between 0 and 30
    x = 30 * np.random.random(20)

    # y = a*x + b with unit-variance Gaussian noise
    y = a * x + b + np.random.normal(size=x.shape)

    # create and fit a linear regression model; x[:, None] turns the 1-D
    # samples into the column matrix that fit() is given
    clf = LinearRegression()
    clf.fit(x[:, None], y)

    # predict y on an evenly spaced grid over the same range
    x_new = np.linspace(0, 30, 100)
    y_new = clf.predict(x_new[:, None])

    # plot the results
    ax = plt.axes()
    ax.scatter(x, y)
    ax.plot(x_new, y_new)

    ax.set_xlabel('x')
    ax.set_ylabel('y')

    ax.axis('tight')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: draw the regression figure and display it.
    plot_linear_regression()
    plt.show()
|
101
scikit-learn/fig_code/scikit-learn.ipynb
Normal file
101
scikit-learn/fig_code/scikit-learn.ipynb
Normal file
|
@ -0,0 +1,101 @@
|
|||
{
|
||||
"metadata": {
|
||||
"name": "",
|
||||
"signature": "sha256:29899a15bea89b9d8275879798b23011cecabc0eff03dd41bb606324221e0bc3"
|
||||
},
|
||||
"nbformat": 3,
|
||||
"nbformat_minor": 0,
|
||||
"worksheets": [
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# scikit-learn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"# set seaborn plot defaults.\n",
|
||||
"# This can be safely commented out\n",
|
||||
"import seaborn; seaborn.set()"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"prompt_number": 3
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [
|
||||
"# Import the example plot from the figures directory\n",
|
||||
"from fig_code import plot_sgd_separator\n",
|
||||
"plot_sgd_separator()"
|
||||
],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ImportError",
|
||||
"evalue": "No module named fig_code",
|
||||
"output_type": "pyerr",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-4-ce8360b266e1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Import the example plot from the figures directory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mfig_code\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mplot_sgd_separator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mplot_sgd_separator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mImportError\u001b[0m: No module named fig_code"
|
||||
]
|
||||
}
|
||||
],
|
||||
"prompt_number": 4
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"collapsed": false,
|
||||
"input": [],
|
||||
"language": "python",
|
||||
"metadata": {},
|
||||
"outputs": []
|
||||
}
|
||||
],
|
||||
"metadata": {}
|
||||
}
|
||||
]
|
||||
}
|
40
scikit-learn/fig_code/sgd_separator.py
Normal file
40
scikit-learn/fig_code/sgd_separator.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
from sklearn.datasets.samples_generator import make_blobs
|
||||
|
||||
def plot_sgd_separator():
    """Plot a linear SGD classifier's decision line and margins on two blobs."""
    # we create 50 separable points
    X, Y = make_blobs(n_samples=50, centers=2,
                      random_state=0, cluster_std=0.60)

    # fit the model
    # ``max_iter`` replaces the ``n_iter`` argument, which current
    # scikit-learn no longer accepts.
    clf = SGDClassifier(loss="hinge", alpha=0.01,
                        max_iter=200, fit_intercept=True)
    clf.fit(X, Y)

    # plot the line, the points, and the nearest vectors to the plane
    xx = np.linspace(-1, 5, 10)
    yy = np.linspace(-1, 5, 10)

    X1, X2 = np.meshgrid(xx, yy)
    # Evaluate every grid point in one call.  decision_function is given
    # a 2-D (n_samples, 2) array here instead of one 1-D sample at a time.
    grid = np.c_[X1.ravel(), X2.ravel()]
    Z = clf.decision_function(grid).reshape(X1.shape)

    levels = [-1.0, 0.0, 1.0]
    linestyles = ['dashed', 'solid', 'dashed']
    colors = 'k'

    ax = plt.axes()
    ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
    ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

    ax.axis('tight')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: draw the separator figure and display it.
    plot_sgd_separator()
    plt.show()
|
341
scikit-learn/fig_code/svm_gui.py
Normal file
341
scikit-learn/fig_code/svm_gui.py
Normal file
|
@ -0,0 +1,341 @@
|
|||
"""
|
||||
==========
|
||||
Libsvm GUI
|
||||
==========
|
||||
|
||||
A simple graphical frontend for Libsvm mainly intended for didactic
|
||||
purposes. You can create data points by point and click and visualize
|
||||
the decision region induced by different kernels and parameter settings.
|
||||
|
||||
To create positive examples click the left mouse button; to create
|
||||
negative examples click the right button.
|
||||
|
||||
If all examples are from the same class, it uses a one-class SVM.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function
|
||||
|
||||
print(__doc__)
|
||||
|
||||
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
|
||||
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
||||
from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.contour import ContourSet
|
||||
|
||||
import Tkinter as Tk
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
from sklearn import svm
|
||||
from sklearn.datasets import dump_svmlight_file
|
||||
from sklearn.externals.six.moves import xrange
|
||||
|
||||
y_min, y_max = -50, 50
|
||||
x_min, x_max = -50, 50
|
||||
|
||||
|
||||
class Model(object):
    """The Model which holds the data. It implements the
    observable in the observer pattern and notifies the
    registered observers on change events.
    """

    def __init__(self):
        self.observers = []
        self.surface = None
        # List of (x, y, label) tuples.
        self.data = []
        # Fitted classifier.  Controller.fit assigns ``model.clf`` and
        # View.update reads it, so it is initialised under that name.
        self.clf = None
        # Original attribute name, kept so existing readers of ``cls``
        # keep working.
        self.cls = None
        self.surface_type = 0

    def changed(self, event):
        """Notify the observers. """
        for observer in self.observers:
            observer.update(event, self)

    def add_observer(self, observer):
        """Register an observer. """
        self.observers.append(observer)

    def set_surface(self, surface):
        """Store the decision surface produced by the controller."""
        self.surface = surface

    def dump_svmlight_file(self, file):
        """Write the stored examples to ``file`` in svmlight format."""
        data = np.array(self.data)
        X = data[:, 0:2]
        y = data[:, 2]
        # Resolves to the module-level function of the same name; inside
        # a method body the bare name is not this method.
        dump_svmlight_file(X, y, file)
|
||||
|
||||
|
||||
class Controller(object):
    """Fits SVMs on the model's data and pushes the results back to it.

    ``fit`` reads ``self.complexity``, ``self.gamma``, ``self.coef0`` and
    ``self.degree`` through ``.get()``.  They are not created here and
    must be attached before ``fit`` is called (presumably by the control
    bar -- TODO confirm).
    """

    def __init__(self, model):
        self.model = model
        self.kernel = Tk.IntVar()
        self.surface_type = Tk.IntVar()
        # Whether or not a model has been fitted
        self.fitted = False

    def fit(self):
        """Fit an SVM on the current examples and publish its surface.

        A one-class SVM is used when all examples share one label,
        otherwise an SVC.  Does nothing when there are no examples.
        """
        print("fit the model")
        if not self.model.data:
            # An empty list gives a 1-D array, on which the column
            # slicing below would raise IndexError.
            return
        train = np.array(self.model.data)
        X = train[:, 0:2]
        y = train[:, 2]

        C = float(self.complexity.get())
        gamma = float(self.gamma.get())
        coef0 = float(self.coef0.get())
        degree = int(self.degree.get())
        kernel_map = {0: "linear", 1: "rbf", 2: "poly"}
        if len(np.unique(y)) == 1:
            clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()],
                                  gamma=gamma, coef0=coef0, degree=degree)
            clf.fit(X)
        else:
            clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C,
                          gamma=gamma, coef0=coef0, degree=degree)
            clf.fit(X, y)
        # Only report accuracy for estimators that provide ``score``.
        if hasattr(clf, 'score'):
            print("Accuracy:", clf.score(X, y) * 100)
        X1, X2, Z = self.decision_surface(clf)
        self.model.clf = clf
        self.model.set_surface((X1, X2, Z))
        self.model.surface_type = self.surface_type.get()
        self.fitted = True
        self.model.changed("surface")

    def decision_surface(self, cls):
        """Evaluate ``cls.decision_function`` on a unit grid over the plot area.

        Returns the two meshgrid arrays and the decision values reshaped
        to the grid.
        """
        delta = 1
        x = np.arange(x_min, x_max + delta, delta)
        y = np.arange(y_min, y_max + delta, delta)
        X1, X2 = np.meshgrid(x, y)
        Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()])
        Z = Z.reshape(X1.shape)
        return X1, X2, Z

    def clear_data(self):
        """Drop all examples and notify the observers."""
        self.model.data = []
        self.fitted = False
        self.model.changed("clear")

    def add_example(self, x, y, label):
        """Append one labelled point and notify the observers."""
        self.model.data.append((x, y, label))
        self.model.changed("example_added")

        # update decision surface if already fitted.
        self.refit()

    def refit(self):
        """Refit the model if already fitted. """
        if self.fitted:
            self.fit()
|
||||
|
||||
|
||||
class View(object):
|
||||
"""Test docstring. """
|
||||
def __init__(self, root, controller):
    """Build the figure, embed it in ``root`` and connect the event handlers.

    Parameters
    ----------
    root : Tk widget
        Parent for the canvas, the toolbar and the control bar.
    controller : Controller
        Receives the examples created by mouse clicks.
    """
    f = Figure()
    ax = f.add_subplot(111)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim((x_min, x_max))
    ax.set_ylim((y_min, y_max))
    canvas = FigureCanvasTkAgg(f, master=root)
    # ``draw`` is the method that the deprecated ``show`` aliased.
    canvas.draw()
    canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
    canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
    canvas.mpl_connect('key_press_event', self.onkeypress)
    canvas.mpl_connect('key_release_event', self.onkeyrelease)
    canvas.mpl_connect('button_press_event', self.onclick)
    toolbar = NavigationToolbar2TkAgg(canvas, root)
    toolbar.update()
    # True while the shift key is held; makes a left click add a
    # negative example.
    self.shift_down = False
    self.controllbar = ControllBar(root, controller)
    self.f = f
    self.ax = ax
    self.canvas = canvas
    self.controller = controller
    # Artists of the current decision surface, removed on each refit.
    self.contours = []
    self.c_labels = None
    self.plot_kernels()
|
||||
|
||||
def plot_kernels(self):
|
||||
self.ax.text(-50, -60, "Linear: $u^T v$")
|
||||
self.ax.text(-20, -60, "RBF: $\exp (-\gamma \| u-v \|^2)$")
|
||||
self.ax.text(10, -60, "Poly: $(\gamma \, u^T v + r)^d$")
|
||||
|
||||
def onkeypress(self, event):
|
||||
if event.key == "shift":
|
||||
self.shift_down = True
|
||||
|
||||
def onkeyrelease(self, event):
|
||||
if event.key == "shift":
|
||||
self.shift_down = False
|
||||
|
||||
def onclick(self, event):
|
||||
if event.xdata and event.ydata:
|
||||
if self.shift_down or event.button == 3:
|
||||
self.controller.add_example(event.xdata, event.ydata, -1)
|
||||
elif event.button == 1:
|
||||
self.controller.add_example(event.xdata, event.ydata, 1)
|
||||
|
||||
def update_example(self, model, idx):
|
||||
x, y, l = model.data[idx]
|
||||
if l == 1:
|
||||
color = 'w'
|
||||
elif l == -1:
|
||||
color = 'k'
|
||||
self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)
|
||||
|
||||
def update(self, event, model):
|
||||
if event == "examples_loaded":
|
||||
for i in xrange(len(model.data)):
|
||||
self.update_example(model, i)
|
||||
|
||||
if event == "example_added":
|
||||
self.update_example(model, -1)
|
||||
|
||||
if event == "clear":
|
||||
self.ax.clear()
|
||||
self.ax.set_xticks([])
|
||||
self.ax.set_yticks([])
|
||||
self.contours = []
|
||||
self.c_labels = None
|
||||
self.plot_kernels()
|
||||
|
||||
if event == "surface":
|
||||
self.remove_surface()
|
||||
self.plot_support_vectors(model.clf.support_vectors_)
|
||||
self.plot_decision_surface(model.surface, model.surface_type)
|
||||
|
||||
self.canvas.draw()
|
||||
|
||||
def remove_surface(self):
|
||||
"""Remove old decision surface."""
|
||||
if len(self.contours) > 0:
|
||||
for contour in self.contours:
|
||||
if isinstance(contour, ContourSet):
|
||||
for lineset in contour.collections:
|
||||
lineset.remove()
|
||||
else:
|
||||
contour.remove()
|
||||
self.contours = []
|
||||
|
||||
def plot_support_vectors(self, support_vectors):
|
||||
"""Plot the support vectors by placing circles over the
|
||||
corresponding data points and adds the circle collection
|
||||
to the contours list."""
|
||||
cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
|
||||
s=80, edgecolors="k", facecolors="none")
|
||||
self.contours.append(cs)
|
||||
|
||||
def plot_decision_surface(self, surface, type):
|
||||
X1, X2, Z = surface
|
||||
if type == 0:
|
||||
levels = [-1.0, 0.0, 1.0]
|
||||
linestyles = ['dashed', 'solid', 'dashed']
|
||||
colors = 'k'
|
||||
self.contours.append(self.ax.contour(X1, X2, Z, levels,
|
||||
colors=colors,
|
||||
linestyles=linestyles))
|
||||
elif type == 1:
|
||||
self.contours.append(self.ax.contourf(X1, X2, Z, 10,
|
||||
cmap=matplotlib.cm.bone,
|
||||
origin='lower', alpha=0.85))
|
||||
self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
|
||||
linestyles=['solid']))
|
||||
else:
|
||||
raise ValueError("surface type unknown")
|
||||
|
||||
|
||||
class ControllBar(object):
    """Strip of Tk widgets for choosing the kernel, its parameters and the
    surface style, together with the ``Fit`` and ``Clear`` buttons."""

    def __init__(self, root, controller):
        """Create the widgets inside ``root`` and wire them to ``controller``.

        Side effect: four ``Tk.StringVar`` objects are stored on the
        controller (``complexity``, ``gamma``, ``degree``, ``coef0``); they
        hold the text shown in the entry boxes.
        """
        bar = Tk.Frame(root)

        # Kernel choice: the radio value is the position in this tuple.
        kernel_frame = Tk.Frame(bar)
        for value, caption in enumerate(("Linear", "RBF", "Poly")):
            Tk.Radiobutton(kernel_frame, text=caption,
                           variable=controller.kernel, value=value,
                           command=controller.refit).pack(anchor=Tk.W)
        kernel_frame.pack(side=Tk.LEFT)

        # One "label + entry" row per parameter:
        # (controller attribute, caption, initial text).
        params_frame = Tk.Frame(bar)
        fields = (("complexity", "C:", "1.0"),
                  ("gamma", "gamma:", "0.01"),
                  ("degree", "degree:", "3"),
                  ("coef0", "coef0:", "0"))
        for attr, caption, initial in fields:
            var = Tk.StringVar()
            setattr(controller, attr, var)
            var.set(initial)
            row = Tk.Frame(params_frame)
            Tk.Label(row, text=caption, anchor="e", width=7).pack(side=Tk.LEFT)
            Tk.Entry(row, width=6, textvariable=var).pack(side=Tk.LEFT)
            row.pack()
        params_frame.pack(side=Tk.LEFT)

        # Surface style: 0 = hyperplanes, 1 = filled surface.
        surface_frame = Tk.Frame(bar)
        for value, caption in enumerate(("Hyperplanes", "Surface")):
            Tk.Radiobutton(surface_frame, text=caption,
                           variable=controller.surface_type, value=value,
                           command=controller.refit).pack(anchor=Tk.W)
        surface_frame.pack(side=Tk.LEFT)

        fit_button = Tk.Button(bar, text='Fit', width=5,
                               command=controller.fit)
        fit_button.pack()
        bar.pack(side=Tk.LEFT)
        clear_button = Tk.Button(bar, text='Clear', width=5,
                                 command=controller.clear_data)
        clear_button.pack(side=Tk.LEFT)
|
||||
|
||||
|
||||
def get_parser():
    """Return an ``OptionParser`` that understands the ``--output`` option."""
    import optparse

    parser = optparse.OptionParser()
    parser.add_option("--output", dest="output", action="store", type="str",
                      help="Path where to dump data.")
    return parser
|
||||
|
||||
|
||||
def main(argv):
    """Run the GUI; after the window closes, dump the examples when
    ``--output`` was given.

    ``argv`` is the full argument vector; its first element is skipped.
    """
    options, _ = get_parser().parse_args(argv[1:])

    window = Tk.Tk()
    window.wm_title("Scikit-learn Libsvm GUI")
    model = Model()
    controller = Controller(model)
    model.add_observer(View(window, controller))
    Tk.mainloop()

    if options.output:
        model.dump_svmlight_file(options.output)
|
||||
|
||||
# Start the GUI only when the file is run as a script.
if __name__ == "__main__":
    main(sys.argv)
|
331
scikit-learn/fig_code/svm_gui.py~
Normal file
331
scikit-learn/fig_code/svm_gui.py~
Normal file
|
@ -0,0 +1,331 @@
|
|||
"""
|
||||
==========
|
||||
Libsvm GUI
|
||||
==========
|
||||
|
||||
A simple graphical frontend for Libsvm mainly intended for didactic
|
||||
purposes. You can create data points by point and click and visualize
|
||||
the decision region induced by different kernels and parameter settings.
|
||||
|
||||
To create positive examples click the left mouse button; to create
|
||||
negative examples click the right button.
|
||||
|
||||
If all examples are from the same class, it uses a one-class SVM.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function
|
||||
|
||||
print(__doc__)
|
||||
|
||||
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
|
||||
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
||||
from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.contour import ContourSet
|
||||
|
||||
import Tkinter as Tk
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
from sklearn import svm
|
||||
from sklearn.datasets import dump_svmlight_file
|
||||
from sklearn.externals.six.moves import xrange
|
||||
|
||||
y_min, y_max = -50, 50
|
||||
x_min, x_max = -50, 50
|
||||
|
||||
|
||||
class Model(object):
    """The Model which hold the data. It implements the
    observable in the observer pattern and notifies the
    registered observers on change event.
    """

    def __init__(self):
        self.observers = []
        # Tuple (X1, X2, Z) describing the decision surface, once fitted.
        self.surface = None
        # List of (x, y, label) tuples.
        self.data = []
        # Fitted classifier; the controller assigns it and the view reads
        # ``clf.support_vectors_``, so make sure the attribute always exists.
        self.clf = None
        # Kept for backward compatibility with the old attribute name.
        self.cls = None
        self.surface_type = 0

    def changed(self, event):
        """Notify the observers. """
        for observer in self.observers:
            observer.update(event, self)

    def add_observer(self, observer):
        """Register an observer. """
        self.observers.append(observer)

    def set_surface(self, surface):
        """Store the decision surface tuple."""
        self.surface = surface

    def dump_svmlight_file(self, file):
        """Write the examples to ``file`` in svmlight format.

        Columns 0-1 of the data are the features and column 2 the label.
        Nothing is written when there are no examples.
        """
        if not self.data:
            # An empty list becomes a 1-D array and the column slicing below
            # would raise IndexError.
            print("No examples to dump.")
            return
        data = np.array(self.data)
        X = data[:, 0:2]
        y = data[:, 2]
        dump_svmlight_file(X, y, file)
|
||||
|
||||
|
||||
class Controller(object):
    """Reacts to the GUI: collects examples in the model and fits an SVM.

    The attributes ``complexity``, ``gamma``, ``degree`` and ``coef0`` read
    by :meth:`fit` are Tk variables attached to the controller by
    ``ControllBar``.
    """

    def __init__(self, model):
        self.model = model
        self.kernel = Tk.IntVar()
        self.surface_type = Tk.IntVar()
        # Whether or not a model has been fitted
        self.fitted = False

    def fit(self):
        """Fit an SVM on the model's examples and publish the result.

        A ``OneClassSVM`` is used when all labels are equal, an ``SVC``
        otherwise.  The classifier, decision surface and surface type are
        stored on the model and the ``"surface"`` event is emitted.  Does
        nothing when the model holds no examples.
        """
        if not self.model.data:
            # An empty list becomes a 1-D array and the column slicing below
            # would raise IndexError.
            print("no examples to fit")
            return

        print("fit the model")
        train = np.array(self.model.data)
        X = train[:, 0:2]
        y = train[:, 2]

        C = float(self.complexity.get())
        gamma = float(self.gamma.get())
        coef0 = float(self.coef0.get())
        degree = int(self.degree.get())
        kernel_map = {0: "linear", 1: "rbf", 2: "poly"}
        if len(np.unique(y)) == 1:
            clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()],
                                  gamma=gamma, coef0=coef0, degree=degree)
            clf.fit(X)
        else:
            clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C,
                          gamma=gamma, coef0=coef0, degree=degree)
            clf.fit(X, y)
        if hasattr(clf, 'score'):
            print("Accuracy:", clf.score(X, y) * 100)
        X1, X2, Z = self.decision_surface(clf)
        self.model.clf = clf
        self.model.set_surface((X1, X2, Z))
        self.model.surface_type = self.surface_type.get()
        self.fitted = True
        self.model.changed("surface")

    def decision_surface(self, cls):
        """Evaluate ``cls.decision_function`` on a grid with spacing 1 that
        covers ``x_min..x_max`` by ``y_min..y_max``; return ``(X1, X2, Z)``
        with ``Z`` reshaped like the meshgrid arrays."""
        delta = 1
        x = np.arange(x_min, x_max + delta, delta)
        y = np.arange(y_min, y_max + delta, delta)
        X1, X2 = np.meshgrid(x, y)
        Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()])
        Z = Z.reshape(X1.shape)
        return X1, X2, Z

    def clear_data(self):
        """Drop all examples, mark as unfitted and emit ``"clear"``."""
        self.model.data = []
        self.fitted = False
        self.model.changed("clear")

    def add_example(self, x, y, label):
        """Append ``(x, y, label)`` to the data and emit ``"example_added"``."""
        self.model.data.append((x, y, label))
        self.model.changed("example_added")

        # update decision surface if already fitted.
        self.refit()

    def refit(self):
        """Refit the model if already fitted. """
        if self.fitted:
            self.fit()
|
||||
|
||||
|
||||
class View(object):
    """Matplotlib canvas embedded in Tk that displays the SVM demo.

    The view plots the examples stored in the model, the support vectors
    and the decision surface, and forwards mouse clicks to the controller
    as new training examples.  ``main`` registers it as an observer of the
    model, which then drives it through :meth:`update`.
    """

    # Marker colour used for each class label (+1 -> white, -1 -> black).
    _LABEL_COLORS = {1: 'w', -1: 'k'}

    def __init__(self, root, controller):
        """Build the figure, canvas, toolbar and control bar inside ``root``.

        Parameters
        ----------
        root : Tk widget used as master for the canvas and the toolbar.
        controller : object whose ``add_example`` is called on clicks; it
            is also handed to the ``ControllBar``.
        """
        f = Figure()
        ax = f.add_subplot(111)
        self.f = f
        self.ax = ax
        self._reset_axes()
        canvas = FigureCanvasTkAgg(f, master=root)
        canvas.show()
        canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        canvas.mpl_connect('button_press_event', self.onclick)
        toolbar = NavigationToolbar2TkAgg(canvas, root)
        toolbar.update()
        self.controllbar = ControllBar(root, controller)
        self.canvas = canvas
        self.controller = controller
        # Artists belonging to the current decision surface (see remove_surface).
        self.contours = []
        self.c_labels = None
        self.plot_kernels()

    def _reset_axes(self):
        """Hide the ticks and pin the view limits to the demo's data range."""
        self.ax.set_xticks([])
        self.ax.set_yticks([])
        self.ax.set_xlim((x_min, x_max))
        self.ax.set_ylim((y_min, y_max))

    def plot_kernels(self):
        """Write the three kernel formulas as text below the plot area."""
        # Raw strings: the backslashes are mathtext markup, not Python escapes.
        self.ax.text(-50, -60, r"Linear: $u^T v$")
        self.ax.text(-20, -60, r"RBF: $\exp (-\gamma \| u-v \|^2)$")
        self.ax.text(10, -60, r"Poly: $(\gamma \, u^T v + r)^d$")

    def onclick(self, event):
        """Turn a mouse click into a new example.

        The left button (1) adds a ``+1`` example and the right button (3)
        a ``-1`` example.  Events without data coordinates are ignored.
        """
        print(event.button)
        # Compare against None explicitly: a click at coordinate 0.0 is a
        # valid position but is falsy.
        if event.xdata is None or event.ydata is None:
            return
        if event.button == 1:
            self.controller.add_example(event.xdata, event.ydata, 1)
        elif event.button == 3:
            self.controller.add_example(event.xdata, event.ydata, -1)

    def update_example(self, model, idx):
        """Plot ``model.data[idx]`` as a white (+1) or black (-1) dot.

        Raises ``ValueError`` for any other label.
        """
        x, y, l = model.data[idx]
        try:
            color = self._LABEL_COLORS[l]
        except KeyError:
            raise ValueError("unknown example label: %r" % (l,))
        # scalex/scaley are falsy so adding a point never rescales the view.
        self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)

    def update(self, event, model):
        """Observer callback: redraw according to the model ``event``.

        Handled events are ``"examples_loaded"``, ``"example_added"``,
        ``"clear"`` and ``"surface"``; the canvas is redrawn in every case.
        """
        if event == "examples_loaded":
            for i in range(len(model.data)):
                self.update_example(model, i)

        if event == "example_added":
            self.update_example(model, -1)

        if event == "clear":
            self.ax.clear()
            # clear() wipes the axes state, view limits included, so the
            # ticks and limits chosen in __init__ have to be re-applied.
            self._reset_axes()
            self.contours = []
            self.c_labels = None
            self.plot_kernels()

        if event == "surface":
            self.remove_surface()
            self.plot_support_vectors(model.clf.support_vectors_)
            self.plot_decision_surface(model.surface, model.surface_type)

        self.canvas.draw()

    def remove_surface(self):
        """Remove old decision surface."""
        for contour in self.contours:
            if isinstance(contour, ContourSet):
                # A contour set is removed collection by collection.
                for lineset in contour.collections:
                    lineset.remove()
            else:
                contour.remove()
        self.contours = []

    def plot_support_vectors(self, support_vectors):
        """Plot the support vectors by placing circles over the
        corresponding data points and adds the circle collection
        to the contours list."""
        cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
                             s=80, edgecolors="k", facecolors="none")
        self.contours.append(cs)

    def plot_decision_surface(self, surface, type):
        """Draw ``surface`` (the tuple ``(X1, X2, Z)``) on the axes.

        ``type`` 0 draws the contour lines at levels -1, 0 and 1; ``type`` 1
        draws a filled contour plot plus the level-0 line.  Any other value
        raises ``ValueError``.  The created artists are appended to
        ``self.contours``.
        """
        X1, X2, Z = surface
        if type == 0:
            levels = [-1.0, 0.0, 1.0]
            linestyles = ['dashed', 'solid', 'dashed']
            colors = 'k'
            self.contours.append(self.ax.contour(X1, X2, Z, levels,
                                                 colors=colors,
                                                 linestyles=linestyles))
        elif type == 1:
            self.contours.append(self.ax.contourf(X1, X2, Z, 10,
                                                  cmap=matplotlib.cm.bone,
                                                  origin='lower', alpha=0.85))
            self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
                                                 linestyles=['solid']))
        else:
            raise ValueError("surface type unknown")
|
||||
|
||||
|
||||
class ControllBar(object):
    """Strip of Tk widgets for choosing the kernel, its parameters and the
    surface style, together with the ``Fit`` and ``Clear`` buttons."""

    def __init__(self, root, controller):
        """Create the widgets inside ``root`` and wire them to ``controller``.

        Side effect: four ``Tk.StringVar`` objects are stored on the
        controller (``complexity``, ``gamma``, ``degree``, ``coef0``); they
        hold the text shown in the entry boxes.
        """
        bar = Tk.Frame(root)

        # Kernel choice: the radio value is the position in this tuple.
        kernel_frame = Tk.Frame(bar)
        for value, caption in enumerate(("Linear", "RBF", "Poly")):
            Tk.Radiobutton(kernel_frame, text=caption,
                           variable=controller.kernel, value=value,
                           command=controller.refit).pack(anchor=Tk.W)
        kernel_frame.pack(side=Tk.LEFT)

        # One "label + entry" row per parameter:
        # (controller attribute, caption, initial text).
        params_frame = Tk.Frame(bar)
        fields = (("complexity", "C:", "1.0"),
                  ("gamma", "gamma:", "0.01"),
                  ("degree", "degree:", "3"),
                  ("coef0", "coef0:", "0"))
        for attr, caption, initial in fields:
            var = Tk.StringVar()
            setattr(controller, attr, var)
            var.set(initial)
            row = Tk.Frame(params_frame)
            Tk.Label(row, text=caption, anchor="e", width=7).pack(side=Tk.LEFT)
            Tk.Entry(row, width=6, textvariable=var).pack(side=Tk.LEFT)
            row.pack()
        params_frame.pack(side=Tk.LEFT)

        # Surface style: 0 = hyperplanes, 1 = filled surface.
        surface_frame = Tk.Frame(bar)
        for value, caption in enumerate(("Hyperplanes", "Surface")):
            Tk.Radiobutton(surface_frame, text=caption,
                           variable=controller.surface_type, value=value,
                           command=controller.refit).pack(anchor=Tk.W)
        surface_frame.pack(side=Tk.LEFT)

        fit_button = Tk.Button(bar, text='Fit', width=5,
                               command=controller.fit)
        fit_button.pack()
        bar.pack(side=Tk.LEFT)
        clear_button = Tk.Button(bar, text='Clear', width=5,
                                 command=controller.clear_data)
        clear_button.pack(side=Tk.LEFT)
|
||||
|
||||
|
||||
def get_parser():
    """Return an ``OptionParser`` that understands the ``--output`` option."""
    import optparse

    parser = optparse.OptionParser()
    parser.add_option("--output", dest="output", action="store", type="str",
                      help="Path where to dump data.")
    return parser
|
||||
|
||||
|
||||
def main(argv):
    """Run the GUI; after the window closes, dump the examples when
    ``--output`` was given.

    ``argv`` is the full argument vector; its first element is skipped.
    """
    options, _ = get_parser().parse_args(argv[1:])

    window = Tk.Tk()
    window.wm_title("Scikit-learn Libsvm GUI")
    model = Model()
    controller = Controller(model)
    model.add_observer(View(window, controller))
    Tk.mainloop()

    if options.output:
        model.dump_svmlight_file(options.output)
|
||||
|
||||
# Start the GUI only when the file is run as a script.
if __name__ == "__main__":
    main(sys.argv)
|
Loading…
Reference in New Issue
Block a user