Added fig_code from https://github.com/jakevdp/sklearn_pycon2015, which is used in the scikit-learn notebooks.

This commit is contained in:
Donne Martin 2015-04-17 14:14:29 -04:00
parent 815accffa0
commit 0ebd99c4e8
12 changed files with 1425 additions and 0 deletions

View File

@ -0,0 +1,135 @@
"""
Tutorial Diagrams
-----------------
This script plots the flow-charts used in the scikit-learn tutorials.
"""
import numpy as np
import pylab as pl
from matplotlib.patches import Circle, Rectangle, Polygon, Arrow, FancyArrow
def create_base(box_bg = '#CCCCCC',
arrow1 = '#88CCFF',
arrow2 = '#88FF88',
supervised=True):
fig = pl.figure(figsize=(9, 6), facecolor='w')
ax = pl.axes((0, 0, 1, 1),
xticks=[], yticks=[], frameon=False)
ax.set_xlim(0, 9)
ax.set_ylim(0, 6)
patches = [Rectangle((0.3, 3.6), 1.5, 1.8, zorder=1, fc=box_bg),
Rectangle((0.5, 3.8), 1.5, 1.8, zorder=2, fc=box_bg),
Rectangle((0.7, 4.0), 1.5, 1.8, zorder=3, fc=box_bg),
Rectangle((2.9, 3.6), 0.2, 1.8, fc=box_bg),
Rectangle((3.1, 3.8), 0.2, 1.8, fc=box_bg),
Rectangle((3.3, 4.0), 0.2, 1.8, fc=box_bg),
Rectangle((0.3, 0.2), 1.5, 1.8, fc=box_bg),
Rectangle((2.9, 0.2), 0.2, 1.8, fc=box_bg),
Circle((5.5, 3.5), 1.0, fc=box_bg),
Polygon([[5.5, 1.7],
[6.1, 1.1],
[5.5, 0.5],
[4.9, 1.1]], fc=box_bg),
FancyArrow(2.3, 4.6, 0.35, 0, fc=arrow1,
width=0.25, head_width=0.5, head_length=0.2),
FancyArrow(3.75, 4.2, 0.5, -0.2, fc=arrow1,
width=0.25, head_width=0.5, head_length=0.2),
FancyArrow(5.5, 2.4, 0, -0.4, fc=arrow1,
width=0.25, head_width=0.5, head_length=0.2),
FancyArrow(2.0, 1.1, 0.5, 0, fc=arrow2,
width=0.25, head_width=0.5, head_length=0.2),
FancyArrow(3.3, 1.1, 1.3, 0, fc=arrow2,
width=0.25, head_width=0.5, head_length=0.2),
FancyArrow(6.2, 1.1, 0.8, 0, fc=arrow2,
width=0.25, head_width=0.5, head_length=0.2)]
if supervised:
patches += [Rectangle((0.3, 2.4), 1.5, 0.5, zorder=1, fc=box_bg),
Rectangle((0.5, 2.6), 1.5, 0.5, zorder=2, fc=box_bg),
Rectangle((0.7, 2.8), 1.5, 0.5, zorder=3, fc=box_bg),
FancyArrow(2.3, 2.9, 2.0, 0, fc=arrow1,
width=0.25, head_width=0.5, head_length=0.2),
Rectangle((7.3, 0.85), 1.5, 0.5, fc=box_bg)]
else:
patches += [Rectangle((7.3, 0.2), 1.5, 1.8, fc=box_bg)]
for p in patches:
ax.add_patch(p)
pl.text(1.45, 4.9, "Training\nText,\nDocuments,\nImages,\netc.",
ha='center', va='center', fontsize=14)
pl.text(3.6, 4.9, "Feature\nVectors",
ha='left', va='center', fontsize=14)
pl.text(5.5, 3.5, "Machine\nLearning\nAlgorithm",
ha='center', va='center', fontsize=14)
pl.text(1.05, 1.1, "New Text,\nDocument,\nImage,\netc.",
ha='center', va='center', fontsize=14)
pl.text(3.3, 1.7, "Feature\nVector",
ha='left', va='center', fontsize=14)
pl.text(5.5, 1.1, "Predictive\nModel",
ha='center', va='center', fontsize=12)
if supervised:
pl.text(1.45, 3.05, "Labels",
ha='center', va='center', fontsize=14)
pl.text(8.05, 1.1, "Expected\nLabel",
ha='center', va='center', fontsize=14)
pl.text(8.8, 5.8, "Supervised Learning Model",
ha='right', va='top', fontsize=18)
else:
pl.text(8.05, 1.1,
"Likelihood\nor Cluster ID\nor Better\nRepresentation",
ha='center', va='center', fontsize=12)
pl.text(8.8, 5.8, "Unsupervised Learning Model",
ha='right', va='top', fontsize=18)
def plot_supervised_chart(annotate=False):
create_base(supervised=True)
if annotate:
fontdict = dict(color='r', weight='bold', size=14)
pl.text(1.9, 4.55, 'X = vec.fit_transform(input)',
fontdict=fontdict,
rotation=20, ha='left', va='bottom')
pl.text(3.7, 3.2, 'clf.fit(X, y)',
fontdict=fontdict,
rotation=20, ha='left', va='bottom')
pl.text(1.7, 1.5, 'X_new = vec.transform(input)',
fontdict=fontdict,
rotation=20, ha='left', va='bottom')
pl.text(6.1, 1.5, 'y_new = clf.predict(X_new)',
fontdict=fontdict,
rotation=20, ha='left', va='bottom')
def plot_unsupervised_chart():
create_base(supervised=False)
if __name__ == '__main__':
plot_supervised_chart(False)
plot_supervised_chart(True)
plot_unsupervised_chart()
pl.show()

View File

@ -0,0 +1,6 @@
from .data import *
from .figures import *
from .sgd_separator import plot_sgd_separator
from .linear_regression import plot_linear_regression
from .helpers import plot_iris_knn

View File

@ -0,0 +1,4 @@
from .sgd_separator import plot_sgd_separator
from .linear_regression import plot_linear_regression
from .ML_flow_chart import plot_supervised_chart, plot_unsupervised_chart
from .helpers import plot_iris_knn

View File

@ -0,0 +1,47 @@
import numpy as np
def linear_data_sample(N=40, rseed=0, m=3, b=-2):
rng = np.random.RandomState(rseed)
x = 10 * rng.rand(N)
dy = m / 2 * (1 + rng.rand(N))
y = m * x + b + dy * rng.randn(N)
return (x, y, dy)
def linear_data_sample_big_errs(N=40, rseed=0, m=3, b=-2):
rng = np.random.RandomState(rseed)
x = 10 * rng.rand(N)
dy = m / 2 * (1 + rng.rand(N))
dy[20:25] *= 10
y = m * x + b + dy * rng.randn(N)
return (x, y, dy)
def sample_light_curve(phased=True):
from astroML.datasets import fetch_LINEAR_sample
data = fetch_LINEAR_sample()
t, y, dy = data[18525697].T
if phased:
P_best = 0.580313015651
t /= P_best
return (t, y, dy)
def sample_light_curve_2(phased=True):
from astroML.datasets import fetch_LINEAR_sample
data = fetch_LINEAR_sample()
t, y, dy = data[10022663].T
if phased:
P_best = 0.61596079804
t /= P_best
return (t, y, dy)

View File

@ -0,0 +1,233 @@
import numpy as np
import matplotlib.pyplot as plt
import warnings
def plot_venn_diagram():
fig, ax = plt.subplots(subplot_kw=dict(frameon=False, xticks=[], yticks=[]))
ax.add_patch(plt.Circle((0.3, 0.3), 0.3, fc='red', alpha=0.5))
ax.add_patch(plt.Circle((0.6, 0.3), 0.3, fc='blue', alpha=0.5))
ax.add_patch(plt.Rectangle((-0.1, -0.1), 1.1, 0.8, fc='none', ec='black'))
ax.text(0.2, 0.3, '$x$', size=30, ha='center', va='center')
ax.text(0.7, 0.3, '$y$', size=30, ha='center', va='center')
ax.text(0.0, 0.6, '$I$', size=30)
ax.axis('equal')
def plot_example_decision_tree():
fig = plt.figure(figsize=(10, 4))
ax = fig.add_axes([0, 0, 0.8, 1], frameon=False, xticks=[], yticks=[])
ax.set_title('Example Decision Tree: Animal Classification', size=24)
def text(ax, x, y, t, size=20, **kwargs):
ax.text(x, y, t,
ha='center', va='center', size=size,
bbox=dict(boxstyle='round', ec='k', fc='w'), **kwargs)
text(ax, 0.5, 0.9, "How big is\nthe animal?", 20)
text(ax, 0.3, 0.6, "Does the animal\nhave horns?", 18)
text(ax, 0.7, 0.6, "Does the animal\nhave two legs?", 18)
text(ax, 0.12, 0.3, "Are the horns\nlonger than 10cm?", 14)
text(ax, 0.38, 0.3, "Is the animal\nwearing a collar?", 14)
text(ax, 0.62, 0.3, "Does the animal\nhave wings?", 14)
text(ax, 0.88, 0.3, "Does the animal\nhave a tail?", 14)
text(ax, 0.4, 0.75, "> 1m", 12, alpha=0.4)
text(ax, 0.6, 0.75, "< 1m", 12, alpha=0.4)
text(ax, 0.21, 0.45, "yes", 12, alpha=0.4)
text(ax, 0.34, 0.45, "no", 12, alpha=0.4)
text(ax, 0.66, 0.45, "yes", 12, alpha=0.4)
text(ax, 0.79, 0.45, "no", 12, alpha=0.4)
ax.plot([0.3, 0.5, 0.7], [0.6, 0.9, 0.6], '-k')
ax.plot([0.12, 0.3, 0.38], [0.3, 0.6, 0.3], '-k')
ax.plot([0.62, 0.7, 0.88], [0.3, 0.6, 0.3], '-k')
ax.plot([0.0, 0.12, 0.20], [0.0, 0.3, 0.0], '--k')
ax.plot([0.28, 0.38, 0.48], [0.0, 0.3, 0.0], '--k')
ax.plot([0.52, 0.62, 0.72], [0.0, 0.3, 0.0], '--k')
ax.plot([0.8, 0.88, 1.0], [0.0, 0.3, 0.0], '--k')
ax.axis([0, 1, 0, 1])
def visualize_tree(estimator, X, y, boundaries=True,
xlim=None, ylim=None):
estimator.fit(X, y)
if xlim is None:
xlim = (X[:, 0].min() - 0.1, X[:, 0].max() + 0.1)
if ylim is None:
ylim = (X[:, 1].min() - 0.1, X[:, 1].max() + 0.1)
x_min, x_max = xlim
y_min, y_max = ylim
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
np.linspace(y_min, y_max, 100))
Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, alpha=0.2, cmap='rainbow')
plt.clim(y.min(), y.max())
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
plt.axis('off')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.clim(y.min(), y.max())
# Plot the decision boundaries
def plot_boundaries(i, xlim, ylim):
if i < 0:
return
tree = estimator.tree_
if tree.feature[i] == 0:
plt.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k')
plot_boundaries(tree.children_left[i],
[xlim[0], tree.threshold[i]], ylim)
plot_boundaries(tree.children_right[i],
[tree.threshold[i], xlim[1]], ylim)
elif tree.feature[i] == 1:
plt.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k')
plot_boundaries(tree.children_left[i], xlim,
[ylim[0], tree.threshold[i]])
plot_boundaries(tree.children_right[i], xlim,
[tree.threshold[i], ylim[1]])
if boundaries:
plot_boundaries(0, plt.xlim(), plt.ylim())
def plot_tree_interactive(X, y):
from sklearn.tree import DecisionTreeClassifier
def interactive_tree(depth=1):
clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
visualize_tree(clf, X, y)
from IPython.html.widgets import interact
return interact(interactive_tree, depth=[1, 5])
def plot_kmeans_interactive(min_clusters=1, max_clusters=6):
from IPython.html.widgets import interact
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.datasets.samples_generator import make_blobs
with warnings.catch_warnings():
warnings.filterwarnings('ignore')
X, y = make_blobs(n_samples=300, centers=4,
random_state=0, cluster_std=0.60)
def _kmeans_step(frame=0, n_clusters=4):
rng = np.random.RandomState(2)
labels = np.zeros(X.shape[0])
centers = rng.randn(n_clusters, 2)
nsteps = frame // 3
for i in range(nsteps + 1):
old_centers = centers
if i < nsteps or frame % 3 > 0:
dist = euclidean_distances(X, centers)
labels = dist.argmin(1)
if i < nsteps or frame % 3 > 1:
centers = np.array([X[labels == j].mean(0)
for j in range(n_clusters)])
nans = np.isnan(centers)
centers[nans] = old_centers[nans]
# plot the data and cluster centers
plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='rainbow',
vmin=0, vmax=n_clusters - 1);
plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
c=np.arange(n_clusters),
s=200, cmap='rainbow')
plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
c='black', s=50)
# plot new centers if third frame
if frame % 3 == 2:
for i in range(n_clusters):
plt.annotate('', centers[i], old_centers[i],
arrowprops=dict(arrowstyle='->', linewidth=1))
plt.scatter(centers[:, 0], centers[:, 1], marker='o',
c=np.arange(n_clusters),
s=200, cmap='rainbow')
plt.scatter(centers[:, 0], centers[:, 1], marker='o',
c='black', s=50)
plt.xlim(-4, 4)
plt.ylim(-2, 10)
if frame % 3 == 1:
plt.text(3.8, 9.5, "1. Reassign points to nearest centroid",
ha='right', va='top', size=14)
elif frame % 3 == 2:
plt.text(3.8, 9.5, "2. Update centroids to cluster means",
ha='right', va='top', size=14)
return interact(_kmeans_step, frame=[0, 50],
n_clusters=[min_clusters, max_clusters])
def plot_image_components(x, coefficients=None, mean=0, components=None,
imshape=(8, 8), n_components=6, fontsize=12):
if coefficients is None:
coefficients = x
if components is None:
components = np.eye(len(coefficients), len(x))
mean = np.zeros_like(x) + mean
fig = plt.figure(figsize=(1.2 * (5 + n_components), 1.2 * 2))
g = plt.GridSpec(2, 5 + n_components, hspace=0.3)
def show(i, j, x, title=None):
ax = fig.add_subplot(g[i, j], xticks=[], yticks=[])
ax.imshow(x.reshape(imshape), interpolation='nearest')
if title:
ax.set_title(title, fontsize=fontsize)
show(slice(2), slice(2), x, "True")
approx = mean.copy()
show(0, 2, np.zeros_like(x) + mean, r'$\mu$')
show(1, 2, approx, r'$1 \cdot \mu$')
for i in range(0, n_components):
approx = approx + coefficients[i] * components[i]
show(0, i + 3, components[i], r'$c_{0}$'.format(i + 1))
show(1, i + 3, approx,
r"${0:.2f} \cdot c_{1}$".format(coefficients[i], i + 1))
plt.gca().text(0, 1.05, '$+$', ha='right', va='bottom',
transform=plt.gca().transAxes, fontsize=fontsize)
show(slice(2), slice(-2, None), approx, "Approx")
def plot_pca_interactive(data, n_components=6):
from sklearn.decomposition import PCA
from IPython.html.widgets import interact
pca = PCA(n_components=n_components)
Xproj = pca.fit_transform(data)
def show_decomp(i=0):
plot_image_components(data[i], Xproj[i],
pca.mean_, pca.components_)
interact(show_decomp, i=(0, data.shape[0] - 1));

View File

@ -0,0 +1,75 @@
"""
Small helpers for code that is not shown in the notebooks
"""
from sklearn import neighbors, datasets, linear_model
import pylab as pl
import numpy as np
from matplotlib.colors import ListedColormap
# Create color maps for 3-class classification problem, as with iris
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
def plot_iris_knn():
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features. We could
# avoid this ugly slicing by using a two-dim dataset
y = iris.target
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
np.linspace(y_min, y_max, 100))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
pl.figure()
pl.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot also the training points
pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
pl.xlabel('sepal length (cm)')
pl.ylabel('sepal width (cm)')
pl.axis('tight')
def plot_polynomial_regression():
rng = np.random.RandomState(0)
x = 2*rng.rand(100) - 1
f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9
y = f(x) + .4 * rng.normal(size=100)
x_test = np.linspace(-1, 1, 100)
pl.figure()
pl.scatter(x, y, s=4)
X = np.array([x**i for i in range(5)]).T
X_test = np.array([x_test**i for i in range(5)]).T
regr = linear_model.LinearRegression()
regr.fit(X, y)
pl.plot(x_test, regr.predict(X_test), label='4th order')
X = np.array([x**i for i in range(10)]).T
X_test = np.array([x_test**i for i in range(10)]).T
regr = linear_model.LinearRegression()
regr.fit(X, y)
pl.plot(x_test, regr.predict(X_test), label='9th order')
pl.legend(loc='best')
pl.axis('tight')
pl.title('Fitting a 4th and a 9th order polynomial')
pl.figure()
pl.scatter(x, y, s=4)
pl.plot(x_test, f(x_test), label="truth")
pl.axis('tight')
pl.title('Ground truth (9th order polynomial)')

View File

@ -0,0 +1,75 @@
"""
Small helpers for code that is not shown in the notebooks
"""
from sklearn import neighbors, datasets, linear_model
import pylab as pl
import numpy as np
from matplotlib.colors import ListedColormap
# Create color maps for 3-class classification problem, as with iris
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
def plot_iris_knn():
iris = datasets.load_iris()
X = iris.data[:, :2] # we only take the first two features. We could
# avoid this ugly slicing by using a two-dim dataset
y = iris.target
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
knn.fit(X, y)
x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
np.linspace(y_min, y_max, 100))
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
pl.figure()
pl.pcolormesh(xx, yy, Z, cmap=cmap_light)
# Plot also the training points
pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
pl.xlabel('sepal length (cm)')
pl.ylabel('sepal width (cm)')
pl.axis('tight')
def plot_polynomial_regression():
rng = np.random.RandomState(0)
x = 2*rng.rand(100) - 1
f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9
y = f(x) + .4 * rng.normal(size=100)
x_test = np.linspace(-1, 1, 100)
pl.figure()
pl.scatter(x, y, s=4)
X = np.array([x**i for i in range(5)]).T
X_test = np.array([x_test**i for i in range(5)]).T
regr = linear_model.LinearRegression()
regr.fit(X, y)
pl.plot(x_test, regr.predict(X_test), label='4th order')
X = np.array([x**i for i in range(10)]).T
X_test = np.array([x_test**i for i in range(10)]).T
regr = linear_model.LinearRegression()
regr.fit(X, y)
pl.plot(x_test, regr.predict(X_test), label='9th order')
pl.legend(loc='best')
pl.axis('tight')
pl.title('Fitting a 4th and a 9th order polynomial')
pl.figure()
pl.scatter(x, y, s=4)
pl.plot(x_test, f(x_test), label="truth")
pl.axis('tight')
pl.title('Ground truth (9th order polynomial)')

View File

@ -0,0 +1,37 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
def plot_linear_regression():
a = 0.5
b = 1.0
# x from 0 to 10
x = 30 * np.random.random(20)
# y = a*x + b with noise
y = a * x + b + np.random.normal(size=x.shape)
# create a linear regression classifier
clf = LinearRegression()
clf.fit(x[:, None], y)
# predict y from the data
x_new = np.linspace(0, 30, 100)
y_new = clf.predict(x_new[:, None])
# plot the results
ax = plt.axes()
ax.scatter(x, y)
ax.plot(x_new, y_new)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.axis('tight')
if __name__ == '__main__':
plot_linear_regression()
plt.show()

View File

@ -0,0 +1,101 @@
{
"metadata": {
"name": "",
"signature": "sha256:29899a15bea89b9d8275879798b23011cecabc0eff03dd41bb606324221e0bc3"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# scikit-learn"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%matplotlib inline\n",
"\n",
"# set seaborn plot defaults.\n",
"# This can be safely commented out\n",
"import seaborn; seaborn.set()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Import the example plot from the figures directory\n",
"from fig_code import plot_sgd_separator\n",
"plot_sgd_separator()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "No module named fig_code",
"output_type": "pyerr",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-ce8360b266e1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Import the example plot from the figures directory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mfig_code\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mplot_sgd_separator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mplot_sgd_separator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mImportError\u001b[0m: No module named fig_code"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}

View File

@ -0,0 +1,40 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
from sklearn.datasets.samples_generator import make_blobs
def plot_sgd_separator():
# we create 50 separable points
X, Y = make_blobs(n_samples=50, centers=2,
random_state=0, cluster_std=0.60)
# fit the model
clf = SGDClassifier(loss="hinge", alpha=0.01,
n_iter=200, fit_intercept=True)
clf.fit(X, Y)
# plot the line, the points, and the nearest vectors to the plane
xx = np.linspace(-1, 5, 10)
yy = np.linspace(-1, 5, 10)
X1, X2 = np.meshgrid(xx, yy)
Z = np.empty(X1.shape)
for (i, j), val in np.ndenumerate(X1):
x1 = val
x2 = X2[i, j]
p = clf.decision_function([x1, x2])
Z[i, j] = p[0]
levels = [-1.0, 0.0, 1.0]
linestyles = ['dashed', 'solid', 'dashed']
colors = 'k'
ax = plt.axes()
ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
ax.axis('tight')
if __name__ == '__main__':
plot_sgd_separator()
plt.show()

View File

@ -0,0 +1,341 @@
"""
==========
Libsvm GUI
==========
A simple graphical frontend for Libsvm mainly intended for didactic
purposes. You can create data points by point and click and visualize
the decision region induced by different kernels and parameter settings.
To create positive examples click the left mouse button; to create
negative examples click the right button.
If all examples are from the same class, it uses a one-class SVM.
"""
from __future__ import division, print_function
print(__doc__)
# Author: Peter Prettenhoer <peter.prettenhofer@gmail.com>
#
# License: BSD 3 clause
import matplotlib
matplotlib.use('TkAgg')
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg
from matplotlib.figure import Figure
from matplotlib.contour import ContourSet
import Tkinter as Tk
import sys
import numpy as np
from sklearn import svm
from sklearn.datasets import dump_svmlight_file
from sklearn.externals.six.moves import xrange
y_min, y_max = -50, 50
x_min, x_max = -50, 50
class Model(object):
"""The Model which hold the data. It implements the
observable in the observer pattern and notifies the
registered observers on change event.
"""
def __init__(self):
self.observers = []
self.surface = None
self.data = []
self.cls = None
self.surface_type = 0
def changed(self, event):
"""Notify the observers. """
for observer in self.observers:
observer.update(event, self)
def add_observer(self, observer):
"""Register an observer. """
self.observers.append(observer)
def set_surface(self, surface):
self.surface = surface
def dump_svmlight_file(self, file):
data = np.array(self.data)
X = data[:, 0:2]
y = data[:, 2]
dump_svmlight_file(X, y, file)
class Controller(object):
def __init__(self, model):
self.model = model
self.kernel = Tk.IntVar()
self.surface_type = Tk.IntVar()
# Whether or not a model has been fitted
self.fitted = False
def fit(self):
print("fit the model")
train = np.array(self.model.data)
X = train[:, 0:2]
y = train[:, 2]
C = float(self.complexity.get())
gamma = float(self.gamma.get())
coef0 = float(self.coef0.get())
degree = int(self.degree.get())
kernel_map = {0: "linear", 1: "rbf", 2: "poly"}
if len(np.unique(y)) == 1:
clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()],
gamma=gamma, coef0=coef0, degree=degree)
clf.fit(X)
else:
clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C,
gamma=gamma, coef0=coef0, degree=degree)
clf.fit(X, y)
if hasattr(clf, 'score'):
print("Accuracy:", clf.score(X, y) * 100)
X1, X2, Z = self.decision_surface(clf)
self.model.clf = clf
self.model.set_surface((X1, X2, Z))
self.model.surface_type = self.surface_type.get()
self.fitted = True
self.model.changed("surface")
def decision_surface(self, cls):
delta = 1
x = np.arange(x_min, x_max + delta, delta)
y = np.arange(y_min, y_max + delta, delta)
X1, X2 = np.meshgrid(x, y)
Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()])
Z = Z.reshape(X1.shape)
return X1, X2, Z
def clear_data(self):
self.model.data = []
self.fitted = False
self.model.changed("clear")
def add_example(self, x, y, label):
self.model.data.append((x, y, label))
self.model.changed("example_added")
# update decision surface if already fitted.
self.refit()
def refit(self):
"""Refit the model if already fitted. """
if self.fitted:
self.fit()
class View(object):
"""Test docstring. """
def __init__(self, root, controller):
f = Figure()
ax = f.add_subplot(111)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlim((x_min, x_max))
ax.set_ylim((y_min, y_max))
canvas = FigureCanvasTkAgg(f, master=root)
canvas.show()
canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
canvas.mpl_connect('key_press_event', self.onkeypress)
canvas.mpl_connect('key_release_event', self.onkeyrelease)
canvas.mpl_connect('button_press_event', self.onclick)
toolbar = NavigationToolbar2TkAgg(canvas, root)
toolbar.update()
self.shift_down = False
self.controllbar = ControllBar(root, controller)
self.f = f
self.ax = ax
self.canvas = canvas
self.controller = controller
self.contours = []
self.c_labels = None
self.plot_kernels()
def plot_kernels(self):
self.ax.text(-50, -60, "Linear: $u^T v$")
self.ax.text(-20, -60, "RBF: $\exp (-\gamma \| u-v \|^2)$")
self.ax.text(10, -60, "Poly: $(\gamma \, u^T v + r)^d$")
def onkeypress(self, event):
if event.key == "shift":
self.shift_down = True
def onkeyrelease(self, event):
if event.key == "shift":
self.shift_down = False
def onclick(self, event):
if event.xdata and event.ydata:
if self.shift_down or event.button == 3:
self.controller.add_example(event.xdata, event.ydata, -1)
elif event.button == 1:
self.controller.add_example(event.xdata, event.ydata, 1)
def update_example(self, model, idx):
x, y, l = model.data[idx]
if l == 1:
color = 'w'
elif l == -1:
color = 'k'
self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)
def update(self, event, model):
if event == "examples_loaded":
for i in xrange(len(model.data)):
self.update_example(model, i)
if event == "example_added":
self.update_example(model, -1)
if event == "clear":
self.ax.clear()
self.ax.set_xticks([])
self.ax.set_yticks([])
self.contours = []
self.c_labels = None
self.plot_kernels()
if event == "surface":
self.remove_surface()
self.plot_support_vectors(model.clf.support_vectors_)
self.plot_decision_surface(model.surface, model.surface_type)
self.canvas.draw()
def remove_surface(self):
"""Remove old decision surface."""
if len(self.contours) > 0:
for contour in self.contours:
if isinstance(contour, ContourSet):
for lineset in contour.collections:
lineset.remove()
else:
contour.remove()
self.contours = []
def plot_support_vectors(self, support_vectors):
"""Plot the support vectors by placing circles over the
corresponding data points and adds the circle collection
to the contours list."""
cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
s=80, edgecolors="k", facecolors="none")
self.contours.append(cs)
def plot_decision_surface(self, surface, type):
X1, X2, Z = surface
if type == 0:
levels = [-1.0, 0.0, 1.0]
linestyles = ['dashed', 'solid', 'dashed']
colors = 'k'
self.contours.append(self.ax.contour(X1, X2, Z, levels,
colors=colors,
linestyles=linestyles))
elif type == 1:
self.contours.append(self.ax.contourf(X1, X2, Z, 10,
cmap=matplotlib.cm.bone,
origin='lower', alpha=0.85))
self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
linestyles=['solid']))
else:
raise ValueError("surface type unknown")
class ControllBar(object):
def __init__(self, root, controller):
fm = Tk.Frame(root)
kernel_group = Tk.Frame(fm)
Tk.Radiobutton(kernel_group, text="Linear", variable=controller.kernel,
value=0, command=controller.refit).pack(anchor=Tk.W)
Tk.Radiobutton(kernel_group, text="RBF", variable=controller.kernel,
value=1, command=controller.refit).pack(anchor=Tk.W)
Tk.Radiobutton(kernel_group, text="Poly", variable=controller.kernel,
value=2, command=controller.refit).pack(anchor=Tk.W)
kernel_group.pack(side=Tk.LEFT)
valbox = Tk.Frame(fm)
controller.complexity = Tk.StringVar()
controller.complexity.set("1.0")
c = Tk.Frame(valbox)
Tk.Label(c, text="C:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(c, width=6, textvariable=controller.complexity).pack(
side=Tk.LEFT)
c.pack()
controller.gamma = Tk.StringVar()
controller.gamma.set("0.01")
g = Tk.Frame(valbox)
Tk.Label(g, text="gamma:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(g, width=6, textvariable=controller.gamma).pack(side=Tk.LEFT)
g.pack()
controller.degree = Tk.StringVar()
controller.degree.set("3")
d = Tk.Frame(valbox)
Tk.Label(d, text="degree:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(d, width=6, textvariable=controller.degree).pack(side=Tk.LEFT)
d.pack()
controller.coef0 = Tk.StringVar()
controller.coef0.set("0")
r = Tk.Frame(valbox)
Tk.Label(r, text="coef0:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(r, width=6, textvariable=controller.coef0).pack(side=Tk.LEFT)
r.pack()
valbox.pack(side=Tk.LEFT)
cmap_group = Tk.Frame(fm)
Tk.Radiobutton(cmap_group, text="Hyperplanes",
variable=controller.surface_type, value=0,
command=controller.refit).pack(anchor=Tk.W)
Tk.Radiobutton(cmap_group, text="Surface",
variable=controller.surface_type, value=1,
command=controller.refit).pack(anchor=Tk.W)
cmap_group.pack(side=Tk.LEFT)
train_button = Tk.Button(fm, text='Fit', width=5,
command=controller.fit)
train_button.pack()
fm.pack(side=Tk.LEFT)
Tk.Button(fm, text='Clear', width=5,
command=controller.clear_data).pack(side=Tk.LEFT)
def get_parser():
from optparse import OptionParser
op = OptionParser()
op.add_option("--output",
action="store", type="str", dest="output",
help="Path where to dump data.")
return op
def main(argv):
op = get_parser()
opts, args = op.parse_args(argv[1:])
root = Tk.Tk()
model = Model()
controller = Controller(model)
root.wm_title("Scikit-learn Libsvm GUI")
view = View(root, controller)
model.add_observer(view)
Tk.mainloop()
if opts.output:
model.dump_svmlight_file(opts.output)
if __name__ == "__main__":
main(sys.argv)

View File

@ -0,0 +1,331 @@
"""
==========
Libsvm GUI
==========
A simple graphical frontend for Libsvm mainly intended for didactic
purposes. You can create data points by point and click and visualize
the decision region induced by different kernels and parameter settings.
To create positive examples click the left mouse button; to create
negative examples click the right button.
If all examples are from the same class, it uses a one-class SVM.
"""
from __future__ import division, print_function
print(__doc__)
# Author: Peter Prettenhoer <peter.prettenhofer@gmail.com>
#
# License: BSD 3 clause
import matplotlib
matplotlib.use('TkAgg')
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg
from matplotlib.figure import Figure
from matplotlib.contour import ContourSet
import Tkinter as Tk
import sys
import numpy as np
from sklearn import svm
from sklearn.datasets import dump_svmlight_file
from sklearn.externals.six.moves import xrange
y_min, y_max = -50, 50
x_min, x_max = -50, 50
class Model(object):
"""The Model which hold the data. It implements the
observable in the observer pattern and notifies the
registered observers on change event.
"""
def __init__(self):
self.observers = []
self.surface = None
self.data = []
self.cls = None
self.surface_type = 0
def changed(self, event):
"""Notify the observers. """
for observer in self.observers:
observer.update(event, self)
def add_observer(self, observer):
"""Register an observer. """
self.observers.append(observer)
def set_surface(self, surface):
self.surface = surface
def dump_svmlight_file(self, file):
data = np.array(self.data)
X = data[:, 0:2]
y = data[:, 2]
dump_svmlight_file(X, y, file)
class Controller(object):
def __init__(self, model):
self.model = model
self.kernel = Tk.IntVar()
self.surface_type = Tk.IntVar()
# Whether or not a model has been fitted
self.fitted = False
def fit(self):
print("fit the model")
train = np.array(self.model.data)
X = train[:, 0:2]
y = train[:, 2]
C = float(self.complexity.get())
gamma = float(self.gamma.get())
coef0 = float(self.coef0.get())
degree = int(self.degree.get())
kernel_map = {0: "linear", 1: "rbf", 2: "poly"}
if len(np.unique(y)) == 1:
clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()],
gamma=gamma, coef0=coef0, degree=degree)
clf.fit(X)
else:
clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C,
gamma=gamma, coef0=coef0, degree=degree)
clf.fit(X, y)
if hasattr(clf, 'score'):
print("Accuracy:", clf.score(X, y) * 100)
X1, X2, Z = self.decision_surface(clf)
self.model.clf = clf
self.model.set_surface((X1, X2, Z))
self.model.surface_type = self.surface_type.get()
self.fitted = True
self.model.changed("surface")
def decision_surface(self, cls):
delta = 1
x = np.arange(x_min, x_max + delta, delta)
y = np.arange(y_min, y_max + delta, delta)
X1, X2 = np.meshgrid(x, y)
Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()])
Z = Z.reshape(X1.shape)
return X1, X2, Z
def clear_data(self):
self.model.data = []
self.fitted = False
self.model.changed("clear")
def add_example(self, x, y, label):
self.model.data.append((x, y, label))
self.model.changed("example_added")
# update decision surface if already fitted.
self.refit()
def refit(self):
"""Refit the model if already fitted. """
if self.fitted:
self.fit()
class View(object):
"""Test docstring. """
def __init__(self, root, controller):
f = Figure()
ax = f.add_subplot(111)
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlim((x_min, x_max))
ax.set_ylim((y_min, y_max))
canvas = FigureCanvasTkAgg(f, master=root)
canvas.show()
canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
canvas.mpl_connect('button_press_event', self.onclick)
toolbar = NavigationToolbar2TkAgg(canvas, root)
toolbar.update()
self.controllbar = ControllBar(root, controller)
self.f = f
self.ax = ax
self.canvas = canvas
self.controller = controller
self.contours = []
self.c_labels = None
self.plot_kernels()
def plot_kernels(self):
self.ax.text(-50, -60, "Linear: $u^T v$")
self.ax.text(-20, -60, "RBF: $\exp (-\gamma \| u-v \|^2)$")
self.ax.text(10, -60, "Poly: $(\gamma \, u^T v + r)^d$")
def onclick(self, event):
print(event.button)
if event.xdata and event.ydata:
if event.button == 1:
self.controller.add_example(event.xdata, event.ydata, 1)
elif event.button == 3:
self.controller.add_example(event.xdata, event.ydata, -1)
def update_example(self, model, idx):
x, y, l = model.data[idx]
if l == 1:
color = 'w'
elif l == -1:
color = 'k'
self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)
def update(self, event, model):
if event == "examples_loaded":
for i in xrange(len(model.data)):
self.update_example(model, i)
if event == "example_added":
self.update_example(model, -1)
if event == "clear":
self.ax.clear()
self.ax.set_xticks([])
self.ax.set_yticks([])
self.contours = []
self.c_labels = None
self.plot_kernels()
if event == "surface":
self.remove_surface()
self.plot_support_vectors(model.clf.support_vectors_)
self.plot_decision_surface(model.surface, model.surface_type)
self.canvas.draw()
def remove_surface(self):
"""Remove old decision surface."""
if len(self.contours) > 0:
for contour in self.contours:
if isinstance(contour, ContourSet):
for lineset in contour.collections:
lineset.remove()
else:
contour.remove()
self.contours = []
def plot_support_vectors(self, support_vectors):
"""Plot the support vectors by placing circles over the
corresponding data points and adds the circle collection
to the contours list."""
cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
s=80, edgecolors="k", facecolors="none")
self.contours.append(cs)
def plot_decision_surface(self, surface, type):
X1, X2, Z = surface
if type == 0:
levels = [-1.0, 0.0, 1.0]
linestyles = ['dashed', 'solid', 'dashed']
colors = 'k'
self.contours.append(self.ax.contour(X1, X2, Z, levels,
colors=colors,
linestyles=linestyles))
elif type == 1:
self.contours.append(self.ax.contourf(X1, X2, Z, 10,
cmap=matplotlib.cm.bone,
origin='lower', alpha=0.85))
self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
linestyles=['solid']))
else:
raise ValueError("surface type unknown")
class ControllBar(object):
def __init__(self, root, controller):
fm = Tk.Frame(root)
kernel_group = Tk.Frame(fm)
Tk.Radiobutton(kernel_group, text="Linear", variable=controller.kernel,
value=0, command=controller.refit).pack(anchor=Tk.W)
Tk.Radiobutton(kernel_group, text="RBF", variable=controller.kernel,
value=1, command=controller.refit).pack(anchor=Tk.W)
Tk.Radiobutton(kernel_group, text="Poly", variable=controller.kernel,
value=2, command=controller.refit).pack(anchor=Tk.W)
kernel_group.pack(side=Tk.LEFT)
valbox = Tk.Frame(fm)
controller.complexity = Tk.StringVar()
controller.complexity.set("1.0")
c = Tk.Frame(valbox)
Tk.Label(c, text="C:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(c, width=6, textvariable=controller.complexity).pack(
side=Tk.LEFT)
c.pack()
controller.gamma = Tk.StringVar()
controller.gamma.set("0.01")
g = Tk.Frame(valbox)
Tk.Label(g, text="gamma:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(g, width=6, textvariable=controller.gamma).pack(side=Tk.LEFT)
g.pack()
controller.degree = Tk.StringVar()
controller.degree.set("3")
d = Tk.Frame(valbox)
Tk.Label(d, text="degree:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(d, width=6, textvariable=controller.degree).pack(side=Tk.LEFT)
d.pack()
controller.coef0 = Tk.StringVar()
controller.coef0.set("0")
r = Tk.Frame(valbox)
Tk.Label(r, text="coef0:", anchor="e", width=7).pack(side=Tk.LEFT)
Tk.Entry(r, width=6, textvariable=controller.coef0).pack(side=Tk.LEFT)
r.pack()
valbox.pack(side=Tk.LEFT)
cmap_group = Tk.Frame(fm)
Tk.Radiobutton(cmap_group, text="Hyperplanes",
variable=controller.surface_type, value=0,
command=controller.refit).pack(anchor=Tk.W)
Tk.Radiobutton(cmap_group, text="Surface",
variable=controller.surface_type, value=1,
command=controller.refit).pack(anchor=Tk.W)
cmap_group.pack(side=Tk.LEFT)
train_button = Tk.Button(fm, text='Fit', width=5,
command=controller.fit)
train_button.pack()
fm.pack(side=Tk.LEFT)
Tk.Button(fm, text='Clear', width=5,
command=controller.clear_data).pack(side=Tk.LEFT)
def get_parser():
from optparse import OptionParser
op = OptionParser()
op.add_option("--output",
action="store", type="str", dest="output",
help="Path where to dump data.")
return op
def main(argv):
op = get_parser()
opts, args = op.parse_args(argv[1:])
root = Tk.Tk()
model = Model()
controller = Controller(model)
root.wm_title("Scikit-learn Libsvm GUI")
view = View(root, controller)
model.add_observer(view)
Tk.mainloop()
if opts.output:
model.dump_svmlight_file(opts.output)
if __name__ == "__main__":
main(sys.argv)