mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
commit
179c906ab3
|
@ -1,4 +0,0 @@
|
||||||
from .sgd_separator import plot_sgd_separator
|
|
||||||
from .linear_regression import plot_linear_regression
|
|
||||||
from .ML_flow_chart import plot_supervised_chart, plot_unsupervised_chart
|
|
||||||
from .helpers import plot_iris_knn
|
|
|
@ -1,75 +0,0 @@
|
||||||
"""
|
|
||||||
Small helpers for code that is not shown in the notebooks
|
|
||||||
"""
|
|
||||||
|
|
||||||
from sklearn import neighbors, datasets, linear_model
|
|
||||||
import pylab as pl
|
|
||||||
import numpy as np
|
|
||||||
from matplotlib.colors import ListedColormap
|
|
||||||
|
|
||||||
# Create color maps for 3-class classification problem, as with iris
|
|
||||||
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
|
|
||||||
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
|
|
||||||
|
|
||||||
def plot_iris_knn():
|
|
||||||
iris = datasets.load_iris()
|
|
||||||
X = iris.data[:, :2] # we only take the first two features. We could
|
|
||||||
# avoid this ugly slicing by using a two-dim dataset
|
|
||||||
y = iris.target
|
|
||||||
|
|
||||||
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
|
|
||||||
knn.fit(X, y)
|
|
||||||
|
|
||||||
x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
|
|
||||||
y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
|
|
||||||
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
|
|
||||||
np.linspace(y_min, y_max, 100))
|
|
||||||
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
|
|
||||||
|
|
||||||
# Put the result into a color plot
|
|
||||||
Z = Z.reshape(xx.shape)
|
|
||||||
pl.figure()
|
|
||||||
pl.pcolormesh(xx, yy, Z, cmap=cmap_light)
|
|
||||||
|
|
||||||
# Plot also the training points
|
|
||||||
pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
|
|
||||||
pl.xlabel('sepal length (cm)')
|
|
||||||
pl.ylabel('sepal width (cm)')
|
|
||||||
pl.axis('tight')
|
|
||||||
|
|
||||||
|
|
||||||
def plot_polynomial_regression():
|
|
||||||
rng = np.random.RandomState(0)
|
|
||||||
x = 2*rng.rand(100) - 1
|
|
||||||
|
|
||||||
f = lambda t: 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9
|
|
||||||
y = f(x) + .4 * rng.normal(size=100)
|
|
||||||
|
|
||||||
x_test = np.linspace(-1, 1, 100)
|
|
||||||
|
|
||||||
pl.figure()
|
|
||||||
pl.scatter(x, y, s=4)
|
|
||||||
|
|
||||||
X = np.array([x**i for i in range(5)]).T
|
|
||||||
X_test = np.array([x_test**i for i in range(5)]).T
|
|
||||||
regr = linear_model.LinearRegression()
|
|
||||||
regr.fit(X, y)
|
|
||||||
pl.plot(x_test, regr.predict(X_test), label='4th order')
|
|
||||||
|
|
||||||
X = np.array([x**i for i in range(10)]).T
|
|
||||||
X_test = np.array([x_test**i for i in range(10)]).T
|
|
||||||
regr = linear_model.LinearRegression()
|
|
||||||
regr.fit(X, y)
|
|
||||||
pl.plot(x_test, regr.predict(X_test), label='9th order')
|
|
||||||
|
|
||||||
pl.legend(loc='best')
|
|
||||||
pl.axis('tight')
|
|
||||||
pl.title('Fitting a 4th and a 9th order polynomial')
|
|
||||||
|
|
||||||
pl.figure()
|
|
||||||
pl.scatter(x, y, s=4)
|
|
||||||
pl.plot(x_test, f(x_test), label="truth")
|
|
||||||
pl.axis('tight')
|
|
||||||
pl.title('Ground truth (9th order polynomial)')
|
|
||||||
|
|
||||||
|
|
|
@ -1,331 +0,0 @@
|
||||||
"""
|
|
||||||
==========
|
|
||||||
Libsvm GUI
|
|
||||||
==========
|
|
||||||
|
|
||||||
A simple graphical frontend for Libsvm mainly intended for didactic
|
|
||||||
purposes. You can create data points by point and click and visualize
|
|
||||||
the decision region induced by different kernels and parameter settings.
|
|
||||||
|
|
||||||
To create positive examples click the left mouse button; to create
|
|
||||||
negative examples click the right button.
|
|
||||||
|
|
||||||
If all examples are from the same class, it uses a one-class SVM.
|
|
||||||
|
|
||||||
"""
|
|
||||||
from __future__ import division, print_function
|
|
||||||
|
|
||||||
print(__doc__)
|
|
||||||
|
|
||||||
# Author: Peter Prettenhoer <peter.prettenhofer@gmail.com>
|
|
||||||
#
|
|
||||||
# License: BSD 3 clause
|
|
||||||
|
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('TkAgg')
|
|
||||||
|
|
||||||
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
|
||||||
from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg
|
|
||||||
from matplotlib.figure import Figure
|
|
||||||
from matplotlib.contour import ContourSet
|
|
||||||
|
|
||||||
import Tkinter as Tk
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from sklearn import svm
|
|
||||||
from sklearn.datasets import dump_svmlight_file
|
|
||||||
from sklearn.externals.six.moves import xrange
|
|
||||||
|
|
||||||
y_min, y_max = -50, 50
|
|
||||||
x_min, x_max = -50, 50
|
|
||||||
|
|
||||||
|
|
||||||
class Model(object):
|
|
||||||
"""The Model which hold the data. It implements the
|
|
||||||
observable in the observer pattern and notifies the
|
|
||||||
registered observers on change event.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.observers = []
|
|
||||||
self.surface = None
|
|
||||||
self.data = []
|
|
||||||
self.cls = None
|
|
||||||
self.surface_type = 0
|
|
||||||
|
|
||||||
def changed(self, event):
|
|
||||||
"""Notify the observers. """
|
|
||||||
for observer in self.observers:
|
|
||||||
observer.update(event, self)
|
|
||||||
|
|
||||||
def add_observer(self, observer):
|
|
||||||
"""Register an observer. """
|
|
||||||
self.observers.append(observer)
|
|
||||||
|
|
||||||
def set_surface(self, surface):
|
|
||||||
self.surface = surface
|
|
||||||
|
|
||||||
def dump_svmlight_file(self, file):
|
|
||||||
data = np.array(self.data)
|
|
||||||
X = data[:, 0:2]
|
|
||||||
y = data[:, 2]
|
|
||||||
dump_svmlight_file(X, y, file)
|
|
||||||
|
|
||||||
|
|
||||||
class Controller(object):
|
|
||||||
def __init__(self, model):
|
|
||||||
self.model = model
|
|
||||||
self.kernel = Tk.IntVar()
|
|
||||||
self.surface_type = Tk.IntVar()
|
|
||||||
# Whether or not a model has been fitted
|
|
||||||
self.fitted = False
|
|
||||||
|
|
||||||
def fit(self):
|
|
||||||
print("fit the model")
|
|
||||||
train = np.array(self.model.data)
|
|
||||||
X = train[:, 0:2]
|
|
||||||
y = train[:, 2]
|
|
||||||
|
|
||||||
C = float(self.complexity.get())
|
|
||||||
gamma = float(self.gamma.get())
|
|
||||||
coef0 = float(self.coef0.get())
|
|
||||||
degree = int(self.degree.get())
|
|
||||||
kernel_map = {0: "linear", 1: "rbf", 2: "poly"}
|
|
||||||
if len(np.unique(y)) == 1:
|
|
||||||
clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()],
|
|
||||||
gamma=gamma, coef0=coef0, degree=degree)
|
|
||||||
clf.fit(X)
|
|
||||||
else:
|
|
||||||
clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C,
|
|
||||||
gamma=gamma, coef0=coef0, degree=degree)
|
|
||||||
clf.fit(X, y)
|
|
||||||
if hasattr(clf, 'score'):
|
|
||||||
print("Accuracy:", clf.score(X, y) * 100)
|
|
||||||
X1, X2, Z = self.decision_surface(clf)
|
|
||||||
self.model.clf = clf
|
|
||||||
self.model.set_surface((X1, X2, Z))
|
|
||||||
self.model.surface_type = self.surface_type.get()
|
|
||||||
self.fitted = True
|
|
||||||
self.model.changed("surface")
|
|
||||||
|
|
||||||
def decision_surface(self, cls):
|
|
||||||
delta = 1
|
|
||||||
x = np.arange(x_min, x_max + delta, delta)
|
|
||||||
y = np.arange(y_min, y_max + delta, delta)
|
|
||||||
X1, X2 = np.meshgrid(x, y)
|
|
||||||
Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()])
|
|
||||||
Z = Z.reshape(X1.shape)
|
|
||||||
return X1, X2, Z
|
|
||||||
|
|
||||||
def clear_data(self):
|
|
||||||
self.model.data = []
|
|
||||||
self.fitted = False
|
|
||||||
self.model.changed("clear")
|
|
||||||
|
|
||||||
def add_example(self, x, y, label):
|
|
||||||
self.model.data.append((x, y, label))
|
|
||||||
self.model.changed("example_added")
|
|
||||||
|
|
||||||
# update decision surface if already fitted.
|
|
||||||
self.refit()
|
|
||||||
|
|
||||||
def refit(self):
|
|
||||||
"""Refit the model if already fitted. """
|
|
||||||
if self.fitted:
|
|
||||||
self.fit()
|
|
||||||
|
|
||||||
|
|
||||||
class View(object):
|
|
||||||
"""Test docstring. """
|
|
||||||
def __init__(self, root, controller):
|
|
||||||
f = Figure()
|
|
||||||
ax = f.add_subplot(111)
|
|
||||||
ax.set_xticks([])
|
|
||||||
ax.set_yticks([])
|
|
||||||
ax.set_xlim((x_min, x_max))
|
|
||||||
ax.set_ylim((y_min, y_max))
|
|
||||||
canvas = FigureCanvasTkAgg(f, master=root)
|
|
||||||
canvas.show()
|
|
||||||
canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
|
|
||||||
canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
|
|
||||||
canvas.mpl_connect('button_press_event', self.onclick)
|
|
||||||
toolbar = NavigationToolbar2TkAgg(canvas, root)
|
|
||||||
toolbar.update()
|
|
||||||
self.controllbar = ControllBar(root, controller)
|
|
||||||
self.f = f
|
|
||||||
self.ax = ax
|
|
||||||
self.canvas = canvas
|
|
||||||
self.controller = controller
|
|
||||||
self.contours = []
|
|
||||||
self.c_labels = None
|
|
||||||
self.plot_kernels()
|
|
||||||
|
|
||||||
def plot_kernels(self):
|
|
||||||
self.ax.text(-50, -60, "Linear: $u^T v$")
|
|
||||||
self.ax.text(-20, -60, "RBF: $\exp (-\gamma \| u-v \|^2)$")
|
|
||||||
self.ax.text(10, -60, "Poly: $(\gamma \, u^T v + r)^d$")
|
|
||||||
|
|
||||||
def onclick(self, event):
|
|
||||||
print(event.button)
|
|
||||||
if event.xdata and event.ydata:
|
|
||||||
if event.button == 1:
|
|
||||||
self.controller.add_example(event.xdata, event.ydata, 1)
|
|
||||||
elif event.button == 3:
|
|
||||||
self.controller.add_example(event.xdata, event.ydata, -1)
|
|
||||||
|
|
||||||
def update_example(self, model, idx):
|
|
||||||
x, y, l = model.data[idx]
|
|
||||||
if l == 1:
|
|
||||||
color = 'w'
|
|
||||||
elif l == -1:
|
|
||||||
color = 'k'
|
|
||||||
self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)
|
|
||||||
|
|
||||||
def update(self, event, model):
|
|
||||||
if event == "examples_loaded":
|
|
||||||
for i in xrange(len(model.data)):
|
|
||||||
self.update_example(model, i)
|
|
||||||
|
|
||||||
if event == "example_added":
|
|
||||||
self.update_example(model, -1)
|
|
||||||
|
|
||||||
if event == "clear":
|
|
||||||
self.ax.clear()
|
|
||||||
self.ax.set_xticks([])
|
|
||||||
self.ax.set_yticks([])
|
|
||||||
self.contours = []
|
|
||||||
self.c_labels = None
|
|
||||||
self.plot_kernels()
|
|
||||||
|
|
||||||
if event == "surface":
|
|
||||||
self.remove_surface()
|
|
||||||
self.plot_support_vectors(model.clf.support_vectors_)
|
|
||||||
self.plot_decision_surface(model.surface, model.surface_type)
|
|
||||||
|
|
||||||
self.canvas.draw()
|
|
||||||
|
|
||||||
def remove_surface(self):
|
|
||||||
"""Remove old decision surface."""
|
|
||||||
if len(self.contours) > 0:
|
|
||||||
for contour in self.contours:
|
|
||||||
if isinstance(contour, ContourSet):
|
|
||||||
for lineset in contour.collections:
|
|
||||||
lineset.remove()
|
|
||||||
else:
|
|
||||||
contour.remove()
|
|
||||||
self.contours = []
|
|
||||||
|
|
||||||
def plot_support_vectors(self, support_vectors):
|
|
||||||
"""Plot the support vectors by placing circles over the
|
|
||||||
corresponding data points and adds the circle collection
|
|
||||||
to the contours list."""
|
|
||||||
cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
|
|
||||||
s=80, edgecolors="k", facecolors="none")
|
|
||||||
self.contours.append(cs)
|
|
||||||
|
|
||||||
def plot_decision_surface(self, surface, type):
|
|
||||||
X1, X2, Z = surface
|
|
||||||
if type == 0:
|
|
||||||
levels = [-1.0, 0.0, 1.0]
|
|
||||||
linestyles = ['dashed', 'solid', 'dashed']
|
|
||||||
colors = 'k'
|
|
||||||
self.contours.append(self.ax.contour(X1, X2, Z, levels,
|
|
||||||
colors=colors,
|
|
||||||
linestyles=linestyles))
|
|
||||||
elif type == 1:
|
|
||||||
self.contours.append(self.ax.contourf(X1, X2, Z, 10,
|
|
||||||
cmap=matplotlib.cm.bone,
|
|
||||||
origin='lower', alpha=0.85))
|
|
||||||
self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
|
|
||||||
linestyles=['solid']))
|
|
||||||
else:
|
|
||||||
raise ValueError("surface type unknown")
|
|
||||||
|
|
||||||
|
|
||||||
class ControllBar(object):
|
|
||||||
def __init__(self, root, controller):
|
|
||||||
fm = Tk.Frame(root)
|
|
||||||
kernel_group = Tk.Frame(fm)
|
|
||||||
Tk.Radiobutton(kernel_group, text="Linear", variable=controller.kernel,
|
|
||||||
value=0, command=controller.refit).pack(anchor=Tk.W)
|
|
||||||
Tk.Radiobutton(kernel_group, text="RBF", variable=controller.kernel,
|
|
||||||
value=1, command=controller.refit).pack(anchor=Tk.W)
|
|
||||||
Tk.Radiobutton(kernel_group, text="Poly", variable=controller.kernel,
|
|
||||||
value=2, command=controller.refit).pack(anchor=Tk.W)
|
|
||||||
kernel_group.pack(side=Tk.LEFT)
|
|
||||||
|
|
||||||
valbox = Tk.Frame(fm)
|
|
||||||
controller.complexity = Tk.StringVar()
|
|
||||||
controller.complexity.set("1.0")
|
|
||||||
c = Tk.Frame(valbox)
|
|
||||||
Tk.Label(c, text="C:", anchor="e", width=7).pack(side=Tk.LEFT)
|
|
||||||
Tk.Entry(c, width=6, textvariable=controller.complexity).pack(
|
|
||||||
side=Tk.LEFT)
|
|
||||||
c.pack()
|
|
||||||
|
|
||||||
controller.gamma = Tk.StringVar()
|
|
||||||
controller.gamma.set("0.01")
|
|
||||||
g = Tk.Frame(valbox)
|
|
||||||
Tk.Label(g, text="gamma:", anchor="e", width=7).pack(side=Tk.LEFT)
|
|
||||||
Tk.Entry(g, width=6, textvariable=controller.gamma).pack(side=Tk.LEFT)
|
|
||||||
g.pack()
|
|
||||||
|
|
||||||
controller.degree = Tk.StringVar()
|
|
||||||
controller.degree.set("3")
|
|
||||||
d = Tk.Frame(valbox)
|
|
||||||
Tk.Label(d, text="degree:", anchor="e", width=7).pack(side=Tk.LEFT)
|
|
||||||
Tk.Entry(d, width=6, textvariable=controller.degree).pack(side=Tk.LEFT)
|
|
||||||
d.pack()
|
|
||||||
|
|
||||||
controller.coef0 = Tk.StringVar()
|
|
||||||
controller.coef0.set("0")
|
|
||||||
r = Tk.Frame(valbox)
|
|
||||||
Tk.Label(r, text="coef0:", anchor="e", width=7).pack(side=Tk.LEFT)
|
|
||||||
Tk.Entry(r, width=6, textvariable=controller.coef0).pack(side=Tk.LEFT)
|
|
||||||
r.pack()
|
|
||||||
valbox.pack(side=Tk.LEFT)
|
|
||||||
|
|
||||||
cmap_group = Tk.Frame(fm)
|
|
||||||
Tk.Radiobutton(cmap_group, text="Hyperplanes",
|
|
||||||
variable=controller.surface_type, value=0,
|
|
||||||
command=controller.refit).pack(anchor=Tk.W)
|
|
||||||
Tk.Radiobutton(cmap_group, text="Surface",
|
|
||||||
variable=controller.surface_type, value=1,
|
|
||||||
command=controller.refit).pack(anchor=Tk.W)
|
|
||||||
|
|
||||||
cmap_group.pack(side=Tk.LEFT)
|
|
||||||
|
|
||||||
train_button = Tk.Button(fm, text='Fit', width=5,
|
|
||||||
command=controller.fit)
|
|
||||||
train_button.pack()
|
|
||||||
fm.pack(side=Tk.LEFT)
|
|
||||||
Tk.Button(fm, text='Clear', width=5,
|
|
||||||
command=controller.clear_data).pack(side=Tk.LEFT)
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser():
|
|
||||||
from optparse import OptionParser
|
|
||||||
op = OptionParser()
|
|
||||||
op.add_option("--output",
|
|
||||||
action="store", type="str", dest="output",
|
|
||||||
help="Path where to dump data.")
|
|
||||||
return op
|
|
||||||
|
|
||||||
|
|
||||||
def main(argv):
|
|
||||||
op = get_parser()
|
|
||||||
opts, args = op.parse_args(argv[1:])
|
|
||||||
root = Tk.Tk()
|
|
||||||
model = Model()
|
|
||||||
controller = Controller(model)
|
|
||||||
root.wm_title("Scikit-learn Libsvm GUI")
|
|
||||||
view = View(root, controller)
|
|
||||||
model.add_observer(view)
|
|
||||||
Tk.mainloop()
|
|
||||||
|
|
||||||
if opts.output:
|
|
||||||
model.dump_svmlight_file(opts.output)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main(sys.argv)
|
|
Loading…
Reference in New Issue
Block a user