mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
commit
179c906ab3
|
@ -1,4 +0,0 @@
|
|||
from .sgd_separator import plot_sgd_separator
|
||||
from .linear_regression import plot_linear_regression
|
||||
from .ML_flow_chart import plot_supervised_chart, plot_unsupervised_chart
|
||||
from .helpers import plot_iris_knn
|
|
@ -1,75 +0,0 @@
|
|||
"""
|
||||
Small helpers for code that is not shown in the notebooks
|
||||
"""
|
||||
|
||||
from sklearn import neighbors, datasets, linear_model
|
||||
import pylab as pl
|
||||
import numpy as np
|
||||
from matplotlib.colors import ListedColormap
|
||||
|
||||
# Create color maps for 3-class classification problem, as with iris
|
||||
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
|
||||
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
|
||||
|
||||
def plot_iris_knn():
    """Plot the decision regions of a 3-nearest-neighbours iris classifier.

    Only the first two columns of the iris data are used so that the
    problem can be drawn in the plane. The classifier is trained on the
    whole dataset, evaluated on a 100 x 100 grid that extends 0.1 beyond
    the data range in each direction, and the predicted classes are drawn
    as a light colour mesh with the training points on top.
    """
    dataset = datasets.load_iris()
    # Keep two features only; a genuinely two-dimensional dataset would
    # make this slicing unnecessary.
    features = dataset.data[:, :2]
    labels = dataset.target

    classifier = neighbors.KNeighborsClassifier(n_neighbors=3)
    classifier.fit(features, labels)

    first, second = features[:, 0], features[:, 1]
    grid_x, grid_y = np.meshgrid(
        np.linspace(first.min() - .1, first.max() + .1, 100),
        np.linspace(second.min() - .1, second.max() + .1, 100))

    # Classify every grid node, then fold the flat predictions back into
    # the grid shape so they can be drawn as a mesh.
    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    predicted = classifier.predict(grid_points).reshape(grid_x.shape)

    pl.figure()
    pl.pcolormesh(grid_x, grid_y, predicted, cmap=cmap_light)

    # Overlay the training points in the matching bold colours.
    pl.scatter(first, second, c=labels, cmap=cmap_bold)
    pl.xlabel('sepal length (cm)')
    pl.ylabel('sepal width (cm)')
    pl.axis('tight')
|
||||
|
||||
|
||||
def plot_polynomial_regression():
    """Compare 4th and 9th order polynomial fits on noisy synthetic data.

    One hundred points are drawn uniformly from [-1, 1) with a fixed seed,
    pushed through a 9th order polynomial and perturbed with Gaussian
    noise. The first figure shows the samples with both least-squares
    fits; the second shows the samples with the noise-free polynomial.
    """
    def truth(t):
        return 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9

    def powers(values, n_terms):
        # Design matrix whose columns are values**0 ... values**(n_terms-1).
        return np.array([values**p for p in range(n_terms)]).T

    rng = np.random.RandomState(0)
    samples = 2*rng.rand(100) - 1
    targets = truth(samples) + .4 * rng.normal(size=100)
    grid = np.linspace(-1, 1, 100)

    pl.figure()
    pl.scatter(samples, targets, s=4)

    # An order-k polynomial needs k + 1 monomial columns.
    for n_terms, caption in ((5, '4th order'), (10, '9th order')):
        model = linear_model.LinearRegression()
        model.fit(powers(samples, n_terms), targets)
        pl.plot(grid, model.predict(powers(grid, n_terms)), label=caption)

    pl.legend(loc='best')
    pl.axis('tight')
    pl.title('Fitting a 4th and a 9th order polynomial')

    pl.figure()
    pl.scatter(samples, targets, s=4)
    pl.plot(grid, truth(grid), label="truth")
    pl.axis('tight')
    pl.title('Ground truth (9th order polynomial)')
|
||||
|
||||
|
|
@ -1,331 +0,0 @@
|
|||
"""
|
||||
==========
|
||||
Libsvm GUI
|
||||
==========
|
||||
|
||||
A simple graphical frontend for Libsvm mainly intended for didactic
|
||||
purposes. You can create data points by point and click and visualize
|
||||
the decision region induced by different kernels and parameter settings.
|
||||
|
||||
To create positive examples click the left mouse button; to create
|
||||
negative examples click the right button.
|
||||
|
||||
If all examples are from the same class, it uses a one-class SVM.
|
||||
|
||||
"""
|
||||
from __future__ import division, print_function
|
||||
|
||||
print(__doc__)
|
||||
|
||||
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use('TkAgg')
|
||||
|
||||
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
||||
from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg
|
||||
from matplotlib.figure import Figure
|
||||
from matplotlib.contour import ContourSet
|
||||
|
||||
import Tkinter as Tk
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
from sklearn import svm
|
||||
from sklearn.datasets import dump_svmlight_file
|
||||
from sklearn.externals.six.moves import xrange
|
||||
|
||||
y_min, y_max = -50, 50
|
||||
x_min, x_max = -50, 50
|
||||
|
||||
|
||||
class Model(object):
    """Hold the clicked examples and the most recent fit results.

    This is the observable half of the observer pattern: objects passed to
    :meth:`add_observer` have their ``update(event, model)`` method called
    each time :meth:`changed` is invoked.
    """

    def __init__(self):
        self.observers = []
        self.surface = None
        # List of ``(x, y, label)`` tuples, one per example.
        self.data = []
        # ``cls`` is kept so existing readers of that name keep working;
        # ``clf`` is the attribute the controller assigns after fitting and
        # the view reads, so it is initialised here as well.
        self.cls = None
        self.clf = None
        self.surface_type = 0

    def changed(self, event):
        """Notify every registered observer of ``event``."""
        for observer in self.observers:
            observer.update(event, self)

    def add_observer(self, observer):
        """Register ``observer``; it must provide ``update(event, model)``."""
        self.observers.append(observer)

    def set_surface(self, surface):
        """Store the decision surface computed for the current classifier."""
        self.surface = surface

    def dump_svmlight_file(self, file):
        """Write the collected examples to ``file`` in svmlight format.

        The first two entries of each example are written as features and
        the third as the target. When no example has been collected
        nothing is written, because an empty list cannot be sliced into
        feature and label columns.
        """
        if not self.data:
            print("No examples to dump.")
            return
        data = np.array(self.data)
        X = data[:, 0:2]
        y = data[:, 2]
        dump_svmlight_file(X, y, file)
|
||||
|
||||
|
||||
class Controller(object):
    """Translate GUI actions into model updates and SVM fits.

    The hyper-parameter variables ``complexity``, ``gamma``, ``coef0`` and
    ``degree`` are not created here; they are attached to the controller by
    the control bar when the widgets are built.
    """

    def __init__(self, model):
        self.model = model
        self.kernel = Tk.IntVar()
        self.surface_type = Tk.IntVar()
        # Whether or not a model has been fitted
        self.fitted = False

    def fit(self):
        """Fit an SVM on the current examples and publish its surface.

        A one-class SVM is used when all examples share one label,
        otherwise a two-class ``SVC``. Does nothing (apart from a console
        message) when there are no examples yet.
        """
        print("fit the model")
        if not self.model.data:
            # An empty list becomes a 1-d array, which cannot be sliced
            # into feature and label columns below.
            print("no examples to fit")
            return
        train = np.array(self.model.data)
        X = train[:, 0:2]
        y = train[:, 2]

        C = float(self.complexity.get())
        gamma = float(self.gamma.get())
        coef0 = float(self.coef0.get())
        degree = int(self.degree.get())
        kernel_map = {0: "linear", 1: "rbf", 2: "poly"}
        kernel = kernel_map[self.kernel.get()]
        if len(np.unique(y)) == 1:
            clf = svm.OneClassSVM(kernel=kernel,
                                  gamma=gamma, coef0=coef0, degree=degree)
            clf.fit(X)
        else:
            clf = svm.SVC(kernel=kernel, C=C,
                          gamma=gamma, coef0=coef0, degree=degree)
            clf.fit(X, y)
        if hasattr(clf, 'score'):
            print("Accuracy:", clf.score(X, y) * 100)
        X1, X2, Z = self.decision_surface(clf)
        self.model.clf = clf
        self.model.set_surface((X1, X2, Z))
        self.model.surface_type = self.surface_type.get()
        self.fitted = True
        self.model.changed("surface")

    def decision_surface(self, cls):
        """Evaluate ``cls.decision_function`` on a unit-spaced grid.

        The grid covers the module-level ``x_min``..``x_max`` and
        ``y_min``..``y_max`` ranges inclusively. Returns the two mesh
        coordinate arrays and the decision values reshaped to match them.
        """
        delta = 1
        x = np.arange(x_min, x_max + delta, delta)
        y = np.arange(y_min, y_max + delta, delta)
        X1, X2 = np.meshgrid(x, y)
        Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()])
        Z = Z.reshape(X1.shape)
        return X1, X2, Z

    def clear_data(self):
        """Drop all examples, mark the model unfitted and notify observers."""
        self.model.data = []
        self.fitted = False
        self.model.changed("clear")

    def add_example(self, x, y, label):
        """Append one ``(x, y, label)`` example and notify observers."""
        self.model.data.append((x, y, label))
        self.model.changed("example_added")

        # update decision surface if already fitted.
        self.refit()

    def refit(self):
        """Refit the model if already fitted. """
        if self.fitted:
            self.fit()
|
||||
|
||||
|
||||
class View(object):
    """Tk/matplotlib view showing the examples and the decision surface.

    Button presses on the canvas are forwarded to the controller as new
    examples; :meth:`update` redraws the axes when the model reports a
    change.
    """

    def __init__(self, root, controller):
        f = Figure()
        ax = f.add_subplot(111)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim((x_min, x_max))
        ax.set_ylim((y_min, y_max))
        canvas = FigureCanvasTkAgg(f, master=root)
        # NOTE(review): newer matplotlib releases appear to have dropped
        # ``show`` on this canvas in favour of ``draw`` -- confirm against
        # the installed version before upgrading.
        canvas.show()
        canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        canvas.mpl_connect('button_press_event', self.onclick)
        toolbar = NavigationToolbar2TkAgg(canvas, root)
        toolbar.update()
        self.controllbar = ControllBar(root, controller)
        self.f = f
        self.ax = ax
        self.canvas = canvas
        self.controller = controller
        # Artists belonging to the current decision surface, kept so they
        # can be removed before the next one is drawn.
        self.contours = []
        self.c_labels = None
        self.plot_kernels()

    def plot_kernels(self):
        """Write the three kernel formulas below the plotting area."""
        # Raw strings keep the backslashes for mathtext without relying on
        # Python passing unknown escape sequences through unchanged.
        self.ax.text(-50, -60, "Linear: $u^T v$")
        self.ax.text(-20, -60, r"RBF: $\exp (-\gamma \| u-v \|^2)$")
        self.ax.text(10, -60, r"Poly: $(\gamma \, u^T v + r)^d$")

    def onclick(self, event):
        """Add an example at the clicked position.

        The left button (1) adds a positive example, the right button (3)
        a negative one. Clicks without data coordinates are ignored.
        """
        print(event.button)
        # Compare against None explicitly: a coordinate of exactly 0.0 is
        # a valid position but is falsy.
        if event.xdata is None or event.ydata is None:
            return
        if event.button == 1:
            self.controller.add_example(event.xdata, event.ydata, 1)
        elif event.button == 3:
            self.controller.add_example(event.xdata, event.ydata, -1)

    def update_example(self, model, idx):
        """Draw example ``idx`` of ``model.data`` as a coloured circle.

        Raises ``ValueError`` if the example's label is neither 1 nor -1.
        """
        x, y, l = model.data[idx]
        if l == 1:
            color = 'w'
        elif l == -1:
            color = 'k'
        else:
            raise ValueError("unknown example label: %r" % (l,))
        # scalex/scaley are falsy so adding a point never rescales the view.
        self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)

    def update(self, event, model):
        """React to a model change identified by the string ``event``.

        Handled events are "examples_loaded", "example_added", "clear" and
        "surface"; the canvas is redrawn after every call.
        """
        if event == "examples_loaded":
            for idx in range(len(model.data)):
                self.update_example(model, idx)

        if event == "example_added":
            self.update_example(model, -1)

        if event == "clear":
            self.ax.clear()
            self.ax.set_xticks([])
            self.ax.set_yticks([])
            # Clearing the axes discards the limits chosen at construction
            # time; restore them so clicks keep mapping to the same
            # coordinate range the decision surface is computed on.
            self.ax.set_xlim((x_min, x_max))
            self.ax.set_ylim((y_min, y_max))
            self.contours = []
            self.c_labels = None
            self.plot_kernels()

        if event == "surface":
            self.remove_surface()
            self.plot_support_vectors(model.clf.support_vectors_)
            self.plot_decision_surface(model.surface, model.surface_type)

        self.canvas.draw()

    def remove_surface(self):
        """Remove old decision surface."""
        for contour in self.contours:
            if isinstance(contour, ContourSet):
                for lineset in contour.collections:
                    lineset.remove()
            else:
                contour.remove()
        self.contours = []

    def plot_support_vectors(self, support_vectors):
        """Circle the support vectors.

        The resulting scatter collection is appended to ``self.contours``
        so that it is removed together with the decision surface.
        """
        cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
                             s=80, edgecolors="k", facecolors="none")
        self.contours.append(cs)

    def plot_decision_surface(self, surface, type):
        """Draw ``surface`` as ``(X1, X2, Z)`` in the requested style.

        ``type`` 0 draws the decision boundary and the two margins as
        contour lines; ``type`` 1 draws filled contours plus the decision
        boundary. Any other value raises ``ValueError``.
        """
        X1, X2, Z = surface
        if type == 0:
            levels = [-1.0, 0.0, 1.0]
            linestyles = ['dashed', 'solid', 'dashed']
            colors = 'k'
            self.contours.append(self.ax.contour(X1, X2, Z, levels,
                                                 colors=colors,
                                                 linestyles=linestyles))
        elif type == 1:
            self.contours.append(self.ax.contourf(X1, X2, Z, 10,
                                                  cmap=matplotlib.cm.bone,
                                                  origin='lower', alpha=0.85))
            self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
                                                 linestyles=['solid']))
        else:
            raise ValueError("surface type unknown")
|
||||
|
||||
|
||||
class ControllBar(object):
    """Row of Tk widgets for choosing the kernel, parameters and surface.

    Building the bar also attaches the ``complexity``, ``gamma``,
    ``degree`` and ``coef0`` string variables to the controller.
    """

    def __init__(self, root, controller):
        bar = Tk.Frame(root)

        # Kernel selection; the radio value is the index in this tuple.
        kernel_box = Tk.Frame(bar)
        for value, caption in enumerate(("Linear", "RBF", "Poly")):
            Tk.Radiobutton(kernel_box, text=caption,
                           variable=controller.kernel, value=value,
                           command=controller.refit).pack(anchor=Tk.W)
        kernel_box.pack(side=Tk.LEFT)

        # One labelled entry per hyper-parameter:
        # (controller attribute, label text, initial value).
        params_box = Tk.Frame(bar)
        fields = (
            ("complexity", "C:", "1.0"),
            ("gamma", "gamma:", "0.01"),
            ("degree", "degree:", "3"),
            ("coef0", "coef0:", "0"),
        )
        for attr, caption, initial in fields:
            var = Tk.StringVar()
            setattr(controller, attr, var)
            var.set(initial)
            row = Tk.Frame(params_box)
            Tk.Label(row, text=caption, anchor="e", width=7).pack(
                side=Tk.LEFT)
            Tk.Entry(row, width=6, textvariable=var).pack(side=Tk.LEFT)
            row.pack()
        params_box.pack(side=Tk.LEFT)

        # Surface style selection.
        surface_box = Tk.Frame(bar)
        for value, caption in enumerate(("Hyperplanes", "Surface")):
            Tk.Radiobutton(surface_box, text=caption,
                           variable=controller.surface_type, value=value,
                           command=controller.refit).pack(anchor=Tk.W)
        surface_box.pack(side=Tk.LEFT)

        Tk.Button(bar, text='Fit', width=5, command=controller.fit).pack()
        bar.pack(side=Tk.LEFT)
        Tk.Button(bar, text='Clear', width=5,
                  command=controller.clear_data).pack(side=Tk.LEFT)
|
||||
|
||||
|
||||
def get_parser():
    """Return the option parser for the command line of this script.

    The only option is ``--output``, stored as ``output``: the path the
    collected examples are dumped to.
    """
    import optparse

    parser = optparse.OptionParser()
    parser.add_option(
        "--output",
        dest="output",
        type="str",
        action="store",
        help="Path where to dump data.",
    )
    return parser
|
||||
|
||||
|
||||
def main(argv):
    """Run the GUI and, once it is closed, optionally dump the examples.

    ``argv`` is the full argument vector; its first element (the program
    name) is skipped when parsing options. If ``--output`` was given, the
    examples are written to that path after the Tk main loop returns.
    """
    options, _positional = get_parser().parse_args(argv[1:])

    window = Tk.Tk()
    window.wm_title("Scikit-learn Libsvm GUI")

    state = Model()
    # The model's observer list keeps the view referenced while the main
    # loop runs.
    state.add_observer(View(window, Controller(state)))
    Tk.mainloop()

    if not options.output:
        return
    state.dump_svmlight_file(options.output)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main(sys.argv)
|
Loading…
Reference in New Issue
Block a user