# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/ML_flow_chart.py
#       (new file, mode 100644, index 0000000..ecd7c30)
# ---------------------------------------------------------------------------
"""
Tutorial Diagrams
-----------------

This script plots the flow-charts used in the scikit-learn tutorials.
"""

import numpy as np
import pylab as pl
from matplotlib.patches import Circle, Rectangle, Polygon, Arrow, FancyArrow


def create_base(box_bg='#CCCCCC',
                arrow1='#88CCFF',
                arrow2='#88FF88',
                supervised=True):
    """Draw the un-annotated machine-learning flow chart.

    Parameters
    ----------
    box_bg : color
        Face color of every box, the circle and the diamond.
    arrow1 : color
        Face color of the arrows of the training path (top row).
    arrow2 : color
        Face color of the arrows of the prediction path (bottom row).
    supervised : bool
        If True, add the "Labels" stack, its arrow and the small
        "Expected Label" output box; otherwise draw the large output box
        of the unsupervised variant.

    Returns
    -------
    (fig, ax)
        The new figure and its single frameless axes.  Earlier versions
        returned nothing, so callers that ignore the result are unaffected.
    """
    fig = pl.figure(figsize=(9, 6), facecolor='w')
    ax = pl.axes((0, 0, 1, 1),
                 xticks=[], yticks=[], frameon=False)
    ax.set_xlim(0, 9)
    ax.set_ylim(0, 6)

    def box(xy, width, height, **kwargs):
        return Rectangle(xy, width, height, fc=box_bg, **kwargs)

    def arrow(x, y, dx, dy, color):
        return FancyArrow(x, y, dx, dy, fc=color,
                          width=0.25, head_width=0.5, head_length=0.2)

    patches = [
        # stack of training documents
        box((0.3, 3.6), 1.5, 1.8, zorder=1),
        box((0.5, 3.8), 1.5, 1.8, zorder=2),
        box((0.7, 4.0), 1.5, 1.8, zorder=3),
        # stack of training feature vectors
        box((2.9, 3.6), 0.2, 1.8),
        box((3.1, 3.8), 0.2, 1.8),
        box((3.3, 4.0), 0.2, 1.8),
        # new document and its feature vector
        box((0.3, 0.2), 1.5, 1.8),
        box((2.9, 0.2), 0.2, 1.8),
        # learning algorithm (circle) and predictive model (diamond)
        Circle((5.5, 3.5), 1.0, fc=box_bg),
        Polygon([[5.5, 1.7],
                 [6.1, 1.1],
                 [5.5, 0.5],
                 [4.9, 1.1]], fc=box_bg),
        # training path
        arrow(2.3, 4.6, 0.35, 0, arrow1),
        arrow(3.75, 4.2, 0.5, -0.2, arrow1),
        arrow(5.5, 2.4, 0, -0.4, arrow1),
        # prediction path
        arrow(2.0, 1.1, 0.5, 0, arrow2),
        arrow(3.3, 1.1, 1.3, 0, arrow2),
        arrow(6.2, 1.1, 0.8, 0, arrow2),
    ]

    if supervised:
        patches += [box((0.3, 2.4), 1.5, 0.5, zorder=1),
                    box((0.5, 2.6), 1.5, 0.5, zorder=2),
                    box((0.7, 2.8), 1.5, 0.5, zorder=3),
                    arrow(2.3, 2.9, 2.0, 0, arrow1),
                    box((7.3, 0.85), 1.5, 0.5)]
    else:
        patches += [box((7.3, 0.2), 1.5, 1.8)]

    for patch in patches:
        ax.add_patch(patch)

    # (x, y, text, horizontal alignment, vertical alignment, font size)
    labels = [
        (1.45, 4.9, "Training\nText,\nDocuments,\nImages,\netc.",
         'center', 'center', 14),
        (3.6, 4.9, "Feature\nVectors", 'left', 'center', 14),
        (5.5, 3.5, "Machine\nLearning\nAlgorithm", 'center', 'center', 14),
        (1.05, 1.1, "New Text,\nDocument,\nImage,\netc.",
         'center', 'center', 14),
        (3.3, 1.7, "Feature\nVector", 'left', 'center', 14),
        (5.5, 1.1, "Predictive\nModel", 'center', 'center', 12),
    ]
    if supervised:
        labels += [
            (1.45, 3.05, "Labels", 'center', 'center', 14),
            (8.05, 1.1, "Expected\nLabel", 'center', 'center', 14),
            (8.8, 5.8, "Supervised Learning Model", 'right', 'top', 18),
        ]
    else:
        labels += [
            (8.05, 1.1,
             "Likelihood\nor Cluster ID\nor Better\nRepresentation",
             'center', 'center', 12),
            (8.8, 5.8, "Unsupervised Learning Model", 'right', 'top', 18),
        ]

    for x, y, text, ha, va, fontsize in labels:
        ax.text(x, y, text, ha=ha, va=va, fontsize=fontsize)

    return fig, ax


def plot_supervised_chart(annotate=False):
    """Plot the supervised flow chart.

    With ``annotate=True`` the matching scikit-learn calls are written in
    bold red, rotated by 20 degrees, next to each arrow.
    """
    fig, ax = create_base(supervised=True)
    if not annotate:
        return

    fontdict = dict(color='r', weight='bold', size=14)
    annotations = [(1.9, 4.55, 'X = vec.fit_transform(input)'),
                   (3.7, 3.2, 'clf.fit(X, y)'),
                   (1.7, 1.5, 'X_new = vec.transform(input)'),
                   (6.1, 1.5, 'y_new = clf.predict(X_new)')]
    for x, y, text in annotations:
        ax.text(x, y, text,
                fontdict=fontdict,
                rotation=20, ha='left', va='bottom')


def plot_unsupervised_chart():
    """Plot the unsupervised flow chart (no labels, large output box)."""
    create_base(supervised=False)


if __name__ == '__main__':
    plot_supervised_chart(False)
    plot_supervised_chart(True)
    plot_unsupervised_chart()
    pl.show()


# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/__init__.py
#       (new file, mode 100644, index 0000000..2f3b5d6)
# ---------------------------------------------------------------------------
from .data import *
from .figures import *

from .sgd_separator import plot_sgd_separator
from .linear_regression import plot_linear_regression
from .helpers import plot_iris_knn

# NOTE(review): the patch also added scikit-learn/fig_code/__init__.py~
# (index 0000000..9d6fdd3), an editor backup of the package initialiser.
# Backup files do not belong in the commit, so it is not reproduced here.


# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/data.py
#       (new file, mode 100644, index 0000000..8545771)
# ---------------------------------------------------------------------------
import numpy as np


def _linear_sample(N, rseed, m, b, inflate=None):
    """Shared generator behind the two public ``linear_data_sample*``.

    ``inflate`` is an optional index (e.g. a slice) selecting the error
    bars that are multiplied by ten before the noise is drawn.
    """
    rng = np.random.RandomState(rseed)

    x = 10 * rng.rand(N)
    # ``0.5 * m`` rather than ``m / 2``: under Python 2 the latter is an
    # integer division for the default ``m=3`` and silently yields 1.
    dy = 0.5 * m * (1 + rng.rand(N))
    if inflate is not None:
        dy[inflate] *= 10
    y = m * x + b + dy * rng.randn(N)

    return (x, y, dy)


def linear_data_sample(N=40, rseed=0, m=3, b=-2):
    """Return ``(x, y, dy)``: noisy samples of the line ``y = m * x + b``.

    ``x`` is uniform on [0, 10), ``dy`` is a per-point error bar in
    [m / 2, m) and the noise added to ``y`` is Gaussian with scale ``dy``.
    ``rseed`` seeds the private random generator.
    """
    return _linear_sample(N, rseed, m, b)


def linear_data_sample_big_errs(N=40, rseed=0, m=3, b=-2):
    """Like `linear_data_sample`, with points 20..24 given 10x larger errors."""
    return _linear_sample(N, rseed, m, b, inflate=slice(20, 25))


def _scaled_light_curve(object_id, period, phased):
    """Fetch one LINEAR light curve as ``(t, y, dy)``.

    When ``phased`` is true the times are divided by ``period``.
    """
    # imported lazily so that astroML is only needed by these samplers
    from astroML.datasets import fetch_LINEAR_sample
    data = fetch_LINEAR_sample()
    t, y, dy = data[object_id].T

    if phased:
        # out-of-place on purpose: ``t`` is a view into the fetched array
        t = t / period

    return (t, y, dy)


def sample_light_curve(phased=True):
    """Light curve of LINEAR object 18525697 (period 0.580313015651)."""
    return _scaled_light_curve(18525697, 0.580313015651, phased)


def sample_light_curve_2(phased=True):
    """Light curve of LINEAR object 10022663 (period 0.61596079804)."""
    return _scaled_light_curve(10022663, 0.61596079804, phased)

# diff --git   (header of the next patch entry; it continues on the next line)
# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/figures.py
#       (new file, mode 100644, index 0000000..9ecacee)
# ---------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
import warnings


def _load_interact():
    """Return the notebook ``interact`` function.

    The widgets originally lived in ``IPython.html.widgets``; later
    releases ship them as the separate ``ipywidgets`` package, which is
    used as a fallback when the original location cannot be imported.
    """
    try:
        from IPython.html.widgets import interact
    except ImportError:
        from ipywidgets import interact
    return interact


def plot_venn_diagram():
    """Draw two overlapping circles ``x`` and ``y`` inside a box ``I``."""
    fig, ax = plt.subplots(subplot_kw=dict(frameon=False,
                                           xticks=[], yticks=[]))
    ax.add_patch(plt.Circle((0.3, 0.3), 0.3, fc='red', alpha=0.5))
    ax.add_patch(plt.Circle((0.6, 0.3), 0.3, fc='blue', alpha=0.5))
    ax.add_patch(plt.Rectangle((-0.1, -0.1), 1.1, 0.8,
                               fc='none', ec='black'))
    ax.text(0.2, 0.3, '$x$', size=30, ha='center', va='center')
    ax.text(0.7, 0.3, '$y$', size=30, ha='center', va='center')
    ax.text(0.0, 0.6, '$I$', size=30)
    ax.axis('equal')


def plot_example_decision_tree():
    """Draw a hand-laid-out example decision tree (animal classification)."""
    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_axes([0, 0, 0.8, 1], frameon=False, xticks=[], yticks=[])
    ax.set_title('Example Decision Tree: Animal Classification', size=24)

    def text(ax, x, y, t, size=20, **kwargs):
        ax.text(x, y, t,
                ha='center', va='center', size=size,
                bbox=dict(boxstyle='round', ec='k', fc='w'), **kwargs)

    # question nodes: (x, y, question, font size)
    nodes = [(0.5, 0.9, "How big is\nthe animal?", 20),
             (0.3, 0.6, "Does the animal\nhave horns?", 18),
             (0.7, 0.6, "Does the animal\nhave two legs?", 18),
             (0.12, 0.3, "Are the horns\nlonger than 10cm?", 14),
             (0.38, 0.3, "Is the animal\nwearing a collar?", 14),
             (0.62, 0.3, "Does the animal\nhave wings?", 14),
             (0.88, 0.3, "Does the animal\nhave a tail?", 14)]
    for x, y, question, size in nodes:
        text(ax, x, y, question, size)

    # faded answers written on the edges
    answers = [(0.4, 0.75, "> 1m"), (0.6, 0.75, "< 1m"),
               (0.21, 0.45, "yes"), (0.34, 0.45, "no"),
               (0.66, 0.45, "yes"), (0.79, 0.45, "no")]
    for x, y, answer in answers:
        text(ax, x, y, answer, 12, alpha=0.4)

    # solid edges between drawn nodes, dashed stubs towards omitted levels
    edges = [([0.3, 0.5, 0.7], [0.6, 0.9, 0.6], '-k'),
             ([0.12, 0.3, 0.38], [0.3, 0.6, 0.3], '-k'),
             ([0.62, 0.7, 0.88], [0.3, 0.6, 0.3], '-k'),
             ([0.0, 0.12, 0.20], [0.0, 0.3, 0.0], '--k'),
             ([0.28, 0.38, 0.48], [0.0, 0.3, 0.0], '--k'),
             ([0.52, 0.62, 0.72], [0.0, 0.3, 0.0], '--k'),
             ([0.8, 0.88, 1.0], [0.0, 0.3, 0.0], '--k')]
    for xs, ys, style in edges:
        ax.plot(xs, ys, style)
    ax.axis([0, 1, 0, 1])


def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None):
    """Fit ``estimator`` on two features and plot its decision regions.

    Parameters
    ----------
    estimator : object with ``fit`` and ``predict``
        It is (re)fitted on ``X, y`` here.  Drawing the split lines
        additionally reads ``estimator.tree_``.
    X : array, indexed as ``X[:, 0]`` and ``X[:, 1]``
    y : array of labels, used to color the training points
    boundaries : bool
        If True, draw every split of the fitted tree as a black line.
    xlim, ylim : (min, max), optional
        Plot limits; default to the data range padded by 0.1.
    """
    estimator.fit(X, y)

    if xlim is None:
        xlim = (X[:, 0].min() - 0.1, X[:, 0].max() + 0.1)
    if ylim is None:
        ylim = (X[:, 1].min() - 0.1, X[:, 1].max() + 0.1)

    x_min, x_max = xlim
    y_min, y_max = ylim
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, alpha=0.2, cmap='rainbow')
    plt.clim(y.min(), y.max())  # color limits of the mesh

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
    plt.axis('off')

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # not redundant: the scatter is now the current mappable
    plt.clim(y.min(), y.max())

    def plot_boundaries(i, xlim, ylim):
        """Recursively draw the split of node ``i`` within its cell."""
        if i < 0:
            return

        tree = estimator.tree_
        feature = tree.feature[i]
        threshold = tree.threshold[i]

        if feature == 0:
            plt.plot([threshold, threshold], ylim, '-k')
            plot_boundaries(tree.children_left[i],
                            [xlim[0], threshold], ylim)
            plot_boundaries(tree.children_right[i],
                            [threshold, xlim[1]], ylim)
        elif feature == 1:
            plt.plot(xlim, [threshold, threshold], '-k')
            plot_boundaries(tree.children_left[i], xlim,
                            [ylim[0], threshold])
            plot_boundaries(tree.children_right[i], xlim,
                            [threshold, ylim[1]])

    if boundaries:
        plot_boundaries(0, plt.xlim(), plt.ylim())


def plot_tree_interactive(X, y):
    """Interactive `visualize_tree` with a slider for the tree depth (1-5)."""
    from sklearn.tree import DecisionTreeClassifier

    def interactive_tree(depth=1):
        clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
        visualize_tree(clf, X, y)

    interact = _load_interact()
    # A (min, max) tuple requests a slider on every widget release; a list
    # is turned into a two-entry dropdown by newer ones.
    return interact(interactive_tree, depth=(1, 5))


def plot_kmeans_interactive(min_clusters=1, max_clusters=6):
    """Step through k-means on four blobs, three frames per iteration.

    Within each group of three frames the first shows the current state,
    the second the re-assignment of the points and the third the update
    of the centroids.
    """
    from sklearn.metrics.pairwise import euclidean_distances
    # ``make_blobs`` is exported by ``sklearn.datasets`` itself; the
    # ``samples_generator`` submodule no longer exists in newer releases.
    from sklearn.datasets import make_blobs

    interact = _load_interact()

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')

        X, y = make_blobs(n_samples=300, centers=4,
                          random_state=0, cluster_std=0.60)

    def _kmeans_step(frame=0, n_clusters=4):
        rng = np.random.RandomState(2)
        labels = np.zeros(X.shape[0])
        centers = rng.randn(n_clusters, 2)

        nsteps = frame // 3

        for i in range(nsteps + 1):
            old_centers = centers
            if i < nsteps or frame % 3 > 0:
                dist = euclidean_distances(X, centers)
                labels = dist.argmin(1)

            if i < nsteps or frame % 3 > 1:
                # The mean of an empty cluster is NaN and makes numpy
                # warn; such centers are restored just below, so the
                # warning is noise here.
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore', RuntimeWarning)
                    centers = np.array([X[labels == j].mean(0)
                                        for j in range(n_clusters)])
                nans = np.isnan(centers)
                centers[nans] = old_centers[nans]

        # plot the data and cluster centers
        plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='rainbow',
                    vmin=0, vmax=n_clusters - 1)
        plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
                    c=np.arange(n_clusters),
                    s=200, cmap='rainbow')
        plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o',
                    c='black', s=50)

        # plot new centers if third frame
        if frame % 3 == 2:
            for i in range(n_clusters):
                plt.annotate('', centers[i], old_centers[i],
                             arrowprops=dict(arrowstyle='->', linewidth=1))
            plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                        c=np.arange(n_clusters),
                        s=200, cmap='rainbow')
            plt.scatter(centers[:, 0], centers[:, 1], marker='o',
                        c='black', s=50)

        plt.xlim(-4, 4)
        plt.ylim(-2, 10)

        if frame % 3 == 1:
            plt.text(3.8, 9.5, "1. Reassign points to nearest centroid",
                     ha='right', va='top', size=14)
        elif frame % 3 == 2:
            plt.text(3.8, 9.5, "2. Update centroids to cluster means",
                     ha='right', va='top', size=14)

    # tuples, not lists, so that both parameters get sliders
    return interact(_kmeans_step, frame=(0, 50),
                    n_clusters=(min_clusters, max_clusters))


def plot_image_components(x, coefficients=None, mean=0, components=None,
                          imshape=(8, 8), n_components=6, fontsize=12):
    """Show how an image is built up from a mean plus weighted components.

    Parameters
    ----------
    x : flat array, reshaped to ``imshape`` for display
    coefficients : sequence, optional
        Weight of each component; defaults to ``x`` itself.
    mean : scalar or array, added to zeros shaped like ``x``
    components : array, optional
        One component per row; defaults to the rows of an identity-like
        matrix, i.e. single pixels.
    imshape : tuple
        Shape every image is reshaped to.
    n_components : int
        Number of components drawn; capped at the number available.
    fontsize : int
        Font size of the panel titles and of the ``+`` signs.
    """
    if coefficients is None:
        coefficients = x

    if components is None:
        components = np.eye(len(coefficients), len(x))

    # never index past the coefficients/components actually supplied
    n_components = min(n_components, len(coefficients), len(components))

    mean = np.zeros_like(x) + mean

    fig = plt.figure(figsize=(1.2 * (5 + n_components), 1.2 * 2))
    g = plt.GridSpec(2, 5 + n_components, hspace=0.3)

    def show(i, j, x, title=None):
        ax = fig.add_subplot(g[i, j], xticks=[], yticks=[])
        ax.imshow(x.reshape(imshape), interpolation='nearest')
        if title:
            ax.set_title(title, fontsize=fontsize)

    show(slice(2), slice(2), x, "True")

    approx = mean.copy()
    show(0, 2, np.zeros_like(x) + mean, r'$\mu$')
    show(1, 2, approx, r'$1 \cdot \mu$')

    for i in range(0, n_components):
        approx = approx + coefficients[i] * components[i]
        show(0, i + 3, components[i], r'$c_{0}$'.format(i + 1))
        show(1, i + 3, approx,
             r"${0:.2f} \cdot c_{1}$".format(coefficients[i], i + 1))
        plt.gca().text(0, 1.05, '$+$', ha='right', va='bottom',
                       transform=plt.gca().transAxes, fontsize=fontsize)

    show(slice(2), slice(-2, None), approx, "Approx")


def plot_pca_interactive(data, n_components=6):
    """Browse the PCA reconstruction of each row of ``data`` with a slider."""
    from sklearn.decomposition import PCA

    interact = _load_interact()

    pca = PCA(n_components=n_components)
    Xproj = pca.fit_transform(data)

    def show_decomp(i=0):
        # forward n_components: the plotting default of 6 indexes past
        # the projection whenever fewer components were requested
        plot_image_components(data[i], Xproj[i],
                              pca.mean_, pca.components_,
                              n_components=n_components)

    # returned like the other interactive helpers in this module
    return interact(show_decomp, i=(0, data.shape[0] - 1))


# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/helpers.py
#       (new file, mode 100644, index 0000000..349f8cd)
#       -- the module continues on the following line of the patch
# ---------------------------------------------------------------------------
"""
Small helpers for code that is not shown in the notebooks
"""

from sklearn import neighbors, datasets, linear_model
# (scikit-learn/fig_code/helpers.py, continued: its docstring and the
#  ``from sklearn import neighbors, datasets, linear_model`` line sit at
#  the end of the preceding line of the patch)
import pylab as pl
import numpy as np
from matplotlib.colors import ListedColormap

# Create color maps for 3-class classification problem, as with iris
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])


def plot_iris_knn(n_neighbors=5):
    """Plot the k-nearest-neighbors decision regions on two iris features.

    Parameters
    ----------
    n_neighbors : int
        Number of neighbors used by the classifier (default 5, the value
        that was previously hard-coded).
    """
    iris = datasets.load_iris()
    X = iris.data[:, :2]  # we only take the first two features. We could
                          # avoid this ugly slicing by using a two-dim dataset
    y = iris.target

    knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X, y)

    x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
    y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    pl.figure()
    pl.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    pl.xlabel('sepal length (cm)')
    pl.ylabel('sepal width (cm)')
    pl.axis('tight')


def plot_polynomial_regression():
    """Fit 4th- and 9th-order polynomials to noisy samples of a known curve.

    Two figures are produced: the samples with both fits, and the samples
    with the generating 9th-order polynomial.
    """
    rng = np.random.RandomState(0)
    x = 2*rng.rand(100) - 1

    def f(t):
        return 1.2 * t**2 + .1 * t**3 - .4 * t **5 - .5 * t ** 9

    y = f(x) + .4 * rng.normal(size=100)

    x_test = np.linspace(-1, 1, 100)

    def powers(values, degree):
        # design matrix with columns values**0 ... values**degree
        return np.array([values**i for i in range(degree + 1)]).T

    pl.figure()
    pl.scatter(x, y, s=4)

    for degree in (4, 9):
        regr = linear_model.LinearRegression()
        regr.fit(powers(x, degree), y)
        pl.plot(x_test, regr.predict(powers(x_test, degree)),
                label='{0}th order'.format(degree))

    pl.legend(loc='best')
    pl.axis('tight')
    pl.title('Fitting a 4th and a 9th order polynomial')

    pl.figure()
    pl.scatter(x, y, s=4)
    pl.plot(x_test, f(x_test), label="truth")
    pl.axis('tight')
    pl.title('Ground truth (9th order polynomial)')


# NOTE(review): the patch also added scikit-learn/fig_code/helpers.py~
# (index 0000000..a220917), an editor backup identical to helpers.py except
# for ``n_neighbors=3``.  Backup files do not belong in the commit; the
# value is now reachable through ``plot_iris_knn(n_neighbors=3)``.


# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/linear_regression.py
#       (new file, mode 100644, index 0000000..1122b68)
# ---------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression


def plot_linear_regression():
    """Fit and plot a straight line through 20 noisy points.

    The points are drawn from numpy's global random state, so the figure
    differs from run to run unless the caller seeds it.
    """
    a = 0.5
    b = 1.0

    # x from 0 to 30
    x = 30 * np.random.random(20)

    # y = a*x + b with noise
    y = a * x + b + np.random.normal(size=x.shape)

    # create a linear regression model (a regressor, not a classifier)
    clf = LinearRegression()
    clf.fit(x[:, None], y)

    # predict y on a regular grid spanning the same range
    x_new = np.linspace(0, 30, 100)
    y_new = clf.predict(x_new[:, None])

    # plot the results
    ax = plt.axes()
    ax.scatter(x, y)
    ax.plot(x_new, y_new)

    ax.set_xlabel('x')
    ax.set_ylabel('y')

    ax.axis('tight')


if __name__ == '__main__':
    plot_linear_regression()
    plt.show()


# NOTE(review): the patch also added
# scikit-learn/fig_code/scikit-learn.ipynb (index 0000000..df3394a), a
# scratch notebook whose only executed cell stores a stale
# "ImportError: No module named fig_code" traceback, followed by empty
# cells.  Its JSON cannot live in a Python block and is not reproduced.


# ---------------------------------------------------------------------------
# File: scikit-learn/fig_code/sgd_separator.py
#       (new file, mode 100644, index 0000000..14ecb15)
# ---------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier
# ``make_blobs`` is exported by ``sklearn.datasets`` itself; the
# ``samples_generator`` submodule no longer exists in newer releases.
from sklearn.datasets import make_blobs


def plot_sgd_separator():
    """Plot a linear SGD classifier separating two blobs, with its margins."""
    # we create 50 separable points
    X, Y = make_blobs(n_samples=50, centers=2,
                      random_state=0, cluster_std=0.60)

    # fit the model; the number of passes is called ``max_iter`` in newer
    # scikit-learn releases and ``n_iter`` in older ones
    try:
        clf = SGDClassifier(loss="hinge", alpha=0.01,
                            max_iter=200, fit_intercept=True)
    except TypeError:
        clf = SGDClassifier(loss="hinge", alpha=0.01,
                            n_iter=200, fit_intercept=True)
    clf.fit(X, Y)

    # evaluate the decision function on a 10 x 10 grid
    xx = np.linspace(-1, 5, 10)
    yy = np.linspace(-1, 5, 10)

    X1, X2 = np.meshgrid(xx, yy)
    # One call on an (n_points, 2) array: estimators expect 2-D input, and
    # this replaces a Python loop that passed one 1-D sample at a time.
    grid = np.c_[X1.ravel(), X2.ravel()]
    Z = clf.decision_function(grid).reshape(X1.shape)

    levels = [-1.0, 0.0, 1.0]
    linestyles = ['dashed', 'solid', 'dashed']
    colors = 'k'

    ax = plt.axes()
    ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
    ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

    ax.axis('tight')


if __name__ == '__main__':
    plot_sgd_separator()
    plt.show()


# The patch entry for svm_gui.py starts here and continues on the next line;
# its opening text is kept verbatim in the following comment line:
# diff --git a/scikit-learn/fig_code/svm_gui.py b/scikit-learn/fig_code/svm_gui.py new file mode 100644 index 0000000..d60efad --- /dev/null +++ b/scikit-learn/fig_code/svm_gui.py @@ -0,0 +1,341 @@ +""" +========== +Libsvm GUI +========== + +A simple graphical frontend for Libsvm mainly intended for didactic +purposes. You can create data points by point and click and visualize +the decision region induced by different kernels and parameter settings. + +To create positive examples click the left mouse button; to create +negative examples click the right button.
+ +If all examples are from the same class, it uses a one-class SVM. + +""" +from __future__ import division, print_function + +print(__doc__) + +# Author: Peter Prettenhoer +# +# License: BSD 3 clause + +import matplotlib +matplotlib.use('TkAgg') + +from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg +from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg +from matplotlib.figure import Figure +from matplotlib.contour import ContourSet + +import Tkinter as Tk +import sys +import numpy as np + +from sklearn import svm +from sklearn.datasets import dump_svmlight_file +from sklearn.externals.six.moves import xrange + +y_min, y_max = -50, 50 +x_min, x_max = -50, 50 + + +class Model(object): + """The Model which hold the data. It implements the + observable in the observer pattern and notifies the + registered observers on change event. + """ + + def __init__(self): + self.observers = [] + self.surface = None + self.data = [] + self.cls = None + self.surface_type = 0 + + def changed(self, event): + """Notify the observers. """ + for observer in self.observers: + observer.update(event, self) + + def add_observer(self, observer): + """Register an observer. 
""" + self.observers.append(observer) + + def set_surface(self, surface): + self.surface = surface + + def dump_svmlight_file(self, file): + data = np.array(self.data) + X = data[:, 0:2] + y = data[:, 2] + dump_svmlight_file(X, y, file) + + +class Controller(object): + def __init__(self, model): + self.model = model + self.kernel = Tk.IntVar() + self.surface_type = Tk.IntVar() + # Whether or not a model has been fitted + self.fitted = False + + def fit(self): + print("fit the model") + train = np.array(self.model.data) + X = train[:, 0:2] + y = train[:, 2] + + C = float(self.complexity.get()) + gamma = float(self.gamma.get()) + coef0 = float(self.coef0.get()) + degree = int(self.degree.get()) + kernel_map = {0: "linear", 1: "rbf", 2: "poly"} + if len(np.unique(y)) == 1: + clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()], + gamma=gamma, coef0=coef0, degree=degree) + clf.fit(X) + else: + clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C, + gamma=gamma, coef0=coef0, degree=degree) + clf.fit(X, y) + if hasattr(clf, 'score'): + print("Accuracy:", clf.score(X, y) * 100) + X1, X2, Z = self.decision_surface(clf) + self.model.clf = clf + self.model.set_surface((X1, X2, Z)) + self.model.surface_type = self.surface_type.get() + self.fitted = True + self.model.changed("surface") + + def decision_surface(self, cls): + delta = 1 + x = np.arange(x_min, x_max + delta, delta) + y = np.arange(y_min, y_max + delta, delta) + X1, X2 = np.meshgrid(x, y) + Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()]) + Z = Z.reshape(X1.shape) + return X1, X2, Z + + def clear_data(self): + self.model.data = [] + self.fitted = False + self.model.changed("clear") + + def add_example(self, x, y, label): + self.model.data.append((x, y, label)) + self.model.changed("example_added") + + # update decision surface if already fitted. + self.refit() + + def refit(self): + """Refit the model if already fitted. 
""" + if self.fitted: + self.fit() + + +class View(object): + """Test docstring. """ + def __init__(self, root, controller): + f = Figure() + ax = f.add_subplot(111) + ax.set_xticks([]) + ax.set_yticks([]) + ax.set_xlim((x_min, x_max)) + ax.set_ylim((y_min, y_max)) + canvas = FigureCanvasTkAgg(f, master=root) + canvas.show() + canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1) + canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1) + canvas.mpl_connect('key_press_event', self.onkeypress) + canvas.mpl_connect('key_release_event', self.onkeyrelease) + canvas.mpl_connect('button_press_event', self.onclick) + toolbar = NavigationToolbar2TkAgg(canvas, root) + toolbar.update() + self.shift_down = False + self.controllbar = ControllBar(root, controller) + self.f = f + self.ax = ax + self.canvas = canvas + self.controller = controller + self.contours = [] + self.c_labels = None + self.plot_kernels() + + def plot_kernels(self): + self.ax.text(-50, -60, "Linear: $u^T v$") + self.ax.text(-20, -60, "RBF: $\exp (-\gamma \| u-v \|^2)$") + self.ax.text(10, -60, "Poly: $(\gamma \, u^T v + r)^d$") + + def onkeypress(self, event): + if event.key == "shift": + self.shift_down = True + + def onkeyrelease(self, event): + if event.key == "shift": + self.shift_down = False + + def onclick(self, event): + if event.xdata and event.ydata: + if self.shift_down or event.button == 3: + self.controller.add_example(event.xdata, event.ydata, -1) + elif event.button == 1: + self.controller.add_example(event.xdata, event.ydata, 1) + + def update_example(self, model, idx): + x, y, l = model.data[idx] + if l == 1: + color = 'w' + elif l == -1: + color = 'k' + self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0) + + def update(self, event, model): + if event == "examples_loaded": + for i in xrange(len(model.data)): + self.update_example(model, i) + + if event == "example_added": + self.update_example(model, -1) + + if event == "clear": + self.ax.clear() + 
self.ax.set_xticks([]) + self.ax.set_yticks([]) + self.contours = [] + self.c_labels = None + self.plot_kernels() + + if event == "surface": + self.remove_surface() + self.plot_support_vectors(model.clf.support_vectors_) + self.plot_decision_surface(model.surface, model.surface_type) + + self.canvas.draw() + + def remove_surface(self): + """Remove old decision surface.""" + if len(self.contours) > 0: + for contour in self.contours: + if isinstance(contour, ContourSet): + for lineset in contour.collections: + lineset.remove() + else: + contour.remove() + self.contours = [] + + def plot_support_vectors(self, support_vectors): + """Plot the support vectors by placing circles over the + corresponding data points and adds the circle collection + to the contours list.""" + cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1], + s=80, edgecolors="k", facecolors="none") + self.contours.append(cs) + + def plot_decision_surface(self, surface, type): + X1, X2, Z = surface + if type == 0: + levels = [-1.0, 0.0, 1.0] + linestyles = ['dashed', 'solid', 'dashed'] + colors = 'k' + self.contours.append(self.ax.contour(X1, X2, Z, levels, + colors=colors, + linestyles=linestyles)) + elif type == 1: + self.contours.append(self.ax.contourf(X1, X2, Z, 10, + cmap=matplotlib.cm.bone, + origin='lower', alpha=0.85)) + self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k', + linestyles=['solid'])) + else: + raise ValueError("surface type unknown") + + +class ControllBar(object): + def __init__(self, root, controller): + fm = Tk.Frame(root) + kernel_group = Tk.Frame(fm) + Tk.Radiobutton(kernel_group, text="Linear", variable=controller.kernel, + value=0, command=controller.refit).pack(anchor=Tk.W) + Tk.Radiobutton(kernel_group, text="RBF", variable=controller.kernel, + value=1, command=controller.refit).pack(anchor=Tk.W) + Tk.Radiobutton(kernel_group, text="Poly", variable=controller.kernel, + value=2, command=controller.refit).pack(anchor=Tk.W) + 
kernel_group.pack(side=Tk.LEFT) + + valbox = Tk.Frame(fm) + controller.complexity = Tk.StringVar() + controller.complexity.set("1.0") + c = Tk.Frame(valbox) + Tk.Label(c, text="C:", anchor="e", width=7).pack(side=Tk.LEFT) + Tk.Entry(c, width=6, textvariable=controller.complexity).pack( + side=Tk.LEFT) + c.pack() + + controller.gamma = Tk.StringVar() + controller.gamma.set("0.01") + g = Tk.Frame(valbox) + Tk.Label(g, text="gamma:", anchor="e", width=7).pack(side=Tk.LEFT) + Tk.Entry(g, width=6, textvariable=controller.gamma).pack(side=Tk.LEFT) + g.pack() + + controller.degree = Tk.StringVar() + controller.degree.set("3") + d = Tk.Frame(valbox) + Tk.Label(d, text="degree:", anchor="e", width=7).pack(side=Tk.LEFT) + Tk.Entry(d, width=6, textvariable=controller.degree).pack(side=Tk.LEFT) + d.pack() + + controller.coef0 = Tk.StringVar() + controller.coef0.set("0") + r = Tk.Frame(valbox) + Tk.Label(r, text="coef0:", anchor="e", width=7).pack(side=Tk.LEFT) + Tk.Entry(r, width=6, textvariable=controller.coef0).pack(side=Tk.LEFT) + r.pack() + valbox.pack(side=Tk.LEFT) + + cmap_group = Tk.Frame(fm) + Tk.Radiobutton(cmap_group, text="Hyperplanes", + variable=controller.surface_type, value=0, + command=controller.refit).pack(anchor=Tk.W) + Tk.Radiobutton(cmap_group, text="Surface", + variable=controller.surface_type, value=1, + command=controller.refit).pack(anchor=Tk.W) + + cmap_group.pack(side=Tk.LEFT) + + train_button = Tk.Button(fm, text='Fit', width=5, + command=controller.fit) + train_button.pack() + fm.pack(side=Tk.LEFT) + Tk.Button(fm, text='Clear', width=5, + command=controller.clear_data).pack(side=Tk.LEFT) + + +def get_parser(): + from optparse import OptionParser + op = OptionParser() + op.add_option("--output", + action="store", type="str", dest="output", + help="Path where to dump data.") + return op + + +def main(argv): + op = get_parser() + opts, args = op.parse_args(argv[1:]) + root = Tk.Tk() + model = Model() + controller = Controller(model) + 
class Model(object):
    """Hold the examples and the fitted decision surface shown by the GUI.

    This is the observable half of an observer pattern: every object
    registered through :meth:`add_observer` has its
    ``update(event, model)`` method called by :meth:`changed`.
    """

    def __init__(self):
        self.observers = []
        self.surface = None
        # Filled by Controller.add_example with (x, y, label) tuples.
        self.data = []
        self.cls = None
        # Controller.fit stores the fitted estimator under ``clf`` and
        # View.update reads ``model.clf``; define it up front so the
        # attribute always exists.  ``cls`` is kept for compatibility.
        self.clf = None
        self.surface_type = 0

    def changed(self, event):
        """Call ``update(event, self)`` on every registered observer."""
        for observer in self.observers:
            observer.update(event, self)

    def add_observer(self, observer):
        """Register *observer*; it must provide ``update(event, model)``."""
        self.observers.append(observer)

    def set_surface(self, surface):
        """Store *surface* as the current decision surface."""
        self.surface = surface

    def dump_svmlight_file(self, file):
        """Write the collected examples to *file* in svmlight format.

        The first two columns of each example are written as features and
        the third as the target.  When no example has been collected the
        call returns without writing anything: an empty list cannot be
        sliced into columns and used to raise ``IndexError`` here.
        """
        if not self.data:
            return
        data = np.array(self.data)
        # Inside a method this name resolves to the module-level function
        # imported from sklearn.datasets, not to this method.
        dump_svmlight_file(data[:, 0:2], data[:, 2], file)
class View(object):
    """Matplotlib canvas embedded in Tk showing the examples and the fit.

    Mouse clicks on the canvas are forwarded to the controller as new
    examples (left button: label 1, right button: label -1).
    :meth:`update` is the observer callback invoked with a model event
    name and redraws the canvas accordingly.
    """

    def __init__(self, root, controller):
        f = Figure()
        ax = f.add_subplot(111)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlim((x_min, x_max))
        ax.set_ylim((y_min, y_max))
        canvas = FigureCanvasTkAgg(f, master=root)
        canvas.show()
        canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1)
        canvas.mpl_connect('button_press_event', self.onclick)
        toolbar = NavigationToolbar2TkAgg(canvas, root)
        toolbar.update()
        self.controllbar = ControllBar(root, controller)
        self.f = f
        self.ax = ax
        self.canvas = canvas
        self.controller = controller
        # Artists of the current decision surface, kept so that
        # remove_surface can take them off the axes again.
        self.contours = []
        self.c_labels = None
        self.plot_kernels()

    def plot_kernels(self):
        """Write the three kernel formulas underneath the plotting area."""
        # Raw strings: the TeX backslashes are not valid Python escape
        # sequences, so they must not be left to the string parser.
        self.ax.text(-50, -60, "Linear: $u^T v$")
        self.ax.text(-20, -60, r"RBF: $\exp (-\gamma \| u-v \|^2)$")
        self.ax.text(10, -60, r"Poly: $(\gamma \, u^T v + r)^d$")

    def onclick(self, event):
        """Turn a mouse click into a new example on the controller.

        Button 1 adds an example labelled 1, button 3 one labelled -1.
        Events without data coordinates are ignored.
        """
        print(event.button)
        # Compare against None explicitly: a click at coordinate 0.0 is a
        # valid position but is falsy.
        if event.xdata is None or event.ydata is None:
            return
        if event.button == 1:
            self.controller.add_example(event.xdata, event.ydata, 1)
        elif event.button == 3:
            self.controller.add_example(event.xdata, event.ydata, -1)

    def update_example(self, model, idx):
        """Draw ``model.data[idx]`` as a white (1) or black (-1) dot.

        Raises ``ValueError`` for any other label.
        """
        x, y, label = model.data[idx]
        if label == 1:
            color = 'w'
        elif label == -1:
            color = 'k'
        else:
            raise ValueError("example label unknown")
        self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0)

    def update(self, event, model):
        """Redraw the canvas in response to the model event *event*.

        Handled events: ``"examples_loaded"``, ``"example_added"``,
        ``"clear"`` and ``"surface"``.  The canvas is redrawn in all cases.
        """
        if event == "examples_loaded":
            for i in range(len(model.data)):
                self.update_example(model, i)

        if event == "example_added":
            self.update_example(model, -1)

        if event == "clear":
            self.ax.clear()
            self.ax.set_xticks([])
            self.ax.set_yticks([])
            # Clearing the axes also drops the view limits chosen in
            # __init__; restore the fixed data window so that later clicks
            # map to the same coordinates as before.
            self.ax.set_xlim((x_min, x_max))
            self.ax.set_ylim((y_min, y_max))
            self.contours = []
            self.c_labels = None
            self.plot_kernels()

        if event == "surface":
            self.remove_surface()
            self.plot_support_vectors(model.clf.support_vectors_)
            self.plot_decision_surface(model.surface, model.surface_type)

        self.canvas.draw()

    def remove_surface(self):
        """Remove old decision surface."""
        for contour in self.contours:
            if isinstance(contour, ContourSet):
                # A contour set is taken down one collection at a time.
                for lineset in contour.collections:
                    lineset.remove()
            else:
                contour.remove()
        self.contours = []

    def plot_support_vectors(self, support_vectors):
        """Circle the support vectors.

        The scatter collection is appended to ``self.contours`` so that it
        is removed together with the decision surface.
        """
        cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1],
                             s=80, edgecolors="k", facecolors="none")
        self.contours.append(cs)

    def plot_decision_surface(self, surface, type):
        """Draw *surface*, an ``(X1, X2, Z)`` triple, in the given style.

        *type* 0 draws contour lines at levels -1, 0 and 1 (dashed, solid,
        dashed).  *type* 1 draws a filled contour plot plus a solid line
        at level 0.  Any other value raises ``ValueError``.
        """
        X1, X2, Z = surface
        if type == 0:
            levels = [-1.0, 0.0, 1.0]
            linestyles = ['dashed', 'solid', 'dashed']
            colors = 'k'
            self.contours.append(self.ax.contour(X1, X2, Z, levels,
                                                 colors=colors,
                                                 linestyles=linestyles))
        elif type == 1:
            self.contours.append(self.ax.contourf(X1, X2, Z, 10,
                                                  cmap=matplotlib.cm.bone,
                                                  origin='lower', alpha=0.85))
            self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k',
                                                 linestyles=['solid']))
        else:
            raise ValueError("surface type unknown")
def get_parser():
    """Build the command-line parser.

    The parser knows a single option, ``--output``, whose string value is
    stored under ``output`` in the parsed options.
    """
    from optparse import OptionParser

    settings = dict(action="store", type="str", dest="output",
                    help="Path where to dump data.")
    parser = OptionParser()
    parser.add_option("--output", **settings)
    return parser