{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Credits: Forked from [deep-learning-keras-tensorflow](https://github.com/leriomaggio/deep-learning-keras-tensorflow) by Valerio Maggio" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# A simple implementation of ANN for MNIST\n", "\n", "This code was taken from: https://github.com/mnielsen/neural-networks-and-deep-learning\n", "\n", "This accompanies the online text http://neuralnetworksanddeeplearning.com/ . The book is highly recommended. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using Theano backend.\n", "Using gpu device 0: GeForce GTX 760 (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 4007)\n" ] } ], "source": [ "# Import libraries\n", "import random\n", "import numpy as np\n", "import keras\n", "from keras.datasets import mnist" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Set the full path to mnist.pkl.gz\n", "# Point this to the data folder inside the repository\n", "path_to_dataset = \"euroscipy2016_dl-tutorial/data/mnist.pkl.gz\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "!mkdir -p $HOME/.keras/datasets/euroscipy2016_dl-tutorial/data/" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading data from https://s3.amazonaws.com/img-datasets/mnist.pkl.gz\n", "15286272/15296311 [============================>.] - ETA: 0s" ] } ], "source": [ "# Load the datasets\n", "(X_train, y_train), (X_test, y_test) = mnist.load_data(path_to_dataset)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(60000, 28, 28) (60000,)\n", "(10000, 28, 28) (10000,)\n" ] } ], "source": [ "print(X_train.shape, y_train.shape)\n", "print(X_test.shape, y_test.shape)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\"\"\"\n", "network.py\n", "~~~~~~~~~~\n", "A module to implement the stochastic gradient descent learning\n", "algorithm for a feedforward neural network. Gradients are calculated\n", "using backpropagation. Note that I have focused on making the code\n", "simple, easily readable, and easily modifiable. It is not optimized,\n", "and omits many desirable features.\n", "\"\"\"\n", "\n", "#### Libraries\n", "# Standard library\n", "import random\n", "\n", "# Third-party libraries\n", "import numpy as np\n", "\n", "class Network(object):\n", "\n", " def __init__(self, sizes):\n", " \"\"\"The list ``sizes`` contains the number of neurons in the\n", " respective layers of the network. For example, if the list\n", " was [2, 3, 1] then it would be a three-layer network, with the\n", " first layer containing 2 neurons, the second layer 3 neurons,\n", " and the third layer 1 neuron. The biases and weights for the\n", " network are initialized randomly, using a Gaussian\n", " distribution with mean 0, and variance 1. 
Note that the first\n", " layer is assumed to be an input layer, and by convention we\n", " won't set any biases for those neurons, since biases are only\n", " ever used in computing the outputs from later layers.\"\"\"\n", " self.num_layers = len(sizes)\n", " self.sizes = sizes\n", " self.biases = [np.random.randn(y, 1) for y in sizes[1:]]\n", " self.weights = [np.random.randn(y, x)\n", " for x, y in zip(sizes[:-1], sizes[1:])]\n", "\n", " def feedforward(self, a):\n", " \"\"\"Return the output of the network if ``a`` is input.\"\"\"\n", " for b, w in zip(self.biases, self.weights):\n", " a = sigmoid(np.dot(w, a)+b)\n", " return a\n", "\n", " def SGD(self, training_data, epochs, mini_batch_size, eta,\n", " test_data=None):\n", " \"\"\"Train the neural network using mini-batch stochastic\n", " gradient descent. The ``training_data`` is a list of tuples\n", " ``(x, y)`` representing the training inputs and the desired\n", " outputs. The other non-optional parameters are\n", " self-explanatory. If ``test_data`` is provided then the\n", " network will be evaluated against the test data after each\n", " epoch, and partial progress printed out. This is useful for\n", " tracking progress, but slows things down substantially.\"\"\"\n", " training_data = list(training_data)\n", " test_data = list(test_data)\n", " if test_data: n_test = len(test_data)\n", " n = len(training_data)\n", " for j in range(epochs):\n", " random.shuffle(training_data)\n", " mini_batches = [\n", " training_data[k:k+mini_batch_size]\n", " for k in range(0, n, mini_batch_size)]\n", " for mini_batch in mini_batches:\n", " self.update_mini_batch(mini_batch, eta)\n", " if test_data:\n", " print( \"Epoch {0}: {1} / {2}\".format(\n", " j, self.evaluate(test_data), n_test))\n", " else:\n", " print( \"Epoch {0} complete\".format(j))\n", "\n", " def update_mini_batch(self, mini_batch, eta):\n", " \"\"\"Update the network's weights and biases by applying\n", " gradient descent using backpropagation to a single mini batch.\n", " The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``\n", " is the learning rate.\"\"\"\n", " nabla_b = [np.zeros(b.shape) for b in self.biases]\n", " nabla_w = [np.zeros(w.shape) for w in self.weights]\n", " for x, y in mini_batch:\n", " delta_nabla_b, delta_nabla_w = self.backprop(x, y)\n", " nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n", " nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n", " self.weights = [w-(eta/len(mini_batch))*nw\n", " for w, nw in zip(self.weights, nabla_w)]\n", " self.biases = [b-(eta/len(mini_batch))*nb\n", " for b, nb in zip(self.biases, nabla_b)]\n", "\n", " def backprop(self, x, y):\n", " \"\"\"Return a tuple ``(nabla_b, nabla_w)`` representing the\n", " gradient for the cost function C_x. 
"``nabla_b`` and\n",
"        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar\n",
"        to ``self.biases`` and ``self.weights``.\"\"\"\n",
"        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
"        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
"        # feedforward\n",
"        activation = x\n",
"        activations = [x] # list to store all the activations, layer by layer\n",
"        zs = [] # list to store all the z vectors, layer by layer\n",
"        for b, w in zip(self.biases, self.weights):\n",
"            z = np.dot(w, activation)+b\n",
"            zs.append(z)\n",
"            activation = sigmoid(z)\n",
"            activations.append(activation)\n",
"        # backward pass\n",
"        delta = self.cost_derivative(activations[-1], y) * \\\n",
"            sigmoid_prime(zs[-1])\n",
"        nabla_b[-1] = delta\n",
"        nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n",
"        # Note that the variable l in the loop below is used a little\n",
"        # differently to the notation in Chapter 2 of the book. Here,\n",
"        # l = 1 means the last layer of neurons, l = 2 is the\n",
"        # second-last layer, and so on. It's a renumbering of the\n",
"        # scheme in the book, used here to take advantage of the fact\n",
"        # that Python can use negative indices in lists.\n",
"        for l in range(2, self.num_layers):\n",
"            z = zs[-l]\n",
"            sp = sigmoid_prime(z)\n",
"            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp\n",
"            nabla_b[-l] = delta\n",
"            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())\n",
"        return (nabla_b, nabla_w)\n",
"\n",
"    def evaluate(self, test_data):\n",
"        \"\"\"Return the number of test inputs for which the neural\n",
"        network outputs the correct result. Note that the neural\n",
"        network's output is assumed to be the index of whichever\n",
"        neuron in the final layer has the highest activation.\"\"\"\n",
"        test_results = [(np.argmax(self.feedforward(x)), y)\n",
"                        for (x, y) in test_data]\n",
"        return sum(int(x == y) for (x, y) in test_results)\n",
"\n",
"    def cost_derivative(self, output_activations, y):\n",
"        r\"\"\"Return the vector of partial derivatives \\partial C_x /\n",
"        \\partial a for the output activations.\"\"\"\n",
"        return (output_activations-y)\n",
"\n",
"#### Miscellaneous functions\n",
"def sigmoid(z):\n",
"    \"\"\"The sigmoid function.\"\"\"\n",
"    return 1.0/(1.0+np.exp(-z))\n",
"\n",
"def sigmoid_prime(z):\n",
"    \"\"\"Derivative of the sigmoid function.\"\"\"\n",
"    return sigmoid(z)*(1-sigmoid(z))" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def vectorized_result(j):\n", "    \"\"\"Return a 10-dimensional unit vector with a 1.0 in the jth\n", "    position and zeroes elsewhere. This is used to convert a digit\n", "    (0...9) into a corresponding desired output from the neural\n", "    network.\"\"\"\n", "    e = np.zeros((10, 1))\n", "    e[j] = 1.0\n", "    return e" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "net = Network([784, 30, 10])" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "training_inputs = [np.reshape(x, (784, 1)) for x in X_train.copy()]\n", "training_results = [vectorized_result(y) for y in y_train.copy()]\n", "training_data = zip(training_inputs, training_results)" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "test_inputs = [np.reshape(x, (784, 1)) for x in X_test.copy()]\n", "test_data = zip(test_inputs, y_test.copy())" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: 1348 / 10000\n", "Epoch 1: 1939 / 10000\n", "Epoch 2: 2046 / 10000\n", "Epoch 3: 1422 / 10000\n", "Epoch 4: 1365 / 10000\n", "Epoch 5: 1351 / 10000\n", "Epoch 6: 1879 / 10000\n", "Epoch 7: 1806 / 10000\n", "Epoch 8: 1754 / 10000\n", "Epoch 9: 1974 / 10000\n" ] } ], "source": [ "net.SGD(training_data, 10, 10, 3.0, test_data=test_data)" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 0: 3526 / 10000\n", "Epoch 1: 3062 / 10000\n", "Epoch 2: 2946 / 10000\n", "Epoch 3: 2462 / 10000\n", "Epoch 4: 3617 / 10000\n", "Epoch 5: 3773 / 10000\n", "Epoch 6: 3568 / 10000\n", "Epoch 7: 4459 / 10000\n", "Epoch 8: 3009 / 10000\n", "Epoch 9: 2660 / 10000\n" ] } ], "source": [ "net = Network([784, 10, 10])\n", "\n", "training_inputs = [np.reshape(x, (784, 1)) for x in X_train.copy()]\n", "training_results = [vectorized_result(y) for y in y_train.copy()]\n", "training_data = zip(training_inputs, training_results)\n", "\n", "test_inputs = [np.reshape(x, (784, 1)) for x in X_test.copy()]\n", "test_data = zip(test_inputs, y_test.copy())\n", "\n", "net.SGD(training_data, 10, 10, 1.0, test_data=test_data)" ] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }