mirror of
https://github.com/donnemartin/data-science-ipython-notebooks.git
synced 2024-03-22 13:30:56 +08:00
1019 lines
21 KiB
Python
1019 lines
21 KiB
Python
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Credits: Forked from [deep-learning-keras-tensorflow](https://github.com/leriomaggio/deep-learning-keras-tensorflow) by Valerio Maggio"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Theano \n",
|
|||
|
"===\n",
|
|||
|
"A language in a language"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Dealing with weight matrices and gradients can be tricky and sometimes not trivial.\n",
|
|||
|
"Theano is a great framework for handling vectors, matrices and high dimensional tensor algebra. \n",
|
|||
|
"Most of this tutorial will refer to Theano; however, TensorFlow is another great framework capable of providing an incredible abstraction for complex algebra.\n",
|
|||
|
"More on TensorFlow in the next chapters."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import theano\n",
|
|||
|
"import theano.tensor as T"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Symbolic variables\n",
|
|||
|
"=========="
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Theano has its own variables and functions, defined as follows"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x = T.scalar()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Variables can be used in expressions"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"y = 3*(x**2) + 1"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"y is an expression now "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Result is symbolic as well"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Shape.0"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"type(y)\n",
|
|||
|
"y.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"##### Printing"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"As we are about to see, normal printing isn't the best when it comes to Theano"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Elemwise{add,no_inplace}.0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(y)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'((TensorConstant{3} * (<TensorType(float32, scalar)> ** TensorConstant{2})) + TensorConstant{1})'"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"theano.pprint(y)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Elemwise{add,no_inplace} [@A] '' \n",
|
|||
|
" |Elemwise{mul,no_inplace} [@B] '' \n",
|
|||
|
" | |TensorConstant{3} [@C]\n",
|
|||
|
" | |Elemwise{pow,no_inplace} [@D] '' \n",
|
|||
|
" | |<TensorType(float32, scalar)> [@E]\n",
|
|||
|
" | |TensorConstant{2} [@F]\n",
|
|||
|
" |TensorConstant{1} [@G]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"theano.printing.debugprint(y)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Evaluating expressions\n",
|
|||
|
"============\n",
|
|||
|
"\n",
|
|||
|
"Supply a `dict` mapping variables to values"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(13.0, dtype=float32)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"y.eval({x: 2})"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Or compile a function"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"f = theano.function([x], y)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(13.0, dtype=float32)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"f(2)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Other tensor types\n",
|
|||
|
"=========="
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"X = T.vector()\n",
|
|||
|
"X = T.matrix()\n",
|
|||
|
"X = T.tensor3()\n",
|
|||
|
"X = T.tensor4()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Automatic differentiation\n",
|
|||
|
"============\n",
|
|||
|
"- Gradients are free!"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x = T.scalar()\n",
|
|||
|
"y = T.log(x)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Elemwise{true_div}.0\n",
|
|||
|
"0.5\n",
|
|||
|
"Elemwise{mul,no_inplace}.0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"gradient = T.grad(y, x)\n",
|
|||
|
"print gradient\n",
|
|||
|
"print gradient.eval({x: 2})\n",
|
|||
|
"print (2 * gradient)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Shared Variables\n",
|
|||
|
"\n",
|
|||
|
"- Symbolic + Storage"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"x = theano.shared(np.zeros((2, 3), dtype=theano.config.floatX))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<CudaNdarrayType(float32, matrix)>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"x"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"We can get and set the variable's value"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"(2, 3)\n",
|
|||
|
"[[ 0. 0. 0.]\n",
|
|||
|
" [ 0. 0. 0.]]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"values = x.get_value()\n",
|
|||
|
"print(values.shape)\n",
|
|||
|
"print(values)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"x.set_value(values)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Shared variables can be used in expressions as well"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Elemwise{pow,no_inplace}.0"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"(x + 2) ** 2"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Their value is used as input when evaluating"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array([[ 4., 4., 4.],\n",
|
|||
|
" [ 4., 4., 4.]], dtype=float32)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"((x + 2) ** 2).eval()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array([[ 4., 4., 4.],\n",
|
|||
|
" [ 4., 4., 4.]], dtype=float32)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"theano.function([], (x + 2) ** 2)()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Updates\n",
|
|||
|
"\n",
|
|||
|
"- Store results of function evaluation\n",
|
|||
|
"- `dict` mapping shared variables to new values"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 46,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"count = theano.shared(0)\n",
|
|||
|
"new_count = count + 1\n",
|
|||
|
"updates = {count: new_count}\n",
|
|||
|
"\n",
|
|||
|
"f = theano.function([], count, updates=updates)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 47,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(0)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 47,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"f()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 48,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(1)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 48,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"f()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 49,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"array(2)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 49,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"f()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Warming up! Logistic Regression"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"%matplotlib inline"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Using Theano backend.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import theano\n",
|
|||
|
"import theano.tensor as T\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.preprocessing import LabelEncoder \n",
|
|||
|
"from keras.utils import np_utils"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"For this section we will use the Kaggle otto challenge.\n",
|
|||
|
"If you want to follow along, get the data from Kaggle: https://www.kaggle.com/c/otto-group-product-classification-challenge/data"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### About the data"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"The Otto Group is one of the world’s biggest e-commerce companies. A consistent analysis of the performance of products is crucial. However, due to diverse global infrastructure, many identical products get classified differently.\n",
|
|||
|
"For this competition, we have provided a dataset with 93 features for more than 200,000 products. The objective is to build a predictive model which is able to distinguish between our main product categories. \n",
|
|||
|
"Each row corresponds to a single product. There are a total of 93 numerical features, which represent counts of different events. All features have been obfuscated and will not be defined any further.\n",
|
|||
|
"\n",
|
|||
|
"https://www.kaggle.com/c/otto-group-product-classification-challenge/data"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def load_data(path, train=True):\n",
|
|||
|
" \"\"\"Load data from a CSV File\n",
|
|||
|
" \n",
|
|||
|
" Parameters\n",
|
|||
|
" ----------\n",
|
|||
|
" path: str\n",
|
|||
|
" The path to the CSV file\n",
|
|||
|
" \n",
|
|||
|
" train: bool (default True)\n",
|
|||
|
" Decide whether or not data are *training data*.\n",
|
|||
|
" If True, some random shuffling is applied.\n",
|
|||
|
" \n",
|
|||
|
" Return\n",
|
|||
|
" ------\n",
|
|||
|
" X: numpy.ndarray \n",
|
|||
|
" The data as a multi dimensional array of floats\n",
|
|||
|
" ids: numpy.ndarray\n",
|
|||
|
" A vector of ids for each sample\n",
|
|||
|
" \"\"\"\n",
|
|||
|
" df = pd.read_csv(path)\n",
|
|||
|
" X = df.values.copy()\n",
|
|||
|
" if train:\n",
|
|||
|
" np.random.shuffle(X) # https://youtu.be/uyUXoap67N8\n",
|
|||
|
" X, labels = X[:, 1:-1].astype(np.float32), X[:, -1]\n",
|
|||
|
" return X, labels\n",
|
|||
|
" else:\n",
|
|||
|
" X, ids = X[:, 1:].astype(np.float32), X[:, 0].astype(str)\n",
|
|||
|
" return X, ids"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def preprocess_data(X, scaler=None):\n",
|
|||
|
" \"\"\"Preprocess input data by standardise features \n",
|
|||
|
" by removing the mean and scaling to unit variance\"\"\"\n",
|
|||
|
" if not scaler:\n",
|
|||
|
" scaler = StandardScaler()\n",
|
|||
|
" scaler.fit(X)\n",
|
|||
|
" X = scaler.transform(X)\n",
|
|||
|
" return X, scaler\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def preprocess_labels(labels, encoder=None, categorical=True):\n",
|
|||
|
" \"\"\"Encode labels with values among 0 and `n-classes-1`\"\"\"\n",
|
|||
|
" if not encoder:\n",
|
|||
|
" encoder = LabelEncoder()\n",
|
|||
|
" encoder.fit(labels)\n",
|
|||
|
" y = encoder.transform(labels).astype(np.int32)\n",
|
|||
|
" if categorical:\n",
|
|||
|
" y = np_utils.to_categorical(y)\n",
|
|||
|
" return y, encoder"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Loading data...\n",
|
|||
|
"[[ 0. 0. 0. 0. 0. 0. 0. 0. 0. 3. 0. 0. 0. 3.\n",
|
|||
|
" 2. 1. 0. 0. 0. 0. 0. 0. 0. 5. 3. 1. 1. 0.\n",
|
|||
|
" 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0.\n",
|
|||
|
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
|
|||
|
" 0. 0. 0. 0. 0. 0. 0. 3. 0. 0. 0. 0. 1. 1.\n",
|
|||
|
" 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
|
|||
|
" 0. 11. 1. 20. 0. 0. 0. 0. 0.]]\n",
|
|||
|
"(9L, 'classes')\n",
|
|||
|
"(93L, 'dims')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(\"Loading data...\")\n",
|
|||
|
"X, labels = load_data('train.csv', train=True)\n",
|
|||
|
"X, scaler = preprocess_data(X)\n",
|
|||
|
"Y, encoder = preprocess_labels(labels)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"X_test, ids = load_data('test.csv', train=False)\n",
|
|||
|
"X_test, ids = X_test[:1000], ids[:1000]\n",
|
|||
|
"\n",
|
|||
|
"#Plotting the data\n",
|
|||
|
"print(X_test[:1])\n",
|
|||
|
"\n",
|
|||
|
"X_test, _ = preprocess_data(X_test, scaler)\n",
|
|||
|
"\n",
|
|||
|
"nb_classes = Y.shape[1]\n",
|
|||
|
"print(nb_classes, 'classes')\n",
|
|||
|
"\n",
|
|||
|
"dims = X.shape[1]\n",
|
|||
|
"print(dims, 'dims')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Now let's create and train a logistic regression model."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Hands On - Logistic Regression"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 46,
|
|||
|
"metadata": {
|
|||
|
"collapsed": false
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Epoch 1\n",
|
|||
|
"target values for Data:\n",
|
|||
|
"[ 0. 0. 1. ..., 0. 0. 0.]\n",
|
|||
|
"prediction on training set:\n",
|
|||
|
"[0 0 0 ..., 0 0 0]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"#Based on example from DeepLearning.net\n",
|
|||
|
"rng = np.random\n",
|
|||
|
"N = 400\n",
|
|||
|
"feats = 93\n",
|
|||
|
"training_steps = 1\n",
|
|||
|
"\n",
|
|||
|
"# Declare Theano symbolic variables\n",
|
|||
|
"x = T.matrix(\"x\")\n",
|
|||
|
"y = T.vector(\"y\")\n",
|
|||
|
"w = theano.shared(rng.randn(feats), name=\"w\")\n",
|
|||
|
"b = theano.shared(0., name=\"b\")\n",
|
|||
|
"\n",
|
|||
|
"# Construct Theano expression graph\n",
|
|||
|
"p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1\n",
|
|||
|
"prediction = p_1 > 0.5 # The prediction thresholded\n",
|
|||
|
"xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function\n",
|
|||
|
"cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize\n",
|
|||
|
"gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost\n",
|
|||
|
" # (we shall return to this in a\n",
|
|||
|
" # following section of this tutorial)\n",
|
|||
|
"\n",
|
|||
|
"# Compile\n",
|
|||
|
"train = theano.function(\n",
|
|||
|
" inputs=[x,y],\n",
|
|||
|
" outputs=[prediction, xent],\n",
|
|||
|
" updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)),\n",
|
|||
|
" allow_input_downcast=True)\n",
|
|||
|
"predict = theano.function(inputs=[x], outputs=prediction, allow_input_downcast=True)\n",
|
|||
|
"\n",
|
|||
|
"#Transform for class1\n",
|
|||
|
"y_class1 = []\n",
|
|||
|
"for i in Y:\n",
|
|||
|
" y_class1.append(i[0])\n",
|
|||
|
"y_class1 = np.array(y_class1)\n",
|
|||
|
"\n",
|
|||
|
"# Train\n",
|
|||
|
"for i in range(training_steps):\n",
|
|||
|
" print('Epoch %s' % (i+1,))\n",
|
|||
|
" pred, err = train(X, y_class1)\n",
|
|||
|
"\n",
|
|||
|
"print(\"target values for Data:\")\n",
|
|||
|
"print(y_class1)\n",
|
|||
|
"print(\"prediction on training set:\")\n",
|
|||
|
"print(predict(X))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {
|
|||
|
"collapsed": true
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.4.3"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 0
|
|||
|
}
|