data-science-ipython-notebooks/scikit-learn/scikit-learn-linear-reg.ipynb

136 lines
24 KiB
Python
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# scikit-learn-linear-reg"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Credits: Forked from [PyCon 2015 Scikit-learn Tutorial](https://github.com/jakevdp/sklearn_pycon2015) by Jake VanderPlas\n",
"\n",
"* Linear Regression"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn; \n",
"from sklearn.linear_model import LinearRegression\n",
"import pylab as pl\n",
"\n",
"seaborn.set()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Linear Regression\n",
"\n",
"Linear Regression is a supervised learning algorithm that models the relationship between a scalar dependent variable y and one or more explanatory variables (or independent variable) denoted X.\n",
"\n",
"Generate some data:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAd4AAAFVCAYAAABB6Y7YAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFLVJREFUeJzt3W2MZNl5F/B/r3fda5zyumQqHoLDNCTZw4ioTQuD4x3H\n8SovQGQD9liiCcSxSUIsD8wqTJNgJ9kPJFYsecZSNh4Sxy8xfICRyDpvSgAJx3HIgIyAMRuYzUGb\nqFcoykJjNfZgrSext/jQPePxzna9NH1PdVX9ftJIc6ur6z46avW/n3PPPXdlOBwGAGjjrlkXAADL\nRPACQEOCFwAaErwA0JDgBYCGBC8ANHT3qC+WUu5K8sEk9yd5Jsn31lpri8IAYBGN63i/LckLa62v\nTvKPkryr+5IAYHGNC96nk9xXSllJcl+SP+i+JABYXCOnmpNcSXJvkt9O8pIkr++8IgBYYCujtows\npbwze1PNP1RKeVmSX0vy9bXW5+x8h8PhcGVlpZtKAeB4mir4xnW8L0zy2f3/7ya5J8nzDjzzykp2\ndq5Pc36mNBj0jHEDxrl7xrh7xriNwaA31fvHBe97kvxsKeXfZi9031FrffqQtQHA0hsZvLXW/5Pk\nDY1qAYCFZwMNAGhI8AJAQ4IXABoSvADQkOAFgIYELwA0JHgBoCHBCwANCV4AaEjwAkBDghcAGhK8\nANCQ4AWAhgQvADQkeAGgIcELAA0JXgBoSPACQEOCFwAaErwA0JDgBYCGBC8ANCR4AaAhwQsADQle\nAGjo7lkXAACtXbh8NY9v7yZJTq31s7W50ezcOl4AlsqFy1dzbXs3wyTDJNe2d3P+0pU8+dT1JucX\nvAAslZud7u12r9/II48+1uT8ghcAGhK8ACyVU2v9O17r91Zz7sx6k/MLXgCWytbmRvq91VvH/d5q\nLp49nZMnek3OP3ZVcynlu5K8Zf/wBUlenuSltdbPdlgXAHTm3Jn1W9d0W3W6N60Mh8OJ31xKeV+S\nT9VaP3jAW4Y7O21WhS2rwaAXY9w949w9Y9w9Y9zGYNBbmeb9E081l1JekeTPjAhdAGCMiTveUspH\nk/xErfUTI942efsMAIthqo53op2rSikvTnL/mNBNEtMaHTN11IZx7p4x7p4xbmMwmG5R1qRTza9J\n8rGpqwEAvsykwXt/kt/pshAAWAYTTTXXWi90XQgALAMbaABAQ4IXABoSvADQkOAFgIYELwA0JHgB\noCHBCwANCV4AaEjwAkBDghcAGhK8ANCQ4AWAhgQvADQkeAGgIcELAA0JXgBoSPACQEOCFwAaErwA\n0JDgBYCGBC8ANHT3rAsAgGlcuHw1j2/vJklOrfWztbkx44qmo+MFYG5cuHw117Z3M0wyTHJtezfn\nL13Jk09dn3VpExO8AMyNm53u7Xav38gjjz42g2oOR/ACQEOCF4C5cWqtf8dr/d5qzp1Zn0E1hyN4\nAZgbW5sb6fdWbx33e6u5ePZ0Tp7ozbCq6QheAObKuTPr6fdW567TvcntRADMlZMnerl49vSsyzg0\nHS8ANDS24y2lvCPJ65Pck+R9tdZ/0nlVALCgRna8pZTXJnlVrfWBJK9N8qca1AQAC2tcx/ttSX6r\nlPILSV6U5B90XxIALK5xwTtI8tVJXpe9bveXkvzprosCgEW1MhwOD/xiKeXHk+zUWt+7f/ypJN9S\na/3fB3zLwR8GAItpZZo3j+t4fzPJQ0neW0r5qiQvTPLpUd+wszM/G1XPo8GgZ4wbMM7dM8bdM8Zt\nDAbTbd4xcnFVrfVXklwtpfyH7E0zv73WqqsFgEMaeztRrfUHWxQCAMvABhoA0JDgBYCGBC8ANCR4\nAaAhwQsADQleAGhI8AJAQ4IXABoSvADQkOAFgIYELwA0JHgBoCHBCwANCV4AaEjwAkBDghcAGhK8\nANCQ4AWAhgQvADQkeAGgIcELAA0JXgBo6O5Z
FwDMpwuXr+bx7d0kyam1frY2N2ZcEcwHHS8wtQuX\nr+ba9m6GSYZJrm3v5vylK3nyqeuzLg2OPcELTO1mp3u73es38sijj82gGpgvghcAGnKNF7jDuOu3\np9b6ufasrrffW825M+vNaoR5peMFvswk12+3NjfS763eOu73VnPx7OmcPNFrXzDMGcELfJlJr9+e\nO7Oefm9VpwtTMtUMHMrJE71cPHt61mXA3BkbvKWU/5zkM/uHv1tr/e5uSwJmyfVb6NbI4C2l3Jsk\ntdYH25QDzNrW5kbOX7qS3es3knzp+i1wNMZd4315kj9SSvnXpZSPlVJe2aIoYLZcv4XujJtq/lyS\n99RaP1RK+bok/7KUcn+t9ZkGtQEz4votdGdlOBwe+MVSyvOT3FVr/fz+8SeTvLHW+nsHfMvBHwYA\ni2llmjeP63jfmmQ9ydlSylcleVGS3x/1DTs79mrt0mDQM8YNGOfuGePuGeM2BoPp7l8fF7wfSvKz\npZTf2D9+q2lmADi8kcFba/1Cku9sVAsALDw7VwFAQ4IXABqyZSQAR2rc062WnY4XgCMzydOtlp3g\nBeDITPp0q2UmeAGgIcELwJE5tda/4zV7fn85wQvAkdna3Ei/t3rr+ObTrU6emG53p0UmeAE4Up5u\nNZrbiYC55/aV48XTrUbT8QJzze0rzBvBC8w1t68wbwQvADQkeIG55vYV5o3gBeaa21eYN1Y1A3Pv\n3Jn1W9d0l7XTtbJ7fqwMh8Oj/Lzhzo6VhF0aDHoxxt0zzt0zxkfn5sru2/V7q3n4e74h960+b0ZV\nLY/BoLcyzftNNQPMuYNWdv/Yhz85g2oYR/ACQEOCF2DOHbSy+4f/9itnUA3jCF6AOXfQyu6vfdmL\nZ1gVBxG8AAvAgwnmh9uJABaABxPMDx0vADQkeAGgIVPNcMTsIASMouOFI+TZsMA4Ol44QqOeDWvh\ny5dcuHw1jz+5mwzNCrB8dLxAU7dmBYZmBVhOgheOkGfDjjdqVgCWgeCFI+TZsMA4EwVvKeUrSyn/\no5Ryf9cFwbyzg9BoZgVYdmMXV5VS7kny/iSf674cmH92EBpta3Mj5y9dye71G0m+NCvAZNyuNv8m\n6Xjfk+Snkvx+x7UAS+LcmfW85L57dbpTcrvaYlgZDocHfrGU8pYkf7zW+q5SyseTvK3WWkd83sEf\nBrBgfuSn/13+yxM7SZKXf+0gP/q2Bzo931/Z+sU816/sl9x3bz7y8F/s9NxHrfXYdWxlqjePCd5P\nJLf+uPqzSWqSv1pr/Z8HfMtwZ8dfXl0aDHoxxt0zzt2b9zG+2X3e7mYH39Viuu9+9689Z3dz0HT9\ncR3jWYxdlwaD3lTBO3Kqudb6TbXW19ZaH0zyqSRvHhG6AEtjFrdFLcrCtGW/pcztRABzwu1qi2Hi\n4K21Plhr/e9dFgMwL2bVfS7C7WqL0rkf1shrvIfgGm/Hjus1m0VjnLu3CGN83G+LOs5jfNzHbhpH\neo0XgIMtQvc5K8s8djreOXOc/4JdJMa5e8a4e8a4DR0vABxjghcAGhK8ANCQ4AWAhgQvADQkeAGg\nIcELAA0JXgBoSPACQEOCFwAaErwA0JDgBYCG7p51AUBbFy5fzePbu0n2nou6tbkx44pgueh4YYlc\nuHw117Z3M0wyTHJtezfnL13Jk095gg20InhhidzsdG+3e/1GHnn0sRlUA8tJ8AJAQ4IXlsiptf4d\nr/V7qzl3Zn0G1cByErywRLY2N9Lvrd467vdWc/Hs6Zw80ZthVbBcBC8smXNn1tPvrep0YUbcTgRL\n5uSJXi6ePT3rMmBp6XgBoCHBCwANCV4AaEjwAkBDghcAGhK8ANCQ4AWAhsbex1tKeV6SDyS5P3sP\nNHlbrfW/dV0YACyiSTre1yV5ptb66iQ/nORd3ZYEAItrbPDWWn8xyfftH64lufO5YgDARCbaMrLW\n+sVSykeS
vCHJmzqtCAAW2MpwOJz4zaWUlyb5ZJJTtdann+Mtk38YACyGlWnePMniqu9M8rJa648n\neTrJM/v/ntPOzvVpzs+
"text/plain": [
"<matplotlib.figure.Figure at 0x11006e550>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Create some simple data\n",
"import numpy as np\n",
"np.random.seed(0)\n",
"X = np.random.random(size=(20, 1))\n",
"y = 3 * X.squeeze() + 2 + np.random.randn(20)\n",
"\n",
"plt.plot(X.squeeze(), y, 'o');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fit the model:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAd4AAAFVCAYAAABB6Y7YAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3WlwG2l+3/EveF8QCZJoUDdHI6lFHYAIzK09y0fKqbXj\nXbnKihNfsR07nkRbzih2dh3vi9iuuGo1rvLYEx9rrzd5kahij484Z1Xs9drW2rtlgAIokXpGx1DS\njEZskAIp3gfQeQGKqxmNSIJDNAjg96maKoJsoP/zDAY/PE8//Tw+13URERERb9SUugAREZFqouAV\nERHxkIJXRETEQwpeERERDyl4RUREPKTgFRER8VDdWn+0bbsG+F3gMJADfsIYY7woTEREpBKt1+P9\nTqDVGPMR4N8Dv1z8kkRERCrXesE7B7Tbtu0D2oHF4pckIiJSudYcagYuAk3AVaAL+O6iVyQiIlLB\nfGstGWnb9ufJDzX/vG3be4C/AI4bYz6w5+u6ruvz+YpTqYiIyPZUUPCt1+NtBR6s/JwB6oHaJ57Z\n5yOdnirk/FKgYNCvNvaA2rn41MbFpzb2RjDoL+j49YL3i8Dv27b91+RD93PGmLlN1iYiIlL11gxe\nY8wE8GmPahEREal4WkBDRETEQwpeERERDyl4RUREPKTgFRER8ZCCV0RExEMKXhEREQ8peEVERDyk\n4BUREfGQgldERMRDCl4REREPKXhFREQ8pOAVERHxkIJXRETEQwpeERERDyl4RUREPKTgFRER8ZCC\nV0RExEMKXhEREQ8peEVERDyk4BUREfGQgldERMRDCl4REREPKXhFREQ8pOAVERHxkIJXRETEQ3Wl\nLkBERMRr5y8MMDySAaCvN8C5M/2enVs9XhERqSrnLwwwNJLBBVxgaCTDK69f5Na9KU/Or+AVEZGq\n8rCn+6jM1AKvvZHy5PwKXhEREQ8peEVEpKr09QYe+13A38jZ02FPzq/gFRGRqnLuTD8Bf+Pq44C/\nkVdfPsX+Hr8n5193VrNt2z8M/MjKw2YgAoSMMQ+KWJeIiEjRnD0dXr2m61VP9yGf67obPti27d8A\nLhljfvcJh7jptDezwqpVMOhHbVx8aufiUxsXn9rYG8Gg31fI8RsearZt+xng2BqhKyIiIuvYcI/X\ntu0/An7NGPO1NQ7bePdZRESkMhTU493QylW2bXcAh9cJXQANaxSZho68oXYuPrVx8amNvREMFjYp\na6NDzR8D/rzgakREROQ9Nhq8h4EbxSxERESkGmxoqNkYc77YhYiIiFQDLaAhIiLiIQWviIiIhxS8\nIiIiHlLwioiIeEjBKyIi4iEFr4iIiIcUvCIiIh5S8IqIiHhIwSsiIuIhBa+IiIiHFLwiIiIeUvCK\niIh4SMErIiLiIQWviIiIhxS8IiIiHlLwioiIeEjBKyIi4iEFr4iIiIcUvCIiIh5S8IqIiHhIwSsi\nIuKhulIXICIiUojzFwYYHskA0Ncb4NyZ/hJXVBj1eEVEpGycvzDA0EgGF3CBoZEMr7x+kVv3pkpd\n2oYpeEVEpGw87Ok+KjO1wGtvpEpQzeYoeEVERDyk4BURkbLR1xt47HcBfyNnT4dLUM3mKHhFRKRs\nnDvTT8DfuPo44G/k1ZdPsb/HX8KqCqPgFRGRsnL2dJiAv7HseroP6XYiEREpK/t7/Lz68qlSl7Fp\n6vGKiIh4aN0er23bnwO+G6gHfsMY85+KXpWIiEiFWrPHa9v2J4AXjTEvAZ8ADnhQk4iISMVar8f7\nncCgbdt/AuwA/k3xSxIREalc6wVvENgLfIp8b/e/A0eKXZSIiEil8rmu+8Q/2rb9H4C0MeZXVx5f\nAr7dGDP2hKc8+cVEREQqk6+Qg9fr8f4N8FngV23b3gW0AuNrPSGdLp+FqstRMOhXG3tA7Vx8auPi\nUxt7IxgsbPGONSdXGWP+JzBg2/Y3yQ8z/7Qx
Rr1aERGRTVr3diJjzM95UYiIiEg10AIaIiIiHlLw\nioiIeEjBKyIi4iEFr4iIiIcUvCIiIh5S8IqIiHhIwSsiIuIhBa+IiIiHFLwiIiIeUvCKiIh4SMEr\nIiLiIQWviIiIhxS8IiIiHlLwioiIeEjBKyIi4iEFr4iIiIcUvCIiIh5S8IqIiGzS/PJ8wc+pK0Id\nIiIiFWtsbpyEkyIxmuTO9F3+2/f/ZkHPV/CKiIis4/58ZiVsU9yaugNAra+W411HCn4tBa+IiMgH\nyMxPMJAeJDGa5K0HtwGo8dXQ13mYmBUhEjxGS31Lwa+r4BUREVkxufCAAWeQuJPk5uQIAD582IGD\nK2F7nLaG1g91DgWviGzK+QsDDI9kAOjrDXDuTH+JKxLZnKnFaS6lB4mPJrk+8RYuLj58HOx4ipgV\n4aR1gh0N/i07n4JXRAp2/sIAQyuhCzA0kuGV1y9y9nSY/T1b9wElUizTizMk05dJOClM5jouLgBP\nt/cStSL0Wydob9xRlHMreEWkYMOPhO5DmakFXnsjxasvnypBRSLrm12a5VL6CgkniclcJ+fmAHhq\nxz6iVph+K0ygqaPodSh4RUSkYs0tz5FKDxF3kly9f42smwVgn383UStC1IrQ1RzwtCYFr4g8Zr3r\nt329gfcMNQME/I2cPR32rEaRJ5lfnmdwbJi4k2R43LC8ErZ72nYRsyL0W2GCLV0lq8/nuu5Wvp6b\nTk9t5evJ+wSDftTGxVfN7fz+67fwrVB99PrtK69fJDO1sPr3QoeYq7mNvVJNbbyQXeTy2DAJJ8mV\n8ass5ZYB2NXak+/ZhsKEWoJFOXcw6PcVcrx6vCLyHhu9fnv2dJjX3kit/izitcXsEkPjV4k7SS6P\nDbOYWwKgp8UiGooQs8L0tIZKXOXjFLwisin7e/yaSCWeW8otMzRuSDhJBseGWMguAmA1dxMNRYha\nYXa19uDzFdQJ9dS6wWvbdgKYXHl40xjzY8UtSURKSddvZbtZzi1z9f41Ek6KZPoK89n8xgRdTZ18\nfE8+bPe07drWYfuoNYPXtu0mAGPMJ70pR0RK7dyZ/g99/Vbkw8rmspjMdeJOkmT6CnPLcwAEGjs4\ntfs5YlaEff49ZRO2j1qvxxsBWmzb/r8rx37eGPON4pclIqWk67dSCtlclmsTN0k4SS6lLzOzNAtA\nR2M7L+58hqgVpnfHvrIM20etF7wzwBeNMb9n2/Yh4H/btn3YGJPzoDYRKRFdvxWv5Nwc1yfeIu4k\nueQMMr00A8COBj8f33OKqBXmQPt+anyVs338mrcT2bbdANQYY+ZXHn8D+Iwx5p0nPGVL700SEZHK\nk3NzvDl2k6/fjvN3byeYmH8AwI7GNl7YE+WlfTGOdB+kpqZswnZLbyf6USAMvGzb9i5gB/DuWk+o\nlnvGSqWa7ssrJbVz8amNi287tbHruow8uE3cSTLgDDKxkJ+z21rfwqldzxG1IhzqOEBtTS0A4+Mz\npSy3IMFgYeuTrxe8vwf8vm3bf7Xy+Ec1zCwiIhvhui63p94m7iRJjKbILEwA0FzXzIs7nyVqhbED\nB1fDtlqsGbzGmGXgBz2qRUREypzrurw9fZf4aJKEk2J8/j4ATbVNPN8TI2qFOdJ5iLqa6l1Gonr/\nzUVEZEu4rsvdmXskVsLWmRsDoLG2gWdD/UStMH1dNvVVHLaPUiuIiMimvDszuhq292YdABpq6ola\nYWJWhKNdR2iorS9xlduPgldERDZsdDa9GrZ3Z+4BUF9Tx8ngCaJWmOPdffz6H1zh10fSQPoDd7eq\ndgpeERFZU3p2nISTJO4keWc6f2NLna+WcPcxYlaY491HaaprBB7f3WpoJMMrr198bHeraqbgFRGR\nx4zP3SfhpEg4SW5P5ZduqPXVcryrj6gVJhw8SnNd82PP2+juVtVMwSsiIgBk5icYcFLEnRQjD24D\nUOOr4Win
TTQUIdJ9lJb6lhJXWf4UvCIiVWxiYZIBZ5CEk+Lm5AiQD9sjgUNEQ2EiweO01bdu+PW0\nu9X6FLwiIlXmweIUl5x
"text/plain": [
"<matplotlib.figure.Figure at 0x103f774d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = LinearRegression()\n",
"model.fit(X, y)\n",
"\n",
"# Plot the data and the model prediction\n",
"X_fit = np.linspace(0, 1, 100)[:, np.newaxis]\n",
"y_fit = model.predict(X_fit)\n",
"\n",
"plt.plot(X.squeeze(), y, 'o')\n",
"plt.plot(X_fit.squeeze(), y_fit);"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}