From b15edb75859ece2938550dd74b0a9a0d7aaadb10 Mon Sep 17 00:00:00 2001 From: Alessandro Date: Sat, 20 Feb 2016 19:49:32 +0100 Subject: [PATCH] Added DataFrames section and cleared outputs --- .DS_Store | Bin 0 -> 6148 bytes spark/spark.ipynb | 45 +++++++-------------------------------------- 2 files changed, 7 insertions(+), 38 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..df3e054fbb5df58a533420c860cfc13a5096e329 GIT binary patch literal 6148 zcmeHK%}T>S5Zl3;a2$0^a>j$m)jO`e zZqoLG$Qz3#$Nyvi_imactj$~&v*G>w@Y;b)(o*T2=ce-0GqYA<4xfd>dE+YTHc~h3 zBsDiaRXy4AJ!9A1^lD%qMN#l&yWX+%yG?s(FA7svhJHMhdUhhiR@ALUov@WO?d5vx z-#~QT4gF)8NKXbby|oJXH0_mMuUaXJ&B{){D0SNna-T3avg9-f4~yZeX7 zr{|Yf%{qtAQ^}&i0la}RwEUY!90pN%3En}*Ad-+6AO?tm31Yw;an}3Kmh~s%g6tM9wfhY_*1~ZK?0>X7FpibrHiota{_=Smc3}za2 zI^$|)c#oNxn;QyOvx8r#aK;^t)Di>4z$gP#x@%+oKlu6nKbk~6Vt^Q!C-V}UnyXyQVg+JidR9EfM1{i=orj2f(L|t1QZR_5Ceb8z$ZkP BTtol> literal 0 HcmV?d00001 diff --git a/spark/spark.ipynb b/spark/spark.ipynb index 4bfac0a..40cdd7e 100644 --- a/spark/spark.ipynb +++ b/spark/spark.ipynb @@ -64,19 +64,11 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/bin/sh: pyspark: command not found\r\n" - ] - } - ], + "outputs": [], "source": [ "!pyspark" ] @@ -90,22 +82,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sc" ] @@ -132,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false }, @@ -555,23 +536,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'df' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupBy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"column_name\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" - ] - } - ], + "outputs": [], "source": [ "df.groupBy(\"column_name\").count()" ]