diff --git a/core/structs.ipynb b/core/structs.ipynb index ca6e085..018a6e1 100644 --- a/core/structs.ipynb +++ b/core/structs.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:c5099245c74de456735197b7914b9a407b7fd42bc3fd7db0d637d1423436d0e6" + "signature": "sha256:cb47861bd46984af8db5d342d5bff2e0b95ddc5e4a853bedf4905e698ffde31d" }, "nbformat": 3, "nbformat_minor": 0, @@ -621,683 +621,6 @@ } ], "prompt_number": 22 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## sort" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Sort in-place O(n log n)" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = [1, 5, 3, 9, 7, 6]\n", - "seq.sort()\n", - "seq" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 23, - "text": [ - "[1, 3, 5, 6, 7, 9]" - ] - } - ], - "prompt_number": 23 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Sort by secondary key: str length" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = ['the', 'quick', 'brown', 'fox', 'jumps', 'over']\n", - "seq.sort(key=len)\n", - "seq" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 24, - "text": [ - "['the', 'fox', 'over', 'quick', 'brown', 'jumps']" - ] - } - ], - "prompt_number": 24 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## bisect" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The bisect module does not check whether the list is sorted, as this check would be expensive O(n). Using bisect on an unsorted list will not result in an error but could lead to incorrect results." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import bisect" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 25 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Find the location where an element should be inserted to keep the list sorted" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = [1, 2, 2, 3, 5, 13]\n", - "bisect.bisect(seq, 8)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 26, - "text": [ - "5" - ] - } - ], - "prompt_number": 26 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Inserts an element into a location to keep the list sorted" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "bisect.insort(seq, 8)\n", - "seq" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 27, - "text": [ - "[1, 2, 2, 3, 5, 8, 13]" - ] - } - ], - "prompt_number": 27 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## slice" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![alt text](http://www.nltk.org/images/string-slicing.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Select a section of list types (arrays, tuples, NumPy arrays) using [start:stop]. start is included, stop is not. The number of elements in the result is stop - start." 
- ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = 'Monty Python'\n", - "seq[6:10]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 28, - "text": [ - "'Pyth'" - ] - } - ], - "prompt_number": 28 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Omit start to default to start of the sequence" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq[:5]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 29, - "text": [ - "'Monty'" - ] - } - ], - "prompt_number": 29 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Omit end to default to end of the sequence" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq[6:]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 30, - "text": [ - "'Python'" - ] - } - ], - "prompt_number": 30 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Negative indices slice relative to the end" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq[-12:-7]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 31, - "text": [ - "'Monty'" - ] - } - ], - "prompt_number": 31 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Slice can also take a step such as the one below, which takes every other element" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq[::2]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 32, - "text": [ - "'MnyPto'" - ] - } - ], - "prompt_number": 32 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Passing -1 for 
the step reverses the list or tuple:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq[::-1]" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 33, - "text": [ - "'nohtyP ytnoM'" - ] - } - ], - "prompt_number": 33 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assign elements to a slice. Slice range does not have to equal number of elements to assign." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = [1, 1, 2, 3, 5, 8, 13]\n", - "seq[5:] = ['H', 'a', 'l', 'l']\n", - "seq" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 34, - "text": [ - "[1, 1, 2, 3, 5, 'H', 'a', 'l', 'l']" - ] - } - ], - "prompt_number": 34 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Compare assigning into a slice (above) versus assigning into an index (below)" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = [1, 1, 2, 3, 5, 8, 13]\n", - "seq[5] = ['H', 'a', 'l', 'l']\n", - "seq" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 35, - "text": [ - "[1, 1, 2, 3, 5, ['H', 'a', 'l', 'l'], 13]" - ] - } - ], - "prompt_number": 35 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## sorted" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Return a new sorted list from the elements of a sequence" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "sorted([2, 5, 1, 8, 7, 9])" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 36, - "text": [ - "[1, 2, 5, 7, 8, 9]" - ] - } - ], - "prompt_number": 36 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "sorted('foo bar 
baz')" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 37, - "text": [ - "[' ', ' ', 'a', 'a', 'b', 'b', 'f', 'o', 'o', 'r', 'z']" - ] - } - ], - "prompt_number": 37 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It's common to get a sorted list of unique elements by combining sorted and set" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq = [2, 5, 1, 8, 7, 9, 9, 2, 5, 1, (4, 2), (1, 2), (1, 2)]\n", - "sorted(set(seq))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 38, - "text": [ - "[1, 2, 5, 7, 8, 9, (1, 2), (4, 2)]" - ] - } - ], - "prompt_number": 38 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## reversed" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Iterates over the sequence elements in reverse order" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "list(reversed(seq))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 39, - "text": [ - "[(1, 2), (1, 2), (4, 2), 1, 5, 2, 9, 9, 7, 8, 1, 5, 2]" - ] - } - ], - "prompt_number": 39 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## enumerate" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "enumerate gives you the index of a collection and the value" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "numbers = ['zero', 'one', 'two', 'three']\n", - "for i, number in enumerate(numbers):\n", - " print(i, number)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "(0, 'zero')\n", - "(1, 'one')\n", - "(2, 'two')\n", - "(3, 'three')\n" - ] - } - ], - "prompt_number": 40 - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "## zip" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pairs up the elements of sequences to create a list of tuples" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq_1 = [1, 2, 3]\n", - "seq_2 = ['foo', 'bar', 'baz']\n", - "zip(seq_1, seq_2)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 41, - "text": [ - "[(1, 'foo'), (2, 'bar'), (3, 'baz')]" - ] - } - ], - "prompt_number": 41 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Takes an arbitrary number of sequences. The number of elements it produces is determined by the shortest sequence." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "seq_3 = [True, False]\n", - "zip(seq_1, seq_2, seq_3)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 42, - "text": [ - "[(1, 'foo', True), (2, 'bar', False)]" - ] - } - ], - "prompt_number": 42 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is common to use zip for simultaneously iterating over multiple sequences combined with enumerate" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for i, (a, b) in enumerate(zip(seq_1, seq_2)):\n", - " print('%d: %s, %s' % (i, a, b))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "0: 1, foo\n", - "1: 2, bar\n", - "2: 3, baz\n" - ] - } - ], - "prompt_number": 43 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "zip can unzip a zipped sequence, which you can think of as converting a list of rows into a list of columns" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "numbers = [(1, 'one'), (2, 'two'), (3, 'three')]\n", - "a, b = zip(*numbers)\n", - "a" - ], 
- "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 44, - "text": [ - "(1, 2, 3)" - ] - } - ], - "prompt_number": 44 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "b" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "metadata": {}, - "output_type": "pyout", - "prompt_number": 45, - "text": [ - "('one', 'two', 'three')" - ] - } - ], - "prompt_number": 45 } ], "metadata": {} diff --git a/core/structs_utils.ipynb b/core/structs_utils.ipynb new file mode 100644 index 0000000..a5269db --- /dev/null +++ b/core/structs_utils.ipynb @@ -0,0 +1,699 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:aff21c8ceeca329c78b70a0f35aeb222827eaa712b68639150e540efb9afb0ea" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Structures Utilities" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## slice" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![alt text](http://www.nltk.org/images/string-slicing.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select a section of list types (arrays, tuples, NumPy arrays) using [start:stop]. start is included, stop is not. The number of elements in the result is stop - start." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = 'Monty Python'\n", + "seq[6:10]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 1, + "text": [ + "'Pyth'" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Omit start to default to start of the sequence" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq[:5]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 2, + "text": [ + "'Monty'" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Omit end to default to end of the sequence" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq[6:]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 3, + "text": [ + "'Python'" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Negative indices slice relative to the end" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq[-12:-7]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 4, + "text": [ + "'Monty'" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Slice can also take a step such as the one below, which takes every other element" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq[::2]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 5, + "text": [ + "'MnyPto'" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Passing -1 for the step 
reverses the sequence:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq[::-1]" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 6, + "text": [ + "'nohtyP ytnoM'" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Assign elements to a slice. The slice does not have to be the same length as the sequence assigned to it." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = [1, 1, 2, 3, 5, 8, 13]\n", + "seq[5:] = ['H', 'a', 'l', 'l']\n", + "seq" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 7, + "text": [ + "[1, 1, 2, 3, 5, 'H', 'a', 'l', 'l']" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare assigning into a slice (above) versus assigning into an index (below)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = [1, 1, 2, 3, 5, 8, 13]\n", + "seq[5] = ['H', 'a', 'l', 'l']\n", + "seq" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 8, + "text": [ + "[1, 1, 2, 3, 5, ['H', 'a', 'l', 'l'], 13]" + ] + } + ], + "prompt_number": 8 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## bisect" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The bisect module does not check whether the list is sorted, as this check would be an expensive O(n) operation. Using bisect on an unsorted list will not result in an error but could lead to incorrect results." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import bisect" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 9 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find the location where an element should be inserted to keep the list sorted" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = [1, 2, 2, 3, 5, 13]\n", + "bisect.bisect(seq, 8)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 10, + "text": [ + "5" + ] + } + ], + "prompt_number": 10 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Insert an element at the location that keeps the list sorted" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "bisect.insort(seq, 8)\n", + "seq" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 11, + "text": [ + "[1, 2, 2, 3, 5, 8, 13]" + ] + } + ], + "prompt_number": 11 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sort" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort a list in place in O(n log n) time" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = [1, 5, 3, 9, 7, 6]\n", + "seq.sort()\n", + "seq" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 12, + "text": [ + "[1, 3, 5, 6, 7, 9]" + ] + } + ], + "prompt_number": 12 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort by a key function: string length" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = ['the', 'quick', 'brown', 'fox', 'jumps', 'over']\n", + "seq.sort(key=len)\n", + "seq" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", +
"prompt_number": 13, + "text": [ + "['the', 'fox', 'over', 'quick', 'brown', 'jumps']" + ] + } + ], + "prompt_number": 13 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sorted" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Return a new sorted list from the elements of a sequence" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sorted([2, 5, 1, 8, 7, 9])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 14, + "text": [ + "[1, 2, 5, 7, 8, 9]" + ] + } + ], + "prompt_number": 14 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sorted('foo bar baz')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 15, + "text": [ + "[' ', ' ', 'a', 'a', 'b', 'b', 'f', 'o', 'o', 'r', 'z']" + ] + } + ], + "prompt_number": 15 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's common to get a sorted list of unique elements by combining sorted and set" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq = [2, 5, 1, 8, 7, 9, 9, 2, 5, 1, (4, 2), (1, 2), (1, 2)]\n", + "sorted(set(seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 16, + "text": [ + "[1, 2, 5, 7, 8, 9, (1, 2), (4, 2)]" + ] + } + ], + "prompt_number": 16 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## reversed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Iterates over the sequence elements in reverse order" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "list(reversed(seq))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 17, + "text": [ + "[(1, 2), (1, 2), (4, 2), 1, 5, 2, 9, 
9, 7, 8, 1, 5, 2]" + ] + } + ], + "prompt_number": 17 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## enumerate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "enumerate yields (index, value) pairs, giving you each element's index in a collection alongside its value" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "numbers = ['zero', 'one', 'two', 'three']\n", + "for i, number in enumerate(numbers):\n", + " print(i, number)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "(0, 'zero')\n", + "(1, 'one')\n", + "(2, 'two')\n", + "(3, 'three')\n" + ] + } + ], + "prompt_number": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "zip pairs up the elements of sequences to create a list of tuples" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq_1 = [1, 2, 3]\n", + "seq_2 = ['foo', 'bar', 'baz']\n", + "zip(seq_1, seq_2)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 19, + "text": [ + "[(1, 'foo'), (2, 'bar'), (3, 'baz')]" + ] + } + ], + "prompt_number": 19 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "zip takes an arbitrary number of sequences. The number of elements it produces is determined by the shortest sequence." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "seq_3 = [True, False]\n", + "zip(seq_1, seq_2, seq_3)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 20, + "text": [ + "[(1, 'foo', True), (2, 'bar', False)]" + ] + } + ], + "prompt_number": 20 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is common to use zip for simultaneously iterating over multiple sequences combined with enumerate" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "for i, (a, b) in enumerate(zip(seq_1, seq_2)):\n", + " print('%d: %s, %s' % (i, a, b))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "0: 1, foo\n", + "1: 2, bar\n", + "2: 3, baz\n" + ] + } + ], + "prompt_number": 21 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "zip can unzip a zipped sequence, which you can think of as converting a list of rows into a list of columns" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "numbers = [(1, 'one'), (2, 'two'), (3, 'three')]\n", + "a, b = zip(*numbers)\n", + "a" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 22, + "text": [ + "(1, 2, 3)" + ] + } + ], + "prompt_number": 22 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "b" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 23, + "text": [ + "('one', 'two', 'three')" + ] + } + ], + "prompt_number": 23 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file