{ "metadata": { "name": "", "signature": "sha256:6d8f33055bf348eb4c2871340e68a3f7577e0a027f5c83423e35757e01e8243a" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Structures" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* tuple\n", "* list\n", "* dict\n", "* set" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## tuple" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One dimensional, fixed-length, immutable sequence" ] }, { "cell_type": "code", "collapsed": false, "input": [ "tup = (1, 2, 3)\n", "tup" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "(1, 2, 3)" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "list_1 = [1, 2, 3]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Convert to a tuple" ] }, { "cell_type": "code", "collapsed": false, "input": [ "type(tuple(list_1))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "tuple" ] } ], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Nested tuples" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_tup = ([1, 2, 3], (4, 5))\n", "nested_tup" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "([1, 2, 3], (4, 5))" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Access by index O(1)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_tup[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "[1, 2, 3]" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "Although tuples are immutable, their contents can contain mutable objects" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_tup[0].append(4)\n", "nested_tup[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "[1, 2, 3, 4]" ] } ], "prompt_number": 6 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate tuples by creating a new tuple and copying objects" ] }, { "cell_type": "code", "collapsed": false, "input": [ "(1, 3, 2) + (4, 5, 6)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "(1, 3, 2, 4, 5, 6)" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Multiply copies references to objects (objects themselves are not copied)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "('foo', 'bar') * 2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "('foo', 'bar', 'foo', 'bar')" ] } ], "prompt_number": 8 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Unpack tuples" ] }, { "cell_type": "code", "collapsed": false, "input": [ "a, b = nested_tup\n", "a, b" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "([1, 2, 3, 4], (4, 5))" ] } ], "prompt_number": 9 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Unpack nested tuples" ] }, { "cell_type": "code", "collapsed": false, "input": [ "(a, b, c, d), (e, f) = nested_tup\n", "a, b, c, d, e, f" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "(1, 2, 3, 4, 4, 5)" ] } ], "prompt_number": 10 }, { "cell_type": "markdown", "metadata": {}, "source": [ "A common use of variable unpacking is when iterating over sequences of tuples or lists" ] }, 
{ "cell_type": "code", "collapsed": false, "input": [ "seq = [( 1, 2, 3), (4, 5, 6), (7, 8, 9)] \n", "for a, b, c in seq: \n", " print(a, b, c)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1, 2, 3)\n", "(4, 5, 6)\n", "(7, 8, 9)\n" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "One dimensional, variable-length, mutable sequence" ] }, { "cell_type": "code", "collapsed": false, "input": [ "list_1 = [1, 2, 3]\n", "list_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "[1, 2, 3]" ] } ], "prompt_number": 12 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Convert to a list" ] }, { "cell_type": "code", "collapsed": false, "input": [ "type(list(tup))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "list" ] } ], "prompt_number": 13 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Nested list" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list = [(1, 2, 3), [4, 5]]\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "[(1, 2, 3), [4, 5]]" ] } ], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Access by index" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list[1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 15, "text": [ "[4, 5]" ] } ], "prompt_number": 15 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Append an element O(1)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list.append(6)\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { 
"metadata": {}, "output_type": "pyout", "prompt_number": 16, "text": [ "[(1, 2, 3), [4, 5], 6]" ] } ], "prompt_number": 16 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Insert an element at a specific index. Insert is expensive as it has to shift subsequent elements O(n)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list.insert(0, 'start')\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 17, "text": [ "['start', (1, 2, 3), [4, 5], 6]" ] } ], "prompt_number": 17 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pop removes and returns an element from a specified index. Pop is expensive as it has to shift subsequent elements O(n). O(1) if pop is used for the last element" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list.pop(0)\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 18, "text": [ "[(1, 2, 3), [4, 5], 6]" ] } ], "prompt_number": 18 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remove locates the first such value and removes it O(n)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list.remove((1, 2, 3))\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ "[[4, 5], 6]" ] } ], "prompt_number": 19 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check if a list contains a value O(n)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "6 in nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "True" ] } ], "prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate lists by creating a new list and copying objects" ] }, { "cell_type": "code", "collapsed": false, "input": [ "[1, 3, 
2] + [4, 5, 6]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 21, "text": [ "[1, 3, 2, 4, 5, 6]" ] } ], "prompt_number": 21 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extend a list in place by appending elements. Faster than concatenating lists, which creates a new list and copies the objects." ] }, { "cell_type": "code", "collapsed": false, "input": [ "nested_list.extend([7, 8, 9])\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 22, "text": [ "[[4, 5], 6, 7, 8, 9]" ] } ], "prompt_number": 22 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## dict" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Also known as a hash map or associative array. A dict is a mutable collection of key-value pairs.\n", "\n", "Big O complexities are listed as average case, with most worst case complexities being O(n)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1 = { 'a' : 'foo', 'b' : [0, 1, 2, 3] }\n", "dict_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 23, "text": [ "{'a': 'foo', 'b': [0, 1, 2, 3]}" ] } ], "prompt_number": 23 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Access by key O(1)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1['b']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 24, "text": [ "[0, 1, 2, 3]" ] } ], "prompt_number": 24 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Insert or set by key O(1)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1[5] = 'bar'\n", "dict_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ "{5: 'bar', 'a': 'foo', 'b': [0, 1, 2, 3]}" ] } ], "prompt_number": 25 }, { "cell_type": "markdown", "metadata": {}, "source": 
[ "Check if a dict contains a key O(1)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "5 in dict_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ "True" ] } ], "prompt_number": 26 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Delete a value O(1)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_2 = dict(dict_1)\n", "del dict_2[5]\n", "dict_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 27, "text": [ "{'a': 'foo', 'b': [0, 1, 2, 3]}" ] } ], "prompt_number": 27 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pop a value O(1) deletes the key and returns the value" ] }, { "cell_type": "code", "collapsed": false, "input": [ "value = dict_2.pop('b')\n", "print(value)\n", "print(dict_2)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[0, 1, 2, 3]\n", "{'a': 'foo'}\n" ] } ], "prompt_number": 28 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get or pop with a default value if the key is not found. By default, get() will return None and pop() will throw an exception if the key is not found." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "value = dict_1.get('z', 0)\n", "value" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 29, "text": [ "0" ] } ], "prompt_number": 29 }, { "cell_type": "markdown", "metadata": {}, "source": [ "setdefault() is similar to get(), but if the key is not found it also inserts the key with the default value before returning it" ] }, { "cell_type": "code", "collapsed": false, "input": [ "print(dict_1.setdefault('b', None))\n", "print(dict_1.setdefault('z', None))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[0, 1, 2, 3]\n", "None\n" ] } ], "prompt_number": 30 }, { "cell_type": "markdown", "metadata": {}, "source": [ "In contrast to setdefault(), defaultdict lets you specify the default when the container is initialized, which works well if the default is appropriate for all keys." ] }, { "cell_type": "code", "collapsed": false, "input": [ "from collections import defaultdict\n", "\n", "seq = ['foo', 'bar', 'baz']\n", "first_letter = defaultdict(list)\n", "for elem in seq:\n", " first_letter[elem[0]].append(elem)\n", "first_letter" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 31, "text": [ "defaultdict(<type 'list'>, {'b': ['bar', 'baz'], 'f': ['foo']})" ] } ], "prompt_number": 31 }, { "cell_type": "markdown", "metadata": {}, "source": [ "dict keys must be \"hashable\": immutable objects like scalars (int, float, string) or tuples whose objects are all immutable." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "print(hash('string'))\n", "print(hash((1, 2, (3, 4))))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "-9167918882415130555\n", "-2725224101759650258\n" ] } ], "prompt_number": 32 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Lists are mutable and therefore are not hashable, although you can convert the list portion to a tuple as a quick fix" ] }, { "cell_type": "code", "collapsed": false, "input": [ "hash((1, 2, [3, 4]))" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "unhashable type: 'list'", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mhash\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'" ] } ], "prompt_number": 33 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get the list of keys in no particular order (although values() outputs the values in the same order). In Python 3, keys() returns a view object instead of a list." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1.keys()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 34, "text": [ "['a', 'b', 5, 'z']" ] } ], "prompt_number": 34 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get the list of values in no particular order (although keys() outputs the keys in the same order). In Python 3, values() returns a view object instead of a list." ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1.values()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 35, "text": [ "['foo', [0, 1, 2, 3], 'bar', None]" ] } ], "prompt_number": 35 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Merge one dict into another" ] }, { "cell_type": "code", "collapsed": false, "input": [ "dict_1.update({'e' : 'elephant', 'f' : 'fish'})\n", "dict_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 36, "text": [ "{5: 'bar',\n", " 'a': 'foo',\n", " 'b': [0, 1, 2, 3],\n", " 'e': 'elephant',\n", " 'f': 'fish',\n", " 'z': None}" ] } ], "prompt_number": 36 }, { "cell_type": "markdown", "metadata": {}, "source": [ "A common operation is to pair up two sequences element-wise in a dict" ] }, { "cell_type": "code", "collapsed": false, "input": [ "mapping = dict(zip(range(7), reversed(range(7))))\n", "mapping" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 37, "text": [ "{0: 6, 1: 5, 2: 4, 3: 3, 4: 2, 5: 1, 6: 0}" ] } ], "prompt_number": 37 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## set" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A set is an unordered collection of unique elements. 
" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_1 = set([0, 1, 2, 3, 4, 5])\n", "set_1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 38, "text": [ "{0, 1, 2, 3, 4, 5}" ] } ], "prompt_number": 38 }, { "cell_type": "code", "collapsed": false, "input": [ "set_2 = {1, 2, 3, 5, 8, 13}\n", "set_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 39, "text": [ "{1, 2, 3, 5, 8, 13}" ] } ], "prompt_number": 39 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Sets support set operations like union, intersection, difference, and symmetric difference" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Union O(len(set_1) + len(set_2))" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_1 | set_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 40, "text": [ "{0, 1, 2, 3, 4, 5, 8, 13}" ] } ], "prompt_number": 40 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Intersection O(min(len(set_1), len(set_2)))" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_1 & set_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 41, "text": [ "{1, 2, 3, 5}" ] } ], "prompt_number": 41 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Difference O(len(set_1))" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_1 - set_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 42, "text": [ "{0, 4}" ] } ], "prompt_number": 42 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Symmetric Difference O(len(set_1))" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_1 ^ set_2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", 
"prompt_number": 43, "text": [ "{0, 4, 8, 13}" ] } ], "prompt_number": 43 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Subset" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_3 = {1, 2, 3}\n", "set_3.issubset(set_2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 44, "text": [ "True" ] } ], "prompt_number": 44 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Superset" ] }, { "cell_type": "code", "collapsed": false, "input": [ "set_2.issuperset(set_3)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 45, "text": [ "True" ] } ], "prompt_number": 45 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Equal" ] }, { "cell_type": "code", "collapsed": false, "input": [ "{1, 2, 3} == {3, 2, 1}" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 46, "text": [ "True" ] } ], "prompt_number": 46 } ], "metadata": {} } ] }