{ "metadata": { "name": "", "signature": "sha256:f24f6112f8e8e28d262a4f84be84d1ae459154a9a445687d55392ff177e6a03c" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Structures" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## tuple" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# One dimensional, fixed-length, immutable sequence\n", "tup = (1, 2, 3)\n", "tup" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 1, "text": [ "(1, 2, 3)" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "a_list = [1, 2, 3]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# Convert to a tuple\n", "type(tuple(a_list))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "tuple" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "# Nested tuples\n", "nested_tup = ([1, 2, 3], (4, 5))\n", "nested_tup" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "([1, 2, 3], (4, 5))" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# Access by index O(1)\n", "nested_tup[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "[1, 2, 3]" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# Although tuples are immutable, their contents can contain mutable objects\n", "nested_tup[0].append(4)\n", "nested_tup[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "[1, 2, 3, 4]" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# Concatenate tuples\n", "# Creates a new tuple and copies objects\n", "(1, 3, 2) + (4, 5, 6)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "(1, 3, 2, 4, 5, 6)" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "# Multiply copies references to objects (objects themselves are not copied)\n", "('foo', 'bar') * 2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "('foo', 'bar', 'foo', 'bar')" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "# Unpack tuples\n", "a, b = nested_tup\n", "a, b" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "([1, 2, 3, 4], (4, 5))" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "# Unpack nested tuples\n", "(a, b, c, d), (e, f) = nested_tup\n", "a, b, c, d, e, f" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "(1, 2, 3, 4, 4, 5)" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "# A common use of variable unpacking is when iterating over sequences\n", "# of tuples or lists\n", "seq = [( 1, 2, 3), (4, 5, 6), (7, 8, 9)] \n", "for a, b, c in seq: \n", " print(a, b, c)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "(1, 2, 3)\n", "(4, 5, 6)\n", "(7, 8, 9)\n" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## list" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# One dimensional, variable-length, mutable sequence\n", "a_list = [1, 2, 3]\n", "a_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "[1, 2, 3]" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "# Convert to a list\n", "type(list(tup))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "list" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "# Nested list\n", "nested_list = [(1, 2, 3), [4, 5]]\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "[(1, 2, 3), [4, 5]]" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "# Access by index\n", "nested_list[1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 15, "text": [ "[4, 5]" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "# Append an element O(1)\n", "nested_list.append(6)\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 16, "text": [ "[(1, 2, 3), [4, 5], 6]" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "# Insert an element at a specific index\n", "# Insert is expensive as it has to shift subsequent elements O(n)\n", "nested_list.insert(0, 'start')\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 17, "text": [ "['start', (1, 2, 3), [4, 5], 6]" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "# Pop removes and returns an element from a specified index\n", "# Pop is expensive as it has to shift subsequent elements O(n)\n", "# O(1) if pop is used for the last element\n", "nested_list.pop(0)\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 18, "text": [ "[(1, 2, 3), [4, 5], 6]" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "# Remove locates the first such value and removes it O(n)\n", "nested_list.remove((1, 2, 3))\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ "[[4, 5], 6]" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "# Check if a list contains a value O(n)\n", "6 in nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "True" ] } ], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "# Concatenate lists\n", "# Creates a new list and copies objects\n", "[1, 3, 2] + [4, 5, 6]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 21, "text": [ "[1, 3, 2, 4, 5, 6]" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "# Extend a list by appending elements\n", "# Faster than concatenating lists\n", "nested_list.extend([7, 8, 9])\n", "nested_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 22, "text": [ "[[4, 5], 6, 7, 8, 9]" ] } ], "prompt_number": 22 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## sort" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Sort in-place O(n log n)\n", "a_list = [1, 5, 3, 9, 7, 6]\n", "a_list.sort()\n", "a_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 23, "text": [ "[1, 3, 5, 6, 7, 9]" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "# Sort by secondary key: str length\n", "b_list = ['the', 'quick', 'brown', 'fox', 'jumps', 'over']\n", "b_list.sort(key=len)\n", "b_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 24, "text": [ "['the', 'fox', 'over', 'quick', 'brown', 'jumps']" ] } ], "prompt_number": 24 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## bisect" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# The bisect module does not check whether the list is sorted, as this check\n", "# would be expensive O(n). Using bisect on an unsorted list will not result\n", "# in an error but could lead to incorrect results.\n", "import bisect\n", "\n", "# Find the location where an element should be inserted to keep the\n", "# list sorted\n", "c_list = [1, 2, 2, 3, 5, 13]\n", "bisect.bisect(c_list, 8)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ "5" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "# Inserts an element into a location to keep the list sorted\n", "bisect.insort(c_list, 8)\n", "c_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ "[1, 2, 2, 3, 5, 8, 13]" ] } ], "prompt_number": 26 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## slice" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![alt text](http://www.nltk.org/images/string-slicing.png)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Select a section of list types (arrays, tuples, NumPy arrays)\n", "# start:stop\n", "# start is included, stop is not\n", "# number of elements in the result is stop - start\n", "d_list = 'Monty Python'\n", "d_list[6:10]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 27, "text": [ "'Pyth'" ] } ], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "# Omit start to default to start of the sequence\n", "d_list[:5]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 28, "text": [ "'Monty'" ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "# Omit end to default to end of the sequence\n", "d_list[6:]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 29, "text": [ "'Python'" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "# Negative indices slice relative to the end\n", "d_list[-12:-7]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ "'Monty'" ] } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "# Slice can also take a step such as the one below, which takes\n", "# every other element\n", "e_list[::2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 33, "text": [ "[1, 2, 5, 13]" ] } ], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "# Passing -1 for the step reverses the list or tuple:\n", "e_list[::-1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 34, "text": [ "[13, ['H', 'a', 'l', 'l'], 5, 3, 2, 1, 1]" ] } ], "prompt_number": 34 }, { "cell_type": "code", "collapsed": false, "input": [ "# Assign elements to a slice\n", "# Slice range does not have to equal number of elements to assign\n", "e_list = [1, 1, 2, 3, 5, 8, 13]\n", "e_list[5:] = ['H', 'a', 'l', 'l']\n", "e_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 31, "text": [ "[1, 1, 2, 3, 5, 'H', 'a', 'l', 'l']" ] } ], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "# Compare assigning into a slice (above) versus assigning into\n", "# an inde\n", "e_list" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 32, "text": [ "[1, 1, 2, 3, 5, ['H', 'a', 'l', 'l'], 13]" ] } ], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }