diff --git a/graphs_trees/trie/__init__.py b/graphs_trees/trie/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/graphs_trees/trie/test_trie.py b/graphs_trees/trie/test_trie.py new file mode 100644 index 0000000..a0c1bdf --- /dev/null +++ b/graphs_trees/trie/test_trie.py @@ -0,0 +1,74 @@ +from nose.tools import assert_true +from nose.tools import raises + + +class TestTrie(object): + + def test_trie(self): + trie = Trie() + + print('Test: Insert') + words = ['a', 'at', 'has', 'hat', 'he', + 'me', 'men', 'mens', 'met'] + for word in words: + trie.insert(word) + for word in trie.list_words(): + assert_true(trie.find(word) is not None) + + print('Test: Remove me') + trie.remove('me') + words_removed = ['me'] + words = ['a', 'at', 'has', 'hat', 'he', + 'men', 'mens', 'met'] + for word in words: + assert_true(trie.find(word) is not None) + for word in words_removed: + assert_true(trie.find(word) is None) + + print('Test: Remove mens') + trie.remove('mens') + words_removed = ['me', 'mens'] + words = ['a', 'at', 'has', 'hat', 'he', + 'men', 'met'] + for word in words: + assert_true(trie.find(word) is not None) + for word in words_removed: + assert_true(trie.find(word) is None) + + print('Test: Remove a') + trie.remove('a') + words_removed = ['a', 'me', 'mens'] + words = ['at', 'has', 'hat', 'he', + 'men', 'met'] + for word in words: + assert_true(trie.find(word) is not None) + for word in words_removed: + assert_true(trie.find(word) is None) + + print('Test: Remove has') + trie.remove('has') + words_removed = ['a', 'has', 'me', 'mens'] + words = ['at', 'hat', 'he', + 'men', 'met'] + for word in words: + assert_true(trie.find(word) is not None) + for word in words_removed: + assert_true(trie.find(word) is None) + + print('Success: test_trie') + + @raises(Exception) + def test_trie_remove_invalid(self): + print('Test: Remove from empty trie') + trie = Trie() + assert_true(trie.remove('foo') is None) + + +def main(): + test = TestTrie() + test.test_trie() + test.test_trie_remove_invalid() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/graphs_trees/trie/trie.py b/graphs_trees/trie/trie.py new file mode 100644 index 0000000..7cfe0fa --- /dev/null +++ b/graphs_trees/trie/trie.py @@ -0,0 +1,76 @@ +from collections import OrderedDict + + +class Node(object): + + def __init__(self, key, parent=None, terminates=False): + self.key = key + self.terminates = False + self.parent = parent + self.children = {} + + +class Trie(object): + + def __init__(self): + self.root = Node('') + + def find(self, word): + if word is None: + raise TypeError('word cannot be None') + node = self.root + for char in word: + if char in node.children: + node = node.children[char] + else: + return None + return node if node.terminates else None + + def insert(self, word): + if word is None: + raise TypeError('word cannot be None') + node = self.root + parent = None + for char in word: + if char in node.children: + node = node.children[char] + else: + node.children[char] = Node(char, parent=node) + node = node.children[char] + node.terminates = True + + def remove(self, word): + if word is None: + raise TypeError('word cannot be None') + node = self.find(word) + if node is None: + raise KeyError('word does not exist') + node.terminates = False + parent = node.parent + while parent is not None: + # As we are propagating the delete up the + # parents, if this node has children, stop + # here to prevent orphaning its children. + # Or + # if this node is a terminating node that is + # not the terminating node of the input word, + # stop to prevent removing the associated word. + if node.children or node.terminates: + return + del parent.children[node.key] + node = parent + parent = parent.parent + + def list_words(self): + result = [] + curr_word = '' + self._list_words(self.root, curr_word, result) + return result + + def _list_words(self, node, curr_word, result): + if node is None: + return + for key, child in node.children.items(): + if child.terminates: + result.append(curr_word+key) + self._list_words(child, curr_word+key, result) \ No newline at end of file diff --git a/graphs_trees/trie/trie_challenge.ipynb b/graphs_trees/trie/trie_challenge.ipynb new file mode 100644 index 0000000..8889146 --- /dev/null +++ b/graphs_trees/trie/trie_challenge.ipynb @@ -0,0 +1,279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/interactive-coding-challenges)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Challenge Notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Problem: Implement a trie with find, insert, remove, and list_words methods.\n", + "\n", + "* [Constraints](#Constraints)\n", + "* [Test Cases](#Test-Cases)\n", + "* [Algorithm](#Algorithm)\n", + "* [Code](#Code)\n", + "* [Unit Test](#Unit-Test)\n", + "* [Solution Notebook](#Solution-Notebook)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constraints\n", + "\n", + "* Can we assume we are working with strings?\n", + " * Yes\n", + "* Are the strings in ASCII?\n", + " * Yes\n", + "* Should `find` only match exact words with a terminating character?\n", + " * Yes\n", + "* Should `list_words` only return words with a terminating character?\n", + " * Yes\n", + "* Can we assume this fits memory?\n", + " * Yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Cases\n", + "\n", + "
\n",
+    "\n",
+    "         root\n",
+    "       /  |  \\\n",
+    "      h   a*  m\n",
+    "     / \\   \\   \\\n",
+    "    a   e*  t*  e*\n",
+    "   / \\         / \\\n",
+    "  s*  t*      n*  t*\n",
+    "             /\n",
+    "            s*\n",
+    "\n",
+    "find\n",
+    "\n",
+    "* Find on an empty trie\n",
+    "* Find non-matching\n",
+    "* Find matching\n",
+    "\n",
+    "insert\n",
+    "\n",
+    "* Insert on empty trie\n",
+    "* Insert to make a leaf terminator char\n",
+    "* Insert to extend an existing terminator char\n",
+    "\n",
+    "remove\n",
+    "\n",
+    "* Remove me\n",
+    "* Remove mens\n",
+    "* Remove a\n",
+    "* Remove has\n",
+    "\n",
+    "list_words\n",
+    "\n",
+    "* List empty\n",
+    "* List general case\n",
+    "
\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithm\n", + "\n", + "Refer to the [Solution Notebook](http://nbviewer.ipython.org/github/donnemartin/interactive-coding-challenges/blob/master/graphs_trees/trie/trie_solution.ipynb). If you are stuck and need a hint, the solution notebook's algorithm discussion might be a good place to start." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from collections import OrderedDict\n", + "\n", + "\n", + "class Node(object):\n", + "\n", + " def __init__(self, data, parent=None, terminates=False):\n", + " # TODO: Implement me\n", + " pass\n", + "\n", + "\n", + "class Trie(object):\n", + "\n", + " def __init__(self):\n", + " # TODO: Implement me\n", + " pass\n", + "\n", + " def find(self, word):\n", + " # TODO: Implement me\n", + " pass\n", + "\n", + " def insert(self, word):\n", + " # TODO: Implement me\n", + " pass\n", + "\n", + " def remove(self, word):\n", + " # TODO: Implement me\n", + " pass\n", + "\n", + " def list_words(self):\n", + " # TODO: Implement me\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unit Test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The following unit test is expected to fail until you solve the challenge.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# %load test_trie.py\n", + "from nose.tools import assert_true\n", + "\n", + "\n", + "class TestTrie(object):\n", + "\n", + " def test_trie(self):\n", + " print('Test: Remove from empty trie')\n", + " trie = Trie()\n", + " assert_true(trie.remove('foo') is None)\n", + "\n", + " print('Test: Insert')\n", + " words = ['a', 'at', 'has', 'hat', 'he',\n", + " 'me', 'men', 'mens', 'met']\n", + " for word in words:\n", + " trie.insert(word)\n", + " for word in trie.list_words():\n", + " assert_true(trie.find(word) is not None)\n", + "\n", + " # Remove me\n", + " # Remove mens\n", + " # Remove a\n", + " \n", + " print('Test: Remove me')\n", + " trie.remove('me')\n", + " words_removed = ['me']\n", + " words = ['a', 'at', 'has', 'hat', 'he',\n", + " 'men', 'mens', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Test: Remove mens')\n", + " trie.remove('mens')\n", + " words_removed = ['me', 'mens']\n", + " words = ['a', 'at', 'has', 'hat', 'he',\n", + " 'men', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Test: Remove a')\n", + " trie.remove('a')\n", + " words_removed = ['a', 'me', 'mens']\n", + " words = ['at', 'has', 'hat', 'he',\n", + " 'men', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Test: Remove has')\n", + " trie.remove('has')\n", + " words_removed = ['a', 'has', 'me', 'mens']\n", + " words = ['at', 'hat', 'he',\n", + " 'men', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Success: test_trie')\n", + "\n", + "\n", + "def main():\n", + " test = TestTrie()\n", + " test.test_trie()\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Solution Notebook\n", + "\n", + "Review the [Solution Notebook](http://nbviewer.ipython.org/github/donnemartin/interactive-coding-challenges/blob/master/graphs_trees/trie/trie_solution.ipynb) for a discussion on algorithms and code solutions." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/graphs_trees/trie/trie_solution.ipynb b/graphs_trees/trie/trie_solution.ipynb new file mode 100644 index 0000000..aec6ba2 --- /dev/null +++ b/graphs_trees/trie/trie_solution.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/interactive-coding-challenges)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Solution Notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Problem: Implement a trie with find, insert, remove, and list_words methods.\n", + "\n", + "* [Constraints](#Constraints)\n", + "* [Test Cases](#Test-Cases)\n", + "* [Algorithm](#Algorithm)\n", + "* [Code](#Code)\n", + "* [Unit Test](#Unit-Test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constraints\n", + "\n", + "* Can we assume we are working with strings?\n", + " * Yes\n", + "* Are the strings in ASCII?\n", + " * Yes\n", + "* Should `find` only match exact words with a terminating character?\n", + " * Yes\n", + "* Should `list_words` only return words with a terminating character?\n", + " * Yes\n", + "* Can we assume this fits memory?\n", + " * Yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Cases\n", + "\n", + "
\n",
+    "\n",
+    "root node is denoted by ''\n",
+    "\n",
+    "         ''\n",
+    "       /  |  \\\n",
+    "      h   a*  m\n",
+    "     / \\   \\   \\\n",
+    "    a   e*  t*  e*\n",
+    "   / \\         / \\\n",
+    "  s*  t*      n*  t*\n",
+    "             /\n",
+    "            s*\n",
+    "\n",
+    "find\n",
+    "\n",
+    "* Find on an empty trie\n",
+    "* Find non-matching\n",
+    "* Find matching\n",
+    "\n",
+    "insert\n",
+    "\n",
+    "* Insert on empty trie\n",
+    "* Insert to make a leaf terminator char\n",
+    "* Insert to extend an existing terminator char\n",
+    "\n",
+    "remove\n",
+    "\n",
+    "* Remove me\n",
+    "* Remove mens\n",
+    "* Remove a\n",
+    "* Remove has\n",
+    "\n",
+    "list_words\n",
+    "\n",
+    "* List empty\n",
+    "* List general case\n",
+    "
\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithm\n", + "\n", + "### find\n", + "\n", + "* Set node to the root\n", + "* For each char in the input word\n", + " * Check the current node's children to see if it contains the char\n", + " * If a child has the char, set node to the child\n", + " * Else, return None\n", + "* Return the last child node if it has a terminator, else None\n", + "\n", + "Complexity:\n", + "* Time: O(m), where m is the length of the word\n", + "* Space: O(h) for the recursion depth (tree height), or O(1) if using an iterative approach\n", + "\n", + "### insert\n", + "\n", + "* set node to the root\n", + "* For each char in the input word\n", + " * Check the current node's children to see if it contains the char\n", + " * If a child has the char, set node to the child\n", + " * Else, insert a new node with the char\n", + " * Update children and parents\n", + "* Set the last node as a terminating node\n", + "\n", + "Complexity:\n", + "* Time: O(m), where m is the length of the word\n", + "* Space: O(h) for the recursion depth (tree height), or O(1) if using an iterative approach\n", + "\n", + "### remove\n", + "\n", + "* Determine the matching terminating node by calling the find method\n", + "* If the matching node has children, remove the terminator to prevent orphaning its children\n", + "* Set the parent node to the matching node's parent\n", + "* We'll be looping up the parent chain to propagate the delete\n", + "* While the parent is valid\n", + " * If the node has children\n", + " * Return to prevent orphaning its remaining children\n", + " * If the node is a terminating node and it isn't the original matching node from the find call\n", + " * Return to prevent deleting this additional valid word\n", + " * Remove the parent node's child entry matching the node\n", + " * Set the node to the parent\n", + " * Set the parent to the parent's parent\n", + "\n", + "Complexity:\n", + "* Time: O(m+h), where where m is the length of the word and h is the tree height\n", + "* Space: O(h) for the recursion depth (tree height), or O(1) if using an iterative approach\n", + "\n", + "### list_words\n", + "\n", + "* Do a pre-order traversal, passing down the current word\n", + " * When you reach a terminating node, add it to the list of results\n", + "\n", + "Complexity:\n", + "* Time: O(n)\n", + "* Space: O(h) for the recursion depth (tree height), or O(1) if using an iterative approach" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting trie.py\n" + ] + } + ], + "source": [ + "%%writefile trie.py\n", + "from collections import OrderedDict\n", + "\n", + "\n", + "class Node(object):\n", + "\n", + " def __init__(self, key, parent=None, terminates=False):\n", + " self.key = key\n", + " self.terminates = False\n", + " self.parent = parent\n", + " self.children = {}\n", + "\n", + "\n", + "class Trie(object):\n", + "\n", + " def __init__(self):\n", + " self.root = Node('')\n", + "\n", + " def find(self, word):\n", + " if word is None:\n", + " raise TypeError('word cannot be None')\n", + " node = self.root\n", + " for char in word:\n", + " if char in node.children:\n", + " node = node.children[char]\n", + " else:\n", + " return None\n", + " return node if node.terminates else None\n", + "\n", + " def insert(self, word):\n", + " if word is None:\n", + " raise TypeError('word cannot be None')\n", + " node = self.root\n", + " parent = None\n", + " for char in word:\n", + " if char in node.children:\n", + " node = node.children[char]\n", + " else:\n", + " node.children[char] = Node(char, parent=node)\n", + " node = node.children[char]\n", + " node.terminates = True\n", + "\n", + " def remove(self, word):\n", + " if word is None:\n", + " raise TypeError('word cannot be None')\n", + " node = self.find(word)\n", + " if node is None:\n", + " raise KeyError('word does not exist')\n", + " node.terminates = False\n", + " parent = node.parent\n", + " while parent is not None:\n", + " # As we are propagating the delete up the \n", + " # parents, if this node has children, stop\n", + " # here to prevent orphaning its children.\n", + " # Or\n", + " # if this node is a terminating node that is\n", + " # not the terminating node of the input word, \n", + " # stop to prevent removing the associated word.\n", + " if node.children or node.terminates:\n", + " return\n", + " del parent.children[node.key]\n", + " node = parent\n", + " parent = parent.parent\n", + "\n", + " def list_words(self):\n", + " result = []\n", + " curr_word = ''\n", + " self._list_words(self.root, curr_word, result)\n", + " return result\n", + "\n", + " def _list_words(self, node, curr_word, result):\n", + " if node is None:\n", + " return\n", + " for key, child in node.children.items():\n", + " if child.terminates:\n", + " result.append(curr_word + key)\n", + " self._list_words(child, curr_word + key, result)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%run trie.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unit Test" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting test_trie.py\n" + ] + } + ], + "source": [ + "%%writefile test_trie.py\n", + "from nose.tools import assert_true\n", + "from nose.tools import raises\n", + "\n", + "\n", + "class TestTrie(object): \n", + "\n", + " def test_trie(self):\n", + " trie = Trie()\n", + "\n", + " print('Test: Insert')\n", + " words = ['a', 'at', 'has', 'hat', 'he',\n", + " 'me', 'men', 'mens', 'met']\n", + " for word in words:\n", + " trie.insert(word)\n", + " for word in trie.list_words():\n", + " assert_true(trie.find(word) is not None)\n", + " \n", + " print('Test: Remove me')\n", + " trie.remove('me')\n", + " words_removed = ['me']\n", + " words = ['a', 'at', 'has', 'hat', 'he',\n", + " 'men', 'mens', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Test: Remove mens')\n", + " trie.remove('mens')\n", + " words_removed = ['me', 'mens']\n", + " words = ['a', 'at', 'has', 'hat', 'he',\n", + " 'men', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Test: Remove a')\n", + " trie.remove('a')\n", + " words_removed = ['a', 'me', 'mens']\n", + " words = ['at', 'has', 'hat', 'he',\n", + " 'men', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Test: Remove has')\n", + " trie.remove('has')\n", + " words_removed = ['a', 'has', 'me', 'mens']\n", + " words = ['at', 'hat', 'he',\n", + " 'men', 'met']\n", + " for word in words:\n", + " assert_true(trie.find(word) is not None)\n", + " for word in words_removed:\n", + " assert_true(trie.find(word) is None)\n", + "\n", + " print('Success: test_trie')\n", + "\n", + " @raises(Exception)\n", + " def test_trie_remove_invalid(self):\n", + " print('Test: Remove from empty trie')\n", + " trie = Trie()\n", + " assert_true(trie.remove('foo') is None) \n", + "\n", + "\n", + "def main():\n", + " test = TestTrie()\n", + " test.test_trie()\n", + " test.test_trie_remove_invalid()\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test: Insert\n", + "Test: Remove me\n", + "Test: Remove mens\n", + "Test: Remove a\n", + "Test: Remove has\n", + "Success: test_trie\n", + "Test: Remove from empty trie\n" + ] + } + ], + "source": [ + "%run -i test_trie.py" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}