From 92590ff3b6bb2205b8c9ec3a8e006caf6f7d8ef8 Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Tue, 28 Mar 2017 05:02:34 -0400 Subject: [PATCH] Add longest common substring challenge --- .../longest_common_substring/__init__.py | 0 .../longest_common_substr_challenge.ipynb | 177 ++++++++++++ .../longest_common_substr_solution.ipynb | 252 ++++++++++++++++++ .../test_longest_common_substr.py | 23 ++ 4 files changed, 452 insertions(+) create mode 100644 recursion_dynamic/longest_common_substring/__init__.py create mode 100644 recursion_dynamic/longest_common_substring/longest_common_substr_challenge.ipynb create mode 100644 recursion_dynamic/longest_common_substring/longest_common_substr_solution.ipynb create mode 100644 recursion_dynamic/longest_common_substring/test_longest_common_substr.py diff --git a/recursion_dynamic/longest_common_substring/__init__.py b/recursion_dynamic/longest_common_substring/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/recursion_dynamic/longest_common_substring/longest_common_substr_challenge.ipynb b/recursion_dynamic/longest_common_substring/longest_common_substr_challenge.ipynb new file mode 100644 index 0000000..77d4218 --- /dev/null +++ b/recursion_dynamic/longest_common_substring/longest_common_substr_challenge.ipynb @@ -0,0 +1,177 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/interactive-coding-challenges)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Challenge Notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Problem: Given two strings, find the longest common substring.\n", + "\n", + "* [Constraints](#Constraints)\n", + "* [Test Cases](#Test-Cases)\n", + "* [Algorithm](#Algorithm)\n", + "* [Code](#Code)\n", + "* [Unit Test](#Unit-Test)\n", + "* [Solution Notebook](#Solution-Notebook)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constraints\n", + "\n", + "* Can we assume the inputs are valid?\n", + " * No\n", + "* Can we assume the strings are ASCII?\n", + " * Yes\n", + "* Is this case sensitive?\n", + " * Yes\n", + "* Is a substring a contiguous block of chars?\n", + " * Yes\n", + "* Do we expect a string as a result?\n", + " * Yes\n", + "* Can we assume this fits memory?\n", + " * Yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Cases\n", + "\n", + "* str0 or str1 is None -> Exception\n", + "* str0 or str1 equals 0 -> ''\n", + "* General case\n", + "\n", + "str0 = 'ABCDEFGHIJ'\n", + "str1 = 'FOOBCDBCDE'\n", + "\n", + "result: 'BCDE'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithm\n", + "\n", + "Refer to the [Solution Notebook](). If you are stuck and need a hint, the solution notebook's algorithm discussion might be a good place to start." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class StringCompare(object):\n", + "\n", + " def longest_common_substr(self, str0, str1):\n", + " # TODO: Implement me\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unit Test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The following unit test is expected to fail until you solve the challenge.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# %load test_longest_common_substr.py\n", + "from nose.tools import assert_equal, assert_raises\n", + "\n", + "\n", + "class TestLongestCommonSubstr(object):\n", + "\n", + " def test_longest_common_substr(self):\n", + " str_comp = StringCompare()\n", + " assert_raises(TypeError, str_comp.longest_common_substr, None, None)\n", + " assert_equal(str_comp.longest_common_substr('', ''), '')\n", + " str0 = 'ABCDEFGHIJ'\n", + " str1 = 'FOOBCDBCDE'\n", + " expected = 'BCDE'\n", + " assert_equal(str_comp.longest_common_substr(str0, str1), expected)\n", + " print('Success: test_longest_common_substr')\n", + "\n", + "\n", + "def main():\n", + " test = TestLongestCommonSubstr()\n", + " test.test_longest_common_substr()\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Solution Notebook\n", + "\n", + "Review the [Solution Notebook]() for a discussion on algorithms and code solutions." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/recursion_dynamic/longest_common_substring/longest_common_substr_solution.ipynb b/recursion_dynamic/longest_common_substring/longest_common_substr_solution.ipynb new file mode 100644 index 0000000..4c90b2b --- /dev/null +++ b/recursion_dynamic/longest_common_substring/longest_common_substr_solution.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/interactive-coding-challenges)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Solution Notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Problem: Given two strings, find the longest common substring.\n", + "\n", + "* [Constraints](#Constraints)\n", + "* [Test Cases](#Test-Cases)\n", + "* [Algorithm](#Algorithm)\n", + "* [Code](#Code)\n", + "* [Unit Test](#Unit-Test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constraints\n", + "\n", + "* Can we assume the inputs are valid?\n", + " * No\n", + "* Can we assume the strings are ASCII?\n", + " * Yes\n", + "* Is this case sensitive?\n", + " * Yes\n", + "* Is a substring a contiguous block of chars?\n", + " * Yes\n", + "* Do we expect a string as a result?\n", + " * Yes\n", + "* Can we assume this fits memory?\n", + " * Yes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Cases\n", + "\n", + "* str0 or str1 is None -> Exception\n", + "* str0 or str1 equals 0 -> ''\n", + "* General case\n", + "\n", + "str0 = 'ABCDEFGHIJ'\n", + "str1 = 'FOOBCDBCDE'\n", + "\n", + "result: 'BCDE'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Algorithm\n", + "\n", + "We'll use bottom up dynamic programming to build a table. \n", + "\n", + "
\n",
+    "\n",
+    "The rows (i) represent str0.\n",
+    "The columns (j) represent str1.\n",
+    "\n",
+    "                       str1\n",
+    "  -------------------------------------------------\n",
+    "  |   |   | A | B | C | D | E | F | G | H | I | J |\n",
+    "  -------------------------------------------------\n",
+    "  |   | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
+    "  | F | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |\n",
+    "  | O | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |\n",
+    "s | O | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |\n",
+    "t | B | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |\n",
+    "r | C | 0 | 0 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |\n",
+    "0 | D | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
+    "  | B | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
+    "  | C | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
+    "  | D | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
+    "  | E | 0 | 0 | 1 | 2 | 3 | 4 | 4 | 4 | 4 | 4 | 4 |\n",
+    "  -------------------------------------------------\n",
+    "\n",
+    "if str1[j] != str0[i]:\n",
+    "    T[i][j] = max(\n",
+    "        T[i][j - 1],\n",
+    "        T[i - 1][j])\n",
+    "else:\n",
+    "    T[i][j] = T[i - 1][j - 1] + 1\n",
+    "
\n", + "\n", + "Complexity:\n", + "* Time: O(m * n), where m is the length of str0 and n is the length of str1\n", + "* Space: O(m * n), where m is the length of str0 and n is the length of str1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "class StringCompare(object):\n", + "\n", + " def longest_common_substr(self, str0, str1):\n", + " if str0 is None or str1 is None:\n", + " raise TypeError('str input cannot be None')\n", + " # Add one to number of rows and cols for the dp table's\n", + " # first row of 0's and first col of 0's\n", + " num_rows = len(str0) + 1\n", + " num_cols = len(str1) + 1\n", + " T = [[None] * num_cols for _ in range(num_rows)]\n", + " for i in range(num_rows):\n", + " for j in range(num_cols):\n", + " if i == 0 or j == 0:\n", + " T[i][j] = 0\n", + " elif str0[j - 1] != str1[i - 1]:\n", + " T[i][j] = max(T[i][j - 1],\n", + " T[i - 1][j])\n", + " else:\n", + " T[i][j] = T[i - 1][j - 1] + 1\n", + " results = ''\n", + " i = num_rows - 1\n", + " j = num_cols - 1\n", + " # Walk backwards to determine the substring\n", + " while T[i][j]:\n", + " if T[i][j] == T[i][j - 1]:\n", + " j -= 1\n", + " elif T[i][j] == T[i - 1][j]:\n", + " i -= 1\n", + " elif T[i][j] == T[i - 1][j - 1] + 1:\n", + " results += str1[i - 1]\n", + " i -= 1\n", + " j -= 1\n", + " else:\n", + " raise Exception('Error constructing table')\n", + " # Walking backwards results in a string in reverse order\n", + " return results[::-1] " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unit Test" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting test_longest_common_substr.py\n" + ] + } + ], + "source": [ + "%%writefile test_longest_common_substr.py\n", + "from nose.tools import assert_equal, assert_raises\n", + "\n", + "\n", + "class TestLongestCommonSubstr(object):\n", + "\n", + " def test_longest_common_substr(self):\n", + " str_comp = StringCompare()\n", + " assert_raises(TypeError, str_comp.longest_common_substr, None, None)\n", + " assert_equal(str_comp.longest_common_substr('', ''), '')\n", + " str0 = 'ABCDEFGHIJ'\n", + " str1 = 'FOOBCDBCDE'\n", + " expected = 'BCDE'\n", + " assert_equal(str_comp.longest_common_substr(str0, str1), expected)\n", + " print('Success: test_longest_common_substr')\n", + "\n", + "\n", + "def main():\n", + " test = TestLongestCommonSubstr()\n", + " test.test_longest_common_substr()\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " main()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Success: test_longest_common_substr\n" + ] + } + ], + "source": [ + "%run -i test_longest_common_substr.py" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/recursion_dynamic/longest_common_substring/test_longest_common_substr.py b/recursion_dynamic/longest_common_substring/test_longest_common_substr.py new file mode 100644 index 0000000..8af8fe7 --- /dev/null +++ b/recursion_dynamic/longest_common_substring/test_longest_common_substr.py @@ -0,0 +1,23 @@ +from nose.tools import assert_equal, assert_raises + + +class TestLongestCommonSubstr(object): + + def test_longest_common_substr(self): + str_comp = StringCompare() + assert_raises(TypeError, str_comp.longest_common_substr, None, None) + assert_equal(str_comp.longest_common_substr('', ''), '') + str0 = 'ABCDEFGHIJ' + str1 = 'FOOBCDBCDE' + expected = 'BCDE' + assert_equal(str_comp.longest_common_substr(str0, str1), expected) + print('Success: test_longest_common_substr') + + +def main(): + test = TestLongestCommonSubstr() + test.test_longest_common_substr() + + +if __name__ == '__main__': + main() \ No newline at end of file