mirror of
https://github.com/donnemartin/interactive-coding-challenges.git
synced 2024-03-22 13:11:13 +08:00
Add longest common substring challenge
This commit is contained in:
parent
f6abe150e3
commit
92590ff3b6
|
@ -0,0 +1,177 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/interactive-coding-challenges)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Challenge Notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Problem: Given two strings, find the longest common substring.\n",
|
||||
"\n",
|
||||
"* [Constraints](#Constraints)\n",
|
||||
"* [Test Cases](#Test-Cases)\n",
|
||||
"* [Algorithm](#Algorithm)\n",
|
||||
"* [Code](#Code)\n",
|
||||
"* [Unit Test](#Unit-Test)\n",
|
||||
"* [Solution Notebook](#Solution-Notebook)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Constraints\n",
|
||||
"\n",
|
||||
"* Can we assume the inputs are valid?\n",
|
||||
" * No\n",
|
||||
"* Can we assume the strings are ASCII?\n",
|
||||
" * Yes\n",
|
||||
"* Is this case sensitive?\n",
|
||||
" * Yes\n",
|
||||
"* Is a substring a contiguous block of chars?\n",
|
||||
" * Yes\n",
|
||||
"* Do we expect a string as a result?\n",
|
||||
" * Yes\n",
|
||||
"* Can we assume this fits memory?\n",
|
||||
" * Yes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test Cases\n",
|
||||
"\n",
|
||||
"* str0 or str1 is None -> Exception\n",
|
||||
"* str0 or str1 equals 0 -> ''\n",
|
||||
"* General case\n",
|
||||
"\n",
|
||||
"str0 = 'ABCDEFGHIJ'\n",
|
||||
"str1 = 'FOOBCDBCDE'\n",
|
||||
"\n",
|
||||
"result: 'BCDE'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm\n",
|
||||
"\n",
|
||||
"Refer to the [Solution Notebook](). If you are stuck and need a hint, the solution notebook's algorithm discussion might be a good place to start."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class StringCompare(object):\n",
|
||||
"\n",
|
||||
" def longest_common_substr(self, str0, str1):\n",
|
||||
" # TODO: Implement me\n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Unit Test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**The following unit test is expected to fail until you solve the challenge.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %load test_longest_common_substr.py\n",
|
||||
"from nose.tools import assert_equal, assert_raises\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class TestLongestCommonSubstr(object):\n",
|
||||
"\n",
|
||||
" def test_longest_common_substr(self):\n",
|
||||
" str_comp = StringCompare()\n",
|
||||
" assert_raises(TypeError, str_comp.longest_common_substr, None, None)\n",
|
||||
" assert_equal(str_comp.longest_common_substr('', ''), '')\n",
|
||||
" str0 = 'ABCDEFGHIJ'\n",
|
||||
" str1 = 'FOOBCDBCDE'\n",
|
||||
" expected = 'BCDE'\n",
|
||||
" assert_equal(str_comp.longest_common_substr(str0, str1), expected)\n",
|
||||
" print('Success: test_longest_common_substr')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def main():\n",
|
||||
" test = TestLongestCommonSubstr()\n",
|
||||
" test.test_longest_common_substr()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"if __name__ == '__main__':\n",
|
||||
" main()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Solution Notebook\n",
|
||||
"\n",
|
||||
"Review the [Solution Notebook]() for a discussion on algorithms and code solutions."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
|
@ -0,0 +1,252 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook was prepared by [Donne Martin](https://github.com/donnemartin). Source and license info is on [GitHub](https://github.com/donnemartin/interactive-coding-challenges)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Solution Notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Problem: Given two strings, find the longest common substring.\n",
|
||||
"\n",
|
||||
"* [Constraints](#Constraints)\n",
|
||||
"* [Test Cases](#Test-Cases)\n",
|
||||
"* [Algorithm](#Algorithm)\n",
|
||||
"* [Code](#Code)\n",
|
||||
"* [Unit Test](#Unit-Test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Constraints\n",
|
||||
"\n",
|
||||
"* Can we assume the inputs are valid?\n",
|
||||
" * No\n",
|
||||
"* Can we assume the strings are ASCII?\n",
|
||||
" * Yes\n",
|
||||
"* Is this case sensitive?\n",
|
||||
" * Yes\n",
|
||||
"* Is a substring a contiguous block of chars?\n",
|
||||
" * Yes\n",
|
||||
"* Do we expect a string as a result?\n",
|
||||
" * Yes\n",
|
||||
"* Can we assume this fits memory?\n",
|
||||
" * Yes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test Cases\n",
|
||||
"\n",
|
||||
"* str0 or str1 is None -> Exception\n",
|
||||
"* str0 or str1 equals 0 -> ''\n",
|
||||
"* General case\n",
|
||||
"\n",
|
||||
"str0 = 'ABCDEFGHIJ'\n",
|
||||
"str1 = 'FOOBCDBCDE'\n",
|
||||
"\n",
|
||||
"result: 'BCDE'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm\n",
|
||||
"\n",
|
||||
"We'll use bottom up dynamic programming to build a table. \n",
|
||||
"\n",
|
||||
"<pre>\n",
|
||||
"\n",
|
||||
"The rows (i) represent str0.\n",
|
||||
"The columns (j) represent str1.\n",
|
||||
"\n",
|
||||
" str1\n",
|
||||
" -------------------------------------------------\n",
|
||||
" | | | A | B | C | D | E | F | G | H | I | J |\n",
|
||||
" -------------------------------------------------\n",
|
||||
" | | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |\n",
|
||||
" | F | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |\n",
|
||||
" | O | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |\n",
|
||||
"s | O | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 |\n",
|
||||
"t | B | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |\n",
|
||||
"r | C | 0 | 0 | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |\n",
|
||||
"0 | D | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
|
||||
" | B | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
|
||||
" | C | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
|
||||
" | D | 0 | 0 | 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |\n",
|
||||
" | E | 0 | 0 | 1 | 2 | 3 | 4 | 4 | 4 | 4 | 4 | 4 |\n",
|
||||
" -------------------------------------------------\n",
|
||||
"\n",
|
||||
"if str1[j] != str0[i]:\n",
|
||||
" T[i][j] = max(\n",
|
||||
" T[i][j - 1],\n",
|
||||
" T[i - 1][j])\n",
|
||||
"else:\n",
|
||||
" T[i][j] = T[i - 1][j - 1] + 1\n",
|
||||
"</pre>\n",
|
||||
"\n",
|
||||
"Complexity:\n",
|
||||
"* Time: O(m * n), where m is the length of str0 and n is the length of str1\n",
|
||||
"* Space: O(m * n), where m is the length of str0 and n is the length of str1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Code"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class StringCompare(object):\n",
|
||||
"\n",
|
||||
" def longest_common_substr(self, str0, str1):\n",
|
||||
" if str0 is None or str1 is None:\n",
|
||||
" raise TypeError('str input cannot be None')\n",
|
||||
" # Add one to number of rows and cols for the dp table's\n",
|
||||
" # first row of 0's and first col of 0's\n",
|
||||
" num_rows = len(str0) + 1\n",
|
||||
" num_cols = len(str1) + 1\n",
|
||||
" T = [[None] * num_cols for _ in range(num_rows)]\n",
|
||||
" for i in range(num_rows):\n",
|
||||
" for j in range(num_cols):\n",
|
||||
" if i == 0 or j == 0:\n",
|
||||
" T[i][j] = 0\n",
|
||||
" elif str0[j - 1] != str1[i - 1]:\n",
|
||||
" T[i][j] = max(T[i][j - 1],\n",
|
||||
" T[i - 1][j])\n",
|
||||
" else:\n",
|
||||
" T[i][j] = T[i - 1][j - 1] + 1\n",
|
||||
" results = ''\n",
|
||||
" i = num_rows - 1\n",
|
||||
" j = num_cols - 1\n",
|
||||
" # Walk backwards to determine the substring\n",
|
||||
" while T[i][j]:\n",
|
||||
" if T[i][j] == T[i][j - 1]:\n",
|
||||
" j -= 1\n",
|
||||
" elif T[i][j] == T[i - 1][j]:\n",
|
||||
" i -= 1\n",
|
||||
" elif T[i][j] == T[i - 1][j - 1] + 1:\n",
|
||||
" results += str1[i - 1]\n",
|
||||
" i -= 1\n",
|
||||
" j -= 1\n",
|
||||
" else:\n",
|
||||
" raise Exception('Error constructing table')\n",
|
||||
" # Walking backwards results in a string in reverse order\n",
|
||||
" return results[::-1] "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Unit Test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting test_longest_common_substr.py\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile test_longest_common_substr.py\n",
|
||||
"from nose.tools import assert_equal, assert_raises\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class TestLongestCommonSubstr(object):\n",
|
||||
"\n",
|
||||
" def test_longest_common_substr(self):\n",
|
||||
" str_comp = StringCompare()\n",
|
||||
" assert_raises(TypeError, str_comp.longest_common_substr, None, None)\n",
|
||||
" assert_equal(str_comp.longest_common_substr('', ''), '')\n",
|
||||
" str0 = 'ABCDEFGHIJ'\n",
|
||||
" str1 = 'FOOBCDBCDE'\n",
|
||||
" expected = 'BCDE'\n",
|
||||
" assert_equal(str_comp.longest_common_substr(str0, str1), expected)\n",
|
||||
" print('Success: test_longest_common_substr')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def main():\n",
|
||||
" test = TestLongestCommonSubstr()\n",
|
||||
" test.test_longest_common_substr()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"if __name__ == '__main__':\n",
|
||||
" main()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Success: test_longest_common_substr\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%run -i test_longest_common_substr.py"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.4.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
from nose.tools import assert_equal, assert_raises
|
||||
|
||||
|
||||
class TestLongestCommonSubstr(object):
|
||||
|
||||
def test_longest_common_substr(self):
|
||||
str_comp = StringCompare()
|
||||
assert_raises(TypeError, str_comp.longest_common_substr, None, None)
|
||||
assert_equal(str_comp.longest_common_substr('', ''), '')
|
||||
str0 = 'ABCDEFGHIJ'
|
||||
str1 = 'FOOBCDBCDE'
|
||||
expected = 'BCDE'
|
||||
assert_equal(str_comp.longest_common_substr(str0, str1), expected)
|
||||
print('Success: test_longest_common_substr')
|
||||
|
||||
|
||||
def main():
|
||||
test = TestLongestCommonSubstr()
|
||||
test.test_longest_common_substr()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue
Block a user