From afa2f60483941f66c0f992a74e3e7b193161f218 Mon Sep 17 00:00:00 2001 From: Donne Martin Date: Wed, 24 Jun 2015 18:20:20 -0400 Subject: [PATCH] Reworked notebook: Added more detail to clarifying questions and test cases. Reworked algorithm discussion, code, and unit test. --- arrays-strings/compress.ipynb | 156 ++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 73 deletions(-) diff --git a/arrays-strings/compress.ipynb b/arrays-strings/compress.ipynb index 2e58357..e5b7225 100644 --- a/arrays-strings/compress.ipynb +++ b/arrays-strings/compress.ipynb @@ -18,7 +18,8 @@ "* [Algorithm: List](#Algorithm:-List)\n", "* [Code: List](#Code:-List)\n", "* [Algorithm: Byte Array](#Algorithm:-Byte-Array)\n", - "* [Code: Byte array](#Code:-Byte-Array)" + "* [Code: Byte array](#Code:-Byte-Array)\n", + "* [Unit Test](#Unit-Test)" ] }, { @@ -27,8 +28,11 @@ "source": [ "## Clarifying Questions\n", "\n", - "* Is the string ASCII (extended)? Or Unicode?\n", - " * ASCII extended, which is 256 characters\n", + "*Problem statements are sometimes intentionally ambiguous. Asking clarifying questions, identifying constraints, and stating assumptions help to ensure you code the intended solution.*\n", + "\n", + "* Can I assume the string is ASCII?\n", + " * Yes\n", + " * Note: Unicode strings could require special handling depending on your language\n", "* Can you use additional data structures? \n", " * Yes\n", "* Is this case sensitive?\n", @@ -43,69 +47,37 @@ "source": [ "## Test Cases\n", "\n", + "*Identifying and running through general and edge cases are important. 
You generally will not be asked to write a unit test like what is shown below.*\n", + "\n", "* NULL\n", "* '' -> ''\n", "* 'ABC' -> 'ABC'\n", "* 'AAABCCDDDD' -> 'A3B1C2D4'" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from nose.tools import assert_equal\n", - "\n", - "class Test(object):\n", - " def test_compress(self, func):\n", - " assert_equal(func(None), None)\n", - " assert_equal(func(''), '')\n", - " assert_equal(func('ABC'), 'ABC')\n", - " assert_equal(func('AAABCCDDDD'), 'A3B1C2D4')\n", - "\n", - "def run_tests(func):\n", - " test = Test()\n", - " test.test_compress(func)" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Algorithm: List\n", "\n", + "Since Python strings are immutable, we'll use a list of characters instead to exercise in-place string manipulation as you would get with a C string (which is null terminated, as seen in the diagram below). Python does not use a null-terminator.\n", + "\n", "![alt text](https://raw.githubusercontent.com/donnemartin/algorithms-data-structures/master/images/compress_string.jpg)\n", "\n", - "Since Python strings are immutable, we'll use a list of characters to exercise string manipulation. 
Note using a list vs a bytearray will will result in additional space to create the list and to convert the list to a string.\n", - "\n", - "* If string is empty return string\n", - "* count = 0\n", - "* size = 0\n", - "* last_char = first char in string\n", - "* For each char in string\n", - " * If char == last_char\n", - " count++\n", - " * Else\n", - " size += 2\n", - " count++\n", - " last_char = char\n", - "* size += 2\n", + "* Calculate the size of the compressed string\n", "* If the compressed string size is >= string size, return string\n", "* Create compressed_string\n", - "* For each char in string\n", - " * If char == last_char\n", - " count++\n", - " * Else\n", + " * For each char in string\n", + " * If char is the same as last_char, increment count\n", + " * Else\n", + " * Append last_char to compressed_string\n", + " * Append count to compressed_string\n", + " * count = 1\n", + " * last_char = char\n", " * Append last_char to compressed_string\n", - " * append count to compressed_string\n", - " * count = 1\n", - " * last_char = char\n", - " * Append last_char to compressed_string\n", - " * append count to compressed_string\n", - "* return compressed_string\n", + " * Append count to compressed_string\n", + " * Return compressed_string\n", "\n", "Complexity:\n", "* Time: O(n)\n", @@ -121,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "collapsed": false }, @@ -130,19 +102,22 @@ "def compress_string(string):\n", " if string is None or len(string) == 0:\n", " return string\n", + " \n", + " # Calculate the size of the compressed string\n", " size = 0\n", - " count = 0\n", " last_char = string[0]\n", " for char in string:\n", - " if char == last_char:\n", - " count += 1\n", - " else:\n", + " if char != last_char:\n", " size += 2\n", - " count = 1\n", " last_char = char\n", " size += 2\n", + " \n", + " # If the compressed string size is greater than \n", + " # or equal to string size, return string\n", " 
if size >= len(string):\n", " return string\n", + "\n", + " # Create compressed_string\n", " compressed_string = list()\n", " count = 0\n", " last_char = string[0]\n", @@ -156,9 +131,7 @@ " last_char = char\n", " compressed_string.append(last_char)\n", " compressed_string.append(str(count))\n", - " return \"\".join(compressed_string)\n", - "\n", - "run_tests(compress_string)" + " return \"\".join(compressed_string)" ] }, { @@ -167,11 +140,7 @@ "source": [ "## Algorithm: Byte Array\n", "\n", - "![alt text](https://raw.githubusercontent.com/donnemartin/algorithms-data-structures/master/images/compress_string.jpg)\n", - "\n", - "Since Python strings are immutable, we'll use a bytearray to exercise array manipulation. As seen above, we could use a list of characters to create the compressed string then convert it to a string in the end, but this will result in additional space.\n", - "\n", - "The algorithm is the same, except we will need to work with the bytearray's character codes instead of the characters as we did above when we implemented this solution with a list.\n", + "The byte array algorithm is similar to the list algorithm, except that we work with the bytearray's character codes instead of the characters used in the list implementation above.\n", "\n", "Complexity:\n", "* Time: O(n)\n", @@ -187,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "collapsed": false }, @@ -196,19 +165,22 @@ "def compress_string_alt(string):\n", " if string is None or len(string) == 0:\n", " return string\n", + " \n", + " # Calculate the size of the compressed string\n", " size = 0\n", - " count = 0\n", " last_char_code = string[0]\n", " for char_code in string:\n", - " if char_code == last_char_code:\n", - " count += 1\n", - " else:\n", + " if char_code != last_char_code:\n", " size += 2\n", - " count = 1\n", " last_char_code = char_code\n", " size += 2\n", + " \n", + " # If the compressed string size 
is greater than \n", + " # or equal to string size, return string \n", " if size >= len(string):\n", " return string\n", + " \n", + " # Create compressed_string\n", " compressed_string = bytearray(size)\n", " pos = 0\n", " count = 0\n", @@ -218,15 +190,53 @@ " count += 1\n", " else:\n", " compressed_string[pos] = last_char_code\n", - " compressed_string[pos + 1] = ord(str(count))\n", + " compressed_string[pos+1] = ord(str(count))\n", " pos += 2\n", " count = 1\n", " last_char_code = char_code\n", " compressed_string[pos] = last_char_code\n", - " compressed_string[pos + 1] = ord(str(count))\n", - " return compressed_string\n", + " compressed_string[pos+1] = ord(str(count))\n", + " return compressed_string" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unit Test" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Success: test_compress\n", + "Success: test_compress\n" + ] + } + ], + "source": [ + "from nose.tools import assert_equal\n", "\n", - "run_tests(compress_string_alt)" + "class Test(object):\n", + " def test_compress(self, func):\n", + " assert_equal(func(None), None)\n", + " assert_equal(func(''), '')\n", + " assert_equal(func('ABC'), 'ABC')\n", + " assert_equal(func('AAABCCDDDD'), 'A3B1C2D4')\n", + " print('Success: test_compress')\n", + "\n", + "if __name__ == '__main__':\n", + " test = Test()\n", + " test.test_compress(compress_string)\n", + " test.test_compress(compress_string_alt)" ] } ],