Improve fenced code block detection for first pass.

In the first pass, there may not be a trailing newline after a fenced code block yet. Make newline optional in isFenceLine when calling fencedCodeBlock to detect the fenced code block anyway. This is more complex, but it avoids creating temporary buffers or modifying input in order to maintain performance (see #148). Document and rename fencedCode to fencedCodeBlock. Add regression tests. Fixes #279.
2016-07-15 14:51:15 -04:00 · 2016-07-15 14:51:15 -04:00 · a5812bb8f2
parent 0049676599
commit a5812bb8f2
3 changed files with 55 additions and 45 deletions
--- a/block.go
+++ b/block.go
@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) {
 		// }
 		// ```
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if i := p.fencedCode(out, data, true); i > 0 {
+			if i := p.fencedCodeBlock(out, data, true); i > 0 {
 				data = data[i:]
 				continue
 			}
@ -659,7 +659,10 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
 	return i + 1, marker // Take newline into account.
 }

-func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
+// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
+// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
+// If doRender is true, a final newline is mandatory to recognize the fenced code block.
+func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
 	var syntax string
 	beg, marker := isFenceLine(data, &syntax, "", true)
 	if beg == 0 || beg >= len(data) {
@ -672,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		// safe to assume beg < len(data)

 		// check for the end of the code block
-		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true)
+		newlineOptional := !doRender
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 		// irregardless of any contents inside it
 		for data[end] != '\n' {
 			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				if i := p.fencedCode(out, data[end:], false); i > 0 {
+				if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
 					// -1 to compensate for the extra end++ after the loop:
 					end += i - 1
 					break
@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {

 		// if there's a fenced code block, paragraph is over
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if p.fencedCode(out, current, false) > 0 {
+			if p.fencedCodeBlock(out, current, false) > 0 {
 				p.renderParagraph(out, data[:i])
 				return i
 			}
--- a/block_test.go
+++ b/block_test.go
@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) {

 		"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
 		"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
+
+		"```\n[]:()\n```\n",
+		"<pre><code>[]:()\n</code></pre>\n",
+
+		"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
+		"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
 	}
 	doTestsBlock(t, tests, EXTENSION_FENCED_CODE)
 }
--- a/markdown.go
+++ b/markdown.go
@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
 }

 // first pass:
-// - extract references
-// - expand tabs
 // - normalize newlines
+// - extract references (outside of fenced code blocks)
+// - expand tabs (outside of fenced code blocks)
 // - copy everything else
 func firstPass(p *parser, input []byte) []byte {
 	var out bytes.Buffer
@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte {
 	if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
 		tabSize = TAB_SIZE_EIGHT
 	}
-	beg, end := 0, 0
+	beg := 0
 	lastFencedCodeBlockEnd := 0
-	for beg < len(input) { // iterate over lines
-		if end = isReference(p, input[beg:], tabSize); end > 0 {
-			beg += end
-		} else { // skip to the next line
-			end = beg
-			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
-				end++
-			}
-
-			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				// track fenced code block boundaries to suppress tab expansion
-				// inside them:
-				if beg >= lastFencedCodeBlockEnd {
-					if i := p.fencedCode(&out, input[beg:], false); i > 0 {
-						lastFencedCodeBlockEnd = beg + i
-					}
-				}
-			}
-
-			// add the line body if present
-			if end > beg {
-				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
-					out.Write(input[beg:end])
-				} else {
-					expandTabs(&out, input[beg:end], tabSize)
-				}
-			}
-			out.WriteByte('\n')
-
-			if end < len(input) && input[end] == '\r' {
-				end++
-			}
-			if end < len(input) && input[end] == '\n' {
-				end++
-			}
-
-			beg = end
+	for beg < len(input) {
+		// Find end of this line, then process the line.
+		end := beg
+		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
+			end++
 		}
+
+		if p.flags&EXTENSION_FENCED_CODE != 0 {
+			// track fenced code block boundaries to suppress tab expansion
+			// and reference extraction inside them:
+			if beg >= lastFencedCodeBlockEnd {
+				if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
+					lastFencedCodeBlockEnd = beg + i
+				}
+			}
+		}
+
+		// add the line body if present
+		if end > beg {
+			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
+				out.Write(input[beg:end])
+			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
+				beg += refEnd
+				continue
+			} else {
+				expandTabs(&out, input[beg:end], tabSize)
+			}
+		}
+
+		if end < len(input) && input[end] == '\r' {
+			end++
+		}
+		if end < len(input) && input[end] == '\n' {
+			end++
+		}
+		out.WriteByte('\n')
+
+		beg = end
 	}

 	// empty input?