From a5812bb8f2987abe2294beaf65c08baaceb2969f Mon Sep 17 00:00:00 2001
From: Dmitri Shuralyov <shurcooL@gmail.com>
Date: Fri, 15 Jul 2016 14:51:15 -0400
Subject: [PATCH] Improve fenced code block detection for first pass.

In the first pass, there may not be a trailing newline after a fenced
code block yet. Make the newline optional in isFenceLine when calling
fencedCodeBlock, so that the fenced code block is detected anyway. This
is more complex, but it avoids creating temporary buffers or modifying
the input, in order to maintain performance (see #148).

Document and rename fencedCode to fencedCodeBlock.

Add regression tests.

Fixes #279.
---
 block.go      | 14 +++++----
 block_test.go |  6 ++++
 markdown.go   | 80 +++++++++++++++++++++++++--------------------------
 3 files changed, 55 insertions(+), 45 deletions(-)
diff --git a/block.go b/block.go
index 021a54d..b237c7e 100644
--- a/block.go
+++ b/block.go
@@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) {
 		// }
 		// ```
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if i := p.fencedCode(out, data, true); i > 0 {
+			if i := p.fencedCodeBlock(out, data, true); i > 0 {
 				data = data[i:]
 				continue
 			}
@@ -659,7 +659,10 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
 	return i + 1, marker // Take newline into account.
 }
 
-func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
+// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
+// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
+// If doRender is true, a final newline is mandatory to recognize the fenced code block.
+func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
 	var syntax string
 	beg, marker := isFenceLine(data, &syntax, "", true)
 	if beg == 0 || beg >= len(data) {
@@ -672,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		// safe to assume beg < len(data)
 
 		// check for the end of the code block
-		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true)
+		newlineOptional := !doRender
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 		// irregardless of any contents inside it
 		for data[end] != '\n' {
 			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				if i := p.fencedCode(out, data[end:], false); i > 0 {
+				if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
 					// -1 to compensate for the extra end++ after the loop:
 					end += i - 1
 					break
@@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
 
 		// if there's a fenced code block, paragraph is over
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if p.fencedCode(out, current, false) > 0 {
+			if p.fencedCodeBlock(out, current, false) > 0 {
 				p.renderParagraph(out, data[:i])
 				return i
 			}
diff --git a/block_test.go b/block_test.go
index a20f5df..6170e56 100644
--- a/block_test.go
+++ b/block_test.go
@@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) {
 
 		"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
 		"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
+
+		"```\n[]:()\n```\n",
+		"<pre><code>[]:()\n</code></pre>\n",
+
+		"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
+		"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
 	}
 	doTestsBlock(t, tests, EXTENSION_FENCED_CODE)
 }
diff --git a/markdown.go b/markdown.go
index aea997a..58ba68d 100644
--- a/markdown.go
+++ b/markdown.go
@@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
 }
 
 // first pass:
-// - extract references
-// - expand tabs
 // - normalize newlines
+// - extract references (outside of fenced code blocks)
+// - expand tabs (outside of fenced code blocks)
 // - copy everything else
 func firstPass(p *parser, input []byte) []byte {
 	var out bytes.Buffer
@@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte {
 	if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
 		tabSize = TAB_SIZE_EIGHT
 	}
-	beg, end := 0, 0
+	beg := 0
 	lastFencedCodeBlockEnd := 0
-	for beg < len(input) { // iterate over lines
-		if end = isReference(p, input[beg:], tabSize); end > 0 {
-			beg += end
-		} else { // skip to the next line
-			end = beg
-			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
-				end++
-			}
-
-			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				// track fenced code block boundaries to suppress tab expansion
-				// inside them:
-				if beg >= lastFencedCodeBlockEnd {
-					if i := p.fencedCode(&out, input[beg:], false); i > 0 {
-						lastFencedCodeBlockEnd = beg + i
-					}
-				}
-			}
-
-			// add the line body if present
-			if end > beg {
-				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
-					out.Write(input[beg:end])
-				} else {
-					expandTabs(&out, input[beg:end], tabSize)
-				}
-			}
-			out.WriteByte('\n')
-
-			if end < len(input) && input[end] == '\r' {
-				end++
-			}
-			if end < len(input) && input[end] == '\n' {
-				end++
-			}
-
-			beg = end
+	for beg < len(input) {
+		// Find end of this line, then process the line.
+		end := beg
+		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
+			end++
 		}
+
+		if p.flags&EXTENSION_FENCED_CODE != 0 {
+			// track fenced code block boundaries to suppress tab expansion
+			// and reference extraction inside them:
+			if beg >= lastFencedCodeBlockEnd {
+				if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
+					lastFencedCodeBlockEnd = beg + i
+				}
+			}
+		}
+
+		// add the line body if present
+		if end > beg {
+			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
+				out.Write(input[beg:end])
+			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
+				beg += refEnd
+				continue
+			} else {
+				expandTabs(&out, input[beg:end], tabSize)
+			}
+		}
+
+		if end < len(input) && input[end] == '\r' {
+			end++
+		}
+		if end < len(input) && input[end] == '\n' {
+			end++
+		}
+		out.WriteByte('\n')
+
+		beg = end
 	}
 
 	// empty input?