Merge pull request #282 from russross/v2-fix-279

v2: Fix issue in fenced code block pre-processing.
2024-03-22 13:40:34 +08:00 · 2016-07-16 10:25:34 +03:00 · 2016-07-16 10:25:34 +03:00 · fd97b7d32f
commit fd97b7d32f
parent 6291a00f2f 2560c5f148
5 changed files with 205 additions and 77 deletions
--- a/block.go
+++ b/block.go
@ -115,7 +115,7 @@ func (p *parser) block(data []byte) {
 		// }
 		// ```
 		if p.flags&FencedCode != 0 {
-			if i := p.fencedCode(data, true); i > 0 {
+			if i := p.fencedCodeBlock(data, true); i > 0 {
 				data = data[i:]
 				continue
 			}
@ -526,7 +526,7 @@ func (p *parser) htmlFindEnd(tag string, data []byte) int {
 	return i + skip
 }

-func (p *parser) isEmpty(data []byte) int {
+func (*parser) isEmpty(data []byte) int {
 	// it is okay to call isEmpty on an empty buffer
 	if len(data) == 0 {
 		return 0
@ -541,7 +541,7 @@ func (p *parser) isEmpty(data []byte) int {
 	return i + 1
 }

-func (p *parser) isHRule(data []byte) bool {
+func (*parser) isHRule(data []byte) bool {
 	i := 0

 	// skip up to three spaces
@ -570,21 +570,24 @@ func (p *parser) isHRule(data []byte) bool {
 	return n >= 3
 }

-func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
+// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
+// and returns the end index if so, or 0 otherwise. It also returns the marker found.
+// If syntax is not nil, it gets set to the syntax specified in the fence line.
+// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
+func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 	i, size := 0, 0
-	skip = 0

 	// skip up to three spaces
 	for i < len(data) && i < 3 && data[i] == ' ' {
 		i++
 	}
-	if i >= len(data) {
-		return
-	}

 	// check for the marker characters: ~ or `
+	if i >= len(data) {
+		return 0, ""
+	}
 	if data[i] != '~' && data[i] != '`' {
-		return
+		return 0, ""
 	}

 	c := data[i]
@ -595,27 +598,28 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 		i++
 	}

-	if i >= len(data) {
-		return
-	}
-
 	// the marker char must occur at least 3 times
 	if size < 3 {
-		return
+		return 0, ""
 	}
 	marker = string(data[i-size : i])

 	// if this is the end marker, it must match the beginning marker
 	if oldmarker != "" && marker != oldmarker {
-		return
+		return 0, ""
 	}

+	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
+	// into one, always get the syntax, and discard it if the caller doesn't care.
 	if syntax != nil {
 		syn := 0
 		i = skipChar(data, i, ' ')

 		if i >= len(data) {
-			return
+			if newlineOptional && i == len(data) {
+				return i, marker
+			}
+			return 0, ""
 		}

 		syntaxStart := i
@ -630,7 +634,7 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 			}

 			if i >= len(data) || data[i] != '}' {
-				return
+				return 0, ""
 			}

 			// strip all whitespace at the beginning and the end
@ -652,37 +656,40 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 			}
 		}

-		language := string(data[syntaxStart : syntaxStart+syn])
-		*syntax = &language
+		*syntax = string(data[syntaxStart : syntaxStart+syn])
 	}

 	i = skipChar(data, i, ' ')
 	if i >= len(data) || data[i] != '\n' {
-		return
+		if newlineOptional && i == len(data) {
+			return i, marker
+		}
+		return 0, ""
 	}

-	skip = i + 1
-	return
+	return i + 1, marker // Take newline into account.
 }

-func (p *parser) fencedCode(data []byte, doRender bool) int {
-	var lang *string
-	beg, marker := p.isFencedCode(data, &lang, "")
+// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
+// or 0 otherwise. It adds a CodeBlock via p.addBlock if doRender is true, otherwise it has no side effects.
+// If doRender is true, a final newline is mandatory to recognize the fenced code block.
+func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
+	var syntax string
+	beg, marker := isFenceLine(data, &syntax, "", false)
 	if beg == 0 || beg >= len(data) {
 		return 0
 	}

 	var work bytes.Buffer
-	if lang != nil {
-		work.Write([]byte(*lang))
-		work.WriteByte('\n')
-	}
+	work.Write([]byte(syntax))
+	work.WriteByte('\n')

 	for {
 		// safe to assume beg < len(data)

 		// check for the end of the code block
-		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
+		newlineOptional := !doRender
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@ -703,11 +710,6 @@ func (p *parser) fencedCode(data []byte, doRender bool) int {
 		beg = end
 	}

-	//syntax := ""
-	//if lang != nil {
-	//	syntax = *lang
-	//}
-
 	if doRender {
 		block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
 		block.IsFenced = true
@ -972,7 +974,7 @@ func (p *parser) quote(data []byte) int {
 		// regardless of any contents inside it
 		for data[end] != '\n' {
 			if p.flags&FencedCode != 0 {
-				if i := p.fencedCode(data[end:], false); i > 0 {
+				if i := p.fencedCodeBlock(data[end:], false); i > 0 {
 					// -1 to compensate for the extra end++ after the loop:
 					end += i - 1
 					break
@ -1451,7 +1453,7 @@ func (p *parser) paragraph(data []byte) int {

 		// if there's a fenced code block, paragraph is over
 		if p.flags&FencedCode != 0 {
-			if p.fencedCode(current, false) > 0 {
+			if p.fencedCodeBlock(current, false) > 0 {
 				p.renderParagraph(data[:i])
 				return i
 			}
--- a/block_test.go
+++ b/block_test.go
@ -1011,6 +1011,12 @@ func TestFencedCodeBlock(t *testing.T) {

 		"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
 		"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
+
+		"```\n[]:()\n```\n",
+		"<pre><code>[]:()\n</code></pre>\n",
+
+		"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
+		"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
 	}
 	doTestsBlock(t, tests, FencedCode)
 }
@ -1579,3 +1585,74 @@ func TestCompletePage(t *testing.T) {
 	}
 	doTestsParam(t, tests, TestParams{HTMLFlags: UseXHTML | CompletePage})
 }
+
+func TestIsFenceLine(t *testing.T) {
+	tests := []struct {
+		data            []byte
+		syntaxRequested bool
+		newlineOptional bool
+		wantEnd         int
+		wantMarker      string
+		wantSyntax      string
+	}{
+		{
+			data:    []byte("```"),
+			wantEnd: 0,
+		},
+		{
+			data:       []byte("```\nstuff here\n"),
+			wantEnd:    4,
+			wantMarker: "```",
+		},
+		{
+			data:            []byte("```\nstuff here\n"),
+			syntaxRequested: true,
+			wantEnd:         4,
+			wantMarker:      "```",
+		},
+		{
+			data:    []byte("stuff here\n```\n"),
+			wantEnd: 0,
+		},
+		{
+			data:            []byte("```"),
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("```"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         3,
+			wantMarker:      "```",
+		},
+		{
+			data:            []byte("``` go"),
+			syntaxRequested: true,
+			newlineOptional: true,
+			wantEnd:         6,
+			wantMarker:      "```",
+			wantSyntax:      "go",
+		},
+	}
+
+	for _, test := range tests {
+		var syntax *string
+		if test.syntaxRequested {
+			syntax = new(string)
+		}
+		end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
+		if got, want := end, test.wantEnd; got != want {
+			t.Errorf("got end %v, want %v", got, want)
+		}
+		if got, want := marker, test.wantMarker; got != want {
+			t.Errorf("got marker %q, want %q", got, want)
+		}
+		if test.syntaxRequested {
+			if got, want := *syntax, test.wantSyntax; got != want {
+				t.Errorf("got syntax %q, want %q", got, want)
+			}
+		}
+	}
+}
--- a/helpers_test.go
+++ b/helpers_test.go
@ -49,6 +49,17 @@ func runMarkdown(input string, params TestParams) string {
 	return string(Markdown([]byte(input), renderer, params.Options))
 }

+// doTests runs full document tests using MarkdownCommon configuration.
+func doTests(t *testing.T, tests []string) {
+	doTestsParam(t, tests, TestParams{
+		Options: DefaultOptions,
+		HTMLRendererParameters: HTMLRendererParameters{
+			Flags:      CommonHtmlFlags,
+			Extensions: CommonExtensions,
+		},
+	})
+}
+
 func doTestsBlock(t *testing.T, tests []string, extensions Extensions) {
 	doTestsParam(t, tests, TestParams{
 		Options:   Options{Extensions: extensions},
--- a/markdown.go
+++ b/markdown.go
@ -526,9 +526,9 @@ func (p *parser) parseRefsToAST() {
 }

 // first pass:
-// - extract references
-// - expand tabs
 // - normalize newlines
+// - extract references (outside of fenced code blocks)
+// - expand tabs (outside of fenced code blocks)
 // - copy everything else
 func firstPass(p *parser, input []byte) []byte {
 	var out bytes.Buffer
@ -536,46 +536,46 @@ func firstPass(p *parser, input []byte) []byte {
 	if p.flags&TabSizeEight != 0 {
 		tabSize = TabSizeDouble
 	}
-	beg, end := 0, 0
+	beg := 0
 	lastFencedCodeBlockEnd := 0
-	for beg < len(input) { // iterate over lines
-		if end = isReference(p, input[beg:], tabSize); end > 0 {
-			beg += end
-		} else { // skip to the next line
-			end = beg
-			for end < len(input) && input[end] != '\n' && input[end] != '\r' {
-				end++
-			}
-
-			if p.flags&FencedCode != 0 {
-				// track fenced code block boundaries to suppress tab expansion
-				// inside them:
-				if beg >= lastFencedCodeBlockEnd {
-					if i := p.fencedCode(input[beg:], false); i > 0 {
-						lastFencedCodeBlockEnd = beg + i
-					}
-				}
-			}
-
-			// add the line body if present
-			if end > beg {
-				if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
-					out.Write(input[beg:end])
-				} else {
-					expandTabs(&out, input[beg:end], tabSize)
-				}
-			}
-			out.WriteByte('\n')
-
-			if end < len(input) && input[end] == '\r' {
-				end++
-			}
-			if end < len(input) && input[end] == '\n' {
-				end++
-			}
-
-			beg = end
+	for beg < len(input) {
+		// Find end of this line, then process the line.
+		end := beg
+		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
+			end++
 		}
+
+		if p.flags&FencedCode != 0 {
+			// track fenced code block boundaries to suppress tab expansion
+			// and reference extraction inside them:
+			if beg >= lastFencedCodeBlockEnd {
+				if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
+					lastFencedCodeBlockEnd = beg + i
+				}
+			}
+		}
+
+		// add the line body if present
+		if end > beg {
+			if end < lastFencedCodeBlockEnd { // Do not expand tabs or extract references while inside fenced code blocks.
+				out.Write(input[beg:end])
+			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
+				beg += refEnd
+				continue
+			} else {
+				expandTabs(&out, input[beg:end], tabSize)
+			}
+		}
+
+		if end < len(input) && input[end] == '\r' {
+			end++
+		}
+		if end < len(input) && input[end] == '\n' {
+			end++
+		}
+		out.WriteByte('\n')
+
+		beg = end
 	}

 	// empty input?
--- a/markdown_test.go
+++ b/markdown_test.go
@ -0,0 +1,38 @@
+//
+// Blackfriday Markdown Processor
+// Available at http://github.com/russross/blackfriday
+//
+// Copyright © 2011 Russ Ross <russ@russross.com>.
+// Distributed under the Simplified BSD License.
+// See README.md for details.
+//
+
+//
+// Unit tests for full document parsing and rendering
+//
+
+package blackfriday
+
+import "testing"
+
+func TestDocument(t *testing.T) {
+	var tests = []string{
+		// Empty document.
+		"",
+		"",
+
+		" ",
+		"",
+
+		// This shouldn't panic.
+		// https://github.com/russross/blackfriday/issues/172
+		"[]:<",
+		"<p>[]:&lt;</p>\n",
+
+		// This shouldn't panic.
+		// https://github.com/russross/blackfriday/issues/173
+		"   [",
+		"<p>[</p>\n",
+	}
+	doTests(t, tests)
+}