From 96537c6eaa663cfa2a46ddf3e380acd163152601 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Fri, 15 Jul 2016 13:52:01 -0400 Subject: [PATCH 1/3] Create a place for document-level unit tests. These will be helpful for catching regressions or changes in behavior to edge cases such as empty input, or specifically crafted inputs that may cause panics, etc. Move test for issue #172 there since it's a document-level test, not an inline one. Add test for issue #173. Make some things more consistent. Don't use a named receiver in methods that don't use it. This makes the code more readable since one can more quickly tell the inputs to the method. --- block.go | 4 +-- inline_test.go | 16 ++++------- markdown_test.go | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ ref_test.go | 2 +- 4 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 markdown_test.go diff --git a/block.go b/block.go index 3f4af78..3fa1e92 100644 --- a/block.go +++ b/block.go @@ -515,7 +515,7 @@ func (p *parser) htmlFindEnd(tag string, data []byte) int { return i + skip } -func (p *parser) isEmpty(data []byte) int { +func (*parser) isEmpty(data []byte) int { // it is okay to call isEmpty on an empty buffer if len(data) == 0 { return 0 @@ -530,7 +530,7 @@ func (p *parser) isEmpty(data []byte) int { return i + 1 } -func (p *parser) isHRule(data []byte) bool { +func (*parser) isHRule(data []byte) bool { i := 0 // skip up to three spaces diff --git a/inline_test.go b/inline_test.go index 9fb937b..bcaba09 100644 --- a/inline_test.go +++ b/inline_test.go @@ -59,13 +59,11 @@ func doTestsInlineParam(t *testing.T, tests []string, opts Options, htmlFlags in params HtmlRendererParameters) { // catch and report panics var candidate string - /* - defer func() { - if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err) - } - }() - */ + defer func() { + if err := recover(); err != nil { + t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err) + } + }() for i := 0; i+1 < len(tests); i += 2 { input := tests[i] @@ -722,10 +720,6 @@ func TestReferenceLink(t *testing.T) { "[link][ref]\n [ref]: /url/", "

link

\n", - - // Issue 172 in blackfriday - "[]:<", - "

[]:<

\n", } doLinkTestsInline(t, tests) } diff --git a/markdown_test.go b/markdown_test.go new file mode 100644 index 0000000..d897644 --- /dev/null +++ b/markdown_test.go @@ -0,0 +1,75 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross . +// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// Unit tests for full document parsing and rendering +// + +package blackfriday + +import ( + "testing" +) + +func runMarkdown(input string) string { + return string(MarkdownCommon([]byte(input))) +} + +func doTests(t *testing.T, tests []string) { + // catch and report panics + var candidate string + defer func() { + if err := recover(); err != nil { + t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err) + } + }() + + for i := 0; i+1 < len(tests); i += 2 { + input := tests[i] + candidate = input + expected := tests[i+1] + actual := runMarkdown(candidate) + if actual != expected { + t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]", + candidate, expected, actual) + } + + // now test every substring to stress test bounds checking + if !testing.Short() { + for start := 0; start < len(input); start++ { + for end := start + 1; end <= len(input); end++ { + candidate = input[start:end] + _ = runMarkdown(candidate) + } + } + } + } +} + +func TestDocument(t *testing.T) { + var tests = []string{ + // Empty document. + "", + "", + + " ", + "", + + // This shouldn't panic. + // https://github.com/russross/blackfriday/issues/172 + "[]:<", + "

[]:<

\n", + + // This shouldn't panic. + // https://github.com/russross/blackfriday/issues/173 + " [", + "

[

\n", + } + doTests(t, tests) +} diff --git a/ref_test.go b/ref_test.go index 770439c..733a63a 100644 --- a/ref_test.go +++ b/ref_test.go @@ -29,7 +29,7 @@ func doTestsReference(t *testing.T, files []string, flag int) { var candidate string defer func() { if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v]\n", candidate) + t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err) } }() From 00496765999eff4cae439cf36e45b661bdb971ad Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Fri, 15 Jul 2016 14:41:27 -0400 Subject: [PATCH 2/3] Improve fence line detection. Rename isFenceCode to isFenceLine, document it, add tests. Add support for making newline optional, this will be needed in future commits. --- block.go | 54 +++++++++++++++++++++--------------------- block_test.go | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 27 deletions(-) diff --git a/block.go b/block.go index 3fa1e92..021a54d 100644 --- a/block.go +++ b/block.go @@ -559,21 +559,24 @@ func (*parser) isHRule(data []byte) bool { return n >= 3 } -func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) { +// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data, +// and returns the end index if so, or 0 otherwise. It also returns the marker found. +// If syntax is not nil, it gets set to the syntax specified in the fence line. +// A final newline is mandatory to recognize the fence line, unless newlineOptional is true. +func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) { i, size := 0, 0 - skip = 0 // skip up to three spaces for i < len(data) && i < 3 && data[i] == ' ' { i++ } - if i >= len(data) { - return - } // check for the marker characters: ~ or ` + if i >= len(data) { + return 0, "" + } if data[i] != '~' && data[i] != '`' { - return + return 0, "" } c := data[i] @@ -584,27 +587,28 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s i++ } - if i >= len(data) { - return - } - // the marker char must occur at least 3 times if size < 3 { - return + return 0, "" } marker = string(data[i-size : i]) // if this is the end marker, it must match the beginning marker if oldmarker != "" && marker != oldmarker { - return + return 0, "" } + // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here + // into one, always get the syntax, and discard it if the caller doesn't care. if syntax != nil { syn := 0 i = skipChar(data, i, ' ') if i >= len(data) { - return + if newlineOptional && i == len(data) { + return i, marker + } + return 0, "" } syntaxStart := i @@ -619,7 +623,7 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s } if i >= len(data) || data[i] != '}' { - return + return 0, "" } // strip all whitespace at the beginning and the end @@ -641,22 +645,23 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s } } - language := string(data[syntaxStart : syntaxStart+syn]) - *syntax = &language + *syntax = string(data[syntaxStart : syntaxStart+syn]) } i = skipChar(data, i, ' ') if i >= len(data) || data[i] != '\n' { - return + if newlineOptional && i == len(data) { + return i, marker + } + return 0, "" } - skip = i + 1 - return + return i + 1, marker // Take newline into account. } func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { - var lang *string - beg, marker := p.isFencedCode(data, &lang, "") + var syntax string + beg, marker := isFenceLine(data, &syntax, "", true) if beg == 0 || beg >= len(data) { return 0 } @@ -667,7 +672,7 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { // safe to assume beg < len(data) // check for the end of the code block - fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker) + fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true) if fenceEnd != 0 { beg += fenceEnd break @@ -688,11 +693,6 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { beg = end } - syntax := "" - if lang != nil { - syntax = *lang - } - if doRender { p.r.BlockCode(out, work.Bytes(), syntax) } diff --git a/block_test.go b/block_test.go index f3618fe..a20f5df 100644 --- a/block_test.go +++ b/block_test.go @@ -1636,3 +1636,68 @@ func TestCDATA(t *testing.T) { `, }, EXTENSION_FENCED_CODE) } + +func TestIsFenceLine(t *testing.T) { + tests := []struct { + data []byte + syntaxRequested bool + newlineOptional bool + wantEnd int + wantMarker string + wantSyntax string + }{ + { + data: []byte("```"), + wantEnd: 0, + }, + { + data: []byte("```\nstuff here\n"), + wantEnd: 4, + wantMarker: "```", + }, + { + data: []byte("stuff here\n```\n"), + wantEnd: 0, + }, + { + data: []byte("```"), + newlineOptional: true, + wantEnd: 3, + wantMarker: "```", + }, + { + data: []byte("```"), + syntaxRequested: true, + newlineOptional: true, + wantEnd: 3, + wantMarker: "```", + }, + { + data: []byte("``` go"), + syntaxRequested: true, + newlineOptional: true, + wantEnd: 6, + wantMarker: "```", + wantSyntax: "go", + }, + } + + for _, test := range tests { + var syntax *string + if test.syntaxRequested { + syntax = new(string) + } + end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional) + if got, want := end, test.wantEnd; got != want { + t.Errorf("got end %v, want %v", got, want) + } + if got, want := marker, test.wantMarker; got != want { + t.Errorf("got marker %q, want %q", got, want) + } + if test.syntaxRequested { + if got, want := *syntax, test.wantSyntax; got != want { + t.Errorf("got syntax %q, want %q", got, want) + } + } + } +} From a5812bb8f2987abe2294beaf65c08baaceb2969f Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Fri, 15 Jul 2016 14:51:15 -0400 Subject: [PATCH 3/3] Improve fenced code block detection for first pass. In first pass, there may not be a trailing newline after a fenced code block yet. Make newline optional in isFenceLine when calling fencedCodeBlock to detect the fenced code block it anyway. This is more complex, but it avoids creating temporary buffers or modifying input in order to maintain performance (see #148). Document and rename fencedCode to fencedCodeBlock. Add regression tests. Fixes #279. --- block.go | 14 +++++---- block_test.go | 6 ++++ markdown.go | 80 +++++++++++++++++++++++++-------------------------- 3 files changed, 55 insertions(+), 45 deletions(-) diff --git a/block.go b/block.go index 021a54d..b237c7e 100644 --- a/block.go +++ b/block.go @@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) { // } // ``` if p.flags&EXTENSION_FENCED_CODE != 0 { - if i := p.fencedCode(out, data, true); i > 0 { + if i := p.fencedCodeBlock(out, data, true); i > 0 { data = data[i:] continue } @@ -659,7 +659,10 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional return i + 1, marker // Take newline into account. } -func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { +// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning, +// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects. +// If doRender is true, a final newline is mandatory to recognize the fenced code block. +func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int { var syntax string beg, marker := isFenceLine(data, &syntax, "", true) if beg == 0 || beg >= len(data) { @@ -672,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { // safe to assume beg < len(data) // check for the end of the code block - fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true) + newlineOptional := !doRender + fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) if fenceEnd != 0 { beg += fenceEnd break @@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int { // irregardless of any contents inside it for data[end] != '\n' { if p.flags&EXTENSION_FENCED_CODE != 0 { - if i := p.fencedCode(out, data[end:], false); i > 0 { + if i := p.fencedCodeBlock(out, data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop: end += i - 1 break @@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { // if there's a fenced code block, paragraph is over if p.flags&EXTENSION_FENCED_CODE != 0 { - if p.fencedCode(out, current, false) > 0 { + if p.fencedCodeBlock(out, current, false) > 0 { p.renderParagraph(out, data[:i]) return i } diff --git a/block_test.go b/block_test.go index a20f5df..6170e56 100644 --- a/block_test.go +++ b/block_test.go @@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) { "Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block", "

Some text before a fenced code block

\n\n
code blocks breakup paragraphs\n
\n\n

Some text in between

\n\n
multiple code blocks work okay\n
\n\n

And some text after a fenced code block

\n", + + "```\n[]:()\n```\n", + "
[]:()\n
\n", + + "```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```", + "
[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n
\n", } doTestsBlock(t, tests, EXTENSION_FENCED_CODE) } diff --git a/markdown.go b/markdown.go index aea997a..58ba68d 100644 --- a/markdown.go +++ b/markdown.go @@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { } // first pass: -// - extract references -// - expand tabs // - normalize newlines +// - extract references (outside of fenced code blocks) +// - expand tabs (outside of fenced code blocks) // - copy everything else func firstPass(p *parser, input []byte) []byte { var out bytes.Buffer @@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte { if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { tabSize = TAB_SIZE_EIGHT } - beg, end := 0, 0 + beg := 0 lastFencedCodeBlockEnd := 0 - for beg < len(input) { // iterate over lines - if end = isReference(p, input[beg:], tabSize); end > 0 { - beg += end - } else { // skip to the next line - end = beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } - - if p.flags&EXTENSION_FENCED_CODE != 0 { - // track fenced code block boundaries to suppress tab expansion - // inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCode(&out, input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - out.WriteByte('\n') - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - - beg = end + for beg < len(input) { + // Find end of this line, then process the line. + end := beg + for end < len(input) && input[end] != '\n' && input[end] != '\r' { + end++ } + + if p.flags&EXTENSION_FENCED_CODE != 0 { + // track fenced code block boundaries to suppress tab expansion + // and reference extraction inside them: + if beg >= lastFencedCodeBlockEnd { + if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { + lastFencedCodeBlockEnd = beg + i + } + } + } + + // add the line body if present + if end > beg { + if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. + out.Write(input[beg:end]) + } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { + beg += refEnd + continue + } else { + expandTabs(&out, input[beg:end], tabSize) + } + } + + if end < len(input) && input[end] == '\r' { + end++ + } + if end < len(input) && input[end] == '\n' { + end++ + } + out.WriteByte('\n') + + beg = end } // empty input?