diff --git a/block.go b/block.go index 3f4af78..b237c7e 100644 --- a/block.go +++ b/block.go @@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) { // } // ``` if p.flags&EXTENSION_FENCED_CODE != 0 { - if i := p.fencedCode(out, data, true); i > 0 { + if i := p.fencedCodeBlock(out, data, true); i > 0 { data = data[i:] continue } @@ -515,7 +515,7 @@ func (p *parser) htmlFindEnd(tag string, data []byte) int { return i + skip } -func (p *parser) isEmpty(data []byte) int { +func (*parser) isEmpty(data []byte) int { // it is okay to call isEmpty on an empty buffer if len(data) == 0 { return 0 @@ -530,7 +530,7 @@ func (p *parser) isEmpty(data []byte) int { return i + 1 } -func (p *parser) isHRule(data []byte) bool { +func (*parser) isHRule(data []byte) bool { i := 0 // skip up to three spaces @@ -559,21 +559,24 @@ func (p *parser) isHRule(data []byte) bool { return n >= 3 } -func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) { +// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data, +// and returns the end index if so, or 0 otherwise. It also returns the marker found. +// If syntax is not nil, it gets set to the syntax specified in the fence line. +// A final newline is mandatory to recognize the fence line, unless newlineOptional is true. +func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) { i, size := 0, 0 - skip = 0 // skip up to three spaces for i < len(data) && i < 3 && data[i] == ' ' { i++ } - if i >= len(data) { - return - } // check for the marker characters: ~ or ` + if i >= len(data) { + return 0, "" + } if data[i] != '~' && data[i] != '`' { - return + return 0, "" } c := data[i] @@ -584,27 +587,28 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s i++ } - if i >= len(data) { - return - } - // the marker char must occur at least 3 times if size < 3 { - return + return 0, "" } marker = string(data[i-size : i]) // if this is the end marker, it must match the beginning marker if oldmarker != "" && marker != oldmarker { - return + return 0, "" } + // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here + // into one, always get the syntax, and discard it if the caller doesn't care. if syntax != nil { syn := 0 i = skipChar(data, i, ' ') if i >= len(data) { - return + if newlineOptional && i == len(data) { + return i, marker + } + return 0, "" } syntaxStart := i @@ -619,7 +623,7 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s } if i >= len(data) || data[i] != '}' { - return + return 0, "" } // strip all whitespace at the beginning and the end @@ -641,22 +645,26 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s } } - language := string(data[syntaxStart : syntaxStart+syn]) - *syntax = &language + *syntax = string(data[syntaxStart : syntaxStart+syn]) } i = skipChar(data, i, ' ') if i >= len(data) || data[i] != '\n' { - return + if newlineOptional && i == len(data) { + return i, marker + } + return 0, "" } - skip = i + 1 - return + return i + 1, marker // Take newline into account. } -func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { - var lang *string - beg, marker := p.isFencedCode(data, &lang, "") +// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning, +// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects. +// If doRender is true, a final newline is mandatory to recognize the fenced code block. +func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int { + var syntax string + beg, marker := isFenceLine(data, &syntax, "", true) if beg == 0 || beg >= len(data) { return 0 } @@ -667,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { // safe to assume beg < len(data) // check for the end of the code block - fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker) + newlineOptional := !doRender + fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) if fenceEnd != 0 { beg += fenceEnd break @@ -688,11 +697,6 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { beg = end } - syntax := "" - if lang != nil { - syntax = *lang - } - if doRender { p.r.BlockCode(out, work.Bytes(), syntax) } @@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int { // irregardless of any contents inside it for data[end] != '\n' { if p.flags&EXTENSION_FENCED_CODE != 0 { - if i := p.fencedCode(out, data[end:], false); i > 0 { + if i := p.fencedCodeBlock(out, data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop: end += i - 1 break @@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { // if there's a fenced code block, paragraph is over if p.flags&EXTENSION_FENCED_CODE != 0 { - if p.fencedCode(out, current, false) > 0 { + if p.fencedCodeBlock(out, current, false) > 0 { p.renderParagraph(out, data[:i]) return i } diff --git a/block_test.go b/block_test.go index f3618fe..6170e56 100644 --- a/block_test.go +++ b/block_test.go @@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) { "Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block", "

Some text before a fenced code block

\n\n
code blocks breakup paragraphs\n
\n\n

Some text in between

\n\n
multiple code blocks work okay\n
\n\n

And some text after a fenced code block

\n", + + "```\n[]:()\n```\n", + "
[]:()\n
\n", + + "```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```", + "
[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n
\n", } doTestsBlock(t, tests, EXTENSION_FENCED_CODE) } @@ -1636,3 +1642,68 @@ func TestCDATA(t *testing.T) { `, }, EXTENSION_FENCED_CODE) } + +func TestIsFenceLine(t *testing.T) { + tests := []struct { + data []byte + syntaxRequested bool + newlineOptional bool + wantEnd int + wantMarker string + wantSyntax string + }{ + { + data: []byte("```"), + wantEnd: 0, + }, + { + data: []byte("```\nstuff here\n"), + wantEnd: 4, + wantMarker: "```", + }, + { + data: []byte("stuff here\n```\n"), + wantEnd: 0, + }, + { + data: []byte("```"), + newlineOptional: true, + wantEnd: 3, + wantMarker: "```", + }, + { + data: []byte("```"), + syntaxRequested: true, + newlineOptional: true, + wantEnd: 3, + wantMarker: "```", + }, + { + data: []byte("``` go"), + syntaxRequested: true, + newlineOptional: true, + wantEnd: 6, + wantMarker: "```", + wantSyntax: "go", + }, + } + + for _, test := range tests { + var syntax *string + if test.syntaxRequested { + syntax = new(string) + } + end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional) + if got, want := end, test.wantEnd; got != want { + t.Errorf("got end %v, want %v", got, want) + } + if got, want := marker, test.wantMarker; got != want { + t.Errorf("got marker %q, want %q", got, want) + } + if test.syntaxRequested { + if got, want := *syntax, test.wantSyntax; got != want { + t.Errorf("got syntax %q, want %q", got, want) + } + } + } +} diff --git a/inline_test.go b/inline_test.go index 9fb937b..bcaba09 100644 --- a/inline_test.go +++ b/inline_test.go @@ -59,13 +59,11 @@ func doTestsInlineParam(t *testing.T, tests []string, opts Options, htmlFlags in params HtmlRendererParameters) { // catch and report panics var candidate string - /* - defer func() { - if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err) - } - }() - */ + defer func() { + if err := recover(); err != nil { + t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err) + } + }() for i := 0; i+1 < len(tests); i += 2 { input := tests[i] @@ -722,10 +720,6 @@ func TestReferenceLink(t *testing.T) { "[link][ref]\n [ref]: /url/", "

link

\n", - - // Issue 172 in blackfriday - "[]:<", - "

[]:<

\n", } doLinkTestsInline(t, tests) } diff --git a/markdown.go b/markdown.go index aea997a..58ba68d 100644 --- a/markdown.go +++ b/markdown.go @@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { } // first pass: -// - extract references -// - expand tabs // - normalize newlines +// - extract references (outside of fenced code blocks) +// - expand tabs (outside of fenced code blocks) // - copy everything else func firstPass(p *parser, input []byte) []byte { var out bytes.Buffer @@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte { if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { tabSize = TAB_SIZE_EIGHT } - beg, end := 0, 0 + beg := 0 lastFencedCodeBlockEnd := 0 - for beg < len(input) { // iterate over lines - if end = isReference(p, input[beg:], tabSize); end > 0 { - beg += end - } else { // skip to the next line - end = beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } - - if p.flags&EXTENSION_FENCED_CODE != 0 { - // track fenced code block boundaries to suppress tab expansion - // inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCode(&out, input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - out.WriteByte('\n') - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - - beg = end + for beg < len(input) { + // Find end of this line, then process the line. + end := beg + for end < len(input) && input[end] != '\n' && input[end] != '\r' { + end++ } + + if p.flags&EXTENSION_FENCED_CODE != 0 { + // track fenced code block boundaries to suppress tab expansion + // and reference extraction inside them: + if beg >= lastFencedCodeBlockEnd { + if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { + lastFencedCodeBlockEnd = beg + i + } + } + } + + // add the line body if present + if end > beg { + if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. + out.Write(input[beg:end]) + } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { + beg += refEnd + continue + } else { + expandTabs(&out, input[beg:end], tabSize) + } + } + + if end < len(input) && input[end] == '\r' { + end++ + } + if end < len(input) && input[end] == '\n' { + end++ + } + out.WriteByte('\n') + + beg = end } // empty input? diff --git a/markdown_test.go b/markdown_test.go new file mode 100644 index 0000000..d897644 --- /dev/null +++ b/markdown_test.go @@ -0,0 +1,75 @@ +// +// Blackfriday Markdown Processor +// Available at http://github.com/russross/blackfriday +// +// Copyright © 2011 Russ Ross . +// Distributed under the Simplified BSD License. +// See README.md for details. +// + +// +// Unit tests for full document parsing and rendering +// + +package blackfriday + +import ( + "testing" +) + +func runMarkdown(input string) string { + return string(MarkdownCommon([]byte(input))) +} + +func doTests(t *testing.T, tests []string) { + // catch and report panics + var candidate string + defer func() { + if err := recover(); err != nil { + t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err) + } + }() + + for i := 0; i+1 < len(tests); i += 2 { + input := tests[i] + candidate = input + expected := tests[i+1] + actual := runMarkdown(candidate) + if actual != expected { + t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]", + candidate, expected, actual) + } + + // now test every substring to stress test bounds checking + if !testing.Short() { + for start := 0; start < len(input); start++ { + for end := start + 1; end <= len(input); end++ { + candidate = input[start:end] + _ = runMarkdown(candidate) + } + } + } + } +} + +func TestDocument(t *testing.T) { + var tests = []string{ + // Empty document. + "", + "", + + " ", + "", + + // This shouldn't panic. + // https://github.com/russross/blackfriday/issues/172 + "[]:<", + "

[]:<

\n", + + // This shouldn't panic. + // https://github.com/russross/blackfriday/issues/173 + " [", + "

[

\n", + } + doTests(t, tests) +} diff --git a/ref_test.go b/ref_test.go index 770439c..733a63a 100644 --- a/ref_test.go +++ b/ref_test.go @@ -29,7 +29,7 @@ func doTestsReference(t *testing.T, files []string, flag int) { var candidate string defer func() { if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v]\n", candidate) + t.Errorf("\npanic while processing [%#v]: %s\n", candidate, err) } }()