From a5812bb8f2987abe2294beaf65c08baaceb2969f Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Fri, 15 Jul 2016 14:51:15 -0400 Subject: [PATCH] Improve fenced code block detection for first pass. In first pass, there may not be a trailing newline after a fenced code block yet. Make newline optional in isFenceLine when calling fencedCodeBlock to detect the fenced code block it anyway. This is more complex, but it avoids creating temporary buffers or modifying input in order to maintain performance (see #148). Document and rename fencedCode to fencedCodeBlock. Add regression tests. Fixes #279. --- block.go | 14 +++++---- block_test.go | 6 ++++ markdown.go | 80 +++++++++++++++++++++++++-------------------------- 3 files changed, 55 insertions(+), 45 deletions(-) diff --git a/block.go b/block.go index 021a54d..b237c7e 100644 --- a/block.go +++ b/block.go @@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) { // } // ``` if p.flags&EXTENSION_FENCED_CODE != 0 { - if i := p.fencedCode(out, data, true); i > 0 { + if i := p.fencedCodeBlock(out, data, true); i > 0 { data = data[i:] continue } @@ -659,7 +659,10 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional return i + 1, marker // Take newline into account. } -func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { +// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning, +// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects. +// If doRender is true, a final newline is mandatory to recognize the fenced code block. +func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int { var syntax string beg, marker := isFenceLine(data, &syntax, "", true) if beg == 0 || beg >= len(data) { @@ -672,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int { // safe to assume beg < len(data) // check for the end of the code block - fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true) + newlineOptional := !doRender + fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional) if fenceEnd != 0 { beg += fenceEnd break @@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int { // irregardless of any contents inside it for data[end] != '\n' { if p.flags&EXTENSION_FENCED_CODE != 0 { - if i := p.fencedCode(out, data[end:], false); i > 0 { + if i := p.fencedCodeBlock(out, data[end:], false); i > 0 { // -1 to compensate for the extra end++ after the loop: end += i - 1 break @@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { // if there's a fenced code block, paragraph is over if p.flags&EXTENSION_FENCED_CODE != 0 { - if p.fencedCode(out, current, false) > 0 { + if p.fencedCodeBlock(out, current, false) > 0 { p.renderParagraph(out, data[:i]) return i } diff --git a/block_test.go b/block_test.go index a20f5df..6170e56 100644 --- a/block_test.go +++ b/block_test.go @@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) { "Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block", "

Some text before a fenced code block

\n\n
code blocks breakup paragraphs\n
\n\n

Some text in between

\n\n
multiple code blocks work okay\n
\n\n

And some text after a fenced code block

\n", + + "```\n[]:()\n```\n", + "
[]:()\n
\n", + + "```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```", + "
[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n
\n", } doTestsBlock(t, tests, EXTENSION_FENCED_CODE) } diff --git a/markdown.go b/markdown.go index aea997a..58ba68d 100644 --- a/markdown.go +++ b/markdown.go @@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { } // first pass: -// - extract references -// - expand tabs // - normalize newlines +// - extract references (outside of fenced code blocks) +// - expand tabs (outside of fenced code blocks) // - copy everything else func firstPass(p *parser, input []byte) []byte { var out bytes.Buffer @@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte { if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { tabSize = TAB_SIZE_EIGHT } - beg, end := 0, 0 + beg := 0 lastFencedCodeBlockEnd := 0 - for beg < len(input) { // iterate over lines - if end = isReference(p, input[beg:], tabSize); end > 0 { - beg += end - } else { // skip to the next line - end = beg - for end < len(input) && input[end] != '\n' && input[end] != '\r' { - end++ - } - - if p.flags&EXTENSION_FENCED_CODE != 0 { - // track fenced code block boundaries to suppress tab expansion - // inside them: - if beg >= lastFencedCodeBlockEnd { - if i := p.fencedCode(&out, input[beg:], false); i > 0 { - lastFencedCodeBlockEnd = beg + i - } - } - } - - // add the line body if present - if end > beg { - if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. - out.Write(input[beg:end]) - } else { - expandTabs(&out, input[beg:end], tabSize) - } - } - out.WriteByte('\n') - - if end < len(input) && input[end] == '\r' { - end++ - } - if end < len(input) && input[end] == '\n' { - end++ - } - - beg = end + for beg < len(input) { + // Find end of this line, then process the line. + end := beg + for end < len(input) && input[end] != '\n' && input[end] != '\r' { + end++ } + + if p.flags&EXTENSION_FENCED_CODE != 0 { + // track fenced code block boundaries to suppress tab expansion + // and reference extraction inside them: + if beg >= lastFencedCodeBlockEnd { + if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { + lastFencedCodeBlockEnd = beg + i + } + } + } + + // add the line body if present + if end > beg { + if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. + out.Write(input[beg:end]) + } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { + beg += refEnd + continue + } else { + expandTabs(&out, input[beg:end], tabSize) + } + } + + if end < len(input) && input[end] == '\r' { + end++ + } + if end < len(input) && input[end] == '\n' { + end++ + } + out.WriteByte('\n') + + beg = end } // empty input?