Improve fenced code block detection for first pass.

In the first pass, there may not yet be a trailing newline after a fenced
code block. Make the newline optional in isFenceLine when calling
fencedCodeBlock, so that the fenced code block is detected anyway. This is
more complex, but it avoids creating temporary buffers or modifying the
input, in order to maintain performance (see #148).

Document and rename fencedCode to fencedCodeBlock.

Add regression tests.

Fixes #279.
pull/280/head
Dmitri Shuralyov 2016-07-15 14:51:15 -04:00
parent 0049676599
commit a5812bb8f2
3 changed files with 55 additions and 45 deletions

View File

@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) {
// }
// ```
if p.flags&EXTENSION_FENCED_CODE != 0 {
if i := p.fencedCode(out, data, true); i > 0 {
if i := p.fencedCodeBlock(out, data, true); i > 0 {
data = data[i:]
continue
}
@ -659,7 +659,10 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
return i + 1, marker // Take newline into account.
}
func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
// If doRender is true, a final newline is mandatory to recognize the fenced code block.
func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
var syntax string
beg, marker := isFenceLine(data, &syntax, "", true)
if beg == 0 || beg >= len(data) {
@ -672,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
// safe to assume beg < len(data)
// check for the end of the code block
fenceEnd, _ := isFenceLine(data[beg:], nil, marker, true)
newlineOptional := !doRender
fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
if fenceEnd != 0 {
beg += fenceEnd
break
@ -934,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int {
// regardless of any contents inside it
for data[end] != '\n' {
if p.flags&EXTENSION_FENCED_CODE != 0 {
if i := p.fencedCode(out, data[end:], false); i > 0 {
if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
// -1 to compensate for the extra end++ after the loop:
end += i - 1
break
@ -1384,7 +1388,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
// if there's a fenced code block, paragraph is over
if p.flags&EXTENSION_FENCED_CODE != 0 {
if p.fencedCode(out, current, false) > 0 {
if p.fencedCodeBlock(out, current, false) > 0 {
p.renderParagraph(out, data[:i])
return i
}

View File

@ -1130,6 +1130,12 @@ func TestFencedCodeBlock(t *testing.T) {
"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
"```\n[]:()\n```\n",
"<pre><code>[]:()\n</code></pre>\n",
"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
}
doTestsBlock(t, tests, EXTENSION_FENCED_CODE)
}

View File

@ -386,9 +386,9 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
}
// first pass:
// - extract references
// - expand tabs
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
var out bytes.Buffer
@ -396,46 +396,46 @@ func firstPass(p *parser, input []byte) []byte {
if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
tabSize = TAB_SIZE_EIGHT
}
beg, end := 0, 0
beg := 0
lastFencedCodeBlockEnd := 0
for beg < len(input) { // iterate over lines
if end = isReference(p, input[beg:], tabSize); end > 0 {
beg += end
} else { // skip to the next line
end = beg
for end < len(input) && input[end] != '\n' && input[end] != '\r' {
end++
}
if p.flags&EXTENSION_FENCED_CODE != 0 {
// track fenced code block boundaries to suppress tab expansion
// inside them:
if beg >= lastFencedCodeBlockEnd {
if i := p.fencedCode(&out, input[beg:], false); i > 0 {
lastFencedCodeBlockEnd = beg + i
}
}
}
// add the line body if present
if end > beg {
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
out.Write(input[beg:end])
} else {
expandTabs(&out, input[beg:end], tabSize)
}
}
out.WriteByte('\n')
if end < len(input) && input[end] == '\r' {
end++
}
if end < len(input) && input[end] == '\n' {
end++
}
beg = end
for beg < len(input) {
// Find end of this line, then process the line.
end := beg
for end < len(input) && input[end] != '\n' && input[end] != '\r' {
end++
}
if p.flags&EXTENSION_FENCED_CODE != 0 {
// track fenced code block boundaries to suppress tab expansion
// and reference extraction inside them:
if beg >= lastFencedCodeBlockEnd {
if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
lastFencedCodeBlockEnd = beg + i
}
}
}
// add the line body if present
if end > beg {
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
out.Write(input[beg:end])
} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
beg += refEnd
continue
} else {
expandTabs(&out, input[beg:end], tabSize)
}
}
if end < len(input) && input[end] == '\r' {
end++
}
if end < len(input) && input[end] == '\n' {
end++
}
out.WriteByte('\n')
beg = end
}
// empty input?