Merge pull request #282 from russross/v2-fix-279

v2: Fix issue in fenced code block pre-processing.
This commit is contained in:
Vytautas Šaltenis 2016-07-16 10:25:34 +03:00 committed by GitHub
commit fd97b7d32f
5 changed files with 205 additions and 77 deletions

View File

@ -115,7 +115,7 @@ func (p *parser) block(data []byte) {
// }
// ```
if p.flags&FencedCode != 0 {
if i := p.fencedCode(data, true); i > 0 {
if i := p.fencedCodeBlock(data, true); i > 0 {
data = data[i:]
continue
}
@ -526,7 +526,7 @@ func (p *parser) htmlFindEnd(tag string, data []byte) int {
return i + skip
}
func (p *parser) isEmpty(data []byte) int {
func (*parser) isEmpty(data []byte) int {
// it is okay to call isEmpty on an empty buffer
if len(data) == 0 {
return 0
@ -541,7 +541,7 @@ func (p *parser) isEmpty(data []byte) int {
return i + 1
}
func (p *parser) isHRule(data []byte) bool {
func (*parser) isHRule(data []byte) bool {
i := 0
// skip up to three spaces
@ -570,21 +570,24 @@ func (p *parser) isHRule(data []byte) bool {
return n >= 3
}
func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
// and returns the end index if so, or 0 otherwise. It also returns the marker found.
// If syntax is not nil, it gets set to the syntax specified in the fence line.
// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
i, size := 0, 0
skip = 0
// skip up to three spaces
for i < len(data) && i < 3 && data[i] == ' ' {
i++
}
if i >= len(data) {
return
}
// check for the marker characters: ~ or `
if i >= len(data) {
return 0, ""
}
if data[i] != '~' && data[i] != '`' {
return
return 0, ""
}
c := data[i]
@ -595,27 +598,28 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
i++
}
if i >= len(data) {
return
}
// the marker char must occur at least 3 times
if size < 3 {
return
return 0, ""
}
marker = string(data[i-size : i])
// if this is the end marker, it must match the beginning marker
if oldmarker != "" && marker != oldmarker {
return
return 0, ""
}
// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
// into one, always get the syntax, and discard it if the caller doesn't care.
if syntax != nil {
syn := 0
i = skipChar(data, i, ' ')
if i >= len(data) {
return
if newlineOptional && i == len(data) {
return i, marker
}
return 0, ""
}
syntaxStart := i
@ -630,7 +634,7 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
}
if i >= len(data) || data[i] != '}' {
return
return 0, ""
}
// strip all whitespace at the beginning and the end
@ -652,37 +656,40 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
}
}
language := string(data[syntaxStart : syntaxStart+syn])
*syntax = &language
*syntax = string(data[syntaxStart : syntaxStart+syn])
}
i = skipChar(data, i, ' ')
if i >= len(data) || data[i] != '\n' {
return
if newlineOptional && i == len(data) {
return i, marker
}
return 0, ""
}
skip = i + 1
return
return i + 1, marker // Take newline into account.
}
func (p *parser) fencedCode(data []byte, doRender bool) int {
var lang *string
beg, marker := p.isFencedCode(data, &lang, "")
// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
// or 0 otherwise. If doRender is true it adds a CodeBlock via p.addBlock; otherwise it has no side effects.
// If doRender is true, a final newline is mandatory to recognize the fenced code block.
func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
var syntax string
beg, marker := isFenceLine(data, &syntax, "", false)
if beg == 0 || beg >= len(data) {
return 0
}
var work bytes.Buffer
if lang != nil {
work.Write([]byte(*lang))
work.WriteByte('\n')
}
work.Write([]byte(syntax))
work.WriteByte('\n')
for {
// safe to assume beg < len(data)
// check for the end of the code block
fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
newlineOptional := !doRender
fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
if fenceEnd != 0 {
beg += fenceEnd
break
@ -703,11 +710,6 @@ func (p *parser) fencedCode(data []byte, doRender bool) int {
beg = end
}
//syntax := ""
//if lang != nil {
// syntax = *lang
//}
if doRender {
block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
block.IsFenced = true
@ -972,7 +974,7 @@ func (p *parser) quote(data []byte) int {
// regardless of any contents inside it
for data[end] != '\n' {
if p.flags&FencedCode != 0 {
if i := p.fencedCode(data[end:], false); i > 0 {
if i := p.fencedCodeBlock(data[end:], false); i > 0 {
// -1 to compensate for the extra end++ after the loop:
end += i - 1
break
@ -1451,7 +1453,7 @@ func (p *parser) paragraph(data []byte) int {
// if there's a fenced code block, paragraph is over
if p.flags&FencedCode != 0 {
if p.fencedCode(current, false) > 0 {
if p.fencedCodeBlock(current, false) > 0 {
p.renderParagraph(data[:i])
return i
}

View File

@ -1011,6 +1011,12 @@ func TestFencedCodeBlock(t *testing.T) {
"Some text before a fenced code block\n``` oz\ncode blocks breakup paragraphs\n```\nSome text in between\n``` oz\nmultiple code blocks work okay\n```\nAnd some text after a fenced code block",
"<p>Some text before a fenced code block</p>\n\n<pre><code class=\"language-oz\">code blocks breakup paragraphs\n</code></pre>\n\n<p>Some text in between</p>\n\n<pre><code class=\"language-oz\">multiple code blocks work okay\n</code></pre>\n\n<p>And some text after a fenced code block</p>\n",
"```\n[]:()\n```\n",
"<pre><code>[]:()\n</code></pre>\n",
"```\n[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n```",
"<pre><code>[]:()\n[]:)\n[]:(\n[]:x\n[]:testing\n[:testing\n\n[]:\nlinebreak\n[]()\n\n[]:\n[]()\n</code></pre>\n",
}
doTestsBlock(t, tests, FencedCode)
}
@ -1579,3 +1585,74 @@ func TestCompletePage(t *testing.T) {
}
doTestsParam(t, tests, TestParams{HTMLFlags: UseXHTML | CompletePage})
}
// TestIsFenceLine calls isFenceLine directly on a table of inputs.
//
// Every case passes "```" as oldmarker. When syntaxRequested is set, a non-nil
// *string is handed to isFenceLine and the value written through it is compared
// against wantSyntax; otherwise nil is passed and the syntax is not checked.
// Failure messages carry the case index and input so a failing entry can be
// identified from the test output alone.
func TestIsFenceLine(t *testing.T) {
	tests := []struct {
		data            []byte
		syntaxRequested bool
		newlineOptional bool
		wantEnd         int
		wantMarker      string
		wantSyntax      string
	}{
		// No trailing newline and newlineOptional is false: not recognized.
		{
			data:    []byte("```"),
			wantEnd: 0,
		},
		// Fence followed by a newline: end index includes the newline.
		{
			data:       []byte("```\nstuff here\n"),
			wantEnd:    4,
			wantMarker: "```",
		},
		// Same input, but with the syntax requested; no syntax is present.
		{
			data:            []byte("```\nstuff here\n"),
			syntaxRequested: true,
			wantEnd:         4,
			wantMarker:      "```",
		},
		// The fence is not at the beginning of data.
		{
			data:    []byte("stuff here\n```\n"),
			wantEnd: 0,
		},
		// With newlineOptional, a fence that ends at end of input is accepted.
		{
			data:            []byte("```"),
			newlineOptional: true,
			wantEnd:         3,
			wantMarker:      "```",
		},
		{
			data:            []byte("```"),
			syntaxRequested: true,
			newlineOptional: true,
			wantEnd:         3,
			wantMarker:      "```",
		},
		// Syntax word after the marker, no trailing newline.
		{
			data:            []byte("``` go"),
			syntaxRequested: true,
			newlineOptional: true,
			wantEnd:         6,
			wantMarker:      "```",
			wantSyntax:      "go",
		},
	}

	for i, test := range tests {
		var syntax *string
		if test.syntaxRequested {
			syntax = new(string)
		}
		end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
		if got, want := end, test.wantEnd; got != want {
			t.Errorf("test %d (data %q, newlineOptional %v): got end %v, want %v",
				i, test.data, test.newlineOptional, got, want)
		}
		if got, want := marker, test.wantMarker; got != want {
			t.Errorf("test %d (data %q, newlineOptional %v): got marker %q, want %q",
				i, test.data, test.newlineOptional, got, want)
		}
		if test.syntaxRequested {
			if got, want := *syntax, test.wantSyntax; got != want {
				t.Errorf("test %d (data %q, newlineOptional %v): got syntax %q, want %q",
					i, test.data, test.newlineOptional, got, want)
			}
		}
	}
}

View File

@ -49,6 +49,17 @@ func runMarkdown(input string, params TestParams) string {
return string(Markdown([]byte(input), renderer, params.Options))
}
// doTests runs full document tests using MarkdownCommon configuration.
func doTests(t *testing.T, tests []string) {
doTestsParam(t, tests, TestParams{
Options: DefaultOptions,
HTMLRendererParameters: HTMLRendererParameters{
Flags: CommonHtmlFlags,
Extensions: CommonExtensions,
},
})
}
func doTestsBlock(t *testing.T, tests []string, extensions Extensions) {
doTestsParam(t, tests, TestParams{
Options: Options{Extensions: extensions},

View File

@ -526,9 +526,9 @@ func (p *parser) parseRefsToAST() {
}
// first pass:
// - extract references
// - expand tabs
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
var out bytes.Buffer
@ -536,46 +536,46 @@ func firstPass(p *parser, input []byte) []byte {
if p.flags&TabSizeEight != 0 {
tabSize = TabSizeDouble
}
beg, end := 0, 0
beg := 0
lastFencedCodeBlockEnd := 0
for beg < len(input) { // iterate over lines
if end = isReference(p, input[beg:], tabSize); end > 0 {
beg += end
} else { // skip to the next line
end = beg
for end < len(input) && input[end] != '\n' && input[end] != '\r' {
end++
}
if p.flags&FencedCode != 0 {
// track fenced code block boundaries to suppress tab expansion
// inside them:
if beg >= lastFencedCodeBlockEnd {
if i := p.fencedCode(input[beg:], false); i > 0 {
lastFencedCodeBlockEnd = beg + i
}
}
}
// add the line body if present
if end > beg {
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
out.Write(input[beg:end])
} else {
expandTabs(&out, input[beg:end], tabSize)
}
}
out.WriteByte('\n')
if end < len(input) && input[end] == '\r' {
end++
}
if end < len(input) && input[end] == '\n' {
end++
}
beg = end
for beg < len(input) {
// Find end of this line, then process the line.
end := beg
for end < len(input) && input[end] != '\n' && input[end] != '\r' {
end++
}
if p.flags&FencedCode != 0 {
// track fenced code block boundaries to suppress tab expansion
// and reference extraction inside them:
if beg >= lastFencedCodeBlockEnd {
if i := p.fencedCodeBlock(input[beg:], false); i > 0 {
lastFencedCodeBlockEnd = beg + i
}
}
}
// add the line body if present
if end > beg {
if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
out.Write(input[beg:end])
} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
beg += refEnd
continue
} else {
expandTabs(&out, input[beg:end], tabSize)
}
}
if end < len(input) && input[end] == '\r' {
end++
}
if end < len(input) && input[end] == '\n' {
end++
}
out.WriteByte('\n')
beg = end
}
// empty input?

38
markdown_test.go Normal file
View File

@ -0,0 +1,38 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
// Unit tests for full document parsing and rendering
//
package blackfriday
import "testing"
// TestDocument passes whole-document inputs to doTests. The table is a flat
// list of strings; entries appear to come in input / expected-output pairs
// (NOTE(review): confirm against doTestsParam).
func TestDocument(t *testing.T) {
	doTests(t, []string{
		// An empty document.
		"",
		"",

		" ",
		"",

		// Regression input: must not panic.
		// https://github.com/russross/blackfriday/issues/172
		"[]:<",
		"<p>[]:&lt;</p>\n",

		// Regression input: must not panic.
		// https://github.com/russross/blackfriday/issues/173
		" [",
		"<p>[</p>\n",
	})
}