diff --git a/.travis.yml b/.travis.yml
index b96d06a..a4eb257 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,10 +5,9 @@
language: go
go:
- - 1.2
- - 1.3
- - 1.4
- 1.5
+ - 1.6
+ - 1.7
install:
- go get -d -t -v ./...
diff --git a/block.go b/block.go
index 42bed26..9d3a003 100644
--- a/block.go
+++ b/block.go
@@ -29,17 +29,12 @@ const (
var (
reBackslashOrAmp = regexp.MustCompile("[\\&]")
reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
- reTrailingWhitespace = regexp.MustCompile("(\n *)+$")
)
// Parse block-level data.
// Note: this function and many that it calls assume that
// the input buffer ends with a newline.
func (p *parser) block(data []byte) {
- if len(data) == 0 || data[len(data)-1] != '\n' {
- panic("block input is missing terminating newline")
- }
-
// this is called recursively: enforce a maximum depth
if p.nesting >= p.maxNesting {
return
@@ -131,7 +126,7 @@ func (p *parser) block(data []byte) {
if p.isHRule(data) {
p.addBlock(HorizontalRule, nil)
var i int
- for i = 0; data[i] != '\n'; i++ {
+ for i = 0; i < len(data) && data[i] != '\n'; i++ {
}
data = data[i:]
continue
@@ -216,10 +211,10 @@ func (p *parser) isPrefixHeader(data []byte) bool {
if p.flags&SpaceHeaders != 0 {
level := 0
- for level < 6 && data[level] == '#' {
+ for level < 6 && level < len(data) && data[level] == '#' {
level++
}
- if data[level] != ' ' {
+ if level == len(data) || data[level] != ' ' {
return false
}
}
@@ -228,7 +223,7 @@ func (p *parser) isPrefixHeader(data []byte) bool {
func (p *parser) prefixHeader(data []byte) int {
level := 0
- for level < 6 && data[level] == '#' {
+ for level < 6 && level < len(data) && data[level] == '#' {
level++
}
i := skipChar(data, level, ' ')
@@ -277,7 +272,7 @@ func (p *parser) isUnderlinedHeader(data []byte) int {
if data[0] == '=' {
i := skipChar(data, 1, '=')
i = skipChar(data, i, ' ')
- if data[i] == '\n' {
+ if i < len(data) && data[i] == '\n' {
return 1
}
return 0
@@ -287,7 +282,7 @@ func (p *parser) isUnderlinedHeader(data []byte) int {
if data[0] == '-' {
i := skipChar(data, 1, '-')
i = skipChar(data, i, ' ')
- if data[i] == '\n' {
+ if i < len(data) && data[i] == '\n' {
return 2
}
return 0
@@ -419,8 +414,8 @@ func (p *parser) html(data []byte, doRender bool) int {
}
func finalizeHTMLBlock(block *Node) {
- block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{})
- block.content = []byte{}
+ block.Literal = block.content
+ block.content = nil
}
// HTML comment, lax form
@@ -445,6 +440,9 @@ func (p *parser) htmlComment(data []byte, doRender bool) int {
// HR, which is the only self-closing block tag considered
func (p *parser) htmlHr(data []byte, doRender bool) int {
+ if len(data) < 4 {
+ return 0
+ }
if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
return 0
}
@@ -452,13 +450,11 @@ func (p *parser) htmlHr(data []byte, doRender bool) int {
// not an
tag after all; at least not a valid one
return 0
}
-
i := 3
- for data[i] != '>' && data[i] != '\n' {
+ for i < len(data) && data[i] != '>' && data[i] != '\n' {
i++
}
-
- if data[i] == '>' {
+ if i < len(data) && data[i] == '>' {
i++
if j := p.isEmpty(data[i:]); j > 0 {
size := i + j
@@ -473,13 +469,12 @@ func (p *parser) htmlHr(data []byte, doRender bool) int {
return size
}
}
-
return 0
}
func (p *parser) htmlFindTag(data []byte) (string, bool) {
i := 0
- for isalnum(data[i]) {
+ for i < len(data) && isalnum(data[i]) {
i++
}
key := string(data[:i])
@@ -536,7 +531,10 @@ func (*parser) isEmpty(data []byte) int {
return 0
}
}
- return i + 1
+ if i < len(data) && data[i] == '\n' {
+ i++
+ }
+ return i
}
func (*parser) isHRule(data []byte) bool {
@@ -555,7 +553,7 @@ func (*parser) isHRule(data []byte) bool {
// the whole line must be the char or whitespace
n := 0
- for data[i] != '\n' {
+ for i < len(data) && data[i] != '\n' {
switch {
case data[i] == c:
n++
@@ -571,8 +569,7 @@ func (*parser) isHRule(data []byte) bool {
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
// and returns the end index if so, or 0 otherwise. It also returns the marker found.
// If syntax is not nil, it gets set to the syntax specified in the fence line.
-// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
-func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
+func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) {
i, size := 0, 0
// skip up to three spaces
@@ -614,7 +611,7 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
i = skipChar(data, i, ' ')
if i >= len(data) {
- if newlineOptional && i == len(data) {
+ if i == len(data) {
return i, marker
}
return 0, ""
@@ -659,12 +656,11 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
i = skipChar(data, i, ' ')
if i >= len(data) || data[i] != '\n' {
- if newlineOptional && i == len(data) {
+ if i == len(data) {
return i, marker
}
return 0, ""
}
-
return i + 1, marker // Take newline into account.
}
@@ -673,7 +669,7 @@ func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional
// If doRender is true, a final newline is mandatory to recognize the fenced code block.
func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
var syntax string
- beg, marker := isFenceLine(data, &syntax, "", false)
+ beg, marker := isFenceLine(data, &syntax, "")
if beg == 0 || beg >= len(data) {
return 0
}
@@ -686,8 +682,7 @@ func (p *parser) fencedCodeBlock(data []byte, doRender bool) int {
// safe to assume beg < len(data)
// check for the end of the code block
- newlineOptional := !doRender
- fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
+ fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
if fenceEnd != 0 {
beg += fenceEnd
break
@@ -739,7 +734,7 @@ func finalizeCodeBlock(block *Node) {
block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
block.Literal = rest
} else {
- block.Literal = reTrailingWhitespace.ReplaceAll(block.content, []byte{'\n'})
+ block.Literal = block.content
}
block.content = nil
}
@@ -757,7 +752,7 @@ func (p *parser) table(data []byte) int {
for i < len(data) {
pipes, rowStart := 0, i
- for ; data[i] != '\n'; i++ {
+ for ; i < len(data) && data[i] != '\n'; i++ {
if data[i] == '|' {
pipes++
}
@@ -769,7 +764,9 @@ func (p *parser) table(data []byte) int {
}
// include the newline in data sent to tableRow
- i++
+ if i < len(data) && data[i] == '\n' {
+ i++
+ }
p.tableRow(data[rowStart:i], columns, false)
}
@@ -788,7 +785,7 @@ func isBackslashEscaped(data []byte, i int) bool {
func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
i := 0
colCount := 1
- for i = 0; data[i] != '\n'; i++ {
+ for i = 0; i < len(data) && data[i] != '\n'; i++ {
if data[i] == '|' && !isBackslashEscaped(data, i) {
colCount++
}
@@ -800,7 +797,11 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
}
// include the newline in the data sent to tableRow
- header := data[:i+1]
+ j := i
+ if j < len(data) && data[j] == '\n' {
+ j++
+ }
+ header := data[:j]
// column count ignores pipes at beginning or end of line
if data[0] == '|' {
@@ -826,7 +827,7 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
// and trailing | optional on last column
col := 0
- for data[i] != '\n' {
+ for i < len(data) && data[i] != '\n' {
dashes := 0
if data[i] == ':' {
@@ -834,19 +835,21 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
columns[col] |= TableAlignmentLeft
dashes++
}
- for data[i] == '-' {
+ for i < len(data) && data[i] == '-' {
i++
dashes++
}
- if data[i] == ':' {
+ if i < len(data) && data[i] == ':' {
i++
columns[col] |= TableAlignmentRight
dashes++
}
- for data[i] == ' ' {
+ for i < len(data) && data[i] == ' ' {
i++
}
-
+ if i == len(data) {
+ return
+ }
// end of column test is messy
switch {
case dashes < 3:
@@ -857,12 +860,12 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
// marker found, now skip past trailing whitespace
col++
i++
- for data[i] == ' ' {
+ for i < len(data) && data[i] == ' ' {
i++
}
// trailing junk found after last column
- if col >= colCount && data[i] != '\n' {
+ if col >= colCount && i < len(data) && data[i] != '\n' {
return
}
@@ -885,7 +888,10 @@ func (p *parser) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
p.addBlock(TableHead, nil)
p.tableRow(header, columns, true)
- size = i + 1
+ size = i
+ if size < len(data) && data[size] == '\n' {
+ size++
+ }
return
}
@@ -898,13 +904,13 @@ func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
}
for col = 0; col < len(columns) && i < len(data); col++ {
- for data[i] == ' ' {
+ for i < len(data) && data[i] == ' ' {
i++
}
cellStart := i
- for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
+ for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
i++
}
@@ -913,7 +919,7 @@ func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
// skip the end-of-cell marker, possibly taking us past end of buffer
i++
- for cellEnd > cellStart && data[cellEnd-1] == ' ' {
+ for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
cellEnd--
}
@@ -935,11 +941,11 @@ func (p *parser) tableRow(data []byte, columns []CellAlignFlags, header bool) {
// returns blockquote prefix length
func (p *parser) quotePrefix(data []byte) int {
i := 0
- for i < 3 && data[i] == ' ' {
+ for i < 3 && i < len(data) && data[i] == ' ' {
i++
}
- if data[i] == '>' {
- if data[i+1] == ' ' {
+ if i < len(data) && data[i] == '>' {
+ if i+1 < len(data) && data[i+1] == ' ' {
return i + 2
}
return i + 1
@@ -969,7 +975,7 @@ func (p *parser) quote(data []byte) int {
// Step over whole lines, collecting them. While doing that, check for
// fenced code and if one's found, incorporate it altogether,
// irregardless of any contents inside it
- for data[end] != '\n' {
+ for end < len(data) && data[end] != '\n' {
if p.flags&FencedCode != 0 {
if i := p.fencedCodeBlock(data[end:], false); i > 0 {
// -1 to compensate for the extra end++ after the loop:
@@ -979,7 +985,9 @@ func (p *parser) quote(data []byte) int {
}
end++
}
- end++
+ if end < len(data) && data[end] == '\n' {
+ end++
+ }
if pre := p.quotePrefix(data[beg:]); pre > 0 {
// skip the prefix
beg += pre
@@ -997,7 +1005,10 @@ func (p *parser) quote(data []byte) int {
// returns prefix length for block code
func (p *parser) codePrefix(data []byte) int {
- if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
+ if len(data) >= 1 && data[0] == '\t' {
+ return 1
+ }
+ if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
return 4
}
return 0
@@ -1009,10 +1020,12 @@ func (p *parser) code(data []byte) int {
i := 0
for i < len(data) {
beg := i
- for data[i] != '\n' {
+ for i < len(data) && data[i] != '\n' {
+ i++
+ }
+ if i < len(data) && data[i] == '\n' {
i++
}
- i++
blankline := p.isEmpty(data[beg:i]) > 0
if pre := p.codePrefix(data[beg:i]); pre > 0 {
@@ -1023,7 +1036,7 @@ func (p *parser) code(data []byte) int {
break
}
- // verbatim copy to the working buffeu
+ // verbatim copy to the working buffer
if blankline {
work.WriteByte('\n')
} else {
@@ -1053,15 +1066,16 @@ func (p *parser) code(data []byte) int {
// returns unordered list item prefix
func (p *parser) uliPrefix(data []byte) int {
i := 0
-
// start with up to 3 spaces
- for i < 3 && data[i] == ' ' {
+ for i < len(data) && i < 3 && data[i] == ' ' {
i++
}
-
- // need a *, +, or - followed by a space
+ if i >= len(data)-1 {
+ return 0
+ }
+ // need one of {'*', '+', '-'} followed by a space or a tab
if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
- data[i+1] != ' ' {
+ (data[i+1] != ' ' && data[i+1] != '\t') {
return 0
}
return i + 2
@@ -1072,18 +1086,21 @@ func (p *parser) oliPrefix(data []byte) int {
i := 0
// start with up to 3 spaces
- for i < 3 && data[i] == ' ' {
+ for i < 3 && i < len(data) && data[i] == ' ' {
i++
}
// count the digits
start := i
- for data[i] >= '0' && data[i] <= '9' {
+ for i < len(data) && data[i] >= '0' && data[i] <= '9' {
i++
}
+ if start == i || i >= len(data)-1 {
+ return 0
+ }
- // we need >= 1 digits followed by a dot and a space
- if start == i || data[i] != '.' || data[i+1] != ' ' {
+ // we need >= 1 digits followed by a dot and a space or a tab
+ if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
return 0
}
return i + 2
@@ -1091,13 +1108,15 @@ func (p *parser) oliPrefix(data []byte) int {
// returns definition list item prefix
func (p *parser) dliPrefix(data []byte) int {
- i := 0
-
- // need a : followed by a spaces
- if data[i] != ':' || data[i+1] != ' ' {
+ if len(data) < 2 {
return 0
}
- for data[i] == ' ' {
+ i := 0
+ // need a ':' followed by a space or a tab
+ if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
+ return 0
+ }
+ for i < len(data) && data[i] == ' ' {
i++
}
return i + 2
@@ -1175,8 +1194,12 @@ func finalizeList(block *Node) {
func (p *parser) listItem(data []byte, flags *ListType) int {
// keep track of the indentation of the first line
itemIndent := 0
- for itemIndent < 3 && data[itemIndent] == ' ' {
- itemIndent++
+ if data[0] == '\t' {
+ itemIndent += 4
+ } else {
+ for itemIndent < 3 && data[itemIndent] == ' ' {
+ itemIndent++
+ }
}
var bulletChar byte = '*'
@@ -1203,13 +1226,13 @@ func (p *parser) listItem(data []byte, flags *ListType) int {
}
// skip leading whitespace on first line
- for data[i] == ' ' {
+ for i < len(data) && data[i] == ' ' {
i++
}
// find the end of the line
line := i
- for i > 0 && data[i-1] != '\n' {
+ for i > 0 && i < len(data) && data[i-1] != '\n' {
i++
}
@@ -1229,7 +1252,7 @@ gatherlines:
i++
// find the end of this line
- for data[i-1] != '\n' {
+ for i < len(data) && data[i-1] != '\n' {
i++
}
@@ -1243,11 +1266,18 @@ gatherlines:
// calculate the indentation
indent := 0
- for indent < 4 && line+indent < i && data[line+indent] == ' ' {
- indent++
+ indentIndex := 0
+ if data[line] == '\t' {
+ indentIndex++
+ indent += 4
+ } else {
+ for indent < 4 && line+indent < i && data[line+indent] == ' ' {
+ indent++
+ indentIndex++
+ }
}
- chunk := data[line+indent : i]
+ chunk := data[line+indentIndex : i]
// evaluate how this line fits in
switch {
@@ -1288,7 +1318,7 @@ gatherlines:
if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
// is the next item still a part of this list?
next := i
- for data[next] != '\n' {
+ for next < len(data) && data[next] != '\n' {
next++
}
for next < len(data)-1 && data[next] == '\n' {
@@ -1316,7 +1346,7 @@ gatherlines:
}
// add the line into the working buffer without prefix
- raw.Write(data[line+indent : i])
+ raw.Write(data[line+indentIndex : i])
line = i
}
@@ -1364,8 +1394,11 @@ func (p *parser) renderParagraph(data []byte) {
beg++
}
+ end := len(data)
// trim trailing newline
- end := len(data) - 1
+ if data[len(data)-1] == '\n' {
+ end--
+ }
// trim trailing spaces
for end > beg && data[end-1] == ' ' {
@@ -1437,7 +1470,7 @@ func (p *parser) paragraph(data []byte) int {
block.HeaderID = id
// find the end of the underline
- for data[i] != '\n' {
+ for i < len(data) && data[i] != '\n' {
i++
}
return i
@@ -1470,7 +1503,8 @@ func (p *parser) paragraph(data []byte) int {
// if there's a definition list item, prev line is a definition term
if p.flags&DefinitionLists != 0 {
if p.dliPrefix(current) != 0 {
- return p.list(data[prev:], ListTypeDefinition)
+ ret := p.list(data[prev:], ListTypeDefinition)
+ return ret
}
}
@@ -1486,10 +1520,12 @@ func (p *parser) paragraph(data []byte) int {
}
// otherwise, scan to the beginning of the next line
- for data[i] != '\n' {
- i++
+ nl := bytes.IndexByte(data[i:], '\n')
+ if nl >= 0 {
+ i += nl + 1
+ } else {
+ i += len(data[i:])
}
- i++
}
p.renderParagraph(data[:i])
diff --git a/block_test.go b/block_test.go
index 909f4a8..0a944a1 100644
--- a/block_test.go
+++ b/block_test.go
@@ -1661,14 +1661,14 @@ func TestIsFenceLine(t *testing.T) {
tests := []struct {
data []byte
syntaxRequested bool
- newlineOptional bool
wantEnd int
wantMarker string
wantSyntax string
}{
{
- data: []byte("```"),
- wantEnd: 0,
+ data: []byte("```"),
+ wantEnd: 3,
+ wantMarker: "```",
},
{
data: []byte("```\nstuff here\n"),
@@ -1685,23 +1685,15 @@ func TestIsFenceLine(t *testing.T) {
data: []byte("stuff here\n```\n"),
wantEnd: 0,
},
- {
- data: []byte("```"),
- newlineOptional: true,
- wantEnd: 3,
- wantMarker: "```",
- },
{
data: []byte("```"),
syntaxRequested: true,
- newlineOptional: true,
wantEnd: 3,
wantMarker: "```",
},
{
data: []byte("``` go"),
syntaxRequested: true,
- newlineOptional: true,
wantEnd: 6,
wantMarker: "```",
wantSyntax: "go",
@@ -1713,7 +1705,7 @@ func TestIsFenceLine(t *testing.T) {
if test.syntaxRequested {
syntax = new(string)
}
- end, marker := isFenceLine(test.data, syntax, "```", test.newlineOptional)
+ end, marker := isFenceLine(test.data, syntax, "```")
if got, want := end, test.wantEnd; got != want {
t.Errorf("got end %v, want %v", got, want)
}
diff --git a/esc.go b/esc.go
new file mode 100644
index 0000000..6385f27
--- /dev/null
+++ b/esc.go
@@ -0,0 +1,34 @@
+package blackfriday
+
+import (
+ "html"
+ "io"
+)
+
+var htmlEscaper = [256][]byte{
+ '&': []byte("&"),
+ '<': []byte("<"),
+ '>': []byte(">"),
+ '"': []byte("""),
+}
+
+func escapeHTML(w io.Writer, s []byte) {
+ var start, end int
+ for end < len(s) {
+ escSeq := htmlEscaper[s[end]]
+ if escSeq != nil {
+ w.Write(s[start:end])
+ w.Write(escSeq)
+ start = end + 1
+ }
+ end++
+ }
+ if start < len(s) && end <= len(s) {
+ w.Write(s[start:end])
+ }
+}
+
+func escLink(w io.Writer, text []byte) {
+ unesc := html.UnescapeString(string(text))
+ escapeHTML(w, []byte(unesc))
+}
diff --git a/esc_test.go b/esc_test.go
new file mode 100644
index 0000000..ff67d54
--- /dev/null
+++ b/esc_test.go
@@ -0,0 +1,48 @@
+package blackfriday
+
+import (
+ "bytes"
+ "testing"
+)
+
+func TestEsc(t *testing.T) {
+ tests := []string{
+ "abc", "abc",
+ "a&c", "a&c",
+ "<", "<",
+ "[]:<", "[]:<",
+ "Hello
Paragraph two.
diff --git a/testdata/Markdown Documentation - Syntax.html b/testdata/Markdown Documentation - Syntax.html
index 61dde59..6cd05fb 100644
--- a/testdata/Markdown Documentation - Syntax.html
+++ b/testdata/Markdown Documentation - Syntax.html
@@ -939,8 +939,8 @@ _ underscore
[] square brackets
() parentheses
# hash mark
-+ plus sign
-- minus sign (hyphen)
++ plus sign
+- minus sign (hyphen)
. dot
! exclamation mark
diff --git a/testdata/Tabs.html b/testdata/Tabs.html
index 64006d9..509b41c 100644
--- a/testdata/Tabs.html
+++ b/testdata/Tabs.html
@@ -13,13 +13,13 @@ indented with spaces
And:
- this code block is indented by two tabs
+ this code block is indented by two tabs
And:
-+ this is an example list item
- indented with tabs
++ this is an example list item
+ indented with tabs
+ this is an example list item
indented with spaces