Roll our own implementation of HTML escaper

This commit is contained in:
Vytautas Šaltenis 2016-09-10 14:33:37 +03:00
parent 31f2685bfe
commit 993325d13f
4 changed files with 130 additions and 33 deletions

45
esc.go Normal file
View File

@ -0,0 +1,45 @@
package blackfriday
import (
"html"
"io"
)
// escMap pairs a single byte that must not appear verbatim in HTML output
// with the character reference that replaces it.
type escMap struct {
	char byte
	seq  []byte
}

// htmlEscaper lists every byte escapeHTML rewrites together with its
// replacement. The ampersand itself has to become "&amp;"; emitting it
// unchanged would let the input smuggle arbitrary character references
// into the output.
var htmlEscaper = []escMap{
	{'&', []byte("&amp;")},
	{'<', []byte("&lt;")},
	{'>', []byte("&gt;")},
	{'"', []byte("&quot;")},
}

// htmlEscapeTable is htmlEscaper indexed by byte value, so the per-byte
// lookup in escapeHTML is a single array access. Entries for bytes that
// need no escaping are nil.
var htmlEscapeTable = buildEscapeTable(htmlEscaper)

// buildEscapeTable expands a list of escape pairs into a 256-entry lookup
// table keyed by the byte to be replaced.
func buildEscapeTable(pairs []escMap) [256][]byte {
	var table [256][]byte
	for _, p := range pairs {
		table[p.char] = p.seq
	}
	return table
}

// escapeHTML writes s to w, replacing each '&', '<', '>' and '"' byte with
// the sequence given in htmlEscaper. All other bytes are copied through
// unchanged, in runs that are as long as possible.
//
// The function has no error return, so errors reported by w.Write are not
// propagated to the caller.
func escapeHTML(w io.Writer, s []byte) {
	start := 0 // beginning of the pending run of bytes that need no escaping
	for end := 0; end < len(s); end++ {
		seq := htmlEscapeTable[s[end]]
		if seq == nil {
			continue
		}
		// Flush the unescaped run preceding this byte, if there is one.
		if start < end {
			w.Write(s[start:end])
		}
		w.Write(seq)
		start = end + 1
	}
	// Flush whatever follows the last escaped byte.
	if start < len(s) {
		w.Write(s[start:])
	}
}
// escLink writes text to w for use in a link context. It first decodes any
// HTML character references in text with html.UnescapeString and then
// passes the decoded bytes through escapeHTML.
func escLink(w io.Writer, text []byte) {
	decoded := []byte(html.UnescapeString(string(text)))
	escapeHTML(w, decoded)
}

50
esc_test.go Normal file
View File

@ -0,0 +1,50 @@
package blackfriday
import (
"bytes"
"testing"
)
// TestEsc runs escapeHTML over a table of inputs and compares what was
// written with the expected escaped text.
func TestEsc(t *testing.T) {
	cases := []struct {
		input, want string
	}{
		{"abc", "abc"},
		{"a&c", "a&amp;c"},
		{"<", "&lt;"},
		{"[]:<", "[]:&lt;"},
		{"Hello <!--", "Hello &lt;!--"},
	}
	for _, tc := range cases {
		var out bytes.Buffer
		escapeHTML(&out, []byte(tc.input))
		if got := out.String(); got != tc.want {
			t.Errorf("\nInput [%#v]\nExpected[%#v]\nActual [%#v]",
				tc.input, tc.want, got)
		}
	}
}
/*
func BenchmarkEscapeHTML(b *testing.B) {
tests := [][]byte{
[]byte(""),
[]byte("AT&T has an ampersand in their name."),
[]byte("AT&amp;T is another way to write it."),
[]byte("This & that."),
[]byte("4 < 5."),
[]byte("6 > 5."),
[]byte("Here's a [link] [1] with an ampersand in the URL."),
[]byte("Here's a link with an amersand in the link text: [AT&T] [2]."),
[]byte("Here's an inline [link](/script?foo=1&bar=2)."),
[]byte("Here's an inline [link](</script?foo=1&bar=2>)."),
[]byte("[1]: http://example.com/?foo=1&bar=2"),
[]byte("[2]: http://att.com/ \"AT&T\""),
}
var buff bytes.Buffer
for n := 0; n < b.N; n++ {
for _, t := range tests {
escapeHTML(&buff, t)
buff.Reset()
}
}
}
*/

57
html.go
View File

@ -18,7 +18,6 @@ package blackfriday
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"html"
"io" "io"
"regexp" "regexp"
"strings" "strings"
@ -375,17 +374,6 @@ func cellAlignment(align CellAlignFlags) string {
} }
} }
func esc(text []byte) []byte {
unesc := []byte(html.UnescapeString(string(text)))
return escCode(unesc)
}
func escCode(text []byte) []byte {
e1 := []byte(html.EscapeString(string(text)))
e2 := bytes.Replace(e1, []byte("&#34;"), []byte("&quot;"), -1)
return bytes.Replace(e2, []byte("&#39;"), []byte{'\''}, -1)
}
func (r *HTMLRenderer) out(w io.Writer, text []byte) { func (r *HTMLRenderer) out(w io.Writer, text []byte) {
if r.disableTags > 0 { if r.disableTags > 0 {
w.Write(htmlTagRe.ReplaceAll(text, []byte{})) w.Write(htmlTagRe.ReplaceAll(text, []byte{}))
@ -504,11 +492,17 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
attrs := []string{} attrs := []string{}
switch node.Type { switch node.Type {
case Text: case Text:
node.Literal = esc(node.Literal)
if r.Flags&Smartypants != 0 { if r.Flags&Smartypants != 0 {
node.Literal = r.sr.Process(node.Literal) var tmp bytes.Buffer
escapeHTML(&tmp, node.Literal)
r.sr.Process(w, tmp.Bytes())
} else {
if node.Parent.Type == Link {
escLink(w, node.Literal)
} else {
escapeHTML(w, node.Literal)
}
} }
r.out(w, node.Literal)
case Softbreak: case Softbreak:
r.out(w, []byte{'\n'}) r.out(w, []byte{'\n'})
// TODO: make it configurable via out(renderer.softbreak) // TODO: make it configurable via out(renderer.softbreak)
@ -561,16 +555,22 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
} else { } else {
if entering { if entering {
dest = r.addAbsPrefix(dest) dest = r.addAbsPrefix(dest)
//if (!(options.safe && potentiallyUnsafe(node.destination))) { var hrefBuff bytes.Buffer
attrs = append(attrs, fmt.Sprintf("href=%q", esc(dest))) hrefBuff.WriteString("href=\"")
//} escLink(&hrefBuff, dest)
hrefBuff.WriteByte('"')
attrs = append(attrs, hrefBuff.String())
if node.NoteID != 0 { if node.NoteID != 0 {
r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node)) r.out(w, footnoteRef(r.FootnoteAnchorPrefix, node))
break break
} }
attrs = appendLinkAttrs(attrs, r.Flags, dest) attrs = appendLinkAttrs(attrs, r.Flags, dest)
if len(node.LinkData.Title) > 0 { if len(node.LinkData.Title) > 0 {
attrs = append(attrs, fmt.Sprintf("title=%q", esc(node.LinkData.Title))) var titleBuff bytes.Buffer
titleBuff.WriteString("title=\"")
escapeHTML(&titleBuff, node.LinkData.Title)
titleBuff.WriteByte('"')
attrs = append(attrs, titleBuff.String())
} }
r.tag(w, aTag, attrs) r.tag(w, aTag, attrs)
} else { } else {
@ -591,7 +591,9 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
//if options.safe && potentiallyUnsafe(dest) { //if options.safe && potentiallyUnsafe(dest) {
//out(w, `<img src="" alt="`) //out(w, `<img src="" alt="`)
//} else { //} else {
r.out(w, []byte(fmt.Sprintf(`<img src="%s" alt="`, esc(dest)))) r.out(w, []byte(`<img src="`))
escLink(w, dest)
r.out(w, []byte(`" alt="`))
//} //}
} }
r.disableTags++ r.disableTags++
@ -600,14 +602,14 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
if r.disableTags == 0 { if r.disableTags == 0 {
if node.LinkData.Title != nil { if node.LinkData.Title != nil {
r.out(w, []byte(`" title="`)) r.out(w, []byte(`" title="`))
r.out(w, esc(node.LinkData.Title)) escapeHTML(w, node.LinkData.Title)
} }
r.out(w, []byte(`" />`)) r.out(w, []byte(`" />`))
} }
} }
case Code: case Code:
r.out(w, codeTag) r.out(w, codeTag)
r.out(w, escCode(node.Literal)) escapeHTML(w, node.Literal)
r.out(w, codeCloseTag) r.out(w, codeCloseTag)
case Document: case Document:
break break
@ -752,7 +754,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.cr(w) r.cr(w)
r.out(w, preTag) r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs) r.tag(w, codeTag[:len(codeTag)-1], attrs)
r.out(w, escCode(node.Literal)) escapeHTML(w, node.Literal)
r.out(w, codeCloseTag) r.out(w, codeCloseTag)
r.out(w, preCloseTag) r.out(w, preCloseTag)
if node.Parent.Type != Item { if node.Parent.Type != Item {
@ -837,9 +839,9 @@ func (r *HTMLRenderer) writeDocumentHeader(w *bytes.Buffer) {
w.WriteString("<head>\n") w.WriteString("<head>\n")
w.WriteString(" <title>") w.WriteString(" <title>")
if r.Flags&Smartypants != 0 { if r.Flags&Smartypants != 0 {
w.Write(r.sr.Process([]byte(r.Title))) r.sr.Process(w, []byte(r.Title))
} else { } else {
w.Write(esc([]byte(r.Title))) escapeHTML(w, []byte(r.Title))
} }
w.WriteString("</title>\n") w.WriteString("</title>\n")
w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v") w.WriteString(" <meta name=\"GENERATOR\" content=\"Blackfriday Markdown Processor v")
@ -852,14 +854,14 @@ func (r *HTMLRenderer) writeDocumentHeader(w *bytes.Buffer) {
w.WriteString(">\n") w.WriteString(">\n")
if r.CSS != "" { if r.CSS != "" {
w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"") w.WriteString(" <link rel=\"stylesheet\" type=\"text/css\" href=\"")
w.Write(esc([]byte(r.CSS))) escapeHTML(w, []byte(r.CSS))
w.WriteString("\"") w.WriteString("\"")
w.WriteString(ending) w.WriteString(ending)
w.WriteString(">\n") w.WriteString(">\n")
} }
if r.Icon != "" { if r.Icon != "" {
w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"") w.WriteString(" <link rel=\"icon\" type=\"image/x-icon\" href=\"")
w.Write(esc([]byte(r.Icon))) escapeHTML(w, []byte(r.Icon))
w.WriteString("\"") w.WriteString("\"")
w.WriteString(ending) w.WriteString(ending)
w.WriteString(">\n") w.WriteString(">\n")
@ -919,6 +921,7 @@ func (r *HTMLRenderer) writeTOC(w *bytes.Buffer, ast *Node) {
w.Write(buf.Bytes()) w.Write(buf.Bytes())
w.WriteString("\n\n</nav>\n") w.WriteString("\n\n</nav>\n")
} }
r.lastOutputLen = buf.Len()
} }
func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) { func (r *HTMLRenderer) writeDocumentFooter(w *bytes.Buffer) {

View File

@ -17,6 +17,7 @@ package blackfriday
import ( import (
"bytes" "bytes"
"io"
) )
// SPRenderer is a struct containing state of a Smartypants renderer. // SPRenderer is a struct containing state of a Smartypants renderer.
@ -401,13 +402,12 @@ func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer {
} }
// Process is the entry point of the Smartypants renderer. // Process is the entry point of the Smartypants renderer.
func (r *SPRenderer) Process(text []byte) []byte { func (r *SPRenderer) Process(w io.Writer, text []byte) {
var buff bytes.Buffer
mark := 0 mark := 0
for i := 0; i < len(text); i++ { for i := 0; i < len(text); i++ {
if action := r.callbacks[text[i]]; action != nil { if action := r.callbacks[text[i]]; action != nil {
if i > mark { if i > mark {
buff.Write(text[mark:i]) w.Write(text[mark:i])
} }
previousChar := byte(0) previousChar := byte(0)
if i > 0 { if i > 0 {
@ -415,12 +415,11 @@ func (r *SPRenderer) Process(text []byte) []byte {
} }
var tmp bytes.Buffer var tmp bytes.Buffer
i += action(&tmp, previousChar, text[i:]) i += action(&tmp, previousChar, text[i:])
buff.Write(tmp.Bytes()) w.Write(tmp.Bytes())
mark = i + 1 mark = i + 1
} }
} }
if mark < len(text) { if mark < len(text) {
buff.Write(text[mark:]) w.Write(text[mark:])
} }
return buff.Bytes()
} }