Merge pull request #468 from ruqqq/v2

Fix html renderer escaping valid entities
This commit is contained in:
Russ Ross 2020-07-14 10:41:29 -06:00 committed by GitHub
commit acedacffef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 2277 additions and 5 deletions

2236
entities.go Normal file

File diff suppressed because it is too large Load Diff

36
esc.go
View File

@ -13,14 +13,28 @@ var htmlEscaper = [256][]byte{
} }
func escapeHTML(w io.Writer, s []byte) { func escapeHTML(w io.Writer, s []byte) {
escapeEntities(w, s, false)
}
func escapeAllHTML(w io.Writer, s []byte) {
escapeEntities(w, s, true)
}
func escapeEntities(w io.Writer, s []byte, escapeValidEntities bool) {
var start, end int var start, end int
for end < len(s) { for end < len(s) {
escSeq := htmlEscaper[s[end]] escSeq := htmlEscaper[s[end]]
if escSeq != nil { if escSeq != nil {
isEntity, entityEnd := nodeIsEntity(s, end)
if isEntity && !escapeValidEntities {
w.Write(s[start : entityEnd+1])
start = entityEnd + 1
} else {
w.Write(s[start:end]) w.Write(s[start:end])
w.Write(escSeq) w.Write(escSeq)
start = end + 1 start = end + 1
} }
}
end++ end++
} }
if start < len(s) && end <= len(s) { if start < len(s) && end <= len(s) {
@ -28,6 +42,28 @@ func escapeHTML(w io.Writer, s []byte) {
} }
} }
func nodeIsEntity(s []byte, end int) (isEntity bool, endEntityPos int) {
isEntity = false
endEntityPos = end + 1
if s[end] == '&' {
for endEntityPos < len(s) {
if s[endEntityPos] == ';' {
if entities[string(s[end:endEntityPos+1])] {
isEntity = true
break
}
}
if !isalnum(s[endEntityPos]) && s[endEntityPos] != '&' && s[endEntityPos] != '#' {
break
}
endEntityPos++
}
}
return isEntity, endEntityPos
}
func escLink(w io.Writer, text []byte) { func escLink(w io.Writer, text []byte) {
unesc := html.UnescapeString(string(text)) unesc := html.UnescapeString(string(text))
escapeHTML(w, []byte(unesc)) escapeHTML(w, []byte(unesc))

View File

@ -619,7 +619,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
} }
case Code: case Code:
r.out(w, codeTag) r.out(w, codeTag)
escapeHTML(w, node.Literal) escapeAllHTML(w, node.Literal)
r.out(w, codeCloseTag) r.out(w, codeCloseTag)
case Document: case Document:
break break
@ -765,7 +765,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.cr(w) r.cr(w)
r.out(w, preTag) r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs) r.tag(w, codeTag[:len(codeTag)-1], attrs)
escapeHTML(w, node.Literal) escapeAllHTML(w, node.Literal)
r.out(w, codeCloseTag) r.out(w, codeCloseTag)
r.out(w, preCloseTag) r.out(w, preCloseTag)
if node.Parent.Type != Item { if node.Parent.Type != Item {