Merge pull request #468 from ruqqq/v2

Fix html renderer escaping valid entities
This commit is contained in:
Russ Ross 2020-07-14 10:41:29 -06:00 committed by GitHub
commit acedacffef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 2277 additions and 5 deletions

2236
entities.go Normal file

File diff suppressed because it is too large Load Diff

42
esc.go
View File

@ -13,13 +13,27 @@ var htmlEscaper = [256][]byte{
}
func escapeHTML(w io.Writer, s []byte) {
escapeEntities(w, s, false)
}
func escapeAllHTML(w io.Writer, s []byte) {
escapeEntities(w, s, true)
}
func escapeEntities(w io.Writer, s []byte, escapeValidEntities bool) {
var start, end int
for end < len(s) {
escSeq := htmlEscaper[s[end]]
if escSeq != nil {
w.Write(s[start:end])
w.Write(escSeq)
start = end + 1
isEntity, entityEnd := nodeIsEntity(s, end)
if isEntity && !escapeValidEntities {
w.Write(s[start : entityEnd+1])
start = entityEnd + 1
} else {
w.Write(s[start:end])
w.Write(escSeq)
start = end + 1
}
}
end++
}
@ -28,6 +42,28 @@ func escapeHTML(w io.Writer, s []byte) {
}
}
func nodeIsEntity(s []byte, end int) (isEntity bool, endEntityPos int) {
isEntity = false
endEntityPos = end + 1
if s[end] == '&' {
for endEntityPos < len(s) {
if s[endEntityPos] == ';' {
if entities[string(s[end:endEntityPos+1])] {
isEntity = true
break
}
}
if !isalnum(s[endEntityPos]) && s[endEntityPos] != '&' && s[endEntityPos] != '#' {
break
}
endEntityPos++
}
}
return isEntity, endEntityPos
}
func escLink(w io.Writer, text []byte) {
unesc := html.UnescapeString(string(text))
escapeHTML(w, []byte(unesc))

View File

@ -619,7 +619,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
}
case Code:
r.out(w, codeTag)
escapeHTML(w, node.Literal)
escapeAllHTML(w, node.Literal)
r.out(w, codeCloseTag)
case Document:
break
@ -765,7 +765,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.cr(w)
r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs)
escapeHTML(w, node.Literal)
escapeAllHTML(w, node.Literal)
r.out(w, codeCloseTag)
r.out(w, preCloseTag)
if node.Parent.Type != Item {