Only allow valid HTML entities to pass through unescaped. Always escape entity-like sequences inside code spans and code blocks.

This commit is contained in:
Faruq Rasid 2018-09-06 17:05:43 +08:00
parent c5c549b063
commit 6762cd3685
3 changed files with 2250 additions and 5 deletions

2235
entities.go Normal file

File diff suppressed because it is too large Load Diff

12
esc.go
View File

@ -13,12 +13,20 @@ var htmlEscaper = [256][]byte{
} }
func escapeHTML(w io.Writer, s []byte) { func escapeHTML(w io.Writer, s []byte) {
escapeEntities(w, s, false)
}
// escapeAllHTML writes s to w via escapeEntities with escapeValidEntities set
// to true, so a sequence recognised as a valid entity is not written through
// verbatim and instead takes the same escaping path as any other byte that
// has an htmlEscaper replacement.
func escapeAllHTML(w io.Writer, s []byte) {
escapeEntities(w, s, true)
}
func escapeEntities(w io.Writer, s []byte, escapeValidEntities bool) {
var start, end int var start, end int
for end < len(s) { for end < len(s) {
escSeq := htmlEscaper[s[end]] escSeq := htmlEscaper[s[end]]
if escSeq != nil { if escSeq != nil {
isEntity, entityEnd := nodeIsEntity(s, end) isEntity, entityEnd := nodeIsEntity(s, end)
if isEntity { if isEntity && !escapeValidEntities {
w.Write(s[start : entityEnd+1]) w.Write(s[start : entityEnd+1])
start = entityEnd + 1 start = entityEnd + 1
} else { } else {
@ -41,9 +49,11 @@ func nodeIsEntity(s []byte, end int) (isEntity bool, endEntityPos int) {
if s[end] == '&' { if s[end] == '&' {
for endEntityPos < len(s) { for endEntityPos < len(s) {
if s[endEntityPos] == ';' { if s[endEntityPos] == ';' {
if entities[string(s[end:endEntityPos+1])] {
isEntity = true isEntity = true
break break
} }
}
if !isalnum(s[endEntityPos]) && s[endEntityPos] != '&' && s[endEntityPos] != '#' { if !isalnum(s[endEntityPos]) && s[endEntityPos] != '&' && s[endEntityPos] != '#' {
break break
} }

View File

@ -616,7 +616,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
} }
case Code: case Code:
r.out(w, codeTag) r.out(w, codeTag)
escapeHTML(w, node.Literal) escapeAllHTML(w, node.Literal)
r.out(w, codeCloseTag) r.out(w, codeCloseTag)
case Document: case Document:
break break
@ -762,7 +762,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
r.cr(w) r.cr(w)
r.out(w, preTag) r.out(w, preTag)
r.tag(w, codeTag[:len(codeTag)-1], attrs) r.tag(w, codeTag[:len(codeTag)-1], attrs)
escapeHTML(w, node.Literal) escapeAllHTML(w, node.Literal)
r.out(w, codeCloseTag) r.out(w, codeCloseTag)
r.out(w, preCloseTag) r.out(w, preCloseTag)
if node.Parent.Type != Item { if node.Parent.Type != Item {