Fix bug in autolink overescaping html entities

If autolink encounters a link which already has an escaped html entity,
it would escape the ampersand again, producing things like these:
    &amp;  --> &amp;amp;
    &quot; --> &amp;quot;
This commit solves that by first looking for all entity-looking things
in the link and copying those ranges verbatim, only considering the rest
of the string for escaping.
Doesn't seem to have considerable performance impact.
The mailto: links are processed the old way.
This commit is contained in:
Vytautas Šaltenis 2014-01-26 21:39:38 +02:00
parent cc0d56d092
commit b0bdfbec4c
2 changed files with 22 additions and 3 deletions

19
html.go
View File

@ -74,6 +74,8 @@ var (
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
// TODO: improve this regexp to catch all possible entities:
htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
)
// Html is a type that implements the Renderer interface for HTML output.
@ -164,6 +166,16 @@ func attrEscape(out *bytes.Buffer, src []byte) {
}
}
// entityEscapeWithSkip writes src to out in pieces: the bytes covered by each
// entry of skipRanges are copied to out unchanged, and every stretch of src
// between (and after) those entries is handed to attrEscape.
//
// Each entry of skipRanges is read as a pair of offsets into src, entry[0]
// being the start and entry[1] the end (exclusive) of a span to copy verbatim.
// Entries are processed in the order given, so they are expected to be
// ascending and non-overlapping; AutoLink supplies the result of
// htmlEntity.FindAllIndex here.
func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
	cursor := 0 // offset of the first byte of src not yet written to out
	for i := 0; i < len(skipRanges); i++ {
		span := skipRanges[i]
		// Gap between the previous span (or the start of src) and this one.
		attrEscape(out, src[cursor:span[0]])
		// The span itself goes out untouched.
		verbatim := src[span[0]:span[1]]
		out.Write(verbatim)
		cursor = span[1]
	}
	// Whatever follows the last span (all of src when there are no spans).
	attrEscape(out, src[cursor:])
}
// GetFlags returns the value of the renderer's flags field.
func (r *Html) GetFlags() int {
	flags := r.flags
	return flags
}
@ -408,10 +420,11 @@ func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
}
func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
skipRanges := htmlEntity.FindAllIndex(link, -1)
if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
// mark it but don't link it if it is not a safe link: no smartypants
out.WriteString("<tt>")
attrEscape(out, link)
entityEscapeWithSkip(out, link, skipRanges)
out.WriteString("</tt>")
return
}
@ -420,7 +433,7 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
if kind == LINK_TYPE_EMAIL {
out.WriteString("mailto:")
}
attrEscape(out, link)
entityEscapeWithSkip(out, link, skipRanges)
out.WriteString("\">")
// Pretty print: if we get an email address as
@ -432,7 +445,7 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
case bytes.HasPrefix(link, []byte("mailto:")):
attrEscape(out, link[len("mailto:"):])
default:
attrEscape(out, link)
entityEscapeWithSkip(out, link, skipRanges)
}
out.WriteString("</a>")

View File

@ -692,6 +692,12 @@ func TestAutoLink(t *testing.T) {
"http://www.foo.com<br />\n",
"<p><a href=\"http://www.foo.com\">http://www.foo.com</a><br /></p>\n",
"http://foo.com/viewtopic.php?f=18&amp;t=297",
"<p><a href=\"http://foo.com/viewtopic.php?f=18&amp;t=297\">http://foo.com/viewtopic.php?f=18&amp;t=297</a></p>\n",
"http://foo.com/viewtopic.php?param=&quot;18&quot;zz",
"<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;zz\">http://foo.com/viewtopic.php?param=&quot;18&quot;zz</a></p>\n",
}
doTestsInline(t, tests)
}