Fix bug with overzealous autolink processing

When the source Markdown contains an anchor tag with a URL as its link text
(i.e. <a href=...>http://foo.bar</a>), autolink converts that link text
into another anchor tag, which is nonsense. Detect this situation with a
regexp and exit autolink processing early.
This commit is contained in:
Vytautas Šaltenis 2014-01-25 21:42:34 +02:00
parent 84ee8e62f6
commit 9fc8c9d866
2 changed files with 34 additions and 0 deletions

View File

@ -15,9 +15,14 @@ package blackfriday
import (
"bytes"
"regexp"
"strconv"
)
// anchorRe recognizes, at the very start of its input, a complete HTML anchor
// element whose href attribute value and link text each match urlRe (declared
// elsewhere in this package), with an optional non-empty title attribute in
// between. autoLink consults it so that a URL which is already the link text
// of an anchor does not get wrapped in a second anchor.
var anchorRe = regexp.MustCompile(
	`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`,
)
// Functions to parse text within a block
// Each function returns the number of chars taken care of
// data is the complete block being rendered
@ -618,6 +623,20 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
return 0
}
// Now a more expensive check to see if we're not inside an anchor element
anchorStart := offset
offsetFromAnchor := 0
for anchorStart > 0 && data[anchorStart] != '<' {
anchorStart--
offsetFromAnchor++
}
anchorStr := anchorRe.Find(data[anchorStart:])
if anchorStr != nil {
out.Write(anchorStr[offsetFromAnchor:])
return len(anchorStr) - offsetFromAnchor
}
// scan backward for a word boundary
rewind := 0
for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {

View File

@ -674,6 +674,21 @@ func TestAutoLink(t *testing.T) {
"even a > can be escaped <http://new.com?q=\\>&etc>\n",
"<p>even a &gt; can be escaped <a href=\"http://new.com?q=&gt;&amp;etc\">" +
"http://new.com?q=&gt;&amp;etc</a></p>\n",
"<a href=\"http://fancy.com\">http://fancy.com</a>\n",
"<p><a href=\"http://fancy.com\">http://fancy.com</a></p>\n",
"<a href=\"http://fancy.com\">This is a link</a>\n",
"<p><a href=\"http://fancy.com\">This is a link</a></p>\n",
"<a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a>\n",
"<p><a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a></p>\n",
"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (\n",
"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (</p>\n",
"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).\n",
"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n",
}
doTestsInline(t, tests)
}