mirror of
https://github.com/russross/blackfriday.git
synced 2024-03-22 13:40:34 +08:00
commit
55bb56bf9b
80
html.go
80
html.go
|
@ -43,7 +43,7 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
tags = []string{
|
||||
tags = []string{
|
||||
"b",
|
||||
"blockquote",
|
||||
"code",
|
||||
|
@ -71,10 +71,12 @@ var (
|
|||
"strike",
|
||||
"ul",
|
||||
}
|
||||
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
|
||||
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
|
||||
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
|
||||
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
|
||||
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
|
||||
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
|
||||
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
|
||||
// TODO: improve this regexp to catch all possible entities:
|
||||
htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
|
||||
)
|
||||
|
||||
// Html is a type that implements the Renderer interface for HTML output.
|
||||
|
@ -128,45 +130,36 @@ func HtmlRenderer(flags int, title string, css string) Renderer {
|
|||
}
|
||||
}
|
||||
|
||||
// Using if statements is a bit faster than a switch statement. As the compiler
|
||||
// improves, this should be unnecessary this is only worthwhile because
|
||||
// attrEscape is the single largest CPU user in normal use.
|
||||
// Also tried using map, but that gave a ~3x slowdown.
|
||||
func escapeSingleChar(char byte) (string, bool) {
|
||||
if char == '"' {
|
||||
return "&quot;", true
|
||||
}
|
||||
if char == '&' {
|
||||
return "&amp;", true
|
||||
}
|
||||
if char == '<' {
|
||||
return "&lt;", true
|
||||
}
|
||||
if char == '>' {
|
||||
return "&gt;", true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func attrEscape(out *bytes.Buffer, src []byte) {
|
||||
org := 0
|
||||
for i, ch := range src {
|
||||
// using if statements is a bit faster than a switch statement.
|
||||
// as the compiler improves, this should be unnecessary
|
||||
// this is only worthwhile because attrEscape is the single
|
||||
// largest CPU user in normal use
|
||||
if ch == '"' {
|
||||
if entity, ok := escapeSingleChar(ch); ok {
|
||||
if i > org {
|
||||
// copy all the normal characters since the last escape
|
||||
out.Write(src[org:i])
|
||||
}
|
||||
org = i + 1
|
||||
out.WriteString("&quot;")
|
||||
continue
|
||||
}
|
||||
if ch == '&' {
|
||||
if i > org {
|
||||
out.Write(src[org:i])
|
||||
}
|
||||
org = i + 1
|
||||
out.WriteString("&amp;")
|
||||
continue
|
||||
}
|
||||
if ch == '<' {
|
||||
if i > org {
|
||||
out.Write(src[org:i])
|
||||
}
|
||||
org = i + 1
|
||||
out.WriteString("&lt;")
|
||||
continue
|
||||
}
|
||||
if ch == '>' {
|
||||
if i > org {
|
||||
out.Write(src[org:i])
|
||||
}
|
||||
org = i + 1
|
||||
out.WriteString("&gt;")
|
||||
continue
|
||||
out.WriteString(entity)
|
||||
}
|
||||
}
|
||||
if org < len(src) {
|
||||
|
@ -174,6 +167,16 @@ func attrEscape(out *bytes.Buffer, src []byte) {
|
|||
}
|
||||
}
|
||||
|
||||
func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
|
||||
end := 0
|
||||
for _, rang := range skipRanges {
|
||||
attrEscape(out, src[end:rang[0]])
|
||||
out.Write(src[rang[0]:rang[1]])
|
||||
end = rang[1]
|
||||
}
|
||||
attrEscape(out, src[end:])
|
||||
}
|
||||
|
||||
func (options *Html) GetFlags() int {
|
||||
return options.flags
|
||||
}
|
||||
|
@ -418,10 +421,11 @@ func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
|
|||
}
|
||||
|
||||
func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
||||
skipRanges := htmlEntity.FindAllIndex(link, -1)
|
||||
if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
|
||||
// mark it but don't link it if it is not a safe link: no smartypants
|
||||
out.WriteString("<tt>")
|
||||
attrEscape(out, link)
|
||||
entityEscapeWithSkip(out, link, skipRanges)
|
||||
out.WriteString("</tt>")
|
||||
return
|
||||
}
|
||||
|
@ -430,7 +434,7 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
|||
if kind == LINK_TYPE_EMAIL {
|
||||
out.WriteString("mailto:")
|
||||
}
|
||||
attrEscape(out, link)
|
||||
entityEscapeWithSkip(out, link, skipRanges)
|
||||
out.WriteString("\">")
|
||||
|
||||
// Pretty print: if we get an email address as
|
||||
|
@ -442,7 +446,7 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
|||
case bytes.HasPrefix(link, []byte("mailto:")):
|
||||
attrEscape(out, link[len("mailto:"):])
|
||||
default:
|
||||
attrEscape(out, link)
|
||||
entityEscapeWithSkip(out, link, skipRanges)
|
||||
}
|
||||
|
||||
out.WriteString("</a>")
|
||||
|
|
40
inline.go
40
inline.go
|
@ -15,9 +15,14 @@ package blackfriday
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
var (
|
||||
anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
|
||||
)
|
||||
|
||||
// Functions to parse text within a block
|
||||
// Each function returns the number of chars taken care of
|
||||
// data is the complete block being rendered
|
||||
|
@ -612,12 +617,34 @@ func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
|||
return end
|
||||
}
|
||||
|
||||
func linkEndsWithEntity(data []byte, linkEnd int) bool {
|
||||
entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
|
||||
if entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
||||
// quick check to rule out most false hits on ':'
|
||||
if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Now a more expensive check to see if we're not inside an anchor element
|
||||
anchorStart := offset
|
||||
offsetFromAnchor := 0
|
||||
for anchorStart > 0 && data[anchorStart] != '<' {
|
||||
anchorStart--
|
||||
offsetFromAnchor++
|
||||
}
|
||||
|
||||
anchorStr := anchorRe.Find(data[anchorStart:])
|
||||
if anchorStr != nil {
|
||||
out.Write(anchorStr[offsetFromAnchor:])
|
||||
return len(anchorStr) - offsetFromAnchor
|
||||
}
|
||||
|
||||
// scan backward for a word boundary
|
||||
rewind := 0
|
||||
for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
|
||||
|
@ -635,12 +662,17 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
|||
}
|
||||
|
||||
linkEnd := 0
|
||||
for linkEnd < len(data) && !isspace(data[linkEnd]) {
|
||||
for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
|
||||
linkEnd++
|
||||
}
|
||||
|
||||
// Skip punctuation at the end of the link
|
||||
if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',' || data[linkEnd-1] == ';') && data[linkEnd-2] != '\\' {
|
||||
if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
|
||||
linkEnd--
|
||||
}
|
||||
|
||||
// But don't skip semicolon if it's a part of escaped entity:
|
||||
if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
|
||||
linkEnd--
|
||||
}
|
||||
|
||||
|
@ -718,6 +750,10 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
|||
return linkEnd - rewind
|
||||
}
|
||||
|
||||
func isEndOfLink(char byte) bool {
|
||||
return isspace(char) || char == '<'
|
||||
}
|
||||
|
||||
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://"), []byte("/")}
|
||||
|
||||
func isSafeLink(link []byte) bool {
|
||||
|
|
|
@ -682,6 +682,33 @@ func TestAutoLink(t *testing.T) {
|
|||
"even a > can be escaped <http://new.com?q=\\>&etc>\n",
|
||||
"<p>even a &gt; can be escaped <a href=\"http://new.com?q=&gt;&amp;etc\">" +
|
||||
"http://new.com?q=&gt;&amp;etc</a></p>\n",
|
||||
|
||||
"<a href=\"http://fancy.com\">http://fancy.com</a>\n",
|
||||
"<p><a href=\"http://fancy.com\">http://fancy.com</a></p>\n",
|
||||
|
||||
"<a href=\"http://fancy.com\">This is a link</a>\n",
|
||||
"<p><a href=\"http://fancy.com\">This is a link</a></p>\n",
|
||||
|
||||
"<a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a>\n",
|
||||
"<p><a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a></p>\n",
|
||||
|
||||
"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (\n",
|
||||
"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (</p>\n",
|
||||
|
||||
"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).\n",
|
||||
"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n",
|
||||
|
||||
"http://www.foo.com<br />\n",
|
||||
"<p><a href=\"http://www.foo.com\">http://www.foo.com</a><br /></p>\n",
|
||||
|
||||
"http://foo.com/viewtopic.php?f=18&amp;t=297",
|
||||
"<p><a href=\"http://foo.com/viewtopic.php?f=18&amp;t=297\">http://foo.com/viewtopic.php?f=18&amp;t=297</a></p>\n",
|
||||
|
||||
"http://foo.com/viewtopic.php?param=&quot;18&quot;zz",
|
||||
"<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;zz\">http://foo.com/viewtopic.php?param=&quot;18&quot;zz</a></p>\n",
|
||||
|
||||
"http://foo.com/viewtopic.php?param=&quot;18&quot;",
|
||||
"<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;\">http://foo.com/viewtopic.php?param=&quot;18&quot;</a></p>\n",
|
||||
}
|
||||
doTestsInline(t, tests)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user