mirror of
https://github.com/russross/blackfriday.git
synced 2024-03-22 13:40:34 +08:00
commit
55bb56bf9b
80
html.go
80
html.go
|
@ -43,7 +43,7 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
tags = []string{
|
tags = []string{
|
||||||
"b",
|
"b",
|
||||||
"blockquote",
|
"blockquote",
|
||||||
"code",
|
"code",
|
||||||
|
@ -71,10 +71,12 @@ var (
|
||||||
"strike",
|
"strike",
|
||||||
"ul",
|
"ul",
|
||||||
}
|
}
|
||||||
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
|
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
|
||||||
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
|
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
|
||||||
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
|
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
|
||||||
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
|
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
|
||||||
|
// TODO: improve this regexp to catch all possible entities:
|
||||||
|
htmlEntity = regexp.MustCompile(`&[a-z]{2,5};`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// Html is a type that implements the Renderer interface for HTML output.
|
// Html is a type that implements the Renderer interface for HTML output.
|
||||||
|
@ -128,45 +130,36 @@ func HtmlRenderer(flags int, title string, css string) Renderer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// escapeSingleChar reports how a single byte must be written in HTML output.
// For the four characters that need escaping (", &, <, >) it returns the
// corresponding entity and true; for every other byte it returns "" and false.
//
// Using if statements is a bit faster than a switch statement. As the compiler
// improves, this should become unnecessary; it is only worthwhile because
// attrEscape is the single largest CPU user in normal use.
// A map lookup was also tried, but that gave a ~3x slowdown.
func escapeSingleChar(char byte) (string, bool) {
	if char == '"' {
		return "&quot;", true
	}
	if char == '&' {
		return "&amp;", true
	}
	if char == '<' {
		return "&lt;", true
	}
	if char == '>' {
		return "&gt;", true
	}
	return "", false
}
|
||||||
|
|
||||||
func attrEscape(out *bytes.Buffer, src []byte) {
|
func attrEscape(out *bytes.Buffer, src []byte) {
|
||||||
org := 0
|
org := 0
|
||||||
for i, ch := range src {
|
for i, ch := range src {
|
||||||
// using if statements is a bit faster than a switch statement.
|
if entity, ok := escapeSingleChar(ch); ok {
|
||||||
// as the compiler improves, this should be unnecessary
|
|
||||||
// this is only worthwhile because attrEscape is the single
|
|
||||||
// largest CPU user in normal use
|
|
||||||
if ch == '"' {
|
|
||||||
if i > org {
|
if i > org {
|
||||||
// copy all the normal characters since the last escape
|
// copy all the normal characters since the last escape
|
||||||
out.Write(src[org:i])
|
out.Write(src[org:i])
|
||||||
}
|
}
|
||||||
org = i + 1
|
org = i + 1
|
||||||
out.WriteString("&quot;")
|
out.WriteString(entity)
|
||||||
continue
|
|
||||||
}
|
|
||||||
if ch == '&' {
|
|
||||||
if i > org {
|
|
||||||
out.Write(src[org:i])
|
|
||||||
}
|
|
||||||
org = i + 1
|
|
||||||
out.WriteString("&amp;")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if ch == '<' {
|
|
||||||
if i > org {
|
|
||||||
out.Write(src[org:i])
|
|
||||||
}
|
|
||||||
org = i + 1
|
|
||||||
out.WriteString("&lt;")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if ch == '>' {
|
|
||||||
if i > org {
|
|
||||||
out.Write(src[org:i])
|
|
||||||
}
|
|
||||||
org = i + 1
|
|
||||||
out.WriteString("&gt;")
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if org < len(src) {
|
if org < len(src) {
|
||||||
|
@ -174,6 +167,16 @@ func attrEscape(out *bytes.Buffer, src []byte) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func entityEscapeWithSkip(out *bytes.Buffer, src []byte, skipRanges [][]int) {
|
||||||
|
end := 0
|
||||||
|
for _, rang := range skipRanges {
|
||||||
|
attrEscape(out, src[end:rang[0]])
|
||||||
|
out.Write(src[rang[0]:rang[1]])
|
||||||
|
end = rang[1]
|
||||||
|
}
|
||||||
|
attrEscape(out, src[end:])
|
||||||
|
}
|
||||||
|
|
||||||
func (options *Html) GetFlags() int {
|
func (options *Html) GetFlags() int {
|
||||||
return options.flags
|
return options.flags
|
||||||
}
|
}
|
||||||
|
@ -418,10 +421,11 @@ func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
||||||
|
skipRanges := htmlEntity.FindAllIndex(link, -1)
|
||||||
if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
|
if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
|
||||||
// mark it but don't link it if it is not a safe link: no smartypants
|
// mark it but don't link it if it is not a safe link: no smartypants
|
||||||
out.WriteString("<tt>")
|
out.WriteString("<tt>")
|
||||||
attrEscape(out, link)
|
entityEscapeWithSkip(out, link, skipRanges)
|
||||||
out.WriteString("</tt>")
|
out.WriteString("</tt>")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -430,7 +434,7 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
||||||
if kind == LINK_TYPE_EMAIL {
|
if kind == LINK_TYPE_EMAIL {
|
||||||
out.WriteString("mailto:")
|
out.WriteString("mailto:")
|
||||||
}
|
}
|
||||||
attrEscape(out, link)
|
entityEscapeWithSkip(out, link, skipRanges)
|
||||||
out.WriteString("\">")
|
out.WriteString("\">")
|
||||||
|
|
||||||
// Pretty print: if we get an email address as
|
// Pretty print: if we get an email address as
|
||||||
|
@ -442,7 +446,7 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
|
||||||
case bytes.HasPrefix(link, []byte("mailto:")):
|
case bytes.HasPrefix(link, []byte("mailto:")):
|
||||||
attrEscape(out, link[len("mailto:"):])
|
attrEscape(out, link[len("mailto:"):])
|
||||||
default:
|
default:
|
||||||
attrEscape(out, link)
|
entityEscapeWithSkip(out, link, skipRanges)
|
||||||
}
|
}
|
||||||
|
|
||||||
out.WriteString("</a>")
|
out.WriteString("</a>")
|
||||||
|
|
40
inline.go
40
inline.go
|
@ -15,9 +15,14 @@ package blackfriday
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
|
||||||
|
)
|
||||||
|
|
||||||
// Functions to parse text within a block
|
// Functions to parse text within a block
|
||||||
// Each function returns the number of chars taken care of
|
// Each function returns the number of chars taken care of
|
||||||
// data is the complete block being rendered
|
// data is the complete block being rendered
|
||||||
|
@ -612,12 +617,34 @@ func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
||||||
return end
|
return end
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func linkEndsWithEntity(data []byte, linkEnd int) bool {
|
||||||
|
entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
|
||||||
|
if entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
||||||
// quick check to rule out most false hits on ':'
|
// quick check to rule out most false hits on ':'
|
||||||
if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
|
if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Now a more expensive check to see if we're not inside an anchor element
|
||||||
|
anchorStart := offset
|
||||||
|
offsetFromAnchor := 0
|
||||||
|
for anchorStart > 0 && data[anchorStart] != '<' {
|
||||||
|
anchorStart--
|
||||||
|
offsetFromAnchor++
|
||||||
|
}
|
||||||
|
|
||||||
|
anchorStr := anchorRe.Find(data[anchorStart:])
|
||||||
|
if anchorStr != nil {
|
||||||
|
out.Write(anchorStr[offsetFromAnchor:])
|
||||||
|
return len(anchorStr) - offsetFromAnchor
|
||||||
|
}
|
||||||
|
|
||||||
// scan backward for a word boundary
|
// scan backward for a word boundary
|
||||||
rewind := 0
|
rewind := 0
|
||||||
for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
|
for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
|
||||||
|
@ -635,12 +662,17 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
||||||
}
|
}
|
||||||
|
|
||||||
linkEnd := 0
|
linkEnd := 0
|
||||||
for linkEnd < len(data) && !isspace(data[linkEnd]) {
|
for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
|
||||||
linkEnd++
|
linkEnd++
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip punctuation at the end of the link
|
// Skip punctuation at the end of the link
|
||||||
if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',' || data[linkEnd-1] == ';') && data[linkEnd-2] != '\\' {
|
if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
|
||||||
|
linkEnd--
|
||||||
|
}
|
||||||
|
|
||||||
|
// But don't skip semicolon if it's a part of escaped entity:
|
||||||
|
if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
|
||||||
linkEnd--
|
linkEnd--
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -718,6 +750,10 @@ func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
|
||||||
return linkEnd - rewind
|
return linkEnd - rewind
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isEndOfLink(char byte) bool {
|
||||||
|
return isspace(char) || char == '<'
|
||||||
|
}
|
||||||
|
|
||||||
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://"), []byte("/")}
|
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://"), []byte("/")}
|
||||||
|
|
||||||
func isSafeLink(link []byte) bool {
|
func isSafeLink(link []byte) bool {
|
||||||
|
|
|
@ -682,6 +682,33 @@ func TestAutoLink(t *testing.T) {
|
||||||
"even a > can be escaped <http://new.com?q=\\>&etc>\n",
|
"even a > can be escaped <http://new.com?q=\\>&etc>\n",
|
||||||
"<p>even a &gt; can be escaped <a href=\"http://new.com?q=&gt;&amp;etc\">" +
|
"<p>even a &gt; can be escaped <a href=\"http://new.com?q=&gt;&amp;etc\">" +
|
||||||
"http://new.com?q=&gt;&amp;etc</a></p>\n",
|
"http://new.com?q=&gt;&amp;etc</a></p>\n",
|
||||||
|
|
||||||
|
"<a href=\"http://fancy.com\">http://fancy.com</a>\n",
|
||||||
|
"<p><a href=\"http://fancy.com\">http://fancy.com</a></p>\n",
|
||||||
|
|
||||||
|
"<a href=\"http://fancy.com\">This is a link</a>\n",
|
||||||
|
"<p><a href=\"http://fancy.com\">This is a link</a></p>\n",
|
||||||
|
|
||||||
|
"<a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a>\n",
|
||||||
|
"<p><a href=\"http://www.fancy.com/A_B.pdf\">http://www.fancy.com/A_B.pdf</a></p>\n",
|
||||||
|
|
||||||
|
"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (\n",
|
||||||
|
"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (</p>\n",
|
||||||
|
|
||||||
|
"(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).\n",
|
||||||
|
"<p>(<a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a> (part two: <a href=\"http://www.fancy.com/A_B\">http://www.fancy.com/A_B</a>)).</p>\n",
|
||||||
|
|
||||||
|
"http://www.foo.com<br />\n",
|
||||||
|
"<p><a href=\"http://www.foo.com\">http://www.foo.com</a><br /></p>\n",
|
||||||
|
|
||||||
|
"http://foo.com/viewtopic.php?f=18&amp;t=297",
|
||||||
|
"<p><a href=\"http://foo.com/viewtopic.php?f=18&amp;t=297\">http://foo.com/viewtopic.php?f=18&amp;t=297</a></p>\n",
|
||||||
|
|
||||||
|
"http://foo.com/viewtopic.php?param=&quot;18&quot;zz",
|
||||||
|
"<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;zz\">http://foo.com/viewtopic.php?param=&quot;18&quot;zz</a></p>\n",
|
||||||
|
|
||||||
|
"http://foo.com/viewtopic.php?param=&quot;18&quot;",
|
||||||
|
"<p><a href=\"http://foo.com/viewtopic.php?param=&quot;18&quot;\">http://foo.com/viewtopic.php?param=&quot;18&quot;</a></p>\n",
|
||||||
}
|
}
|
||||||
doTestsInline(t, tests)
|
doTestsInline(t, tests)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user