mirror of
https://github.com/russross/blackfriday.git
synced 2024-03-22 13:40:34 +08:00
Merge pull request #50 from rtfb/master
Better protection against JavaScript injection
This commit is contained in:
commit
d643453f1e
|
@ -89,6 +89,11 @@ All features of upskirt are supported, including:
|
|||
known inputs that make it crash. If you find one, please let me
|
||||
know and send me the input that does it.
|
||||
|
||||
NOTE: "safety" in this context means *runtime safety only*. It is
|
||||
not bulletproof against JavaScript injection, though we're working
|
||||
on it (https://github.com/russross/blackfriday/issues/11 tracks the
|
||||
progress).
|
||||
|
||||
* **Fast processing**. It is fast enough to render on-demand in
|
||||
most web applications without having to cache the output.
|
||||
|
||||
|
|
150
html.go
150
html.go
|
@ -18,6 +18,7 @@ package blackfriday
|
|||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
@ -28,7 +29,7 @@ const (
|
|||
HTML_SKIP_STYLE // skip embedded <style> elements
|
||||
HTML_SKIP_IMAGES // skip embedded images
|
||||
HTML_SKIP_LINKS // skip all links
|
||||
HTML_SKIP_SCRIPT // skip embedded <script> elements
|
||||
HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe
|
||||
HTML_SAFELINK // only link to trusted protocols
|
||||
HTML_NOFOLLOW_LINKS // only link with rel="nofollow"
|
||||
HTML_TOC // generate a table of contents
|
||||
|
@ -41,6 +42,41 @@ const (
|
|||
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
|
||||
)
|
||||
|
||||
// Whitelist tables consulted when deciding whether a raw HTML tag may be
// kept in the output. Every pattern is anchored at both ends, so a tag has
// to match in its entirety.
var (
	// tags names the elements accepted as bare opening or closing tags
	// (no attributes).
	tags = []string{
		"b", "blockquote", "code", "del", "dd", "dl", "dt", "em",
		"h1", "h2", "h3", "h4", "h5", "h6",
		"i", "kbd", "li", "ol", "p", "pre",
		"s", "sup", "sub", "strong", "strike", "ul",
	}

	// urlRe is the pattern fragment for an acceptable link target: an
	// http, https or ftp URL, or a path starting with a slash.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// tagWhitelist matches <name> and </name> for every entry of tags, plus
	// <br> and <hr> with an optional space and optional trailing slash.
	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)

	// anchorClean matches </a>, or an <a> tag whose only attributes are an
	// href matching urlRe and an optional non-empty title.
	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)

	// imgClean matches an <img> tag whose src matches urlRe, optionally
	// followed (in this order) by width, height, alt and title attributes.
	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
|
||||
|
||||
// Html is a type that implements the Renderer interface for HTML output.
|
||||
//
|
||||
// Do not create this directly, instead use the HtmlRenderer function.
|
||||
|
@ -138,6 +174,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
|
|||
}
|
||||
}
|
||||
|
||||
// GetFlags returns the option flags stored on this Html renderer (the value
// tested against the HTML_* constants elsewhere in this file).
func (options *Html) GetFlags() int {
|
||||
return options.flags
|
||||
}
|
||||
|
||||
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
|
||||
marker := out.Len()
|
||||
doubleSpace(out)
|
||||
|
@ -169,32 +209,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
|
|||
}
|
||||
|
||||
doubleSpace(out)
|
||||
if options.flags&HTML_SKIP_SCRIPT != 0 {
|
||||
out.Write(stripTag(string(text), "script", "p"))
|
||||
} else {
|
||||
out.Write(text)
|
||||
}
|
||||
out.WriteByte('\n')
|
||||
}
|
||||
|
||||
func stripTag(text, tag, newTag string) []byte {
|
||||
closeNewTag := fmt.Sprintf("</%s>", newTag)
|
||||
i := 0
|
||||
for i < len(text) && text[i] != '<' {
|
||||
i++
|
||||
}
|
||||
if i == len(text) {
|
||||
return []byte(text)
|
||||
}
|
||||
found, end := findHtmlTagPos([]byte(text[i:]), tag)
|
||||
closeTag := fmt.Sprintf("</%s>", tag)
|
||||
noOpen := text
|
||||
if found {
|
||||
noOpen = text[0:i+1] + newTag + text[end:]
|
||||
}
|
||||
return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
|
||||
}
|
||||
|
||||
func (options *Html) HRule(out *bytes.Buffer) {
|
||||
doubleSpace(out)
|
||||
out.WriteString("<hr")
|
||||
|
@ -522,9 +540,6 @@ func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
|
|||
if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
|
||||
return
|
||||
}
|
||||
if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
|
||||
return
|
||||
}
|
||||
out.Write(text)
|
||||
}
|
||||
|
||||
|
@ -726,6 +741,29 @@ func isHtmlTag(tag []byte, tagname string) bool {
|
|||
return found
|
||||
}
|
||||
|
||||
// skipUntilCharIgnoreQuotes scans html from index start for the first
// occurrence of char that is not inside a quoted run, and returns its index.
// Quoted content is skipped because it might be JavaScript containing the
// character being searched for.
//
// A quoted run opens at a single quote, double quote or backquote and closes
// only at the next occurrence of that same delimiter; other quote characters
// inside the run are ordinary content (so the apostrophe in title="it's"
// does not open a new run).
//
// If no unquoted occurrence exists, start is returned. Callers therefore
// detect success with "result > start".
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var quote byte // delimiter of the run we are inside, 0 when outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case quote != 0:
			// Inside a quoted run: only the matching delimiter ends it.
			if c == quote {
				quote = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			quote = c
		}
	}
	return start
}
|
||||
|
||||
func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
||||
i := 0
|
||||
if i < len(tag) && tag[0] != '<' {
|
||||
|
@ -754,28 +792,54 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
|||
return false, -1
|
||||
}
|
||||
|
||||
// Now look for closing '>', but ignore it when it's in any kind of quotes,
|
||||
// it might be JavaScript
|
||||
inSingleQuote := false
|
||||
inDoubleQuote := false
|
||||
inGraveQuote := false
|
||||
for i < len(tag) {
|
||||
switch {
|
||||
case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
|
||||
return true, i
|
||||
case tag[i] == '\'':
|
||||
inSingleQuote = !inSingleQuote
|
||||
case tag[i] == '"':
|
||||
inDoubleQuote = !inDoubleQuote
|
||||
case tag[i] == '`':
|
||||
inGraveQuote = !inGraveQuote
|
||||
}
|
||||
i++
|
||||
rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
|
||||
if rightAngle > i {
|
||||
return true, rightAngle
|
||||
}
|
||||
|
||||
return false, -1
|
||||
}
|
||||
|
||||
func sanitizeHtml(html []byte) []byte {
|
||||
var result []byte
|
||||
for string(html) != "" {
|
||||
skip, tag, rest := findHtmlTag(html)
|
||||
html = rest
|
||||
result = append(result, skip...)
|
||||
result = append(result, sanitizeTag(tag)...)
|
||||
}
|
||||
return append(result, []byte("\n")...)
|
||||
}
|
||||
|
||||
func sanitizeTag(tag []byte) []byte {
|
||||
if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
|
||||
return tag
|
||||
} else {
|
||||
return []byte("")
|
||||
}
|
||||
}
|
||||
|
||||
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. When char does not occur, the index at which the scan
// stopped is returned: len(text), or start itself if start is already past
// the end of text.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
|
||||
|
||||
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
|
||||
start := skipUntilChar(html, 0, '<')
|
||||
rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
|
||||
if rightAngle > start {
|
||||
skip = html[0:start]
|
||||
tag = html[start : rightAngle+1]
|
||||
rest = html[rightAngle+1:]
|
||||
return
|
||||
}
|
||||
|
||||
return []byte(""), []byte(""), []byte("")
|
||||
}
|
||||
|
||||
func skipSpace(tag []byte, i int) int {
|
||||
for i < len(tag) && isspace(tag[i]) {
|
||||
i++
|
||||
|
|
107
inline_test.go
107
inline_test.go
|
@ -90,18 +90,119 @@ func TestRawHtmlTag(t *testing.T) {
|
|||
"<p>alert()</p>\n",
|
||||
|
||||
"<script>alert()</script>\n",
|
||||
"<p>alert()</p>\n",
|
||||
"alert()\n",
|
||||
|
||||
"<script src='foo'></script>\n",
|
||||
"<p></p>\n",
|
||||
"\n",
|
||||
|
||||
"<script src='a>b'></script>\n",
|
||||
"\n",
|
||||
|
||||
"zz <script src='foo'></script>\n",
|
||||
"<p>zz </p>\n",
|
||||
|
||||
"zz <script src=foo></script>\n",
|
||||
"<p>zz </p>\n",
|
||||
|
||||
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
|
||||
"\n",
|
||||
|
||||
`'';!--"<XSS>=&{()}`,
|
||||
"<p>'';!--"=&{()}</p>\n",
|
||||
|
||||
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="javascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<a onmouseover="alert(document.cookie)">xss link</a>`,
|
||||
"<p>xss link</a></p>\n",
|
||||
|
||||
"<a onmouseover=alert(document.cookie)>xss link</a>",
|
||||
"<p>xss link</a></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
|
||||
//"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC=# onmouseover="alert('xxs')">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC= onmouseover="alert('xxs')">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG onmouseover="alert('xxs')">`,
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
"<IMG SRC=javascript:alert('XSS')>",
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="javascriptascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="jav	ascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<IMG SRC="  javascript:alert('XSS');">`,
|
||||
"<p></p>\n",
|
||||
|
||||
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||
"<p></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
|
||||
//"\n",
|
||||
|
||||
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||
"<p></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
|
||||
//"",
|
||||
|
||||
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
|
||||
"<p></p>\n",
|
||||
|
||||
"<SCRIPT SRC=//ha.ckers.org/.j>",
|
||||
"<p></p>\n",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//`<IMG SRC="javascript:alert('XSS')"`,
|
||||
//"",
|
||||
|
||||
// XXX: this doesn't pass yet
|
||||
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
|
||||
//"",
|
||||
}
|
||||
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
|
||||
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
|
||||
}
|
||||
|
||||
func TestEmphasis(t *testing.T) {
|
||||
|
|
4
latex.go
4
latex.go
|
@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
|
|||
return &Latex{}
|
||||
}
|
||||
|
||||
// GetFlags always returns 0: the Latex renderer reports no flags set.
func (options *Latex) GetFlags() int {
|
||||
return 0
|
||||
}
|
||||
|
||||
// render code chunks using verbatim, or listings if we have a language
|
||||
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
|
||||
if lang == "" {
|
||||
|
|
|
@ -165,6 +165,8 @@ type Renderer interface {
|
|||
// Header and footer
|
||||
DocumentHeader(out *bytes.Buffer)
|
||||
DocumentFooter(out *bytes.Buffer)
|
||||
|
||||
GetFlags() int
|
||||
}
|
||||
|
||||
// Callback functions for inline parsing. One such function is defined
|
||||
|
@ -231,7 +233,7 @@ func MarkdownCommon(input []byte) []byte {
|
|||
htmlFlags |= HTML_USE_SMARTYPANTS
|
||||
htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
|
||||
htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
|
||||
htmlFlags |= HTML_SKIP_SCRIPT
|
||||
htmlFlags |= HTML_SANITIZE_OUTPUT
|
||||
renderer := HtmlRenderer(htmlFlags, "", "")
|
||||
|
||||
// set up the parser
|
||||
|
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
|
|||
first := firstPass(p, input)
|
||||
second := secondPass(p, first)
|
||||
|
||||
if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 {
|
||||
second = sanitizeHtml(second)
|
||||
}
|
||||
|
||||
return second
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user