Merge pull request #50 from rtfb/master

Better protection against JavaScript injection
This commit is contained in:
Vytautas Šaltenis 2014-03-30 19:52:13 +03:00
commit d643453f1e
5 changed files with 228 additions and 48 deletions

View File

@ -89,6 +89,11 @@ All features of upskirt are supported, including:
known inputs that make it crash. If you find one, please let me
know and send me the input that does it.
NOTE: "safety" in this context means *runtime safety only*. It is
not bulletproof against JavaScript injections, though we're working
on it (https://github.com/russross/blackfriday/issues/11 tracks the
progress).
* **Fast processing**. It is fast enough to render on-demand in
most web applications without having to cache the output.

150
html.go
View File

@ -18,6 +18,7 @@ package blackfriday
import (
"bytes"
"fmt"
"regexp"
"strconv"
"strings"
)
@ -28,7 +29,7 @@ const (
HTML_SKIP_STYLE // skip embedded <style> elements
HTML_SKIP_IMAGES // skip embedded images
HTML_SKIP_LINKS // skip all links
HTML_SKIP_SCRIPT // skip embedded <script> elements
HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe
HTML_SAFELINK // only link to trusted protocols
HTML_NOFOLLOW_LINKS // only link with rel="nofollow"
HTML_TOC // generate a table of contents
@ -41,6 +42,41 @@ const (
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
// Patterns consulted by sanitizeTag to decide whether a single HTML tag may
// be kept in the output. Every pattern is anchored with ^...$, so it has to
// match the whole tag, and none of them is case-insensitive.
var (
// tags lists the element names that tagWhitelist accepts as a bare opening
// or closing tag (no attributes).
tags = []string{
"b",
"blockquote",
"code",
"del",
"dd",
"dl",
"dt",
"em",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"i",
"kbd",
"li",
"ol",
"p",
"pre",
"s",
"sup",
"sub",
"strong",
"strike",
"ul",
}
// urlRe is a regexp fragment (not compiled on its own) for an acceptable
// URL: it must start with http://, https://, ftp:// or a single '/', followed
// by one or more characters from a restricted set.
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
// tagWhitelist matches <name> or </name> for any name in tags, plus <br> and
// <hr> with an optional whitespace character and an optional '/' before '>'.
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
// anchorClean matches </a>, or <a href="URL"> with an optional non-empty
// title="..." attribute after href; no other attributes are accepted.
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
// imgClean matches <img src="URL"> optionally followed, in this exact order,
// by width and height (1-3 digits each), alt and title attributes.
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
@ -138,6 +174,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
}
}
// GetFlags returns the flags value stored on this HTML renderer. Markdown
// tests the result against HTML_SANITIZE_OUTPUT to decide whether the
// rendered output is passed through sanitizeHtml.
func (options *Html) GetFlags() int {
return options.flags
}
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
marker := out.Len()
doubleSpace(out)
@ -169,32 +209,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
}
doubleSpace(out)
if options.flags&HTML_SKIP_SCRIPT != 0 {
out.Write(stripTag(string(text), "script", "p"))
} else {
out.Write(text)
}
out.WriteByte('\n')
}
func stripTag(text, tag, newTag string) []byte {
closeNewTag := fmt.Sprintf("</%s>", newTag)
i := 0
for i < len(text) && text[i] != '<' {
i++
}
if i == len(text) {
return []byte(text)
}
found, end := findHtmlTagPos([]byte(text[i:]), tag)
closeTag := fmt.Sprintf("</%s>", tag)
noOpen := text
if found {
noOpen = text[0:i+1] + newTag + text[end:]
}
return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
}
func (options *Html) HRule(out *bytes.Buffer) {
doubleSpace(out)
out.WriteString("<hr")
@ -522,9 +540,6 @@ func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
return
}
if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
return
}
out.Write(text)
}
@ -726,6 +741,29 @@ func isHtmlTag(tag []byte, tagname string) bool {
return found
}
// skipUntilCharIgnoreQuotes returns the index of the first occurrence of char
// at or after start that is not inside single, double or backtick quotes
// (quoted content may be JavaScript and must not terminate the scan).
//
// The three quote kinds are tracked independently: each quote character
// simply flips its own state, even when it appears inside another kind of
// quote. When no unquoted occurrence exists, start is returned, so callers
// cannot tell "not found" apart from "found at start".
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
	var single, double, grave bool
	for pos := start; pos < len(html); pos++ {
		c := html[pos]
		if c == char && !(single || double || grave) {
			return pos
		}
		switch c {
		case '\'':
			single = !single
		case '"':
			double = !double
		case '`':
			grave = !grave
		}
	}
	return start
}
func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
i := 0
if i < len(tag) && tag[0] != '<' {
@ -754,28 +792,54 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
return false, -1
}
// Now look for closing '>', but ignore it when it's in any kind of quotes,
// it might be JavaScript
inSingleQuote := false
inDoubleQuote := false
inGraveQuote := false
for i < len(tag) {
switch {
case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
return true, i
case tag[i] == '\'':
inSingleQuote = !inSingleQuote
case tag[i] == '"':
inDoubleQuote = !inDoubleQuote
case tag[i] == '`':
inGraveQuote = !inGraveQuote
}
i++
rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
if rightAngle > i {
return true, rightAngle
}
return false, -1
}
// sanitizeHtml walks html tag by tag and returns a copy in which every tag
// rejected by sanitizeTag has been removed; text between tags is copied
// through untouched. A single '\n' is always appended to the result.
//
// When findHtmlTag finds no further complete tag it returns empty slices,
// which ends the loop; whatever input remained at that point is not copied.
func sanitizeHtml(html []byte) []byte {
	var cleaned []byte
	for len(html) > 0 {
		text, tag, remaining := findHtmlTag(html)
		cleaned = append(cleaned, text...)
		cleaned = append(cleaned, sanitizeTag(tag)...)
		html = remaining
	}
	return append(cleaned, '\n')
}
// sanitizeTag returns tag unchanged when it fully matches one of the
// tagWhitelist, anchorClean or imgClean patterns, and an empty slice
// otherwise.
func sanitizeTag(tag []byte) []byte {
	switch {
	case tagWhitelist.Match(tag), anchorClean.Match(tag), imgClean.Match(tag):
		return tag
	}
	return []byte{}
}
// skipUntilChar returns the index of the first occurrence of char in text at
// or after start. If char does not occur, the scan stops at len(text) and
// that value is returned; a start at or beyond len(text) is returned as is.
func skipUntilChar(text []byte, start int, char byte) int {
	pos := start
	for ; pos < len(text); pos++ {
		if text[pos] == char {
			break
		}
	}
	return pos
}
// findHtmlTag splits html around its first tag.
//
// skip is the text before the first '<', tag runs from that '<' through the
// first '>' that is outside quotes, and rest is everything after it. If
// there is no '<', or no unquoted '>' follows it, all three results are
// empty slices.
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
	open := skipUntilChar(html, 0, '<')
	closing := skipUntilCharIgnoreQuotes(html, open, '>')
	if closing <= open {
		// skipUntilCharIgnoreQuotes hands back its start index on failure.
		return []byte{}, []byte{}, []byte{}
	}
	end := closing + 1
	return html[:open], html[open:end], html[end:]
}
func skipSpace(tag []byte, i int) int {
for i < len(tag) && isspace(tag[i]) {
i++

View File

@ -90,18 +90,119 @@ func TestRawHtmlTag(t *testing.T) {
"<p>alert()</p>\n",
"<script>alert()</script>\n",
"<p>alert()</p>\n",
"alert()\n",
"<script src='foo'></script>\n",
"<p></p>\n",
"\n",
"<script src='a>b'></script>\n",
"\n",
"zz <script src='foo'></script>\n",
"<p>zz </p>\n",
"zz <script src=foo></script>\n",
"<p>zz </p>\n",
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
"\n",
`'';!--"<XSS>=&{()}`,
"<p>'';!--&quot;=&amp;{()}</p>\n",
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
"<p></p>\n",
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
"<p></p>\n",
`<IMG SRC="javascript:alert('XSS');">`,
"<p></p>\n",
"<IMG SRC=javascript:alert('XSS')>",
"<p></p>\n",
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
"<p></p>\n",
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
"<p></p>\n",
`<a onmouseover="alert(document.cookie)">xss link</a>`,
"<p>xss link</a></p>\n",
"<a onmouseover=alert(document.cookie)>xss link</a>",
"<p>xss link</a></p>\n",
// XXX: this doesn't pass yet
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
//"<p></p>\n",
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
"<p></p>\n",
`<IMG SRC=# onmouseover="alert('xxs')">`,
"<p></p>\n",
`<IMG SRC= onmouseover="alert('xxs')">`,
"<p></p>\n",
`<IMG onmouseover="alert('xxs')">`,
"<p></p>\n",
"<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
"<p></p>\n",
"<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
"<p></p>\n",
"<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
"<p></p>\n",
`<IMG SRC="javascriptascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x09;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x0A;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC=" &#14; javascript:alert('XSS');">`,
"<p></p>\n",
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
"<p></p>\n",
// XXX: this doesn't pass yet
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
//"\n",
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
"<p></p>\n",
// XXX: this doesn't pass yet
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
//"",
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
"<p></p>\n",
"<SCRIPT SRC=//ha.ckers.org/.j>",
"<p></p>\n",
// XXX: this doesn't pass yet
//`<IMG SRC="javascript:alert('XSS')"`,
//"",
// XXX: this doesn't pass yet
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
//"",
}
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
}
func TestEmphasis(t *testing.T) {

View File

@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
return &Latex{}
}
// GetFlags always returns 0 for the LaTeX renderer, so flag tests made on
// the result (such as the HTML_SANITIZE_OUTPUT check in Markdown) never
// match.
func (options *Latex) GetFlags() int {
return 0
}
// render code chunks using verbatim, or listings if we have a language
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
if lang == "" {

View File

@ -165,6 +165,8 @@ type Renderer interface {
// Header and footer
DocumentHeader(out *bytes.Buffer)
DocumentFooter(out *bytes.Buffer)
GetFlags() int
}
// Callback functions for inline parsing. One such function is defined
@ -231,7 +233,7 @@ func MarkdownCommon(input []byte) []byte {
htmlFlags |= HTML_USE_SMARTYPANTS
htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
htmlFlags |= HTML_SKIP_SCRIPT
htmlFlags |= HTML_SANITIZE_OUTPUT
renderer := HtmlRenderer(htmlFlags, "", "")
// set up the parser
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
first := firstPass(p, input)
second := secondPass(p, first)
if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 {
second = sanitizeHtml(second)
}
return second
}