Merge pull request #50 from rtfb/master

Better protection against JavaScript injection
2024-03-22 13:40:34 +08:00 · 2014-03-30 19:52:13 +03:00 · 2014-03-30 19:52:13 +03:00 · d643453f1e
commit d643453f1e
parent e078bb8ec3 84ee8e62f6
5 changed files with 228 additions and 48 deletions
--- a/README.md
+++ b/README.md
@ -89,6 +89,11 @@ All features of upskirt are supported, including:
    known inputs that make it crash.  If you find one, please let me
    know and send me the input that does it.
    NOTE: "safety" in this context means *runtime safety only*. It is
    not bulletproof against JavaScript injection, though we're working
    on it (https://github.com/russross/blackfriday/issues/11 tracks the
    progress).
 *   **Fast processing**. It is fast enough to render on-demand in
    most web applications without having to cache the output.
--- a/html.go
+++ b/html.go
@ -18,6 +18,7 @@ package blackfriday
 import (
 	"bytes"
 	"fmt"
 	"regexp"
 	"strconv"
 	"strings"
 )
@ -28,7 +29,7 @@ const (
 	HTML_SKIP_STYLE                           // skip embedded <style> elements
 	HTML_SKIP_IMAGES                          // skip embedded images
 	HTML_SKIP_LINKS                           // skip all links
-	HTML_SKIP_SCRIPT                          // skip embedded <script> elements
+	HTML_SANITIZE_OUTPUT                      // strip output of everything that's not known to be safe
 	HTML_SAFELINK                             // only link to trusted protocols
 	HTML_NOFOLLOW_LINKS                       // only link with rel="nofollow"
 	HTML_TOC                                  // generate a table of contents
@ -41,6 +42,41 @@ const (
 	HTML_SMARTYPANTS_LATEX_DASHES             // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
 )
 // Patterns used by sanitizeTag to decide whether a single HTML tag may be
 // kept in the output. Each compiled pattern is anchored with ^...$, so the
 // whole tag text must match.
 var (
 	// tags lists the element names that are joined into the alternation of
 	// tagWhitelist below.
 	tags  = []string{
 		"b",
 		"blockquote",
 		"code",
 		"del",
 		"dd",
 		"dl",
 		"dt",
 		"em",
 		"h1",
 		"h2",
 		"h3",
 		"h4",
 		"h5",
 		"h6",
 		"i",
 		"kbd",
 		"li",
 		"ol",
 		"p",
 		"pre",
 		"s",
 		"sup",
 		"sub",
 		"strong",
 		"strike",
 		"ul",
 	}
 	// urlRe is a pattern fragment (not compiled on its own) embedded in
 	// anchorClean and imgClean. It matches either an http, https or ftp
 	// scheme followed by "://", or a leading "/", and then one or more
 	// characters from a fixed set that excludes quotes and angle brackets.
 	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
 	// tagWhitelist matches an attribute-less opening or closing tag whose
 	// name is in tags, or a <br>/<hr> with an optional single whitespace
 	// character and an optional trailing slash. Matching is case-sensitive.
 	tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
 	// anchorClean matches </a>, or an <a> tag carrying exactly a
 	// double-quoted href that matches urlRe, optionally followed by a
 	// non-empty double-quoted title containing no '"', '<' or '>'.
 	anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
 	// imgClean matches an <img> tag with a double-quoted src that matches
 	// urlRe, optionally followed, in this order, by width and height (one to
 	// three digits each), alt and title (possibly empty, no '"', '<' or
 	// '>'), and an optional self-closing slash.
 	imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
 )
 // Html is a type that implements the Renderer interface for HTML output.
 //
 // Do not create this directly, instead use the HtmlRenderer function.
@ -138,6 +174,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
 	}
 }
 // GetFlags returns the flags value stored on this HTML renderer. It is the
 // Html implementation of the GetFlags method of the Renderer interface.
 func (options *Html) GetFlags() int {
 	return options.flags
 }
 func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
 	marker := out.Len()
 	doubleSpace(out)
@ -169,32 +209,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
 	}
 	doubleSpace(out)
 	if options.flags&HTML_SKIP_SCRIPT != 0 {
 		out.Write(stripTag(string(text), "script", "p"))
 	} else {
 	out.Write(text)
 	}
 	out.WriteByte('\n')
 }
 func stripTag(text, tag, newTag string) []byte {
 	closeNewTag := fmt.Sprintf("</%s>", newTag)
 	i := 0
 	for i < len(text) && text[i] != '<' {
 		i++
 	}
 	if i == len(text) {
 		return []byte(text)
 	}
 	found, end := findHtmlTagPos([]byte(text[i:]), tag)
 	closeTag := fmt.Sprintf("</%s>", tag)
 	noOpen := text
 	if found {
 		noOpen = text[0:i+1] + newTag + text[end:]
 	}
 	return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
 }
 func (options *Html) HRule(out *bytes.Buffer) {
 	doubleSpace(out)
 	out.WriteString("<hr")
@ -522,9 +540,6 @@ func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
 	if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
 		return
 	}
 	if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
 		return
 	}
 	out.Write(text)
 }
@ -726,6 +741,29 @@ func isHtmlTag(tag []byte, tagname string) bool {
 	return found
 }
 // skipUntilCharIgnoreQuotes scans html from index start and returns the
 // index of the first occurrence of char that is not inside a quoted run.
 // A quoted run is opened by a single quote, double quote or backtick and
 // is closed only by the same character; the other two quote characters
 // are ordinary content while a run is open, so an apostrophe inside a
 // double-quoted attribute value does not unbalance the scan. Quoted
 // content is skipped because it might be JavaScript containing char.
 //
 // If no unquoted char is found, start is returned. Callers therefore
 // treat a result that is not greater than start as "not found".
 func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
 	var quote byte // the quote character currently open, 0 when outside quotes
 	for i := start; i < len(html); i++ {
 		c := html[i]
 		switch {
 		case quote != 0:
 			// Inside a quoted run only the matching quote is significant.
 			if c == quote {
 				quote = 0
 			}
 		case c == char:
 			return i
 		case c == '\'' || c == '"' || c == '`':
 			quote = c
 		}
 	}
 	return start
 }
 func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
 	i := 0
 	if i < len(tag) && tag[0] != '<' {
@ -754,28 +792,54 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
 		return false, -1
 	}
-	// Now look for closing '>', but ignore it when it's in any kind of quotes,
+	rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
-	// it might be JavaScript
+	if rightAngle > i {
-	inSingleQuote := false
+		return true, rightAngle
 	inDoubleQuote := false
 	inGraveQuote := false
 	for i < len(tag) {
 		switch {
 		case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
 			return true, i
 		case tag[i] == '\'':
 			inSingleQuote = !inSingleQuote
 		case tag[i] == '"':
 			inDoubleQuote = !inDoubleQuote
 		case tag[i] == '`':
 			inGraveQuote = !inGraveQuote
 		}
 		i++
 	}
 	return false, -1
 }
 // sanitizeHtml rebuilds html one tag at a time: for every tag reported by
 // findHtmlTag the text preceding it is copied through verbatim and the tag
 // itself is replaced by whatever sanitizeTag returns for it. When
 // findHtmlTag reports nothing further (all three results empty) the loop
 // ends and any remaining input is not copied. A single newline is always
 // appended to the result.
 func sanitizeHtml(html []byte) []byte {
 	var clean []byte
 	for len(html) > 0 {
 		text, tag, remaining := findHtmlTag(html)
 		clean = append(clean, text...)
 		clean = append(clean, sanitizeTag(tag)...)
 		html = remaining
 	}
 	return append(clean, '\n')
 }
 // sanitizeTag returns tag unchanged when it matches one of the whitelist
 // patterns (tagWhitelist, anchorClean or imgClean), and an empty slice
 // otherwise.
 func sanitizeTag(tag []byte) []byte {
 	switch {
 	case tagWhitelist.Match(tag), anchorClean.Match(tag), imgClean.Match(tag):
 		return tag
 	}
 	return []byte("")
 }
 // skipUntilChar returns the index of the first occurrence of char in text
 // at or after start. If char does not occur there, it returns len(text),
 // or start itself when start is already past the end of text.
 func skipUntilChar(text []byte, start int, char byte) int {
 	pos := start
 	for ; pos < len(text); pos++ {
 		if text[pos] == char {
 			break
 		}
 	}
 	return pos
 }
 // findHtmlTag locates the first tag in html. It looks for the first '<'
 // and then for the next '>' that is not inside quotes. On success it
 // returns the text before the tag (skip), the tag including both angle
 // brackets (tag), and everything after it (rest); all three are sub-slices
 // of html.
 //
 // When there is no '<', or no unquoted '>' follows it, three empty slices
 // are returned and none of the input is reported back.
 func findHtmlTag(html []byte) (skip, tag, rest []byte) {
 	open := skipUntilChar(html, 0, '<')
 	end := skipUntilCharIgnoreQuotes(html, open, '>')
 	if end <= open {
 		return []byte(""), []byte(""), []byte("")
 	}
 	end++ // step past the closing '>' so that it stays part of the tag
 	return html[:open], html[open:end], html[end:]
 }
 func skipSpace(tag []byte, i int) int {
 	for i < len(tag) && isspace(tag[i]) {
 		i++
--- a/inline_test.go
+++ b/inline_test.go
@ -90,18 +90,119 @@ func TestRawHtmlTag(t *testing.T) {
 		"<p>alert()</p>\n",
 		"<script>alert()</script>\n",
-		"<p>alert()</p>\n",
+		"alert()\n",
 		"<script src='foo'></script>\n",
-		"<p></p>\n",
+		"\n",
 		"<script src='a>b'></script>\n",
 		"\n",
 		"zz <script src='foo'></script>\n",
 		"<p>zz </p>\n",
 		"zz <script src=foo></script>\n",
 		"<p>zz </p>\n",
 		`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
 		"\n",
 		`'';!--"<XSS>=&{()}`,
 		"<p>'';!--&quot;=&amp;{()}</p>\n",
 		"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
 		"<p></p>\n",
 		"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
 		"<p></p>\n",
 		`<IMG SRC="javascript:alert('XSS');">`,
 		"<p></p>\n",
 		"<IMG SRC=javascript:alert('XSS')>",
 		"<p></p>\n",
 		"<IMG SRC=JaVaScRiPt:alert('XSS')>",
 		"<p></p>\n",
 		"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
 		"<p></p>\n",
 		`<a onmouseover="alert(document.cookie)">xss link</a>`,
 		"<p>xss link</a></p>\n",
 		"<a onmouseover=alert(document.cookie)>xss link</a>",
 		"<p>xss link</a></p>\n",
 		// XXX: this doesn't pass yet
 		//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
 		//"<p></p>\n",
 		"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
 		"<p></p>\n",
 		`<IMG SRC=# onmouseover="alert('xxs')">`,
 		"<p></p>\n",
 		`<IMG SRC= onmouseover="alert('xxs')">`,
 		"<p></p>\n",
 		`<IMG onmouseover="alert('xxs')">`,
 		"<p></p>\n",
 		"<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
 		"<p></p>\n",
 		"<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
 		"<p></p>\n",
 		"<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
 		"<p></p>\n",
 		`<IMG SRC="javascriptascript:alert('XSS');">`,
 		"<p></p>\n",
 		`<IMG SRC="jav&#x09;ascript:alert('XSS');">`,
 		"<p></p>\n",
 		`<IMG SRC="jav&#x0A;ascript:alert('XSS');">`,
 		"<p></p>\n",
 		`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
 		"<p></p>\n",
 		`<IMG SRC=" &#14;  javascript:alert('XSS');">`,
 		"<p></p>\n",
 		`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
 		"<p></p>\n",
 		// XXX: this doesn't pass yet
 		//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
 		//"\n",
 		`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
 		"<p></p>\n",
 		// XXX: this doesn't pass yet
 		//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
 		//"",
 		"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
 		"<p></p>\n",
 		"<SCRIPT SRC=//ha.ckers.org/.j>",
 		"<p></p>\n",
 		// XXX: this doesn't pass yet
 		//`<IMG SRC="javascript:alert('XSS')"`,
 		//"",
 		// XXX: this doesn't pass yet
 		//"<iframe src=http://ha.ckers.org/scriptlet.html <",
 		//"",
 	}
-	doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
+	doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
 }
 func TestEmphasis(t *testing.T) {
--- a/latex.go
+++ b/latex.go
@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
 	return &Latex{}
 }
 // GetFlags is the Latex implementation of the GetFlags method of the
 // Renderer interface. It always returns 0, so no flag bit (including
 // HTML_SANITIZE_OUTPUT) ever tests as set for this renderer.
 func (options *Latex) GetFlags() int {
 	return 0
 }
 // render code chunks using verbatim, or listings if we have a language
 func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
 	if lang == "" {
--- a/markdown.go
+++ b/markdown.go
@ -165,6 +165,8 @@ type Renderer interface {
 	// Header and footer
 	DocumentHeader(out *bytes.Buffer)
 	DocumentFooter(out *bytes.Buffer)
 	GetFlags() int
 }
 // Callback functions for inline parsing. One such function is defined
@ -231,7 +233,7 @@ func MarkdownCommon(input []byte) []byte {
 	htmlFlags |= HTML_USE_SMARTYPANTS
 	htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
 	htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
-	htmlFlags |= HTML_SKIP_SCRIPT
+	htmlFlags |= HTML_SANITIZE_OUTPUT
 	renderer := HtmlRenderer(htmlFlags, "", "")
 	// set up the parser
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
 	first := firstPass(p, input)
 	second := secondPass(p, first)
 	if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 {
 		second = sanitizeHtml(second)
 	}
 	return second
 }