Rewrite protection against JavaScript injection

This drops the naive approach at <script> tag stripping and resorts to
full sanitization of html. The general idea (and the regexps) is grabbed
from Stack Exchange's PageDown JavaScript Markdown processor[1]. Like in
PageDown, it's implemented as a separate pass over resulting html.

Includes a metric ton (but not all) of test cases from here[2]. Several
are commented out since they don't pass yet.

Stronger (but still incomplete) fix for #11.

[1] http://code.google.com/p/pagedown/wiki/PageDown
[2] https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
pull/50/head
Vytautas Šaltenis 2014-01-22 01:14:35 +02:00
parent e02c392dc6
commit 55cd82008e
4 changed files with 194 additions and 25 deletions

104
html.go
View File

@ -18,6 +18,7 @@ package blackfriday
import (
"bytes"
"fmt"
"regexp"
"strconv"
"strings"
)
@ -40,6 +41,41 @@ const (
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
)
var (
tags = []string{
"b",
"blockquote",
"code",
"del",
"dd",
"dl",
"dt",
"em",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"i",
"kbd",
"li",
"ol",
"p",
"pre",
"s",
"sup",
"sub",
"strong",
"strike",
"ul",
}
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
)
// Html is a type that implements the Renderer interface for HTML output.
//
// Do not create this directly, instead use the HtmlRenderer function.
@ -137,6 +173,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
}
}
func (options *Html) GetFlags() int {
return options.flags
}
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
marker := out.Len()
doubleSpace(out)
@ -168,32 +208,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
}
doubleSpace(out)
if options.flags&HTML_SKIP_SCRIPT != 0 {
out.Write(stripTag(string(text), "script", "p"))
} else {
out.Write(text)
}
out.Write(text)
out.WriteByte('\n')
}
func stripTag(text, tag, newTag string) []byte {
closeNewTag := fmt.Sprintf("</%s>", newTag)
i := 0
for i < len(text) && text[i] != '<' {
i++
}
if i == len(text) {
return []byte(text)
}
found, end := findHtmlTagPos([]byte(text[i:]), tag)
closeTag := fmt.Sprintf("</%s>", tag)
noOpen := text
if found {
noOpen = text[0:i+1] + newTag + text[end:]
}
return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
}
func (options *Html) HRule(out *bytes.Buffer) {
doubleSpace(out)
out.WriteString("<hr")
@ -781,6 +799,46 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
return false, -1
}
func sanitizeHtml(html []byte) []byte {
var result []byte
for string(html) != "" {
skip, tag, rest := findHtmlTag(html)
html = rest
result = append(result, skip...)
result = append(result, sanitizeTag(tag)...)
}
return append(result, []byte("\n")...)
}
func sanitizeTag(tag []byte) []byte {
if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
return tag
} else {
return []byte("")
}
}
func skipUntilChar(text []byte, start int, char byte) int {
i := start
for i < len(text) && text[i] != char {
i++
}
return i
}
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
start := skipUntilChar(html, 0, '<')
rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
if rightAngle > start {
skip = html[0:start]
tag = html[start : rightAngle+1]
rest = html[rightAngle+1:]
return
}
return []byte(""), []byte(""), []byte("")
}
func skipSpace(tag []byte, i int) int {
for i < len(tag) && isspace(tag[i]) {
i++

View File

@ -90,16 +90,117 @@ func TestRawHtmlTag(t *testing.T) {
"<p>alert()</p>\n",
"<script>alert()</script>\n",
"<p>alert()</p>\n",
"alert()\n",
"<script src='foo'></script>\n",
"<p></p>\n",
"\n",
"<script src='a>b'></script>\n",
"\n",
"zz <script src='foo'></script>\n",
"<p>zz </p>\n",
"zz <script src=foo></script>\n",
"<p>zz </p>\n",
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
"\n",
`'';!--"<XSS>=&{()}`,
"<p>'';!--&quot;=&amp;{()}</p>\n",
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
"<p></p>\n",
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
"<p></p>\n",
`<IMG SRC="javascript:alert('XSS');">`,
"<p></p>\n",
"<IMG SRC=javascript:alert('XSS')>",
"<p></p>\n",
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
"<p></p>\n",
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
"<p></p>\n",
`<a onmouseover="alert(document.cookie)">xss link</a>`,
"<p>xss link</a></p>\n",
"<a onmouseover=alert(document.cookie)>xss link</a>",
"<p>xss link</a></p>\n",
// XXX: this doesn't pass yet
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
//"<p></p>\n",
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
"<p></p>\n",
`<IMG SRC=# onmouseover="alert('xxs')">`,
"<p></p>\n",
`<IMG SRC= onmouseover="alert('xxs')">`,
"<p></p>\n",
`<IMG onmouseover="alert('xxs')">`,
"<p></p>\n",
"<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
"<p></p>\n",
"<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
"<p></p>\n",
"<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
"<p></p>\n",
`<IMG SRC="javascriptascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x09;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x0A;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`,
"<p></p>\n",
`<IMG SRC=" &#14; javascript:alert('XSS');">`,
"<p></p>\n",
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
"<p></p>\n",
// XXX: this doesn't pass yet
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
//"\n",
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
"<p></p>\n",
// XXX: this doesn't pass yet
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
//"",
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
"<p></p>\n",
"<SCRIPT SRC=//ha.ckers.org/.j>",
"<p></p>\n",
// XXX: this doesn't pass yet
//`<IMG SRC="javascript:alert('XSS')"`,
//"",
// XXX: this doesn't pass yet
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
//"",
}
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
}

View File

@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
return &Latex{}
}
func (options *Latex) GetFlags() int {
return 0
}
// render code chunks using verbatim, or listings if we have a language
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
if lang == "" {

View File

@ -165,6 +165,8 @@ type Renderer interface {
// Header and footer
DocumentHeader(out *bytes.Buffer)
DocumentFooter(out *bytes.Buffer)
GetFlags() int
}
// Callback functions for inline parsing. One such function is defined
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
first := firstPass(p, input)
second := secondPass(p, first)
if renderer.GetFlags()&HTML_SKIP_SCRIPT != 0 {
second = sanitizeHtml(second)
}
return second
}