mirror of
https://github.com/russross/blackfriday.git
synced 2024-03-22 13:40:34 +08:00
Merge pull request #50 from rtfb/master
Better protection against JavaScript injection
This commit is contained in:
commit
d643453f1e
|
@ -89,6 +89,11 @@ All features of upskirt are supported, including:
|
||||||
known inputs that make it crash. If you find one, please let me
|
known inputs that make it crash. If you find one, please let me
|
||||||
know and send me the input that does it.
|
know and send me the input that does it.
|
||||||
|
|
||||||
|
NOTE: "safety" in this context means *runtime safety only*. It is
|
||||||
|
not bullet proof against JavaScript injections, though we're working
|
||||||
|
on it (https://github.com/russross/blackfriday/issues/11 tracks the
|
||||||
|
progress).
|
||||||
|
|
||||||
* **Fast processing**. It is fast enough to render on-demand in
|
* **Fast processing**. It is fast enough to render on-demand in
|
||||||
most web applications without having to cache the output.
|
most web applications without having to cache the output.
|
||||||
|
|
||||||
|
|
152
html.go
152
html.go
|
@ -18,6 +18,7 @@ package blackfriday
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
@ -28,7 +29,7 @@ const (
|
||||||
HTML_SKIP_STYLE // skip embedded <style> elements
|
HTML_SKIP_STYLE // skip embedded <style> elements
|
||||||
HTML_SKIP_IMAGES // skip embedded images
|
HTML_SKIP_IMAGES // skip embedded images
|
||||||
HTML_SKIP_LINKS // skip all links
|
HTML_SKIP_LINKS // skip all links
|
||||||
HTML_SKIP_SCRIPT // skip embedded <script> elements
|
HTML_SANITIZE_OUTPUT // strip output of everything that's not known to be safe
|
||||||
HTML_SAFELINK // only link to trusted protocols
|
HTML_SAFELINK // only link to trusted protocols
|
||||||
HTML_NOFOLLOW_LINKS // only link with rel="nofollow"
|
HTML_NOFOLLOW_LINKS // only link with rel="nofollow"
|
||||||
HTML_TOC // generate a table of contents
|
HTML_TOC // generate a table of contents
|
||||||
|
@ -41,6 +42,41 @@ const (
|
||||||
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
|
HTML_SMARTYPANTS_LATEX_DASHES // enable LaTeX-style dashes (with HTML_USE_SMARTYPANTS)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
tags = []string{
|
||||||
|
"b",
|
||||||
|
"blockquote",
|
||||||
|
"code",
|
||||||
|
"del",
|
||||||
|
"dd",
|
||||||
|
"dl",
|
||||||
|
"dt",
|
||||||
|
"em",
|
||||||
|
"h1",
|
||||||
|
"h2",
|
||||||
|
"h3",
|
||||||
|
"h4",
|
||||||
|
"h5",
|
||||||
|
"h6",
|
||||||
|
"i",
|
||||||
|
"kbd",
|
||||||
|
"li",
|
||||||
|
"ol",
|
||||||
|
"p",
|
||||||
|
"pre",
|
||||||
|
"s",
|
||||||
|
"sup",
|
||||||
|
"sub",
|
||||||
|
"strong",
|
||||||
|
"strike",
|
||||||
|
"ul",
|
||||||
|
}
|
||||||
|
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
|
||||||
|
tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`)
|
||||||
|
anchorClean = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>|<\/a>)$`)
|
||||||
|
imgClean = regexp.MustCompile(`^(<img\ssrc="` + urlRe + `"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$`)
|
||||||
|
)
|
||||||
|
|
||||||
// Html is a type that implements the Renderer interface for HTML output.
|
// Html is a type that implements the Renderer interface for HTML output.
|
||||||
//
|
//
|
||||||
// Do not create this directly, instead use the HtmlRenderer function.
|
// Do not create this directly, instead use the HtmlRenderer function.
|
||||||
|
@ -138,6 +174,10 @@ func attrEscape(out *bytes.Buffer, src []byte) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (options *Html) GetFlags() int {
|
||||||
|
return options.flags
|
||||||
|
}
|
||||||
|
|
||||||
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
|
func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
|
||||||
marker := out.Len()
|
marker := out.Len()
|
||||||
doubleSpace(out)
|
doubleSpace(out)
|
||||||
|
@ -169,32 +209,10 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
doubleSpace(out)
|
doubleSpace(out)
|
||||||
if options.flags&HTML_SKIP_SCRIPT != 0 {
|
out.Write(text)
|
||||||
out.Write(stripTag(string(text), "script", "p"))
|
|
||||||
} else {
|
|
||||||
out.Write(text)
|
|
||||||
}
|
|
||||||
out.WriteByte('\n')
|
out.WriteByte('\n')
|
||||||
}
|
}
|
||||||
|
|
||||||
func stripTag(text, tag, newTag string) []byte {
|
|
||||||
closeNewTag := fmt.Sprintf("</%s>", newTag)
|
|
||||||
i := 0
|
|
||||||
for i < len(text) && text[i] != '<' {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
if i == len(text) {
|
|
||||||
return []byte(text)
|
|
||||||
}
|
|
||||||
found, end := findHtmlTagPos([]byte(text[i:]), tag)
|
|
||||||
closeTag := fmt.Sprintf("</%s>", tag)
|
|
||||||
noOpen := text
|
|
||||||
if found {
|
|
||||||
noOpen = text[0:i+1] + newTag + text[end:]
|
|
||||||
}
|
|
||||||
return []byte(strings.Replace(noOpen, closeTag, closeNewTag, -1))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (options *Html) HRule(out *bytes.Buffer) {
|
func (options *Html) HRule(out *bytes.Buffer) {
|
||||||
doubleSpace(out)
|
doubleSpace(out)
|
||||||
out.WriteString("<hr")
|
out.WriteString("<hr")
|
||||||
|
@ -522,9 +540,6 @@ func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
|
||||||
if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
|
if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if options.flags&HTML_SKIP_SCRIPT != 0 && isHtmlTag(text, "script") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
out.Write(text)
|
out.Write(text)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -726,6 +741,29 @@ func isHtmlTag(tag []byte, tagname string) bool {
|
||||||
return found
|
return found
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look for a character, but ignore it when it's in any kind of quotes, it
|
||||||
|
// might be JavaScript
|
||||||
|
func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int {
|
||||||
|
inSingleQuote := false
|
||||||
|
inDoubleQuote := false
|
||||||
|
inGraveQuote := false
|
||||||
|
i := start
|
||||||
|
for i < len(html) {
|
||||||
|
switch {
|
||||||
|
case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
|
||||||
|
return i
|
||||||
|
case html[i] == '\'':
|
||||||
|
inSingleQuote = !inSingleQuote
|
||||||
|
case html[i] == '"':
|
||||||
|
inDoubleQuote = !inDoubleQuote
|
||||||
|
case html[i] == '`':
|
||||||
|
inGraveQuote = !inGraveQuote
|
||||||
|
}
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
return start
|
||||||
|
}
|
||||||
|
|
||||||
func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
||||||
i := 0
|
i := 0
|
||||||
if i < len(tag) && tag[0] != '<' {
|
if i < len(tag) && tag[0] != '<' {
|
||||||
|
@ -754,28 +792,54 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) {
|
||||||
return false, -1
|
return false, -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now look for closing '>', but ignore it when it's in any kind of quotes,
|
rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>')
|
||||||
// it might be JavaScript
|
if rightAngle > i {
|
||||||
inSingleQuote := false
|
return true, rightAngle
|
||||||
inDoubleQuote := false
|
|
||||||
inGraveQuote := false
|
|
||||||
for i < len(tag) {
|
|
||||||
switch {
|
|
||||||
case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote:
|
|
||||||
return true, i
|
|
||||||
case tag[i] == '\'':
|
|
||||||
inSingleQuote = !inSingleQuote
|
|
||||||
case tag[i] == '"':
|
|
||||||
inDoubleQuote = !inDoubleQuote
|
|
||||||
case tag[i] == '`':
|
|
||||||
inGraveQuote = !inGraveQuote
|
|
||||||
}
|
|
||||||
i++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false, -1
|
return false, -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sanitizeHtml(html []byte) []byte {
|
||||||
|
var result []byte
|
||||||
|
for string(html) != "" {
|
||||||
|
skip, tag, rest := findHtmlTag(html)
|
||||||
|
html = rest
|
||||||
|
result = append(result, skip...)
|
||||||
|
result = append(result, sanitizeTag(tag)...)
|
||||||
|
}
|
||||||
|
return append(result, []byte("\n")...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sanitizeTag(tag []byte) []byte {
|
||||||
|
if tagWhitelist.Match(tag) || anchorClean.Match(tag) || imgClean.Match(tag) {
|
||||||
|
return tag
|
||||||
|
} else {
|
||||||
|
return []byte("")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func skipUntilChar(text []byte, start int, char byte) int {
|
||||||
|
i := start
|
||||||
|
for i < len(text) && text[i] != char {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
|
func findHtmlTag(html []byte) (skip, tag, rest []byte) {
|
||||||
|
start := skipUntilChar(html, 0, '<')
|
||||||
|
rightAngle := skipUntilCharIgnoreQuotes(html, start, '>')
|
||||||
|
if rightAngle > start {
|
||||||
|
skip = html[0:start]
|
||||||
|
tag = html[start : rightAngle+1]
|
||||||
|
rest = html[rightAngle+1:]
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return []byte(""), []byte(""), []byte("")
|
||||||
|
}
|
||||||
|
|
||||||
func skipSpace(tag []byte, i int) int {
|
func skipSpace(tag []byte, i int) int {
|
||||||
for i < len(tag) && isspace(tag[i]) {
|
for i < len(tag) && isspace(tag[i]) {
|
||||||
i++
|
i++
|
||||||
|
|
107
inline_test.go
107
inline_test.go
|
@ -90,18 +90,119 @@ func TestRawHtmlTag(t *testing.T) {
|
||||||
"<p>alert()</p>\n",
|
"<p>alert()</p>\n",
|
||||||
|
|
||||||
"<script>alert()</script>\n",
|
"<script>alert()</script>\n",
|
||||||
"<p>alert()</p>\n",
|
"alert()\n",
|
||||||
|
|
||||||
"<script src='foo'></script>\n",
|
"<script src='foo'></script>\n",
|
||||||
"<p></p>\n",
|
"\n",
|
||||||
|
|
||||||
|
"<script src='a>b'></script>\n",
|
||||||
|
"\n",
|
||||||
|
|
||||||
"zz <script src='foo'></script>\n",
|
"zz <script src='foo'></script>\n",
|
||||||
"<p>zz </p>\n",
|
"<p>zz </p>\n",
|
||||||
|
|
||||||
"zz <script src=foo></script>\n",
|
"zz <script src=foo></script>\n",
|
||||||
"<p>zz </p>\n",
|
"<p>zz </p>\n",
|
||||||
|
|
||||||
|
`<script><script src="http://example.com/exploit.js"></SCRIPT></script>`,
|
||||||
|
"\n",
|
||||||
|
|
||||||
|
`'';!--"<XSS>=&{()}`,
|
||||||
|
"<p>'';!--"=&{()}</p>\n",
|
||||||
|
|
||||||
|
"<SCRIPT SRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<SCRIPT \nSRC=http://ha.ckers.org/xss.js></SCRIPT>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="javascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=JaVaScRiPt:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<a onmouseover="alert(document.cookie)">xss link</a>`,
|
||||||
|
"<p>xss link</a></p>\n",
|
||||||
|
|
||||||
|
"<a onmouseover=alert(document.cookie)>xss link</a>",
|
||||||
|
"<p>xss link</a></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`,
|
||||||
|
//"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC=# onmouseover="alert('xxs')">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC= onmouseover="alert('xxs')">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG onmouseover="alert('xxs')">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<IMG SRC=javascript:alert('XSS')>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="javascriptascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="jav	ascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="jav
ascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<IMG SRC="  javascript:alert('XSS');">`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
`<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//"<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>",
|
||||||
|
//"\n",
|
||||||
|
|
||||||
|
`<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT>`,
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//`<<SCRIPT>alert("XSS");//<</SCRIPT>`,
|
||||||
|
//"",
|
||||||
|
|
||||||
|
"<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
"<SCRIPT SRC=//ha.ckers.org/.j>",
|
||||||
|
"<p></p>\n",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//`<IMG SRC="javascript:alert('XSS')"`,
|
||||||
|
//"",
|
||||||
|
|
||||||
|
// XXX: this doesn't pass yet
|
||||||
|
//"<iframe src=http://ha.ckers.org/scriptlet.html <",
|
||||||
|
//"",
|
||||||
}
|
}
|
||||||
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SKIP_SCRIPT)
|
doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEmphasis(t *testing.T) {
|
func TestEmphasis(t *testing.T) {
|
||||||
|
|
4
latex.go
4
latex.go
|
@ -34,6 +34,10 @@ func LatexRenderer(flags int) Renderer {
|
||||||
return &Latex{}
|
return &Latex{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (options *Latex) GetFlags() int {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
// render code chunks using verbatim, or listings if we have a language
|
// render code chunks using verbatim, or listings if we have a language
|
||||||
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
|
func (options *Latex) BlockCode(out *bytes.Buffer, text []byte, lang string) {
|
||||||
if lang == "" {
|
if lang == "" {
|
||||||
|
|
|
@ -165,6 +165,8 @@ type Renderer interface {
|
||||||
// Header and footer
|
// Header and footer
|
||||||
DocumentHeader(out *bytes.Buffer)
|
DocumentHeader(out *bytes.Buffer)
|
||||||
DocumentFooter(out *bytes.Buffer)
|
DocumentFooter(out *bytes.Buffer)
|
||||||
|
|
||||||
|
GetFlags() int
|
||||||
}
|
}
|
||||||
|
|
||||||
// Callback functions for inline parsing. One such function is defined
|
// Callback functions for inline parsing. One such function is defined
|
||||||
|
@ -231,7 +233,7 @@ func MarkdownCommon(input []byte) []byte {
|
||||||
htmlFlags |= HTML_USE_SMARTYPANTS
|
htmlFlags |= HTML_USE_SMARTYPANTS
|
||||||
htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
|
htmlFlags |= HTML_SMARTYPANTS_FRACTIONS
|
||||||
htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
|
htmlFlags |= HTML_SMARTYPANTS_LATEX_DASHES
|
||||||
htmlFlags |= HTML_SKIP_SCRIPT
|
htmlFlags |= HTML_SANITIZE_OUTPUT
|
||||||
renderer := HtmlRenderer(htmlFlags, "", "")
|
renderer := HtmlRenderer(htmlFlags, "", "")
|
||||||
|
|
||||||
// set up the parser
|
// set up the parser
|
||||||
|
@ -291,6 +293,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
|
||||||
first := firstPass(p, input)
|
first := firstPass(p, input)
|
||||||
second := secondPass(p, first)
|
second := secondPass(p, first)
|
||||||
|
|
||||||
|
if renderer.GetFlags()&HTML_SANITIZE_OUTPUT != 0 {
|
||||||
|
second = sanitizeHtml(second)
|
||||||
|
}
|
||||||
|
|
||||||
return second
|
return second
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user