From e02c392dc656371e48ec50c6b0acdfbfbee31439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vytautas=20=C5=A0altenis?= Date: Wed, 22 Jan 2014 00:45:43 +0200 Subject: [PATCH 1/4] Extract useful code to separate func --- html.go | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/html.go b/html.go index 9c90c76..f57480f 100644 --- a/html.go +++ b/html.go @@ -722,6 +722,29 @@ func isHtmlTag(tag []byte, tagname string) bool { return found } +// Look for a character, but ignore it when it's in any kind of quotes, it +// might be JavaScript +func skipUntilCharIgnoreQuotes(html []byte, start int, char byte) int { + inSingleQuote := false + inDoubleQuote := false + inGraveQuote := false + i := start + for i < len(html) { + switch { + case html[i] == char && !inSingleQuote && !inDoubleQuote && !inGraveQuote: + return i + case html[i] == '\'': + inSingleQuote = !inSingleQuote + case html[i] == '"': + inDoubleQuote = !inDoubleQuote + case html[i] == '`': + inGraveQuote = !inGraveQuote + } + i++ + } + return start +} + func findHtmlTagPos(tag []byte, tagname string) (bool, int) { i := 0 if i < len(tag) && tag[0] != '<' { @@ -750,23 +773,9 @@ func findHtmlTagPos(tag []byte, tagname string) (bool, int) { return false, -1 } - // Now look for closing '>', but ignore it when it's in any kind of quotes, - // it might be JavaScript - inSingleQuote := false - inDoubleQuote := false - inGraveQuote := false - for i < len(tag) { - switch { - case tag[i] == '>' && !inSingleQuote && !inDoubleQuote && !inGraveQuote: - return true, i - case tag[i] == '\'': - inSingleQuote = !inSingleQuote - case tag[i] == '"': - inDoubleQuote = !inDoubleQuote - case tag[i] == '`': - inGraveQuote = !inGraveQuote - } - i++ + rightAngle := skipUntilCharIgnoreQuotes(tag, i, '>') + if rightAngle > i { + return true, rightAngle } return false, -1 From 55cd82008e9b35b9a03a80e06d5a4c4601320211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vytautas=20=C5=A0altenis?= Date: Wed, 22 Jan 2014 01:14:35 +0200 Subject: [PATCH 2/4] Rewrite protection against JavaScript injection This drops the naive approach at \n", - "

alert()

\n", + "alert()\n", "\n", - "

\n", + "\n", + + "\n", + "\n", "zz \n", "

zz

\n", "zz \n", "

zz

\n", + + ``, + "\n", + + `'';!--"=&{()}`, + "

'';!--"=&{()}

\n", + + "", + "

\n", + + "", + "

\n", + + ``, + "

\n", + + "", + "

\n", + + "", + "

\n", + + "", + "

\n", + + `xss link`, + "

xss link

\n", + + "xss link", + "

xss link

\n", + + // XXX: this doesn't pass yet + //`">`, + //"

\n", + + "", + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + "", + "

\n", + + "", + "

\n", + + "", + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + ``, + "

\n", + + // XXX: this doesn't pass yet + //"", + //"\n", + + ``, + "

\n", + + // XXX: this doesn't pass yet + //`<`, + //"", + + "