Merge pull request #70 from mprobst/master

fix: Handle all different token types that the parser can emit (d'oh).
2024-03-22 13:40:34 +08:00 · 2014-05-01 21:59:07 +03:00 · 2014-05-01 21:59:07 +03:00 · aeb569ff46
commit aeb569ff46
parent 60ba757eaa f9b7593e65
2 changed files with 18 additions and 1 deletions
--- a/inline_test.go
+++ b/inline_test.go
@ -201,6 +201,16 @@ func TestRawHtmlTag(t *testing.T) {
 		"<iframe src=http://ha.ckers.org/scriptlet.html <",
 		// The hyperlink gets linkified, the <iframe> gets escaped
 		"<p>&lt;iframe src=<a href=\"http://ha.ckers.org/scriptlet.html\">http://ha.ckers.org/scriptlet.html</a> &lt;</p>\n",
+
+		// Additional token types: SelfClosing, Comment, DocType.
+		"<br/>",
+		"<p><br></p>\n",
+
+		"<!-- Comment -->",
+		"<!-- Comment -->\n",
+
+		"<!DOCTYPE test>",
+		"<p>&lt;!DOCTYPE test&gt;</p>\n",
 	}
 	doTestsInlineParam(t, tests, 0, HTML_SKIP_STYLE|HTML_SANITIZE_OUTPUT)
 }
--- a/sanitize.go
+++ b/sanitize.go
@ -64,7 +64,7 @@ func sanitizeHtmlSafe(input []byte) []byte {
 		case html.TextToken:
 			// Text is written escaped.
 			wr.WriteString(tokenizer.Token().String())
-		case html.StartTagToken:
+		case html.SelfClosingTagToken, html.StartTagToken:
 			// HTML tags are escaped unless whitelisted.
 			tag, hasAttributes := tokenizer.TagName()
 			tagName := string(tag)
@ -105,7 +105,14 @@ func sanitizeHtmlSafe(input []byte) []byte {
 			} else {
 				wr.WriteString(html.EscapeString(string(tokenizer.Raw())))
 			}
+		case html.CommentToken:
+			// Comments are not really expected, but harmless.
+			wr.Write(tokenizer.Raw())
+		case html.DoctypeToken:
+			// Escape DOCTYPEs; entities etc. can be dangerous.
+			wr.WriteString(html.EscapeString(string(tokenizer.Raw())))
 		default:
+			tokenizer.Token()
 			panic(fmt.Errorf("Unexpected token type %v", t))
 		}
 	}