From be9cbc634a630f5e9d75a3e3d985152b17305b2f Mon Sep 17 00:00:00 2001 From: willnix Date: Sat, 19 Apr 2014 21:59:04 +0000 Subject: [PATCH] tagWhitelist allows alignment attribute now This is the closest I could get to removing everything "unsafe" without introducing an additional regex. --- html.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/html.go b/html.go index 5d79ab6..54744c2 100644 --- a/html.go +++ b/html.go @@ -78,8 +78,15 @@ var ( "tbody", } + + alignments = []string{ + "left", + "right", + "center", + } + urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` - tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)>|<(br|hr)\s?\/?>)$`) + tagWhitelist = regexp.MustCompile(`^(<\/?(` + strings.Join(tags, "|") + `)(\salign="(` + strings.Join(alignments, "|") + `)")?>|<(br|hr)\s?\/?>)$`) anchorClean = regexp.MustCompile(`^(]+")?\s?>|<\/a>)$`) imgClean = regexp.MustCompile(`^(]*")?(\stitle="[^"<>]*")?\s?\/?>)$`) // TODO: improve this regexp to catch all possible entities: