move whitespace stripping to parser, not renderers

2024-03-22 13:40:34 +08:00 · 2011-06-29 15:38:35 -06:00 · 2011-06-29 15:38:35 -06:00 · ae9562f685
commit ae9562f685
parent d3c8225096
4 changed files with 136 additions and 151 deletions
--- a/block.go
+++ b/block.go
@@ -258,7 +258,12 @@ func (parser *Parser) blockHtml(out *bytes.Buffer, data []byte, doRender bool) i
 			if j > 0 {
 				size := i + j
 				if doRender {
-					parser.r.BlockHtml(out, data[:size])
+					// trim newlines
+					end := size
+					for end > 0 && data[end-1] == '\n' {
+						end--
+					}
+					parser.r.BlockHtml(out, data[:end])
 				}
 				return size
 			}
@@ -280,7 +285,12 @@ func (parser *Parser) blockHtml(out *bytes.Buffer, data []byte, doRender bool) i
 				if j > 0 {
 					size := i + j
 					if doRender {
-						parser.r.BlockHtml(out, data[:size])
+						// trim newlines
+						end := size
+						for end > 0 && data[end-1] == '\n' {
+							end--
+						}
+						parser.r.BlockHtml(out, data[:end])
 					}
 					return size
 				}
@@ -326,7 +336,12 @@ func (parser *Parser) blockHtml(out *bytes.Buffer, data []byte, doRender bool) i

 	// the end of the block has been found
 	if doRender {
-		parser.r.BlockHtml(out, data[:i])
+		// trim newlines
+		end := i
+		for end > 0 && data[end-1] == '\n' {
+			end--
+		}
+		parser.r.BlockHtml(out, data[:end])
 	}

 	return i
@@ -931,11 +946,11 @@ func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int)
 	}

 	// get working buffers
-	var work bytes.Buffer
-	var inter bytes.Buffer
+	var rawItem bytes.Buffer
+	var parsed bytes.Buffer

 	// put the first line into the working buffer
-	work.Write(data[beg:end])
+	rawItem.Write(data[beg:end])
 	beg = end

 	// process the following lines
@@ -984,7 +999,7 @@ func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int)
 			}

 			if sublist == 0 {
-				sublist = work.Len()
+				sublist = rawItem.Len()
 			}
 		} else {
 			// how about a nested prefix header?
@@ -1002,7 +1017,7 @@ func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int)
 					break
 				} else {
 					if containsBlankLine {
-						work.WriteByte('\n')
+						rawItem.WriteByte('\n')
 						containsBlock = true
 					}
 				}
@@ -1012,7 +1027,7 @@ func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int)
 		containsBlankLine = false

 		// add the line into the working buffer without prefix
-		work.Write(data[beg+i : end])
+		rawItem.Write(data[beg+i : end])
 		beg = end
 	}

@@ -1021,27 +1036,32 @@ func (parser *Parser) blockListItem(out *bytes.Buffer, data []byte, flags *int)
 		*flags |= LIST_ITEM_CONTAINS_BLOCK
 	}

-	workbytes := work.Bytes()
+	rawItemBytes := rawItem.Bytes()
 	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
 		// intermediate render of block li
-		if sublist > 0 && sublist < len(workbytes) {
-			parser.parseBlock(&inter, workbytes[:sublist])
-			parser.parseBlock(&inter, workbytes[sublist:])
+		if sublist > 0 && sublist < len(rawItemBytes) {
+			parser.parseBlock(&parsed, rawItemBytes[:sublist])
+			parser.parseBlock(&parsed, rawItemBytes[sublist:])
 		} else {
-			parser.parseBlock(&inter, workbytes)
+			parser.parseBlock(&parsed, rawItemBytes)
 		}
 	} else {
 		// intermediate render of inline li
-		if sublist > 0 && sublist < len(workbytes) {
-			parser.parseInline(&inter, workbytes[:sublist])
-			parser.parseBlock(&inter, workbytes[sublist:])
+		if sublist > 0 && sublist < len(rawItemBytes) {
+			parser.parseInline(&parsed, rawItemBytes[:sublist])
+			parser.parseBlock(&parsed, rawItemBytes[sublist:])
 		} else {
-			parser.parseInline(&inter, workbytes)
+			parser.parseInline(&parsed, rawItemBytes)
 		}
 	}

 	// render li itself
-	parser.r.ListItem(out, inter.Bytes(), *flags)
+	parsedBytes := parsed.Bytes()
+	parsedEnd := len(parsedBytes)
+	for parsedEnd > 0 && parsedBytes[parsedEnd-1] == '\n' {
+		parsedEnd--
+	}
+	parser.r.ListItem(out, parsedBytes[:parsedEnd], *flags)

 	return beg
 }
--- a/html.go
+++ b/html.go
@@ -19,6 +19,7 @@ import (
 	"bytes"
 	"fmt"
 	"strconv"
+	"strings"
 )

 const (
@@ -126,10 +127,7 @@ func attrEscape(out *bytes.Buffer, src []byte) {

 func (options *Html) Header(out *bytes.Buffer, text func() bool, level int) {
 	marker := out.Len()
-
-	if marker > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)

 	if options.flags&HTML_TOC != 0 {
 		// headerCount is incremented in htmlTocHeader
@@ -157,28 +155,13 @@ func (options *Html) BlockHtml(out *bytes.Buffer, text []byte) {
 		return
 	}

-	sz := len(text)
-	for sz > 0 && text[sz-1] == '\n' {
-		sz--
-	}
-	org := 0
-	for org < sz && text[org] == '\n' {
-		org++
-	}
-	if org >= sz {
-		return
-	}
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
-	out.Write(text[org:sz])
+	doubleSpace(out)
+	out.Write(text)
 	out.WriteByte('\n')
 }

 func (options *Html) HRule(out *bytes.Buffer) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 	out.WriteString("<hr")
 	out.WriteString(options.closeTag)
 }
@@ -192,44 +175,33 @@ func (options *Html) BlockCode(out *bytes.Buffer, text []byte, lang string) {
 }

 func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)

-	if lang != "" {
-		out.WriteString("<pre><code class=\"")
-
-		for i, cls := 0, 0; i < len(lang); i, cls = i+1, cls+1 {
-			for i < len(lang) && isspace(lang[i]) {
-				i++
-			}
-
-			if i < len(lang) {
-				org := i
-				for i < len(lang) && !isspace(lang[i]) {
-					i++
-				}
-
-				if lang[org] == '.' {
-					org++
-				}
-
-				if cls > 0 {
-					out.WriteByte(' ')
-				}
-				attrEscape(out, []byte(lang[org:]))
-			}
+	// parse out the language names/classes
+	count := 0
+	for _, elt := range strings.Fields(lang) {
+		if elt[0] == '.' {
+			elt = elt[1:]
 		}
+		if len(elt) == 0 {
+			continue
+		}
+		if count == 0 {
+			out.WriteString("<pre><code class=\"")
+		} else {
+			out.WriteByte(' ')
+		}
+		attrEscape(out, []byte(elt))
+		count++
+	}

-		out.WriteString("\">")
-	} else {
+	if count == 0 {
 		out.WriteString("<pre><code>")
+	} else {
+		out.WriteString("\">")
 	}

-	if len(text) > 0 {
-		attrEscape(out, text)
-	}
-
+	attrEscape(out, text)
 	out.WriteString("</code></pre>\n")
 }

@@ -252,33 +224,29 @@ func (options *Html) BlockCodeNormal(out *bytes.Buffer, text []byte, lang string
 *              ~~~~ {.python .numbered}        =>      <pre lang="python"><code>
 */
 func (options *Html) BlockCodeGithub(out *bytes.Buffer, text []byte, lang string) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
+	doubleSpace(out)
+
+	// parse out the language name
+	count := 0
+	for _, elt := range strings.Fields(lang) {
+		if elt[0] == '.' {
+			elt = elt[1:]
+		}
+		if len(elt) == 0 {
+			continue
+		}
+		out.WriteString("<pre lang=\"")
+		attrEscape(out, []byte(elt))
+		out.WriteString("\"><code>")
+		count++
+		break
 	}

-	if len(lang) > 0 {
-		out.WriteString("<pre lang=\"")
-
-		i := 0
-		for i < len(lang) && !isspace(lang[i]) {
-			i++
-		}
-
-		if lang[0] == '.' {
-			attrEscape(out, []byte(lang[1:i]))
-		} else {
-			attrEscape(out, []byte(lang[:i]))
-		}
-
-		out.WriteString("\"><code>")
-	} else {
+	if count == 0 {
 		out.WriteString("<pre><code>")
 	}

-	if len(text) > 0 {
-		attrEscape(out, text)
-	}
-
+	attrEscape(out, text)
 	out.WriteString("</code></pre>\n")
 }

@@ -290,29 +258,23 @@ func (options *Html) BlockQuote(out *bytes.Buffer, text []byte) {
 }

 func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
-	out.WriteString("<table><thead>\n")
+	doubleSpace(out)
+	out.WriteString("<table>\n<thead>\n")
 	out.Write(header)
-	out.WriteString("\n</thead><tbody>\n")
+	out.WriteString("\n</thead>\n<tbody>\n")
 	out.Write(body)
-	out.WriteString("\n</tbody></table>")
+	out.WriteString("\n</tbody>\n</table>")
 }

 func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 	out.WriteString("<tr>\n")
 	out.Write(text)
 	out.WriteString("\n</tr>")
 }

 func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
-	if out.Len() > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)
 	switch align {
 	case TABLE_ALIGNMENT_LEFT:
 		out.WriteString("<td align=\"left\">")
@@ -330,10 +292,8 @@ func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {

 func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
 	marker := out.Len()
+	doubleSpace(out)

-	if marker > 0 {
-		out.WriteByte('\n')
-	}
 	if flags&LIST_TYPE_ORDERED != 0 {
 		out.WriteString("<ol>\n")
 	} else {
@@ -352,19 +312,13 @@ func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {

 func (options *Html) ListItem(out *bytes.Buffer, text []byte, flags int) {
 	out.WriteString("<li>")
-	size := len(text)
-	for size > 0 && text[size-1] == '\n' {
-		size--
-	}
-	out.Write(text[:size])
+	out.Write(text)
 	out.WriteString("</li>\n")
 }

 func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
 	marker := out.Len()
-	if marker > 0 {
-		out.WriteByte('\n')
-	}
+	doubleSpace(out)

 	out.WriteString("<p>")
 	if !text() {
@@ -375,10 +329,11 @@ func (options *Html) Paragraph(out *bytes.Buffer, text func() bool) {
 }

 func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
-	if len(link) == 0 {
-		return
-	}
 	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL {
+		// mark it but don't link it if it is not a safe link: no smartypants
+		out.WriteString("<tt>")
+		attrEscape(out, link)
+		out.WriteString("</tt>")
 		return
 	}

@@ -389,16 +344,14 @@ func (options *Html) AutoLink(out *bytes.Buffer, link []byte, kind int) {
 	attrEscape(out, link)
 	out.WriteString("\">")

-	/*
-	 * Pretty print: if we get an email address as
-	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
-	 * want to print the `mailto:` prefix
-	 */
+	// Pretty print: if we get an email address as
+	// an actual URI, e.g. `mailto:foo@bar.com`, we don't
+	// want to print the `mailto:` prefix
 	switch {
 	case bytes.HasPrefix(link, []byte("mailto://")):
-		attrEscape(out, link[9:])
+		attrEscape(out, link[len("mailto://"):])
 	case bytes.HasPrefix(link, []byte("mailto:")):
-		attrEscape(out, link[7:])
+		attrEscape(out, link[len("mailto:"):])
 	default:
 		attrEscape(out, link)
 	}
@@ -413,9 +366,6 @@ func (options *Html) CodeSpan(out *bytes.Buffer, text []byte) {
 }

 func (options *Html) DoubleEmphasis(out *bytes.Buffer, text []byte) {
-	if len(text) == 0 {
-		return
-	}
 	out.WriteString("<strong>")
 	out.Write(text)
 	out.WriteString("</strong>")
@@ -435,9 +385,6 @@ func (options *Html) Image(out *bytes.Buffer, link []byte, title []byte, alt []b
 		return
 	}

-	if len(link) == 0 {
-		return
-	}
 	out.WriteString("<img src=\"")
 	attrEscape(out, link)
 	out.WriteString("\" alt=\"")
@@ -461,10 +408,18 @@ func (options *Html) LineBreak(out *bytes.Buffer) {

 func (options *Html) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
 	if options.flags&HTML_SKIP_LINKS != 0 {
+		// write the link text out but don't link it, just mark it with typewriter font
+		out.WriteString("<tt>")
+		attrEscape(out, content)
+		out.WriteString("</tt>")
 		return
 	}

 	if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) {
+		// write the link text out but don't link it, just mark it with typewriter font
+		out.WriteString("<tt>")
+		attrEscape(out, content)
+		out.WriteString("</tt>")
 		return
 	}

@@ -497,18 +452,12 @@ func (options *Html) RawHtmlTag(out *bytes.Buffer, text []byte) {
 }

 func (options *Html) TripleEmphasis(out *bytes.Buffer, text []byte) {
-	if len(text) == 0 {
-		return
-	}
 	out.WriteString("<strong><em>")
 	out.Write(text)
 	out.WriteString("</em></strong>")
 }

 func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
-	if len(text) == 0 {
-		return
-	}
 	out.WriteString("<del>")
 	out.Write(text)
 	out.WriteString("</del>")
@@ -718,3 +667,9 @@ func isHtmlTag(tag []byte, tagname string) bool {

 	return isspace(tag[i]) || tag[i] == '>'
 }
+
+func doubleSpace(out *bytes.Buffer) {
+	if out.Len() > 0 {
+		out.WriteByte('\n')
+	}
+}
--- a/inline.go
+++ b/inline.go
@@ -139,7 +139,9 @@ func inlineCodeSpan(parser *Parser, out *bytes.Buffer, data []byte, offset int)
 	}

 	// render the code span
-	parser.r.CodeSpan(out, data[fBegin:fEnd])
+	if fBegin != fEnd {
+		parser.r.CodeSpan(out, data[fBegin:fEnd])
+	}

 	return end

@@ -409,7 +411,7 @@ func inlineLink(parser *Parser, out *bytes.Buffer, data []byte, offset int) int
 	}

 	// links need something to click on and somewhere to go
-	if len(uLink) == 0 || content.Len() == 0 {
+	if len(uLink) == 0 || (!isImg && content.Len() == 0) {
 		return 0
 	}

@@ -439,7 +441,9 @@ func inlineLAngle(parser *Parser, out *bytes.Buffer, data []byte, offset int) in
 		if altype != LINK_TYPE_NOT_AUTOLINK {
 			var uLink bytes.Buffer
 			unescapeText(&uLink, data[1:end+1-2])
-			parser.r.AutoLink(out, uLink.Bytes(), altype)
+			if uLink.Len() > 0 {
+				parser.r.AutoLink(out, uLink.Bytes(), altype)
+			}
 		} else {
 			parser.r.RawHtmlTag(out, data[:end])
 		}
@@ -611,7 +615,9 @@ func inlineAutoLink(parser *Parser, out *bytes.Buffer, data []byte, offset int)
 	var uLink bytes.Buffer
 	unescapeText(&uLink, data[:linkEnd])

-	parser.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
+	if uLink.Len() > 0 {
+		parser.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
+	}

 	return linkEnd - rewind
 }
@@ -879,11 +885,13 @@ func inlineHelperEmph2(parser *Parser, out *bytes.Buffer, data []byte, c byte) i
 			var work bytes.Buffer
 			parser.parseInline(&work, data[:i])

-			// pick the right renderer
-			if c == '~' {
-				parser.r.StrikeThrough(out, work.Bytes())
-			} else {
-				parser.r.DoubleEmphasis(out, work.Bytes())
+			if work.Len() > 0 {
+				// pick the right renderer
+				if c == '~' {
+					parser.r.StrikeThrough(out, work.Bytes())
+				} else {
+					parser.r.DoubleEmphasis(out, work.Bytes())
+				}
 			}
 			return i + 2
 		}
@@ -915,7 +923,9 @@ func inlineHelperEmph3(parser *Parser, out *bytes.Buffer, data []byte, offset in
 			var work bytes.Buffer

 			parser.parseInline(&work, data[:i])
-			parser.r.TripleEmphasis(out, work.Bytes())
+			if work.Len() > 0 {
+				parser.r.TripleEmphasis(out, work.Bytes())
+			}
 			return i + 3
 		case (i+1 < len(data) && data[i+1] == c):
 			// double symbol found, hand over to emph1
--- a/inline_test.go
+++ b/inline_test.go
@@ -224,7 +224,7 @@ func TestCodeSpan(t *testing.T) {
 		"<p>a single multi-tick marker with ``` no text</p>\n",

 		"markers with ` ` a space\n",
-		"<p>markers with <code></code> a space</p>\n",
+		"<p>markers with  a space</p>\n",

 		"`source code` and a `stray\n",
 		"<p><code>source code</code> and a `stray</p>\n",