From bd60e3691bab374601f6fb1b36138db8ab435e05 Mon Sep 17 00:00:00 2001
From: Russ Ross <russ@dixie.edu>
Date: Fri, 1 Jul 2011 14:13:26 -0600
Subject: [PATCH] removing more redundant checks, additional cleanup of block
 parsing

---
 block.go | 272 ++++++++++++++++++++++++++++---------------------------
 html.go  |   6 +-
 2 files changed, 143 insertions(+), 135 deletions(-)

diff --git a/block.go b/block.go
index d2aec77..8dfade5 100644
--- a/block.go
+++ b/block.go
@@ -62,19 +62,16 @@ func (parser *Parser) parseBlock(out *bytes.Buffer, data []byte) {
 			continue
 		}
 
-		// horizontal rule:
+		// indented code block:
 		//
-		// ------
-		// or
-		// ******
-		// or
-		// ______
-		if parser.isHRule(data) {
-			parser.r.HRule(out)
-			var i int
-			for i = 0; data[i] != '\n'; i++ {
-			}
-			data = data[i:]
+		//     func max(a, b int) int {
+		//         if a > b {
+		//             return a
+		//         }
+		//         return b
+		//      }
+		if parser.blockCodePrefix(data) > 0 {
+			data = data[parser.blockCode(out, data):]
 			continue
 		}
 
@@ -95,17 +92,20 @@ func (parser *Parser) parseBlock(out *bytes.Buffer, data []byte) {
 			}
 		}
 
-		// table:
+		// horizontal rule:
 		//
-		// Name  | Age | Phone
-		// ------|-----|---------
-		// Bob   | 31  | 555-1234
-		// Alice | 27  | 555-4321
-		if parser.flags&EXTENSION_TABLES != 0 {
-			if i := parser.blockTable(out, data); i > 0 {
-				data = data[i:]
-				continue
+		// ------
+		// or
+		// ******
+		// or
+		// ______
+		if parser.isHRule(data) {
+			parser.r.HRule(out)
+			var i int
+			for i = 0; data[i] != '\n'; i++ {
 			}
+			data = data[i:]
+			continue
 		}
 
 		// block quote:
@@ -117,17 +117,17 @@ func (parser *Parser) parseBlock(out *bytes.Buffer, data []byte) {
 			continue
 		}
 
-		// indented code block:
+		// table:
 		//
-		//     func max(a, b int) int {
-		//         if a > b {
-		//             return a
-		//         }
-		//         return b
-		//      }
-		if parser.blockCodePrefix(data) > 0 {
-			data = data[parser.blockCode(out, data):]
-			continue
+		// Name  | Age | Phone
+		// ------|-----|---------
+		// Bob   | 31  | 555-1234
+		// Alice | 27  | 555-4321
+		if parser.flags&EXTENSION_TABLES != 0 {
+			if i := parser.blockTable(out, data); i > 0 {
+				data = data[i:]
+				continue
+			}
 		}
 
 		// an itemized/unordered list:
@@ -573,10 +573,11 @@ func (parser *Parser) isFencedCode(data []byte, syntax **string, oldmarker strin
 		*syntax = &language
 	}
 
-	for ; data[i] != '\n'; i++ {
-		if !isspace(data[i]) {
-			return
-		}
+	for data[i] == ' ' {
+		i++
+	}
+	if data[i] != '\n' {
+		return
 	}
 
 	skip = i + 1
@@ -586,41 +587,37 @@ func (parser *Parser) isFencedCode(data []byte, syntax **string, oldmarker strin
 func (parser *Parser) blockFencedCode(out *bytes.Buffer, data []byte) int {
 	var lang *string
 	beg, marker := parser.isFencedCode(data, &lang, "")
-	if beg == 0 {
+	if beg == 0 || beg >= len(data) {
 		return 0
 	}
 
 	var work bytes.Buffer
 
-	for beg < len(data) {
+	for {
+		// invariant: beg < len(data) (checked on entry; the loop returns below before beg can reach len(data))
+
+		// check for the end of the code block
 		fenceEnd, _ := parser.isFencedCode(data[beg:], nil, marker)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
 		}
 
-		var end int
-		for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ {
+		// copy the current line
+		end := beg
+		for data[end] != '\n' {
+			end++
 		}
+		end++
 
-		if beg < end {
-			// verbatim copy to the working buffer
-			if parser.isEmpty(data[beg:]) > 0 {
-				work.WriteByte('\n')
-			} else {
-				work.Write(data[beg:end])
-			}
-		}
-		beg = end
-
-		// did we find the end of the buffer without a closing marker?
-		if beg >= len(data) {
+		// did we reach the end of the buffer without a closing marker?
+		if end >= len(data) {
 			return 0
 		}
-	}
 
-	if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' {
-		work.WriteByte('\n')
+		// verbatim copy to the working buffer
+		work.Write(data[beg:end])
+		beg = end
 	}
 
 	syntax := ""
@@ -634,168 +631,175 @@ func (parser *Parser) blockFencedCode(out *bytes.Buffer, data []byte) int {
 }
 
 func (parser *Parser) blockTable(out *bytes.Buffer, data []byte) int {
-	var headerWork bytes.Buffer
-	i, columns, colData := parser.blockTableHeader(&headerWork, data)
+	var header bytes.Buffer
+	i, columns := parser.blockTableHeader(&header, data)
 	if i == 0 {
 		return 0
 	}
 
-	var bodyWork bytes.Buffer
+	var body bytes.Buffer
 
 	for i < len(data) {
 		pipes, rowStart := 0, i
-		for ; i < len(data) && data[i] != '\n'; i++ {
+		for ; data[i] != '\n'; i++ {
 			if data[i] == '|' {
 				pipes++
 			}
 		}
 
-		if pipes == 0 || i == len(data) {
+		if pipes == 0 {
 			i = rowStart
 			break
 		}
 
-		parser.blockTableRow(&bodyWork, data[rowStart:i], columns, colData)
+		// include the newline in data sent to blockTableRow
 		i++
+		parser.blockTableRow(&body, data[rowStart:i], columns)
 	}
 
-	parser.r.Table(out, headerWork.Bytes(), bodyWork.Bytes(), colData)
+	parser.r.Table(out, header.Bytes(), body.Bytes(), columns)
 
 	return i
 }
 
-func (parser *Parser) blockTableHeader(out *bytes.Buffer, data []byte) (size int, columns int, columnData []int) {
-	i, pipes := 0, 0
-	columnData = []int{}
-	for i = 0; i < len(data) && data[i] != '\n'; i++ {
+func (parser *Parser) blockTableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
+	i := 0
+	colCount := 1
+	for i = 0; data[i] != '\n'; i++ {
 		if data[i] == '|' {
-			pipes++
+			colCount++
 		}
 	}
 
-	if i == len(data) || pipes == 0 {
-		return 0, 0, columnData
+	// doesn't look like a table header
+	if colCount == 1 {
+		return
 	}
 
-	headerEnd := i
+	// include the newline in the data sent to blockTableRow
+	header := data[:i+1]
 
+	// column count ignores pipes at beginning or end of line
 	if data[0] == '|' {
-		pipes--
+		colCount--
 	}
-
 	if i > 2 && data[i-1] == '|' {
-		pipes--
+		colCount--
 	}
 
-	columns = pipes + 1
-	columnData = make([]int, columns)
+	columns = make([]int, colCount)
 
-	// parse the header underline
+	// move on to the header underline
 	i++
-	if i < len(data) && data[i] == '|' {
+	if i >= len(data) {
+		return
+	}
+
+	if data[i] == '|' {
+		i++
+	}
+	for data[i] == ' ' {
 		i++
 	}
 
-	underEnd := i
-	for underEnd < len(data) && data[underEnd] != '\n' {
-		underEnd++
-	}
-
+	// each column of the underline matches / *:?-+:? *|/ where dashes plus colons total at least 3;
+	// the trailing | is optional on the last column
 	col := 0
-	for ; col < columns && i < underEnd; col++ {
+	for data[i] != '\n' {
 		dashes := 0
 
-		for i < underEnd && data[i] == ' ' {
-			i++
-		}
-
 		if data[i] == ':' {
 			i++
-			columnData[col] |= TABLE_ALIGNMENT_LEFT
+			columns[col] |= TABLE_ALIGNMENT_LEFT
 			dashes++
 		}
-
-		for i < underEnd && data[i] == '-' {
+		for data[i] == '-' {
 			i++
 			dashes++
 		}
-
-		if i < underEnd && data[i] == ':' {
+		if data[i] == ':' {
 			i++
-			columnData[col] |= TABLE_ALIGNMENT_RIGHT
+			columns[col] |= TABLE_ALIGNMENT_RIGHT
 			dashes++
 		}
-
-		for i < underEnd && data[i] == ' ' {
+		for data[i] == ' ' {
 			i++
 		}
 
-		if i < underEnd && data[i] != '|' {
-			break
-		}
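+		// decide how this column ends; NOTE(review): col is never bounded by len(columns) in this loop, so an underline with extra columns can index past it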
+		switch {
+		case dashes < 3:
+			// not a valid column
+			return
 
-		if dashes < 3 {
-			break
-		}
+		case data[i] == '|':
+			// marker found, now skip past trailing whitespace
+			col++
+			i++
+			for data[i] == ' ' {
+				i++
+			}
 
-		i++
+		case data[i] != '|' && col+1 < colCount:
+			// something else found where marker was required
+			return
+
+		case data[i] == '\n':
+			// marker is optional for the last column
+			col++
+
+		default:
+			// trailing junk found after last column
+			return
+		}
+	}
+	if col != colCount {
+		return
 	}
 
-	if col < columns {
-		return 0, 0, columnData
-	}
-
-	parser.blockTableRow(out, data[:headerEnd], columns, columnData)
-	size = underEnd + 1
+	parser.blockTableRow(out, header, columns)
+	size = i + 1
 	return
 }
 
-func (parser *Parser) blockTableRow(out *bytes.Buffer, data []byte, columns int, colData []int) {
+func (parser *Parser) blockTableRow(out *bytes.Buffer, data []byte, columns []int) {
 	i, col := 0, 0
 	var rowWork bytes.Buffer
 
-	if i < len(data) && data[i] == '|' {
+	if data[i] == '|' {
 		i++
 	}
 
-	for col = 0; col < columns && i < len(data); col++ {
-		for i < len(data) && isspace(data[i]) {
+	for col = 0; col < len(columns) && data[i] != '\n'; col++ {
+		for data[i] == ' ' {
 			i++
 		}
 
 		cellStart := i
 
-		for i < len(data) && data[i] != '|' {
+		for data[i] != '|' && data[i] != '\n' {
 			i++
 		}
 
-		cellEnd := i - 1
+		cellEnd := i
+		i++
 
-		for cellEnd > cellStart && isspace(data[cellEnd]) {
+		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 			cellEnd--
 		}
 
 		var cellWork bytes.Buffer
-		parser.parseInline(&cellWork, data[cellStart:cellEnd+1])
-
-		cdata := 0
-		if col < len(colData) {
-			cdata = colData[col]
-		}
-		parser.r.TableCell(&rowWork, cellWork.Bytes(), cdata)
-
-		i++
+		parser.parseInline(&cellWork, data[cellStart:cellEnd])
+		parser.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
 	}
 
-	for ; col < columns; col++ {
-		emptyCell := []byte{}
-		cdata := 0
-		if col < len(colData) {
-			cdata = colData[col]
-		}
-		parser.r.TableCell(&rowWork, emptyCell, cdata)
+	// pad it out with empty columns to get the right number
+	for ; col < len(columns); col++ {
+		parser.r.TableCell(&rowWork, nil, columns[col])
 	}
 
+	// silently ignore rows with too many cells
+
 	parser.r.TableRow(out, rowWork.Bytes())
 }
 
@@ -819,8 +823,11 @@ func (parser *Parser) blockQuote(out *bytes.Buffer, data []byte) int {
 	var raw bytes.Buffer
 	beg, end := 0, 0
 	for beg < len(data) {
-		for end = beg + 1; data[end-1] != '\n'; end++ {
+		end = beg
+		for data[end] != '\n' {
+			end++
 		}
+		end++
 
 		if pre := parser.blockQuotePrefix(data[beg:]); pre > 0 {
 			// string the prefix
@@ -848,12 +855,13 @@ func (parser *Parser) blockQuote(out *bytes.Buffer, data []byte) int {
 
 // returns prefix length for block code
 func (parser *Parser) blockCodePrefix(data []byte) int {
-	if len(data) > 3 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
+	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 		return 4
 	}
 	return 0
 }
 
+// TODO: continue redundant end-of-buffer check removal here
 func (parser *Parser) blockCode(out *bytes.Buffer, data []byte) int {
 	var work bytes.Buffer
 
diff --git a/html.go b/html.go
index c71ca0e..51929cf 100644
--- a/html.go
+++ b/html.go
@@ -262,16 +262,16 @@ func (options *Html) Table(out *bytes.Buffer, header []byte, body []byte, column
 	doubleSpace(out)
 	out.WriteString("<table>\n<thead>\n")
 	out.Write(header)
-	out.WriteString("\n</thead>\n<tbody>\n")
+	out.WriteString("</thead>\n\n<tbody>\n")
 	out.Write(body)
-	out.WriteString("\n</tbody>\n</table>\n")
+	out.WriteString("</tbody>\n</table>\n")
 }
 
 func (options *Html) TableRow(out *bytes.Buffer, text []byte) {
 	doubleSpace(out)
 	out.WriteString("<tr>\n")
 	out.Write(text)
-	out.WriteString("\n</tr>")
+	out.WriteString("\n</tr>\n")
 }
 
 func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {