From 81cefb5e7cdbc5ad5676c979ae33797a4c191583 Mon Sep 17 00:00:00 2001 From: Russ Ross Date: Sun, 29 May 2011 17:00:31 -0600 Subject: [PATCH] split parser into multiple files, clean up naming --- Makefile | 4 +- block.go | 1029 +++++++++++++++++++++++++ html.go | 193 ++--- inline.go | 970 ++++++++++++++++++++++++ markdown.go | 1958 ------------------------------------------------ smartypants.go | 144 ++-- 6 files changed, 2155 insertions(+), 2143 deletions(-) create mode 100644 block.go create mode 100644 inline.go diff --git a/Makefile b/Makefile index e9c160a..267a713 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,7 @@ include $(GOROOT)/src/Make.inc TARG=github.com/russross/blackfriday -GOFILES=markdown.go html.go smartypants.go - -package: +GOFILES=markdown.go block.go inline.go html.go smartypants.go include $(GOROOT)/src/Make.pkg diff --git a/block.go b/block.go new file mode 100644 index 0000000..5073f65 --- /dev/null +++ b/block.go @@ -0,0 +1,1029 @@ +// +// Black Friday Markdown Processor +// Originally based on http://github.com/tanoku/upskirt +// by Russ Ross +// + +// +// Functions to parse block-level elements. +// + +package blackfriday + +import ( + "bytes" +) + +// parse block-level data +func parseBlock(out *bytes.Buffer, rndr *render, data []byte) { + if rndr.nesting >= rndr.maxNesting { + return + } + rndr.nesting++ + + for len(data) > 0 { + if isPrefixHeader(rndr, data) { + data = data[blockPrefixHeader(out, rndr, data):] + continue + } + if data[0] == '<' && rndr.mk.blockhtml != nil { + if i := blockHtml(out, rndr, data, true); i > 0 { + data = data[i:] + continue + } + } + if i := isEmpty(data); i > 0 { + data = data[i:] + continue + } + if isHrule(data) { + if rndr.mk.hrule != nil { + rndr.mk.hrule(out, rndr.mk.opaque) + } + var i int + for i = 0; i < len(data) && data[i] != '\n'; i++ { + } + data = data[i:] + continue + } + if rndr.flags&EXTENSION_FENCED_CODE != 0 { + if i := blockFencedCode(out, rndr, data); i > 0 { + data = data[i:] + continue + } + } + if rndr.flags&EXTENSION_TABLES != 0 { + if i := blockTable(out, rndr, data); i > 0 { + data = data[i:] + continue + } + } + if blockQuotePrefix(data) > 0 { + data = data[blockQuote(out, rndr, data):] + continue + } + if blockCodePrefix(data) > 0 { + data = data[blockCode(out, rndr, data):] + continue + } + if blockUliPrefix(data) > 0 { + data = data[blockList(out, rndr, data, 0):] + continue + } + if blockOliPrefix(data) > 0 { + data = data[blockList(out, rndr, data, LIST_TYPE_ORDERED):] + continue + } + + data = data[blockParagraph(out, rndr, data):] + } + + rndr.nesting-- +} + +func isPrefixHeader(rndr *render, data []byte) bool { + if data[0] != '#' { + return false + } + + if rndr.flags&EXTENSION_SPACE_HEADERS != 0 { + level := 0 + for level < len(data) && level < 6 && data[level] == '#' { + level++ + } + if level < len(data) && data[level] != ' ' && data[level] != '\t' { + return false + } + } + return true +} + +func blockPrefixHeader(out *bytes.Buffer, rndr *render, data []byte) int { + level := 0 + for level < len(data) && level < 6 && data[level] == '#' { + level++ + } + i, end := 0, 0 + for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ { + } + for end = i; end < len(data) && data[end] != '\n'; end++ { + } + skip := end + for end > 0 && data[end-1] == '#' { + end-- + } + for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') { + end-- + } + if end > i { + work := bytes.NewBuffer(nil) + parseInline(work, rndr, data[i:end]) + if rndr.mk.header != nil { + rndr.mk.header(out, work.Bytes(), level, rndr.mk.opaque) + } + } + return skip +} + +func isUnderlinedHeader(data []byte) int { + i := 0 + + // test of level 1 header + if data[i] == '=' { + for i = 1; i < len(data) && data[i] == '='; i++ { + } + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + if i >= len(data) || data[i] == '\n' { + return 1 + } else { + return 0 + } + } + + // test of level 2 header + if data[i] == '-' { + for i = 1; i < len(data) && data[i] == '-'; i++ { + } + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + if i >= len(data) || data[i] == '\n' { + return 2 + } else { + return 0 + } + } + + return 0 +} + +func blockHtml(out *bytes.Buffer, rndr *render, data []byte, do_render bool) int { + var i, j int + + // identify the opening tag + if len(data) < 2 || data[0] != '<' { + return 0 + } + curtag, tagfound := blockHtmlFindTag(data[1:]) + + // handle special cases + if !tagfound { + + // HTML comment, laxist form + if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' { + i = 5 + + for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { + i++ + } + i++ + + if i < len(data) { + j = isEmpty(data[i:]) + } + + if j > 0 { + size := i + j + if do_render && rndr.mk.blockhtml != nil { + rndr.mk.blockhtml(out, data[:size], rndr.mk.opaque) + } + return size + } + } + + // HR, which is the only self-closing block tag considered + if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') { + i = 3 + for i < len(data) && data[i] != '>' { + i++ + } + + if i+1 < len(data) { + i++ + j = isEmpty(data[i:]) + if j > 0 { + size := i + j + if do_render && rndr.mk.blockhtml != nil { + rndr.mk.blockhtml(out, data[:size], rndr.mk.opaque) + } + return size + } + } + } + + // no special case recognized + return 0 + } + + // look for an unindented matching closing tag + // followed by a blank line + i = 1 + found := false + + // if not found, try a second pass looking for indented match + // but not if tag is "ins" or "del" (following original Markdown.pl) + if curtag != "ins" && curtag != "del" { + i = 1 + for i < len(data) { + i++ + for i < len(data) && !(data[i-1] == '<' && data[i] == '/') { + i++ + } + + if i+2+len(curtag) >= len(data) { + break + } + + j = blockHtmlFindEnd(curtag, rndr, data[i-1:]) + + if j > 0 { + i += j - 1 + found = true + break + } + } + } + + if !found { + return 0 + } + + // the end of the block has been found + if do_render && rndr.mk.blockhtml != nil { + rndr.mk.blockhtml(out, data[:i], rndr.mk.opaque) + } + + return i +} + +func blockHtmlFindTag(data []byte) (string, bool) { + i := 0 + for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) { + i++ + } + if i >= len(data) { + return "", false + } + key := string(data[:i]) + if block_tags[key] { + return key, true + } + return "", false +} + +func blockHtmlFindEnd(tag string, rndr *render, data []byte) int { + // assume data[0] == '<' && data[1] == '/' already tested + + // check if tag is a match + if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' { + return 0 + } + + // check white lines + i := len(tag) + 3 + w := 0 + if i < len(data) { + if w = isEmpty(data[i:]); w == 0 { + return 0 // non-blank after tag + } + } + i += w + w = 0 + + if rndr.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { + if i < len(data) { + w = isEmpty(data[i:]) + } + } else { + if i < len(data) { + if w = isEmpty(data[i:]); w == 0 { + return 0 // non-blank line after tag line + } + } + } + + return i + w +} + +func isEmpty(data []byte) int { + var i int + for i = 0; i < len(data) && data[i] != '\n'; i++ { + if data[i] != ' ' && data[i] != '\t' { + return 0 + } + } + return i + 1 +} + +func isHrule(data []byte) bool { + // skip initial spaces + if len(data) < 3 { + return false + } + i := 0 + if data[0] == ' ' { + i++ + if data[1] == ' ' { + i++ + if data[2] == ' ' { + i++ + } + } + } + + // look at the hrule char + if i+2 >= len(data) || (data[i] != '*' && data[i] != '-' && data[i] != '_') { + return false + } + c := data[i] + + // the whole line must be the char or whitespace + n := 0 + for i < len(data) && data[i] != '\n' { + switch { + case data[i] == c: + n++ + case data[i] != ' ' && data[i] != '\t': + return false + } + i++ + } + + return n >= 3 +} + +func isFencedCode(data []byte, syntax **string) int { + i, n := 0, 0 + + // skip initial spaces + if len(data) < 3 { + return 0 + } + if data[0] == ' ' { + i++ + if data[1] == ' ' { + i++ + if data[2] == ' ' { + i++ + } + } + } + + // look at the hrule char + if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') { + return 0 + } + + c := data[i] + + // the whole line must be the char or whitespace + for i < len(data) && data[i] == c { + n++ + i++ + } + + if n < 3 { + return 0 + } + + if syntax != nil { + syn := 0 + + for i < len(data) && (data[i] == ' ' || data[i] == '\t') { + i++ + } + + syntax_start := i + + if i < len(data) && data[i] == '{' { + i++ + syntax_start++ + + for i < len(data) && data[i] != '}' && data[i] != '\n' { + syn++ + i++ + } + + if i == len(data) || data[i] != '}' { + return 0 + } + + // string all whitespace at the beginning and the end + // of the {} block + for syn > 0 && isspace(data[syntax_start]) { + syntax_start++ + syn-- + } + + for syn > 0 && isspace(data[syntax_start+syn-1]) { + syn-- + } + + i++ + } else { + for i < len(data) && !isspace(data[i]) { + syn++ + i++ + } + } + + language := string(data[syntax_start : syntax_start+syn]) + *syntax = &language + } + + for i < len(data) && data[i] != '\n' { + if !isspace(data[i]) { + return 0 + } + i++ + } + + return i + 1 +} + +func blockFencedCode(out *bytes.Buffer, rndr *render, data []byte) int { + var lang *string + beg := isFencedCode(data, &lang) + if beg == 0 { + return 0 + } + + work := bytes.NewBuffer(nil) + + for beg < len(data) { + fence_end := isFencedCode(data[beg:], nil) + if fence_end != 0 { + beg += fence_end + break + } + + var end int + for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { + } + + if beg < end { + // verbatim copy to the working buffer, escaping entities + if isEmpty(data[beg:]) > 0 { + work.WriteByte('\n') + } else { + work.Write(data[beg:end]) + } + } + beg = end + } + + if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' { + work.WriteByte('\n') + } + + if rndr.mk.blockcode != nil { + syntax := "" + if lang != nil { + syntax = *lang + } + + rndr.mk.blockcode(out, work.Bytes(), syntax, rndr.mk.opaque) + } + + return beg +} + +func blockTable(out *bytes.Buffer, rndr *render, data []byte) int { + header_work := bytes.NewBuffer(nil) + i, columns, col_data := blockTableHeader(header_work, rndr, data) + if i > 0 { + body_work := bytes.NewBuffer(nil) + + for i < len(data) { + pipes, row_start := 0, i + for ; i < len(data) && data[i] != '\n'; i++ { + if data[i] == '|' { + pipes++ + } + } + + if pipes == 0 || i == len(data) { + i = row_start + break + } + + blockTableRow(body_work, rndr, data[row_start:i], columns, col_data) + i++ + } + + if rndr.mk.table != nil { + rndr.mk.table(out, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque) + } + } + + return i +} + +func blockTableHeader(out *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) { + i, pipes := 0, 0 + column_data = []int{} + for i = 0; i < len(data) && data[i] != '\n'; i++ { + if data[i] == '|' { + pipes++ + } + } + + if i == len(data) || pipes == 0 { + return 0, 0, column_data + } + + header_end := i + + if data[0] == '|' { + pipes-- + } + + if i > 2 && data[i-1] == '|' { + pipes-- + } + + columns = pipes + 1 + column_data = make([]int, columns) + + // parse the header underline + i++ + if i < len(data) && data[i] == '|' { + i++ + } + + under_end := i + for under_end < len(data) && data[under_end] != '\n' { + under_end++ + } + + col := 0 + for ; col < columns && i < under_end; col++ { + dashes := 0 + + for i < under_end && (data[i] == ' ' || data[i] == '\t') { + i++ + } + + if data[i] == ':' { + i++ + column_data[col] |= TABLE_ALIGNMENT_LEFT + dashes++ + } + + for i < under_end && data[i] == '-' { + i++ + dashes++ + } + + if i < under_end && data[i] == ':' { + i++ + column_data[col] |= TABLE_ALIGNMENT_RIGHT + dashes++ + } + + for i < under_end && (data[i] == ' ' || data[i] == '\t') { + i++ + } + + if i < under_end && data[i] != '|' { + break + } + + if dashes < 3 { + break + } + + i++ + } + + if col < columns { + return 0, 0, column_data + } + + blockTableRow(out, rndr, data[:header_end], columns, column_data) + size = under_end + 1 + return +} + +func blockTableRow(out *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) { + i, col := 0, 0 + row_work := bytes.NewBuffer(nil) + + if i < len(data) && data[i] == '|' { + i++ + } + + for col = 0; col < columns && i < len(data); col++ { + for i < len(data) && isspace(data[i]) { + i++ + } + + cell_start := i + + for i < len(data) && data[i] != '|' { + i++ + } + + cell_end := i - 1 + + for cell_end > cell_start && isspace(data[cell_end]) { + cell_end-- + } + + cell_work := bytes.NewBuffer(nil) + parseInline(cell_work, rndr, data[cell_start:cell_end+1]) + + if rndr.mk.tableCell != nil { + cdata := 0 + if col < len(col_data) { + cdata = col_data[col] + } + rndr.mk.tableCell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque) + } + + i++ + } + + for ; col < columns; col++ { + empty_cell := []byte{} + if rndr.mk.tableCell != nil { + cdata := 0 + if col < len(col_data) { + cdata = col_data[col] + } + rndr.mk.tableCell(row_work, empty_cell, cdata, rndr.mk.opaque) + } + } + + if rndr.mk.tableRow != nil { + rndr.mk.tableRow(out, row_work.Bytes(), rndr.mk.opaque) + } +} + +// returns blockquote prefix length +func blockQuotePrefix(data []byte) int { + i := 0 + for i < len(data) && i < 3 && data[i] == ' ' { + i++ + } + if i < len(data) && data[i] == '>' { + if i+1 < len(data) && (data[i+1] == ' ' || data[i+1] == '\t') { + return i + 2 + } + return i + 1 + } + return 0 +} + +// parse a blockquote fragment +func blockQuote(out *bytes.Buffer, rndr *render, data []byte) int { + block := bytes.NewBuffer(nil) + work := bytes.NewBuffer(nil) + beg, end := 0, 0 + for beg < len(data) { + for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { + } + + if pre := blockQuotePrefix(data[beg:]); pre > 0 { + beg += pre // skip prefix + } else { + // empty line followed by non-quote line + if isEmpty(data[beg:]) > 0 && (end >= len(data) || (blockQuotePrefix(data[end:]) == 0 && isEmpty(data[end:]) == 0)) { + break + } + } + + if beg < end { // copy into the in-place working buffer + work.Write(data[beg:end]) + } + beg = end + } + + parseBlock(block, rndr, work.Bytes()) + if rndr.mk.blockquote != nil { + rndr.mk.blockquote(out, block.Bytes(), rndr.mk.opaque) + } + return end +} + +// returns prefix length for block code +func blockCodePrefix(data []byte) int { + if len(data) > 0 && data[0] == '\t' { + return 1 + } + if len(data) > 3 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { + return 4 + } + return 0 +} + +func blockCode(out *bytes.Buffer, rndr *render, data []byte) int { + work := bytes.NewBuffer(nil) + + beg, end := 0, 0 + for beg < len(data) { + for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { + } + + if pre := blockCodePrefix(data[beg:end]); pre > 0 { + beg += pre + } else { + if isEmpty(data[beg:end]) == 0 { + // non-empty non-prefixed line breaks the pre + break + } + } + + if beg < end { + // verbatim copy to the working buffer, escaping entities + if isEmpty(data[beg:end]) > 0 { + work.WriteByte('\n') + } else { + work.Write(data[beg:end]) + } + } + beg = end + } + + // trim all the \n off the end of work + workbytes := work.Bytes() + n := 0 + for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' { + n++ + } + if n > 0 { + work = bytes.NewBuffer(workbytes[:len(workbytes)-n]) + } + + work.WriteByte('\n') + + if rndr.mk.blockcode != nil { + rndr.mk.blockcode(out, work.Bytes(), "", rndr.mk.opaque) + } + + return beg +} + +// returns unordered list item prefix +func blockUliPrefix(data []byte) int { + i := 0 + for i < len(data) && i < 3 && data[i] == ' ' { + i++ + } + if i+1 >= len(data) || (data[i] != '*' && data[i] != '+' && data[i] != '-') || (data[i+1] != ' ' && data[i+1] != '\t') { + return 0 + } + return i + 2 +} + +// returns ordered list item prefix +func blockOliPrefix(data []byte) int { + i := 0 + for i < len(data) && i < 3 && data[i] == ' ' { + i++ + } + if i >= len(data) || data[i] < '0' || data[i] > '9' { + return 0 + } + for i < len(data) && data[i] >= '0' && data[i] <= '9' { + i++ + } + if i+1 >= len(data) || data[i] != '.' || (data[i+1] != ' ' && data[i+1] != '\t') { + return 0 + } + return i + 2 +} + +// parse ordered or unordered list block +func blockList(out *bytes.Buffer, rndr *render, data []byte, flags int) int { + work := bytes.NewBuffer(nil) + + i, j := 0, 0 + for i < len(data) { + j = blockListItem(work, rndr, data[i:], &flags) + i += j + + if j == 0 || flags&LIST_ITEM_END_OF_LIST != 0 { + break + } + } + + if rndr.mk.list != nil { + rndr.mk.list(out, work.Bytes(), flags, rndr.mk.opaque) + } + return i +} + +// parse a single list item +// assumes initial prefix is already removed +func blockListItem(out *bytes.Buffer, rndr *render, data []byte, flags *int) int { + // keep track of the first indentation prefix + beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0 + + for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' { + orgpre++ + } + + beg = blockUliPrefix(data) + if beg == 0 { + beg = blockOliPrefix(data) + } + if beg == 0 { + return 0 + } + + // skip leading whitespace on first line + for beg < len(data) && data[beg] == ' ' { + beg++ + } + + // skip to the beginning of the following line + end = beg + for end < len(data) && data[end-1] != '\n' { + end++ + } + + // get working buffers + work := bytes.NewBuffer(nil) + inter := bytes.NewBuffer(nil) + + // put the first line into the working buffer + work.Write(data[beg:end]) + beg = end + + // process the following lines + in_empty, has_inside_empty := false, false + for beg < len(data) { + end++ + + for end < len(data) && data[end-1] != '\n' { + end++ + } + + // process an empty line + if isEmpty(data[beg:end]) > 0 { + in_empty = true + beg = end + continue + } + + // calculate the indentation + i = 0 + for i < 4 && beg+i < end && data[beg+i] == ' ' { + i++ + } + + pre = i + if data[beg] == '\t' { + i = 1 + pre = 8 + } + + // check for a new item + chunk := data[beg+i : end] + if (blockUliPrefix(chunk) > 0 && !isHrule(chunk)) || blockOliPrefix(chunk) > 0 { + if in_empty { + has_inside_empty = true + } + + if pre == orgpre { // the following item must have the same indentation + break + } + + if sublist == 0 { + sublist = work.Len() + } + } else { + // only join indented stuff after empty lines + if in_empty && i < 4 && data[beg] != '\t' { + *flags |= LIST_ITEM_END_OF_LIST + break + } else { + if in_empty { + work.WriteByte('\n') + has_inside_empty = true + } + } + } + + in_empty = false + + // add the line into the working buffer without prefix + work.Write(data[beg+i : end]) + beg = end + } + + // render li contents + if has_inside_empty { + *flags |= LIST_ITEM_CONTAINS_BLOCK + } + + workbytes := work.Bytes() + if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 { + // intermediate render of block li + if sublist > 0 && sublist < len(workbytes) { + parseBlock(inter, rndr, workbytes[:sublist]) + parseBlock(inter, rndr, workbytes[sublist:]) + } else { + parseBlock(inter, rndr, workbytes) + } + } else { + // intermediate render of inline li + if sublist > 0 && sublist < len(workbytes) { + parseInline(inter, rndr, workbytes[:sublist]) + parseBlock(inter, rndr, workbytes[sublist:]) + } else { + parseInline(inter, rndr, workbytes) + } + } + + // render li itself + if rndr.mk.listitem != nil { + rndr.mk.listitem(out, inter.Bytes(), *flags, rndr.mk.opaque) + } + + return beg +} + +func blockParagraph(out *bytes.Buffer, rndr *render, data []byte) int { + i, end, level := 0, 0, 0 + + for i < len(data) { + for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ { + } + + if isEmpty(data[i:]) > 0 { + break + } + if level = isUnderlinedHeader(data[i:]); level > 0 { + break + } + + if rndr.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { + if data[i] == '<' && rndr.mk.blockhtml != nil && blockHtml(out, rndr, data[i:], false) > 0 { + end = i + break + } + } + + if isPrefixHeader(rndr, data[i:]) || isHrule(data[i:]) { + end = i + break + } + + i = end + } + + work := data + size := i + for size > 0 && work[size-1] == '\n' { + size-- + } + + if level == 0 { + tmp := bytes.NewBuffer(nil) + parseInline(tmp, rndr, work[:size]) + if rndr.mk.paragraph != nil { + rndr.mk.paragraph(out, tmp.Bytes(), rndr.mk.opaque) + } + } else { + if size > 0 { + beg := 0 + i = size + size-- + + for size > 0 && work[size] != '\n' { + size-- + } + + beg = size + 1 + for size > 0 && work[size-1] == '\n' { + size-- + } + + if size > 0 { + tmp := bytes.NewBuffer(nil) + parseInline(tmp, rndr, work[:size]) + if rndr.mk.paragraph != nil { + rndr.mk.paragraph(out, tmp.Bytes(), rndr.mk.opaque) + } + + work = work[beg:] + size = i - beg + } else { + size = i + } + } + + header_work := bytes.NewBuffer(nil) + parseInline(header_work, rndr, work[:size]) + + if rndr.mk.header != nil { + rndr.mk.header(out, header_work.Bytes(), level, rndr.mk.opaque) + } + } + + return end +} diff --git a/html.go b/html.go index 8de702c..1193ed6 100644 --- a/html.go +++ b/html.go @@ -18,12 +18,6 @@ import ( "strconv" ) -// -// -// HTML rendering -// -// - const ( HTML_SKIP_HTML = 1 << iota HTML_SKIP_STYLE @@ -41,7 +35,7 @@ const ( ) type htmlOptions struct { - Flags int + flags int close_tag string // how to end singleton tags: usually " />\n", possibly ">\n" toc_data struct { header_count int @@ -57,78 +51,78 @@ func HtmlRenderer(flags int) *Renderer { // configure the rendering engine r := new(Renderer) if flags&HTML_GITHUB_BLOCKCODE == 0 { - r.blockcode = rndr_blockcode + r.blockcode = htmlBlockcode } else { - r.blockcode = rndr_blockcode_github + r.blockcode = htmlBlockcodeGithub } - r.blockquote = rndr_blockquote + r.blockquote = htmlBlockquote if flags&HTML_SKIP_HTML == 0 { - r.blockhtml = rndr_raw_block + r.blockhtml = htmlRawBlock } - r.header = rndr_header - r.hrule = rndr_hrule - r.list = rndr_list - r.listitem = rndr_listitem - r.paragraph = rndr_paragraph - r.table = rndr_table - r.tableRow = rndr_tablerow - r.tableCell = rndr_tablecell + r.header = htmlHeader + r.hrule = htmlHrule + r.list = htmlList + r.listitem = htmlListitem + r.paragraph = htmlParagraph + r.table = htmlTable + r.tableRow = htmlTablerow + r.tableCell = htmlTablecell - r.autolink = rndr_autolink - r.codespan = rndr_codespan - r.doubleEmphasis = rndr_double_emphasis - r.emphasis = rndr_emphasis + r.autolink = htmlAutolink + r.codespan = htmlCodespan + r.doubleEmphasis = htmlDoubleEmphasis + r.emphasis = htmlEmphasis if flags&HTML_SKIP_IMAGES == 0 { - r.image = rndr_image + r.image = htmlImage } - r.linebreak = rndr_linebreak + r.linebreak = htmlLinebreak if flags&HTML_SKIP_LINKS == 0 { - r.link = rndr_link + r.link = htmlLink } - r.rawHtmlTag = rndr_raw_html_tag - r.tripleEmphasis = rndr_triple_emphasis - r.strikethrough = rndr_strikethrough + r.rawHtmlTag = htmlRawTag + r.tripleEmphasis = htmlTripleEmphasis + r.strikethrough = htmlStrikethrough var cb *SmartypantsRenderer if flags&HTML_USE_SMARTYPANTS == 0 { - r.normalText = rndr_normal_text + r.normalText = htmlNormalText } else { cb = Smartypants(flags) - r.normalText = rndr_smartypants + r.normalText = htmlSmartypants } close_tag := html_close if flags&HTML_USE_XHTML != 0 { close_tag = xhtml_close } - r.opaque = &htmlOptions{Flags: flags, close_tag: close_tag, smartypants: cb} + r.opaque = &htmlOptions{flags: flags, close_tag: close_tag, smartypants: cb} return r } func HtmlTocRenderer(flags int) *Renderer { // configure the rendering engine r := new(Renderer) - r.header = rndr_toc_header + r.header = htmlTocHeader - r.codespan = rndr_codespan - r.doubleEmphasis = rndr_double_emphasis - r.emphasis = rndr_emphasis - r.tripleEmphasis = rndr_triple_emphasis - r.strikethrough = rndr_strikethrough + r.codespan = htmlCodespan + r.doubleEmphasis = htmlDoubleEmphasis + r.emphasis = htmlEmphasis + r.tripleEmphasis = htmlTripleEmphasis + r.strikethrough = htmlStrikethrough - r.documentFooter = rndr_toc_finalize + r.documentFooter = htmlTocFinalize close_tag := ">\n" if flags&HTML_USE_XHTML != 0 { close_tag = " />\n" } - r.opaque = &htmlOptions{Flags: flags | HTML_TOC, close_tag: close_tag} + r.opaque = &htmlOptions{flags: flags | HTML_TOC, close_tag: close_tag} return r } -func attr_escape(ob *bytes.Buffer, src []byte) { +func attrEscape(ob *bytes.Buffer, src []byte) { for i := 0; i < len(src); i++ { - // directly copy unescaped characters + // directly copy normal characters org := i for i < len(src) && src[i] != '<' && src[i] != '>' && src[i] != '&' && src[i] != '"' { i++ @@ -154,35 +148,14 @@ func attr_escape(ob *bytes.Buffer, src []byte) { } } -func unescape_text(ob *bytes.Buffer, src []byte) { - i := 0 - for i < len(src) { - org := i - for i < len(src) && src[i] != '\\' { - i++ - } - - if i > org { - ob.Write(src[org:i]) - } - - if i+1 >= len(src) { - break - } - - ob.WriteByte(src[i+1]) - i += 2 - } -} - -func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) { +func htmlHeader(ob *bytes.Buffer, text []byte, level int, opaque interface{}) { options := opaque.(*htmlOptions) if ob.Len() > 0 { ob.WriteByte('\n') } - if options.Flags&HTML_TOC != 0 { + if options.flags&HTML_TOC != 0 { ob.WriteString(fmt.Sprintf("", level, options.toc_data.header_count)) options.toc_data.header_count++ } else { @@ -193,7 +166,7 @@ func rndr_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) { ob.WriteString(fmt.Sprintf("\n", level)) } -func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) { +func htmlRawBlock(ob *bytes.Buffer, text []byte, opaque interface{}) { sz := len(text) for sz > 0 && text[sz-1] == '\n' { sz-- @@ -212,7 +185,7 @@ func rndr_raw_block(ob *bytes.Buffer, text []byte, opaque interface{}) { ob.WriteByte('\n') } -func rndr_hrule(ob *bytes.Buffer, opaque interface{}) { +func htmlHrule(ob *bytes.Buffer, opaque interface{}) { options := opaque.(*htmlOptions) if ob.Len() > 0 { @@ -222,7 +195,7 @@ func rndr_hrule(ob *bytes.Buffer, opaque interface{}) { ob.WriteString(options.close_tag) } -func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) { +func htmlBlockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) { if ob.Len() > 0 { ob.WriteByte('\n') } @@ -248,7 +221,7 @@ func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface if cls > 0 { ob.WriteByte(' ') } - attr_escape(ob, []byte(lang[org:])) + attrEscape(ob, []byte(lang[org:])) } } @@ -258,7 +231,7 @@ func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface } if len(text) > 0 { - attr_escape(ob, text) + attrEscape(ob, text) } ob.WriteString("\n") @@ -282,7 +255,7 @@ func rndr_blockcode(ob *bytes.Buffer, text []byte, lang string, opaque interface * E.g. * ~~~~ {.python .numbered} =>

  */
-func rndr_blockcode_github(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
+func htmlBlockcodeGithub(ob *bytes.Buffer, text []byte, lang string, opaque interface{}) {
 	if ob.Len() > 0 {
 		ob.WriteByte('\n')
 	}
@@ -296,9 +269,9 @@ func rndr_blockcode_github(ob *bytes.Buffer, text []byte, lang string, opaque in
 		}
 
 		if lang[0] == '.' {
-			attr_escape(ob, []byte(lang[1:i]))
+			attrEscape(ob, []byte(lang[1:i]))
 		} else {
-			attr_escape(ob, []byte(lang[:i]))
+			attrEscape(ob, []byte(lang[:i]))
 		}
 
 		ob.WriteString("\">")
@@ -307,20 +280,20 @@ func rndr_blockcode_github(ob *bytes.Buffer, text []byte, lang string, opaque in
 	}
 
 	if len(text) > 0 {
-		attr_escape(ob, text)
+		attrEscape(ob, text)
 	}
 
 	ob.WriteString("
\n") } -func rndr_blockquote(ob *bytes.Buffer, text []byte, opaque interface{}) { +func htmlBlockquote(ob *bytes.Buffer, text []byte, opaque interface{}) { ob.WriteString("
\n") ob.Write(text) ob.WriteString("
") } -func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) { +func htmlTable(ob *bytes.Buffer, header []byte, body []byte, opaque interface{}) { if ob.Len() > 0 { ob.WriteByte('\n') } @@ -331,7 +304,7 @@ func rndr_table(ob *bytes.Buffer, header []byte, body []byte, opaque interface{} ob.WriteString("\n") } -func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) { +func htmlTablerow(ob *bytes.Buffer, text []byte, opaque interface{}) { if ob.Len() > 0 { ob.WriteByte('\n') } @@ -340,7 +313,7 @@ func rndr_tablerow(ob *bytes.Buffer, text []byte, opaque interface{}) { ob.WriteString("\n") } -func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) { +func htmlTablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{}) { if ob.Len() > 0 { ob.WriteByte('\n') } @@ -359,7 +332,7 @@ func rndr_tablecell(ob *bytes.Buffer, text []byte, align int, opaque interface{} ob.WriteString("") } -func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) { +func htmlList(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) { if ob.Len() > 0 { ob.WriteByte('\n') } @@ -376,7 +349,7 @@ func rndr_list(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) { } } -func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) { +func htmlListitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) { ob.WriteString("
  • ") size := len(text) for size > 0 && text[size-1] == '\n' { @@ -386,7 +359,7 @@ func rndr_listitem(ob *bytes.Buffer, text []byte, flags int, opaque interface{}) ob.WriteString("
  • \n") } -func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) { +func htmlParagraph(ob *bytes.Buffer, text []byte, opaque interface{}) { options := opaque.(*htmlOptions) i := 0 @@ -407,7 +380,7 @@ func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) { } ob.WriteString("

    ") - if options.Flags&HTML_HARD_WRAP != 0 { + if options.flags&HTML_HARD_WRAP != 0 { for i < len(text) { org := i for i < len(text) && text[i] != '\n' { @@ -432,13 +405,13 @@ func rndr_paragraph(ob *bytes.Buffer, text []byte, opaque interface{}) { ob.WriteString("

    \n") } -func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int { +func htmlAutolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) int { options := opaque.(*htmlOptions) if len(link) == 0 { return 0 } - if options.Flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL { + if options.flags&HTML_SAFELINK != 0 && !isSafeLink(link) && kind != LINK_TYPE_EMAIL { return 0 } @@ -455,9 +428,9 @@ func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) * want to print the `mailto:` prefix */ if bytes.HasPrefix(link, []byte("mailto:")) { - attr_escape(ob, link[7:]) + attrEscape(ob, link[7:]) } else { - attr_escape(ob, link) + attrEscape(ob, link) } ob.WriteString("") @@ -465,14 +438,14 @@ func rndr_autolink(ob *bytes.Buffer, link []byte, kind int, opaque interface{}) return 1 } -func rndr_codespan(ob *bytes.Buffer, text []byte, opaque interface{}) int { +func htmlCodespan(ob *bytes.Buffer, text []byte, opaque interface{}) int { ob.WriteString("") - attr_escape(ob, text) + attrEscape(ob, text) ob.WriteString("") return 1 } -func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { +func htmlDoubleEmphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { if len(text) == 0 { return 0 } @@ -482,7 +455,7 @@ func rndr_double_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int return 1 } -func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { +func htmlEmphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { if len(text) == 0 { return 0 } @@ -492,20 +465,20 @@ func rndr_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { return 1 } -func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int { +func htmlImage(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque interface{}) int { options := opaque.(*htmlOptions) if len(link) == 0 { return 0 } ob.WriteString("\"") 0 { - attr_escape(ob, alt) + attrEscape(ob, alt) } if len(title) > 0 { ob.WriteString("\" title=\"") - attr_escape(ob, title) + attrEscape(ob, title) } ob.WriteByte('"') @@ -513,17 +486,17 @@ func rndr_image(ob *bytes.Buffer, link []byte, title []byte, alt []byte, opaque return 1 } -func rndr_linebreak(ob *bytes.Buffer, opaque interface{}) int { +func htmlLinebreak(ob *bytes.Buffer, opaque interface{}) int { options := opaque.(*htmlOptions) ob.WriteString(" 0 { ob.WriteString("\" title=\"") - attr_escape(ob, title) + attrEscape(ob, title) } ob.WriteString("\">") if len(content) > 0 { @@ -543,25 +516,25 @@ func rndr_link(ob *bytes.Buffer, link []byte, title []byte, content []byte, opaq return 1 } -func rndr_raw_html_tag(ob *bytes.Buffer, text []byte, opaque interface{}) int { +func htmlRawTag(ob *bytes.Buffer, text []byte, opaque interface{}) int { options := opaque.(*htmlOptions) - if options.Flags&HTML_SKIP_HTML != 0 { + if options.flags&HTML_SKIP_HTML != 0 { return 1 } - if options.Flags&HTML_SKIP_STYLE != 0 && is_html_tag(text, "style") { + if options.flags&HTML_SKIP_STYLE != 0 && isHtmlTag(text, "style") { return 1 } - if options.Flags&HTML_SKIP_LINKS != 0 && is_html_tag(text, "a") { + if options.flags&HTML_SKIP_LINKS != 0 && isHtmlTag(text, "a") { return 1 } - if options.Flags&HTML_SKIP_IMAGES != 0 && is_html_tag(text, "img") { + if options.flags&HTML_SKIP_IMAGES != 0 && isHtmlTag(text, "img") { return 1 } ob.Write(text) return 1 } -func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { +func htmlTripleEmphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int { if len(text) == 0 { return 0 } @@ -571,7 +544,7 @@ func rndr_triple_emphasis(ob *bytes.Buffer, text []byte, opaque interface{}) int return 1 } -func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int { +func htmlStrikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int { if len(text) == 0 { return 0 } @@ -581,11 +554,11 @@ func rndr_strikethrough(ob *bytes.Buffer, text []byte, opaque interface{}) int { return 1 } -func rndr_normal_text(ob *bytes.Buffer, text []byte, opaque interface{}) { - attr_escape(ob, text) +func htmlNormalText(ob *bytes.Buffer, text []byte, opaque interface{}) { + attrEscape(ob, text) } -func rndr_toc_header(ob *bytes.Buffer, text []byte, level int, opaque interface{}) { +func htmlTocHeader(ob *bytes.Buffer, text []byte, level int, opaque interface{}) { options := opaque.(*htmlOptions) for level > options.toc_data.current_level { if options.toc_data.current_level > 0 { @@ -614,7 +587,7 @@ func rndr_toc_header(ob *bytes.Buffer, text []byte, level int, opaque interface{ ob.WriteString("\n") } -func rndr_toc_finalize(ob *bytes.Buffer, opaque interface{}) { +func htmlTocFinalize(ob *bytes.Buffer, opaque interface{}) { options := opaque.(*htmlOptions) for options.toc_data.current_level > 1 { ob.WriteString("\n") @@ -626,7 +599,7 @@ func rndr_toc_finalize(ob *bytes.Buffer, opaque interface{}) { } } -func is_html_tag(tag []byte, tagname string) bool { +func isHtmlTag(tag []byte, tagname string) bool { i := 0 if i < len(tag) && tag[0] != '<' { return false diff --git a/inline.go b/inline.go new file mode 100644 index 0000000..f6043ca --- /dev/null +++ b/inline.go @@ -0,0 +1,970 @@ +// +// Black Friday Markdown Processor +// Originally based on http://github.com/tanoku/upskirt +// by Russ Ross +// + +// +// Functions to parse inline elements. +// + +package blackfriday + +import ( + "bytes" +) + +// Functions to parse text within a block +// Each function returns the number of chars taken care of +// data is the complete block being rendered +// offset is the number of valid chars before the current cursor + +func parseInline(out *bytes.Buffer, rndr *render, data []byte) { + if rndr.nesting >= rndr.maxNesting { + return + } + rndr.nesting++ + + i, end := 0, 0 + for i < len(data) { + // copy inactive chars into the output + for end < len(data) && rndr.inline[data[end]] == nil { + end++ + } + + if rndr.mk.normalText != nil { + rndr.mk.normalText(out, data[i:end], rndr.mk.opaque) + } else { + out.Write(data[i:end]) + } + + if end >= len(data) { + break + } + i = end + + // call the trigger + parser := rndr.inline[data[end]] + end = parser(out, rndr, data, i) + + if end == 0 { // no action from the callback + end = i + 1 + } else { + i += end + end = i + } + } + + rndr.nesting-- +} + +// single and double emphasis parsing +func inlineEmphasis(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + data = data[offset:] + c := data[0] + ret := 0 + + if len(data) > 2 && data[1] != c { + // whitespace cannot follow an opening emphasis; + // strikethrough only takes two characters '~~' + if c == '~' || isspace(data[1]) { + return 0 + } + if ret = inlineHelperEmph1(out, rndr, data[1:], c); ret == 0 { + return 0 + } + + return ret + 1 + } + + if len(data) > 3 && data[1] == c && data[2] != c { + if isspace(data[2]) { + return 0 + } + if ret = inlineHelperEmph2(out, rndr, data[2:], c); ret == 0 { + return 0 + } + + return ret + 2 + } + + if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { + if c == '~' || isspace(data[3]) { + return 0 + } + if ret = inlineHelperEmph3(out, rndr, data, 3, c); ret == 0 { + return 0 + } + + return ret + 3 + } + + return 0 +} + +func inlineCodespan(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + data = data[offset:] + + nb := 0 + + // count the number of backticks in the delimiter + for nb < len(data) && data[nb] == '`' { + nb++ + } + + // find the next delimiter + i, end := 0, 0 + for end = nb; end < len(data) && i < nb; end++ { + if data[end] == '`' { + i++ + } else { + i = 0 + } + } + + if i < nb && end >= len(data) { + return 0 // no matching delimiter + } + + // trim outside whitespace + f_begin := nb + for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') { + f_begin++ + } + + f_end := end - nb + for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') { + f_end-- + } + + // real code span + if rndr.mk.codespan == nil { + return 0 + } + if f_begin < f_end { + if rndr.mk.codespan(out, data[f_begin:f_end], rndr.mk.opaque) == 0 { + end = 0 + } + } else { + if rndr.mk.codespan(out, nil, rndr.mk.opaque) == 0 { + end = 0 + } + } + + return end + +} + +// '\n' preceded by two spaces +func inlineLinebreak(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' { + return 0 + } + + // remove trailing spaces from out and render + outBytes := out.Bytes() + end := len(outBytes) + for end > 0 && outBytes[end-1] == ' ' { + end-- + } + out.Truncate(end) + + if rndr.mk.linebreak == nil { + return 0 + } + if rndr.mk.linebreak(out, rndr.mk.opaque) > 0 { + return 1 + } else { + return 0 + } + + return 0 +} + +// '[': parse a link or an image +func inlineLink(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + isImg := offset > 0 && data[offset-1] == '!' + + data = data[offset:] + + i := 1 + var title, link []byte + text_has_nl := false + + // check whether the correct renderer exists + if (isImg && rndr.mk.image == nil) || (!isImg && rndr.mk.link == nil) { + return 0 + } + + // look for the matching closing bracket + for level := 1; level > 0 && i < len(data); i++ { + switch { + case data[i] == '\n': + text_has_nl = true + + case data[i-1] == '\\': + continue + + case data[i] == '[': + level++ + + case data[i] == ']': + level-- + if level <= 0 { + i-- // compensate for extra i++ in for loop + } + } + } + + if i >= len(data) { + return 0 + } + + txt_e := i + i++ + + // skip any amount of whitespace or newline + // (this is much more lax than original markdown syntax) + for i < len(data) && isspace(data[i]) { + i++ + } + + // inline style link + switch { + case i < len(data) && data[i] == '(': + // skip initial whitespace + i++ + + for i < len(data) && isspace(data[i]) { + i++ + } + + link_b := i + + // look for link end: ' " ) + for i < len(data) { + if data[i] == '\\' { + i += 2 + } else { + if data[i] == ')' || data[i] == '\'' || data[i] == '"' { + break + } + i++ + } + } + + if i >= len(data) { + return 0 + } + link_e := i + + // look for title end if present + title_b, title_e := 0, 0 + if data[i] == '\'' || data[i] == '"' { + i++ + title_b = i + + for i < len(data) { + if data[i] == '\\' { + i += 2 + } else { + if data[i] == ')' { + break + } + i++ + } + } + + if i >= len(data) { + return 0 + } + + // skip whitespace after title + title_e = i - 1 + for title_e > title_b && isspace(data[title_e]) { + title_e-- + } + + // check for closing quote presence + if data[title_e] != '\'' && data[title_e] != '"' { + title_b, title_e = 0, 0 + link_e = i + } + } + + // remove whitespace at the end of the link + for link_e > link_b && isspace(data[link_e-1]) { + link_e-- + } + + // remove optional angle brackets around the link + if data[link_b] == '<' { + link_b++ + } + if data[link_e-1] == '>' { + link_e-- + } + + // build escaped link and title + if link_e > link_b { + link = data[link_b:link_e] + } + + if title_e > title_b { + title = data[title_b:title_e] + } + + i++ + + // reference style link + case i < len(data) && data[i] == '[': + var id []byte + + // look for the id + i++ + link_b := i + for i < len(data) && data[i] != ']' { + i++ + } + if i >= len(data) { + return 0 + } + link_e := i + + // find the reference + if link_b == link_e { + if text_has_nl { + b := bytes.NewBuffer(nil) + + for j := 1; j < txt_e; j++ { + switch { + case data[j] != '\n': + b.WriteByte(data[j]) + case data[j-1] != ' ': + b.WriteByte(' ') + } + } + + id = b.Bytes() + } else { + id = data[1:txt_e] + } + } else { + id = data[link_b:link_e] + } + + // find the reference with matching id (ids are case-insensitive) + key := string(bytes.ToLower(id)) + lr, ok := rndr.refs[key] + if !ok { + return 0 + } + + // keep link and title from reference + link = lr.link + title = lr.title + i++ + + // shortcut reference style link + default: + var id []byte + + // craft the id + if text_has_nl { + b := bytes.NewBuffer(nil) + + for j := 1; j < txt_e; j++ { + switch { + case data[j] != '\n': + b.WriteByte(data[j]) + case data[j-1] != ' ': + b.WriteByte(' ') + } + } + + id = b.Bytes() + } else { + id = data[1:txt_e] + } + + // find the reference with matching id + key := string(bytes.ToLower(id)) + lr, ok := rndr.refs[key] + if !ok { + return 0 + } + + // keep link and title from reference + link = lr.link + title = lr.title + + // rewind the whitespace + i = txt_e + 1 + } + + // build content: img alt is escaped, link content is parsed + content := bytes.NewBuffer(nil) + if txt_e > 1 { + if isImg { + content.Write(data[1:txt_e]) + } else { + parseInline(content, rndr, data[1:txt_e]) + } + } + + var u_link []byte + if len(link) > 0 { + u_link_buf := bytes.NewBuffer(nil) + unescapeText(u_link_buf, link) + u_link = u_link_buf.Bytes() + } + + // call the relevant rendering function + ret := 0 + if isImg { + outSize := out.Len() + outBytes := out.Bytes() + if outSize > 0 && outBytes[outSize-1] == '!' { + out.Truncate(outSize - 1) + } + + ret = rndr.mk.image(out, u_link, title, content.Bytes(), rndr.mk.opaque) + } else { + ret = rndr.mk.link(out, u_link, title, content.Bytes(), rndr.mk.opaque) + } + + if ret > 0 { + return i + } + return 0 +} + +// '<' when tags or autolinks are allowed +func inlineLangle(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + data = data[offset:] + altype := LINK_TYPE_NOT_AUTOLINK + end := tagLength(data, &altype) + ret := 0 + + if end > 2 { + switch { + case rndr.mk.autolink != nil && altype != LINK_TYPE_NOT_AUTOLINK: + u_link := bytes.NewBuffer(nil) + unescapeText(u_link, data[1:end+1-2]) + ret = rndr.mk.autolink(out, u_link.Bytes(), altype, rndr.mk.opaque) + case rndr.mk.rawHtmlTag != nil: + ret = rndr.mk.rawHtmlTag(out, data[:end], rndr.mk.opaque) + } + } + + if ret == 0 { + return 0 + } + return end +} + +// '\\' backslash escape +var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>") + +func inlineEscape(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + data = data[offset:] + + if len(data) > 1 { + if bytes.IndexByte(escapeChars, data[1]) < 0 { + return 0 + } + + if rndr.mk.normalText != nil { + rndr.mk.normalText(out, data[1:2], rndr.mk.opaque) + } else { + out.WriteByte(data[1]) + } + } + + return 2 +} + +func unescapeText(ob *bytes.Buffer, src []byte) { + i := 0 + for i < len(src) { + org := i + for i < len(src) && src[i] != '\\' { + i++ + } + + if i > org { + ob.Write(src[org:i]) + } + + if i+1 >= len(src) { + break + } + + ob.WriteByte(src[i+1]) + i += 2 + } +} + +// '&' escaped when it doesn't belong to an entity +// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; +func inlineEntity(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + data = data[offset:] + + end := 1 + + if end < len(data) && data[end] == '#' { + end++ + } + + for end < len(data) && isalnum(data[end]) { + end++ + } + + if end < len(data) && data[end] == ';' { + end++ // real entity + } else { + return 0 // lone '&' + } + + if rndr.mk.entity != nil { + rndr.mk.entity(out, data[:end], rndr.mk.opaque) + } else { + out.Write(data[:end]) + } + + return end +} + +func inlineAutolink(out *bytes.Buffer, rndr *render, data []byte, offset int) int { + orig_data := data + data = data[offset:] + + if offset > 0 { + if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) { + return 0 + } + } + + if !isSafeLink(data) { + return 0 + } + + link_end := 0 + for link_end < len(data) && !isspace(data[link_end]) { + link_end++ + } + + // Skip punctuation at the end of the link + if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' { + link_end-- + } + + // See if the link finishes with a punctuation sign that can be closed. + var copen byte + switch data[link_end-1] { + case '"': + copen = '"' + case '\'': + copen = '\'' + case ')': + copen = '(' + case ']': + copen = '[' + case '}': + copen = '{' + default: + copen = 0 + } + + if copen != 0 { + buf_end := offset + link_end - 2 + + open_delim := 1 + + /* Try to close the final punctuation sign in this same line; + * if we managed to close it outside of the URL, that means that it's + * not part of the URL. If it closes inside the URL, that means it + * is part of the URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric)) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 { + if orig_data[buf_end] == data[link_end-1] { + open_delim++ + } + + if orig_data[buf_end] == copen { + open_delim-- + } + + buf_end-- + } + + if open_delim == 0 { + link_end-- + } + } + + if rndr.mk.autolink != nil { + u_link := bytes.NewBuffer(nil) + unescapeText(u_link, data[:link_end]) + + rndr.mk.autolink(out, u_link.Bytes(), LINK_TYPE_NORMAL, rndr.mk.opaque) + } + + return link_end +} + +var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")} + +func isSafeLink(link []byte) bool { + for _, prefix := range validUris { + // TODO: handle unicode here + // case-insensitive prefix test + if len(link) > len(prefix) && !less(link[:len(prefix)], prefix) && !less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) { + return true + } + } + + return false +} + +// return the length of the given tag, or 0 is it's not valid +func tagLength(data []byte, autolink *int) int { + var i, j int + + // a valid tag can't be shorter than 3 chars + if len(data) < 3 { + return 0 + } + + // begins with a '<' optionally followed by '/', followed by letter or number + if data[0] != '<' { + return 0 + } + if data[1] == '/' { + i = 2 + } else { + i = 1 + } + + if !isalnum(data[i]) { + return 0 + } + + // scheme test + *autolink = LINK_TYPE_NOT_AUTOLINK + + // try to find the beggining of an URI + for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') { + i++ + } + + if i > 1 && data[i] == '@' { + if j = isMailtoAutolink(data[i:]); j != 0 { + *autolink = LINK_TYPE_EMAIL + return i + j + } + } + + if i > 2 && data[i] == ':' { + *autolink = LINK_TYPE_NORMAL + i++ + } + + // complete autolink test: no whitespace or ' or " + switch { + case i >= len(data): + *autolink = LINK_TYPE_NOT_AUTOLINK + case *autolink != 0: + j = i + + for i < len(data) { + if data[i] == '\\' { + i += 2 + } else { + if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) { + break + } else { + i++ + } + } + + } + + if i >= len(data) { + return 0 + } + if i > j && data[i] == '>' { + return i + 1 + } + + // one of the forbidden chars has been found + *autolink = LINK_TYPE_NOT_AUTOLINK + } + + // look for something looking like a tag end + for i < len(data) && data[i] != '>' { + i++ + } + if i >= len(data) { + return 0 + } + return i + 1 +} + +// look for the address part of a mail autolink and '>' +// this is less strict than the original markdown e-mail address matching +func isMailtoAutolink(data []byte) int { + nb := 0 + + // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' + for i := 0; i < len(data); i++ { + if isalnum(data[i]) { + continue + } + + switch data[i] { + case '@': + nb++ + + case '-', '.', '_': + break + + case '>': + if nb == 1 { + return i + 1 + } else { + return 0 + } + default: + return 0 + } + } + + return 0 +} + +// look for the next emph char, skipping other constructs +func inlineHelperFindEmphChar(data []byte, c byte) int { + i := 1 + + for i < len(data) { + for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' { + i++ + } + if i >= len(data) { + return 0 + } + if data[i] == c { + return i + } + + // do not count escaped chars + if i != 0 && data[i-1] == '\\' { + i++ + continue + } + + if data[i] == '`' { + // skip a code span + tmp_i := 0 + i++ + for i < len(data) && data[i] != '`' { + if tmp_i == 0 && data[i] == c { + tmp_i = i + } + i++ + } + if i >= len(data) { + return tmp_i + } + i++ + } else { + if data[i] == '[' { + // skip a link + tmp_i := 0 + i++ + for i < len(data) && data[i] != ']' { + if tmp_i == 0 && data[i] == c { + tmp_i = i + } + i++ + } + i++ + for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') { + i++ + } + if i >= len(data) { + return tmp_i + } + if data[i] != '[' && data[i] != '(' { // not a link + if tmp_i > 0 { + return tmp_i + } else { + continue + } + } + cc := data[i] + i++ + for i < len(data) && data[i] != cc { + if tmp_i == 0 && data[i] == c { + tmp_i = i + } + i++ + } + if i >= len(data) { + return tmp_i + } + i++ + } + } + } + return 0 +} + +func inlineHelperEmph1(out *bytes.Buffer, rndr *render, data []byte, c byte) int { + i := 0 + + if rndr.mk.emphasis == nil { + return 0 + } + + // skip one symbol if coming from emph3 + if len(data) > 1 && data[0] == c && data[1] == c { + i = 1 + } + + for i < len(data) { + length := inlineHelperFindEmphChar(data[i:], c) + if length == 0 { + return 0 + } + i += length + if i >= len(data) { + return 0 + } + + if i+1 < len(data) && data[i+1] == c { + i++ + continue + } + + if data[i] == c && !isspace(data[i-1]) { + + if rndr.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 { + if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { + continue + } + } + + work := bytes.NewBuffer(nil) + parseInline(work, rndr, data[:i]) + r := rndr.mk.emphasis(out, work.Bytes(), rndr.mk.opaque) + if r > 0 { + return i + 1 + } else { + return 0 + } + } + } + + return 0 +} + +func inlineHelperEmph2(out *bytes.Buffer, rndr *render, data []byte, c byte) int { + render_method := rndr.mk.doubleEmphasis + if c == '~' { + render_method = rndr.mk.strikethrough + } + + if render_method == nil { + return 0 + } + + i := 0 + + for i < len(data) { + length := inlineHelperFindEmphChar(data[i:], c) + if length == 0 { + return 0 + } + i += length + + if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) { + work := bytes.NewBuffer(nil) + parseInline(work, rndr, data[:i]) + r := render_method(out, work.Bytes(), rndr.mk.opaque) + if r > 0 { + return i + 2 + } else { + return 0 + } + } + i++ + } + return 0 +} + +func inlineHelperEmph3(out *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int { + i := 0 + orig_data := data + data = data[offset:] + + for i < len(data) { + length := inlineHelperFindEmphChar(data[i:], c) + if length == 0 { + return 0 + } + i += length + + // skip whitespace preceded symbols + if data[i] != c || isspace(data[i-1]) { + continue + } + + switch { + case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.tripleEmphasis != nil): + // triple symbol found + work := bytes.NewBuffer(nil) + + parseInline(work, rndr, data[:i]) + r := rndr.mk.tripleEmphasis(out, work.Bytes(), rndr.mk.opaque) + if r > 0 { + return i + 3 + } else { + return 0 + } + case (i+1 < len(data) && data[i+1] == c): + // double symbol found, hand over to emph1 + length = inlineHelperEmph1(out, rndr, orig_data[offset-2:], c) + if length == 0 { + return 0 + } else { + return length - 2 + } + default: + // single symbol found, hand over to emph2 + length = inlineHelperEmph2(out, rndr, orig_data[offset-1:], c) + if length == 0 { + return 0 + } else { + return length - 1 + } + } + } + return 0 +} diff --git a/markdown.go b/markdown.go index 2cbb61c..7420216 100644 --- a/markdown.go +++ b/markdown.go @@ -254,1964 +254,6 @@ func Markdown(input []byte, renderer *Renderer, extensions uint32) []byte { } -// -// Inline parsing -// Functions to parse text within a block. Each: -// returns the number of chars taken care of -// data is the complete block being rendered -// offset is the number of valid chars before the data -// - -func parseInline(out *bytes.Buffer, rndr *render, data []byte) { - if rndr.nesting >= rndr.maxNesting { - return - } - rndr.nesting++ - - i, end := 0, 0 - for i < len(data) { - // copy inactive chars into the output - for end < len(data) && rndr.inline[data[end]] == nil { - end++ - } - - if rndr.mk.normalText != nil { - rndr.mk.normalText(out, data[i:end], rndr.mk.opaque) - } else { - out.Write(data[i:end]) - } - - if end >= len(data) { - break - } - i = end - - // call the trigger - parser := rndr.inline[data[end]] - end = parser(out, rndr, data, i) - - if end == 0 { // no action from the callback - end = i + 1 - } else { - i += end - end = i - } - } - - rndr.nesting-- -} - -// single and double emphasis parsing -func inlineEmphasis(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - data = data[offset:] - c := data[0] - ret := 0 - - if len(data) > 2 && data[1] != c { - // whitespace cannot follow an opening emphasis; - // strikethrough only takes two characters '~~' - if c == '~' || isspace(data[1]) { - return 0 - } - if ret = inlineHelperEmph1(out, rndr, data[1:], c); ret == 0 { - return 0 - } - - return ret + 1 - } - - if len(data) > 3 && data[1] == c && data[2] != c { - if isspace(data[2]) { - return 0 - } - if ret = inlineHelperEmph2(out, rndr, data[2:], c); ret == 0 { - return 0 - } - - return ret + 2 - } - - if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { - if c == '~' || isspace(data[3]) { - return 0 - } - if ret = inlineHelperEmph3(out, rndr, data, 3, c); ret == 0 { - return 0 - } - - return ret + 3 - } - - return 0 -} - -func inlineCodespan(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - data = data[offset:] - - nb := 0 - - // count the number of backticks in the delimiter - for nb < len(data) && data[nb] == '`' { - nb++ - } - - // find the next delimiter - i, end := 0, 0 - for end = nb; end < len(data) && i < nb; end++ { - if data[end] == '`' { - i++ - } else { - i = 0 - } - } - - if i < nb && end >= len(data) { - return 0 // no matching delimiter - } - - // trim outside whitespace - f_begin := nb - for f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t') { - f_begin++ - } - - f_end := end - nb - for f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t') { - f_end-- - } - - // real code span - if rndr.mk.codespan == nil { - return 0 - } - if f_begin < f_end { - if rndr.mk.codespan(out, data[f_begin:f_end], rndr.mk.opaque) == 0 { - end = 0 - } - } else { - if rndr.mk.codespan(out, nil, rndr.mk.opaque) == 0 { - end = 0 - } - } - - return end - -} - -// '\n' preceded by two spaces -func inlineLinebreak(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - if offset < 2 || data[offset-1] != ' ' || data[offset-2] != ' ' { - return 0 - } - - // remove trailing spaces from out and render - outBytes := out.Bytes() - end := len(outBytes) - for end > 0 && outBytes[end-1] == ' ' { - end-- - } - out.Truncate(end) - - if rndr.mk.linebreak == nil { - return 0 - } - if rndr.mk.linebreak(out, rndr.mk.opaque) > 0 { - return 1 - } else { - return 0 - } - - return 0 -} - -// '[': parse a link or an image -func inlineLink(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - isImg := offset > 0 && data[offset-1] == '!' - - data = data[offset:] - - i := 1 - var title, link []byte - text_has_nl := false - - // check whether the correct renderer exists - if (isImg && rndr.mk.image == nil) || (!isImg && rndr.mk.link == nil) { - return 0 - } - - // look for the matching closing bracket - for level := 1; level > 0 && i < len(data); i++ { - switch { - case data[i] == '\n': - text_has_nl = true - - case data[i-1] == '\\': - continue - - case data[i] == '[': - level++ - - case data[i] == ']': - level-- - if level <= 0 { - i-- // compensate for extra i++ in for loop - } - } - } - - if i >= len(data) { - return 0 - } - - txt_e := i - i++ - - // skip any amount of whitespace or newline - // (this is much more lax than original markdown syntax) - for i < len(data) && isspace(data[i]) { - i++ - } - - // inline style link - switch { - case i < len(data) && data[i] == '(': - // skip initial whitespace - i++ - - for i < len(data) && isspace(data[i]) { - i++ - } - - link_b := i - - // look for link end: ' " ) - for i < len(data) { - if data[i] == '\\' { - i += 2 - } else { - if data[i] == ')' || data[i] == '\'' || data[i] == '"' { - break - } - i++ - } - } - - if i >= len(data) { - return 0 - } - link_e := i - - // look for title end if present - title_b, title_e := 0, 0 - if data[i] == '\'' || data[i] == '"' { - i++ - title_b = i - - for i < len(data) { - if data[i] == '\\' { - i += 2 - } else { - if data[i] == ')' { - break - } - i++ - } - } - - if i >= len(data) { - return 0 - } - - // skip whitespace after title - title_e = i - 1 - for title_e > title_b && isspace(data[title_e]) { - title_e-- - } - - // check for closing quote presence - if data[title_e] != '\'' && data[title_e] != '"' { - title_b, title_e = 0, 0 - link_e = i - } - } - - // remove whitespace at the end of the link - for link_e > link_b && isspace(data[link_e-1]) { - link_e-- - } - - // remove optional angle brackets around the link - if data[link_b] == '<' { - link_b++ - } - if data[link_e-1] == '>' { - link_e-- - } - - // build escaped link and title - if link_e > link_b { - link = data[link_b:link_e] - } - - if title_e > title_b { - title = data[title_b:title_e] - } - - i++ - - // reference style link - case i < len(data) && data[i] == '[': - var id []byte - - // look for the id - i++ - link_b := i - for i < len(data) && data[i] != ']' { - i++ - } - if i >= len(data) { - return 0 - } - link_e := i - - // find the reference - if link_b == link_e { - if text_has_nl { - b := bytes.NewBuffer(nil) - - for j := 1; j < txt_e; j++ { - switch { - case data[j] != '\n': - b.WriteByte(data[j]) - case data[j-1] != ' ': - b.WriteByte(' ') - } - } - - id = b.Bytes() - } else { - id = data[1:txt_e] - } - } else { - id = data[link_b:link_e] - } - - // find the reference with matching id (ids are case-insensitive) - key := string(bytes.ToLower(id)) - lr, ok := rndr.refs[key] - if !ok { - return 0 - } - - // keep link and title from reference - link = lr.link - title = lr.title - i++ - - // shortcut reference style link - default: - var id []byte - - // craft the id - if text_has_nl { - b := bytes.NewBuffer(nil) - - for j := 1; j < txt_e; j++ { - switch { - case data[j] != '\n': - b.WriteByte(data[j]) - case data[j-1] != ' ': - b.WriteByte(' ') - } - } - - id = b.Bytes() - } else { - id = data[1:txt_e] - } - - // find the reference with matching id - key := string(bytes.ToLower(id)) - lr, ok := rndr.refs[key] - if !ok { - return 0 - } - - // keep link and title from reference - link = lr.link - title = lr.title - - // rewind the whitespace - i = txt_e + 1 - } - - // build content: img alt is escaped, link content is parsed - content := bytes.NewBuffer(nil) - if txt_e > 1 { - if isImg { - content.Write(data[1:txt_e]) - } else { - parseInline(content, rndr, data[1:txt_e]) - } - } - - var u_link []byte - if len(link) > 0 { - u_link_buf := bytes.NewBuffer(nil) - unescape_text(u_link_buf, link) - u_link = u_link_buf.Bytes() - } - - // call the relevant rendering function - ret := 0 - if isImg { - outSize := out.Len() - outBytes := out.Bytes() - if outSize > 0 && outBytes[outSize-1] == '!' { - out.Truncate(outSize - 1) - } - - ret = rndr.mk.image(out, u_link, title, content.Bytes(), rndr.mk.opaque) - } else { - ret = rndr.mk.link(out, u_link, title, content.Bytes(), rndr.mk.opaque) - } - - if ret > 0 { - return i - } - return 0 -} - -// '<' when tags or autolinks are allowed -func inlineLangle(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - data = data[offset:] - altype := LINK_TYPE_NOT_AUTOLINK - end := tagLength(data, &altype) - ret := 0 - - if end > 2 { - switch { - case rndr.mk.autolink != nil && altype != LINK_TYPE_NOT_AUTOLINK: - u_link := bytes.NewBuffer(nil) - unescape_text(u_link, data[1:end+1-2]) - ret = rndr.mk.autolink(out, u_link.Bytes(), altype, rndr.mk.opaque) - case rndr.mk.rawHtmlTag != nil: - ret = rndr.mk.rawHtmlTag(out, data[:end], rndr.mk.opaque) - } - } - - if ret == 0 { - return 0 - } - return end -} - -// '\\' backslash escape -var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>") - -func inlineEscape(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - data = data[offset:] - - if len(data) > 1 { - if bytes.IndexByte(escapeChars, data[1]) < 0 { - return 0 - } - - if rndr.mk.normalText != nil { - rndr.mk.normalText(out, data[1:2], rndr.mk.opaque) - } else { - out.WriteByte(data[1]) - } - } - - return 2 -} - -// '&' escaped when it doesn't belong to an entity -// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; -func inlineEntity(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - data = data[offset:] - - end := 1 - - if end < len(data) && data[end] == '#' { - end++ - } - - for end < len(data) && isalnum(data[end]) { - end++ - } - - if end < len(data) && data[end] == ';' { - end++ // real entity - } else { - return 0 // lone '&' - } - - if rndr.mk.entity != nil { - rndr.mk.entity(out, data[:end], rndr.mk.opaque) - } else { - out.Write(data[:end]) - } - - return end -} - -func inlineAutolink(out *bytes.Buffer, rndr *render, data []byte, offset int) int { - orig_data := data - data = data[offset:] - - if offset > 0 { - if !isspace(orig_data[offset-1]) && !ispunct(orig_data[offset-1]) { - return 0 - } - } - - if !isSafeLink(data) { - return 0 - } - - link_end := 0 - for link_end < len(data) && !isspace(data[link_end]) { - link_end++ - } - - // Skip punctuation at the end of the link - if (data[link_end-1] == '.' || data[link_end-1] == ',' || data[link_end-1] == ';') && data[link_end-2] != '\\' { - link_end-- - } - - // See if the link finishes with a punctuation sign that can be closed. - var copen byte - switch data[link_end-1] { - case '"': - copen = '"' - case '\'': - copen = '\'' - case ')': - copen = '(' - case ']': - copen = '[' - case '}': - copen = '{' - default: - copen = 0 - } - - if copen != 0 { - buf_end := offset + link_end - 2 - - open_delim := 1 - - /* Try to close the final punctuation sign in this same line; - * if we managed to close it outside of the URL, that means that it's - * not part of the URL. If it closes inside the URL, that means it - * is part of the URL. - * - * Examples: - * - * foo http://www.pokemon.com/Pikachu_(Electric) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo (http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric) - * - * foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => http://www.pokemon.com/Pikachu_(Electric)) - * - * (foo http://www.pokemon.com/Pikachu_(Electric)) bar - * => foo http://www.pokemon.com/Pikachu_(Electric) - */ - - for buf_end >= 0 && orig_data[buf_end] != '\n' && open_delim != 0 { - if orig_data[buf_end] == data[link_end-1] { - open_delim++ - } - - if orig_data[buf_end] == copen { - open_delim-- - } - - buf_end-- - } - - if open_delim == 0 { - link_end-- - } - } - - if rndr.mk.autolink != nil { - u_link := bytes.NewBuffer(nil) - unescape_text(u_link, data[:link_end]) - - rndr.mk.autolink(out, u_link.Bytes(), LINK_TYPE_NORMAL, rndr.mk.opaque) - } - - return link_end -} - -var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")} - -func isSafeLink(link []byte) bool { - for _, prefix := range validUris { - // TODO: handle unicode here - // case-insensitive prefix test - if len(link) > len(prefix) && !less(link[:len(prefix)], prefix) && !less(prefix, link[:len(prefix)]) && isalnum(link[len(prefix)]) { - return true - } - } - - return false -} - -// return the length of the given tag, or 0 is it's not valid -func tagLength(data []byte, autolink *int) int { - var i, j int - - // a valid tag can't be shorter than 3 chars - if len(data) < 3 { - return 0 - } - - // begins with a '<' optionally followed by '/', followed by letter or number - if data[0] != '<' { - return 0 - } - if data[1] == '/' { - i = 2 - } else { - i = 1 - } - - if !isalnum(data[i]) { - return 0 - } - - // scheme test - *autolink = LINK_TYPE_NOT_AUTOLINK - - // try to find the beggining of an URI - for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') { - i++ - } - - if i > 1 && data[i] == '@' { - if j = isMailtoAutolink(data[i:]); j != 0 { - *autolink = LINK_TYPE_EMAIL - return i + j - } - } - - if i > 2 && data[i] == ':' { - *autolink = LINK_TYPE_NORMAL - i++ - } - - // complete autolink test: no whitespace or ' or " - switch { - case i >= len(data): - *autolink = LINK_TYPE_NOT_AUTOLINK - case *autolink != 0: - j = i - - for i < len(data) { - if data[i] == '\\' { - i += 2 - } else { - if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) { - break - } else { - i++ - } - } - - } - - if i >= len(data) { - return 0 - } - if i > j && data[i] == '>' { - return i + 1 - } - - // one of the forbidden chars has been found - *autolink = LINK_TYPE_NOT_AUTOLINK - } - - // look for something looking like a tag end - for i < len(data) && data[i] != '>' { - i++ - } - if i >= len(data) { - return 0 - } - return i + 1 -} - -// look for the address part of a mail autolink and '>' -// this is less strict than the original markdown e-mail address matching -func isMailtoAutolink(data []byte) int { - nb := 0 - - // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' - for i := 0; i < len(data); i++ { - if isalnum(data[i]) { - continue - } - - switch data[i] { - case '@': - nb++ - - case '-', '.', '_': - break - - case '>': - if nb == 1 { - return i + 1 - } else { - return 0 - } - default: - return 0 - } - } - - return 0 -} - -// look for the next emph char, skipping other constructs -func inlineHelperFindEmphChar(data []byte, c byte) int { - i := 1 - - for i < len(data) { - for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' { - i++ - } - if i >= len(data) { - return 0 - } - if data[i] == c { - return i - } - - // do not count escaped chars - if i != 0 && data[i-1] == '\\' { - i++ - continue - } - - if data[i] == '`' { - // skip a code span - tmp_i := 0 - i++ - for i < len(data) && data[i] != '`' { - if tmp_i == 0 && data[i] == c { - tmp_i = i - } - i++ - } - if i >= len(data) { - return tmp_i - } - i++ - } else { - if data[i] == '[' { - // skip a link - tmp_i := 0 - i++ - for i < len(data) && data[i] != ']' { - if tmp_i == 0 && data[i] == c { - tmp_i = i - } - i++ - } - i++ - for i < len(data) && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n') { - i++ - } - if i >= len(data) { - return tmp_i - } - if data[i] != '[' && data[i] != '(' { // not a link - if tmp_i > 0 { - return tmp_i - } else { - continue - } - } - cc := data[i] - i++ - for i < len(data) && data[i] != cc { - if tmp_i == 0 && data[i] == c { - tmp_i = i - } - i++ - } - if i >= len(data) { - return tmp_i - } - i++ - } - } - } - return 0 -} - -func inlineHelperEmph1(out *bytes.Buffer, rndr *render, data []byte, c byte) int { - i := 0 - - if rndr.mk.emphasis == nil { - return 0 - } - - // skip one symbol if coming from emph3 - if len(data) > 1 && data[0] == c && data[1] == c { - i = 1 - } - - for i < len(data) { - length := inlineHelperFindEmphChar(data[i:], c) - if length == 0 { - return 0 - } - i += length - if i >= len(data) { - return 0 - } - - if i+1 < len(data) && data[i+1] == c { - i++ - continue - } - - if data[i] == c && !isspace(data[i-1]) { - - if rndr.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 { - if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { - continue - } - } - - work := bytes.NewBuffer(nil) - parseInline(work, rndr, data[:i]) - r := rndr.mk.emphasis(out, work.Bytes(), rndr.mk.opaque) - if r > 0 { - return i + 1 - } else { - return 0 - } - } - } - - return 0 -} - -func inlineHelperEmph2(out *bytes.Buffer, rndr *render, data []byte, c byte) int { - render_method := rndr.mk.doubleEmphasis - if c == '~' { - render_method = rndr.mk.strikethrough - } - - if render_method == nil { - return 0 - } - - i := 0 - - for i < len(data) { - length := inlineHelperFindEmphChar(data[i:], c) - if length == 0 { - return 0 - } - i += length - - if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) { - work := bytes.NewBuffer(nil) - parseInline(work, rndr, data[:i]) - r := render_method(out, work.Bytes(), rndr.mk.opaque) - if r > 0 { - return i + 2 - } else { - return 0 - } - } - i++ - } - return 0 -} - -func inlineHelperEmph3(out *bytes.Buffer, rndr *render, data []byte, offset int, c byte) int { - i := 0 - orig_data := data - data = data[offset:] - - for i < len(data) { - length := inlineHelperFindEmphChar(data[i:], c) - if length == 0 { - return 0 - } - i += length - - // skip whitespace preceded symbols - if data[i] != c || isspace(data[i-1]) { - continue - } - - switch { - case (i+2 < len(data) && data[i+1] == c && data[i+2] == c && rndr.mk.tripleEmphasis != nil): - // triple symbol found - work := bytes.NewBuffer(nil) - - parseInline(work, rndr, data[:i]) - r := rndr.mk.tripleEmphasis(out, work.Bytes(), rndr.mk.opaque) - if r > 0 { - return i + 3 - } else { - return 0 - } - case (i+1 < len(data) && data[i+1] == c): - // double symbol found, hand over to emph1 - length = inlineHelperEmph1(out, rndr, orig_data[offset-2:], c) - if length == 0 { - return 0 - } else { - return length - 2 - } - default: - // single symbol found, hand over to emph2 - length = inlineHelperEmph2(out, rndr, orig_data[offset-1:], c) - if length == 0 { - return 0 - } else { - return length - 1 - } - } - } - return 0 -} - - -// -// Block parsing -// Functions to parse block-level elements. -// - -// parse block-level data -func parseBlock(out *bytes.Buffer, rndr *render, data []byte) { - if rndr.nesting >= rndr.maxNesting { - return - } - rndr.nesting++ - - for len(data) > 0 { - if isPrefixHeader(rndr, data) { - data = data[blockPrefixHeader(out, rndr, data):] - continue - } - if data[0] == '<' && rndr.mk.blockhtml != nil { - if i := blockHtml(out, rndr, data, true); i > 0 { - data = data[i:] - continue - } - } - if i := isEmpty(data); i > 0 { - data = data[i:] - continue - } - if isHrule(data) { - if rndr.mk.hrule != nil { - rndr.mk.hrule(out, rndr.mk.opaque) - } - var i int - for i = 0; i < len(data) && data[i] != '\n'; i++ { - } - data = data[i:] - continue - } - if rndr.flags&EXTENSION_FENCED_CODE != 0 { - if i := blockFencedCode(out, rndr, data); i > 0 { - data = data[i:] - continue - } - } - if rndr.flags&EXTENSION_TABLES != 0 { - if i := blockTable(out, rndr, data); i > 0 { - data = data[i:] - continue - } - } - if blockQuotePrefix(data) > 0 { - data = data[blockQuote(out, rndr, data):] - continue - } - if blockCodePrefix(data) > 0 { - data = data[blockCode(out, rndr, data):] - continue - } - if blockUliPrefix(data) > 0 { - data = data[blockList(out, rndr, data, 0):] - continue - } - if blockOliPrefix(data) > 0 { - data = data[blockList(out, rndr, data, LIST_TYPE_ORDERED):] - continue - } - - data = data[blockParagraph(out, rndr, data):] - } - - rndr.nesting-- -} - -func isPrefixHeader(rndr *render, data []byte) bool { - if data[0] != '#' { - return false - } - - if rndr.flags&EXTENSION_SPACE_HEADERS != 0 { - level := 0 - for level < len(data) && level < 6 && data[level] == '#' { - level++ - } - if level < len(data) && data[level] != ' ' && data[level] != '\t' { - return false - } - } - return true -} - -func blockPrefixHeader(out *bytes.Buffer, rndr *render, data []byte) int { - level := 0 - for level < len(data) && level < 6 && data[level] == '#' { - level++ - } - i, end := 0, 0 - for i = level; i < len(data) && (data[i] == ' ' || data[i] == '\t'); i++ { - } - for end = i; end < len(data) && data[end] != '\n'; end++ { - } - skip := end - for end > 0 && data[end-1] == '#' { - end-- - } - for end > 0 && (data[end-1] == ' ' || data[end-1] == '\t') { - end-- - } - if end > i { - work := bytes.NewBuffer(nil) - parseInline(work, rndr, data[i:end]) - if rndr.mk.header != nil { - rndr.mk.header(out, work.Bytes(), level, rndr.mk.opaque) - } - } - return skip -} - -func isUnderlinedHeader(data []byte) int { - i := 0 - - // test of level 1 header - if data[i] == '=' { - for i = 1; i < len(data) && data[i] == '='; i++ { - } - for i < len(data) && (data[i] == ' ' || data[i] == '\t') { - i++ - } - if i >= len(data) || data[i] == '\n' { - return 1 - } else { - return 0 - } - } - - // test of level 2 header - if data[i] == '-' { - for i = 1; i < len(data) && data[i] == '-'; i++ { - } - for i < len(data) && (data[i] == ' ' || data[i] == '\t') { - i++ - } - if i >= len(data) || data[i] == '\n' { - return 2 - } else { - return 0 - } - } - - return 0 -} - -func blockHtml(out *bytes.Buffer, rndr *render, data []byte, do_render bool) int { - var i, j int - - // identify the opening tag - if len(data) < 2 || data[0] != '<' { - return 0 - } - curtag, tagfound := blockHtmlFindTag(data[1:]) - - // handle special cases - if !tagfound { - - // HTML comment, laxist form - if len(data) > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-' { - i = 5 - - for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { - i++ - } - i++ - - if i < len(data) { - j = isEmpty(data[i:]) - } - - if j > 0 { - size := i + j - if do_render && rndr.mk.blockhtml != nil { - rndr.mk.blockhtml(out, data[:size], rndr.mk.opaque) - } - return size - } - } - - // HR, which is the only self-closing block tag considered - if len(data) > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R') { - i = 3 - for i < len(data) && data[i] != '>' { - i++ - } - - if i+1 < len(data) { - i++ - j = isEmpty(data[i:]) - if j > 0 { - size := i + j - if do_render && rndr.mk.blockhtml != nil { - rndr.mk.blockhtml(out, data[:size], rndr.mk.opaque) - } - return size - } - } - } - - // no special case recognized - return 0 - } - - // look for an unindented matching closing tag - // followed by a blank line - i = 1 - found := false - - // if not found, try a second pass looking for indented match - // but not if tag is "ins" or "del" (following original Markdown.pl) - if curtag != "ins" && curtag != "del" { - i = 1 - for i < len(data) { - i++ - for i < len(data) && !(data[i-1] == '<' && data[i] == '/') { - i++ - } - - if i+2+len(curtag) >= len(data) { - break - } - - j = blockHtmlFindEnd(curtag, rndr, data[i-1:]) - - if j > 0 { - i += j - 1 - found = true - break - } - } - } - - if !found { - return 0 - } - - // the end of the block has been found - if do_render && rndr.mk.blockhtml != nil { - rndr.mk.blockhtml(out, data[:i], rndr.mk.opaque) - } - - return i -} - -func blockHtmlFindTag(data []byte) (string, bool) { - i := 0 - for i < len(data) && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z')) { - i++ - } - if i >= len(data) { - return "", false - } - key := string(data[:i]) - if block_tags[key] { - return key, true - } - return "", false -} - -func blockHtmlFindEnd(tag string, rndr *render, data []byte) int { - // assume data[0] == '<' && data[1] == '/' already tested - - // check if tag is a match - if len(tag)+3 >= len(data) || bytes.Compare(data[2:2+len(tag)], []byte(tag)) != 0 || data[len(tag)+2] != '>' { - return 0 - } - - // check white lines - i := len(tag) + 3 - w := 0 - if i < len(data) { - if w = isEmpty(data[i:]); w == 0 { - return 0 // non-blank after tag - } - } - i += w - w = 0 - - if rndr.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { - if i < len(data) { - w = isEmpty(data[i:]) - } - } else { - if i < len(data) { - if w = isEmpty(data[i:]); w == 0 { - return 0 // non-blank line after tag line - } - } - } - - return i + w -} - -func isEmpty(data []byte) int { - var i int - for i = 0; i < len(data) && data[i] != '\n'; i++ { - if data[i] != ' ' && data[i] != '\t' { - return 0 - } - } - return i + 1 -} - -func isHrule(data []byte) bool { - // skip initial spaces - if len(data) < 3 { - return false - } - i := 0 - if data[0] == ' ' { - i++ - if data[1] == ' ' { - i++ - if data[2] == ' ' { - i++ - } - } - } - - // look at the hrule char - if i+2 >= len(data) || (data[i] != '*' && data[i] != '-' && data[i] != '_') { - return false - } - c := data[i] - - // the whole line must be the char or whitespace - n := 0 - for i < len(data) && data[i] != '\n' { - switch { - case data[i] == c: - n++ - case data[i] != ' ' && data[i] != '\t': - return false - } - i++ - } - - return n >= 3 -} - -func isFencedCode(data []byte, syntax **string) int { - i, n := 0, 0 - - // skip initial spaces - if len(data) < 3 { - return 0 - } - if data[0] == ' ' { - i++ - if data[1] == ' ' { - i++ - if data[2] == ' ' { - i++ - } - } - } - - // look at the hrule char - if i+2 >= len(data) || !(data[i] == '~' || data[i] == '`') { - return 0 - } - - c := data[i] - - // the whole line must be the char or whitespace - for i < len(data) && data[i] == c { - n++ - i++ - } - - if n < 3 { - return 0 - } - - if syntax != nil { - syn := 0 - - for i < len(data) && (data[i] == ' ' || data[i] == '\t') { - i++ - } - - syntax_start := i - - if i < len(data) && data[i] == '{' { - i++ - syntax_start++ - - for i < len(data) && data[i] != '}' && data[i] != '\n' { - syn++ - i++ - } - - if i == len(data) || data[i] != '}' { - return 0 - } - - // string all whitespace at the beginning and the end - // of the {} block - for syn > 0 && isspace(data[syntax_start]) { - syntax_start++ - syn-- - } - - for syn > 0 && isspace(data[syntax_start+syn-1]) { - syn-- - } - - i++ - } else { - for i < len(data) && !isspace(data[i]) { - syn++ - i++ - } - } - - language := string(data[syntax_start : syntax_start+syn]) - *syntax = &language - } - - for i < len(data) && data[i] != '\n' { - if !isspace(data[i]) { - return 0 - } - i++ - } - - return i + 1 -} - -func blockFencedCode(out *bytes.Buffer, rndr *render, data []byte) int { - var lang *string - beg := isFencedCode(data, &lang) - if beg == 0 { - return 0 - } - - work := bytes.NewBuffer(nil) - - for beg < len(data) { - fence_end := isFencedCode(data[beg:], nil) - if fence_end != 0 { - beg += fence_end - break - } - - var end int - for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { - } - - if beg < end { - // verbatim copy to the working buffer, escaping entities - if isEmpty(data[beg:]) > 0 { - work.WriteByte('\n') - } else { - work.Write(data[beg:end]) - } - } - beg = end - } - - if work.Len() > 0 && work.Bytes()[work.Len()-1] != '\n' { - work.WriteByte('\n') - } - - if rndr.mk.blockcode != nil { - syntax := "" - if lang != nil { - syntax = *lang - } - - rndr.mk.blockcode(out, work.Bytes(), syntax, rndr.mk.opaque) - } - - return beg -} - -func blockTable(out *bytes.Buffer, rndr *render, data []byte) int { - header_work := bytes.NewBuffer(nil) - i, columns, col_data := blockTableHeader(header_work, rndr, data) - if i > 0 { - body_work := bytes.NewBuffer(nil) - - for i < len(data) { - pipes, row_start := 0, i - for ; i < len(data) && data[i] != '\n'; i++ { - if data[i] == '|' { - pipes++ - } - } - - if pipes == 0 || i == len(data) { - i = row_start - break - } - - blockTableRow(body_work, rndr, data[row_start:i], columns, col_data) - i++ - } - - if rndr.mk.table != nil { - rndr.mk.table(out, header_work.Bytes(), body_work.Bytes(), rndr.mk.opaque) - } - } - - return i -} - -func blockTableHeader(out *bytes.Buffer, rndr *render, data []byte) (size int, columns int, column_data []int) { - i, pipes := 0, 0 - column_data = []int{} - for i = 0; i < len(data) && data[i] != '\n'; i++ { - if data[i] == '|' { - pipes++ - } - } - - if i == len(data) || pipes == 0 { - return 0, 0, column_data - } - - header_end := i - - if data[0] == '|' { - pipes-- - } - - if i > 2 && data[i-1] == '|' { - pipes-- - } - - columns = pipes + 1 - column_data = make([]int, columns) - - // parse the header underline - i++ - if i < len(data) && data[i] == '|' { - i++ - } - - under_end := i - for under_end < len(data) && data[under_end] != '\n' { - under_end++ - } - - col := 0 - for ; col < columns && i < under_end; col++ { - dashes := 0 - - for i < under_end && (data[i] == ' ' || data[i] == '\t') { - i++ - } - - if data[i] == ':' { - i++ - column_data[col] |= TABLE_ALIGNMENT_LEFT - dashes++ - } - - for i < under_end && data[i] == '-' { - i++ - dashes++ - } - - if i < under_end && data[i] == ':' { - i++ - column_data[col] |= TABLE_ALIGNMENT_RIGHT - dashes++ - } - - for i < under_end && (data[i] == ' ' || data[i] == '\t') { - i++ - } - - if i < under_end && data[i] != '|' { - break - } - - if dashes < 3 { - break - } - - i++ - } - - if col < columns { - return 0, 0, column_data - } - - blockTableRow(out, rndr, data[:header_end], columns, column_data) - size = under_end + 1 - return -} - -func blockTableRow(out *bytes.Buffer, rndr *render, data []byte, columns int, col_data []int) { - i, col := 0, 0 - row_work := bytes.NewBuffer(nil) - - if i < len(data) && data[i] == '|' { - i++ - } - - for col = 0; col < columns && i < len(data); col++ { - for i < len(data) && isspace(data[i]) { - i++ - } - - cell_start := i - - for i < len(data) && data[i] != '|' { - i++ - } - - cell_end := i - 1 - - for cell_end > cell_start && isspace(data[cell_end]) { - cell_end-- - } - - cell_work := bytes.NewBuffer(nil) - parseInline(cell_work, rndr, data[cell_start:cell_end+1]) - - if rndr.mk.tableCell != nil { - cdata := 0 - if col < len(col_data) { - cdata = col_data[col] - } - rndr.mk.tableCell(row_work, cell_work.Bytes(), cdata, rndr.mk.opaque) - } - - i++ - } - - for ; col < columns; col++ { - empty_cell := []byte{} - if rndr.mk.tableCell != nil { - cdata := 0 - if col < len(col_data) { - cdata = col_data[col] - } - rndr.mk.tableCell(row_work, empty_cell, cdata, rndr.mk.opaque) - } - } - - if rndr.mk.tableRow != nil { - rndr.mk.tableRow(out, row_work.Bytes(), rndr.mk.opaque) - } -} - -// returns blockquote prefix length -func blockQuotePrefix(data []byte) int { - i := 0 - for i < len(data) && i < 3 && data[i] == ' ' { - i++ - } - if i < len(data) && data[i] == '>' { - if i+1 < len(data) && (data[i+1] == ' ' || data[i+1] == '\t') { - return i + 2 - } - return i + 1 - } - return 0 -} - -// parse a blockquote fragment -func blockQuote(out *bytes.Buffer, rndr *render, data []byte) int { - block := bytes.NewBuffer(nil) - work := bytes.NewBuffer(nil) - beg, end := 0, 0 - for beg < len(data) { - for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { - } - - if pre := blockQuotePrefix(data[beg:]); pre > 0 { - beg += pre // skip prefix - } else { - // empty line followed by non-quote line - if isEmpty(data[beg:]) > 0 && (end >= len(data) || (blockQuotePrefix(data[end:]) == 0 && isEmpty(data[end:]) == 0)) { - break - } - } - - if beg < end { // copy into the in-place working buffer - work.Write(data[beg:end]) - } - beg = end - } - - parseBlock(block, rndr, work.Bytes()) - if rndr.mk.blockquote != nil { - rndr.mk.blockquote(out, block.Bytes(), rndr.mk.opaque) - } - return end -} - -// returns prefix length for block code -func blockCodePrefix(data []byte) int { - if len(data) > 0 && data[0] == '\t' { - return 1 - } - if len(data) > 3 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { - return 4 - } - return 0 -} - -func blockCode(out *bytes.Buffer, rndr *render, data []byte) int { - work := bytes.NewBuffer(nil) - - beg, end := 0, 0 - for beg < len(data) { - for end = beg + 1; end < len(data) && data[end-1] != '\n'; end++ { - } - - if pre := blockCodePrefix(data[beg:end]); pre > 0 { - beg += pre - } else { - if isEmpty(data[beg:end]) == 0 { - // non-empty non-prefixed line breaks the pre - break - } - } - - if beg < end { - // verbatim copy to the working buffer, escaping entities - if isEmpty(data[beg:end]) > 0 { - work.WriteByte('\n') - } else { - work.Write(data[beg:end]) - } - } - beg = end - } - - // trim all the \n off the end of work - workbytes := work.Bytes() - n := 0 - for len(workbytes) > n && workbytes[len(workbytes)-n-1] == '\n' { - n++ - } - if n > 0 { - work = bytes.NewBuffer(workbytes[:len(workbytes)-n]) - } - - work.WriteByte('\n') - - if rndr.mk.blockcode != nil { - rndr.mk.blockcode(out, work.Bytes(), "", rndr.mk.opaque) - } - - return beg -} - -// returns unordered list item prefix -func blockUliPrefix(data []byte) int { - i := 0 - for i < len(data) && i < 3 && data[i] == ' ' { - i++ - } - if i+1 >= len(data) || (data[i] != '*' && data[i] != '+' && data[i] != '-') || (data[i+1] != ' ' && data[i+1] != '\t') { - return 0 - } - return i + 2 -} - -// returns ordered list item prefix -func blockOliPrefix(data []byte) int { - i := 0 - for i < len(data) && i < 3 && data[i] == ' ' { - i++ - } - if i >= len(data) || data[i] < '0' || data[i] > '9' { - return 0 - } - for i < len(data) && data[i] >= '0' && data[i] <= '9' { - i++ - } - if i+1 >= len(data) || data[i] != '.' || (data[i+1] != ' ' && data[i+1] != '\t') { - return 0 - } - return i + 2 -} - -// parse ordered or unordered list block -func blockList(out *bytes.Buffer, rndr *render, data []byte, flags int) int { - work := bytes.NewBuffer(nil) - - i, j := 0, 0 - for i < len(data) { - j = blockListItem(work, rndr, data[i:], &flags) - i += j - - if j == 0 || flags&LIST_ITEM_END_OF_LIST != 0 { - break - } - } - - if rndr.mk.list != nil { - rndr.mk.list(out, work.Bytes(), flags, rndr.mk.opaque) - } - return i -} - -// parse a single list item -// assumes initial prefix is already removed -func blockListItem(out *bytes.Buffer, rndr *render, data []byte, flags *int) int { - // keep track of the first indentation prefix - beg, end, pre, sublist, orgpre, i := 0, 0, 0, 0, 0, 0 - - for orgpre < 3 && orgpre < len(data) && data[orgpre] == ' ' { - orgpre++ - } - - beg = blockUliPrefix(data) - if beg == 0 { - beg = blockOliPrefix(data) - } - if beg == 0 { - return 0 - } - - // skip leading whitespace on first line - for beg < len(data) && data[beg] == ' ' { - beg++ - } - - // skip to the beginning of the following line - end = beg - for end < len(data) && data[end-1] != '\n' { - end++ - } - - // get working buffers - work := bytes.NewBuffer(nil) - inter := bytes.NewBuffer(nil) - - // put the first line into the working buffer - work.Write(data[beg:end]) - beg = end - - // process the following lines - in_empty, has_inside_empty := false, false - for beg < len(data) { - end++ - - for end < len(data) && data[end-1] != '\n' { - end++ - } - - // process an empty line - if isEmpty(data[beg:end]) > 0 { - in_empty = true - beg = end - continue - } - - // calculate the indentation - i = 0 - for i < 4 && beg+i < end && data[beg+i] == ' ' { - i++ - } - - pre = i - if data[beg] == '\t' { - i = 1 - pre = 8 - } - - // check for a new item - chunk := data[beg+i : end] - if (blockUliPrefix(chunk) > 0 && !isHrule(chunk)) || blockOliPrefix(chunk) > 0 { - if in_empty { - has_inside_empty = true - } - - if pre == orgpre { // the following item must have the same indentation - break - } - - if sublist == 0 { - sublist = work.Len() - } - } else { - // only join indented stuff after empty lines - if in_empty && i < 4 && data[beg] != '\t' { - *flags |= LIST_ITEM_END_OF_LIST - break - } else { - if in_empty { - work.WriteByte('\n') - has_inside_empty = true - } - } - } - - in_empty = false - - // add the line into the working buffer without prefix - work.Write(data[beg+i : end]) - beg = end - } - - // render li contents - if has_inside_empty { - *flags |= LIST_ITEM_CONTAINS_BLOCK - } - - workbytes := work.Bytes() - if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 { - // intermediate render of block li - if sublist > 0 && sublist < len(workbytes) { - parseBlock(inter, rndr, workbytes[:sublist]) - parseBlock(inter, rndr, workbytes[sublist:]) - } else { - parseBlock(inter, rndr, workbytes) - } - } else { - // intermediate render of inline li - if sublist > 0 && sublist < len(workbytes) { - parseInline(inter, rndr, workbytes[:sublist]) - parseBlock(inter, rndr, workbytes[sublist:]) - } else { - parseInline(inter, rndr, workbytes) - } - } - - // render li itself - if rndr.mk.listitem != nil { - rndr.mk.listitem(out, inter.Bytes(), *flags, rndr.mk.opaque) - } - - return beg -} - -func blockParagraph(out *bytes.Buffer, rndr *render, data []byte) int { - i, end, level := 0, 0, 0 - - for i < len(data) { - for end = i + 1; end < len(data) && data[end-1] != '\n'; end++ { - } - - if isEmpty(data[i:]) > 0 { - break - } - if level = isUnderlinedHeader(data[i:]); level > 0 { - break - } - - if rndr.flags&EXTENSION_LAX_HTML_BLOCKS != 0 { - if data[i] == '<' && rndr.mk.blockhtml != nil && blockHtml(out, rndr, data[i:], false) > 0 { - end = i - break - } - } - - if isPrefixHeader(rndr, data[i:]) || isHrule(data[i:]) { - end = i - break - } - - i = end - } - - work := data - size := i - for size > 0 && work[size-1] == '\n' { - size-- - } - - if level == 0 { - tmp := bytes.NewBuffer(nil) - parseInline(tmp, rndr, work[:size]) - if rndr.mk.paragraph != nil { - rndr.mk.paragraph(out, tmp.Bytes(), rndr.mk.opaque) - } - } else { - if size > 0 { - beg := 0 - i = size - size-- - - for size > 0 && work[size] != '\n' { - size-- - } - - beg = size + 1 - for size > 0 && work[size-1] == '\n' { - size-- - } - - if size > 0 { - tmp := bytes.NewBuffer(nil) - parseInline(tmp, rndr, work[:size]) - if rndr.mk.paragraph != nil { - rndr.mk.paragraph(out, tmp.Bytes(), rndr.mk.opaque) - } - - work = work[beg:] - size = i - beg - } else { - size = i - } - } - - header_work := bytes.NewBuffer(nil) - parseInline(header_work, rndr, work[:size]) - - if rndr.mk.header != nil { - rndr.mk.header(out, header_work.Bytes(), level, rndr.mk.opaque) - } - } - - return end -} - - // // Link references // diff --git a/smartypants.go b/smartypants.go index 7339bdb..f47ec2a 100644 --- a/smartypants.go +++ b/smartypants.go @@ -16,12 +16,12 @@ import ( "bytes" ) -type smartypants_data struct { - in_squote bool - in_dquote bool +type smartypantsData struct { + inSingleQuote bool + inDoubleQuote bool } -func word_boundary(c byte) bool { +func wordBoundary(c byte) bool { return c == 0 || isspace(c) || ispunct(c) } @@ -36,26 +36,26 @@ func isdigit(c byte) bool { return c >= '0' && c <= '9' } -func smartypants_quotes(ob *bytes.Buffer, previous_char byte, next_char byte, quote byte, is_open *bool) bool { +func smartQuotesHelper(ob *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool) bool { switch { // edge of the buffer is likely to be a tag that we don't get to see, // so we assume there is text there - case word_boundary(previous_char) && previous_char != 0 && next_char == 0: - *is_open = true - case previous_char == 0 && word_boundary(next_char) && next_char != 0: - *is_open = false - case word_boundary(previous_char) && !word_boundary(next_char): - *is_open = true - case !word_boundary(previous_char) && word_boundary(next_char): - *is_open = false - case !word_boundary(previous_char) && !word_boundary(next_char): - *is_open = true + case wordBoundary(previousChar) && previousChar != 0 && nextChar == 0: + *isOpen = true + case previousChar == 0 && wordBoundary(nextChar) && nextChar != 0: + *isOpen = false + case wordBoundary(previousChar) && !wordBoundary(nextChar): + *isOpen = true + case !wordBoundary(previousChar) && wordBoundary(nextChar): + *isOpen = false + case !wordBoundary(previousChar) && !wordBoundary(nextChar): + *isOpen = true default: - *is_open = !*is_open + *isOpen = !*isOpen } ob.WriteByte('&') - if *is_open { + if *isOpen { ob.WriteByte('l') } else { ob.WriteByte('r') @@ -65,21 +65,21 @@ func smartypants_quotes(ob *bytes.Buffer, previous_char byte, next_char byte, qu return true } -func smartypants_cb__squote(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartSquote(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if len(text) >= 2 { t1 := tolower(text[1]) if t1 == '\'' { - next_char := byte(0) + nextChar := byte(0) if len(text) >= 3 { - next_char = text[2] + nextChar = text[2] } - if smartypants_quotes(ob, previous_char, next_char, 'd', &smrt.in_dquote) { + if smartQuotesHelper(ob, previousChar, nextChar, 'd', &smrt.inDoubleQuote) { return 1 } } - if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || word_boundary(text[2])) { + if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) { ob.WriteString("’") return 0 } @@ -87,18 +87,18 @@ func smartypants_cb__squote(ob *bytes.Buffer, smrt *smartypants_data, previous_c if len(text) >= 3 { t2 := tolower(text[2]) - if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) && (len(text) < 4 || word_boundary(text[3])) { + if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) && (len(text) < 4 || wordBoundary(text[3])) { ob.WriteString("’") return 0 } } } - next_char := byte(0) + nextChar := byte(0) if len(text) > 1 { - next_char = text[1] + nextChar = text[1] } - if smartypants_quotes(ob, previous_char, next_char, 's', &smrt.in_squote) { + if smartQuotesHelper(ob, previousChar, nextChar, 's', &smrt.inSingleQuote) { return 0 } @@ -106,7 +106,7 @@ func smartypants_cb__squote(ob *bytes.Buffer, smrt *smartypants_data, previous_c return 0 } -func smartypants_cb__parens(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartParens(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if len(text) >= 3 { t1 := tolower(text[1]) t2 := tolower(text[2]) @@ -131,14 +131,14 @@ func smartypants_cb__parens(ob *bytes.Buffer, smrt *smartypants_data, previous_c return 0 } -func smartypants_cb__dash(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartDash(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if len(text) >= 2 { if text[1] == '-' { ob.WriteString("—") return 1 } - if word_boundary(previous_char) && word_boundary(text[1]) { + if wordBoundary(previousChar) && wordBoundary(text[1]) { ob.WriteString("–") return 0 } @@ -148,7 +148,7 @@ func smartypants_cb__dash(ob *bytes.Buffer, smrt *smartypants_data, previous_cha return 0 } -func smartypants_cb__dash_latex(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartDashLatex(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if len(text) >= 3 && text[1] == '-' && text[2] == '-' { ob.WriteString("—") return 2 @@ -162,13 +162,13 @@ func smartypants_cb__dash_latex(ob *bytes.Buffer, smrt *smartypants_data, previo return 0 } -func smartypants_cb__amp(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartAmp(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if bytes.HasPrefix(text, []byte(""")) { - next_char := byte(0) + nextChar := byte(0) if len(text) >= 7 { - next_char = text[6] + nextChar = text[6] } - if smartypants_quotes(ob, previous_char, next_char, 'd', &smrt.in_dquote) { + if smartQuotesHelper(ob, previousChar, nextChar, 'd', &smrt.inDoubleQuote) { return 5 } } @@ -181,7 +181,7 @@ func smartypants_cb__amp(ob *bytes.Buffer, smrt *smartypants_data, previous_char return 0 } -func smartypants_cb__period(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartPeriod(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if len(text) >= 3 && text[1] == '.' && text[2] == '.' { ob.WriteString("…") return 2 @@ -196,13 +196,13 @@ func smartypants_cb__period(ob *bytes.Buffer, smrt *smartypants_data, previous_c return 0 } -func smartypants_cb__backtick(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartBacktick(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { if len(text) >= 2 && text[1] == '`' { - next_char := byte(0) + nextChar := byte(0) if len(text) >= 3 { - next_char = text[2] + nextChar = text[2] } - if smartypants_quotes(ob, previous_char, next_char, 'd', &smrt.in_dquote) { + if smartQuotesHelper(ob, previousChar, nextChar, 'd', &smrt.inDoubleQuote) { return 1 } } @@ -210,8 +210,8 @@ func smartypants_cb__backtick(ob *bytes.Buffer, smrt *smartypants_data, previous return 0 } -func smartypants_cb__number_generic(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { - if word_boundary(previous_char) && len(text) >= 3 { +func smartNumberGeneric(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if wordBoundary(previousChar) && len(text) >= 3 { // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b num_end := 0 for len(text) > num_end && isdigit(text[num_end]) { @@ -233,7 +233,7 @@ func smartypants_cb__number_generic(ob *bytes.Buffer, smrt *smartypants_data, pr ob.WriteByte(text[0]) return 0 } - if len(text) == den_end || word_boundary(text[den_end]) { + if len(text) == den_end || wordBoundary(text[den_end]) { ob.WriteString("") ob.Write(text[:num_end]) ob.WriteString("") @@ -247,24 +247,24 @@ func smartypants_cb__number_generic(ob *bytes.Buffer, smrt *smartypants_data, pr return 0 } -func smartypants_cb__number(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { - if word_boundary(previous_char) && len(text) >= 3 { +func smartNumber(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + if wordBoundary(previousChar) && len(text) >= 3 { if text[0] == '1' && text[1] == '/' && text[2] == '2' { - if len(text) < 4 || word_boundary(text[3]) { + if len(text) < 4 || wordBoundary(text[3]) { ob.WriteString("½") return 2 } } if text[0] == '1' && text[1] == '/' && text[2] == '4' { - if len(text) < 4 || word_boundary(text[3]) || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') { + if len(text) < 4 || wordBoundary(text[3]) || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') { ob.WriteString("¼") return 2 } } if text[0] == '3' && text[1] == '/' && text[2] == '4' { - if len(text) < 4 || word_boundary(text[3]) || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') { + if len(text) < 4 || wordBoundary(text[3]) || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') { ob.WriteString("¾") return 2 } @@ -275,19 +275,19 @@ func smartypants_cb__number(ob *bytes.Buffer, smrt *smartypants_data, previous_c return 0 } -func smartypants_cb__dquote(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { - next_char := byte(0) +func smartDquote(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { + nextChar := byte(0) if len(text) > 1 { - next_char = text[1] + nextChar = text[1] } - if !smartypants_quotes(ob, previous_char, next_char, 'd', &smrt.in_dquote) { + if !smartQuotesHelper(ob, previousChar, nextChar, 'd', &smrt.inDoubleQuote) { ob.WriteString(""") } return 0 } -func smartypants_cb__ltag(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int { +func smartLtag(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int { i := 0 for i < len(text) && text[i] != '>' { @@ -298,42 +298,42 @@ func smartypants_cb__ltag(ob *bytes.Buffer, smrt *smartypants_data, previous_cha return i } -type smartypants_cb func(ob *bytes.Buffer, smrt *smartypants_data, previous_char byte, text []byte) int +type smartCallback func(ob *bytes.Buffer, smrt *smartypantsData, previousChar byte, text []byte) int -type SmartypantsRenderer [256]smartypants_cb +type SmartypantsRenderer [256]smartCallback func Smartypants(flags int) *SmartypantsRenderer { r := new(SmartypantsRenderer) - r['"'] = smartypants_cb__dquote - r['&'] = smartypants_cb__amp - r['\''] = smartypants_cb__squote - r['('] = smartypants_cb__parens + r['"'] = smartDquote + r['&'] = smartAmp + r['\''] = smartSquote + r['('] = smartParens if flags&HTML_SMARTYPANTS_LATEX_DASHES == 0 { - r['-'] = smartypants_cb__dash + r['-'] = smartDash } else { - r['-'] = smartypants_cb__dash_latex + r['-'] = smartDashLatex } - r['.'] = smartypants_cb__period + r['.'] = smartPeriod if flags&HTML_SMARTYPANTS_FRACTIONS == 0 { - r['1'] = smartypants_cb__number - r['3'] = smartypants_cb__number + r['1'] = smartNumber + r['3'] = smartNumber } else { for ch := '1'; ch <= '9'; ch++ { - r[ch] = smartypants_cb__number_generic + r[ch] = smartNumberGeneric } } - r['<'] = smartypants_cb__ltag - r['`'] = smartypants_cb__backtick + r['<'] = smartLtag + r['`'] = smartBacktick return r } -func rndr_smartypants(ob *bytes.Buffer, text []byte, opaque interface{}) { +func htmlSmartypants(ob *bytes.Buffer, text []byte, opaque interface{}) { options := opaque.(*htmlOptions) - smrt := smartypants_data{false, false} + smrt := smartypantsData{false, false} // first do normal entity escaping escaped := bytes.NewBuffer(nil) - attr_escape(escaped, text) + attrEscape(escaped, text) text = escaped.Bytes() mark := 0 @@ -343,11 +343,11 @@ func rndr_smartypants(ob *bytes.Buffer, text []byte, opaque interface{}) { ob.Write(text[mark:i]) } - previous_char := byte(0) + previousChar := byte(0) if i > 0 { - previous_char = text[i-1] + previousChar = text[i-1] } - i += action(ob, &smrt, previous_char, text[i:]) + i += action(ob, &smrt, previousChar, text[i:]) mark = i + 1 } }