First attempt at supporting Pandoc-style footnotes. The existing tests have not broken but the new functionality does not work yet.

2024-03-22 13:40:34 +08:00 · 2013-06-25 01:18:47 +00:00 · 2013-06-25 01:18:47 +00:00 · be082a1ef2
commit be082a1ef2
parent 2336fd3109
6 changed files with 369 additions and 33 deletions
--- a/block.go
+++ b/block.go
@ -1101,8 +1101,9 @@ gatherlines:
 		line = i
 	}

-	// render the contents of the list item
 	rawBytes := raw.Bytes()
+
+	// render the contents of the list item
 	var cooked bytes.Buffer
 	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 {
 		// intermediate render of block li
--- a/html.go
+++ b/html.go
@ -322,6 +322,20 @@ func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) {
 	out.WriteString("</td>")
 }

+// Footnotes renders the collected footnote definitions at the end of the
+// document: a <div class="footnotes"> holding a horizontal rule and an ordered
+// list with one <li> per entry of p.notes, in the order the notes were stored.
+// Each item carries the id "fn:<slug>" so that the "#fn:<slug>" link written
+// by FootnoteRef has a target to jump to.
+func (options *Html) Footnotes(out *bytes.Buffer, p *parser) {
+	out.WriteString("<div class=\"footnotes\">\n")
+	options.HRule(out)
+	options.List(out, func() bool {
+		for _, ref := range p.notes {
+			// for a footnote, ref.link holds the note id and ref.title the
+			// already rendered note body
+			out.WriteString(`<li id="fn:`)
+			out.Write(slugify(ref.link))
+			out.WriteString("\">\n")
+			out.Write(ref.title)
+			out.WriteString("</li>\n")
+		}
+		return true
+	}, LIST_TYPE_ORDERED)
+	out.WriteString("</div>\n")
+}
+
 func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) {
 	marker := out.Len()
 	doubleSpace(out)
@ -501,6 +515,17 @@ func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) {
 	out.WriteString("</del>")
 }

+// FootnoteRef writes the inline marker for a footnote reference: a
+// <sup class="footnote-ref"> with id "fnref:<slug>" wrapping a link to
+// "#fn:<slug>" whose text is the decimal note number id. The slug is
+// slugify(ref).
+func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
+	slug := slugify(ref)
+	out.WriteString(`<sup class="footnote-ref" id="fnref:`)
+	out.Write(slug)
+	out.WriteString(`"><a rel="footnote" href="#fn:`)
+	out.Write(slug)
+	out.WriteString(`">`)
+	out.WriteString(strconv.Itoa(id))
+	out.WriteString(`</a></sup>`)
+}
+
 func (options *Html) Entity(out *bytes.Buffer, entity []byte) {
 	out.Write(entity)
 }
--- a/inline.go
+++ b/inline.go
@ -168,20 +168,48 @@ func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 	return 1
 }

-// '[': parse a link or an image
+// linkType identifies which bracket construct the link callback is parsing.
+type linkType int
+
+const (
+	linkNormal linkType = iota // [text]: regular link
+	linkImg // ![alt]: image
+	linkDeferredFootnote // [^refId]: footnote whose text is defined elsewhere
+
+//	linkInlineFootnote
+)
+
+// '[': parse a link or an image or a footnote
 func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 	// no links allowed inside other links
 	if p.insideLink {
 		return 0
 	}

-	isImg := offset > 0 && data[offset-1] == '!'
+	// [text] == regular link
+	// ![alt] == image
+	// ^[text] == inline footnote
+	// [^refId] == deferred footnote
+	var t linkType
+	if offset > 0 && data[offset-1] == '!' {
+		t = linkImg
+	} else if p.flags&EXTENSION_FOOTNOTES != 0 {
+		if len(data) > offset && data[offset+1] == '^' {
+			t = linkDeferredFootnote
+		}
+	}

 	data = data[offset:]

-	i := 1
-	var title, link []byte
-	textHasNl := false
+	var (
+		i           = 1
+		noteId      int
+		title, link []byte
+		textHasNl   = false
+	)
+
+	if t == linkDeferredFootnote {
+		i++
+	}

 	// look for the matching closing bracket
 	for level := 1; level > 0 && i < len(data); i++ {
@ -351,6 +379,7 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 		lr, ok := p.refs[key]
 		if !ok {
 			return 0
+
 		}

 		// keep link and title from reference
@ -358,7 +387,7 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 		title = lr.title
 		i++

-	// shortcut reference style link
+	// shortcut reference style link or footnote
 	default:
 		var id []byte

@ -377,7 +406,11 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {

 			id = b.Bytes()
 		} else {
-			id = data[1:txtE]
+			if t == linkDeferredFootnote {
+				id = data[2:txtE]
+			} else {
+				id = data[1:txtE]
+			}
 		}

 		// find the reference with matching id
@ -389,7 +422,9 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {

 		// keep link and title from reference
 		link = lr.link
+		// if deferred footnote, title == rendered footnote contents and link == note id
 		title = lr.title
+		noteId = lr.noteId

 		// rewind the whitespace
 		i = txtE + 1
@ -398,7 +433,7 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 	// build content: img alt is escaped, link content is parsed
 	var content bytes.Buffer
 	if txtE > 1 {
-		if isImg {
+		if t == linkImg {
 			content.Write(data[1:txtE])
 		} else {
 			// links cannot contain other links, so turn off link parsing temporarily
@ -417,12 +452,16 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 	}

 	// links need something to click on and somewhere to go
-	if len(uLink) == 0 || (!isImg && content.Len() == 0) {
+	if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
 		return 0
 	}

 	// call the relevant rendering function
-	if isImg {
+	switch t {
+	case linkNormal:
+		p.r.Link(out, uLink, title, content.Bytes())
+
+	case linkImg:
 		outSize := out.Len()
 		outBytes := out.Bytes()
 		if outSize > 0 && outBytes[outSize-1] == '!' {
@ -430,8 +469,12 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
 		}

 		p.r.Image(out, uLink, title, content.Bytes())
-	} else {
-		p.r.Link(out, uLink, title, content.Bytes())
+
+	case linkDeferredFootnote:
+		p.r.FootnoteRef(out, link, noteId)
+
+	default:
+		return 0
 	}

 	return i
--- a/inline_test.go
+++ b/inline_test.go
@ -37,7 +37,8 @@ func doTestsInlineParam(t *testing.T, tests []string, extensions, htmlFlags int)
 	var candidate string
 	defer func() {
 		if err := recover(); err != nil {
-			t.Errorf("\npanic while processing [%#v]\n", candidate)
+			panic(err)
+			t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err)
 		}
 	}()

@ -501,3 +502,45 @@ func TestAutoLink(t *testing.T) {
 	}
 	doTestsInline(t, tests)
 }
+
+// TestFootnotes is work-in-progress scaffolding for EXTENSION_FOOTNOTES. The
+// tests slice alternates markdown input with an expected output that is still
+// an empty placeholder, so instead of comparing results the loop renders each
+// non-empty entry and reports the output through t.Errorf.
+func TestFootnotes(t *testing.T) {
+	tests := []string{
+		"testing footnotes.[^a]\n\n[^a]: This is the note\n",
+		"",
+
+		`testing long[^b] notes.
+
+[^b]: Paragraph 1
+
+	Paragraph 2
+
+	` + "```\n\tsome code\n\t```" + `
+	
+	Paragraph 3
+
+No longer in the footnote
+`,
+		"",
+
+		`testing[^c] multiple[^d] notes.
+
+[^c]: this is note c
+
+
+omg
+
+[^d]: this is note d
+
+what happens here
+`,
+		"",
+	}
+
+	for _, test := range tests {
+		if len(test) > 0 {
+			t.Errorf("Output:\n%s\n", runMarkdownInline(test, EXTENSION_FOOTNOTES, 0))
+		}
+	}
+
+	//doTestsInlineParam(t, tests, EXTENSION_FOOTNOTES, 0)
+}
--- a/latex.go
+++ b/latex.go
@ -158,6 +158,11 @@ func (options *Latex) TableCell(out *bytes.Buffer, text []byte, align int) {
 	out.Write(text)
 }

+// Footnotes is a no-op: footnote output is not implemented for LaTeX yet (TODO).
+func (options *Latex) Footnotes(out *bytes.Buffer, p *parser) {
+
+}
+
 func (options *Latex) AutoLink(out *bytes.Buffer, link []byte, kind int) {
 	out.WriteString("\\href{")
 	if kind == LINK_TYPE_EMAIL {
@ -229,6 +234,11 @@ func (options *Latex) StrikeThrough(out *bytes.Buffer, text []byte) {
 	out.WriteString("}")
 }

+// FootnoteRef is a no-op: footnote references are not implemented for LaTeX yet (TODO).
+func (options *Latex) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
+
+}
+
 func needsBackslash(c byte) bool {
 	for _, r := range []byte("_{}%$&\\~") {
 		if c == r {
--- a/markdown.go
+++ b/markdown.go
@ -37,6 +37,7 @@ const (
 	EXTENSION_SPACE_HEADERS                 // be strict about prefix header rules
 	EXTENSION_HARD_LINE_BREAK               // translate newlines into line breaks
 	EXTENSION_TAB_SIZE_EIGHT                // expand tabs to eight spaces instead of four
+	EXTENSION_FOOTNOTES                     // Pandoc-style footnotes
 )

 // These are the possible flag values for the link renderer.
@ -139,6 +140,7 @@ type Renderer interface {
 	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
 	TableRow(out *bytes.Buffer, text []byte)
 	TableCell(out *bytes.Buffer, text []byte, flags int)
+	Footnotes(out *bytes.Buffer, p *parser)

 	// Span-level callbacks
 	AutoLink(out *bytes.Buffer, link []byte, kind int)
@ -151,6 +153,7 @@ type Renderer interface {
 	RawHtmlTag(out *bytes.Buffer, tag []byte)
 	TripleEmphasis(out *bytes.Buffer, text []byte)
 	StrikeThrough(out *bytes.Buffer, text []byte)
+	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

 	// Low-level callbacks
 	Entity(out *bytes.Buffer, entity []byte)
@ -175,6 +178,11 @@ type parser struct {
 	nesting        int
 	maxNesting     int
 	insideLink     bool
+
+	// Footnotes need to be ordered as well as available to quickly check for
+	// presence. If a ref is also a footnote, it's stored both in refs and here
+	// in notes. Slice is nil if footnotes not enabled.
+	notes []*reference
 }

 //
@ -273,6 +281,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte {
 		p.inlineCallback[':'] = autoLink
 	}

+	if extensions&EXTENSION_FOOTNOTES != 0 {
+		p.notes = make([]*reference, 0)
+	}
+
 	first := firstPass(p, input)
 	second := secondPass(p, first)

@ -292,7 +304,7 @@ func firstPass(p *parser, input []byte) []byte {
 	}
 	beg, end := 0, 0
 	for beg < len(input) { // iterate over lines
-		if end = isReference(p, input[beg:]); end > 0 {
+		if end = isReference(p, input[beg:], tabSize); end > 0 {
 			beg += end
 		} else { // skip to the next line
 			end = beg
@ -331,6 +343,13 @@ func secondPass(p *parser, input []byte) []byte {

 	p.r.DocumentHeader(&output)
 	p.block(&output, input)
+
+	// NOTE: this is a big hack because we need the parser again for the
+	// footnotes, so this can't really go in the public interface
+	if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 {
+		p.r.Footnotes(&output, p)
+	}
+
 	p.r.DocumentFooter(&output)

 	if p.nesting != 0 {
@ -354,11 +373,26 @@ func secondPass(p *parser, input []byte) []byte {
 // label, i.e., 1 and 2 in this example, as in:
 //
 //    This library is hosted on [Github][2], a git hosting site.
+//
+// Actual footnotes as specified in Pandoc and supported by some other Markdown
+// libraries such as php-markdown are also taken care of. They look like this:
+//
+//    This sentence needs a bit of further explanation.[^note]
+//
+//    [^note]: This is the explanation.
+//
+// Footnotes should be placed at the end of the document in an ordered list.
+// Inline footnotes such as:
+//
+//    Inline footnotes^[Not supported.] also exist.
+//
+// are not yet supported.

 // References are parsed and stored in this struct.
 type reference struct {
-	link  []byte
-	title []byte
+	link   []byte
+	title  []byte
+	noteId int // 0 if not a footnote ref
 }

 // Check whether or not data starts with a reference link.
@ -366,7 +400,8 @@ type reference struct {
 // (in the render struct).
 // Returns the number of bytes to skip to move past it,
 // or zero if the first line is not a reference.
-func isReference(p *parser, data []byte) int {
+func isReference(p *parser, data []byte, tabSize int) int {
+	println("[", string(data), "]")
 	// up to 3 optional leading spaces
 	if len(data) < 4 {
 		return 0
@ -376,11 +411,19 @@ func isReference(p *parser, data []byte) int {
 		i++
 	}

+	noteId := 0
+
 	// id part: anything but a newline between brackets
 	if data[i] != '[' {
 		return 0
 	}
 	i++
+	if p.flags&EXTENSION_FOOTNOTES != 0 {
+		if data[i] == '^' {
+			noteId = len(p.notes) + 1
+			i++
+		}
+	}
 	idOffset := i
 	for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' {
 		i++
@ -391,6 +434,7 @@ func isReference(p *parser, data []byte) int {
 	idEnd := i

 	// spacer: colon (space | tab)* newline? (space | tab)*
+	// /:[ \t]*\n?[ \t]*/
 	i++
 	if i >= len(data) || data[i] != ':' {
 		return 0
@ -412,15 +456,56 @@ func isReference(p *parser, data []byte) int {
 		return 0
 	}

+	var (
+		linkOffset, linkEnd   int
+		titleOffset, titleEnd int
+		lineEnd               int
+		raw                   []byte
+	)
+
+	if p.flags&EXTENSION_FOOTNOTES != 0 && noteId > 0 {
+		linkOffset, linkEnd, raw = scanFootnote(p, data, i, tabSize)
+		lineEnd = linkEnd + linkOffset
+	} else {
+		linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i)
+	}
+	if lineEnd == 0 {
+		return 0
+	}
+
+	// a valid ref has been found
+
+	ref := &reference{
+		noteId: noteId,
+	}
+
+	if noteId > 0 {
+		// reusing the link field for the id since footnotes don't have titles
+		ref.link = data[idOffset:idEnd]
+		// if footnote, it's not really a title, it's the contained text
+		ref.title = raw
+		p.notes = append(p.notes, ref)
+	} else {
+		ref.link = data[linkOffset:linkEnd]
+		ref.title = data[titleOffset:titleEnd]
+	}
+
+	// id matches are case-insensitive
+	id := string(bytes.ToLower(data[idOffset:idEnd]))
+	p.refs[id] = ref
+	return lineEnd
+}
+
+func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) {
 	// link: whitespace-free sequence, optionally between angle brackets
 	if data[i] == '<' {
 		i++
 	}
-	linkOffset := i
+	linkOffset = i
 	for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' {
 		i++
 	}
-	linkEnd := i
+	linkEnd = i
 	if data[linkOffset] == '<' && data[linkEnd-1] == '>' {
 		linkOffset++
 		linkEnd--
@ -431,11 +516,10 @@ func isReference(p *parser, data []byte) int {
 		i++
 	}
 	if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' {
-		return 0
+		return
 	}

 	// compute end-of-line
-	lineEnd := 0
 	if i >= len(data) || data[i] == '\r' || data[i] == '\n' {
 		lineEnd = i
 	}
@ -452,7 +536,6 @@ func isReference(p *parser, data []byte) int {
 	}

 	// optional title: any non-newline sequence enclosed in '"() alone on its line
-	titleOffset, titleEnd := 0, 0
 	if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') {
 		i++
 		titleOffset = i
@ -477,20 +560,97 @@ func isReference(p *parser, data []byte) int {
 			titleEnd = i
 		}
 	}
-	if lineEnd == 0 { // garbage after the link
-		return 0
+
+	return
+}
+
+// scanFootnote gathers the body of a footnote definition. i is the offset in
+// data that isReference has reached after the "[^id]" label and its spacer.
+// The first part of this logic is the same as (*parser).listItem, but the rest
+// is much simpler: the first line is taken as is, and every following line
+// that is blank or indented (per isIndented with indentSize) is appended with
+// its indent stripped. The first non-blank, non-indented line ends the note.
+//
+// blockStart is the offset of the footnote text and blockEnd the offset just
+// past the last line consumed from data. contents is the rendered body: block
+// rendered when a blank line separated indented lines, inline rendered
+// otherwise. It will need to be written out at the end of the document. All
+// results are zero when i == 0.
+//
+// Line scanning never reads past len(data), so a final line without a
+// trailing newline is accepted instead of being dropped or causing an
+// index-out-of-range panic.
+func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte) {
+	if i == 0 {
+		return
 	}
 
-	// a valid ref has been found
-
-	// id matches are case-insensitive
-	id := string(bytes.ToLower(data[idOffset:idEnd]))
-	p.refs[id] = &reference{
-		link:  data[linkOffset:linkEnd],
-		title: data[titleOffset:titleEnd],
+	// skip leading whitespace on first line
+	for i < len(data) && data[i] == ' ' {
+		i++
 	}
 
-	return lineEnd
+	blockStart = i
+
+	// find the end of the line; also stop at the end of the input so that a
+	// last line without a newline is still taken (i >= 1 here, so data[i-1]
+	// is always in range)
+	blockEnd = i
+	for i < len(data) && data[i-1] != '\n' {
+		i++
+	}
+
+	// get working buffer
+	var raw bytes.Buffer
+
+	// put the first line into the working buffer
+	raw.Write(data[blockEnd:i])
+	blockEnd = i
+
+	// process the following lines
+	containsBlankLine := false
+	hasBlock := false
+
+gatherLines:
+	for blockEnd < len(data) {
+		i++
+
+		// find the end of this line, bounded by the end of the input
+		for i < len(data) && data[i-1] != '\n' {
+			i++
+		}
+
+		// if it is an empty line, guess that it is part of this item
+		// and move on to the next line
+		if p.isEmpty(data[blockEnd:i]) > 0 {
+			containsBlankLine = true
+			blockEnd = i
+			continue
+		}
+
+		n := isIndented(data[blockEnd:i], indentSize)
+		if n == 0 {
+			// this is the end of the block.
+			// we don't want to include this last line in the index.
+			break gatherLines
+		}
+
+		// if there were blank lines before this one, insert a new one now
+		if containsBlankLine {
+			hasBlock = true
+			raw.WriteByte('\n')
+			containsBlankLine = false
+		}
+
+		// get rid of that first tab, write to buffer
+		raw.Write(data[blockEnd+n : i])
+
+		blockEnd = i
+	}
+
+	rawBytes := raw.Bytes()
+	buf := new(bytes.Buffer)
+
+	// a note that spans paragraphs is rendered as block content, a
+	// single-paragraph note as inline content
+	if hasBlock {
+		p.block(buf, rawBytes)
+	} else {
+		p.inline(buf, rawBytes)
+	}
+	contents = buf.Bytes()
+
+	return
 }

 //
@ -578,3 +738,57 @@ func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
 		i++
 	}
 }
+
+// isIndented reports whether data begins with one indent: a single tab or indentSize spaces.
+// It returns the number of bytes that make up the indent (1 for a tab), or 0 if not indented.
+func isIndented(data []byte, indentSize int) int {
+	if len(data) == 0 {
+		return 0
+	}
+	if data[0] == '\t' {
+		return 1
+	}
+	if len(data) < indentSize {
+		return 0
+	}
+	for i := 0; i < indentSize; i++ {
+		if data[i] != ' ' {
+			return 0
+		}
+	}
+	return indentSize
+}
+
+// Create a url-safe slug for fragments
+func slugify(in []byte) []byte {
+	if len(in) == 0 {
+		return in
+	}
+	out := make([]byte, 0, len(in))
+	sym := false
+
+	for _, ch := range in {
+		if isalnum(ch) {
+			sym = false
+			out = append(out, ch)
+		} else if sym {
+			continue
+		} else {
+			out = append(out, '-')
+			sym = true
+		}
+	}
+	var a, b int
+	var ch byte
+	for a, ch = range out {
+		if ch != '-' {
+			break
+		}
+	}
+	for b = len(out) - 1; b > 0; b-- {
+		if out[b] != '-' {
+			break
+		}
+	}
+	return out[a : b+1]
+}