From be082a1ef25d4211496762bbbdbab6df7e68c1f3 Mon Sep 17 00:00:00 2001 From: moshee Date: Tue, 25 Jun 2013 01:18:47 +0000 Subject: [PATCH] First attempt at supporting Pandoc-style footnotes. The existing tests have not broken but the new functionality does not work yet. --- block.go | 3 +- html.go | 25 +++++ inline.go | 67 ++++++++++--- inline_test.go | 45 ++++++++- latex.go | 10 ++ markdown.go | 252 +++++++++++++++++++++++++++++++++++++++++++++---- 6 files changed, 369 insertions(+), 33 deletions(-) diff --git a/block.go b/block.go index 40f43a9..c189d0d 100644 --- a/block.go +++ b/block.go @@ -1101,8 +1101,9 @@ gatherlines: line = i } - // render the contents of the list item rawBytes := raw.Bytes() + + // render the contents of the list item var cooked bytes.Buffer if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 { // intermediate render of block li diff --git a/html.go b/html.go index 9cf9b19..36efeb1 100644 --- a/html.go +++ b/html.go @@ -322,6 +322,20 @@ func (options *Html) TableCell(out *bytes.Buffer, text []byte, align int) { out.WriteString("") } +func (options *Html) Footnotes(out *bytes.Buffer, p *parser) { + out.WriteString("
\n") + options.HRule(out) + options.List(out, func() bool { + for _, ref := range p.notes { + out.WriteString("
  • \n") + out.Write(ref.title) + out.WriteString("
  • \n") + } + return true + }, LIST_TYPE_ORDERED) + out.WriteString("
    \n") +} + func (options *Html) List(out *bytes.Buffer, text func() bool, flags int) { marker := out.Len() doubleSpace(out) @@ -501,6 +515,17 @@ func (options *Html) StrikeThrough(out *bytes.Buffer, text []byte) { out.WriteString("") } +func (options *Html) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + slug := slugify(ref) + out.WriteString(``) + out.WriteString(strconv.Itoa(id)) + out.WriteString(``) +} + func (options *Html) Entity(out *bytes.Buffer, entity []byte) { out.Write(entity) } diff --git a/inline.go b/inline.go index 942c3e6..c7d771a 100644 --- a/inline.go +++ b/inline.go @@ -168,20 +168,48 @@ func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int { return 1 } -// '[': parse a link or an image +type linkType int + +const ( + linkNormal linkType = iota + linkImg + linkDeferredFootnote + +// linkInlineFootnote +) + +// '[': parse a link or an image or a footnote func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { // no links allowed inside other links if p.insideLink { return 0 } - isImg := offset > 0 && data[offset-1] == '!' + // [text] == regular link + // ![alt] == image + // ^[text] == inline footnote + // [^refId] == deferred footnote + var t linkType + if offset > 0 && data[offset-1] == '!' { + t = linkImg + } else if p.flags&EXTENSION_FOOTNOTES != 0 { + if len(data) > offset && data[offset+1] == '^' { + t = linkDeferredFootnote + } + } data = data[offset:] - i := 1 - var title, link []byte - textHasNl := false + var ( + i = 1 + noteId int + title, link []byte + textHasNl = false + ) + + if t == linkDeferredFootnote { + i++ + } // look for the matching closing bracket for level := 1; level > 0 && i < len(data); i++ { @@ -351,6 +379,7 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { lr, ok := p.refs[key] if !ok { return 0 + } // keep link and title from reference @@ -358,7 +387,7 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { title = lr.title i++ - // shortcut reference style link + // shortcut reference style link or footnote default: var id []byte @@ -377,7 +406,11 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { id = b.Bytes() } else { - id = data[1:txtE] + if t == linkDeferredFootnote { + id = data[2:txtE] + } else { + id = data[1:txtE] + } } // find the reference with matching id @@ -389,7 +422,9 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { // keep link and title from reference link = lr.link + // if inline footnote, title == footnote contents title = lr.title + noteId = lr.noteId // rewind the whitespace i = txtE + 1 @@ -398,7 +433,7 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { // build content: img alt is escaped, link content is parsed var content bytes.Buffer if txtE > 1 { - if isImg { + if t == linkImg { content.Write(data[1:txtE]) } else { // links cannot contain other links, so turn off link parsing temporarily @@ -417,12 +452,16 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { } // links need something to click on and somewhere to go - if len(uLink) == 0 || (!isImg && content.Len() == 0) { + if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) { return 0 } // call the relevant rendering function - if isImg { + switch t { + case linkNormal: + p.r.Link(out, uLink, title, content.Bytes()) + + case linkImg: outSize := out.Len() outBytes := out.Bytes() if outSize > 0 && outBytes[outSize-1] == '!' { @@ -430,8 +469,12 @@ func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { } p.r.Image(out, uLink, title, content.Bytes()) - } else { - p.r.Link(out, uLink, title, content.Bytes()) + + case linkDeferredFootnote: + p.r.FootnoteRef(out, link, noteId) + + default: + return 0 } return i diff --git a/inline_test.go b/inline_test.go index 7f21648..31e415d 100644 --- a/inline_test.go +++ b/inline_test.go @@ -37,7 +37,8 @@ func doTestsInlineParam(t *testing.T, tests []string, extensions, htmlFlags int) var candidate string defer func() { if err := recover(); err != nil { - t.Errorf("\npanic while processing [%#v]\n", candidate) + panic(err) + t.Errorf("\npanic while processing [%#v] (%v)\n", candidate, err) } }() @@ -501,3 +502,45 @@ func TestAutoLink(t *testing.T) { } doTestsInline(t, tests) } + +func TestFootnotes(t *testing.T) { + tests := []string{ + "testing footnotes.[^a]\n\n[^a]: This is the note\n", + "", + + `testing long[^b] notes. + +[^b]: Paragraph 1 + + Paragraph 2 + + ` + "```\n\tsome code\n\t```" + ` + + Paragraph 3 + +No longer in the footnote +`, + "", + + `testing[^c] multiple[^d] notes. + +[^c]: this is note c + + +omg + +[^d]: this is note d + +what happens here +`, + "", + } + + for _, test := range tests { + if len(test) > 0 { + t.Errorf("Output:\n%s\n", runMarkdownInline(test, EXTENSION_FOOTNOTES, 0)) + } + } + + //doTestsInlineParam(t, tests, EXTENSION_FOOTNOTES, 0) +} diff --git a/latex.go b/latex.go index f6dd577..169757d 100644 --- a/latex.go +++ b/latex.go @@ -158,6 +158,11 @@ func (options *Latex) TableCell(out *bytes.Buffer, text []byte, align int) { out.Write(text) } +// TODO: this +func (options *Latex) Footnotes(out *bytes.Buffer, p *parser) { + +} + func (options *Latex) AutoLink(out *bytes.Buffer, link []byte, kind int) { out.WriteString("\\href{") if kind == LINK_TYPE_EMAIL { @@ -229,6 +234,11 @@ func (options *Latex) StrikeThrough(out *bytes.Buffer, text []byte) { out.WriteString("}") } +// TODO: this +func (options *Latex) FootnoteRef(out *bytes.Buffer, ref []byte, id int) { + +} + func needsBackslash(c byte) bool { for _, r := range []byte("_{}%$&\\~") { if c == r { diff --git a/markdown.go b/markdown.go index f9e5bf3..034dabb 100644 --- a/markdown.go +++ b/markdown.go @@ -37,6 +37,7 @@ const ( EXTENSION_SPACE_HEADERS // be strict about prefix header rules EXTENSION_HARD_LINE_BREAK // translate newlines into line breaks EXTENSION_TAB_SIZE_EIGHT // expand tabs to eight spaces instead of four + EXTENSION_FOOTNOTES // Pandoc-style footnotes ) // These are the possible flag values for the link renderer. @@ -139,6 +140,7 @@ type Renderer interface { Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) TableRow(out *bytes.Buffer, text []byte) TableCell(out *bytes.Buffer, text []byte, flags int) + Footnotes(out *bytes.Buffer, p *parser) // Span-level callbacks AutoLink(out *bytes.Buffer, link []byte, kind int) @@ -151,6 +153,7 @@ type Renderer interface { RawHtmlTag(out *bytes.Buffer, tag []byte) TripleEmphasis(out *bytes.Buffer, text []byte) StrikeThrough(out *bytes.Buffer, text []byte) + FootnoteRef(out *bytes.Buffer, ref []byte, id int) // Low-level callbacks Entity(out *bytes.Buffer, entity []byte) @@ -175,6 +178,11 @@ type parser struct { nesting int maxNesting int insideLink bool + + // Footnotes need to be ordered as well as available to quickly check for + // presence. If a ref is also a footnote, it's stored both in refs and here + // in notes. Slice is nil if footnotes not enabled. + notes []*reference } // @@ -273,6 +281,10 @@ func Markdown(input []byte, renderer Renderer, extensions int) []byte { p.inlineCallback[':'] = autoLink } + if extensions&EXTENSION_FOOTNOTES != 0 { + p.notes = make([]*reference, 0) + } + first := firstPass(p, input) second := secondPass(p, first) @@ -292,7 +304,7 @@ func firstPass(p *parser, input []byte) []byte { } beg, end := 0, 0 for beg < len(input) { // iterate over lines - if end = isReference(p, input[beg:]); end > 0 { + if end = isReference(p, input[beg:], tabSize); end > 0 { beg += end } else { // skip to the next line end = beg @@ -331,6 +343,13 @@ func secondPass(p *parser, input []byte) []byte { p.r.DocumentHeader(&output) p.block(&output, input) + + // NOTE: this is a big hack because we need the parser again for the + // footnotes, so this can't really go in the public interface + if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { + p.r.Footnotes(&output, p) + } + p.r.DocumentFooter(&output) if p.nesting != 0 { @@ -354,11 +373,26 @@ func secondPass(p *parser, input []byte) []byte { // label, i.e., 1 and 2 in this example, as in: // // This library is hosted on [Github][2], a git hosting site. +// +// Actual footnotes as specified in Pandoc and supported by some other Markdown +// libraries such as php-markdown are also taken care of. They look like this: +// +// This sentence needs a bit of further explanation.[^note] +// +// [^note]: This is the explanation. +// +// Footnotes should be placed at the end of the document in an ordered list. +// Inline footnotes such as: +// +// Inline footnotes^[Not supported.] also exist. +// +// are not yet supported. // References are parsed and stored in this struct. type reference struct { - link []byte - title []byte + link []byte + title []byte + noteId int // 0 if not a footnote ref } // Check whether or not data starts with a reference link. @@ -366,7 +400,8 @@ type reference struct { // (in the render struct). // Returns the number of bytes to skip to move past it, // or zero if the first line is not a reference. -func isReference(p *parser, data []byte) int { +func isReference(p *parser, data []byte, tabSize int) int { + println("[", string(data), "]") // up to 3 optional leading spaces if len(data) < 4 { return 0 @@ -376,11 +411,19 @@ func isReference(p *parser, data []byte) int { i++ } + noteId := 0 + // id part: anything but a newline between brackets if data[i] != '[' { return 0 } i++ + if p.flags&EXTENSION_FOOTNOTES != 0 { + if data[i] == '^' { + noteId = len(p.notes) + 1 + i++ + } + } idOffset := i for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { i++ @@ -391,6 +434,7 @@ func isReference(p *parser, data []byte) int { idEnd := i // spacer: colon (space | tab)* newline? (space | tab)* + // /:[ \t]*\n?[ \t]*/ i++ if i >= len(data) || data[i] != ':' { return 0 @@ -412,15 +456,56 @@ func isReference(p *parser, data []byte) int { return 0 } + var ( + linkOffset, linkEnd int + titleOffset, titleEnd int + lineEnd int + raw []byte + ) + + if p.flags&EXTENSION_FOOTNOTES != 0 && noteId > 0 { + linkOffset, linkEnd, raw = scanFootnote(p, data, i, tabSize) + lineEnd = linkEnd + linkOffset + } else { + linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) + } + if lineEnd == 0 { + return 0 + } + + // a valid ref has been found + + ref := &reference{ + noteId: noteId, + } + + if noteId > 0 { + // reusing the link field for the id since footnotes don't have titles + ref.link = data[idOffset:idEnd] + // if footnote, it's not really a title, it's the contained text + ref.title = raw + p.notes = append(p.notes, ref) + } else { + ref.link = data[linkOffset:linkEnd] + ref.title = data[titleOffset:titleEnd] + } + + // id matches are case-insensitive + id := string(bytes.ToLower(data[idOffset:idEnd])) + p.refs[id] = ref + return lineEnd +} + +func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { // link: whitespace-free sequence, optionally between angle brackets if data[i] == '<' { i++ } - linkOffset := i + linkOffset = i for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { i++ } - linkEnd := i + linkEnd = i if data[linkOffset] == '<' && data[linkEnd-1] == '>' { linkOffset++ linkEnd-- @@ -431,11 +516,10 @@ func isReference(p *parser, data []byte) int { i++ } if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { - return 0 + return } // compute end-of-line - lineEnd := 0 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { lineEnd = i } @@ -452,7 +536,6 @@ func isReference(p *parser, data []byte) int { } // optional title: any non-newline sequence enclosed in '"() alone on its line - titleOffset, titleEnd := 0, 0 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { i++ titleOffset = i @@ -477,20 +560,97 @@ func isReference(p *parser, data []byte) int { titleEnd = i } } - if lineEnd == 0 { // garbage after the link - return 0 + + return +} + +// The first bit of this logic is the same as (*parser).listItem, but the rest +// is much simpler. This function simply finds the entire block and shifts it +// over by one tab if it is indeed a block (just returns the line if it's not). +// blockEnd is the end of the section in the input buffer, and contents is the +// extracted text that was shifted over one tab. It will need to be rendered at +// the end of the document. +func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte) { + if i == 0 { + return } - // a valid ref has been found - - // id matches are case-insensitive - id := string(bytes.ToLower(data[idOffset:idEnd])) - p.refs[id] = &reference{ - link: data[linkOffset:linkEnd], - title: data[titleOffset:titleEnd], + // skip leading whitespace on first line + for data[i] == ' ' { + i++ } - return lineEnd + blockStart = i + + // find the end of the line + blockEnd = i + for data[i-1] != '\n' { + if i >= len(data) { + return + } + i++ + } + + // get working buffer + var raw bytes.Buffer + + // put the first line into the working buffer + raw.Write(data[blockEnd:i]) + blockEnd = i + + // process the following lines + containsBlankLine := false + hasBlock := false + +gatherLines: + for blockEnd < len(data) { + i++ + + // find the end of this line + for data[i-1] != '\n' { + i++ + } + + // if it is an empty line, guess that it is part of this item + // and move on to the next line + if p.isEmpty(data[blockEnd:i]) > 0 { + containsBlankLine = true + blockEnd = i + continue + } + + n := 0 + if n = isIndented(data[blockEnd:i], indentSize); n == 0 { + // this is the end of the block. + // we don't want to include this last line in the index. + break gatherLines + } + + // if there were blank lines before this one, insert a new one now + if containsBlankLine { + hasBlock = true + raw.WriteByte('\n') + containsBlankLine = false + } + + // get rid of that first tab, write to buffer + raw.Write(data[blockEnd+n : i]) + + blockEnd = i + } + + rawBytes := raw.Bytes() + println("raw: {" + string(raw.Bytes()) + "}") + buf := new(bytes.Buffer) + + if hasBlock { + p.block(buf, rawBytes) + } else { + p.inline(buf, rawBytes) + } + contents = buf.Bytes() + + return } // @@ -578,3 +738,57 @@ func expandTabs(out *bytes.Buffer, line []byte, tabSize int) { i++ } } + +// Find if a line counts as indented or not. +// Returns number of characters the indent is (0 = not indented). +func isIndented(data []byte, indentSize int) int { + if len(data) == 0 { + return 0 + } + if data[0] == '\t' { + return 1 + } + if len(data) < indentSize { + return 0 + } + for i := 0; i < indentSize; i++ { + if data[i] != ' ' { + return 0 + } + } + return indentSize +} + +// Create a url-safe slug for fragments +func slugify(in []byte) []byte { + if len(in) == 0 { + return in + } + out := make([]byte, 0, len(in)) + sym := false + + for _, ch := range in { + if isalnum(ch) { + sym = false + out = append(out, ch) + } else if sym { + continue + } else { + out = append(out, '-') + sym = true + } + } + var a, b int + var ch byte + for a, ch = range out { + if ch != '-' { + break + } + } + for b = len(out) - 1; b > 0; b-- { + if out[b] != '-' { + break + } + } + return out[a : b+1] +}