Merge branch 'v2' into v2-perf-tweaks

2024-03-22 13:40:34 +08:00 · 2017-02-02 12:00:48 +02:00 · 2017-02-02 12:00:48 +02:00 · d04a53c644
commit d04a53c644
parent ea57e93666 a4dd8ad4a6
9 changed files with 45 additions and 394 deletions
--- a/README.md
+++ b/README.md
@ -8,7 +8,7 @@ punctuation substitutions, etc.), and it is safe for all utf-8
 (unicode) input.

 HTML output is currently supported, along with Smartypants
-extensions. An experimental LaTeX output engine is also included.
+extensions.

 It started as a translation from C of [Sundown][3].

@ -69,7 +69,7 @@ html := bluemonday.UGCPolicy().SanitizeBytes(unsafe)
 ### Custom options

 If you want to customize the set of options, first get a renderer
-(currently either the HTML or LaTeX output engines), then use it to
+(currently only the HTML output engine), then use it to
 call the more general `Markdown` function. For examples, see the
 implementations of `MarkdownBasic` and `MarkdownCommon` in
 `markdown.go`.
@ -233,15 +233,8 @@ are a few of note:
 *   [markdownfmt](https://github.com/shurcooL/markdownfmt): like gofmt,
    but for markdown.

-*   LaTeX output: renders output as LaTeX. This is currently part of the
-    main Blackfriday repository, but may be split into its own project
-    in the future. If you are interested in owning and maintaining the
-    LaTeX output component, please be in touch.
-
-    It renders some basic documents, but is only experimental at this
-    point. In particular, it does not do any inline escaping, so input
-    that happens to look like LaTeX code will be passed through without
-    modification.
+*   [LaTeX output](https://bitbucket.org/ambrevar/blackfriday-latex):
+    renders output as LaTeX.


 Todo
--- a/block_test.go
+++ b/block_test.go
@ -1601,7 +1601,9 @@ func TestTOC(t *testing.T) {
 		"#",
 		"",
 	}
-	doTestsBlock(t, tests, TOC)
+	doTestsParam(t, tests, TestParams{
+		HTMLFlags: UseXHTML | TOC,
+	})
 }

 func TestOmitContents(t *testing.T) {
@ -1625,9 +1627,13 @@ func TestOmitContents(t *testing.T) {
 		"#\n\nfoo",
 		"",
 	}
-	doTestsBlock(t, tests, TOC|OmitContents)
+	doTestsParam(t, tests, TestParams{
+		HTMLFlags: UseXHTML | TOC | OmitContents,
+	})
 	// Now run again: make sure OmitContents implies TOC
-	doTestsBlock(t, tests, OmitContents)
+	doTestsParam(t, tests, TestParams{
+		HTMLFlags: UseXHTML | OmitContents,
+	})
 }

 func TestCompletePage(t *testing.T) {
--- a/helpers_test.go
+++ b/helpers_test.go
@ -44,7 +44,6 @@ func execRecoverableTestSuite(t *testing.T, tests []string, params TestParams, s

 func runMarkdown(input string, params TestParams) string {
 	params.HTMLRendererParameters.Flags = params.HTMLFlags
-	params.HTMLRendererParameters.Extensions = params.Options.Extensions
 	renderer := NewHTMLRenderer(params.HTMLRendererParameters)
 	return string(Markdown([]byte(input), renderer, params.Options))
 }
@ -54,8 +53,7 @@ func doTests(t *testing.T, tests []string) {
 	doTestsParam(t, tests, TestParams{
 		Options: DefaultOptions,
 		HTMLRendererParameters: HTMLRendererParameters{
-			Flags:      CommonHTMLFlags,
-			Extensions: CommonExtensions,
+			Flags: CommonHTMLFlags,
 		},
 	})
 }
--- a/html.go
+++ b/html.go
@ -30,7 +30,6 @@ type HTMLFlags int
 const (
 	HTMLFlagsNone           HTMLFlags = 0
 	SkipHTML                HTMLFlags = 1 << iota // Skip preformatted HTML blocks
-	SkipStyle                                     // Skip embedded <style> elements
 	SkipImages                                    // Skip embedded images
 	SkipLinks                                     // Skip all links
 	Safelink                                      // Only link to trusted protocols
@ -45,6 +44,8 @@ const (
 	SmartypantsDashes                             // Enable smart dashes (with Smartypants)
 	SmartypantsLatexDashes                        // Enable LaTeX-style dashes (with Smartypants)
 	SmartypantsAngledQuotes                       // Enable angled double quotes (with Smartypants) for double quotes rendering
+	TOC                                           // Generate a table of contents
+	OmitContents                                  // Skip the main contents (for a standalone table of contents)

 	TagName               = "[A-Za-z][A-Za-z0-9-]*"
 	AttributeName         = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
@ -89,8 +90,7 @@ type HTMLRendererParameters struct {
 	CSS   string // Optional CSS file URL (used if CompletePage is set)
 	Icon  string // Optional icon file URL (used if CompletePage is set)

-	Flags      HTMLFlags  // Flags allow customizing this renderer's behavior
-	Extensions Extensions // Extensions give Smartypants and HTML renderer access to Blackfriday's global extensions
+	Flags HTMLFlags // Flags allow customizing this renderer's behavior
 }

 // HTMLRenderer is a type that implements the Renderer interface for HTML output.
@ -539,14 +539,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
 		if r.Flags&SkipHTML != 0 {
 			break
 		}
-		if r.Flags&SkipStyle != 0 && isHTMLTag(node.Literal, "style") {
-			break
-		}
-		//if options.safe {
-		//	out(w, "<!-- raw HTML omitted -->")
-		//} else {
 		r.out(w, node.Literal)
-		//}
 	case Link:
 		// mark it but don't link it if it is not a safe link: no smartypants
 		dest := node.LinkData.Destination
@ -941,9 +934,9 @@ func (r *HTMLRenderer) Render(ast *Node) []byte {
 	//dump(ast)
 	var buf bytes.Buffer
 	r.writeDocumentHeader(&buf)
-	if r.Extensions&TOC != 0 || r.Extensions&OmitContents != 0 {
+	if r.Flags&TOC != 0 || r.Flags&OmitContents != 0 {
 		r.writeTOC(&buf, ast)
-		if r.Extensions&OmitContents != 0 {
+		if r.Flags&OmitContents != 0 {
 			return buf.Bytes()
 		}
 	}
--- a/inline.go
+++ b/inline.go
@ -284,6 +284,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {

 	txtE := i
 	i++
+	var footnoteNode *Node

 	// skip any amount of whitespace or newline
 	// (this is much more lax than original markdown syntax)
@ -463,6 +464,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
 			}
 		}

+		footnoteNode = NewNode(Item)
 		if t == linkInlineFootnote {
 			// create a new reference
 			noteID = len(p.notes) + 1
@ -484,6 +486,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
 				hasBlock: false,
 				link:     fragment,
 				title:    id,
+				footnote: footnoteNode,
 			}

 			p.notes = append(p.notes, ref)
@ -499,6 +502,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {

 			if t == linkDeferredFootnote {
 				lr.noteID = len(p.notes) + 1
+				lr.footnote = footnoteNode
 				p.notes = append(p.notes, lr)
 			}

@ -557,6 +561,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
 		linkNode.Destination = link
 		linkNode.Title = title
 		linkNode.NoteID = noteID
+		linkNode.Footnote = footnoteNode
 		if t == linkInlineFootnote {
 			i++
 		}
--- a/inline_test.go
+++ b/inline_test.go
@ -1119,15 +1119,6 @@ func TestSkipImages(t *testing.T) {
 	})
 }

-func TestSkipStyle(t *testing.T) {
-	doTestsInlineParam(t, []string{
-		"foo\n\n<style>color: #f00</style> bar",
-		"<p>foo</p>\n\n<p>color: #f00 bar</p>\n",
-	}, TestParams{
-		HTMLFlags: SkipStyle,
-	})
-}
-
 func TestUseXHTML(t *testing.T) {
 	doTestsParam(t, []string{
 		"---",
--- a/latex.go
+++ b/latex.go
@ -1,336 +0,0 @@
-//
-// Blackfriday Markdown Processor
-// Available at http://github.com/russross/blackfriday
-//
-// Copyright © 2011 Russ Ross <russ@russross.com>.
-// Distributed under the Simplified BSD License.
-// See README.md for details.
-//
-
-//
-//
-// LaTeX rendering backend
-//
-//
-
-package blackfriday
-
-import (
-	"bytes"
-	"io"
-)
-
-// Latex is a type that implements the Renderer interface for LaTeX output.
-//
-// Do not create this directly, instead use the NewLatexRenderer function.
-type Latex struct {
-	w bytes.Buffer
-}
-
-// NewLatexRenderer creates and configures a Latex object, which
-// satisfies the Renderer interface.
-//
-// flags is a set of LATEX_* options ORed together (currently no such options
-// are defined).
-func NewLatexRenderer(flags int) *Latex {
-	var writer bytes.Buffer
-	return &Latex{
-		w: writer,
-	}
-}
-
-// render code chunks using verbatim, or listings if we have a language
-func (r *Latex) BlockCode(text []byte, lang string) {
-	if lang == "" {
-		r.w.WriteString("\n\\begin{verbatim}\n")
-	} else {
-		r.w.WriteString("\n\\begin{lstlisting}[language=")
-		r.w.WriteString(lang)
-		r.w.WriteString("]\n")
-	}
-	r.w.Write(text)
-	if lang == "" {
-		r.w.WriteString("\n\\end{verbatim}\n")
-	} else {
-		r.w.WriteString("\n\\end{lstlisting}\n")
-	}
-}
-
-func (r *Latex) TitleBlock(text []byte) {
-
-}
-
-func (r *Latex) BlockQuote(text []byte) {
-	r.w.WriteString("\n\\begin{quotation}\n")
-	r.w.Write(text)
-	r.w.WriteString("\n\\end{quotation}\n")
-}
-
-func (r *Latex) BlockHtml(text []byte) {
-	// a pretty lame thing to do...
-	r.w.WriteString("\n\\begin{verbatim}\n")
-	r.w.Write(text)
-	r.w.WriteString("\n\\end{verbatim}\n")
-}
-
-func (r *Latex) BeginHeader(level int, id string) {
-	switch level {
-	case 1:
-		r.w.WriteString("\n\\section{")
-	case 2:
-		r.w.WriteString("\n\\subsection{")
-	case 3:
-		r.w.WriteString("\n\\subsubsection{")
-	case 4:
-		r.w.WriteString("\n\\paragraph{")
-	case 5:
-		r.w.WriteString("\n\\subparagraph{")
-	case 6:
-		r.w.WriteString("\n\\textbf{")
-	}
-}
-
-func (r *Latex) EndHeader(level int, id string, header []byte) {
-	r.w.WriteString("}\n")
-}
-
-func (r *Latex) HRule() {
-	r.w.WriteString("\n\\HRule\n")
-}
-
-func (r *Latex) BeginList(flags ListType) {
-	if flags&ListTypeOrdered != 0 {
-		r.w.WriteString("\n\\begin{enumerate}\n")
-	} else {
-		r.w.WriteString("\n\\begin{itemize}\n")
-	}
-}
-
-func (r *Latex) EndList(flags ListType) {
-	if flags&ListTypeOrdered != 0 {
-		r.w.WriteString("\n\\end{enumerate}\n")
-	} else {
-		r.w.WriteString("\n\\end{itemize}\n")
-	}
-}
-
-func (r *Latex) ListItem(text []byte, flags ListType) {
-	r.w.WriteString("\n\\item ")
-	r.w.Write(text)
-}
-
-func (r *Latex) BeginParagraph() {
-	r.w.WriteString("\n")
-}
-
-func (r *Latex) EndParagraph() {
-	r.w.WriteString("\n")
-}
-
-func (r *Latex) Table(header []byte, body []byte, columnData []CellAlignFlags) {
-	r.w.WriteString("\n\\begin{tabular}{")
-	for _, elt := range columnData {
-		switch elt {
-		case TableAlignmentLeft:
-			r.w.WriteByte('l')
-		case TableAlignmentRight:
-			r.w.WriteByte('r')
-		default:
-			r.w.WriteByte('c')
-		}
-	}
-	r.w.WriteString("}\n")
-	r.w.Write(header)
-	r.w.WriteString(" \\\\\n\\hline\n")
-	r.w.Write(body)
-	r.w.WriteString("\n\\end{tabular}\n")
-}
-
-func (r *Latex) TableRow(text []byte) {
-	r.w.WriteString(" \\\\\n")
-	r.w.Write(text)
-}
-
-func (r *Latex) TableHeaderCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
-	if out.Len() > 0 {
-		out.WriteString(" & ")
-	}
-	out.Write(text)
-}
-
-func (r *Latex) TableCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
-	if out.Len() > 0 {
-		out.WriteString(" & ")
-	}
-	out.Write(text)
-}
-
-// TODO: this
-func (r *Latex) BeginFootnotes() {
-}
-
-// TODO: this
-func (r *Latex) EndFootnotes() {
-}
-
-func (r *Latex) FootnoteItem(name, text []byte, flags ListType) {
-
-}
-
-func (r *Latex) AutoLink(link []byte, kind autolinkType) {
-	r.w.WriteString("\\href{")
-	if kind == emailAutolink {
-		r.w.WriteString("mailto:")
-	}
-	r.w.Write(link)
-	r.w.WriteString("}{")
-	r.w.Write(link)
-	r.w.WriteString("}")
-}
-
-func (r *Latex) CodeSpan(text []byte) {
-	r.w.WriteString("\\texttt{")
-	r.escapeSpecialChars(text)
-	r.w.WriteString("}")
-}
-
-func (r *Latex) DoubleEmphasis(text []byte) {
-	r.w.WriteString("\\textbf{")
-	r.w.Write(text)
-	r.w.WriteString("}")
-}
-
-func (r *Latex) Emphasis(text []byte) {
-	r.w.WriteString("\\textit{")
-	r.w.Write(text)
-	r.w.WriteString("}")
-}
-
-func (r *Latex) Image(link []byte, title []byte, alt []byte) {
-	if bytes.HasPrefix(link, []byte("http://")) || bytes.HasPrefix(link, []byte("https://")) {
-		// treat it like a link
-		r.w.WriteString("\\href{")
-		r.w.Write(link)
-		r.w.WriteString("}{")
-		r.w.Write(alt)
-		r.w.WriteString("}")
-	} else {
-		r.w.WriteString("\\includegraphics{")
-		r.w.Write(link)
-		r.w.WriteString("}")
-	}
-}
-
-func (r *Latex) LineBreak() {
-	r.w.WriteString(" \\\\\n")
-}
-
-func (r *Latex) Link(link []byte, title []byte, content []byte) {
-	r.w.WriteString("\\href{")
-	r.w.Write(link)
-	r.w.WriteString("}{")
-	r.w.Write(content)
-	r.w.WriteString("}")
-}
-
-func (r *Latex) RawHtmlTag(tag []byte) {
-}
-
-func (r *Latex) TripleEmphasis(text []byte) {
-	r.w.WriteString("\\textbf{\\textit{")
-	r.w.Write(text)
-	r.w.WriteString("}}")
-}
-
-func (r *Latex) StrikeThrough(text []byte) {
-	r.w.WriteString("\\sout{")
-	r.w.Write(text)
-	r.w.WriteString("}")
-}
-
-// TODO: this
-func (r *Latex) FootnoteRef(ref []byte, id int) {
-}
-
-func needsBackslash(c byte) bool {
-	for _, r := range []byte("_{}%$&\\~#") {
-		if c == r {
-			return true
-		}
-	}
-	return false
-}
-
-func (r *Latex) escapeSpecialChars(text []byte) {
-	for i := 0; i < len(text); i++ {
-		// directly copy normal characters
-		org := i
-
-		for i < len(text) && !needsBackslash(text[i]) {
-			i++
-		}
-		if i > org {
-			r.w.Write(text[org:i])
-		}
-
-		// escape a character
-		if i >= len(text) {
-			break
-		}
-		r.w.WriteByte('\\')
-		r.w.WriteByte(text[i])
-	}
-}
-
-func (r *Latex) Entity(entity []byte) {
-	// TODO: convert this into a unicode character or something
-	r.w.Write(entity)
-}
-
-func (r *Latex) NormalText(text []byte) {
-	r.escapeSpecialChars(text)
-}
-
-// header and footer
-func (r *Latex) DocumentHeader() {
-	r.w.WriteString("\\documentclass{article}\n")
-	r.w.WriteString("\n")
-	r.w.WriteString("\\usepackage{graphicx}\n")
-	r.w.WriteString("\\usepackage{listings}\n")
-	r.w.WriteString("\\usepackage[margin=1in]{geometry}\n")
-	r.w.WriteString("\\usepackage[utf8]{inputenc}\n")
-	r.w.WriteString("\\usepackage{verbatim}\n")
-	r.w.WriteString("\\usepackage[normalem]{ulem}\n")
-	r.w.WriteString("\\usepackage{hyperref}\n")
-	r.w.WriteString("\n")
-	r.w.WriteString("\\hypersetup{colorlinks,%\n")
-	r.w.WriteString("  citecolor=black,%\n")
-	r.w.WriteString("  filecolor=black,%\n")
-	r.w.WriteString("  linkcolor=black,%\n")
-	r.w.WriteString("  urlcolor=black,%\n")
-	r.w.WriteString("  pdfstartview=FitH,%\n")
-	r.w.WriteString("  breaklinks=true,%\n")
-	r.w.WriteString("  pdfauthor={Blackfriday Markdown Processor v")
-	r.w.WriteString(Version)
-	r.w.WriteString("}}\n")
-	r.w.WriteString("\n")
-	r.w.WriteString("\\newcommand{\\HRule}{\\rule{\\linewidth}{0.5mm}}\n")
-	r.w.WriteString("\\addtolength{\\parskip}{0.5\\baselineskip}\n")
-	r.w.WriteString("\\parindent=0pt\n")
-	r.w.WriteString("\n")
-	r.w.WriteString("\\begin{document}\n")
-}
-
-func (r *Latex) DocumentFooter() {
-	r.w.WriteString("\n\\end{document}\n")
-}
-
-func (r *Latex) Render(ast *Node) []byte {
-	// TODO
-	return nil
-}
-
-func (r *Latex) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
-	// TODO
-	return GoToNext
-}
--- a/markdown.go
+++ b/markdown.go
@ -46,8 +46,6 @@ const (
 	AutoHeaderIDs                                 // Create the header ID from the text
 	BackslashLineBreak                            // Translate trailing backslashes into line breaks
 	DefinitionLists                               // Render definition lists
-	TOC                                           // Generate a table of contents
-	OmitContents                                  // Skip the main contents (for a standalone table of contents)

 	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
 		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
@ -86,7 +84,7 @@ type CellAlignFlags int
 // Only a single one of these values will be used; they are not ORed together.
 // These are mostly of interest if you are writing a new output format.
 const (
-	TableAlignmentLeft = 1 << iota
+	TableAlignmentLeft CellAlignFlags = 1 << iota
 	TableAlignmentRight
 	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
 )
@ -153,7 +151,8 @@ var blockTags = map[string]struct{}{
 // If the callback returns false, the rendering function should reset the
 // output buffer as though it had never been called.
 //
-// Currently HTML and Latex implementations are provided
+// Only an HTML implementation is provided in this repository;
+// see the README for external implementations.
 type Renderer interface {
 	Render(ast *Node) []byte
 	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
@ -213,14 +212,16 @@ func (p *parser) finalize(block *Node) {
 }

 func (p *parser) addChild(node NodeType, offset uint32) *Node {
-	for !p.tip.canContain(node) {
+	return p.addExistingChild(NewNode(node), offset)
+}
+
+func (p *parser) addExistingChild(node *Node, offset uint32) *Node {
+	for !p.tip.canContain(node.Type) {
 		p.finalize(p.tip)
 	}
-	newNode := NewNode(node)
-	newNode.content = []byte{}
-	p.tip.AppendChild(newNode)
-	p.tip = newNode
-	return newNode
+	p.tip.AppendChild(node)
+	p.tip = node
+	return node
 }

 func (p *parser) closeUnmatchedBlocks() {
@ -287,8 +288,7 @@ type Options struct {
 func MarkdownBasic(input []byte) []byte {
 	// set up the HTML renderer
 	renderer := NewHTMLRenderer(HTMLRendererParameters{
-		Flags:      UseXHTML,
-		Extensions: CommonExtensions,
+		Flags: UseXHTML,
 	})

 	// set up the parser
@ -316,8 +316,7 @@ func MarkdownBasic(input []byte) []byte {
 func MarkdownCommon(input []byte) []byte {
 	// set up the HTML renderer
 	renderer := NewHTMLRenderer(HTMLRendererParameters{
-		Flags:      CommonHTMLFlags,
-		Extensions: CommonExtensions,
+		Flags: CommonHTMLFlags,
 	})
 	return Markdown(input, renderer, DefaultOptions)
 }
@ -327,8 +326,7 @@ func MarkdownCommon(input []byte) []byte {
 // The supplied Renderer is used to format the output, and extensions dictates
 // which non-standard extensions are enabled.
 //
-// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
-// NewLatexRenderer, respectively.
+// To use the supplied HTML renderer, see NewHTMLRenderer.
 func Markdown(input []byte, renderer Renderer, options Options) []byte {
 	if renderer == nil {
 		return nil
@ -419,7 +417,8 @@ func (p *parser) parseRefsToAST() {
 	// the fixed initial set.
 	for i := 0; i < len(p.notes); i++ {
 		ref := p.notes[i]
-		block := p.addBlock(Item, nil)
+		p.addExistingChild(ref.footnote, 0)
+		block := ref.footnote
 		block.ListFlags = flags | ListTypeOrdered
 		block.RefLink = ref.link
 		if ref.hasBlock {
@ -513,6 +512,7 @@ type reference struct {
 	title    []byte
 	noteID   int // 0 if not a footnote ref
 	hasBlock bool
+	footnote *Node // a link to the Item node within a list of footnotes

 	text []byte // only gets populated by refOverride feature with Reference.Text
 }
--- a/node.go
+++ b/node.go
@ -84,6 +84,7 @@ type LinkData struct {
 	Destination []byte // Destination is what goes into a href
 	Title       []byte // Title is the tooltip thing that goes in a title attribute
 	NoteID      int    // NoteID contains a serial number of a footnote, zero if it's not a footnote
+	Footnote    *Node  // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
 }

 // CodeBlockData contains fields relevant to a CodeBlock node type.
@ -277,8 +278,8 @@ type NodeVisitor func(node *Node, entering bool) WalkStatus

 // Walk is a convenience method that instantiates a walker and starts a
 // traversal of subtree rooted at n.
-func (root *Node) Walk(visitor NodeVisitor) {
-	w := newNodeWalker(root)
+func (n *Node) Walk(visitor NodeVisitor) {
+	w := newNodeWalker(n)
 	for w.current != nil {
 		status := visitor(w.current, w.entering)
 		switch status {
@ -308,7 +309,7 @@ func newNodeWalker(root *Node) *nodeWalker {
 }

 func (nw *nodeWalker) next() {
-	if !nw.entering && nw.current == nw.root {
+	if (!nw.current.isContainer() || !nw.entering) && nw.current == nw.root {
 		nw.current = nil
 		return
 	}