Merge branch 'v2' into v2-perf-tweaks

This commit is contained in:
Vytautas Šaltenis 2017-02-02 12:00:48 +02:00 committed by GitHub
commit d04a53c644
9 changed files with 45 additions and 394 deletions

View File

@ -8,7 +8,7 @@ punctuation substitutions, etc.), and it is safe for all utf-8
(unicode) input.
HTML output is currently supported, along with Smartypants
extensions. An experimental LaTeX output engine is also included.
extensions.
It started as a translation from C of [Sundown][3].
@ -69,7 +69,7 @@ html := bluemonday.UGCPolicy().SanitizeBytes(unsafe)
### Custom options
If you want to customize the set of options, first get a renderer
(currently either the HTML or LaTeX output engines), then use it to
(currently only the HTML output engine), then use it to
call the more general `Markdown` function. For examples, see the
implementations of `MarkdownBasic` and `MarkdownCommon` in
`markdown.go`.
@ -233,15 +233,8 @@ are a few of note:
* [markdownfmt](https://github.com/shurcooL/markdownfmt): like gofmt,
but for markdown.
* LaTeX output: renders output as LaTeX. This is currently part of the
main Blackfriday repository, but may be split into its own project
in the future. If you are interested in owning and maintaining the
LaTeX output component, please be in touch.
It renders some basic documents, but is only experimental at this
point. In particular, it does not do any inline escaping, so input
that happens to look like LaTeX code will be passed through without
modification.
* [LaTeX output](https://bitbucket.org/ambrevar/blackfriday-latex):
renders output as LaTeX.
Todo

View File

@ -1601,7 +1601,9 @@ func TestTOC(t *testing.T) {
"#",
"",
}
doTestsBlock(t, tests, TOC)
doTestsParam(t, tests, TestParams{
HTMLFlags: UseXHTML | TOC,
})
}
func TestOmitContents(t *testing.T) {
@ -1625,9 +1627,13 @@ func TestOmitContents(t *testing.T) {
"#\n\nfoo",
"",
}
doTestsBlock(t, tests, TOC|OmitContents)
doTestsParam(t, tests, TestParams{
HTMLFlags: UseXHTML | TOC | OmitContents,
})
// Now run again: make sure OmitContents implies TOC
doTestsBlock(t, tests, OmitContents)
doTestsParam(t, tests, TestParams{
HTMLFlags: UseXHTML | OmitContents,
})
}
func TestCompletePage(t *testing.T) {

View File

@ -44,7 +44,6 @@ func execRecoverableTestSuite(t *testing.T, tests []string, params TestParams, s
func runMarkdown(input string, params TestParams) string {
params.HTMLRendererParameters.Flags = params.HTMLFlags
params.HTMLRendererParameters.Extensions = params.Options.Extensions
renderer := NewHTMLRenderer(params.HTMLRendererParameters)
return string(Markdown([]byte(input), renderer, params.Options))
}
@ -54,8 +53,7 @@ func doTests(t *testing.T, tests []string) {
doTestsParam(t, tests, TestParams{
Options: DefaultOptions,
HTMLRendererParameters: HTMLRendererParameters{
Flags: CommonHTMLFlags,
Extensions: CommonExtensions,
Flags: CommonHTMLFlags,
},
})
}

17
html.go
View File

@ -30,7 +30,6 @@ type HTMLFlags int
const (
HTMLFlagsNone HTMLFlags = 0
SkipHTML HTMLFlags = 1 << iota // Skip preformatted HTML blocks
SkipStyle // Skip embedded <style> elements
SkipImages // Skip embedded images
SkipLinks // Skip all links
Safelink // Only link to trusted protocols
@ -45,6 +44,8 @@ const (
SmartypantsDashes // Enable smart dashes (with Smartypants)
SmartypantsLatexDashes // Enable LaTeX-style dashes (with Smartypants)
SmartypantsAngledQuotes // Enable angled double quotes (with Smartypants) for double quotes rendering
TOC // Generate a table of contents
OmitContents // Skip the main contents (for a standalone table of contents)
TagName = "[A-Za-z][A-Za-z0-9-]*"
AttributeName = "[a-zA-Z_:][a-zA-Z0-9:._-]*"
@ -89,8 +90,7 @@ type HTMLRendererParameters struct {
CSS string // Optional CSS file URL (used if CompletePage is set)
Icon string // Optional icon file URL (used if CompletePage is set)
Flags HTMLFlags // Flags allow customizing this renderer's behavior
Extensions Extensions // Extensions give Smartypants and HTML renderer access to Blackfriday's global extensions
Flags HTMLFlags // Flags allow customizing this renderer's behavior
}
// HTMLRenderer is a type that implements the Renderer interface for HTML output.
@ -539,14 +539,7 @@ func (r *HTMLRenderer) RenderNode(w io.Writer, node *Node, entering bool) WalkSt
if r.Flags&SkipHTML != 0 {
break
}
if r.Flags&SkipStyle != 0 && isHTMLTag(node.Literal, "style") {
break
}
//if options.safe {
// out(w, "<!-- raw HTML omitted -->")
//} else {
r.out(w, node.Literal)
//}
case Link:
// mark it but don't link it if it is not a safe link: no smartypants
dest := node.LinkData.Destination
@ -941,9 +934,9 @@ func (r *HTMLRenderer) Render(ast *Node) []byte {
//dump(ast)
var buf bytes.Buffer
r.writeDocumentHeader(&buf)
if r.Extensions&TOC != 0 || r.Extensions&OmitContents != 0 {
if r.Flags&TOC != 0 || r.Flags&OmitContents != 0 {
r.writeTOC(&buf, ast)
if r.Extensions&OmitContents != 0 {
if r.Flags&OmitContents != 0 {
return buf.Bytes()
}
}

View File

@ -284,6 +284,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
txtE := i
i++
var footnoteNode *Node
// skip any amount of whitespace or newline
// (this is much more lax than original markdown syntax)
@ -463,6 +464,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
}
}
footnoteNode = NewNode(Item)
if t == linkInlineFootnote {
// create a new reference
noteID = len(p.notes) + 1
@ -484,6 +486,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
hasBlock: false,
link: fragment,
title: id,
footnote: footnoteNode,
}
p.notes = append(p.notes, ref)
@ -499,6 +502,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
if t == linkDeferredFootnote {
lr.noteID = len(p.notes) + 1
lr.footnote = footnoteNode
p.notes = append(p.notes, lr)
}
@ -557,6 +561,7 @@ func link(p *parser, data []byte, offset int) (int, *Node) {
linkNode.Destination = link
linkNode.Title = title
linkNode.NoteID = noteID
linkNode.Footnote = footnoteNode
if t == linkInlineFootnote {
i++
}

View File

@ -1119,15 +1119,6 @@ func TestSkipImages(t *testing.T) {
})
}
func TestSkipStyle(t *testing.T) {
doTestsInlineParam(t, []string{
"foo\n\n<style>color: #f00</style> bar",
"<p>foo</p>\n\n<p>color: #f00 bar</p>\n",
}, TestParams{
HTMLFlags: SkipStyle,
})
}
func TestUseXHTML(t *testing.T) {
doTestsParam(t, []string{
"---",

336
latex.go
View File

@ -1,336 +0,0 @@
//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//
//
//
// LaTeX rendering backend
//
//
package blackfriday
import (
"bytes"
"io"
)
// Latex is a type that implements the Renderer interface for LaTeX output.
//
// Do not create this directly, instead use the NewLatexRenderer function.
type Latex struct {
// w is the buffer that the rendering methods append their LaTeX output to.
w bytes.Buffer
}
// NewLatexRenderer returns a pointer to a newly allocated Latex value whose
// output buffer is empty. The result satisfies the Renderer interface.
//
// flags is meant to carry a set of LATEX_* options ORed together; no such
// options are defined, and the argument is not read.
func NewLatexRenderer(flags int) *Latex {
	return &Latex{}
}
// BlockCode renders a code chunk. When lang is empty the text is placed in a
// verbatim environment; otherwise it is placed in an lstlisting environment
// whose language option is set to lang.
func (r *Latex) BlockCode(text []byte, lang string) {
	opening, closing := "\n\\begin{verbatim}\n", "\n\\end{verbatim}\n"
	if lang != "" {
		opening = "\n\\begin{lstlisting}[language=" + lang + "]\n"
		closing = "\n\\end{lstlisting}\n"
	}
	r.w.WriteString(opening)
	r.w.Write(text)
	r.w.WriteString(closing)
}
// TitleBlock is a no-op: the body is empty, so text is ignored and nothing is
// written to the output.
func (r *Latex) TitleBlock(text []byte) {
}
// BlockQuote writes text wrapped in a quotation environment.
func (r *Latex) BlockQuote(text []byte) {
	out := &r.w
	out.WriteString("\n\\begin{quotation}\n")
	out.Write(text)
	out.WriteString("\n\\end{quotation}\n")
}
// BlockHtml writes a raw HTML block wrapped in a verbatim environment. No
// conversion of the markup is attempted; the bytes are copied as they are.
func (r *Latex) BlockHtml(text []byte) {
	out := &r.w
	out.WriteString("\n\\begin{verbatim}\n")
	out.Write(text)
	out.WriteString("\n\\end{verbatim}\n")
}
// BeginHeader writes the opening of the sectioning command that corresponds
// to level (1 through 6). For any other level nothing is written. The id
// argument is not used.
func (r *Latex) BeginHeader(level int, id string) {
	// Indexed by header level; index 0 is intentionally left empty.
	openers := [...]string{
		1: "\n\\section{",
		2: "\n\\subsection{",
		3: "\n\\subsubsection{",
		4: "\n\\paragraph{",
		5: "\n\\subparagraph{",
		6: "\n\\textbf{",
	}
	if level < 1 || level >= len(openers) {
		return
	}
	r.w.WriteString(openers[level])
}
// EndHeader writes the closing brace of a header command followed by a
// newline. None of the arguments are used.
func (r *Latex) EndHeader(level int, id string, header []byte) {
	const closer = "}\n"
	r.w.WriteString(closer)
}
// HRule writes the \HRule command on a line of its own.
func (r *Latex) HRule() {
	const rule = "\n\\HRule\n"
	r.w.WriteString(rule)
}
// BeginList opens a list environment: enumerate when the ListTypeOrdered bit
// is set in flags, itemize otherwise.
func (r *Latex) BeginList(flags ListType) {
	env := "itemize"
	if flags&ListTypeOrdered != 0 {
		env = "enumerate"
	}
	r.w.WriteString("\n\\begin{" + env + "}\n")
}
// EndList closes a list environment: enumerate when the ListTypeOrdered bit
// is set in flags, itemize otherwise.
func (r *Latex) EndList(flags ListType) {
	env := "itemize"
	if flags&ListTypeOrdered != 0 {
		env = "enumerate"
	}
	r.w.WriteString("\n\\end{" + env + "}\n")
}
// ListItem writes an \item marker on a new line followed by text. The flags
// argument is not used.
func (r *Latex) ListItem(text []byte, flags ListType) {
	const marker = "\n\\item "
	r.w.WriteString(marker)
	r.w.Write(text)
}
// BeginParagraph writes a single newline before the paragraph content.
func (r *Latex) BeginParagraph() {
	r.w.WriteByte('\n')
}
// EndParagraph writes a single newline after the paragraph content.
func (r *Latex) EndParagraph() {
	r.w.WriteByte('\n')
}
// Table writes a tabular environment. The column specification gets one
// letter per entry of columnData: 'l' for TableAlignmentLeft, 'r' for
// TableAlignmentRight and 'c' for anything else. The header is followed by a
// row break and an \hline, then the body is written.
func (r *Latex) Table(header []byte, body []byte, columnData []CellAlignFlags) {
	out := &r.w
	out.WriteString("\n\\begin{tabular}{")
	for i := range columnData {
		spec := byte('c')
		switch columnData[i] {
		case TableAlignmentLeft:
			spec = 'l'
		case TableAlignmentRight:
			spec = 'r'
		}
		out.WriteByte(spec)
	}
	out.WriteString("}\n")
	out.Write(header)
	out.WriteString(" \\\\\n\\hline\n")
	out.Write(body)
	out.WriteString("\n\\end{tabular}\n")
}
// TableRow writes a row break (" \\" and a newline) followed by the row text.
func (r *Latex) TableRow(text []byte) {
	const rowBreak = " \\\\\n"
	r.w.WriteString(rowBreak)
	r.w.Write(text)
}
// TableHeaderCell appends a header cell to out. When out already holds data
// the cell is preceded by the " & " column separator. The align argument is
// not used, and nothing is written to the renderer's own buffer.
func (r *Latex) TableHeaderCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
	separator := ""
	if out.Len() > 0 {
		separator = " & "
	}
	out.WriteString(separator)
	out.Write(text)
}
// TableCell appends a body cell to out. When out already holds data the cell
// is preceded by the " & " column separator. The align argument is not used,
// and nothing is written to the renderer's own buffer.
func (r *Latex) TableCell(out *bytes.Buffer, text []byte, align CellAlignFlags) {
	if out.Len() == 0 {
		// First cell of the row: no separator needed.
		out.Write(text)
		return
	}
	out.WriteString(" & ")
	out.Write(text)
}
// BeginFootnotes is an unimplemented stub: the body is empty, so nothing is
// written.
// TODO: this
func (r *Latex) BeginFootnotes() {
}
// EndFootnotes is an unimplemented stub: the body is empty, so nothing is
// written.
// TODO: this
func (r *Latex) EndFootnotes() {
}
// FootnoteItem is a no-op: the body is empty, so all arguments are ignored
// and nothing is written.
func (r *Latex) FootnoteItem(name, text []byte, flags ListType) {
}
// AutoLink writes an \href command that uses link both as the target and as
// the visible text. For emailAutolink the target is prefixed with "mailto:".
func (r *Latex) AutoLink(link []byte, kind autolinkType) {
	opener := "\\href{"
	if kind == emailAutolink {
		opener += "mailto:"
	}
	out := &r.w
	out.WriteString(opener)
	out.Write(link)
	out.WriteString("}{")
	out.Write(link)
	out.WriteByte('}')
}
// CodeSpan writes inline code as a \texttt{...} group, passing the text
// through escapeSpecialChars.
func (r *Latex) CodeSpan(text []byte) {
	const opener = "\\texttt{"
	r.w.WriteString(opener)
	r.escapeSpecialChars(text)
	r.w.WriteByte('}')
}
// DoubleEmphasis writes text inside a \textbf{...} group.
func (r *Latex) DoubleEmphasis(text []byte) {
	out := &r.w
	out.WriteString("\\textbf{")
	out.Write(text)
	out.WriteByte('}')
}
// Emphasis writes text inside a \textit{...} group.
func (r *Latex) Emphasis(text []byte) {
	out := &r.w
	out.WriteString("\\textit{")
	out.Write(text)
	out.WriteByte('}')
}
// Image renders an image reference. A link that starts with "http://" or
// "https://" is written as an \href whose visible text is alt; any other link
// is written as an \includegraphics command. The title argument is not used.
func (r *Latex) Image(link []byte, title []byte, alt []byte) {
	remote := bytes.HasPrefix(link, []byte("http://")) ||
		bytes.HasPrefix(link, []byte("https://"))
	out := &r.w
	if !remote {
		out.WriteString("\\includegraphics{")
		out.Write(link)
		out.WriteString("}")
		return
	}
	// Remote images are treated like ordinary links.
	out.WriteString("\\href{")
	out.Write(link)
	out.WriteString("}{")
	out.Write(alt)
	out.WriteString("}")
}
// LineBreak writes a forced line break: a space, two backslashes and a
// newline.
func (r *Latex) LineBreak() {
	const lineBreak = " \\\\\n"
	r.w.WriteString(lineBreak)
}
// Link writes an \href command with link as the target and content as the
// visible text. The title argument is not used.
func (r *Latex) Link(link []byte, title []byte, content []byte) {
	out := &r.w
	out.WriteString("\\href{")
	out.Write(link)
	out.WriteString("}{")
	out.Write(content)
	out.WriteByte('}')
}
// RawHtmlTag is a no-op: the body is empty, so the tag is dropped and nothing
// is written.
func (r *Latex) RawHtmlTag(tag []byte) {
}
// TripleEmphasis writes text inside nested \textbf{\textit{...}} groups.
func (r *Latex) TripleEmphasis(text []byte) {
	out := &r.w
	out.WriteString("\\textbf{\\textit{")
	out.Write(text)
	out.WriteString("}}")
}
// StrikeThrough writes text inside a \sout{...} group.
func (r *Latex) StrikeThrough(text []byte) {
	out := &r.w
	out.WriteString("\\sout{")
	out.Write(text)
	out.WriteByte('}')
}
// FootnoteRef is an unimplemented stub: the body is empty, so ref and id are
// ignored and nothing is written.
// TODO: this
func (r *Latex) FootnoteRef(ref []byte, id int) {
}
// needsBackslash reports whether c is one of the bytes
// _ { } % $ & \ ~ # that the LaTeX renderer escapes.
func needsBackslash(c byte) bool {
	switch c {
	case '_', '{', '}', '%', '$', '&', '\\', '~', '#':
		return true
	default:
		return false
	}
}
// escapeSpecialChars writes text to the output buffer, replacing characters
// that LaTeX treats specially with sequences that typeset the literal
// character.
//
// Most special characters only need a leading backslash. Three cannot be
// handled that way and are written as named commands instead:
//
//	\  ->  \textbackslash{}    ("\\" is a line break)
//	~  ->  \textasciitilde{}   ("\~" is an accent command)
//	^  ->  \textasciicircum{}  ("\^" is an accent command)
//
// Runs of ordinary bytes are copied through in a single write.
func (r *Latex) escapeSpecialChars(text []byte) {
	start := 0 // beginning of the pending run of ordinary bytes
	for i, c := range text {
		named := ""
		switch c {
		case '\\':
			named = "\\textbackslash{}"
		case '~':
			named = "\\textasciitilde{}"
		case '^':
			named = "\\textasciicircum{}"
		default:
			if !needsBackslash(c) {
				continue
			}
		}

		// Flush the ordinary bytes seen since the last special character.
		if i > start {
			r.w.Write(text[start:i])
		}
		if named != "" {
			r.w.WriteString(named)
		} else {
			r.w.WriteByte('\\')
			r.w.WriteByte(c)
		}
		start = i + 1
	}
	if start < len(text) {
		r.w.Write(text[start:])
	}
}
// Entity writes the entity bytes to the output buffer unchanged.
func (r *Latex) Entity(entity []byte) {
// TODO: convert this into a unicode character or something
r.w.Write(entity)
}
// NormalText renders plain text by handing it to escapeSpecialChars, which
// writes it to the output buffer.
func (r *Latex) NormalText(text []byte) {
r.escapeSpecialChars(text)
}
// DocumentHeader writes the document preamble: the article document class,
// the package imports, the hyperref setup (whose pdfauthor field embeds
// Version), the \HRule command definition, paragraph spacing settings, and
// finally \begin{document}.
func (r *Latex) DocumentHeader() {
	// The pieces are written in order, exactly as listed.
	pieces := []string{
		"\\documentclass{article}\n",
		"\n",
		"\\usepackage{graphicx}\n",
		"\\usepackage{listings}\n",
		"\\usepackage[margin=1in]{geometry}\n",
		"\\usepackage[utf8]{inputenc}\n",
		"\\usepackage{verbatim}\n",
		"\\usepackage[normalem]{ulem}\n",
		"\\usepackage{hyperref}\n",
		"\n",
		"\\hypersetup{colorlinks,%\n",
		" citecolor=black,%\n",
		" filecolor=black,%\n",
		" linkcolor=black,%\n",
		" urlcolor=black,%\n",
		" pdfstartview=FitH,%\n",
		" breaklinks=true,%\n",
		" pdfauthor={Blackfriday Markdown Processor v",
		Version,
		"}}\n",
		"\n",
		"\\newcommand{\\HRule}{\\rule{\\linewidth}{0.5mm}}\n",
		"\\addtolength{\\parskip}{0.5\\baselineskip}\n",
		"\\parindent=0pt\n",
		"\n",
		"\\begin{document}\n",
	}
	for _, piece := range pieces {
		r.w.WriteString(piece)
	}
}
// DocumentFooter writes \end{document} on a line of its own.
func (r *Latex) DocumentFooter() {
	const footer = "\n\\end{document}\n"
	r.w.WriteString(footer)
}
// Render is an unimplemented stub: it ignores ast and always returns nil.
func (r *Latex) Render(ast *Node) []byte {
// TODO
return nil
}
// RenderNode is an unimplemented stub: it writes nothing to w, ignores node
// and entering, and always returns GoToNext.
func (r *Latex) RenderNode(w io.Writer, node *Node, entering bool) WalkStatus {
// TODO
return GoToNext
}

View File

@ -46,8 +46,6 @@ const (
AutoHeaderIDs // Create the header ID from the text
BackslashLineBreak // Translate trailing backslashes into line breaks
DefinitionLists // Render definition lists
TOC // Generate a table of contents
OmitContents // Skip the main contents (for a standalone table of contents)
CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes
@ -86,7 +84,7 @@ type CellAlignFlags int
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
TableAlignmentLeft = 1 << iota
TableAlignmentLeft CellAlignFlags = 1 << iota
TableAlignmentRight
TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)
@ -153,7 +151,8 @@ var blockTags = map[string]struct{}{
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently HTML and Latex implementations are provided
// Only an HTML implementation is provided in this repository,
// see the README for external implementations.
type Renderer interface {
Render(ast *Node) []byte
RenderNode(w io.Writer, node *Node, entering bool) WalkStatus
@ -213,14 +212,16 @@ func (p *parser) finalize(block *Node) {
}
func (p *parser) addChild(node NodeType, offset uint32) *Node {
for !p.tip.canContain(node) {
return p.addExistingChild(NewNode(node), offset)
}
func (p *parser) addExistingChild(node *Node, offset uint32) *Node {
for !p.tip.canContain(node.Type) {
p.finalize(p.tip)
}
newNode := NewNode(node)
newNode.content = []byte{}
p.tip.AppendChild(newNode)
p.tip = newNode
return newNode
p.tip.AppendChild(node)
p.tip = node
return node
}
func (p *parser) closeUnmatchedBlocks() {
@ -287,8 +288,7 @@ type Options struct {
func MarkdownBasic(input []byte) []byte {
// set up the HTML renderer
renderer := NewHTMLRenderer(HTMLRendererParameters{
Flags: UseXHTML,
Extensions: CommonExtensions,
Flags: UseXHTML,
})
// set up the parser
@ -316,8 +316,7 @@ func MarkdownBasic(input []byte) []byte {
func MarkdownCommon(input []byte) []byte {
// set up the HTML renderer
renderer := NewHTMLRenderer(HTMLRendererParameters{
Flags: CommonHTMLFlags,
Extensions: CommonExtensions,
Flags: CommonHTMLFlags,
})
return Markdown(input, renderer, DefaultOptions)
}
@ -327,8 +326,7 @@ func MarkdownCommon(input []byte) []byte {
// The supplied Renderer is used to format the output, and extensions dictates
// which non-standard extensions are enabled.
//
// To use the supplied HTML or LaTeX renderers, see NewHTMLRenderer and
// NewLatexRenderer, respectively.
// To use the supplied HTML renderer, see NewHTMLRenderer.
func Markdown(input []byte, renderer Renderer, options Options) []byte {
if renderer == nil {
return nil
@ -419,7 +417,8 @@ func (p *parser) parseRefsToAST() {
// the fixed initial set.
for i := 0; i < len(p.notes); i++ {
ref := p.notes[i]
block := p.addBlock(Item, nil)
p.addExistingChild(ref.footnote, 0)
block := ref.footnote
block.ListFlags = flags | ListTypeOrdered
block.RefLink = ref.link
if ref.hasBlock {
@ -513,6 +512,7 @@ type reference struct {
title []byte
noteID int // 0 if not a footnote ref
hasBlock bool
footnote *Node // a link to the Item node within a list of footnotes
text []byte // only gets populated by refOverride feature with Reference.Text
}

View File

@ -84,6 +84,7 @@ type LinkData struct {
Destination []byte // Destination is what goes into a href
Title []byte // Title is the tooltip thing that goes in a title attribute
NoteID int // NoteID contains a serial number of a footnote, zero if it's not a footnote
Footnote *Node // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
}
// CodeBlockData contains fields relevant to a CodeBlock node type.
@ -277,8 +278,8 @@ type NodeVisitor func(node *Node, entering bool) WalkStatus
// Walk is a convenience method that instantiates a walker and starts a
// traversal of subtree rooted at n.
func (root *Node) Walk(visitor NodeVisitor) {
w := newNodeWalker(root)
func (n *Node) Walk(visitor NodeVisitor) {
w := newNodeWalker(n)
for w.current != nil {
status := visitor(w.current, w.entering)
switch status {
@ -308,7 +309,7 @@ func newNodeWalker(root *Node) *nodeWalker {
}
func (nw *nodeWalker) next() {
if !nw.entering && nw.current == nw.root {
if (!nw.current.isContainer() || !nw.entering) && nw.current == nw.root {
nw.current = nil
return
}