Prevent generated header collisions, less naively.

> This is a rework of an earlier version of this code.

The automatic header ID generation code submitted in #125 has a subtle
bug where it will use the same ID for multiple headers with identical
text. In the case below, all the headers are rendered a `<h1
id="header">Header</h1>`.

  ```markdown
  # Header
  # Header
  # Header
  # Header
  ```

This change is a simple but robust approach that uses an incrementing
counter and pre-checking to prevent header collision. (The above would
be rendered as `header`, `header-1`, `header-2`, and `header-3`.) In
more complex cases, it will append a new counter suffix (`-1`), like so:

  ```markdown
  # Header
  # Header 1
  # Header
  # Header
  ```

This will generate `header`, `header-1`, `header-1-1`, and `header-1-2`.

This code has two additional changes over the prior version:

1.  Rather than reimplementing @shurcooL’s anchor sanitization code, I
    have imported it as from
    `github.com/shurcooL/go/github_flavored_markdown/sanitized_anchor_name`.

2.  The markdown block parser is now only interested in *generating* a
    sanitized anchor name, not with ensuring its uniqueness. That code
    has been moved to the HTML renderer. This means that if the HTML
    renderer is modified to identify all unique headers prior to
    rendering, the hackish nature of the collision detection can be
    eliminated.
This commit is contained in:
Austin Ziegler 2014-11-01 18:35:35 -04:00
parent 7c8f3c1dcc
commit 40f28ee022
3 changed files with 55 additions and 20 deletions

View File

@ -15,7 +15,7 @@ package blackfriday
import ( import (
"bytes" "bytes"
"unicode" "github.com/shurcooL/go/github_flavored_markdown/sanitized_anchor_name"
) )
// Parse block-level data. // Parse block-level data.
@ -227,7 +227,7 @@ func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
} }
if end > i { if end > i {
if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
id = createSanitizedAnchorName(string(data[i:end])) id = sanitized_anchor_name.Create(string(data[i:end]))
} }
work := func() bool { work := func() bool {
p.inline(out, data[i:end]) p.inline(out, data[i:end])
@ -1276,7 +1276,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
id := "" id := ""
if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
id = createSanitizedAnchorName(string(data[prev:eol])) id = sanitized_anchor_name.Create(string(data[prev:eol]))
} }
p.r.Header(out, work, level, id) p.r.Header(out, work, level, id)
@ -1325,16 +1325,3 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
p.renderParagraph(out, data[:i]) p.renderParagraph(out, data[:i])
return i return i
} }
func createSanitizedAnchorName(text string) string {
var anchorName []rune
for _, r := range []rune(text) {
switch {
case r == ' ':
anchorName = append(anchorName, '-')
case unicode.IsLetter(r) || unicode.IsNumber(r):
anchorName = append(anchorName, unicode.ToLower(r))
}
}
return string(anchorName)
}

View File

@ -275,10 +275,27 @@ func TestPrefixAutoHeaderIdExtension(t *testing.T) {
"* List\n * Nested list\n # Nested header\n", "* List\n * Nested list\n # Nested header\n",
"<ul>\n<li><p>List</p>\n\n<ul>\n<li><p>Nested list</p>\n\n" + "<ul>\n<li><p>List</p>\n\n<ul>\n<li><p>Nested list</p>\n\n" +
"<h1 id=\"nested-header\">Nested header</h1></li>\n</ul></li>\n</ul>\n", "<h1 id=\"nested-header\">Nested header</h1></li>\n</ul></li>\n</ul>\n",
"# Header\n\n# Header\n",
"<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header</h1>\n",
"# Header 1\n\n# Header 1",
"<h1 id=\"header-1\">Header 1</h1>\n\n<h1 id=\"header-1-1\">Header 1</h1>\n",
"# Header\n\n# Header 1\n\n# Header\n\n# Header",
"<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header 1</h1>\n\n<h1 id=\"header-1-1\">Header</h1>\n\n<h1 id=\"header-1-2\">Header</h1>\n",
} }
doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS) doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS)
} }
func TestPrefixMultipleHeaderExtensions(t *testing.T) {
var tests = []string{
"# Header\n\n# Header {#header}\n\n# Header 1",
"<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header</h1>\n\n<h1 id=\"header-1-1\">Header 1</h1>\n",
}
doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS|EXTENSION_HEADER_IDS)
}
func TestUnderlineHeaders(t *testing.T) { func TestUnderlineHeaders(t *testing.T) {
var tests = []string{ var tests = []string{
"Header 1\n========\n", "Header 1\n========\n",
@ -369,6 +386,12 @@ func TestUnderlineHeadersAutoIDs(t *testing.T) {
"Double underline\n=====\n=====\n", "Double underline\n=====\n=====\n",
"<h1 id=\"double-underline\">Double underline</h1>\n\n<p>=====</p>\n", "<h1 id=\"double-underline\">Double underline</h1>\n\n<p>=====</p>\n",
"Header\n======\n\nHeader\n======\n",
"<h1 id=\"header\">Header</h1>\n\n<h1 id=\"header-1\">Header</h1>\n",
"Header 1\n========\n\nHeader 1\n========\n",
"<h1 id=\"header-1\">Header 1</h1>\n\n<h1 id=\"header-1-1\">Header 1</h1>\n",
} }
doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS) doTestsBlock(t, tests, EXTENSION_AUTO_HEADER_IDS)
} }

33
html.go
View File

@ -81,6 +81,9 @@ type Html struct {
currentLevel int currentLevel int
toc *bytes.Buffer toc *bytes.Buffer
// Track header IDs to prevent ID collision in a single generation.
headerIDs map[string]int
smartypants *smartypantsRenderer smartypants *smartypantsRenderer
} }
@ -123,6 +126,8 @@ func HtmlRendererWithParameters(flags int, title string,
currentLevel: 0, currentLevel: 0,
toc: new(bytes.Buffer), toc: new(bytes.Buffer),
headerIDs: make(map[string]int),
smartypants: smartypants(flags), smartypants: smartypants(flags),
} }
} }
@ -190,11 +195,12 @@ func (options *Html) Header(out *bytes.Buffer, text func() bool, level int, id s
marker := out.Len() marker := out.Len()
doubleSpace(out) doubleSpace(out)
if id == "" && options.flags&HTML_TOC != 0 {
id = fmt.Sprintf("toc_%d", options.headerCount)
}
if id != "" { if id != "" {
out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, id)) out.WriteString(fmt.Sprintf("<h%d id=\"%s\">", level, options.ensureUniqueHeaderID(id)))
} else if options.flags&HTML_TOC != 0 {
// headerCount is incremented in htmlTocHeader
out.WriteString(fmt.Sprintf("<h%d id=\"toc_%d\">", level, options.headerCount))
} else { } else {
out.WriteString(fmt.Sprintf("<h%d>", level)) out.WriteString(fmt.Sprintf("<h%d>", level))
} }
@ -853,3 +859,22 @@ func isRelativeLink(link []byte) (yes bool) {
} }
return return
} }
func (options *Html) ensureUniqueHeaderID(id string) string {
for count, found := options.headerIDs[id]; found; count, found = options.headerIDs[id] {
tmp := fmt.Sprintf("%s-%d", id, count+1)
if _, tmpFound := options.headerIDs[tmp]; !tmpFound {
options.headerIDs[id] = count + 1
id = tmp
} else {
id = id + "-1"
}
}
if _, found := options.headerIDs[id]; !found {
options.headerIDs[id] = 0
}
return id
}