Document SanitizedAnchorName algorithm, copy implementation. (#352)

The goal of this change is to reduce number of non-standard library
packages (repositories) that blackfriday imports from 1 to 0, and in
turn, reduce the cost of importing blackfriday into other projects.

Do so by documenting the algorithm of SanitizedAnchorName, and include
a copy of the small function inside blackfriday itself. The same
functionality continues to be available in the original location,
github.com/shurcooL/sanitized_anchor_name.Create. It can be used by
existing users and those that look for a small package, and don't need
all of blackfriday functionality. Existing users of blackfriday can use
the new SanitizedAnchorName function directly and avoid an extra
package import.

Resolves #350.
pull/343/merge
Dmitri Shuralyov 2017-05-09 02:07:14 -04:00 committed by GitHub
parent b253417e1c
commit 0ba0f2b6ed
5 changed files with 113 additions and 7 deletions

View File

@ -93,6 +93,22 @@ installed in `$GOPATH/bin`. This is a statically-linked binary that
can be copied to wherever you need it without worrying about
dependencies and library versions.
### Sanitized anchor names
Blackfriday includes an algorithm for creating sanitized anchor names
corresponding to a given input text. This algorithm is used to create
anchors for headings when `EXTENSION_AUTO_HEADER_IDS` is enabled. The
algorithm has a specification, so that other packages can create
compatible anchor names and links to those anchors.
The specification is located at https://godoc.org/github.com/russross/blackfriday#hdr-Sanitized_Anchor_Names.
[`SanitizedAnchorName`](https://godoc.org/github.com/russross/blackfriday#SanitizedAnchorName) exposes this functionality, and can be used to
create compatible links to the anchor names generated by blackfriday.
This algorithm is also implemented in a small standalone package at
[`github.com/shurcooL/sanitized_anchor_name`](https://godoc.org/github.com/shurcooL/sanitized_anchor_name). It can be useful for clients
that want a small package and don't need full functionality of blackfriday.
Features
--------

View File

@ -15,8 +15,7 @@ package blackfriday
import (
"bytes"
"github.com/shurcooL/sanitized_anchor_name"
"unicode"
)
// Parse block-level data.
@ -243,7 +242,7 @@ func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
}
if end > i {
if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
id = sanitized_anchor_name.Create(string(data[i:end]))
id = SanitizedAnchorName(string(data[i:end]))
}
work := func() bool {
p.inline(out, data[i:end])
@ -1364,7 +1363,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
id := ""
if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
id = sanitized_anchor_name.Create(string(data[prev:eol]))
id = SanitizedAnchorName(string(data[prev:eol]))
}
p.r.Header(out, work, level, id)
@ -1428,3 +1427,24 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
p.renderParagraph(out, data[:i])
return i
}
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment.
func SanitizedAnchorName(text string) string {
var anchorName []rune
futureDash := false
for _, r := range text {
switch {
case unicode.IsLetter(r) || unicode.IsNumber(r):
if futureDash && len(anchorName) > 0 {
anchorName = append(anchorName, '-')
}
futureDash = false
anchorName = append(anchorName, unicode.ToLower(r))
default:
futureDash = true
}
}
return string(anchorName)
}

View File

@ -1738,3 +1738,44 @@ func TestJoinLines(t *testing.T) {
t.Error("output dose not match.")
}
}
func TestSanitizedAnchorName(t *testing.T) {
tests := []struct {
text string
want string
}{
{
text: "This is a header",
want: "this-is-a-header",
},
{
text: "This is also a header",
want: "this-is-also-a-header",
},
{
text: "main.go",
want: "main-go",
},
{
text: "Article 123",
want: "article-123",
},
{
text: "<- Let's try this, shall we?",
want: "let-s-try-this-shall-we",
},
{
text: " ",
want: "",
},
{
text: "Hello, 世界",
want: "hello-世界",
},
}
for _, test := range tests {
if got := SanitizedAnchorName(test.text); got != test.want {
t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
}
}
}

32
doc.go Normal file
View File

@ -0,0 +1,32 @@
// Package blackfriday is a Markdown processor.
//
// It translates plain text with simple formatting rules into HTML or LaTeX.
//
// Sanitized Anchor Names
//
// Blackfriday includes an algorithm for creating sanitized anchor names
// corresponding to a given input text. This algorithm is used to create
// anchors for headings when EXTENSION_AUTO_HEADER_IDS is enabled. The
// algorithm is specified below, so that other packages can create
// compatible anchor names and links to those anchors.
//
// The algorithm iterates over the input text, interpreted as UTF-8,
// one Unicode code point (rune) at a time. All runes that are letters (category L)
// or numbers (category N) are considered valid characters. They are mapped to
// lower case, and included in the output. All other runes are considered
// invalid characters. Invalid characters that preceed the first valid character,
// as well as invalid character that follow the last valid character
// are dropped completely. All other sequences of invalid characters
// between two valid characters are replaced with a single dash character '-'.
//
// SanitizedAnchorName exposes this functionality, and can be used to
// create compatible links to the anchor names generated by blackfriday.
// This algorithm is also implemented in a small standalone package at
// github.com/shurcooL/sanitized_anchor_name. It can be useful for clients
// that want a small package and don't need full functionality of blackfriday.
package blackfriday
// NOTE: Keep Sanitized Anchor Name algorithm in sync with package
// github.com/shurcooL/sanitized_anchor_name.
// Otherwise, users of sanitized_anchor_name will get anchor names
// that are incompatible with those generated by blackfriday.

View File

@ -13,9 +13,6 @@
//
//
// Blackfriday markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday
import (