Document SanitizedAnchorName algorithm, copy implementation. (#352)

The goal of this change is to reduce number of non-standard library
packages (repositories) that blackfriday imports from 1 to 0, and in
turn, reduce the cost of importing blackfriday into other projects.

Do so by documenting the algorithm of SanitizedAnchorName, and include
a copy of the small function inside blackfriday itself. The same
functionality continues to be available in the original location,
github.com/shurcooL/sanitized_anchor_name.Create. It can be used by
existing users and those that look for a small package, and don't need
all of blackfriday functionality. Existing users of blackfriday can use
the new SanitizedAnchorName function directly and avoid an extra
package import.

Resolves #350.
pull/343/merge
Dmitri Shuralyov 2017-05-09 02:07:14 -04:00 committed by GitHub
parent b253417e1c
commit 0ba0f2b6ed
5 changed files with 113 additions and 7 deletions

View File

@ -93,6 +93,22 @@ installed in `$GOPATH/bin`. This is a statically-linked binary that
can be copied to wherever you need it without worrying about can be copied to wherever you need it without worrying about
dependencies and library versions. dependencies and library versions.
### Sanitized anchor names
Blackfriday includes an algorithm for creating sanitized anchor names
corresponding to a given input text. This algorithm is used to create
anchors for headings when `EXTENSION_AUTO_HEADER_IDS` is enabled. The
algorithm has a specification, so that other packages can create
compatible anchor names and links to those anchors.
The specification is located at https://godoc.org/github.com/russross/blackfriday#hdr-Sanitized_Anchor_Names.
[`SanitizedAnchorName`](https://godoc.org/github.com/russross/blackfriday#SanitizedAnchorName) exposes this functionality, and can be used to
create compatible links to the anchor names generated by blackfriday.
This algorithm is also implemented in a small standalone package at
[`github.com/shurcooL/sanitized_anchor_name`](https://godoc.org/github.com/shurcooL/sanitized_anchor_name). It can be useful for clients
that want a small package and don't need full functionality of blackfriday.
Features Features
-------- --------

View File

@ -15,8 +15,7 @@ package blackfriday
import ( import (
"bytes" "bytes"
"unicode"
"github.com/shurcooL/sanitized_anchor_name"
) )
// Parse block-level data. // Parse block-level data.
@ -243,7 +242,7 @@ func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
} }
if end > i { if end > i {
if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
id = sanitized_anchor_name.Create(string(data[i:end])) id = SanitizedAnchorName(string(data[i:end]))
} }
work := func() bool { work := func() bool {
p.inline(out, data[i:end]) p.inline(out, data[i:end])
@ -1364,7 +1363,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
id := "" id := ""
if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
id = sanitized_anchor_name.Create(string(data[prev:eol])) id = SanitizedAnchorName(string(data[prev:eol]))
} }
p.r.Header(out, work, level, id) p.r.Header(out, work, level, id)
@ -1428,3 +1427,24 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
p.renderParagraph(out, data[:i]) p.renderParagraph(out, data[:i])
return i return i
} }
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment.
func SanitizedAnchorName(text string) string {
var anchorName []rune
futureDash := false
for _, r := range text {
switch {
case unicode.IsLetter(r) || unicode.IsNumber(r):
if futureDash && len(anchorName) > 0 {
anchorName = append(anchorName, '-')
}
futureDash = false
anchorName = append(anchorName, unicode.ToLower(r))
default:
futureDash = true
}
}
return string(anchorName)
}

View File

@ -1738,3 +1738,44 @@ func TestJoinLines(t *testing.T) {
t.Error("output dose not match.") t.Error("output dose not match.")
} }
} }
func TestSanitizedAnchorName(t *testing.T) {
tests := []struct {
text string
want string
}{
{
text: "This is a header",
want: "this-is-a-header",
},
{
text: "This is also a header",
want: "this-is-also-a-header",
},
{
text: "main.go",
want: "main-go",
},
{
text: "Article 123",
want: "article-123",
},
{
text: "<- Let's try this, shall we?",
want: "let-s-try-this-shall-we",
},
{
text: " ",
want: "",
},
{
text: "Hello, 世界",
want: "hello-世界",
},
}
for _, test := range tests {
if got := SanitizedAnchorName(test.text); got != test.want {
t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
}
}
}

32
doc.go Normal file
View File

@ -0,0 +1,32 @@
// Package blackfriday is a Markdown processor.
//
// It translates plain text with simple formatting rules into HTML or LaTeX.
//
// Sanitized Anchor Names
//
// Blackfriday includes an algorithm for creating sanitized anchor names
// corresponding to a given input text. This algorithm is used to create
// anchors for headings when EXTENSION_AUTO_HEADER_IDS is enabled. The
// algorithm is specified below, so that other packages can create
// compatible anchor names and links to those anchors.
//
// The algorithm iterates over the input text, interpreted as UTF-8,
// one Unicode code point (rune) at a time. All runes that are letters (category L)
// or numbers (category N) are considered valid characters. They are mapped to
// lower case, and included in the output. All other runes are considered
// invalid characters. Invalid characters that preceed the first valid character,
// as well as invalid character that follow the last valid character
// are dropped completely. All other sequences of invalid characters
// between two valid characters are replaced with a single dash character '-'.
//
// SanitizedAnchorName exposes this functionality, and can be used to
// create compatible links to the anchor names generated by blackfriday.
// This algorithm is also implemented in a small standalone package at
// github.com/shurcooL/sanitized_anchor_name. It can be useful for clients
// that want a small package and don't need full functionality of blackfriday.
package blackfriday
// NOTE: Keep Sanitized Anchor Name algorithm in sync with package
// github.com/shurcooL/sanitized_anchor_name.
// Otherwise, users of sanitized_anchor_name will get anchor names
// that are incompatible with those generated by blackfriday.

View File

@ -13,9 +13,6 @@
// //
// //
// Blackfriday markdown processor.
//
// Translates plain text with simple formatting rules into HTML or LaTeX.
package blackfriday package blackfriday
import ( import (