Document SanitizedAnchorName algorithm, copy implementation. (#509)
The goal of this change is to reduce the number of non-standard library packages (repositories) that blackfriday imports (not counting imports used only for tests) from 1 to 0, and in turn, reduce the cost of importing blackfriday into other projects. Do so by documenting the algorithm of SanitizedAnchorName, and include a copy of the small function inside blackfriday itself. The same functionality continues to be available in the original location, github.com/shurcooL/sanitized_anchor_name.Create. It can be used by existing users and those that look for a small package, and don't need all of blackfriday functionality. Existing users of blackfriday can use the new SanitizedAnchorName function directly and avoid an extra package import. This change is a port of PR #352 from v1 into v2. Updates #348. Updates #350.
pull/515/head
parent
535ad76f61
commit
919b1f5b9b
18
README.md
18
README.md
|
@ -104,6 +104,8 @@ html := bluemonday.UGCPolicy().SanitizeBytes(unsafe)
|
|||
If you want to customize the set of options, use `blackfriday.WithExtensions`,
|
||||
`blackfriday.WithRenderer` and `blackfriday.WithRefOverride`.
|
||||
|
||||
### `blackfriday-tool`
|
||||
|
||||
You can also check out `blackfriday-tool` for a more complete example
|
||||
of how to use it. Download and install it using:
|
||||
|
||||
|
@ -123,6 +125,22 @@ installed in `$GOPATH/bin`. This is a statically-linked binary that
|
|||
can be copied to wherever you need it without worrying about
|
||||
dependencies and library versions.
|
||||
|
||||
### Sanitized anchor names
|
||||
|
||||
Blackfriday includes an algorithm for creating sanitized anchor names
|
||||
corresponding to a given input text. This algorithm is used to create
|
||||
anchors for headings when the `AutoHeadingIDs` extension is enabled. The
|
||||
algorithm has a specification, so that other packages can create
|
||||
compatible anchor names and links to those anchors.
|
||||
|
||||
The specification is located at https://godoc.org/gopkg.in/russross/blackfriday.v2#hdr-Sanitized_Anchor_Names.
|
||||
|
||||
[`SanitizedAnchorName`](https://godoc.org/gopkg.in/russross/blackfriday.v2#SanitizedAnchorName) exposes this functionality, and can be used to
|
||||
create compatible links to the anchor names generated by blackfriday.
|
||||
This algorithm is also implemented in a small standalone package at
|
||||
[`github.com/shurcooL/sanitized_anchor_name`](https://godoc.org/github.com/shurcooL/sanitized_anchor_name). It can be useful for clients
|
||||
that want a small package and don't need the full functionality of blackfriday.
|
||||
|
||||
|
||||
Features
|
||||
--------
|
||||
|
|
28
block.go
28
block.go
|
@ -18,8 +18,7 @@ import (
|
|||
"html"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/shurcooL/sanitized_anchor_name"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -259,7 +258,7 @@ func (p *Markdown) prefixHeading(data []byte) int {
|
|||
}
|
||||
if end > i {
|
||||
if id == "" && p.extensions&AutoHeadingIDs != 0 {
|
||||
id = sanitized_anchor_name.Create(string(data[i:end]))
|
||||
id = SanitizedAnchorName(string(data[i:end]))
|
||||
}
|
||||
block := p.addBlock(Heading, data[i:end])
|
||||
block.HeadingID = id
|
||||
|
@ -1505,7 +1504,7 @@ func (p *Markdown) paragraph(data []byte) int {
|
|||
|
||||
id := ""
|
||||
if p.extensions&AutoHeadingIDs != 0 {
|
||||
id = sanitized_anchor_name.Create(string(data[prev:eol]))
|
||||
id = SanitizedAnchorName(string(data[prev:eol]))
|
||||
}
|
||||
|
||||
block := p.addBlock(Heading, data[prev:eol])
|
||||
|
@ -1590,3 +1589,24 @@ func skipUntilChar(text []byte, start int, char byte) int {
|
|||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment. The text is
// walked one rune at a time:
//
//   - Letters and numbers (unicode.IsLetter / unicode.IsNumber) are valid;
//     they are lower-cased and written to the result.
//   - Every other rune is invalid. A run of invalid runes between two valid
//     runes collapses into a single '-'.
//   - Invalid runes before the first valid rune or after the last valid rune
//     produce nothing.
//
// An input containing no valid runes yields the empty string.
func SanitizedAnchorName(text string) string {
	var b strings.Builder
	// The result is usually no longer than the input, so one up-front
	// allocation normally suffices; the builder still grows if a lower-case
	// form happens to need more bytes than its upper-case original.
	b.Grow(len(text))

	// pendingDash records that at least one invalid rune has been seen since
	// the last valid one. The dash is only emitted once another valid rune
	// shows up, which is what drops leading and trailing separators.
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			pendingDash = true
			continue
		}
		if pendingDash && b.Len() > 0 {
			b.WriteByte('-')
		}
		pendingDash = false
		b.WriteRune(unicode.ToLower(r))
	}
	return b.String()
}
|
||||
|
|
|
@ -1875,3 +1875,44 @@ func TestIsFenceLine(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizedAnchorName(t *testing.T) {
|
||||
tests := []struct {
|
||||
text string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
text: "This is a header",
|
||||
want: "this-is-a-header",
|
||||
},
|
||||
{
|
||||
text: "This is also a header",
|
||||
want: "this-is-also-a-header",
|
||||
},
|
||||
{
|
||||
text: "main.go",
|
||||
want: "main-go",
|
||||
},
|
||||
{
|
||||
text: "Article 123",
|
||||
want: "article-123",
|
||||
},
|
||||
{
|
||||
text: "<- Let's try this, shall we?",
|
||||
want: "let-s-try-this-shall-we",
|
||||
},
|
||||
{
|
||||
text: " ",
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
text: "Hello, 世界",
|
||||
want: "hello-世界",
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
if got := SanitizedAnchorName(test.text); got != test.want {
|
||||
t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
28
doc.go
28
doc.go
|
@ -15,4 +15,32 @@
|
|||
//
|
||||
// If you're interested in calling Blackfriday from the command line, see
|
||||
// https://github.com/russross/blackfriday-tool.
|
||||
//
|
||||
// Sanitized Anchor Names
|
||||
//
|
||||
// Blackfriday includes an algorithm for creating sanitized anchor names
|
||||
// corresponding to a given input text. This algorithm is used to create
|
||||
// anchors for headings when the AutoHeadingIDs extension is enabled. The
|
||||
// algorithm is specified below, so that other packages can create
|
||||
// compatible anchor names and links to those anchors.
|
||||
//
|
||||
// The algorithm iterates over the input text, interpreted as UTF-8,
|
||||
// one Unicode code point (rune) at a time. All runes that are letters (category L)
|
||||
// or numbers (category N) are considered valid characters. They are mapped to
|
||||
// lower case, and included in the output. All other runes are considered
|
||||
// invalid characters. Invalid characters that precede the first valid character,
|
||||
// as well as invalid characters that follow the last valid character
|
||||
// are dropped completely. All other sequences of invalid characters
|
||||
// between two valid characters are replaced with a single dash character '-'.
|
||||
//
|
||||
// SanitizedAnchorName exposes this functionality, and can be used to
|
||||
// create compatible links to the anchor names generated by blackfriday.
|
||||
// This algorithm is also implemented in a small standalone package at
|
||||
// github.com/shurcooL/sanitized_anchor_name. It can be useful for clients
|
||||
// that want a small package and don't need the full functionality of blackfriday.
|
||||
package blackfriday
|
||||
|
||||
// NOTE: Keep Sanitized Anchor Name algorithm in sync with package
|
||||
// github.com/shurcooL/sanitized_anchor_name.
|
||||
// Otherwise, users of sanitized_anchor_name will get anchor names
|
||||
// that are incompatible with those generated by blackfriday.
|
||||
|
|
5
go.mod
5
go.mod
|
@ -1,6 +1,3 @@
|
|||
module github.com/russross/blackfriday/v2
|
||||
|
||||
require (
|
||||
github.com/pmezard/go-difflib v1.0.0
|
||||
github.com/shurcooL/sanitized_anchor_name v0.0.0-20170918181015-86672fcb3f95
|
||||
)
|
||||
require github.com/pmezard/go-difflib v1.0.0
|
||||
|
|
2
go.sum
2
go.sum
|
@ -1,4 +1,2 @@
|
|||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/shurcooL/sanitized_anchor_name v0.0.0-20170918181015-86672fcb3f95 h1:/vdW8Cb7EXrkqWGufVMES1OH2sU9gKVb2n9/1y5NMBY=
|
||||
github.com/shurcooL/sanitized_anchor_name v0.0.0-20170918181015-86672fcb3f95/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||
|
|
Loading…
Reference in New Issue