From 0ba0f2b6ed7c475a92e4df8641825cb7a11d1fa3 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Tue, 9 May 2017 02:07:14 -0400 Subject: [PATCH] Document SanitizedAnchorName algorithm, copy implementation. (#352) The goal of this change is to reduce number of non-standard library packages (repositories) that blackfriday imports from 1 to 0, and in turn, reduce the cost of importing blackfriday into other projects. Do so by documenting the algorithm of SanitizedAnchorName, and include a copy of the small function inside blackfriday itself. The same functionality continues to be available in the original location, github.com/shurcooL/sanitized_anchor_name.Create. It can be used by existing users and those that look for a small package, and don't need all of blackfriday functionality. Existing users of blackfriday can use the new SanitizedAnchorName function directly and avoid an extra package import. Resolves #350. --- README.md | 16 ++++++++++++++++ block.go | 28 ++++++++++++++++++++++++---- block_test.go | 41 +++++++++++++++++++++++++++++++++++++++++ doc.go | 32 ++++++++++++++++++++++++++++++++ markdown.go | 3 --- 5 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 doc.go diff --git a/README.md b/README.md index 3c113cb..0d1ac9a 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,22 @@ installed in `$GOPATH/bin`. This is a statically-linked binary that can be copied to wherever you need it without worrying about dependencies and library versions. +### Sanitized anchor names + +Blackfriday includes an algorithm for creating sanitized anchor names +corresponding to a given input text. This algorithm is used to create +anchors for headings when `EXTENSION_AUTO_HEADER_IDS` is enabled. The +algorithm has a specification, so that other packages can create +compatible anchor names and links to those anchors. + +The specification is located at https://godoc.org/github.com/russross/blackfriday#hdr-Sanitized_Anchor_Names. + +[`SanitizedAnchorName`](https://godoc.org/github.com/russross/blackfriday#SanitizedAnchorName) exposes this functionality, and can be used to +create compatible links to the anchor names generated by blackfriday. +This algorithm is also implemented in a small standalone package at +[`github.com/shurcooL/sanitized_anchor_name`](https://godoc.org/github.com/shurcooL/sanitized_anchor_name). It can be useful for clients +that want a small package and don't need full functionality of blackfriday. + Features -------- diff --git a/block.go b/block.go index 9cf451f..7fc731d 100644 --- a/block.go +++ b/block.go @@ -15,8 +15,7 @@ package blackfriday import ( "bytes" - - "github.com/shurcooL/sanitized_anchor_name" + "unicode" ) // Parse block-level data. @@ -243,7 +242,7 @@ func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int { } if end > i { if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { - id = sanitized_anchor_name.Create(string(data[i:end])) + id = SanitizedAnchorName(string(data[i:end])) } work := func() bool { p.inline(out, data[i:end]) @@ -1364,7 +1363,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { id := "" if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 { - id = sanitized_anchor_name.Create(string(data[prev:eol])) + id = SanitizedAnchorName(string(data[prev:eol])) } p.r.Header(out, work, level, id) @@ -1428,3 +1427,24 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int { p.renderParagraph(out, data[:i]) return i } + +// SanitizedAnchorName returns a sanitized anchor name for the given text. +// +// It implements the algorithm specified in the package comment. +func SanitizedAnchorName(text string) string { + var anchorName []rune + futureDash := false + for _, r := range text { + switch { + case unicode.IsLetter(r) || unicode.IsNumber(r): + if futureDash && len(anchorName) > 0 { + anchorName = append(anchorName, '-') + } + futureDash = false + anchorName = append(anchorName, unicode.ToLower(r)) + default: + futureDash = true + } + } + return string(anchorName) +} diff --git a/block_test.go b/block_test.go index 83ed2e1..89d5775 100644 --- a/block_test.go +++ b/block_test.go @@ -1738,3 +1738,44 @@ func TestJoinLines(t *testing.T) { t.Error("output dose not match.") } } + +func TestSanitizedAnchorName(t *testing.T) { + tests := []struct { + text string + want string + }{ + { + text: "This is a header", + want: "this-is-a-header", + }, + { + text: "This is also a header", + want: "this-is-also-a-header", + }, + { + text: "main.go", + want: "main-go", + }, + { + text: "Article 123", + want: "article-123", + }, + { + text: "<- Let's try this, shall we?", + want: "let-s-try-this-shall-we", + }, + { + text: " ", + want: "", + }, + { + text: "Hello, 世界", + want: "hello-世界", + }, + } + for _, test := range tests { + if got := SanitizedAnchorName(test.text); got != test.want { + t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want) + } + } +} diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..9656c42 --- /dev/null +++ b/doc.go @@ -0,0 +1,32 @@ +// Package blackfriday is a Markdown processor. +// +// It translates plain text with simple formatting rules into HTML or LaTeX. +// +// Sanitized Anchor Names +// +// Blackfriday includes an algorithm for creating sanitized anchor names +// corresponding to a given input text. This algorithm is used to create +// anchors for headings when EXTENSION_AUTO_HEADER_IDS is enabled. The +// algorithm is specified below, so that other packages can create +// compatible anchor names and links to those anchors. +// +// The algorithm iterates over the input text, interpreted as UTF-8, +// one Unicode code point (rune) at a time. All runes that are letters (category L) +// or numbers (category N) are considered valid characters. They are mapped to +// lower case, and included in the output. All other runes are considered +// invalid characters. Invalid characters that preceed the first valid character, +// as well as invalid character that follow the last valid character +// are dropped completely. All other sequences of invalid characters +// between two valid characters are replaced with a single dash character '-'. +// +// SanitizedAnchorName exposes this functionality, and can be used to +// create compatible links to the anchor names generated by blackfriday. +// This algorithm is also implemented in a small standalone package at +// github.com/shurcooL/sanitized_anchor_name. It can be useful for clients +// that want a small package and don't need full functionality of blackfriday. +package blackfriday + +// NOTE: Keep Sanitized Anchor Name algorithm in sync with package +// github.com/shurcooL/sanitized_anchor_name. +// Otherwise, users of sanitized_anchor_name will get anchor names +// that are incompatible with those generated by blackfriday. diff --git a/markdown.go b/markdown.go index 09fc2d7..6d842d3 100644 --- a/markdown.go +++ b/markdown.go @@ -13,9 +13,6 @@ // // -// Blackfriday markdown processor. -// -// Translates plain text with simple formatting rules into HTML or LaTeX. package blackfriday import (