Implement a furigana extension based on the syntax used in https://github.com/djfun/furigana_markdown

See https://discourse.gohugo.io/t/using-furigana-ruby-with-markdown/15156/4 to understand what this is for.
pull/578/head
Elia Argentieri 2019-12-04 11:53:04 +01:00
parent a925a152c1
commit 4dbb880a18
5 changed files with 151 additions and 1 deletions

12
html.go
View File

@ -492,6 +492,18 @@ func (options *Html) Emphasis(out *bytes.Buffer, text []byte) {
out.WriteString("</em>")
}
// Furigana writes a ruby annotation to out: kanji is the base text placed
// inside <rb>, furigana is the reading placed inside <rt>. Nothing is
// written when kanji is empty.
//
// NOTE(review): both <rp> fallback elements are empty here; they normally
// carry parentheses for user agents without ruby support — confirm whether
// characters were lost from these literals.
// NOTE(review): kanji and furigana are written as-is, without escaping;
// verify that callers never pass unescaped user-controlled markup.
func (options *Html) Furigana(out *bytes.Buffer, kanji []byte, furigana []byte) {
	const (
		rubyOpen  = "<ruby lang=\"ja\"><rb>"
		rubyMid   = "</rb><rp></rp><rt>"
		rubyClose = "</rt><rp></rp></ruby>"
	)

	if len(kanji) == 0 {
		return
	}

	out.WriteString(rubyOpen)
	out.Write(kanji)
	out.WriteString(rubyMid)
	out.Write(furigana)
	out.WriteString(rubyClose)
}
func (options *Html) maybeWriteAbsolutePrefix(out *bytes.Buffer, link []byte) {
if options.parameters.AbsolutePrefix != "" && isRelativeLink(link) && link[0] != '.' {
out.WriteString(options.parameters.AbsolutePrefix)

View File

@ -17,6 +17,7 @@ import (
"bytes"
"regexp"
"strconv"
"unicode/utf8"
)
var (
@ -109,6 +110,99 @@ func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
return 0
}
// paren_furigana parses a delimited base text immediately followed by a
// delimited reading, starting at data[offset], and renders the pair through
// p.r.Furigana.
//
// It returns the number of bytes scanned from offset through the rune that
// closes the reading, or 0 when any of the checks below fails (in which case
// nothing is written to out).
//
// NOTE(review): every rune literal compared against below is empty ('') in
// this copy of the file, which is not valid Go; the delimiter characters were
// presumably lost in transit and must be restored from the original commit.
func paren_furigana(p *parser, out *bytes.Buffer, data []byte, offset int) int {
data = data[offset:]
// The first rune is the opening delimiter; its width is where the base text
// begins.
parenthesis, kanjiStart := utf8.DecodeRune(data)
kanjiEnd := kanjiStart
ret := kanjiStart
// Walk rune by rune until the closing delimiter. ret is advanced past the
// closing rune as well, while kanjiEnd stops just before it.
for kanjiEnd < len(data) {
runeValue, width := utf8.DecodeRune(data[kanjiEnd:])
ret += width
if parenthesis == '' && runeValue == '' {
break
}
kanjiEnd += width
}
// Reject an empty base text, and reject input that ends at (or before) the
// closing delimiter because no reading can follow.
if kanjiEnd <= kanjiStart || ret >= len(data) {
return 0
}
// The rune right after the base text is taken as the opening delimiter of
// the reading; the reading itself starts after it.
parenthesis, width := utf8.DecodeRune(data[ret:])
ret += width
furiganaStart := ret
furiganaEnd := furiganaStart
// Same scan as above, this time for the reading.
for furiganaEnd < len(data) {
runeValue, width := utf8.DecodeRune(data[furiganaEnd:])
ret += width
if parenthesis == '' && runeValue == '' {
break
}
furiganaEnd += width
}
// Reject an empty reading, and reject the case where the scan ran off the end
// of data without ever hitting the closing delimiter.
if furiganaEnd <= furiganaStart || furiganaEnd >= len(data) {
return 0
}
p.r.Furigana(out, data[kanjiStart:kanjiEnd], data[furiganaStart:furiganaEnd])
return ret
}
// kanji_furigana parses a run of kanji that is directly followed by a
// delimited reading, starting at data[offset], and renders the pair through
// p.r.Furigana.
//
// It returns the number of bytes scanned from offset through the closing
// delimiter of the reading, or 0 (without writing to out) when the run is
// interrupted by a non-kanji rune or either delimiter is missing.
//
// NOTE(review): the rune literals compared against below are empty ('') in
// this copy of the file, which is not valid Go; the delimiter characters were
// presumably lost in transit and must be restored from the original commit.
func kanji_furigana(p *parser, out *bytes.Buffer, data []byte, offset int) int {
data = data[offset:]
kanjiEnd := 0
furiganaStart := 0
furiganaEnd := 0
ret := 0
// First pass: accept runes in 0x4E00..0x9FEF as base text until the opening
// delimiter is seen; any other rune aborts the match.
for ret < len(data) {
runeValue, width := utf8.DecodeRune(data[ret:])
ret += width
if runeValue == '' {
// ret already points past the delimiter, i.e. at the first byte of the reading.
furiganaStart = ret
break
} else if (runeValue < 0x4E00 || 0x9FEF < runeValue) {
return 0
}
kanjiEnd += width
}
// furiganaStart is still 0 when data ended before an opening delimiter.
if furiganaStart == 0 {
return 0
}
// Second pass: look for the closing delimiter. furiganaEnd marks the byte
// where it starts; ret is advanced past it.
for ret < len(data) {
runeValue, width := utf8.DecodeRune(data[ret:])
if runeValue == '' {
furiganaEnd = ret
ret += width
break
}
ret += width
}
// furiganaEnd is still 0 when no closing delimiter was found.
if furiganaEnd == 0 {
return 0
}
p.r.Furigana(out, data[0:kanjiEnd], data[furiganaStart:furiganaEnd])
return ret
}
// handle_multi_byte_utf8 decodes the rune at data[offset] and dispatches to
// the matching furigana parser: paren_furigana when the rune equals the
// opening delimiter, kanji_furigana when it lies in 0x4E00..0x9FEF. Any other
// rune yields 0; otherwise the chosen parser's result is returned unchanged.
//
// NOTE(review): the delimiter rune literal below is empty ('') in this copy
// of the file, which is not valid Go; the character was presumably lost in
// transit and must be restored from the original commit.
func handle_multi_byte_utf8(p *parser, out *bytes.Buffer, data []byte, offset int) int {
// Only the rune value is needed here; the called parsers decode it again.
runeValue, _ := utf8.DecodeRune(data[offset:])
if (runeValue == '') {
return paren_furigana(p, out, data, offset)
} else if (0x4E00 <= runeValue && runeValue <= 0x9FEF) {
// Kanji character
return kanji_furigana(p, out, data, offset)
} else {
return 0
}
}
func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
data = data[offset:]

View File

@ -344,6 +344,29 @@ func TestEmphasisLink(t *testing.T) {
doTestsInline(t, tests)
}
// TestFurigana runs the furigana cases through doTestsInline.
//
// NOTE(review): some inputs below appear to have lost their delimiter
// characters in this copy of the file (e.g. the third input contains no
// delimiters although its expected output is a ruby element) — verify the
// strings against the original commit before relying on them.
func TestFurigana(t *testing.T) {
	// Entries alternate: markdown input first, then the HTML expected for it.
	doTestsInline(t, []string{
		"(漢字)(かんじ)\n",
		"<p><ruby lang=\"ja\"><rb>漢字</rb><rp></rp><rt>かんじ</rt><rp></rp></ruby></p>\n",

		"漢字(かんじ)\n",
		"<p><ruby lang=\"ja\"><rb>漢字</rb><rp></rp><rt>かんじ</rt><rp></rp></ruby></p>\n",

		"Reading Japanese text is easier with ふりがなfurigana\n",
		"<p>Reading Japanese text is easier with <ruby lang=\"ja\"><rb>ふりがな</rb><rp></rp><rt>furigana</rt><rp></rp></ruby></p>\n",

		"\n",
		"<p></p>\n",

		"(すごい)ですね?\n",
		"<p>(すごい)ですね?</p>\n",

		"エリアeria\n",
		"<p>エリアeria</p>\n",
	})
}
func TestStrikeThrough(t *testing.T) {
var tests = []string{
"nothing inline\n",

View File

@ -213,6 +213,18 @@ func (options *Latex) Emphasis(out *bytes.Buffer, text []byte) {
out.WriteString("}")
}
// Furigana writes a ruby annotation for kanji (base text) and furigana
// (reading) to out. Nothing is written when kanji is empty.
//
// NOTE(review): this emits HTML <ruby> markup from the LaTeX renderer, which
// looks unintended — confirm what LaTeX output is wanted here.
func (options *Latex) Furigana(out *bytes.Buffer, kanji []byte, furigana []byte) {
	const (
		prefix    = "<ruby><rb>"
		separator = "</rb><rp></rp><rt>"
		suffix    = "</rt><rp></rp></ruby>"
	)

	if len(kanji) == 0 {
		return
	}

	out.WriteString(prefix)
	out.Write(kanji)
	out.WriteString(separator)
	out.Write(furigana)
	out.WriteString(suffix)
}
func (options *Latex) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
if bytes.HasPrefix(link, []byte("http://")) || bytes.HasPrefix(link, []byte("https://")) {
// treat it like a link

View File

@ -44,6 +44,7 @@ const (
EXTENSION_BACKSLASH_LINE_BREAK // translate trailing backslashes into line breaks
EXTENSION_DEFINITION_LISTS // render definition lists
EXTENSION_JOIN_LINES // delete newline and join lines
EXTENSION_FURIGANA // render furigana using html's <ruby> tag
commonHtmlFlags = 0 |
HTML_USE_XHTML |
@ -61,7 +62,8 @@ const (
EXTENSION_SPACE_HEADERS |
EXTENSION_HEADER_IDS |
EXTENSION_BACKSLASH_LINE_BREAK |
EXTENSION_DEFINITION_LISTS
EXTENSION_DEFINITION_LISTS |
EXTENSION_FURIGANA
)
// These are the possible flag values for the link renderer.
@ -180,6 +182,7 @@ type Renderer interface {
CodeSpan(out *bytes.Buffer, text []byte)
DoubleEmphasis(out *bytes.Buffer, text []byte)
Emphasis(out *bytes.Buffer, text []byte)
Furigana(out *bytes.Buffer, kanji []byte, furigana []byte)
Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
LineBreak(out *bytes.Buffer)
Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
@ -385,6 +388,12 @@ func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
p.notesRecord = make(map[string]struct{})
}
//if extensions&EXTENSION_FURIGANA != 0 {
for i := 128; i < 256; i++ {
p.inlineCallback[i] = handle_multi_byte_utf8
}
//}
first := firstPass(p, input)
second := secondPass(p, first)
return second