mirror of
				https://github.com/gohugoio/hugo.git
				synced 2024-05-11 05:54:58 +00:00 
			
		
		
		
	markup/goldmark: Add an optional Blackfriday auto ID strategy
Fixes #6707
This commit is contained in:
		@@ -15,6 +15,8 @@
 | 
			
		||||
package blackfriday
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"unicode"
 | 
			
		||||
 | 
			
		||||
	"github.com/gohugoio/hugo/identity"
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/blackfriday/blackfriday_config"
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/converter"
 | 
			
		||||
@@ -61,7 +63,27 @@ type blackfridayConverter struct {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
 | 
			
		||||
	return blackfriday.SanitizedAnchorName(s)
 | 
			
		||||
	return SanitizedAnchorName(s)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SanitizedAnchorName is how Blackfriday sanitizes anchor names.
// Implementation borrowed from https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L1464
//
// Letters and numbers are kept in lower case. Every run of other runes that
// sits between two kept runes collapses into a single '-'; leading and
// trailing runs are dropped, so the result never starts or ends with a dash.
// An input without any letter or number yields the empty string.
func SanitizedAnchorName(text string) string {
	// The result never holds more runes than the input (each emitted dash
	// stands in for at least one skipped rune), and the byte length is an
	// upper bound on the rune count, so a single allocation is enough.
	anchorName := make([]rune, 0, len(text))

	// futureDash records that a separator was seen; the dash itself is only
	// written once another letter or number follows.
	futureDash := false

	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			futureDash = true
			continue
		}
		if futureDash && len(anchorName) > 0 {
			anchorName = append(anchorName, '-')
		}
		futureDash = false
		anchorName = append(anchorName, unicode.ToLower(r))
	}

	return string(anchorName)
}
 | 
			
		||||
 | 
			
		||||
func (c *blackfridayConverter) AnchorSuffix() string {
 | 
			
		||||
 
 | 
			
		||||
@@ -179,3 +179,45 @@ This is a footnote.[^1] And then some.
 | 
			
		||||
	c.Assert(s, qt.Contains, "This is a footnote.<sup class=\"footnote-ref\" id=\"fnref:testid:1\"><a href=\"#fn:testid:1\">1</a></sup>")
 | 
			
		||||
	c.Assert(s, qt.Contains, "<a class=\"footnote-return\" href=\"#fnref:testid:1\"><sup>[return]</sup></a>")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Tests borrowed from https://github.com/russross/blackfriday/blob/a925a152c144ea7de0f451eaf2f7db9e52fa005a/block_test.go#L1817
 | 
			
		||||
func TestSanitizedAnchorName(t *testing.T) {
 | 
			
		||||
	tests := []struct {
 | 
			
		||||
		text string
 | 
			
		||||
		want string
 | 
			
		||||
	}{
 | 
			
		||||
		{
 | 
			
		||||
			text: "This is a header",
 | 
			
		||||
			want: "this-is-a-header",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			text: "This is also          a header",
 | 
			
		||||
			want: "this-is-also-a-header",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			text: "main.go",
 | 
			
		||||
			want: "main-go",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			text: "Article 123",
 | 
			
		||||
			want: "article-123",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			text: "<- Let's try this, shall we?",
 | 
			
		||||
			want: "let-s-try-this-shall-we",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			text: "        ",
 | 
			
		||||
			want: "",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			text: "Hello, 世界",
 | 
			
		||||
			want: "hello-世界",
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
	for _, test := range tests {
 | 
			
		||||
		if got := SanitizedAnchorName(test.text); got != test.want {
 | 
			
		||||
			t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -19,6 +19,8 @@ import (
 | 
			
		||||
	"unicode"
 | 
			
		||||
	"unicode/utf8"
 | 
			
		||||
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/blackfriday"
 | 
			
		||||
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
 | 
			
		||||
 | 
			
		||||
	"github.com/gohugoio/hugo/common/text"
 | 
			
		||||
@@ -30,34 +32,41 @@ import (
 | 
			
		||||
	bp "github.com/gohugoio/hugo/bufferpool"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func sanitizeAnchorNameString(s string, asciiOnly bool) string {
 | 
			
		||||
	return string(sanitizeAnchorName([]byte(s), asciiOnly))
 | 
			
		||||
func sanitizeAnchorNameString(s string, idType string) string {
 | 
			
		||||
	return string(sanitizeAnchorName([]byte(s), idType))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
 | 
			
		||||
	return sanitizeAnchorNameWithHook(b, asciiOnly, nil)
 | 
			
		||||
func sanitizeAnchorName(b []byte, idType string) []byte {
 | 
			
		||||
	return sanitizeAnchorNameWithHook(b, idType, nil)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
 | 
			
		||||
func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
 | 
			
		||||
	buf := bp.GetBuffer()
 | 
			
		||||
 | 
			
		||||
	if asciiOnly {
 | 
			
		||||
		// Normalize it to preserve accents if possible.
 | 
			
		||||
		b = text.RemoveAccents(b)
 | 
			
		||||
	}
 | 
			
		||||
	if idType == goldmark_config.AutoHeadingIDTypeBlackfriday {
 | 
			
		||||
		// TODO(bep) make it more efficient.
 | 
			
		||||
		buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
 | 
			
		||||
	} else {
 | 
			
		||||
		asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii
 | 
			
		||||
 | 
			
		||||
	for len(b) > 0 {
 | 
			
		||||
		r, size := utf8.DecodeRune(b)
 | 
			
		||||
		switch {
 | 
			
		||||
		case asciiOnly && size != 1:
 | 
			
		||||
		case r == '-' || isSpace(r):
 | 
			
		||||
			buf.WriteRune('-')
 | 
			
		||||
		case isAlphaNumeric(r):
 | 
			
		||||
			buf.WriteRune(unicode.ToLower(r))
 | 
			
		||||
		default:
 | 
			
		||||
		if asciiOnly {
 | 
			
		||||
			// Normalize it to preserve accents if possible.
 | 
			
		||||
			b = text.RemoveAccents(b)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		b = b[size:]
 | 
			
		||||
		for len(b) > 0 {
 | 
			
		||||
			r, size := utf8.DecodeRune(b)
 | 
			
		||||
			switch {
 | 
			
		||||
			case asciiOnly && size != 1:
 | 
			
		||||
			case r == '-' || isSpace(r):
 | 
			
		||||
				buf.WriteRune('-')
 | 
			
		||||
			case isAlphaNumeric(r):
 | 
			
		||||
				buf.WriteRune(unicode.ToLower(r))
 | 
			
		||||
			default:
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			b = b[size:]
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if hook != nil {
 | 
			
		||||
@@ -83,19 +92,19 @@ func isSpace(r rune) bool {
 | 
			
		||||
var _ parser.IDs = (*idFactory)(nil)
 | 
			
		||||
 | 
			
		||||
type idFactory struct {
 | 
			
		||||
	asciiOnly bool
 | 
			
		||||
	vals      map[string]struct{}
 | 
			
		||||
	idType string
 | 
			
		||||
	vals   map[string]struct{}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newIDFactory(idType string) *idFactory {
 | 
			
		||||
	return &idFactory{
 | 
			
		||||
		vals:      make(map[string]struct{}),
 | 
			
		||||
		asciiOnly: idType == goldmark_config.AutoHeadingIDTypeGitHubAscii,
 | 
			
		||||
		vals:   make(map[string]struct{}),
 | 
			
		||||
		idType: idType,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
 | 
			
		||||
	return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) {
 | 
			
		||||
	return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
 | 
			
		||||
		if buf.Len() == 0 {
 | 
			
		||||
			if kind == ast.KindHeading {
 | 
			
		||||
				buf.WriteString("heading")
 | 
			
		||||
 
 | 
			
		||||
@@ -17,6 +17,8 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"testing"
 | 
			
		||||
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
 | 
			
		||||
 | 
			
		||||
	qt "github.com/frankban/quicktest"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -69,9 +71,9 @@ under_score
 | 
			
		||||
		expect := expectlines[i]
 | 
			
		||||
		c.Run(input, func(c *qt.C) {
 | 
			
		||||
			b := []byte(input)
 | 
			
		||||
			got := string(sanitizeAnchorName(b, false))
 | 
			
		||||
			got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub))
 | 
			
		||||
			c.Assert(got, qt.Equals, expect)
 | 
			
		||||
			c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect)
 | 
			
		||||
			c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect)
 | 
			
		||||
			c.Assert(string(b), qt.Equals, input)
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
@@ -80,16 +82,21 @@ under_score
 | 
			
		||||
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
 | 
			
		||||
	c := qt.New(t)
 | 
			
		||||
 | 
			
		||||
	c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good")
 | 
			
		||||
	c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume")
 | 
			
		||||
	c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good")
 | 
			
		||||
	c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume")
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
 | 
			
		||||
	c := qt.New(t)
 | 
			
		||||
	c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func BenchmarkSanitizeAnchorName(b *testing.B) {
 | 
			
		||||
	input := []byte("God is good: 神真美好")
 | 
			
		||||
	b.ResetTimer()
 | 
			
		||||
	for i := 0; i < b.N; i++ {
 | 
			
		||||
		result := sanitizeAnchorName(input, false)
 | 
			
		||||
		result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub)
 | 
			
		||||
		if len(result) != 24 {
 | 
			
		||||
			b.Fatalf("got %d", len(result))
 | 
			
		||||
 | 
			
		||||
@@ -101,7 +108,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
 | 
			
		||||
	input := []byte("God is good: 神真美好")
 | 
			
		||||
	b.ResetTimer()
 | 
			
		||||
	for i := 0; i < b.N; i++ {
 | 
			
		||||
		result := sanitizeAnchorName(input, true)
 | 
			
		||||
		result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii)
 | 
			
		||||
		if len(result) != 12 {
 | 
			
		||||
			b.Fatalf("got %d", len(result))
 | 
			
		||||
 | 
			
		||||
@@ -109,11 +116,23 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
 | 
			
		||||
	input := []byte("God is good: 神真美好")
 | 
			
		||||
	b.ResetTimer()
 | 
			
		||||
	for i := 0; i < b.N; i++ {
 | 
			
		||||
		result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday)
 | 
			
		||||
		if len(result) != 24 {
 | 
			
		||||
			b.Fatalf("got %d", len(result))
 | 
			
		||||
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func BenchmarkSanitizeAnchorNameString(b *testing.B) {
 | 
			
		||||
	input := "God is good: 神真美好"
 | 
			
		||||
	b.ResetTimer()
 | 
			
		||||
	for i := 0; i < b.N; i++ {
 | 
			
		||||
		result := sanitizeAnchorNameString(input, false)
 | 
			
		||||
		result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub)
 | 
			
		||||
		if len(result) != 24 {
 | 
			
		||||
			b.Fatalf("got %d", len(result))
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
@@ -29,7 +29,6 @@ import (
 | 
			
		||||
 | 
			
		||||
	"github.com/gohugoio/hugo/hugofs"
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/converter"
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/highlight"
 | 
			
		||||
	"github.com/gohugoio/hugo/markup/tableofcontents"
 | 
			
		||||
	"github.com/yuin/goldmark"
 | 
			
		||||
@@ -57,7 +56,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
 | 
			
		||||
			cfg: cfg,
 | 
			
		||||
			md:  md,
 | 
			
		||||
			sanitizeAnchorName: func(s string) string {
 | 
			
		||||
				return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType == goldmark_config.AutoHeadingIDTypeGitHub)
 | 
			
		||||
				return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType)
 | 
			
		||||
			},
 | 
			
		||||
		}, nil
 | 
			
		||||
	}), nil
 | 
			
		||||
 
 | 
			
		||||
@@ -178,6 +178,21 @@ func TestConvertAutoIDAsciiOnly(t *testing.T) {
 | 
			
		||||
	c.Assert(got, qt.Contains, "<h2 id=\"god-is-good-\">")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestConvertAutoIDBlackfriday(t *testing.T) {
 | 
			
		||||
	c := qt.New(t)
 | 
			
		||||
 | 
			
		||||
	content := `
 | 
			
		||||
## Let's try this, shall we?
 | 
			
		||||
 | 
			
		||||
`
 | 
			
		||||
	mconf := markup_config.Default
 | 
			
		||||
	mconf.Goldmark.Parser.AutoHeadingIDType = goldmark_config.AutoHeadingIDTypeBlackfriday
 | 
			
		||||
	b := convert(c, mconf, content)
 | 
			
		||||
	got := string(b.Bytes())
 | 
			
		||||
 | 
			
		||||
	c.Assert(got, qt.Contains, "<h2 id=\"let-s-try-this-shall-we\">")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestCodeFence(t *testing.T) {
 | 
			
		||||
	c := qt.New(t)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -17,6 +17,7 @@ package goldmark_config
 | 
			
		||||
// Supported values for the Goldmark parser's AutoHeadingIDType setting.
const (
	// AutoHeadingIDTypeGitHub is the "github" heading ID type. The anchor
	// sanitizer treats every value other than the two below this way.
	AutoHeadingIDTypeGitHub      = "github"
	// AutoHeadingIDTypeGitHubAscii is the "github-ascii" heading ID type; the
	// anchor sanitizer removes accents and skips multi-byte runes for it.
	AutoHeadingIDTypeGitHubAscii = "github-ascii"
	// AutoHeadingIDTypeBlackfriday is the "blackfriday" heading ID type; the
	// anchor sanitizer delegates to blackfriday.SanitizedAnchorName for it.
	AutoHeadingIDTypeBlackfriday = "blackfriday"
)
 | 
			
		||||
 | 
			
		||||
// DefaultConfig holds the default Goldmark configuration.
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user