1
0
mirror of https://github.com/gohugoio/hugo.git synced 2024-05-11 05:54:58 +00:00

Add page fragments support to Related

The main topic of this commit is that you can now index fragments (content heading identifiers) when calling `.Related`.

You can do this by:

* Configuring one or more indices with type `fragments`.
* The name of each such index configuration maps to an (optional) front matter slice with fragment references. This allows you to link
page<->fragment and page<->page.
* This will also index all the fragments (heading identifiers) of the pages.

It's also possible to use type `fragments` indices in shortcodes, e.g.:

```
{{ $related := site.RegularPages.Related .Page }}
```

But, and this is important, you need to include the shortcode using the `{{<` delimiter. Not doing so will create infinite loops and timeouts.

This commit also:

* Adds two new methods to Page: Fragments (can also be used to build a ToC) and HeadingsFiltered (this is only used in Related Content with
index type `fragments` and `applyFilter` set to true).
* Consolidates all `.Related*` methods into one, which takes either a `Page` or an options map as its only argument.
* Adds `context.Context` to all of the content related Page API. Turns out it wasn't strictly needed for this particular feature, but it will
soon become useful, e.g. in #9339.

Closes #10711
Updates #9339
Updates #10725
This commit is contained in:
Bjørn Erik Pedersen
2023-02-11 16:20:24 +01:00
parent 0afec0a9f4
commit 90da7664bf
66 changed files with 1363 additions and 829 deletions

View File

@ -21,6 +21,123 @@ import (
"github.com/gohugoio/hugo/hugolib"
)
// TestRelatedFragments builds a small site with a "fragments" type related
// index (pagerefs, with applyFilter enabled) next to a basic keywords index,
// and checks what the see-also shortcode renders when it calls Related with
// both an options map and a Page.
func TestRelatedFragments(t *testing.T) {
t.Parallel()
// Txtar site definition: config, five content pages, the see-also
// shortcode (included by p1 and p2 only) and a single page template.
files := `
-- hugo.toml --
baseURL = "http://example.com/"
disableKinds = ["taxonomy", "term", "RSS", "sitemap", "robotsTXT"]
[related]
includeNewer = false
threshold = 80
toLower = false
[[related.indices]]
name = 'pagerefs'
type = 'fragments'
applyFilter = true
weight = 90
[[related.indices]]
name = 'keywords'
weight = 80
-- content/p1.md --
---
title: p1
pagerefs: ['ref1']
---
{{< see-also >}}
## P1 title
-- content/p2.md --
---
title: p2
---
## P2 title 1
## P2 title 2
## First title {#ref1}
{{< see-also "ref1" >}}
-- content/p3.md --
---
title: p3
keywords: ['foo']
---
## P3 title 1
## P3 title 2
## Common p3, p4, p5
-- content/p4.md --
---
title: p4
---
## Common p3, p4, p5
## P4 title 1
-- content/p5.md --
---
title: p5
keywords: ['foo']
---
## P5 title 1
## Common p3, p4, p5
-- layouts/shortcodes/see-also.html --
{{ $p1 := site.GetPage "p1" }}
{{ $p2 := site.GetPage "p2" }}
{{ $p3 := site.GetPage "p3" }}
P1 Fragments: {{ $p1.Fragments.Identifiers }}
P2 Fragments: {{ $p2.Fragments.Identifiers }}
Contains ref1: {{ $p2.Fragments.Identifiers.Contains "ref1" }}
Count ref1: {{ $p2.Fragments.Identifiers.Count "ref1" }}
{{ $opts := dict "document" .Page "fragments" $.Params }}
{{ $related1 := site.RegularPages.Related $opts }}
{{ $related2 := site.RegularPages.Related $p3 }}
Len Related 1: {{ len $related1 }}
Len Related 2: {{ len $related2 }}
Related 1: {{ template "list-related" $related1 }}
Related 2: {{ template "list-related" $related2 }}
{{ define "list-related" }}{{ range $i, $e := . }} {{ $i }}: {{ .Title }}: {{ with .HeadingsFiltered}}{{ range $i, $e := .}}h{{ $i }}: {{ .Title }}|{{ .ID }}|{{ end }}{{ end }}::END{{ end }}{{ end }}
-- layouts/_default/single.html --
Content: {{ .Content }}
`
// Build the site from the txtar definition above.
b := hugolib.NewIntegrationTestBuilder(
hugolib.IntegrationTestConfig{
T: t,
TxtarString: files,
}).Build()
// Output expected on both pages that include the shortcode.
// NOTE(review): "Related 2: 2" presumably matches as part of the rendered
// "Len Related 2: 2" line — confirm against AssertFileContent's matching rules.
expect := `
P1 Fragments: [p1-title]
P2 Fragments: [p2-title-1 p2-title-2 ref1]
Len Related 1: 1
Related 2: 2
`
for _, p := range []string{"p1", "p2"} {
b.AssertFileContent("public/"+p+"/index.html", expect)
}
// On p1 the options-map search is expected to list p2 with HeadingsFiltered
// reduced to the ref1 heading, and the search for p3 is expected to list
// p5 and p4 with only their shared "Common p3, p4, p5" heading.
b.AssertFileContent("public/p1/index.html",
"Related 1: 0: p2: h0: First title|ref1|::END",
"Related 2: 0: p5: h0: Common p3, p4, p5|common-p3-p4-p5|::END 1: p4: h0: Common p3, p4, p5|common-p3-p4-p5|::END",
)
}
func BenchmarkRelatedSite(b *testing.B) {
files := `
-- config.toml --
@ -33,6 +150,10 @@ disableKinds = ["taxonomy", "term", "RSS", "sitemap", "robotsTXT"]
[[related.indices]]
name = 'keywords'
weight = 70
[[related.indices]]
name = 'pagerefs'
type = 'fragments'
weight = 30
-- layouts/_default/single.html --
{{ range site.RegularPages }}
{{ $tmp := .WordCount }}

View File

@ -15,20 +15,37 @@
package related
import (
"context"
"errors"
"fmt"
"math"
"sort"
"strings"
"sync"
"time"
xmaps "golang.org/x/exp/maps"
"github.com/gohugoio/hugo/common/collections"
"github.com/gohugoio/hugo/common/maps"
"github.com/gohugoio/hugo/compare"
"github.com/gohugoio/hugo/markup/tableofcontents"
"github.com/spf13/cast"
"github.com/gohugoio/hugo/common/types"
"github.com/mitchellh/mapstructure"
)
// Index types that can be set in IndexConfig.Type.
const (
// TypeBasic is the default index type; DecodeConfig assigns it to any
// index configuration that has no type set.
TypeBasic = "basic"
// TypeFragments marks an index that, in addition to its regular keywords,
// also indexes the fragment identifiers of documents implementing
// FragmentProvider.
TypeFragments = "fragments"
)
// validTypes is the set of index types accepted by DecodeConfig; any other
// value is rejected with an error.
var validTypes = map[string]bool{
TypeBasic: true,
TypeFragments: true,
}
var (
_ Keyword = (*StringKeyword)(nil)
zeroDate = time.Time{}
@ -37,8 +54,8 @@ var (
DefaultConfig = Config{
Threshold: 80,
Indices: IndexConfigs{
IndexConfig{Name: "keywords", Weight: 100},
IndexConfig{Name: "date", Weight: 10},
IndexConfig{Name: "keywords", Weight: 100, Type: TypeBasic},
IndexConfig{Name: "date", Weight: 10, Type: TypeBasic},
},
}
)
@ -84,6 +101,15 @@ func (c *Config) Add(index IndexConfig) {
c.Indices = append(c.Indices, index)
}
// HasType reports whether at least one of the configured indices is of the
// given type s.
func (c *Config) HasType(s string) bool {
	for n := 0; n < len(c.Indices); n++ {
		if c.Indices[n].Type == s {
			return true
		}
	}

	return false
}
// IndexConfigs holds a set of index configurations.
type IndexConfigs []IndexConfig
@ -92,6 +118,13 @@ type IndexConfig struct {
// The index name. This directly maps to a field or Param name.
Name string
// The index type.
Type string
// Enable to apply a type specific filter to the results.
// This is currently only used for the "fragments" type.
ApplyFilter bool
// Contextual pattern used to convert the Param value into a string.
// Currently only used for dates. Can be used to, say, bump posts in the same
// time frame when searching for related documents.
@ -120,6 +153,14 @@ type Document interface {
Name() string
}
// FragmentProvider is an optional interface that can be implemented by a Document.
// Documents implementing it get their fragment identifiers added to indices
// of type "fragments".
type FragmentProvider interface {
// Fragments returns the document's fragments; its Identifiers are what
// gets indexed and queried as FragmentKeyword values.
Fragments(context.Context) *tableofcontents.Fragments
// For internal use.
// ApplyFilterToHeadings returns the Document to place in the search result
// after applying the given heading filter; the search passes a filter that
// keeps headings whose ID is among the matched fragment keywords.
ApplyFilterToHeadings(context.Context, func(*tableofcontents.Heading) bool) Document
}
// InvertedIndex holds an inverted index, also sometimes named posting list, which
// lists, for every possible search term, the documents that contain that term.
type InvertedIndex struct {
@ -160,7 +201,7 @@ func NewInvertedIndex(cfg Config) *InvertedIndex {
// Add documents to the inverted index.
// The value must support == and !=.
func (idx *InvertedIndex) Add(docs ...Document) error {
func (idx *InvertedIndex) Add(ctx context.Context, docs ...Document) error {
var err error
for _, config := range idx.cfg.Indices {
if config.Weight == 0 {
@ -179,6 +220,14 @@ func (idx *InvertedIndex) Add(docs ...Document) error {
for _, keyword := range words {
setm[keyword] = append(setm[keyword], doc)
}
if config.Type == TypeFragments {
if fp, ok := doc.(FragmentProvider); ok {
for _, fragment := range fp.Fragments(ctx).Identifiers {
setm[FragmentKeyword(fragment)] = append(setm[FragmentKeyword(fragment)], doc)
}
}
}
}
}
@ -209,8 +258,22 @@ func (r *rank) addWeight(w int) {
r.Matches++
}
func newRank(doc Document, weight int) *rank {
return &rank{Doc: doc, Weight: weight, Matches: 1}
// rankPool recycles rank values between searches so that a fresh rank does
// not have to be allocated for every matching document.
var rankPool = sync.Pool{
	New: func() any { return new(rank) },
}
// getRank fetches a rank from rankPool and initializes it for doc with the
// given weight and a match count of one.
func getRank(doc Document, weight int) *rank {
	pooled := rankPool.Get().(*rank)
	pooled.Doc, pooled.Weight, pooled.Matches = doc, weight, 1
	return pooled
}
// putRank hands r back to rankPool for reuse. The document reference is
// cleared first so that an idle pooled rank does not keep its Document
// reachable while it waits in the pool; getRank sets Doc again on reuse.
func putRank(r *rank) {
	r.Doc = nil
	rankPool.Put(r)
}
func (r ranks) Len() int { return len(r) }
@ -225,22 +288,41 @@ func (r ranks) Less(i, j int) bool {
return r[i].Weight > r[j].Weight
}
// SearchDoc finds the documents matching any of the keywords in the given indices
// against the given document.
// SearchOpts holds the options for a related search.
type SearchOpts struct {
// The Document to search for related content for.
// May be nil, in which case only NamedSlices and Fragments form the query.
Document Document
// Named sets of keywords to search for. Each key must name a configured index.
NamedSlices []types.KeyValues
// The indices to search in. When empty, all configured indices are used.
Indices []string
// Fragments holds a list of special keywords that is used
// for indices configured as type "fragments".
// This will match the fragment identifiers of the documents.
Fragments []string
}
// Search finds the documents matching any of the keywords in the given indices
// against query options in opts.
// The resulting document set will be sorted according to number of matches
// and the index weights, and any matches with a rank below the configured
// threshold (normalized to 0..100) will be removed.
// If an index name is provided, only that index will be queried.
func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document, error) {
var q []queryElement
func (idx *InvertedIndex) Search(ctx context.Context, opts SearchOpts) ([]Document, error) {
var configs IndexConfigs
var (
queryElements []queryElement
configs IndexConfigs
)
if len(indices) == 0 {
if len(opts.Indices) == 0 {
configs = idx.cfg.Indices
} else {
configs = make(IndexConfigs, len(indices))
for i, indexName := range indices {
configs = make(IndexConfigs, len(opts.Indices))
for i, indexName := range opts.Indices {
cfg, found := idx.getIndexCfg(indexName)
if !found {
return nil, fmt.Errorf("index %q not found", indexName)
@ -250,40 +332,78 @@ func (idx *InvertedIndex) SearchDoc(doc Document, indices ...string) ([]Document
}
for _, cfg := range configs {
keywords, err := doc.RelatedKeywords(cfg)
if err != nil {
return nil, err
var keywords []Keyword
if opts.Document != nil {
k, err := opts.Document.RelatedKeywords(cfg)
if err != nil {
return nil, err
}
keywords = append(keywords, k...)
}
if cfg.Type == TypeFragments {
for _, fragment := range opts.Fragments {
keywords = append(keywords, FragmentKeyword(fragment))
}
if opts.Document != nil {
if fp, ok := opts.Document.(FragmentProvider); ok {
for _, fragment := range fp.Fragments(ctx).Identifiers {
keywords = append(keywords, FragmentKeyword(fragment))
}
}
}
}
queryElements = append(queryElements, newQueryElement(cfg.Name, keywords...))
}
for _, slice := range opts.NamedSlices {
var keywords []Keyword
key := slice.KeyString()
if key == "" {
return nil, fmt.Errorf("index %q not valid", slice.Key)
}
conf, found := idx.getIndexCfg(key)
if !found {
return nil, fmt.Errorf("index %q not found", key)
}
q = append(q, newQueryElement(cfg.Name, keywords...))
for _, val := range slice.Values {
k, err := conf.ToKeywords(val)
if err != nil {
return nil, err
}
keywords = append(keywords, k...)
}
queryElements = append(queryElements, newQueryElement(conf.Name, keywords...))
}
return idx.searchDate(doc.PublishDate(), q...)
if opts.Document != nil {
return idx.searchDate(ctx, opts.Document, opts.Document.PublishDate(), queryElements...)
}
return idx.search(ctx, queryElements...)
}
// stringToKeyword wraps s in the Keyword type that matches the index type:
// a FragmentKeyword for "fragments" indices and a StringKeyword otherwise.
// The string is lower-cased first when cfg.ToLower is set.
func (cfg IndexConfig) stringToKeyword(s string) Keyword {
	text := s
	if cfg.ToLower {
		text = strings.ToLower(text)
	}

	switch cfg.Type {
	case TypeFragments:
		return FragmentKeyword(text)
	default:
		return StringKeyword(text)
	}
}
// ToKeywords returns a Keyword slice of the given input.
func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) {
var (
keywords []Keyword
toLower = cfg.ToLower
)
var keywords []Keyword
switch vv := v.(type) {
case string:
if toLower {
vv = strings.ToLower(vv)
}
keywords = append(keywords, StringKeyword(vv))
keywords = append(keywords, cfg.stringToKeyword(vv))
case []string:
if toLower {
vc := make([]string, len(vv))
copy(vc, vv)
for i := 0; i < len(vc); i++ {
vc[i] = strings.ToLower(vc[i])
}
vv = vc
vvv := make([]Keyword, len(vv))
for i := 0; i < len(vvv); i++ {
vvv[i] = cfg.stringToKeyword(vv[i])
}
keywords = append(keywords, StringsToKeywords(vv...)...)
keywords = append(keywords, vvv...)
case []any:
return cfg.ToKeywords(cast.ToStringSlice(vv))
case time.Time:
@ -301,46 +421,20 @@ func (cfg IndexConfig) ToKeywords(v any) ([]Keyword, error) {
return keywords, nil
}
// SearchKeyValues finds the documents matching any of the keywords in the given indices.
// The resulting document set will be sorted according to number of matches
// and the index weights, and any matches with a rank below the configured
// threshold (normalize to 0..100) will be removed.
func (idx *InvertedIndex) SearchKeyValues(args ...types.KeyValues) ([]Document, error) {
q := make([]queryElement, len(args))
for i, arg := range args {
var keywords []Keyword
key := arg.KeyString()
if key == "" {
return nil, fmt.Errorf("index %q not valid", arg.Key)
}
conf, found := idx.getIndexCfg(key)
if !found {
return nil, fmt.Errorf("index %q not found", key)
}
for _, val := range arg.Values {
k, err := conf.ToKeywords(val)
if err != nil {
return nil, err
}
keywords = append(keywords, k...)
}
q[i] = newQueryElement(conf.Name, keywords...)
}
return idx.search(q...)
func (idx *InvertedIndex) search(ctx context.Context, query ...queryElement) ([]Document, error) {
return idx.searchDate(ctx, nil, zeroDate, query...)
}
func (idx *InvertedIndex) search(query ...queryElement) ([]Document, error) {
return idx.searchDate(zeroDate, query...)
}
func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement) ([]Document, error) {
func (idx *InvertedIndex) searchDate(ctx context.Context, self Document, upperDate time.Time, query ...queryElement) ([]Document, error) {
matchm := make(map[Document]*rank, 200)
defer func() {
for _, r := range matchm {
putRank(r)
}
}()
applyDateFilter := !idx.cfg.IncludeNewer && !upperDate.IsZero()
var fragmentsFilter collections.SortedStringSlice
for _, el := range query {
setm, found := idx.index[el.Index]
@ -356,15 +450,27 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement)
for _, kw := range el.Keywords {
if docs, found := setm[kw]; found {
for _, doc := range docs {
if compare.Eq(doc, self) {
continue
}
if applyDateFilter {
// Exclude newer than the limit given
if doc.PublishDate().After(upperDate) {
continue
}
}
if config.Type == TypeFragments && config.ApplyFilter {
if fkw, ok := kw.(FragmentKeyword); ok {
fragmentsFilter = append(fragmentsFilter, string(fkw))
}
}
r, found := matchm[doc]
if !found {
matchm[doc] = newRank(doc, config.Weight)
r = getRank(doc, config.Weight)
matchm[doc] = r
} else {
r.addWeight(config.Weight)
}
@ -390,11 +496,19 @@ func (idx *InvertedIndex) searchDate(upperDate time.Time, query ...queryElement)
}
sort.Stable(matches)
sort.Strings(fragmentsFilter)
result := make([]Document, len(matches))
for i, m := range matches {
result[i] = m.Doc
if len(fragmentsFilter) > 0 {
if dp, ok := result[i].(FragmentProvider); ok {
result[i] = dp.ApplyFilterToHeadings(ctx, func(h *tableofcontents.Heading) bool {
return fragmentsFilter.Contains(h.ID)
})
}
}
}
return result, nil
@ -433,6 +547,14 @@ func DecodeConfig(m maps.Params) (Config, error) {
c.Indices[i].ToLower = true
}
}
for i := range c.Indices {
if c.Indices[i].Type == "" {
c.Indices[i].Type = TypeBasic
}
if !validTypes[c.Indices[i].Type] {
return c, fmt.Errorf("invalid index type %q. Must be one of %v", c.Indices[i].Type, xmaps.Keys(validTypes))
}
}
return c, nil
}
@ -444,17 +566,24 @@ func (s StringKeyword) String() string {
return string(s)
}
// FragmentKeyword is a Keyword that identifies a document fragment, such as
// a heading identifier.
type FragmentKeyword string

// String returns the fragment identifier as a plain string.
func (kw FragmentKeyword) String() string {
	return string(kw)
}
// Keyword is the interface a keyword in the search index must implement.
// Keywords are used as map keys when documents are added to and looked up
// in the index, so implementations must be comparable.
type Keyword interface {
// String returns the keyword in its string form.
String() string
}
// StringsToKeywords converts the given slice of strings to a slice of Keyword.
func StringsToKeywords(s ...string) []Keyword {
func (cfg IndexConfig) StringsToKeywords(s ...string) []Keyword {
kw := make([]Keyword, len(s))
for i := 0; i < len(s); i++ {
kw[i] = StringKeyword(s[i])
kw[i] = cfg.stringToKeyword(s[i])
}
return kw

View File

@ -14,6 +14,7 @@
package related
import (
"context"
"fmt"
"math/rand"
"testing"
@ -105,7 +106,7 @@ func TestSearch(t *testing.T) {
newTestDoc("tags", "g", "h").addKeywords("keywords", "a", "b"),
}
idx.Add(docs...)
idx.Add(context.Background(), docs...)
t.Run("count", func(t *testing.T) {
c := qt.New(t)
@ -122,7 +123,8 @@ func TestSearch(t *testing.T) {
t.Run("search-tags", func(t *testing.T) {
c := qt.New(t)
m, err := idx.search(newQueryElement("tags", StringsToKeywords("a", "b", "d", "z")...))
var cfg IndexConfig
m, err := idx.search(context.Background(), newQueryElement("tags", cfg.StringsToKeywords("a", "b", "d", "z")...))
c.Assert(err, qt.IsNil)
c.Assert(len(m), qt.Equals, 2)
c.Assert(m[0], qt.Equals, docs[0])
@ -131,9 +133,10 @@ func TestSearch(t *testing.T) {
t.Run("search-tags-and-keywords", func(t *testing.T) {
c := qt.New(t)
m, err := idx.search(
newQueryElement("tags", StringsToKeywords("a", "b", "z")...),
newQueryElement("keywords", StringsToKeywords("a", "b")...))
var cfg IndexConfig
m, err := idx.search(context.Background(),
newQueryElement("tags", cfg.StringsToKeywords("a", "b", "z")...),
newQueryElement("keywords", cfg.StringsToKeywords("a", "b")...))
c.Assert(err, qt.IsNil)
c.Assert(len(m), qt.Equals, 3)
c.Assert(m[0], qt.Equals, docs[3])
@ -144,7 +147,7 @@ func TestSearch(t *testing.T) {
t.Run("searchdoc-all", func(t *testing.T) {
c := qt.New(t)
doc := newTestDoc("tags", "a").addKeywords("keywords", "a")
m, err := idx.SearchDoc(doc)
m, err := idx.Search(context.Background(), SearchOpts{Document: doc})
c.Assert(err, qt.IsNil)
c.Assert(len(m), qt.Equals, 2)
c.Assert(m[0], qt.Equals, docs[3])
@ -154,7 +157,7 @@ func TestSearch(t *testing.T) {
t.Run("searchdoc-tags", func(t *testing.T) {
c := qt.New(t)
doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
m, err := idx.SearchDoc(doc, "tags")
m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"tags"}})
c.Assert(err, qt.IsNil)
c.Assert(len(m), qt.Equals, 2)
c.Assert(m[0], qt.Equals, docs[0])
@ -166,9 +169,9 @@ func TestSearch(t *testing.T) {
doc := newTestDoc("tags", "a", "b", "d", "z").addKeywords("keywords", "a", "b")
// This will get a date newer than the others.
newDoc := newTestDoc("keywords", "a", "b")
idx.Add(newDoc)
idx.Add(context.Background(), newDoc)
m, err := idx.SearchDoc(doc, "keywords")
m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"keywords"}})
c.Assert(err, qt.IsNil)
c.Assert(len(m), qt.Equals, 2)
c.Assert(m[0], qt.Equals, docs[3])
@ -186,10 +189,10 @@ func TestSearch(t *testing.T) {
for i := 0; i < 10; i++ {
docc := *doc
docc.name = fmt.Sprintf("doc%d", i)
idx.Add(&docc)
idx.Add(context.Background(), &docc)
}
m, err := idx.SearchDoc(doc, "keywords")
m, err := idx.Search(context.Background(), SearchOpts{Document: doc, Indices: []string{"keywords"}})
c.Assert(err, qt.IsNil)
c.Assert(len(m), qt.Equals, 10)
for i := 0; i < 10; i++ {
@ -265,7 +268,7 @@ func BenchmarkRelatedNewIndex(b *testing.B) {
for i := 0; i < b.N; i++ {
idx := NewInvertedIndex(cfg)
for _, doc := range pages {
idx.Add(doc)
idx.Add(context.Background(), doc)
}
}
})
@ -277,14 +280,15 @@ func BenchmarkRelatedNewIndex(b *testing.B) {
for i := 0; i < len(pages); i++ {
docs[i] = pages[i]
}
idx.Add(docs...)
idx.Add(context.Background(), docs...)
}
})
}
func BenchmarkRelatedMatchesIn(b *testing.B) {
q1 := newQueryElement("tags", StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
q2 := newQueryElement("keywords", StringsToKeywords("keyword3", "keyword4")...)
var icfg IndexConfig
q1 := newQueryElement("tags", icfg.StringsToKeywords("keyword2", "keyword5", "keyword32", "asdf")...)
q2 := newQueryElement("keywords", icfg.StringsToKeywords("keyword3", "keyword4")...)
docs := make([]*testDoc, 1000)
numkeywords := 20
@ -315,15 +319,16 @@ func BenchmarkRelatedMatchesIn(b *testing.B) {
index = "keywords"
}
idx.Add(newTestDoc(index, allKeywords[start:end]...))
idx.Add(context.Background(), newTestDoc(index, allKeywords[start:end]...))
}
b.ResetTimer()
ctx := context.Background()
for i := 0; i < b.N; i++ {
if i%10 == 0 {
idx.search(q2)
idx.search(ctx, q2)
} else {
idx.search(q1)
idx.search(ctx, q1)
}
}
}