	Move the emoji parsing to pageparser
This avoids double parsing the page content when `enableEmoji=true`.

This commit also adds some general improvements to the parser, making it in general much faster:

```bash
benchmark                      old ns/op     new ns/op     delta
BenchmarkShortcodeLexer-4      90258         101730        +12.71%
BenchmarkParse-4               148940        15037         -89.90%

benchmark                      old allocs     new allocs     delta
BenchmarkShortcodeLexer-4      456            700            +53.51%
BenchmarkParse-4               28             33             +17.86%

benchmark                      old bytes     new bytes     delta
BenchmarkShortcodeLexer-4      69875         81014         +15.94%
BenchmarkParse-4               8128          8304          +2.17%
```

Running some site benchmarks with Emoji support turned on:

```bash
benchmark                                                                                     old ns/op     new ns/op     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     924556797     818115620     -11.51%

benchmark                                                                                     old allocs     new allocs     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     4112613       4133787       +0.51%

benchmark                                                                                     old bytes     new bytes     delta
BenchmarkSiteBuilding/TOML,num_langs=3,num_pages=5000,tags_per_page=5,shortcodes,render-4     426982864     424363832     -0.61%
```

Fixes #5534
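For orientation before the diff: after this change a caller hands `pageparser.Parse` a `Config` and receives emoji as first-class items. A minimal sketch of driving the parser — the `Config` struct, `TypeEmoji`, `ValStr`, `IsEOF`/`IsError` and `Iterator()` all appear in the diff below; the `it.Next()` call is an assumption about the Iterator API, not something shown in this commit:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/gohugoio/hugo/parser/pageparser"
)

func main() {
	r := strings.NewReader("Hello :smile: world")

	// Emoji items are only emitted when enabled in the parser config.
	res, err := pageparser.Parse(r, pageparser.Config{EnableEmoji: true})
	if err != nil {
		panic(err)
	}

	it := res.Iterator()
	for {
		item := it.Next() // assumed Iterator method, not part of this diff
		if item.IsEOF() || item.IsError() {
			break
		}
		if item.Type == pageparser.TypeEmoji {
			fmt.Println("emoji token:", item.ValStr()) // ":smile:"
		}
	}
}
```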
```diff
@@ -215,7 +215,7 @@ type parsedFile struct {
 func parseContentFile(r io.Reader) (parsedFile, error) {
 	var pf parsedFile
 
-	psr, err := pageparser.Parse(r)
+	psr, err := pageparser.Parse(r, pageparser.Config{})
 	if err != nil {
 		return pf, err
 	}
```
```diff
@@ -30,6 +30,12 @@ var (
 	emojiMaxSize   int
 )
 
+// Emoji returns the emoji given a key, e.g. ":smile:", nil if not found.
+func Emoji(key string) []byte {
+	emojiInit.Do(initEmoji)
+	return emojis[key]
+}
+
 // Emojify "emojifies" the input source.
 // Note that the input byte slice will be modified if needed.
 // See http://www.emoji-cheat-sheet.com/
```
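A hypothetical call site for the new helper (not part of the diff): the key includes both `:` delimiters — exactly the form the lexer emits for a `TypeEmoji` item — and unknown keys return `nil`, so callers can fall back to the literal source bytes:

```go
// Sketch: resolve an emoji key to its replacement bytes.
if e := helpers.Emoji(":smile:"); e != nil {
	fmt.Printf("%s\n", e) // prints the 😄 glyph
} else {
	// Unknown key, e.g. ":not:" — keep the original source text.
}
```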
```diff
@@ -17,6 +17,8 @@ import (
 	"bytes"
 	"io"
 
+	"github.com/gohugoio/hugo/helpers"
+
 	errors "github.com/pkg/errors"
 
 	bp "github.com/gohugoio/hugo/bufferpool"
```
```diff
@@ -149,6 +151,12 @@ Loop:
 			result.WriteString(placeHolder)
 			ordinal++
 			s.shortcodes.Add(placeHolder, currShortcode)
+		case it.Type == pageparser.TypeEmoji:
+			if emoji := helpers.Emoji(it.ValStr()); emoji != nil {
+				result.Write(emoji)
+			} else {
+				result.Write(it.Val)
+			}
 		case it.IsEOF():
 			break Loop
 		case it.IsError():
```
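Note the fallback in the `TypeEmoji` case: anything shaped like `:word:` reaches this branch, but only known keys are replaced. Roughly (values illustrative, matching the page test later in this diff):

```go
// ":smile:" — helpers.Emoji returns the replacement bytes; they are written.
// ":not:"   — shaped like an emoji, so it lexes as TypeEmoji, but the lookup
//             returns nil and it.Val (the literal ":not:") is written back
//             unchanged.
```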
```diff
@@ -170,7 +178,10 @@ Loop:
 
 func (p *Page) parse(reader io.Reader) error {
 
-	parseResult, err := pageparser.Parse(reader)
+	parseResult, err := pageparser.Parse(
+		reader,
+		pageparser.Config{EnableEmoji: p.s.Cfg.GetBool("enableEmoji")},
+	)
 	if err != nil {
 		return err
 	}
```
```diff
@@ -1497,6 +1497,45 @@ func TestChompBOM(t *testing.T) {
 	checkPageTitle(t, p, "Simple")
 }
 
+func TestPageWithEmoji(t *testing.T) {
+	for _, enableEmoji := range []bool{true, false} {
+		v := viper.New()
+		v.Set("enableEmoji", enableEmoji)
+		b := newTestSitesBuilder(t)
+		b.WithViper(v)
+
+		b.WithSimpleConfigFile()
+
+		b.WithContent("page-emoji.md", `---
+title: "Hugo Smile"
+---
+This is a :smile:.
+<!--more--> 
+
+Another :smile: This is :not: an emoji.
+
+`)
+
+		b.CreateSites().Build(BuildCfg{})
+
+		if enableEmoji {
+			b.AssertFileContent("public/page-emoji/index.html",
+				"This is a 😄",
+				"Another 😄",
+				"This is :not: an emoji",
+			)
+		} else {
+			b.AssertFileContent("public/page-emoji/index.html",
+				"This is a :smile:",
+				"Another :smile:",
+				"This is :not: an emoji",
+			)
+		}
+
+	}
+
+}
+
 // https://github.com/gohugoio/hugo/issues/5381
 func TestPageManualSummary(t *testing.T) {
 	b := newTestSitesBuilder(t)
```
```diff
@@ -272,10 +272,6 @@ func (c *contentHandlers) handlePageContent() contentHandler {
 
 		p := ctx.currentPage
 
-		if c.s.Cfg.GetBool("enableEmoji") {
-			p.workContent = helpers.Emojify(p.workContent)
-		}
-
 		p.workContent = p.renderContent(p.workContent)
 
 		tmpContent, tmpTableOfContents := helpers.ExtractTOC(p.workContent)
```
```diff
@@ -177,6 +177,16 @@ type shortcode struct {
 	pos       int // the position in bytes in the source file
 }
 
+func (s shortcode) innerString() string {
+	var sb strings.Builder
+
+	for _, inner := range s.inner {
+		sb.WriteString(inner.(string))
+	}
+
+	return sb.String()
+}
+
 func (sc shortcode) String() string {
 	// for testing (mostly), so any change here will break tests!
 	var params interface{}
```
```diff
@@ -363,7 +373,7 @@ func renderShortcode(
 	if sc.isInline {
 		templName := path.Join("_inline_shortcode", p.Path(), sc.name)
 		if sc.isClosing {
-			templStr := sc.inner[0].(string)
+			templStr := sc.innerString()
 
 			var err error
 			tmpl, err = p.s.TextTmpl.Parse(templName, templStr)
```
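Why `innerString()` replaces `sc.inner[0]`: the reworked lexer may now deliver the inner content of an inline shortcode as several text items instead of one (the updated "inline with template syntax" test later in this diff shows a template body arriving as two `tText` items), so taking only the first element would silently drop the rest. Illustrative shapes only, with hypothetical values:

```go
// old assumption: inner = []interface{}{"{{ .Get 0 }}{{ .Get 1 }}"}
// now possible:   inner = []interface{}{"{{ .Get 0 }}", "{{ .Get 1 }}"}
//
// innerString() joins every segment before the inline template is parsed.
```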
```diff
@@ -113,6 +113,7 @@ const (
 	TypeFrontMatterTOML
 	TypeFrontMatterJSON
 	TypeFrontMatterORG
+	TypeEmoji
 	TypeIgnore // The BOM Unicode byte order marker and possibly others
 
 	// shortcode items
```
```diff
@@ -37,6 +37,12 @@ type pageLexer struct {
 	start      int // item start position
 	width      int // width of last element
 
+	// Contains lexers for shortcodes and other main section
+	// elements.
+	sectionHandlers *sectionHandlers
+
+	cfg Config
+
 	// The summary divider to look for.
 	summaryDivider []byte
 	// Set when we have parsed any summary divider
```
```diff
@@ -60,13 +66,17 @@ func (l *pageLexer) Input() []byte {
 
 }
 
+type Config struct {
+	EnableEmoji bool
+}
+
 // note: the input position here is normally 0 (start), but
 // can be set if position of first shortcode is known
-func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {
+func newPageLexer(input []byte, stateStart stateFunc, cfg Config) *pageLexer {
 	lexer := &pageLexer{
 		input:      input,
-		pos:        inputPosition,
 		stateStart: stateStart,
+		cfg:        cfg,
 		lexerShortcodeState: lexerShortcodeState{
 			currLeftDelimItem:  tLeftDelimScNoMarkup,
 			currRightDelimItem: tRightDelimScNoMarkup,
```
```diff
@@ -75,6 +85,8 @@ func newPageLexer(input []byte, inputPosition int, stateStart stateFunc) *pageLexer {
 		items: make([]Item, 0, 5),
 	}
 
+	lexer.sectionHandlers = createSectionHandlers(lexer)
+
 	return lexer
 }
 
```
```diff
@@ -100,6 +112,8 @@ var (
 	delimOrg          = []byte("#+")
 	htmlCommentStart  = []byte("<!--")
 	htmlCommentEnd    = []byte("-->")
+
+	emojiDelim = byte(':')
 )
 
 func (l *pageLexer) next() rune {
```
```diff
@@ -132,6 +146,10 @@ func (l *pageLexer) emit(t ItemType) {
 	l.start = l.pos
 }
 
+func (l *pageLexer) isEOF() bool {
+	return l.pos >= len(l.input)
+}
+
 // special case, do not send '\\' back to client
 func (l *pageLexer) ignoreEscapesAndEmit(t ItemType) {
 	val := bytes.Map(func(r rune) rune {
```
```diff
@@ -193,30 +211,80 @@ func (l *pageLexer) consumeSpace() {
 	}
 }
 
-func lexMainSection(l *pageLexer) stateFunc {
-	if l.isInHTMLComment {
-		return lexEndFromtMatterHTMLComment
-	}
-
-	// Fast forward as far as possible.
-	var l1, l2 int
-
-	if !l.summaryDividerChecked && l.summaryDivider != nil {
-		l1 = l.index(l.summaryDivider)
-		if l1 == -1 {
-			l.summaryDividerChecked = true
-		}
-	}
-
-	l2 = l.index(leftDelimSc)
-	skip := minIndex(l1, l2)
-
-	if skip > 0 {
-		l.pos += skip
-	}
-
-	for {
-		if l.isShortCodeStart() {
+// lex a string starting at ":"
+func lexEmoji(l *pageLexer) stateFunc {
+	pos := l.pos + 1
+	valid := false
+
+	for i := pos; i < len(l.input); i++ {
+		if i > pos && l.input[i] == emojiDelim {
+			pos = i + 1
+			valid = true
+			break
+		}
+		r, _ := utf8.DecodeRune(l.input[i:])
+		if !isAlphaNumeric(r) {
+			break
+		}
+	}
+
+	if valid {
+		l.pos = pos
+		l.emit(TypeEmoji)
+	} else {
+		l.pos++
+		l.emit(tText)
+	}
+
+	return lexMainSection
+}
+
+type sectionHandlers struct {
+	l *pageLexer
+
+	// Set when none of the sections are found so we
+	// can safely stop looking and skip to the end.
+	skipAll bool
+
+	handlers    []*sectionHandler
+	skipIndexes []int
+}
+
+func (s *sectionHandlers) skip() int {
+	if s.skipAll {
+		return -1
+	}
+
+	s.skipIndexes = s.skipIndexes[:0]
+	var shouldSkip bool
+	for _, skipper := range s.handlers {
+		idx := skipper.skip()
+		if idx != -1 {
+			shouldSkip = true
+			s.skipIndexes = append(s.skipIndexes, idx)
+		}
+	}
+
+	if !shouldSkip {
+		s.skipAll = true
+		return -1
+	}
+
+	return minIndex(s.skipIndexes...)
+}
+
+func createSectionHandlers(l *pageLexer) *sectionHandlers {
+
+	shortCodeHandler := &sectionHandler{
+		l: l,
+		skipFunc: func(l *pageLexer) int {
+			return l.index(leftDelimSc)
+		},
+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+			if !l.isShortCodeStart() {
+				return origin, false
+			}
+
 			if l.isInline {
 				// If we're inside an inline shortcode, the only valid shortcode markup is
 				// the markup which closes it.
@@ -225,14 +293,11 @@ func lexMainSection(l *pageLexer) stateFunc {
 				if end != len(l.input)-1 {
 					b = bytes.TrimSpace(b[end+1:])
 					if end == -1 || !bytes.HasPrefix(b, []byte(l.currShortcodeName+" ")) {
-						return l.errorf("inline shortcodes do not support nesting")
+						return l.errorf("inline shortcodes do not support nesting"), true
 					}
 				}
 			}
 
-			if l.pos > l.start {
-				l.emit(tText)
-			}
 			if l.hasPrefix(leftDelimScWithMarkup) {
 				l.currLeftDelimItem = tLeftDelimScWithMarkup
 				l.currRightDelimItem = tRightDelimScWithMarkup
@@ -240,32 +305,139 @@ func lexMainSection(l *pageLexer) stateFunc {
 			} else {
 				l.currLeftDelimItem = tLeftDelimScNoMarkup
 				l.currRightDelimItem = tRightDelimScNoMarkup
 			}
-			return lexShortcodeLeftDelim
-		}
-
-		if !l.summaryDividerChecked && l.summaryDivider != nil {
-			if l.hasPrefix(l.summaryDivider) {
-				if l.pos > l.start {
-					l.emit(tText)
-				}
-				l.summaryDividerChecked = true
-				l.pos += len(l.summaryDivider)
-				// This makes it a little easier to reason about later.
-				l.consumeSpace()
-				l.emit(TypeLeadSummaryDivider)
-
-				// We have already moved to the next.
-				continue
-			}
-		}
-
-		r := l.next()
-		if r == eof {
-			break
-		}
-	}
-
-	return lexDone
+			return lexShortcodeLeftDelim, true
+		},
+	}
+
+	summaryDividerHandler := &sectionHandler{
+		l: l,
+		skipFunc: func(l *pageLexer) int {
+			if l.summaryDividerChecked || l.summaryDivider == nil {
+				return -1
+			}
+			return l.index(l.summaryDivider)
+		},
+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+			if !l.hasPrefix(l.summaryDivider) {
+				return origin, false
+			}
+
+			l.summaryDividerChecked = true
+			l.pos += len(l.summaryDivider)
+			// This makes it a little easier to reason about later.
+			l.consumeSpace()
+			l.emit(TypeLeadSummaryDivider)
+
+			return origin, true
+
+		},
+	}
+
+	handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}
+
+	if l.cfg.EnableEmoji {
+		emojiHandler := &sectionHandler{
+			l: l,
+			skipFunc: func(l *pageLexer) int {
+				return l.indexByte(emojiDelim)
+			},
+			lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+				return lexEmoji, true
+			},
+		}
+
+		handlers = append(handlers, emojiHandler)
+	}
+
+	return &sectionHandlers{
+		l:           l,
+		handlers:    handlers,
+		skipIndexes: make([]int, len(handlers)),
+	}
+}
+
+func (s *sectionHandlers) lex(origin stateFunc) stateFunc {
+	if s.skipAll {
+		return nil
+	}
+
+	if s.l.pos > s.l.start {
+		s.l.emit(tText)
+	}
+
+	for _, handler := range s.handlers {
+		if handler.skipAll {
+			continue
+		}
+
+		next, handled := handler.lexFunc(origin, handler.l)
+		if next == nil || handled {
+			return next
+		}
+	}
+
+	// Not handled by the above.
+	s.l.pos++
+
+	return origin
+}
+
+type sectionHandler struct {
+	l *pageLexer
+
+	// No more sections of this type.
+	skipAll bool
+
+	// Returns the index of the next match, -1 if none found.
+	skipFunc func(l *pageLexer) int
+
+	// Lex lexes the current section and returns the next state func and
+	// a bool telling if this section was handled.
+	// Note that returning nil as the next state will terminate the
+	// lexer.
+	lexFunc func(origin stateFunc, l *pageLexer) (stateFunc, bool)
+}
+
+func (s *sectionHandler) skip() int {
+	if s.skipAll {
+		return -1
+	}
+
+	idx := s.skipFunc(s.l)
+	if idx == -1 {
+		s.skipAll = true
+	}
+	return idx
+}
+
+func lexMainSection(l *pageLexer) stateFunc {
+
+	if l.isEOF() {
+		return lexDone
+	}
+
+	if l.isInHTMLComment {
+		return lexEndFromtMatterHTMLComment
+	}
+
+	// Fast forward as far as possible.
+	skip := l.sectionHandlers.skip()
+
+	if skip == -1 {
+		l.pos = len(l.input)
+		return lexDone
+	} else if skip > 0 {
+		l.pos += skip
+	}
+
+	next := l.sectionHandlers.lex(lexMainSection)
+	if next != nil {
+		return next
+	}
+
+	l.pos = len(l.input)
+	return lexDone
 
 }
```
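The `skipFunc`/`lexFunc` split above is where the speed comes from: each handler reports the byte offset of its next possible match, `lexMainSection` jumps straight to the smallest offset instead of scanning rune by rune, and only then lets a handler consume the construct. A stripped-down, self-contained sketch of that pattern (illustrative only; names and details simplified from the diff):

```go
package main

import (
	"bytes"
	"fmt"
)

// A minimal re-creation of the sectionHandler idea: find the next byte
// offset any handler could act on, jump there, let one handler consume it.
type handler struct {
	name string
	next func(in []byte, pos int) int // index of next match relative to pos, -1 if none
}

func main() {
	input := []byte("text {{< sc >}} more :smile: end")
	handlers := []handler{
		{"shortcode", func(in []byte, pos int) int { return bytes.Index(in[pos:], []byte("{{")) }},
		{"emoji", func(in []byte, pos int) int { return bytes.IndexByte(in[pos:], ':') }},
	}

	pos := 0
	for pos < len(input) {
		// Fast forward: take the minimum of all handler indexes.
		skip, who := -1, ""
		for _, h := range handlers {
			if idx := h.next(input, pos); idx != -1 && (skip == -1 || idx < skip) {
				skip, who = idx, h.name
			}
		}
		if skip == -1 {
			break // nothing left to handle; the rest is plain text
		}
		pos += skip
		fmt.Printf("%s candidate at byte %d\n", who, pos)
		pos++ // a real handler would consume the whole construct here
	}
}
```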
```diff
@@ -297,10 +469,22 @@ func (l *pageLexer) index(sep []byte) int {
 	return bytes.Index(l.input[l.pos:], sep)
 }
 
+func (l *pageLexer) indexByte(sep byte) int {
+	return bytes.IndexByte(l.input[l.pos:], sep)
+}
+
 func (l *pageLexer) hasPrefix(prefix []byte) bool {
 	return bytes.HasPrefix(l.input[l.pos:], prefix)
 }
 
+func (l *pageLexer) hasPrefixByte(prefix byte) bool {
+	b := l.input[l.pos:]
+	if len(b) == 0 {
+		return false
+	}
+	return b[0] == prefix
+}
+
 // helper functions
 
 // returns the min index >= 0
```
```diff
@@ -27,7 +27,7 @@ import (
 
 // Result holds the parse result.
 type Result interface {
-	// Iterator returns a new Iterator positioned at the benning of the parse tree.
+	// Iterator returns a new Iterator positioned at the beginning of the parse tree.
 	Iterator() *Iterator
 	// Input returns the input to Parse.
 	Input() []byte
```
```diff
@@ -35,27 +35,21 @@ type Result interface {
 
 var _ Result = (*pageLexer)(nil)
 
-// Parse parses the page in the given reader.
-func Parse(r io.Reader) (Result, error) {
+// Parse parses the page in the given reader according to the given Config.
+func Parse(r io.Reader, cfg Config) (Result, error) {
 	b, err := ioutil.ReadAll(r)
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to read page content")
 	}
-	return parseBytes(b)
+	return parseBytes(b, cfg)
 }
 
-func parseBytes(b []byte) (Result, error) {
-	lexer := newPageLexer(b, 0, lexIntroSection)
+func parseBytes(b []byte, cfg Config) (Result, error) {
+	lexer := newPageLexer(b, lexIntroSection, cfg)
 	lexer.run()
 	return lexer, nil
 }
 
-func parseMainSection(input []byte, from int) Result {
-	lexer := newPageLexer(input, from, lexMainSection)
-	lexer.run()
-	return lexer
-}
-
 // An Iterator has methods to iterate a parsed page with support going back
 // if needed.
 type Iterator struct {
```
```diff
@@ -88,8 +88,8 @@ func TestFrontMatter(t *testing.T) {
 	}
 }
 
-func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
-	l := newPageLexer(input, 0, stateStart)
+func collectWithConfig(input []byte, skipFrontMatter bool, stateStart stateFunc, cfg Config) (items []Item) {
+	l := newPageLexer(input, stateStart, cfg)
 	l.run()
 	t := l.newIterator()
 
```
```diff
@@ -103,6 +103,13 @@ func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
 	return
 }
 
+func collect(input []byte, skipFrontMatter bool, stateStart stateFunc) (items []Item) {
+	var cfg Config
+
+	return collectWithConfig(input, skipFrontMatter, stateStart, cfg)
+
+}
+
 // no positional checking, for now ...
 func equal(i1, i2 []Item) bool {
 	if len(i1) != len(i2) {
```
parser/pageparser/pageparser_main_test.go (new file, 40 lines):

```diff
@@ -0,0 +1,40 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package pageparser
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestMain(t *testing.T) {
+	t.Parallel()
+
+	var mainTests = []lexerTest{
+		{"emoji #1", "Some text with :emoji:", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
+		{"emoji #2", "Some text with :emoji: and some text.", []Item{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
+		{"looks like an emoji #1", "Some text and then :emoji", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
+		{"looks like an emoji #2", "Some text and then ::", []Item{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
+		{"looks like an emoji #3", ":Some :text", []Item{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
+	}
+
+	for i, test := range mainTests {
+		items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})
+		if !equal(items, test.items) {
+			got := crLfReplacer.Replace(fmt.Sprint(items))
+			expected := crLfReplacer.Replace(fmt.Sprint(test.items))
+			t.Errorf("[%d] %s: got\n\t%v\nexpected\n\t%v", i, test.name, got, expected)
+		}
+	}
+}
```
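The "looks like an emoji" cases pin down the fallback path in `lexEmoji`. A worked trace for `"Some text and then ::"`, derived from the lexer code above:

```go
// 1. The emoji handler reports the next ':' (byte 19); sectionHandlers.lex
//    first emits "Some text and then " as tText, then enters lexEmoji.
// 2. lexEmoji scans from byte 20: the character there is ':', but i > pos
//    is false on the first iteration and ':' is not alphanumeric, so the
//    scan stops with valid == false.
// 3. The fallback advances a single byte and emits ":" as tText.
// 4. The second ':' repeats the same steps, giving the expected items:
//    [tText "Some text and then ", tText ":", tText ":", EOF]
```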
```diff
@@ -152,7 +152,8 @@ var shortCodeLexerTests = []lexerTest{
 	{"basic inline", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
 	{"basic inline with space", `{{< sc1.inline >}}Hello World{{< / sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
 	{"inline self closing", `{{< sc1.inline >}}Hello World{{< /sc1.inline >}}Hello World{{< sc1.inline />}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstText, tstLeftNoMD, tstSC1Inline, tstSCClose, tstRightNoMD, tstEOF}},
-	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tError, "inline shortcodes do not support nesting")}},
+	{"inline with template syntax", `{{< sc1.inline >}}{{ .Get 0 }}{{ .Get 1 }}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, nti(tText, "{{ .Get 0 }}"), nti(tText, "{{ .Get 1 }}"), tstLeftNoMD, tstSCClose, tstSC1Inline, tstRightNoMD, tstEOF}},
+	{"inline with nested shortcode (not supported)", `{{< sc1.inline >}}Hello World{{< sc1 >}}{{< /sc1.inline >}}`, []Item{tstLeftNoMD, tstSC1Inline, tstRightNoMD, tstText, nti(tError, "inline shortcodes do not support nesting")}},
 	{"inline case mismatch", `{{< sc1.Inline >}}Hello World{{< /sc1.Inline >}}`, []Item{tstLeftNoMD, nti(tError, "period in shortcode name only allowed for inline identifiers")}},
 }
 
```
```diff
@@ -171,10 +172,11 @@ func BenchmarkShortcodeLexer(b *testing.B) {
 	for i, input := range shortCodeLexerTests {
 		testInputs[i] = []byte(input.input)
 	}
+	var cfg Config
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, input := range testInputs {
-			items := collect(input, true, lexMainSection)
+			items := collectWithConfig(input, true, lexMainSection, cfg)
 			if len(items) == 0 {
 			}
 
```
```diff
@@ -34,10 +34,37 @@ This is some summary. This is some summary. This is some summary. This is some summary.
 
 `
 	input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))
+	cfg := Config{EnableEmoji: false}
 
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		if _, err := parseBytes(input); err != nil {
+		if _, err := parseBytes(input, cfg); err != nil {
 			b.Fatal(err)
 		}
 	}
 }
+
+func BenchmarkParseWithEmoji(b *testing.B) {
+	start := `
+
+
+---
+title: "Front Matters"
+description: "It really does"
+---
+
+This is some summary. This is some summary. This is some summary. This is some summary.
+
+ <!--more-->
+
+
+`
+	input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))
+	cfg := Config{EnableEmoji: true}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if _, err := parseBytes(input, cfg); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
```
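The parser benchmarks quoted in the commit message can be reproduced with the standard Go tooling; something along these lines (exact flags are a suggestion, not taken from this commit):

```bash
go test -bench 'BenchmarkParse' -benchmem ./parser/pageparser/
```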