mirror of
				https://github.com/gohugoio/hugo.git
				synced 2024-05-11 05:54:58 +00:00 
			
		
		
		
	* Page without front matter now treated same as a page with empty front matter. * Test cases added to cover this and repro issue #4320. * Type safety of front matter code improved. Fixes #4320
		
			
				
	
	
		
			416 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			416 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2016 The Hugo Authors. All rights reserved.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| // http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package parser
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"bytes"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| 	"unicode"
 | |
| 
 | |
| 	"github.com/chaseadamsio/goorgeous"
 | |
| )
 | |
| 
 | |
const (
	// TODO(bep) Do we really have to export these?

	// HTMLLead is the leading character of an HTML document.
	HTMLLead = "<"

	// YAMLLead is the leading character of YAML front matter.
	YAMLLead = "-"
	// YAMLDelimUnix is the YAML front matter end marker with a Unix line ending.
	YAMLDelimUnix = "---\n"
	// YAMLDelimDOS is the YAML front matter end marker with a Windows line ending.
	YAMLDelimDOS = "---\r\n"
	// YAMLDelim is the bare YAML front matter delimiter.
	YAMLDelim = "---"

	// TOMLLead is the leading character of TOML front matter.
	TOMLLead = "+"
	// TOMLDelimUnix is the TOML front matter end marker with a Unix line ending.
	TOMLDelimUnix = "+++\n"
	// TOMLDelimDOS is the TOML front matter end marker with a Windows line ending.
	TOMLDelimDOS = "+++\r\n"
	// TOMLDelim is the bare TOML front matter delimiter.
	TOMLDelim = "+++"

	// JSONLead is the leading character of JSON front matter.
	JSONLead = "{"

	// HTMLCommentStart opens an HTML comment.
	HTMLCommentStart = "<!--"
	// HTMLCommentEnd closes an HTML comment.
	HTMLCommentEnd = "-->"

	// BOM is the Unicode byte order mark.
	BOM = '\ufeff'
)
 | |
| 
 | |
| var (
 | |
| 	delims = regexp.MustCompile(
 | |
| 		"^(" + regexp.QuoteMeta(YAMLDelim) + `\s*\n|` + regexp.QuoteMeta(TOMLDelim) + `\s*\n|` + regexp.QuoteMeta(JSONLead) + ")",
 | |
| 	)
 | |
| )
 | |
| 
 | |
// Page is the read-only view of a content page after it has been split into
// front matter and content.
type Page interface {
	// FrontMatter returns the raw front matter, delimiters included.
	FrontMatter() []byte

	// Content returns the raw page content.
	Content() []byte

	// IsRenderable reports whether the page should be rendered.
	IsRenderable() bool

	// Metadata returns the front matter unmarshalled into a map.
	Metadata() (map[string]interface{}, error)
}
 | |
| 
 | |
// page is the package's implementation of the Page interface.
type page struct {
	// render is the value reported by IsRenderable.
	render bool
	// frontmatter holds the raw front matter bytes returned by FrontMatter.
	frontmatter []byte
	// content holds the raw content bytes returned by Content.
	content []byte
}

// Content returns the raw content stored on the page.
func (p *page) Content() []byte {
	return p.content
}

// FrontMatter returns the raw front matter stored on the page, delimiters
// included.
func (p *page) FrontMatter() []byte {
	return p.frontmatter
}

// IsRenderable reports whether the page is flagged for rendering.
func (p *page) IsRenderable() bool {
	return p.render
}
 | |
| 
 | |
| // Metadata returns the unmarshalled frontmatter data.
 | |
| func (p *page) Metadata() (meta map[string]interface{}, err error) {
 | |
| 	frontmatter := p.FrontMatter()
 | |
| 
 | |
| 	if len(frontmatter) != 0 {
 | |
| 		fm := DetectFrontMatter(rune(frontmatter[0]))
 | |
| 		if fm != nil {
 | |
| 			meta, err = fm.Parse(frontmatter)
 | |
| 		}
 | |
| 	}
 | |
| 	return
 | |
| }
 | |
| 
 | |
| // ReadFrom reads the content from an io.Reader and constructs a page.
 | |
| func ReadFrom(r io.Reader) (p Page, err error) {
 | |
| 	reader := bufio.NewReader(r)
 | |
| 
 | |
| 	// chomp BOM and assume UTF-8
 | |
| 	if err = chompBOM(reader); err != nil && err != io.EOF {
 | |
| 		return
 | |
| 	}
 | |
| 	if err = chompWhitespace(reader); err != nil && err != io.EOF {
 | |
| 		return
 | |
| 	}
 | |
| 	if err = chompFrontmatterStartComment(reader); err != nil && err != io.EOF {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	firstLine, err := peekLine(reader)
 | |
| 	if err != nil && err != io.EOF {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	newp := new(page)
 | |
| 	newp.render = shouldRender(firstLine)
 | |
| 
 | |
| 	if newp.render && isFrontMatterDelim(firstLine) {
 | |
| 		left, right := determineDelims(firstLine)
 | |
| 		fm, err := extractFrontMatterDelims(reader, left, right)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		newp.frontmatter = fm
 | |
| 	} else if newp.render && goorgeous.IsKeyword(firstLine) {
 | |
| 		fm, err := goorgeous.ExtractOrgHeaders(reader)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		newp.frontmatter = fm
 | |
| 	}
 | |
| 
 | |
| 	content, err := extractContent(reader)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	newp.content = content
 | |
| 
 | |
| 	return newp, nil
 | |
| }
 | |
| 
 | |
| // chompBOM scans any leading Unicode Byte Order Markers from r.
 | |
| func chompBOM(r io.RuneScanner) (err error) {
 | |
| 	for {
 | |
| 		c, _, err := r.ReadRune()
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		if c != BOM {
 | |
| 			r.UnreadRune()
 | |
| 			return nil
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
// chompWhitespace consumes every leading Unicode whitespace rune from r and
// leaves r positioned at the first non-space rune. Any error from reading a
// rune, including io.EOF, is returned as is.
func chompWhitespace(r io.RuneScanner) (err error) {
	for {
		var ch rune
		if ch, _, err = r.ReadRune(); err != nil {
			return err
		}
		if unicode.IsSpace(ch) {
			continue
		}
		// First non-space rune: push it back for the next reader.
		r.UnreadRune()
		return nil
	}
}
 | |
| 
 | |
| // chompFrontmatterStartComment checks r for a leading HTML comment.  If a
 | |
| // comment is found, it is read from r and then whitespace is trimmed from the
 | |
| // beginning of r.
 | |
| func chompFrontmatterStartComment(r *bufio.Reader) (err error) {
 | |
| 	candidate, err := r.Peek(32)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	str := string(candidate)
 | |
| 	if strings.HasPrefix(str, HTMLCommentStart) {
 | |
| 		lineEnd := strings.IndexAny(str, "\n")
 | |
| 		if lineEnd == -1 {
 | |
| 			//TODO: if we can't find it, Peek more?
 | |
| 			return nil
 | |
| 		}
 | |
| 		testStr := strings.TrimSuffix(str[0:lineEnd], "\r")
 | |
| 		if strings.Contains(testStr, HTMLCommentEnd) {
 | |
| 			return nil
 | |
| 		}
 | |
| 		buf := make([]byte, lineEnd)
 | |
| 		if _, err = r.Read(buf); err != nil {
 | |
| 			return
 | |
| 		}
 | |
| 		if err = chompWhitespace(r); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // chompFrontmatterEndComment checks r for a trailing HTML comment.
 | |
| func chompFrontmatterEndComment(r *bufio.Reader) (err error) {
 | |
| 	candidate, err := r.Peek(32)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	str := string(candidate)
 | |
| 	lineEnd := strings.IndexAny(str, "\n")
 | |
| 	if lineEnd == -1 {
 | |
| 		return nil
 | |
| 	}
 | |
| 	testStr := strings.TrimSuffix(str[0:lineEnd], "\r")
 | |
| 	if strings.Contains(testStr, HTMLCommentStart) {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	//TODO: if we can't find it, Peek more?
 | |
| 	if strings.HasSuffix(testStr, HTMLCommentEnd) {
 | |
| 		buf := make([]byte, lineEnd)
 | |
| 		if _, err = r.Read(buf); err != nil {
 | |
| 			return
 | |
| 		}
 | |
| 		if err = chompWhitespace(r); err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
// peekLine returns up to the first five bytes of r without consuming them,
// cut just after the first newline if one occurs within those bytes.
//
// If fewer than five bytes can be peeked, the error from Peek (io.EOF at end
// of input) is returned together with a nil line.
func peekLine(r *bufio.Reader) (line []byte, err error) {
	head, err := r.Peek(5)
	if err != nil {
		return nil, err
	}
	if nl := bytes.IndexByte(head, '\n'); nl >= 0 {
		// Keep the newline as part of the returned line.
		return head[:nl+1], nil
	}
	return head, nil
}
 | |
| 
 | |
| func shouldRender(lead []byte) (frontmatter bool) {
 | |
| 	if len(lead) <= 0 {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if bytes.Equal(lead[:1], []byte(HTMLLead)) {
 | |
| 		return
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func isFrontMatterDelim(data []byte) bool {
 | |
| 	return delims.Match(data)
 | |
| }
 | |
| 
 | |
| func determineDelims(firstLine []byte) (left, right []byte) {
 | |
| 	switch firstLine[0] {
 | |
| 	case YAMLLead[0]:
 | |
| 		return []byte(YAMLDelim), []byte(YAMLDelim)
 | |
| 	case TOMLLead[0]:
 | |
| 		return []byte(TOMLDelim), []byte(TOMLDelim)
 | |
| 	case JSONLead[0]:
 | |
| 		return []byte(JSONLead), []byte("}")
 | |
| 	default:
 | |
| 		panic(fmt.Sprintf("Unable to determine delims from %q", firstLine))
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // extractFrontMatterDelims takes a frontmatter from the content bufio.Reader.
 | |
| // Beginning white spaces of the bufio.Reader must be trimmed before call this
 | |
| // function.
 | |
| func extractFrontMatterDelims(r *bufio.Reader, left, right []byte) (fm []byte, err error) {
 | |
| 	var (
 | |
| 		c           byte
 | |
| 		buf         bytes.Buffer
 | |
| 		level       int
 | |
| 		sameDelim   = bytes.Equal(left, right)
 | |
| 		inQuote     bool
 | |
| 		escapeState int
 | |
| 	)
 | |
| 	// Frontmatter must start with a delimiter. To check it first,
 | |
| 	// pre-reads beginning delimiter length - 1 bytes from Reader
 | |
| 	for i := 0; i < len(left)-1; i++ {
 | |
| 		if c, err = r.ReadByte(); err != nil {
 | |
| 			return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | |
| 		}
 | |
| 		if err = buf.WriteByte(c); err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Reads a character from Reader one by one and checks it matches the
 | |
| 	// last character of one of delimiters to find the last character of
 | |
| 	// frontmatter. If it matches, makes sure it contains the delimiter
 | |
| 	// and if so, also checks it is followed by CR+LF or LF when YAML,
 | |
| 	// TOML case. In JSON case, nested delimiters must be parsed and it
 | |
| 	// is expected that the delimiter only contains one character.
 | |
| 	for {
 | |
| 		if c, err = r.ReadByte(); err != nil {
 | |
| 			return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | |
| 		}
 | |
| 		if err = buf.WriteByte(c); err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 
 | |
| 		switch c {
 | |
| 		case '"':
 | |
| 			if escapeState != 1 {
 | |
| 				inQuote = !inQuote
 | |
| 			}
 | |
| 		case '\\':
 | |
| 			escapeState++
 | |
| 		case left[len(left)-1]:
 | |
| 			if sameDelim { // YAML, TOML case
 | |
| 				if bytes.HasSuffix(buf.Bytes(), left) && (buf.Len() == len(left) || buf.Bytes()[buf.Len()-len(left)-1] == '\n') {
 | |
| 				nextByte:
 | |
| 					c, err = r.ReadByte()
 | |
| 					if err != nil {
 | |
| 						// It is ok that the end delimiter ends with EOF
 | |
| 						if err != io.EOF || level != 1 {
 | |
| 							return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | |
| 						}
 | |
| 					} else {
 | |
| 						switch c {
 | |
| 						case '\n':
 | |
| 							// ok
 | |
| 						case ' ':
 | |
| 							// Consume this byte and try to match again
 | |
| 							goto nextByte
 | |
| 						case '\r':
 | |
| 							if err = buf.WriteByte(c); err != nil {
 | |
| 								return nil, err
 | |
| 							}
 | |
| 							if c, err = r.ReadByte(); err != nil {
 | |
| 								return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | |
| 							}
 | |
| 							if c != '\n' {
 | |
| 								return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
 | |
| 							}
 | |
| 						default:
 | |
| 							return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
 | |
| 						}
 | |
| 						if err = buf.WriteByte(c); err != nil {
 | |
| 							return nil, err
 | |
| 						}
 | |
| 					}
 | |
| 					if level == 0 {
 | |
| 						level = 1
 | |
| 					} else {
 | |
| 						level = 0
 | |
| 					}
 | |
| 				}
 | |
| 			} else { // JSON case
 | |
| 				if !inQuote {
 | |
| 					level++
 | |
| 				}
 | |
| 			}
 | |
| 		case right[len(right)-1]: // JSON case only reaches here
 | |
| 			if !inQuote {
 | |
| 				level--
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if level == 0 {
 | |
| 			// Consumes white spaces immediately behind frontmatter
 | |
| 			if err = chompWhitespace(r); err != nil && err != io.EOF {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			if err = chompFrontmatterEndComment(r); err != nil && err != io.EOF {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 
 | |
| 			return buf.Bytes(), nil
 | |
| 		}
 | |
| 
 | |
| 		if c != '\\' {
 | |
| 			escapeState = 0
 | |
| 		}
 | |
| 
 | |
| 	}
 | |
| }
 | |
| 
 | |
// extractContent reads r until it is exhausted and returns everything read.
// If reading fails, the error is returned together with nil content.
func extractContent(r io.Reader) (content []byte, err error) {
	var buf bytes.Buffer
	if _, err = buf.ReadFrom(r); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
 |