| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | package parser
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import (
 | 
					
						
							|  |  |  | 	"bufio"
 | 
					
						
							|  |  |  | 	"bytes"
 | 
					
						
							| 
									
										
										
										
											2013-09-18 09:15:46 -07:00
										 |  |  | 	"fmt"
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 	"io"
 | 
					
						
							| 
									
										
										
										
											2015-08-01 22:24:22 -07:00
										 |  |  | 	"regexp"
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 	"unicode"
 | 
					
						
							|  |  |  | )
 | 
					
						
							|  |  |  | 
 | 
					
						
							// Lead bytes and delimiter lines used to recognise what a page starts with.
							const (
								// HTML_LEAD is the first byte of content that is not rendered.
								HTML_LEAD = "<"

								// YAML front matter: lead byte, bare delimiter, and the delimiter
								// followed by an LF or CR+LF line ending.
								YAML_LEAD       = "-"
								YAML_DELIM      = "---"
								YAML_DELIM_UNIX = YAML_DELIM + "\n"
								YAML_DELIM_DOS  = YAML_DELIM + "\r\n"

								// TOML front matter: lead byte, bare delimiter, and the delimiter
								// followed by an LF or CR+LF line ending.
								TOML_LEAD       = "+"
								TOML_DELIM      = "+++"
								TOML_DELIM_UNIX = TOML_DELIM + "\n"
								TOML_DELIM_DOS  = TOML_DELIM + "\r\n"

								// JSON_LEAD is the byte that opens JSON front matter.
								JSON_LEAD = "{"
							)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var (
 | 
					
						
							| 
									
										
										
										
											2015-08-01 22:24:22 -07:00
										 |  |  | 	delims = regexp.MustCompile(
 | 
					
						
							|  |  |  | 		"^(" + regexp.QuoteMeta(YAML_DELIM) + `\s*\n|` + regexp.QuoteMeta(TOML_DELIM) + `\s*\n|` + regexp.QuoteMeta(JSON_LEAD) + ")",
 | 
					
						
							|  |  |  | 	)
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-15 17:32:41 -05:00
										 |  |  | 	UnixEnding = []byte("\n")
 | 
					
						
							|  |  |  | 	DosEnding  = []byte("\r\n")
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | )
 | 
					
						
							|  |  |  | 
 | 
					
						
							type (
								// FrontMatter holds the raw front matter bytes of a page.
								FrontMatter []byte

								// Content holds the raw bytes of a page that follow its front matter.
								Content []byte
							)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type Page interface {
 | 
					
						
							|  |  |  | 	FrontMatter() FrontMatter
 | 
					
						
							|  |  |  | 	Content() Content
 | 
					
						
							| 
									
										
										
										
											2013-09-18 10:17:43 -07:00
										 |  |  | 	IsRenderable() bool
 | 
					
						
							| 
									
										
										
										
											2014-05-01 13:19:51 -04:00
										 |  |  | 	Metadata() (interface{}, error)
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type page struct {
 | 
					
						
							|  |  |  | 	render      bool
 | 
					
						
							|  |  |  | 	frontmatter FrontMatter
 | 
					
						
							|  |  |  | 	content     Content
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p *page) Content() Content {
 | 
					
						
							|  |  |  | 	return p.content
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p *page) FrontMatter() FrontMatter {
 | 
					
						
							|  |  |  | 	return p.frontmatter
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-18 10:17:43 -07:00
										 |  |  | func (p *page) IsRenderable() bool {
 | 
					
						
							|  |  |  | 	return p.render
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-05-01 13:19:51 -04:00
										 |  |  | func (p *page) Metadata() (meta interface{}, err error) {
 | 
					
						
							|  |  |  | 	frontmatter := p.FrontMatter()
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if len(frontmatter) != 0 {
 | 
					
						
							|  |  |  | 		fm := DetectFrontMatter(rune(frontmatter[0]))
 | 
					
						
							|  |  |  | 		meta, err = fm.Parse(frontmatter)
 | 
					
						
							|  |  |  | 		if err != nil {
 | 
					
						
							|  |  |  | 			return
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	return
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | // ReadFrom reads the content from an io.Reader and constructs a page.
 | 
					
						
							|  |  |  | func ReadFrom(r io.Reader) (p Page, err error) {
 | 
					
						
							|  |  |  | 	reader := bufio.NewReader(r)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-16 10:34:26 +02:00
										 |  |  | 	if err = chompWhitespace(reader); err != nil && err != io.EOF {
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	firstLine, err := peekLine(reader)
 | 
					
						
							| 
									
										
										
										
											2013-12-16 10:34:26 +02:00
										 |  |  | 	if err != nil && err != io.EOF {
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	newp := new(page)
 | 
					
						
							|  |  |  | 	newp.render = shouldRender(firstLine)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if newp.render && isFrontMatterDelim(firstLine) {
 | 
					
						
							|  |  |  | 		left, right := determineDelims(firstLine)
 | 
					
						
							|  |  |  | 		fm, err := extractFrontMatterDelims(reader, left, right)
 | 
					
						
							|  |  |  | 		if err != nil {
 | 
					
						
							|  |  |  | 			return nil, err
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 		newp.frontmatter = fm
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	content, err := extractContent(reader)
 | 
					
						
							|  |  |  | 	if err != nil {
 | 
					
						
							|  |  |  | 		return nil, err
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	newp.content = content
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return newp, nil
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							// chompWhitespace discards runes from r while they are whitespace (as
							// defined by unicode.IsSpace) and leaves the first non-space rune unread.
							//
							// It returns the error from ReadRune, which is io.EOF when the input ends
							// before a non-space rune is found, or the error from UnreadRune when that
							// rune cannot be pushed back.
							func chompWhitespace(r io.RuneScanner) (err error) {
								for {
									var c rune
									if c, _, err = r.ReadRune(); err != nil {
										return err
									}
									if !unicode.IsSpace(c) {
										// Report a failed push-back instead of silently losing the
										// first significant rune.
										return r.UnreadRune()
									}
								}
							}
 | 
					
						
							|  |  |  | 
 | 
					
						
							// peekLine returns the beginning of r without consuming it: the first
							// line including its newline when a newline occurs within the first five
							// bytes, otherwise the first five bytes.
							//
							// When fewer than five bytes remain, the bytes that are available are
							// still returned; err is io.EOF unless they contain a complete line. Any
							// other read error is returned with a nil line. The returned slice
							// aliases the reader's buffer and is only valid until the next read on r.
							func peekLine(r *bufio.Reader) (line []byte, err error) {
								// Peek hands back the available bytes together with the error on
								// short input, so they must not be thrown away: short pages still
								// need to be inspected by the caller.
								firstFive, err := r.Peek(5)
								if err != nil && err != io.EOF {
									return nil, err
								}
								if idx := bytes.IndexByte(firstFive, '\n'); idx != -1 {
									// include newline.
									return firstFive[:idx+1], nil
								}
								return firstFive, err
							}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func shouldRender(lead []byte) (frontmatter bool) {
 | 
					
						
							|  |  |  | 	if len(lead) <= 0 {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if bytes.Equal(lead[:1], []byte(HTML_LEAD)) {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	return true
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func isFrontMatterDelim(data []byte) bool {
 | 
					
						
							| 
									
										
										
										
											2015-08-01 22:24:22 -07:00
										 |  |  | 	return delims.Match(data)
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func determineDelims(firstLine []byte) (left, right []byte) {
 | 
					
						
							|  |  |  | 	switch len(firstLine) {
 | 
					
						
							|  |  |  | 	case 5:
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 		fallthrough
 | 
					
						
							|  |  |  | 	case 4:
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		if firstLine[0] == YAML_LEAD[0] {
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 			return []byte(YAML_DELIM), []byte(YAML_DELIM)
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		}
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 		return []byte(TOML_DELIM), []byte(TOML_DELIM)
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 	case 3:
 | 
					
						
							|  |  |  | 		fallthrough
 | 
					
						
							|  |  |  | 	case 2:
 | 
					
						
							|  |  |  | 		fallthrough
 | 
					
						
							|  |  |  | 	case 1:
 | 
					
						
							| 
									
										
										
										
											2014-02-16 01:20:46 -08:00
										 |  |  | 		return []byte(JSON_LEAD), []byte("}")
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 	default:
 | 
					
						
							|  |  |  | 		panic(fmt.Sprintf("Unable to determine delims from %q", firstLine))
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | // extractFrontMatterDelims takes a frontmatter from the content bufio.Reader.
 | 
					
						
							|  |  |  | // Begining white spaces of the bufio.Reader must be trimmed before call this
 | 
					
						
							|  |  |  | // function.
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | func extractFrontMatterDelims(r *bufio.Reader, left, right []byte) (fm FrontMatter, err error) {
 | 
					
						
							| 
									
										
										
										
											2013-09-18 09:15:46 -07:00
										 |  |  | 	var (
 | 
					
						
							|  |  |  | 		c         byte
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 		buf       bytes.Buffer
 | 
					
						
							| 
									
										
										
										
											2015-03-07 12:59:04 +01:00
										 |  |  | 		level     int
 | 
					
						
							|  |  |  | 		sameDelim = bytes.Equal(left, right)
 | 
					
						
							| 
									
										
										
										
											2013-09-18 09:15:46 -07:00
										 |  |  | 	)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 	// Frontmatter must start with a delimiter. To check it first,
 | 
					
						
							|  |  |  | 	// pre-reads beginning delimiter length - 1 bytes from Reader
 | 
					
						
							|  |  |  | 	for i := 0; i < len(left)-1; i++ {
 | 
					
						
							|  |  |  | 		if c, err = r.ReadByte(); err != nil {
 | 
					
						
							|  |  |  | 			return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 		if err = buf.WriteByte(c); err != nil {
 | 
					
						
							|  |  |  | 			return nil, err
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Reads a character from Reader one by one and checks it matches the
 | 
					
						
							|  |  |  | 	// last character of one of delemiters to find the last character of
 | 
					
						
							|  |  |  | 	// frontmatter. If it matches, makes sure it contains the delimiter
 | 
					
						
							|  |  |  | 	// and if so, also checks it is followed by CR+LF or LF when YAML,
 | 
					
						
							|  |  |  | 	// TOML case. In JSON case, nested delimiters must be parsed and it
 | 
					
						
							|  |  |  | 	// is expected that the delimiter only contains one character.
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 	for {
 | 
					
						
							| 
									
										
										
										
											2013-09-18 09:15:46 -07:00
										 |  |  | 		if c, err = r.ReadByte(); err != nil {
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 			return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 		if err = buf.WriteByte(c); err != nil {
 | 
					
						
							|  |  |  | 			return nil, err
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		switch c {
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 		case left[len(left)-1]:
 | 
					
						
							|  |  |  | 			if sameDelim { // YAML, TOML case
 | 
					
						
							| 
									
										
										
										
											2015-08-03 23:32:51 +09:00
										 |  |  | 				if bytes.HasSuffix(buf.Bytes(), left) && (buf.Len() == len(left) || buf.Bytes()[buf.Len()-len(left)-1] == '\n') {
 | 
					
						
							| 
									
										
										
										
											2015-08-01 22:24:22 -07:00
										 |  |  | 				nextByte:
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 					c, err = r.ReadByte()
 | 
					
						
							|  |  |  | 					if err != nil {
 | 
					
						
							|  |  |  | 						// It is ok that the end delimiter ends with EOF
 | 
					
						
							|  |  |  | 						if err != io.EOF || level != 1 {
 | 
					
						
							|  |  |  | 							return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | 
					
						
							|  |  |  | 						}
 | 
					
						
							|  |  |  | 					} else {
 | 
					
						
							|  |  |  | 						switch c {
 | 
					
						
							|  |  |  | 						case '\n':
 | 
					
						
							|  |  |  | 							// ok
 | 
					
						
							| 
									
										
										
										
											2015-08-01 22:24:22 -07:00
										 |  |  | 						case ' ':
 | 
					
						
							|  |  |  | 							// Consume this byte and try to match again
 | 
					
						
							|  |  |  | 							goto nextByte
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 						case '\r':
 | 
					
						
							|  |  |  | 							if err = buf.WriteByte(c); err != nil {
 | 
					
						
							|  |  |  | 								return nil, err
 | 
					
						
							|  |  |  | 							}
 | 
					
						
							|  |  |  | 							if c, err = r.ReadByte(); err != nil {
 | 
					
						
							|  |  |  | 								return nil, fmt.Errorf("unable to read frontmatter at filepos %d: %s", buf.Len(), err)
 | 
					
						
							|  |  |  | 							}
 | 
					
						
							|  |  |  | 							if c != '\n' {
 | 
					
						
							|  |  |  | 								return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
 | 
					
						
							|  |  |  | 							}
 | 
					
						
							|  |  |  | 						default:
 | 
					
						
							|  |  |  | 							return nil, fmt.Errorf("frontmatter delimiter must be followed by CR+LF or LF but those can't be found at filepos %d", buf.Len())
 | 
					
						
							|  |  |  | 						}
 | 
					
						
							|  |  |  | 						if err = buf.WriteByte(c); err != nil {
 | 
					
						
							|  |  |  | 							return nil, err
 | 
					
						
							|  |  |  | 						}
 | 
					
						
							|  |  |  | 					}
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 					if level == 0 {
 | 
					
						
							|  |  |  | 						level = 1
 | 
					
						
							|  |  |  | 					} else {
 | 
					
						
							|  |  |  | 						level = 0
 | 
					
						
							|  |  |  | 					}
 | 
					
						
							| 
									
										
										
										
											2013-09-18 09:15:46 -07:00
										 |  |  | 				}
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 			} else { // JSON case
 | 
					
						
							|  |  |  | 				level++
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 			}
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 		case right[len(right)-1]: // JSON case only reaches here
 | 
					
						
							|  |  |  | 			level--
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		}
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 		if level == 0 {
 | 
					
						
							|  |  |  | 			// Consumes white spaces immediately behind frontmatter
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 			if err = chompWhitespace(r); err != nil {
 | 
					
						
							|  |  |  | 				if err != io.EOF {
 | 
					
						
							|  |  |  | 					return nil, err
 | 
					
						
							|  |  |  | 				}
 | 
					
						
							|  |  |  | 			}
 | 
					
						
							| 
									
										
										
										
											2015-01-10 16:15:51 +09:00
										 |  |  | 			return buf.Bytes(), nil
 | 
					
						
							| 
									
										
										
										
											2013-09-17 15:52:40 -07:00
										 |  |  | 		}
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func extractContent(r io.Reader) (content Content, err error) {
 | 
					
						
							|  |  |  | 	wr := new(bytes.Buffer)
 | 
					
						
							|  |  |  | 	if _, err = wr.ReadFrom(r); err != nil {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	return wr.Bytes(), nil
 | 
					
						
							|  |  |  | }
 |