| 
									
										
										
										
											2018-08-05 11:13:49 +02:00
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-05 11:13:49 +02:00
										 |  |  | package urlreplacers
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | import (
 | 
					
						
							|  |  |  | 	"bytes"
 | 
					
						
							| 
									
										
										
										
											2015-03-18 00:36:48 +01:00
										 |  |  | 	"io"
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	"unicode"
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 	"unicode/utf8"
 | 
					
						
							| 
									
										
										
										
											2018-08-05 11:13:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/gohugoio/hugo/transform"
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | )
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-04 12:51:48 +02:00
										 |  |  | type absurllexer struct {
 | 
					
						
							|  |  |  | 	// the source to absurlify
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 	content []byte
 | 
					
						
							| 
									
										
										
										
											2015-05-04 12:51:48 +02:00
										 |  |  | 	// the target for the new absurlified content
 | 
					
						
							|  |  |  | 	w io.Writer
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-16 00:11:39 +02:00
										 |  |  | 	// path may be set to a "." relative path
 | 
					
						
							|  |  |  | 	path []byte
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | 	pos   int // input position
 | 
					
						
							|  |  |  | 	start int // item start position
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	quotes [][]byte
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | type prefix struct {
 | 
					
						
							| 
									
										
										
										
											2018-12-17 17:42:46 +01:00
										 |  |  | 	disabled bool
 | 
					
						
							|  |  |  | 	b        []byte
 | 
					
						
							|  |  |  | 	f        func(l *absurllexer)
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | }
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 17:42:46 +01:00
										 |  |  | func newPrefixState() []*prefix {
 | 
					
						
							|  |  |  | 	return []*prefix{
 | 
					
						
							|  |  |  | 		{b: []byte("src="), f: checkCandidateBase},
 | 
					
						
							|  |  |  | 		{b: []byte("href="), f: checkCandidateBase},
 | 
					
						
							| 
									
										
										
										
											2019-02-26 13:41:06 +05:30
										 |  |  | 		{b: []byte("action="), f: checkCandidateBase},
 | 
					
						
							| 
									
										
										
										
											2018-12-17 17:42:46 +01:00
										 |  |  | 		{b: []byte("srcset="), f: checkCandidateSrcset},
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2015-05-03 22:42:56 +02:00
										 |  |  | }
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-04 12:51:48 +02:00
										 |  |  | func (l *absurllexer) emit() {
 | 
					
						
							| 
									
										
										
										
											2015-03-18 00:36:48 +01:00
										 |  |  | 	l.w.Write(l.content[l.start:l.pos])
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 	l.start = l.pos
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | var (
 | 
					
						
							|  |  |  | 	relURLPrefix    = []byte("/")
 | 
					
						
							|  |  |  | 	relURLPrefixLen = len(relURLPrefix)
 | 
					
						
							|  |  |  | )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (l *absurllexer) consumeQuote() []byte {
 | 
					
						
							|  |  |  | 	for _, q := range l.quotes {
 | 
					
						
							|  |  |  | 		if bytes.HasPrefix(l.content[l.pos:], q) {
 | 
					
						
							|  |  |  | 			l.pos += len(q)
 | 
					
						
							| 
									
										
										
										
											2015-05-03 22:42:56 +02:00
										 |  |  | 			l.emit()
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 			return q
 | 
					
						
							| 
									
										
										
										
											2015-05-03 22:42:56 +02:00
										 |  |  | 		}
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	return nil
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // handle URLs in src and href.
 | 
					
						
							|  |  |  | func checkCandidateBase(l *absurllexer) {
 | 
					
						
							|  |  |  | 	l.consumeQuote()
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// check for schemaless URLs
 | 
					
						
							|  |  |  | 	posAfter := l.pos + relURLPrefixLen
 | 
					
						
							|  |  |  | 	if posAfter >= len(l.content) {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	r, _ := utf8.DecodeRune(l.content[posAfter:])
 | 
					
						
							|  |  |  | 	if r == '/' {
 | 
					
						
							|  |  |  | 		// schemaless: skip
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	if l.pos > l.start {
 | 
					
						
							|  |  |  | 		l.emit()
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	l.pos += relURLPrefixLen
 | 
					
						
							|  |  |  | 	l.w.Write(l.path)
 | 
					
						
							|  |  |  | 	l.start = l.pos
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (l *absurllexer) posAfterURL(q []byte) int {
 | 
					
						
							|  |  |  | 	if len(q) > 0 {
 | 
					
						
							|  |  |  | 		// look for end quote
 | 
					
						
							|  |  |  | 		return bytes.Index(l.content[l.pos:], q)
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return bytes.IndexFunc(l.content[l.pos:], func(r rune) bool {
 | 
					
						
							|  |  |  | 		return r == '>' || unicode.IsSpace(r)
 | 
					
						
							|  |  |  | 	})
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-04 12:51:48 +02:00
										 |  |  | // handle URLs in srcset.
 | 
					
						
							|  |  |  | func checkCandidateSrcset(l *absurllexer) {
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	q := l.consumeQuote()
 | 
					
						
							|  |  |  | 	if q == nil {
 | 
					
						
							|  |  |  | 		// srcset needs to be quoted.
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	// special case, not frequent (me think)
 | 
					
						
							|  |  |  | 	if !bytes.HasPrefix(l.content[l.pos:], relURLPrefix) {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	// check for schemaless URLs
 | 
					
						
							|  |  |  | 	posAfter := l.pos + relURLPrefixLen
 | 
					
						
							|  |  |  | 	if posAfter >= len(l.content) {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							|  |  |  | 	r, _ := utf8.DecodeRune(l.content[posAfter:])
 | 
					
						
							|  |  |  | 	if r == '/' {
 | 
					
						
							|  |  |  | 		// schemaless: skip
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	posEnd := l.posAfterURL(q)
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	// safe guard
 | 
					
						
							|  |  |  | 	if posEnd < 0 || posEnd > 2000 {
 | 
					
						
							|  |  |  | 		return
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	if l.pos > l.start {
 | 
					
						
							|  |  |  | 		l.emit()
 | 
					
						
							|  |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	section := l.content[l.pos : l.pos+posEnd+1]
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	fields := bytes.Fields(section)
 | 
					
						
							|  |  |  | 	for i, f := range fields {
 | 
					
						
							|  |  |  | 		if f[0] == '/' {
 | 
					
						
							|  |  |  | 			l.w.Write(l.path)
 | 
					
						
							|  |  |  | 			l.w.Write(f[1:])
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 		} else {
 | 
					
						
							|  |  |  | 			l.w.Write(f)
 | 
					
						
							| 
									
										
										
										
											2015-05-03 19:54:17 +02:00
										 |  |  | 		}
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 		if i < len(fields)-1 {
 | 
					
						
							|  |  |  | 			l.w.Write([]byte(" "))
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | 	}
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	l.pos += len(section)
 | 
					
						
							|  |  |  | 	l.start = l.pos
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-04 12:51:48 +02:00
										 |  |  | // main loop
 | 
					
						
							|  |  |  | func (l *absurllexer) replace() {
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 	contentLength := len(l.content)
 | 
					
						
							| 
									
										
										
										
											2018-12-17 17:42:46 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	prefixes := newPrefixState()
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for {
 | 
					
						
							| 
									
										
										
										
											2015-03-18 20:18:18 +01:00
										 |  |  | 		if l.pos >= contentLength {
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 			break
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 17:42:46 +01:00
										 |  |  | 		nextPos := -1
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		var match *prefix
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		for _, p := range prefixes {
 | 
					
						
							|  |  |  | 			if p.disabled {
 | 
					
						
							|  |  |  | 				continue
 | 
					
						
							|  |  |  | 			}
 | 
					
						
							|  |  |  | 			idx := bytes.Index(l.content[l.pos:], p.b)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if idx == -1 {
 | 
					
						
							|  |  |  | 				p.disabled = true
 | 
					
						
							|  |  |  | 				// Find the closest match
 | 
					
						
							|  |  |  | 			} else if nextPos == -1 || idx < nextPos {
 | 
					
						
							|  |  |  | 				nextPos = idx
 | 
					
						
							|  |  |  | 				match = p
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 			}
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							| 
									
										
										
										
											2018-12-17 17:42:46 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if nextPos == -1 {
 | 
					
						
							|  |  |  | 			// Done!
 | 
					
						
							|  |  |  | 			l.pos = contentLength
 | 
					
						
							|  |  |  | 			break
 | 
					
						
							|  |  |  | 		} else {
 | 
					
						
							|  |  |  | 			l.pos += nextPos + len(match.b)
 | 
					
						
							|  |  |  | 			match.f(l)
 | 
					
						
							|  |  |  | 		}
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 	}
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Done!
 | 
					
						
							|  |  |  | 	if l.pos > l.start {
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | 		l.emit()
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | 	}
 | 
					
						
							|  |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | func doReplace(path string, ct transform.FromTo, quotes [][]byte) {
 | 
					
						
							| 
									
										
										
										
											2015-05-16 00:11:39 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-04 12:51:48 +02:00
										 |  |  | 	lexer := &absurllexer{
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 		content: ct.From().Bytes(),
 | 
					
						
							|  |  |  | 		w:       ct.To(),
 | 
					
						
							|  |  |  | 		path:    []byte(path),
 | 
					
						
							|  |  |  | 		quotes:  quotes}
 | 
					
						
							| 
									
										
										
										
											2015-02-17 04:33:44 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-10 18:44:32 +01:00
										 |  |  | 	lexer.replace()
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-03-11 11:34:57 -06:00
										 |  |  | type absURLReplacer struct {
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	htmlQuotes [][]byte
 | 
					
						
							|  |  |  | 	xmlQuotes  [][]byte
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-16 00:11:39 +02:00
										 |  |  | func newAbsURLReplacer() *absURLReplacer {
 | 
					
						
							| 
									
										
										
										
											2015-03-11 11:34:57 -06:00
										 |  |  | 	return &absURLReplacer{
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 		htmlQuotes: [][]byte{[]byte("\""), []byte("'")},
 | 
					
						
							|  |  |  | 		xmlQuotes:  [][]byte{[]byte("""), []byte("'")}}
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-05 11:13:49 +02:00
										 |  |  | func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	doReplace(path, ct, au.htmlQuotes)
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-05 11:13:49 +02:00
										 |  |  | func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
 | 
					
						
							| 
									
										
										
										
											2018-12-17 14:25:00 +01:00
										 |  |  | 	doReplace(path, ct, au.xmlQuotes)
 | 
					
						
							| 
									
										
										
										
											2015-02-16 10:48:15 +01:00
										 |  |  | }
 |