mirror of
				https://github.com/gohugoio/hugo.git
				synced 2024-05-11 05:54:58 +00:00 
			
		
		
		
	publisher: Skip script, pre and textarea content when looking for HTML elements
Updates #7567
This commit is contained in:
		@@ -64,7 +64,7 @@ type cssClassCollectorWriter struct {
 | 
				
			|||||||
	buff      bytes.Buffer
 | 
						buff      bytes.Buffer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	isCollecting bool
 | 
						isCollecting bool
 | 
				
			||||||
	dropValue    bool
 | 
						inPreTag     string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	inQuote    bool
 | 
						inQuote    bool
 | 
				
			||||||
	quoteValue byte
 | 
						quoteValue byte
 | 
				
			||||||
@@ -90,49 +90,58 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
 | 
				
			|||||||
				b := p[i]
 | 
									b := p[i]
 | 
				
			||||||
				w.toggleIfQuote(b)
 | 
									w.toggleIfQuote(b)
 | 
				
			||||||
				if !w.inQuote && b == '>' {
 | 
									if !w.inQuote && b == '>' {
 | 
				
			||||||
					w.endCollecting(false)
 | 
										w.endCollecting()
 | 
				
			||||||
					break
 | 
										break
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				w.buff.WriteByte(b)
 | 
									w.buff.WriteByte(b)
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if !w.isCollecting {
 | 
								if !w.isCollecting {
 | 
				
			||||||
				if w.dropValue {
 | 
									if w.inPreTag != "" {
 | 
				
			||||||
					w.buff.Reset()
 | 
					 | 
				
			||||||
				} else {
 | 
					 | 
				
			||||||
					// First check if we have processed this element before.
 | 
					 | 
				
			||||||
					w.collector.mu.RLock()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					// See https://github.com/dominikh/go-tools/issues/723
 | 
					 | 
				
			||||||
					//lint:ignore S1030 This construct avoids memory allocation for the string.
 | 
					 | 
				
			||||||
					seen := w.collector.elementSet[string(w.buff.Bytes())]
 | 
					 | 
				
			||||||
					w.collector.mu.RUnlock()
 | 
					 | 
				
			||||||
					if seen {
 | 
					 | 
				
			||||||
						w.buff.Reset()
 | 
					 | 
				
			||||||
						continue
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					s := w.buff.String()
 | 
										s := w.buff.String()
 | 
				
			||||||
 | 
										if tagName, isEnd := w.parseEndTag(s); isEnd && w.inPreTag == tagName {
 | 
				
			||||||
 | 
											w.inPreTag = ""
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
					w.buff.Reset()
 | 
										w.buff.Reset()
 | 
				
			||||||
 | 
										continue
 | 
				
			||||||
					if strings.HasPrefix(s, "</") {
 | 
					 | 
				
			||||||
						continue
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					key := s
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					s, tagName := w.insertStandinHTMLElement(s)
 | 
					 | 
				
			||||||
					el := parseHTMLElement(s)
 | 
					 | 
				
			||||||
					el.Tag = tagName
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
					w.collector.mu.Lock()
 | 
					 | 
				
			||||||
					w.collector.elementSet[key] = true
 | 
					 | 
				
			||||||
					if el.Tag != "" {
 | 
					 | 
				
			||||||
						w.collector.elements = append(w.collector.elements, el)
 | 
					 | 
				
			||||||
					}
 | 
					 | 
				
			||||||
					w.collector.mu.Unlock()
 | 
					 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									// First check if we have processed this element before.
 | 
				
			||||||
 | 
									w.collector.mu.RLock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									// See https://github.com/dominikh/go-tools/issues/723
 | 
				
			||||||
 | 
									//lint:ignore S1030 This construct avoids memory allocation for the string.
 | 
				
			||||||
 | 
									seen := w.collector.elementSet[string(w.buff.Bytes())]
 | 
				
			||||||
 | 
									w.collector.mu.RUnlock()
 | 
				
			||||||
 | 
									if seen {
 | 
				
			||||||
 | 
										w.buff.Reset()
 | 
				
			||||||
 | 
										continue
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									s := w.buff.String()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									w.buff.Reset()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									if strings.HasPrefix(s, "</") {
 | 
				
			||||||
 | 
										continue
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									key := s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									s, tagName := w.insertStandinHTMLElement(s)
 | 
				
			||||||
 | 
									el := parseHTMLElement(s)
 | 
				
			||||||
 | 
									el.Tag = tagName
 | 
				
			||||||
 | 
									if w.isPreFormatted(tagName) {
 | 
				
			||||||
 | 
										w.inPreTag = tagName
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
									w.collector.mu.Lock()
 | 
				
			||||||
 | 
									w.collector.elementSet[key] = true
 | 
				
			||||||
 | 
									if el.Tag != "" {
 | 
				
			||||||
 | 
										w.collector.elements = append(w.collector.elements, el)
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									w.collector.mu.Unlock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -140,6 +149,11 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
 | 
				
			|||||||
	return
 | 
						return
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// No need to look inside these for HTML elements.
 | 
				
			||||||
 | 
					func (c *cssClassCollectorWriter) isPreFormatted(s string) bool {
 | 
				
			||||||
 | 
						return s == "pre" || s == "textarea" || s == "script"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// The net/html parser does not handle single table elements as input, e.g. tbody.
 | 
					// The net/html parser does not handle single table elements as input, e.g. tbody.
 | 
				
			||||||
// We only care about the element/class/ids, so just store away the original tag name
 | 
					// We only care about the element/class/ids, so just store away the original tag name
 | 
				
			||||||
// and pretend it's a <div>.
 | 
					// and pretend it's a <div>.
 | 
				
			||||||
@@ -154,15 +168,24 @@ func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, s
 | 
				
			|||||||
	return newv, strings.ToLower(tag)
 | 
						return newv, strings.ToLower(tag)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (c *cssClassCollectorWriter) endCollecting(drop bool) {
 | 
					func (c *cssClassCollectorWriter) parseEndTag(s string) (string, bool) {
 | 
				
			||||||
 | 
						if !strings.HasPrefix(s, "</") {
 | 
				
			||||||
 | 
							return "", false
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						s = strings.TrimPrefix(s, "</")
 | 
				
			||||||
 | 
						s = strings.TrimSuffix(s, ">")
 | 
				
			||||||
 | 
						return strings.ToLower(strings.TrimSpace(s)), true
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (c *cssClassCollectorWriter) endCollecting() {
 | 
				
			||||||
	c.isCollecting = false
 | 
						c.isCollecting = false
 | 
				
			||||||
	c.inQuote = false
 | 
						c.inQuote = false
 | 
				
			||||||
	c.dropValue = drop
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (c *cssClassCollectorWriter) startCollecting() {
 | 
					func (c *cssClassCollectorWriter) startCollecting() {
 | 
				
			||||||
	c.isCollecting = true
 | 
						c.isCollecting = true
 | 
				
			||||||
	c.dropValue = false
 | 
					
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {
 | 
					func (c *cssClassCollectorWriter) toggleIfQuote(b byte) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -89,8 +89,12 @@ func TestClassCollector(t *testing.T) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
		{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
 | 
							{"Alpine transition 1", `<div x-transition:enter-start="opacity-0 transform mobile:-translate-x-8 sm:-translate-y-8">`, f("div", "mobile:-translate-x-8 opacity-0 sm:-translate-y-8 transform", "")},
 | 
				
			||||||
		{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
 | 
							{"Vue bind", `<div v-bind:class="{ active: isActive }"></div>`, f("div", "active", "")},
 | 
				
			||||||
		// https://github.com/gohugoio/hugo/issues/7746
 | 
							// Issue #7746
 | 
				
			||||||
		{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
 | 
							{"Apostrophe inside attribute value", `<a class="missingclass" title="Plus d'information">my text</a><div></div>`, f("a div", "missingclass", "")},
 | 
				
			||||||
 | 
							// Issue #7567
 | 
				
			||||||
 | 
							{"Script tags content should be skipped", `<script><span>foo</span><span>bar</span></script><div class="foo"></div>`, f("div script", "foo", "")},
 | 
				
			||||||
 | 
							{"Pre tags content should be skipped", `<pre class="preclass"><span>foo</span><span>bar</span></pre><div class="foo"></div>`, f("div pre", "foo preclass", "")},
 | 
				
			||||||
 | 
							{"Textare tags content should be skipped", `<textarea class="textareaclass"><span>foo</span><span>bar</span></textarea><div class="foo"></div>`, f("div textarea", "foo textareaclass", "")},
 | 
				
			||||||
	} {
 | 
						} {
 | 
				
			||||||
		c.Run(test.name, func(c *qt.C) {
 | 
							c.Run(test.name, func(c *qt.C) {
 | 
				
			||||||
			w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
 | 
								w := newHTMLElementsCollectorWriter(newHTMLElementsCollector())
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user