mirror of
				https://github.com/gohugoio/hugo.git
				synced 2024-05-11 05:54:58 +00:00 
			
		
		
		
	Make the HTML collector parsing more robust
Most notably, better handling of self-closing elements. Closes #10698
This commit is contained in:
		@@ -294,9 +294,10 @@ func htmlLexElementStart(w *htmlElementsCollectorWriter) htmlCollectorStateFunc
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		tagName := w.buff.Bytes()[1:]
 | 
			
		||||
		isSelfClosing := tagName[len(tagName)-1] == '/'
 | 
			
		||||
 | 
			
		||||
		switch {
 | 
			
		||||
		case skipInnerElementRe.Match(tagName):
 | 
			
		||||
		case !isSelfClosing && skipInnerElementRe.Match(tagName):
 | 
			
		||||
			// pre, script etc. We collect classes etc. on the surrounding
 | 
			
		||||
			// element, but skip the inner content.
 | 
			
		||||
			w.backup()
 | 
			
		||||
@@ -432,10 +433,18 @@ func parseStartTag(s string) string {
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	if spaceIndex == -1 {
 | 
			
		||||
		return s[1 : len(s)-1]
 | 
			
		||||
		s = s[1 : len(s)-1]
 | 
			
		||||
	} else {
 | 
			
		||||
		s = s[1:spaceIndex]
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return s[1:spaceIndex]
 | 
			
		||||
	if s[len(s)-1] == '/' {
 | 
			
		||||
		// Self closing.
 | 
			
		||||
		s = s[:len(s)-1]
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return s
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// isClosedByTag reports whether b ends with a closing tag for tagName.
 | 
			
		||||
@@ -487,7 +496,7 @@ LOOP:
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if state != 2 {
 | 
			
		||||
	if state != 2 || lo >= hi {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -110,6 +110,9 @@ func TestClassCollector(t *testing.T) {
 | 
			
		||||
		{"DOCTYPE should beskipped", `<!DOCTYPE html>`, f("", "", "")},
 | 
			
		||||
		{"Comments should be skipped", `<!-- example comment -->`, f("", "", "")},
 | 
			
		||||
		{"Comments with elements before and after", `<div></div><!-- example comment --><span><span>`, f("div span", "", "")},
 | 
			
		||||
		{"Self closing tag", `<div><hr/></div>`, f("div hr", "", "")},
 | 
			
		||||
		// svg with self closing style tag.
 | 
			
		||||
		{"SVG with self closing style tag", `<svg><style/><g><path class="foo"/></g></svg>`, f("g path style svg", "foo", "")},
 | 
			
		||||
		// Issue #8530
 | 
			
		||||
		{"Comment with single quote", `<!-- Hero Area Image d'accueil --><i class="foo">`, f("i", "foo", "")},
 | 
			
		||||
		{"Uppercase tags", `<DIV></DIV>`, f("div", "", "")},
 | 
			
		||||
@@ -174,6 +177,7 @@ func TestEndsWithTag(t *testing.T) {
 | 
			
		||||
		{"match space", "foo<  / div>", "div", true},
 | 
			
		||||
		{"match space 2", "foo<  / div   \n>", "div", true},
 | 
			
		||||
		{"match case", "foo</DIV>", "div", true},
 | 
			
		||||
		{"self closing", `</defs><g><g><path fill="#010101" d=asdf"/>`, "div", false},
 | 
			
		||||
	} {
 | 
			
		||||
		c.Run(test.name, func(c *qt.C) {
 | 
			
		||||
			got := isClosedByTag([]byte(test.s), []byte(test.tagName))
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user