exp/template/html: change transition functions to return indices

Formulaic changes to transition functions in preparation for CL 5074041. This should be completely semantics preserving. R=nigeltao CC=golang-dev https://golang.org/cl/5091041
2024-11-22 02:04:40 -07:00 · 2011-09-19 20:52:14 -07:00 · 2011-09-19 20:52:14 -07:00 · 3a013f1175
commit 3a013f1175
parent 3c3a86ccc7
3 changed files with 147 additions and 143 deletions
--- a/src/pkg/exp/template/html/escape.go
+++ b/src/pkg/exp/template/html/escape.go
@ -547,22 +547,22 @@ var delimEnds = [...]string{

 // escapeText escapes a text template node.
 func (e *escaper) escapeText(c context, n *parse.TextNode) context {
-	s, written := n.Text, 0
-	var b bytes.Buffer
-	for len(s) > 0 {
-		c1, s1 := contextAfterText(c, s)
+	s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
+	for i != len(s) {
+		c1, nread := contextAfterText(c, s[i:])
+		i1 := i + nread
 		if c.state == c1.state && (c.state == stateText || c.state == stateRCDATA) {
-			i0, i1 := len(n.Text)-len(s), len(n.Text)-len(s1)
-			for i := i0; i < i1; i++ {
-				if n.Text[i] == '<' {
-					b.Write(n.Text[written:i])
+			for j := i; j < i1; j++ {
+				if s[j] == '<' {
+					b.Write(s[written:j])
 					b.WriteString("&lt;")
-					written = i + 1
+					written = j + 1
 				}
 			}
 		}
-		c, s = c1, s1
+		c, i = c1, i1
 	}
+
 	if written != 0 && c.state != stateError {
 		b.Write(n.Text[written:])
 		e.editTextNode(n, b.Bytes())
@ -572,7 +572,7 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {

 // contextAfterText starts in context c, consumes some tokens from the front of
 // s, then returns the context after those tokens and the unprocessed suffix.
-func contextAfterText(c context, s []byte) (context, []byte) {
+func contextAfterText(c context, s []byte) (context, int) {
 	if c.delim == delimNone {
 		return transitionFunc[c.state](c, s)
 	}
@ -584,9 +584,10 @@ func contextAfterText(c context, s []byte) (context, []byte) {
 		//     <button onclick="alert(&quot;Hi!&quot;)">
 		// without having to entity decode token boundaries.
 		for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
-			c, u = transitionFunc[c.state](c, u)
+			c1, i1 := transitionFunc[c.state](c, u)
+			c, u = c1, u[i1:]
 		}
-		return c, nil
+		return c, len(s)
 	}
 	if c.delim != delimSpaceOrTagEnd {
 		// Consume any quote.
@ -594,7 +595,7 @@ func contextAfterText(c context, s []byte) (context, []byte) {
 	}
 	// On exiting an attribute, we discard all state information
 	// except the state and element.
-	return context{state: stateTag, element: c.element}, s[i:]
+	return context{state: stateTag, element: c.element}, i
 }

 // editActionNode records a change to an action pipeline for later commit.
--- a/src/pkg/exp/template/html/html.go
+++ b/src/pkg/exp/template/html/html.go
@ -165,43 +165,44 @@ func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
 // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
 func stripTags(html string) string {
 	var b bytes.Buffer
-	s, c := []byte(html), context{}
+	s, c, i := []byte(html), context{}, 0
 	// Using the transition funcs helps us avoid mangling
 	// `<div title="1>2">` or `I <3 Ponies!`.
-	for len(s) > 0 {
+	for i != len(s) {
 		if c.delim == delimNone {
-			d, t := transitionFunc[c.state](c, s)
+			d, nread := transitionFunc[c.state](c, s[i:])
+			i1 := i + nread
 			if c.state == stateText || c.state == stateRCDATA {
-				i := len(s) - len(t)
 				// Emit text up to the start of the tag or comment.
+				j := i1
 				if d.state != c.state {
-					for j := i - 1; j >= 0; j-- {
-						if s[j] == '<' {
-							i = j
+					for j1 := j - 1; j1 >= i; j1-- {
+						if s[j1] == '<' {
+							j = j1
 							break
 						}
 					}
 				}
-				b.Write(s[:i])
+				b.Write(s[i:j])
 			}
-			c, s = d, t
+			c, i = d, i1
 			continue
 		}
-		i := bytes.IndexAny(s, delimEnds[c.delim])
-		if i == -1 {
+		i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
+		if i1 < i {
 			break
 		}
 		if c.delim != delimSpaceOrTagEnd {
 			// Consume any quote.
-			i++
+			i1++
 		}
-		c, s = context{state: stateTag, element: c.element}, s[i:]
+		c, i = context{state: stateTag, element: c.element}, i1
 	}
 	if c.state == stateText {
 		if b.Len() == 0 {
 			return html
 		}
-		b.Write(s)
+		b.Write(s[i:])
 	}
 	return b.String()
 }
--- a/src/pkg/exp/template/html/transition.go
+++ b/src/pkg/exp/template/html/transition.go
@ -14,8 +14,9 @@ import (

 // transitionFunc is the array of context transition functions for text nodes.
 // A transition function takes a context and template text input, and returns
-// the updated context and any unconsumed text.
-var transitionFunc = [...]func(context, []byte) (context, []byte){
+// the updated context and the number of bytes consumed from the front of the
+// input.
+var transitionFunc = [...]func(context, []byte) (context, int){
 	stateText:        tText,
 	stateTag:         tTag,
 	stateAttrName:    tAttrName,
@ -46,27 +47,28 @@ var commentStart = []byte("<!--")
 var commentEnd = []byte("-->")

 // tText is the context transition function for the text state.
-func tText(c context, s []byte) (context, []byte) {
+func tText(c context, s []byte) (context, int) {
+	k := 0
 	for {
-		i := bytes.IndexByte(s, '<')
-		if i == -1 || i+1 == len(s) {
-			return c, nil
+		i := k + bytes.IndexByte(s[k:], '<')
+		if i < k || i+1 == len(s) {
+			return c, len(s)
 		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
-			return context{state: stateHTMLCmt}, s[i+4:]
+			return context{state: stateHTMLCmt}, i + 4
 		}
 		i++
 		if s[i] == '/' {
 			if i+1 == len(s) {
-				return c, nil
+				return c, len(s)
 			}
 			i++
 		}
 		j, e := eatTagName(s, i)
 		if j != i {
 			// We've found an HTML tag.
-			return context{state: stateTag, element: e}, s[j:]
+			return context{state: stateTag, element: e}, j
 		}
-		s = s[j:]
+		k = j
 	}
 	panic("unreachable")
 }
@ -80,21 +82,21 @@ var elementContentType = [...]state{
 }

 // tTag is the context transition function for the tag state.
-func tTag(c context, s []byte) (context, []byte) {
+func tTag(c context, s []byte) (context, int) {
 	// Find the attribute name.
 	i := eatWhiteSpace(s, 0)
 	if i == len(s) {
-		return c, nil
+		return c, len(s)
 	}
 	if s[i] == '>' {
 		return context{
 			state:   elementContentType[c.element],
 			element: c.element,
-		}, s[i+1:]
+		}, i + 1
 	}
 	j, err := eatAttrName(s, i)
 	if err != nil {
-		return context{state: stateError, err: err}, nil
+		return context{state: stateError, err: err}, len(s)
 	}
 	state, attr := stateTag, attrNone
 	if i != j {
@ -112,35 +114,35 @@ func tTag(c context, s []byte) (context, []byte) {
 			state = stateAfterName
 		}
 	}
-	return context{state: state, element: c.element, attr: attr}, s[j:]
+	return context{state: state, element: c.element, attr: attr}, j
 }

 // tAttrName is the context transition function for stateAttrName.
-func tAttrName(c context, s []byte) (context, []byte) {
+func tAttrName(c context, s []byte) (context, int) {
 	i, err := eatAttrName(s, 0)
 	if err != nil {
-		return context{state: stateError, err: err}, nil
+		return context{state: stateError, err: err}, len(s)
 	} else if i == len(s) {
-		return c, nil
+		return c, len(s)
 	}
 	c.state = stateAfterName
-	return c, s[i:]
+	return c, i
 }

 // tAfterName is the context transition function for stateAfterName.
-func tAfterName(c context, s []byte) (context, []byte) {
+func tAfterName(c context, s []byte) (context, int) {
 	// Look for the start of the value.
 	i := eatWhiteSpace(s, 0)
 	if i == len(s) {
-		return c, nil
+		return c, len(s)
 	} else if s[i] != '=' {
 		// Occurs due to tag ending '>', and valueless attribute.
 		c.state = stateTag
-		return c, s[i:]
+		return c, i
 	}
 	c.state = stateBeforeValue
 	// Consume the "=".
-	return c, s[i+1:]
+	return c, i + 1
 }

 var attrStartStates = [...]state{
@ -151,10 +153,10 @@ var attrStartStates = [...]state{
 }

 // tBeforeValue is the context transition function for stateBeforeValue.
-func tBeforeValue(c context, s []byte) (context, []byte) {
+func tBeforeValue(c context, s []byte) (context, int) {
 	i := eatWhiteSpace(s, 0)
 	if i == len(s) {
-		return c, nil
+		return c, len(s)
 	}
 	// Find the attribute delimiter.
 	delim := delimSpaceOrTagEnd
@ -165,16 +167,16 @@ func tBeforeValue(c context, s []byte) (context, []byte) {
 		delim, i = delimDoubleQuote, i+1
 	}
 	c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
-	return c, s[i:]
+	return c, i
 }

 // tHTMLCmt is the context transition function for stateHTMLCmt.
-func tHTMLCmt(c context, s []byte) (context, []byte) {
+func tHTMLCmt(c context, s []byte) (context, int) {
 	i := bytes.Index(s, commentEnd)
 	if i != -1 {
-		return context{}, s[i+3:]
+		return context{}, i + 3
 	}
-	return c, nil
+	return c, len(s)
 }

 // specialTagEndMarkers maps element types to the character sequence that
@ -188,24 +190,24 @@ var specialTagEndMarkers = [...]string{

 // tSpecialTagEnd is the context transition function for raw text and RCDATA
 // element states.
-func tSpecialTagEnd(c context, s []byte) (context, []byte) {
+func tSpecialTagEnd(c context, s []byte) (context, int) {
 	if c.element != elementNone {
 		end := specialTagEndMarkers[c.element]
 		i := strings.Index(strings.ToLower(string(s)), end)
 		if i != -1 {
-			return context{state: stateTag}, s[i+len(end):]
+			return context{state: stateTag}, i + len(end)
 		}
 	}
-	return c, nil
+	return c, len(s)
 }

 // tAttr is the context transition function for the attribute state.
-func tAttr(c context, s []byte) (context, []byte) {
-	return c, nil
+func tAttr(c context, s []byte) (context, int) {
+	return c, len(s)
 }

 // tURL is the context transition function for the URL state.
-func tURL(c context, s []byte) (context, []byte) {
+func tURL(c context, s []byte) (context, int) {
 	if bytes.IndexAny(s, "#?") >= 0 {
 		c.urlPart = urlPartQueryOrFrag
 	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
@ -213,20 +215,20 @@ func tURL(c context, s []byte) (context, []byte) {
 		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
 		c.urlPart = urlPartPreQuery
 	}
-	return c, nil
+	return c, len(s)
 }

 // tJS is the context transition function for the JS state.
-func tJS(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tJS(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}

 	i := bytes.IndexAny(s, `"'/`)
 	if i == -1 {
 		// Entire input is non string, comment, regexp tokens.
 		c.jsCtx = nextJSCtx(s, c.jsCtx)
-		return c, nil
+		return c, len(s)
 	}
 	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
 	switch s[i] {
@ -248,18 +250,18 @@ func tJS(c context, s []byte) (context, []byte) {
 			return context{
 				state: stateError,
 				err:   errorf(ErrSlashAmbig, 0, "'/' could start div or regexp: %.32q", s[i:]),
-			}, nil
+			}, len(s)
 		}
 	default:
 		panic("unreachable")
 	}
-	return c, s[i+1:]
+	return c, i + 1
 }

 // tJSStr is the context transition function for the JS string states.
-func tJSStr(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tJSStr(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}

 	quoteAndEsc := `\"`
@ -267,55 +269,54 @@ func tJSStr(c context, s []byte) (context, []byte) {
 		quoteAndEsc = `\'`
 	}

-	b := s
+	k := 0
 	for {
-		i := bytes.IndexAny(b, quoteAndEsc)
-		if i == -1 {
-			return c, nil
+		i := k + bytes.IndexAny(s[k:], quoteAndEsc)
+		if i < k {
+			return c, len(s)
 		}
-		if b[i] == '\\' {
+		if s[i] == '\\' {
 			i++
-			if i == len(b) {
+			if i == len(s) {
 				return context{
 					state: stateError,
 					err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
-				}, nil
+				}, len(s)
 			}
 		} else {
 			c.state, c.jsCtx = stateJS, jsCtxDivOp
-			return c, b[i+1:]
+			return c, i + 1
 		}
-		b = b[i+1:]
+		k = i + 1
 	}
 	panic("unreachable")
 }

 // tJSRegexp is the context transition function for the /RegExp/ literal state.
-func tJSRegexp(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tJSRegexp(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}

-	b := s
-	inCharset := false
+	k, inCharset := 0, false
 	for {
-		i := bytes.IndexAny(b, `/[\]`)
-		if i == -1 {
+		i := k + bytes.IndexAny(s[k:], `\/[]`)
+		if i < k {
 			break
 		}
-		switch b[i] {
+		switch s[i] {
 		case '/':
 			if !inCharset {
 				c.state, c.jsCtx = stateJS, jsCtxDivOp
-				return c, b[i+1:]
+				return c, i + 1
 			}
 		case '\\':
 			i++
-			if i == len(b) {
+			if i == len(s) {
 				return context{
 					state: stateError,
 					err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS regexp: %q", s),
-				}, nil
+				}, len(s)
 			}
 		case '[':
 			inCharset = true
@ -324,7 +325,7 @@ func tJSRegexp(c context, s []byte) (context, []byte) {
 		default:
 			panic("unreachable")
 		}
-		b = b[i+1:]
+		k = i + 1
 	}

 	if inCharset {
@ -333,22 +334,22 @@ func tJSRegexp(c context, s []byte) (context, []byte) {
 		return context{
 			state: stateError,
 			err:   errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s),
-		}, nil
+		}, len(s)
 	}

-	return c, nil
+	return c, len(s)
 }

 var blockCommentEnd = []byte("*/")

 // tBlockCmt is the context transition function for /*comment*/ states.
-func tBlockCmt(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tBlockCmt(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}
 	i := bytes.Index(s, blockCommentEnd)
 	if i == -1 {
-		return c, nil
+		return c, len(s)
 	}
 	switch c.state {
 	case stateJSBlockCmt:
@ -358,13 +359,13 @@ func tBlockCmt(c context, s []byte) (context, []byte) {
 	default:
 		panic(c.state.String())
 	}
-	return c, s[i+2:]
+	return c, i + 2
 }

 // tLineCmt is the context transition function for //comment states.
-func tLineCmt(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tLineCmt(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}
 	var lineTerminators string
 	var endState state
@ -386,21 +387,21 @@ func tLineCmt(c context, s []byte) (context, []byte) {

 	i := bytes.IndexAny(s, lineTerminators)
 	if i == -1 {
-		return c, nil
+		return c, len(s)
 	}
 	c.state = endState
 	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
 	// "However, the LineTerminator at the end of the line is not
-	// considered to be part of the single-line comment; it is recognised
-	// separately by the lexical grammar and becomes part of the stream of
-	// input elements for the syntactic grammar."
-	return c, s[i:]
+	// considered to be part of the single-line comment; it is
+	// recognized separately by the lexical grammar and becomes part
+	// of the stream of input elements for the syntactic grammar."
+	return c, i
 }

 // tCSS is the context transition function for the CSS state.
-func tCSS(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tCSS(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}

 	// CSS quoted strings are almost never used except for:
@ -430,55 +431,55 @@ func tCSS(c context, s []byte) (context, []byte) {
 	// have the attribute name available if our conservative assumption
 	// proves problematic for real code.

+	k := 0
 	for {
-		i := bytes.IndexAny(s, `("'/`)
-		if i == -1 {
-			return c, nil
+		i := k + bytes.IndexAny(s[k:], `("'/`)
+		if i < k {
+			return c, len(s)
 		}
 		switch s[i] {
 		case '(':
 			// Look for url to the left.
 			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
 			if endsWithCSSKeyword(p, "url") {
-				q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
+				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
 				switch {
-				case len(q) != 0 && q[0] == '"':
-					c.state, s = stateCSSDqURL, q[1:]
-				case len(q) != 0 && q[0] == '\'':
-					c.state, s = stateCSSSqURL, q[1:]
-
+				case j != len(s) && s[j] == '"':
+					c.state, j = stateCSSDqURL, j+1
+				case j != len(s) && s[j] == '\'':
+					c.state, j = stateCSSSqURL, j+1
 				default:
-					c.state, s = stateCSSURL, q
+					c.state = stateCSSURL
 				}
-				return c, s
+				return c, j
 			}
 		case '/':
 			if i+1 < len(s) {
 				switch s[i+1] {
 				case '/':
 					c.state = stateCSSLineCmt
-					return c, s[i+2:]
+					return c, i + 2
 				case '*':
 					c.state = stateCSSBlockCmt
-					return c, s[i+2:]
+					return c, i + 2
 				}
 			}
 		case '"':
 			c.state = stateCSSDqStr
-			return c, s[i+1:]
+			return c, i + 1
 		case '\'':
 			c.state = stateCSSSqStr
-			return c, s[i+1:]
+			return c, i + 1
 		}
-		s = s[i+1:]
+		k = i + 1
 	}
 	panic("unreachable")
 }

 // tCSSStr is the context transition function for the CSS string and URL states.
-func tCSSStr(c context, s []byte) (context, []byte) {
-	if d, t := tSpecialTagEnd(c, s); t != nil {
-		return d, t
+func tCSSStr(c context, s []byte) (context, int) {
+	if d, i := tSpecialTagEnd(c, s); i != len(s) {
+		return d, i
 	}

 	var endAndEsc string
@ -495,33 +496,34 @@ func tCSSStr(c context, s []byte) (context, []byte) {
 		panic(c.state.String())
 	}

-	b := s
+	k := 0
 	for {
-		i := bytes.IndexAny(b, endAndEsc)
-		if i == -1 {
-			return tURL(c, decodeCSS(b))
+		i := k + bytes.IndexAny(s[k:], endAndEsc)
+		if i < k {
+			c, nread := tURL(c, decodeCSS(s[k:]))
+			return c, k + nread
 		}
-		if b[i] == '\\' {
+		if s[i] == '\\' {
 			i++
-			if i == len(b) {
+			if i == len(s) {
 				return context{
 					state: stateError,
 					err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s),
-				}, nil
+				}, len(s)
 			}
 		} else {
 			c.state = stateCSS
-			return c, b[i+1:]
+			return c, i + 1
 		}
-		c, _ = tURL(c, decodeCSS(b[:i+1]))
-		b = b[i+1:]
+		c, _ = tURL(c, decodeCSS(s[:i+1]))
+		k = i + 1
 	}
 	panic("unreachable")
 }

 // tError is the context transition function for the error state.
-func tError(c context, s []byte) (context, []byte) {
-	return c, nil
+func tError(c context, s []byte) (context, int) {
+	return c, len(s)
 }

 // eatAttrName returns the largest j such that s[i:j] is an attribute name.