From b4e1ca25b1fe02f37dcf6371732727fdd5036909 Mon Sep 17 00:00:00 2001 From: Mike Samuel Date: Sun, 18 Sep 2011 19:10:15 -0700 Subject: [PATCH] exp/template/html: allow quotes on either side of conditionals and dynamic HTML names This addresses several use cases: (1) used to build hierarchical documents. (2) used in widgets. (3)
used to embed bidi-hints. It also makes sure that we treat the two templates below the same: This splits up tTag into a number of sub-states and adds testcases. R=nigeltao CC=golang-dev https://golang.org/cl/5043042 --- src/pkg/exp/template/html/content.go | 9 +- src/pkg/exp/template/html/content_test.go | 26 +++++ src/pkg/exp/template/html/context.go | 55 +++++++++- src/pkg/exp/template/html/escape.go | 49 +++++++++ src/pkg/exp/template/html/escape_test.go | 63 +++++++++++- src/pkg/exp/template/html/html.go | 19 ++++ src/pkg/exp/template/html/transition.go | 119 ++++++++++++++-------- 7 files changed, 293 insertions(+), 47 deletions(-) diff --git a/src/pkg/exp/template/html/content.go b/src/pkg/exp/template/html/content.go index 4f79200405..8b9809b982 100644 --- a/src/pkg/exp/template/html/content.go +++ b/src/pkg/exp/template/html/content.go @@ -19,11 +19,15 @@ type ( CSS string // HTML encapsulates a known safe HTML document fragment. - // Should not be used for HTML from a third-party, or HTML with + // It should not be used for HTML from a third-party, or HTML with // unclosed tags or comments. The outputs of a sound HTML sanitizer // and a template escaped by this package are fine for use with HTML. HTML string + // HTMLAttr encapsulates an HTML attribute from a trusted source, + // for example: ` dir="ltr"`. + HTMLAttr string + // JS encapsulates a known safe EcmaScript5 Expression, or example, // `(x + y * z())`. // Template authors are responsible for ensuring that typed expressions @@ -56,6 +60,7 @@ const ( contentTypePlain contentType = iota contentTypeCSS contentTypeHTML + contentTypeHTMLAttr contentTypeJS contentTypeJSStr contentTypeURL @@ -71,6 +76,8 @@ func stringify(args ...interface{}) (string, contentType) { return string(s), contentTypeCSS case HTML: return string(s), contentTypeHTML + case HTMLAttr: + return string(s), contentTypeHTMLAttr case JS: return string(s), contentTypeJS case JSStr: diff --git a/src/pkg/exp/template/html/content_test.go b/src/pkg/exp/template/html/content_test.go index caef5ade8e..033dee1747 100644 --- a/src/pkg/exp/template/html/content_test.go +++ b/src/pkg/exp/template/html/content_test.go @@ -16,6 +16,7 @@ func TestTypedContent(t *testing.T) { ` "foo%" O'Reilly &bar;`, CSS(`a[href =~ "//example.com"]#foo`), HTML(`Hello, World &tc!`), + HTMLAttr(` dir="ltr"`), JS(`c && alert("Hello, World!");`), JSStr(`Hello, World & O'Reilly\x21`), URL(`greeting=H%69&addressee=(World)`), @@ -38,6 +39,7 @@ func TestTypedContent(t *testing.T) { `ZgotmplZ`, `ZgotmplZ`, `ZgotmplZ`, + `ZgotmplZ`, }, }, { @@ -50,6 +52,7 @@ func TestTypedContent(t *testing.T) { `ZgotmplZ`, `ZgotmplZ`, `ZgotmplZ`, + `ZgotmplZ`, }, }, { @@ -59,11 +62,25 @@ func TestTypedContent(t *testing.T) { `a[href =~ "//example.com"]#foo`, // Not escaped. `Hello, World &tc!`, + ` dir="ltr"`, `c && alert("Hello, World!");`, `Hello, World & O'Reilly\x21`, `greeting=H%69&addressee=(World)`, }, }, + { + ``, + []string{ + `ZgotmplZ`, + `ZgotmplZ`, + `ZgotmplZ`, + // Allowed and HTML escaped. + ` dir="ltr"`, + `ZgotmplZ`, + `ZgotmplZ`, + `ZgotmplZ`, + }, + }, { ``, []string{ @@ -71,6 +88,7 @@ func TestTypedContent(t *testing.T) { `a[href =~ "//example.com"]#foo`, // Tags stripped, spaces escaped, entity not re-escaped. `Hello, World &tc!`, + ` dir="ltr"`, `c && alert("Hello, World!");`, `Hello, World & O'Reilly\x21`, `greeting=H%69&addressee=(World)`, @@ -83,6 +101,7 @@ func TestTypedContent(t *testing.T) { `a[href =~ "//example.com"]#foo`, // Tags stripped, entity not re-escaped. `Hello, World &tc!`, + ` dir="ltr"`, `c && alert("Hello, World!");`, `Hello, World & O'Reilly\x21`, `greeting=H%69&addressee=(World)`, @@ -95,6 +114,7 @@ func TestTypedContent(t *testing.T) { `a[href =~ "//example.com"]#foo`, // Angle brackets escaped to prevent injection of close tags, entity not re-escaped. `Hello, <b>World</b> &tc!`, + ` dir="ltr"`, `c && alert("Hello, World!");`, `Hello, World & O'Reilly\x21`, `greeting=H%69&addressee=(World)`, @@ -106,6 +126,7 @@ func TestTypedContent(t *testing.T) { `"\u003cb\u003e \"foo%\" O'Reilly &bar;"`, `"a[href =~ \"//example.com\"]#foo"`, `"Hello, \u003cb\u003eWorld\u003c/b\u003e &tc!"`, + `" dir=\"ltr\""`, // Not escaped. `c && alert("Hello, World!");`, // Escape sequence not over-escaped. @@ -119,6 +140,7 @@ func TestTypedContent(t *testing.T) { `"\u003cb\u003e \"foo%\" O'Reilly &bar;"`, `"a[href =~ \"//example.com\"]#foo"`, `"Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;tc!"`, + `" dir=\"ltr\""`, // Not JS escaped but HTML escaped. `c && alert("Hello, World!");`, // Escape sequence not over-escaped. @@ -132,6 +154,7 @@ func TestTypedContent(t *testing.T) { `\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`, `a[href =~ \x22\/\/example.com\x22]#foo`, `Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`, + ` dir=\x22ltr\x22`, `c \x26\x26 alert(\x22Hello, World!\x22);`, // Escape sequence not over-escaped. `Hello, World \x26 O\x27Reilly\x21`, @@ -144,6 +167,7 @@ func TestTypedContent(t *testing.T) { `\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`, `a[href =~ \x22\/\/example.com\x22]#foo`, `Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`, + ` dir=\x22ltr\x22`, `c \x26\x26 alert(\x22Hello, World!\x22);`, // Escape sequence not over-escaped. `Hello, World \x26 O\x27Reilly\x21`, @@ -156,6 +180,7 @@ func TestTypedContent(t *testing.T) { `%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`, `a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`, `Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`, + `%20dir%3d%22ltr%22`, `c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`, `Hello%2c%20World%20%26%20O%27Reilly%5cx21`, // Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is done. @@ -168,6 +193,7 @@ func TestTypedContent(t *testing.T) { `%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`, `a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`, `Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`, + `%20dir%3d%22ltr%22`, `c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`, `Hello%2c%20World%20%26%20O%27Reilly%5cx21`, // Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is not done. diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go index e8812cf865..f7802d04b3 100644 --- a/src/pkg/exp/template/html/context.go +++ b/src/pkg/exp/template/html/context.go @@ -20,6 +20,7 @@ type context struct { delim delim urlPart urlPart jsCtx jsCtx + attr attr element element err *Error } @@ -30,6 +31,7 @@ func (c context) eq(d context) bool { c.delim == d.delim && c.urlPart == d.urlPart && c.jsCtx == d.jsCtx && + c.attr == d.attr && c.element == d.element && c.err == d.err } @@ -51,6 +53,9 @@ func (c context) mangle(templateName string) string { if c.jsCtx != 0 { s += "_" + c.jsCtx.String() } + if c.attr != 0 { + s += "_" + c.attr.String() + } if c.element != 0 { s += "_" + c.element.String() } @@ -75,6 +80,15 @@ const ( stateText state = iota // stateTag occurs before an HTML attribute or the end of a tag. stateTag + // stateAttrName occurs inside an attribute name. + // It occurs between the ^'s in ` ^name^ = value`. + stateAttrName + // stateAfterName occurs after an attr name has ended but before any + // equals sign. It occurs between the ^'s in ` name^ ^= value`. + stateAfterName + // stateBeforeValue occurs after the equals sign but before the value. + // It occurs between the ^'s in ` name =^ ^value`. + stateBeforeValue // stateComment occurs inside an . stateComment // stateRCDATA occurs inside an RCDATA element (`, ``, }, + { + "optional attrs", + `"}}"{{end}}` + + // Double quotes inside if/else. + ` src=` + + `{{if .T}}"?{{""}}"` + + `{{else}}"images/cleardot.gif"{{end}}` + + // Missing space before title, but it is not a + // part of the src attribute. + `{{if .T}}title="{{""}}"{{end}}` + + // Quotes outside if/else. + ` alt="` + + `{{if .T}}{{"<alt>"}}` + + `{{else}}{{if .F}}{{"<title>"}}{{end}}` + + `{{end}}"` + + `>`, + `<img class="iconClass" id="<iconId>" src="?%3ciconPath%3e"title="<title>" alt="<alt>">`, + }, + { + "conditional valueless attr name", + `<input{{if .T}} checked{{end}} name=n>`, + `<input checked name=n>`, + }, + { + "conditional dynamic valueless attr name 1", + `<input{{if .T}} {{"checked"}}{{end}} name=n>`, + `<input checked name=n>`, + }, + { + "conditional dynamic valueless attr name 2", + `<input {{if .T}}{{"checked"}} {{end}}name=n>`, + `<input checked name=n>`, + }, + { + "dynamic attribute name", + `<img on{{"load"}}="alert({{"loaded"}})">`, + // Treated as JS since quotes are inserted. + `<img onload="alert("loaded")">`, + }, + { + "dynamic element name", + `<h{{3}}><table><t{{"head"}}>...</h{{3}}>`, + `<h3><table><thead>...</h3>`, + }, } for _, test := range tests { @@ -780,9 +825,25 @@ func TestEscapeText(t *testing.T) { `<a>`, context{state: stateText}, }, + { + `<a href`, + context{state: stateAttrName, attr: attrURL}, + }, + { + `<a on`, + context{state: stateAttrName, attr: attrScript}, + }, + { + `<a href `, + context{state: stateAfterName, attr: attrURL}, + }, + { + `<a style = `, + context{state: stateBeforeValue, attr: attrStyle}, + }, { `<a href=`, - context{state: stateURL, delim: delimSpaceOrTagEnd}, + context{state: stateBeforeValue, attr: attrURL}, }, { `<a href=x`, diff --git a/src/pkg/exp/template/html/html.go b/src/pkg/exp/template/html/html.go index 8805e7ad3d..52472d193e 100644 --- a/src/pkg/exp/template/html/html.go +++ b/src/pkg/exp/template/html/html.go @@ -205,3 +205,22 @@ func stripTags(html string) string { } return b.String() } + +// htmlNameFilter accepts valid parts of an HTML attribute or tag name or +// a known-safe HTML attribute. +func htmlNameFilter(args ...interface{}) string { + s, t := stringify(args...) + if t == contentTypeHTMLAttr { + return s + } + for _, r := range s { + switch { + case '0' <= r && r <= '9': + case 'A' <= r && r <= 'Z': + case 'a' <= r && r <= 'z': + default: + return filterFailsafe + } + } + return s +} diff --git a/src/pkg/exp/template/html/transition.go b/src/pkg/exp/template/html/transition.go index 450dda43c4..6b10561caa 100644 --- a/src/pkg/exp/template/html/transition.go +++ b/src/pkg/exp/template/html/transition.go @@ -18,6 +18,9 @@ import ( var transitionFunc = [...]func(context, []byte) (context, []byte){ stateText: tText, stateTag: tTag, + stateAttrName: tAttrName, + stateAfterName: tAfterName, + stateBeforeValue: tBeforeValue, stateComment: tComment, stateRCDATA: tSpecialTagEnd, stateAttr: tAttr, @@ -79,54 +82,90 @@ var elementContentType = [...]state{ // tTag is the context transition function for the tag state. func tTag(c context, s []byte) (context, []byte) { // Find the attribute name. - attrStart := eatWhiteSpace(s, 0) - i, err := eatAttrName(s, attrStart) - if err != nil { - return context{ - state: stateError, - err: err, - }, nil - } + i := eatWhiteSpace(s, 0) if i == len(s) { return c, nil } - state := stateAttr - canonAttrName := strings.ToLower(string(s[attrStart:i])) - if urlAttr[canonAttrName] { - state = stateURL - } else if strings.HasPrefix(canonAttrName, "on") { - state = stateJS - } else if canonAttrName == "style" { - state = stateCSS - } - - // Look for the start of the value. - i = eatWhiteSpace(s, i) - if i == len(s) { - return c, s[i:] - } if s[i] == '>' { - state = elementContentType[c.element] - return context{state: state, element: c.element}, s[i+1:] - } else if s[i] != '=' { - // Possible due to a valueless attribute or '/' in "<input />". - return c, s[i:] + return context{ + state: elementContentType[c.element], + element: c.element, + }, s[i+1:] } - // Consume the "=". - i = eatWhiteSpace(s, i+1) - - // Find the attribute delimiter. - delim := delimSpaceOrTagEnd - if i < len(s) { - switch s[i] { - case '\'': - delim, i = delimSingleQuote, i+1 - case '"': - delim, i = delimDoubleQuote, i+1 + j, err := eatAttrName(s, i) + if err != nil { + return context{state: stateError, err: err}, nil + } + state, attr := stateTag, attrNone + if i != j { + canonAttrName := strings.ToLower(string(s[i:j])) + if urlAttr[canonAttrName] { + attr = attrURL + } else if strings.HasPrefix(canonAttrName, "on") { + attr = attrScript + } else if canonAttrName == "style" { + attr = attrStyle + } + if j == len(s) { + state = stateAttrName + } else { + state = stateAfterName } } + return context{state: state, element: c.element, attr: attr}, s[j:] +} - return context{state: state, delim: delim, element: c.element}, s[i:] +// tAttrName is the context transition function for stateAttrName. +func tAttrName(c context, s []byte) (context, []byte) { + i, err := eatAttrName(s, 0) + if err != nil { + return context{state: stateError, err: err}, nil + } else if i == len(s) { + return c, nil + } + c.state = stateAfterName + return c, s[i:] +} + +// tAfterName is the context transition function for stateAfterName. +func tAfterName(c context, s []byte) (context, []byte) { + // Look for the start of the value. + i := eatWhiteSpace(s, 0) + if i == len(s) { + return c, nil + } else if s[i] != '=' { + // Occurs due to tag ending '>', and valueless attribute. + c.state = stateTag + return c, s[i:] + } + c.state = stateBeforeValue + // Consume the "=". + return c, s[i+1:] +} + +var attrStartStates = [...]state{ + attrNone: stateAttr, + attrScript: stateJS, + attrStyle: stateCSS, + attrURL: stateURL, +} + +// tBeforeValue is the context transition function for stateBeforeValue. +func tBeforeValue(c context, s []byte) (context, []byte) { + i := eatWhiteSpace(s, 0) + if i == len(s) { + return c, nil + } + // Find the attribute delimiter. + delim := delimSpaceOrTagEnd + switch s[i] { + case '\'': + delim, i = delimSingleQuote, i+1 + case '"': + delim, i = delimDoubleQuote, i+1 + } + c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone + return c, s[i:] } // tComment is the context transition function for stateComment.