From 22d5f9aae364f8d9d77fa67abf791b7d8046af56 Mon Sep 17 00:00:00 2001 From: Mike Samuel Date: Tue, 30 Aug 2011 11:42:30 +1000 Subject: [PATCH] exp/template/html: Added handling for URL attributes. 1. adds a urlPart field to context 2. implements tURL to figure out the URL part 3. modifies joinContext to allow common context mismatches around branches to be ignored when not material, e.g. in a URL attribute where only one branch of a conditional appends a query string, so the branches end in different URL parts 4. adds a pipeline function that filters dynamically inserted protocols to prevent code injection via URLs. R=nigeltao CC=golang-dev https://golang.org/cl/4957041 --- src/pkg/exp/template/html/context.go | 36 +++++++++- src/pkg/exp/template/html/escape.go | 78 ++++++++++++++++++++-- src/pkg/exp/template/html/escape_test.go | 84 ++++++++++++++++++++++-- 3 files changed, 183 insertions(+), 15 deletions(-) diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go index 5ef3b78146a..d8fed158677 100644 --- a/src/pkg/exp/template/html/context.go +++ b/src/pkg/exp/template/html/context.go @@ -18,13 +18,14 @@ import ( type context struct { state state delim delim + urlPart urlPart errLine int errStr string } // eq returns whether two contexts are equal. func (c context) eq(d context) bool { - return c.state == d.state && c.delim == d.delim && c.errLine == d.errLine && c.errStr == d.errStr + return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.errLine == d.errLine && c.errStr == d.errStr } // state describes a high-level HTML parser state. @@ -97,3 +98,36 @@ func (d delim) String() string { } return fmt.Sprintf("illegal delim %d", d) } + +// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different +// encoding strategies. +type urlPart uint8 + +const ( + // urlPartNone occurs when not in a URL, or possibly at the start: + // ^ in "^http://auth/path?k=v#frag". + urlPartNone urlPart = iota + // urlPartPreQuery occurs in the scheme, authority, or path; between the + // ^s in "h^ttp://auth/path^?k=v#frag". 
+ urlPartPreQuery + // urlPartQueryOrFrag occurs in the query portion between the ^s in + // "http://auth/path?^k=v#frag^". + urlPartQueryOrFrag + // urlPartUnknown occurs due to joining of contexts both before and after + // the query separator. + urlPartUnknown +) + +var urlPartNames = [...]string{ + urlPartNone: "urlPartNone", + urlPartPreQuery: "urlPartPreQuery", + urlPartQueryOrFrag: "urlPartQueryOrFrag", + urlPartUnknown: "urlPartUnknown", +} + +func (u urlPart) String() string { + if int(u) < len(urlPartNames) { + return urlPartNames[u] + } + return fmt.Sprintf("illegal urlPart %d", u) +} diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go index 52d6323fae6..e7de81c4c68 100644 --- a/src/pkg/exp/template/html/escape.go +++ b/src/pkg/exp/template/html/escape.go @@ -10,6 +10,7 @@ package html import ( "bytes" "fmt" + "html" "os" "strings" "template" @@ -26,9 +27,15 @@ func Escape(t *template.Template) (*template.Template, os.Error) { if c.state != stateText { return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c) } + t.Funcs(funcMap) return t, nil } +// funcMap maps command names to functions that render their inputs safe. +var funcMap = template.FuncMap{ + "exp_template_html_urlfilter": urlFilter, +} + // escape escapes a template node. func escape(c context, n parse.Node) context { switch n := n.(type) { @@ -53,7 +60,22 @@ func escape(c context, n parse.Node) context { func escapeAction(c context, n *parse.ActionNode) context { sanitizer := "html" if c.state == stateURL { - sanitizer = "urlquery" + switch c.urlPart { + case urlPartNone: + sanitizer = "exp_template_html_urlfilter" + case urlPartQueryOrFrag: + sanitizer = "urlquery" + case urlPartPreQuery: + // The default "html" works here. 
+ case urlPartUnknown: + return context{ + state: stateError, + errLine: n.Line, + errStr: fmt.Sprintf("%s appears in an ambiguous URL context", n), + } + default: + panic(c.urlPart.String()) + } } // If the pipe already ends with the sanitizer, do not interfere. if m := len(n.Pipe.Cmds); m != 0 { @@ -84,6 +106,15 @@ func join(a, b context, line int, nodeName string) context { if a.eq(b) { return a } + + c := a + c.urlPart = b.urlPart + if c.eq(b) { + // The contexts differ only by urlPart. + c.urlPart = urlPartUnknown + return c + } + return context{ state: stateError, errLine: line, @@ -148,8 +179,15 @@ func escapeText(c context, s []byte) context { i := bytes.IndexAny(s, delimEnds[c.delim]) if i == -1 { // Remain inside the attribute. - // TODO: Recurse to take into account grammars for - // JS, CSS, URIs embedded in attrs once implemented. + // Decode the value so non-HTML rules can easily handle + //