diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go index 5ef3b78146a..d8fed158677 100644 --- a/src/pkg/exp/template/html/context.go +++ b/src/pkg/exp/template/html/context.go @@ -18,13 +18,14 @@ import ( type context struct { state state delim delim + urlPart urlPart errLine int errStr string } // eq returns whether two contexts are equal. func (c context) eq(d context) bool { - return c.state == d.state && c.delim == d.delim && c.errLine == d.errLine && c.errStr == d.errStr + return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.errLine == d.errLine && c.errStr == d.errStr } // state describes a high-level HTML parser state. @@ -97,3 +98,36 @@ func (d delim) String() string { } return fmt.Sprintf("illegal delim %d", d) } + +// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different +// encoding strategies. +type urlPart uint8 + +const ( + // urlPartNone occurs when not in a URL, or possibly at the start: + // ^ in "^http://auth/path?k=v#frag". + urlPartNone urlPart = iota + // urlPartPreQuery occurs in the scheme, authority, or path; between the + // ^s in "h^ttp://auth/path^?k=v#frag". + urlPartPreQuery + // urlPartQueryOrFrag occurs in the query portion between the ^s in + // "http://auth/path?^k=v#frag^". + urlPartQueryOrFrag + // urlPartUnknown occurs due to joining of contexts both before and after + // the query separator. + urlPartUnknown +) + +var urlPartNames = [...]string{ + urlPartNone: "urlPartNone", + urlPartPreQuery: "urlPartPreQuery", + urlPartQueryOrFrag: "urlPartQueryOrFrag", + urlPartUnknown: "urlPartUnknown", +} + +func (u urlPart) String() string { + if int(u) < len(urlPartNames) { + return urlPartNames[u] + } + return fmt.Sprintf("illegal urlPart %d", u) +} diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go index 52d6323fae6..e7de81c4c68 100644 --- a/src/pkg/exp/template/html/escape.go +++ b/src/pkg/exp/template/html/escape.go @@ -10,6 +10,7 @@ package html import ( "bytes" "fmt" + "html" "os" "strings" "template" @@ -26,9 +27,15 @@ func Escape(t *template.Template) (*template.Template, os.Error) { if c.state != stateText { return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c) } + t.Funcs(funcMap) return t, nil } +// funcMap maps command names to functions that render their inputs safe. +var funcMap = template.FuncMap{ + "exp_template_html_urlfilter": urlFilter, +} + // escape escapes a template node. func escape(c context, n parse.Node) context { switch n := n.(type) { @@ -53,7 +60,22 @@ func escape(c context, n parse.Node) context { func escapeAction(c context, n *parse.ActionNode) context { sanitizer := "html" if c.state == stateURL { - sanitizer = "urlquery" + switch c.urlPart { + case urlPartNone: + sanitizer = "exp_template_html_urlfilter" + case urlPartQueryOrFrag: + sanitizer = "urlquery" + case urlPartPreQuery: + // The default "html" works here. + case urlPartUnknown: + return context{ + state: stateError, + errLine: n.Line, + errStr: fmt.Sprintf("%s appears in an ambiguous URL context", n), + } + default: + panic(c.urlPart.String()) + } } // If the pipe already ends with the sanitizer, do not interfere. if m := len(n.Pipe.Cmds); m != 0 { @@ -84,6 +106,15 @@ func join(a, b context, line int, nodeName string) context { if a.eq(b) { return a } + + c := a + c.urlPart = b.urlPart + if c.eq(b) { + // The contexts differ only by urlPart. + c.urlPart = urlPartUnknown + return c + } + return context{ state: stateError, errLine: line, @@ -148,8 +179,15 @@ func escapeText(c context, s []byte) context { i := bytes.IndexAny(s, delimEnds[c.delim]) if i == -1 { // Remain inside the attribute. - // TODO: Recurse to take into account grammars for - // JS, CSS, URIs embedded in attrs once implemented. + // Decode the value so non-HTML rules can easily handle + //