From 8b6527b70ea0408f0903a51d88c7245ec366f1f5 Mon Sep 17 00:00:00 2001 From: Didier Spezia Date: Sat, 27 Jun 2015 13:07:22 +0000 Subject: [PATCH] encoding/xml: improve marshaller sanity checks of directives When building a directive, the current sanity check prevents a '>' to be used, which makes a DOCTYPE directive with an internal subset be rejected. It is accepted by the parser though, so what can be parsed cannot be encoded. Improved the corresponding sanity check to mirror the behavior of the parser (in the way it handles angle brackets, quotes, and comments). Fixes #10158 Change-Id: Ieffea9f870f2694548e12897f8f47babc0ea4414 Reviewed-on: https://go-review.googlesource.com/11630 Reviewed-by: Russ Cox --- src/encoding/xml/marshal.go | 45 ++++++++++++++++++++++++++++++-- src/encoding/xml/marshal_test.go | 40 +++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go index 88e7d99cb5..5a49cc3528 100644 --- a/src/encoding/xml/marshal.go +++ b/src/encoding/xml/marshal.go @@ -173,6 +173,7 @@ func (enc *Encoder) EncodeElement(v interface{}, start StartElement) error { } var ( + begComment = []byte("") endProcInst = []byte("?>") endDirective = []byte(">") @@ -238,8 +239,8 @@ func (enc *Encoder) EncodeToken(t Token) error { } p.WriteString("?>") case Directive: - if bytes.Contains(t, endDirective) { - return fmt.Errorf("xml: EncodeToken of Directive containing > marker") + if !isValidDirective(t) { + return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers") } p.WriteString("' { + if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) { + incomment = false + } + } + // Just ignore anything in comment + case inquote != 0: + if c == inquote { + inquote = 0 + } + // Just ignore anything within quotes + case c == '\'' || c == '"': + inquote = c + case c == '<': + if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) { + incomment = true + } else { + depth++ + } + case c == '>': + if depth == 0 { + return false + } + depth-- + } + } + return depth == 0 && inquote == 0 && !incomment +} + // Flush flushes any buffered XML to the underlying writer. // See the EncodeToken documentation for details about when it is necessary. func (enc *Encoder) Flush() error { diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go index 4c478ddded..78fe841d76 100644 --- a/src/encoding/xml/marshal_test.go +++ b/src/encoding/xml/marshal_test.go @@ -1527,12 +1527,18 @@ var encodeTokenTests = []struct { Directive("foo"), }, want: ``, +}, { + desc: "more complex directive", + toks: []Token{ + Directive("DOCTYPE doc [ '> ]"), + }, + want: `'> ]>`, }, { desc: "directive instruction with bad name", toks: []Token{ Directive("foo>"), }, - err: "xml: EncodeToken of Directive containing > marker", + err: "xml: EncodeToken of Directive containing wrong < or > markers", }, { desc: "end tag without start tag", toks: []Token{ @@ -1868,3 +1874,35 @@ func TestRace9796(t *testing.T) { } wg.Wait() } + +func TestIsValidDirective(t *testing.T) { + testOK := []string{ + "<>", + "< < > >", + "' '>' >", + " ]>", + " '<' ' doc ANY> ]>", + ">>> a < comment --> [ ] >", + } + testKO := []string{ + "<", + ">", + "", + "< > > < < >", + " -->", + "", + "'", + "", + } + for _, s := range testOK { + if !isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be valid", s) + } + } + for _, s := range testKO { + if isValidDirective(Directive(s)) { + t.Errorf("Directive %q is expected to be invalid", s) + } + } +}