From 1e6d9f49da330e61d29588b1e2f0f3685c03f359 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 18 Oct 2012 13:40:45 -0700 Subject: [PATCH] encoding/xml: correctly escape newline, carriage return, and tab The generated encodings are those from http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html#charescaping The change to the decoder ensures that we turn in the input into \r, not \n. R=golang-dev, bradfitz CC=golang-dev https://golang.org/cl/6747043 --- src/pkg/encoding/xml/marshal_test.go | 21 +++++++++++++++++ src/pkg/encoding/xml/xml.go | 35 ++++++++++++++++------------ 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/src/pkg/encoding/xml/marshal_test.go b/src/pkg/encoding/xml/marshal_test.go index e729a247af..668fea6f85 100644 --- a/src/pkg/encoding/xml/marshal_test.go +++ b/src/pkg/encoding/xml/marshal_test.go @@ -687,6 +687,27 @@ var marshalTests = []struct { Value: &IgnoreTest{}, UnmarshalOnly: true, }, + + // Test escaping. + { + ExpectXML: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + Value: &AnyTest{ + Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`, + }, + }, + { + ExpectXML: `newline: ; cr: ; tab: ;`, + Value: &AnyTest{ + Nested: "newline: \n; cr: \r; tab: \t;", + }, + }, + { + ExpectXML: "1\r2\r\n3\n\r4\n5", + Value: &AnyTest{ + Nested: "1\n2\n3\n\n4\n5", + }, + UnmarshalOnly: true, + }, } func TestMarshal(t *testing.T) { diff --git a/src/pkg/encoding/xml/xml.go b/src/pkg/encoding/xml/xml.go index fbd2208e33..ab853c61a4 100644 --- a/src/pkg/encoding/xml/xml.go +++ b/src/pkg/encoding/xml/xml.go @@ -964,7 +964,16 @@ Input: b0, b1 = 0, 0 continue Input } - d.buf.WriteByte(b) + + // We must rewrite unescaped \r and \r\n into \n. + if b == '\r' { + d.buf.WriteByte('\n') + } else if b1 == '\r' && b == '\n' { + // Skip \r\n--we already wrote \n. + } else { + d.buf.WriteByte(b) + } + b0, b1 = b1, b } data := d.buf.Bytes() @@ -985,20 +994,7 @@ Input: } } - // Must rewrite \r and \r\n into \n. - w := 0 - for r := 0; r < len(data); r++ { - b := data[r] - if b == '\r' { - if r+1 < len(data) && data[r+1] == '\n' { - continue - } - b = '\n' - } - data[w] = b - w++ - } - return data[0:w] + return data } // Decide whether the given rune is in the XML Character Range, per @@ -1689,6 +1685,9 @@ var ( esc_amp = []byte("&") esc_lt = []byte("<") esc_gt = []byte(">") + esc_tab = []byte(" ") + esc_nl = []byte(" ") + esc_cr = []byte(" ") ) // Escape writes to w the properly escaped XML equivalent @@ -1708,6 +1707,12 @@ func Escape(w io.Writer, s []byte) { esc = esc_lt case '>': esc = esc_gt + case '\t': + esc = esc_tab + case '\n': + esc = esc_nl + case '\r': + esc = esc_cr default: continue }