1
0
mirror of https://github.com/golang/go synced 2024-11-20 05:44:44 -07:00

encoding/xml: correctly escape newline, carriage return, and tab

The generated encodings are those from
http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html#charescaping

The change to the decoder ensures that we turn &#xD; in the
input into \r, not \n.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/6747043
This commit is contained in:
Ian Lance Taylor 2012-10-18 13:40:45 -07:00
parent 2abaaefa72
commit 1e6d9f49da
2 changed files with 41 additions and 15 deletions

View File

@ -687,6 +687,27 @@ var marshalTests = []struct {
Value: &IgnoreTest{}, Value: &IgnoreTest{},
UnmarshalOnly: true, UnmarshalOnly: true,
}, },
// Test escaping.
{
ExpectXML: `<a><nested><value>dquote: &#34;; squote: &#39;; ampersand: &amp;; less: &lt;; greater: &gt;;</value></nested></a>`,
Value: &AnyTest{
Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`,
},
},
{
ExpectXML: `<a><nested><value>newline: &#xA;; cr: &#xD;; tab: &#x9;;</value></nested></a>`,
Value: &AnyTest{
Nested: "newline: \n; cr: \r; tab: \t;",
},
},
{
ExpectXML: "<a><nested><value>1\r2\r\n3\n\r4\n5</value></nested></a>",
Value: &AnyTest{
Nested: "1\n2\n3\n\n4\n5",
},
UnmarshalOnly: true,
},
} }
func TestMarshal(t *testing.T) { func TestMarshal(t *testing.T) {

View File

@ -964,7 +964,16 @@ Input:
b0, b1 = 0, 0 b0, b1 = 0, 0
continue Input continue Input
} }
d.buf.WriteByte(b)
// We must rewrite unescaped \r and \r\n into \n.
if b == '\r' {
d.buf.WriteByte('\n')
} else if b1 == '\r' && b == '\n' {
// Skip \r\n--we already wrote \n.
} else {
d.buf.WriteByte(b)
}
b0, b1 = b1, b b0, b1 = b1, b
} }
data := d.buf.Bytes() data := d.buf.Bytes()
@ -985,20 +994,7 @@ Input:
} }
} }
// Must rewrite \r and \r\n into \n. return data
w := 0
for r := 0; r < len(data); r++ {
b := data[r]
if b == '\r' {
if r+1 < len(data) && data[r+1] == '\n' {
continue
}
b = '\n'
}
data[w] = b
w++
}
return data[0:w]
} }
// Decide whether the given rune is in the XML Character Range, per // Decide whether the given rune is in the XML Character Range, per
@ -1689,6 +1685,9 @@ var (
esc_amp = []byte("&amp;") esc_amp = []byte("&amp;")
esc_lt = []byte("&lt;") esc_lt = []byte("&lt;")
esc_gt = []byte("&gt;") esc_gt = []byte("&gt;")
esc_tab = []byte("&#x9;")
esc_nl = []byte("&#xA;")
esc_cr = []byte("&#xD;")
) )
// Escape writes to w the properly escaped XML equivalent // Escape writes to w the properly escaped XML equivalent
@ -1708,6 +1707,12 @@ func Escape(w io.Writer, s []byte) {
esc = esc_lt esc = esc_lt
case '>': case '>':
esc = esc_gt esc = esc_gt
case '\t':
esc = esc_tab
case '\n':
esc = esc_nl
case '\r':
esc = esc_cr
default: default:
continue continue
} }