1
0
mirror of https://github.com/golang/go synced 2024-11-18 10:14:45 -07:00

encoding/json: escape U+2028 and U+2029.

Fixes #5836.

R=golang-dev, bradfitz, r, rsc
CC=golang-dev
https://golang.org/cl/10883045
This commit is contained in:
David Symonds 2013-07-12 14:35:55 +10:00
parent fb63e4fefb
commit d754647963
4 changed files with 60 additions and 6 deletions

View File

@ -568,14 +568,14 @@ func TestUnmarshalPtrPtr(t *testing.T) {
}
func TestEscape(t *testing.T) {
const input = `"foobar"<html>`
const expected = `"\"foobar\"\u003chtml\u003e"`
const input = `"foobar"<html>` + " [\u2028 \u2029]"
const expected = `"\"foobar\"\u003chtml\u003e [\u2028 \u2029]"`
b, err := Marshal(input)
if err != nil {
t.Fatalf("Marshal error: %v", err)
}
if s := string(b); s != expected {
t.Errorf("Encoding of [%s] was [%s], want [%s]", input, s, expected)
t.Errorf("Encoding of [%s]:\n got [%s]\nwant [%s]", input, s, expected)
}
}

View File

@ -149,14 +149,14 @@ func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) {
return buf.Bytes(), nil
}
// HTMLEscape appends to dst the JSON-encoded src with <, >, and &
// characters inside string literals changed to \u003c, \u003e, \u0026
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
// so that the JSON will be safe to embed inside HTML <script> tags.
// For historical reasons, web browsers don't honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding must
// be used.
func HTMLEscape(dst *bytes.Buffer, src []byte) {
// < > & can only appear in string literals,
// The characters can only appear in string literals,
// so just scan the string one byte at a time.
start := 0
for i, c := range src {
@ -169,6 +169,15 @@ func HTMLEscape(dst *bytes.Buffer, src []byte) {
dst.WriteByte(hex[c&0xF])
start = i + 1
}
// Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
if start < i {
dst.Write(src[start:i])
}
dst.WriteString(`\u202`)
dst.WriteByte(hex[src[i+2]&0xF])
start = i + 3
}
}
if start < len(src) {
dst.Write(src[start:])
@ -548,6 +557,23 @@ func (e *encodeState) string(s string) (int, error) {
if c == utf8.RuneError && size == 1 {
e.error(&InvalidUTF8Error{s})
}
// U+2028 is LINE SEPARATOR.
// U+2029 is PARAGRAPH SEPARATOR.
// They are both technically valid characters in JSON strings,
// but don't work in JSONP, which has to be evaluated as JavaScript,
// and can lead to security holes there. It is valid JSON to
// escape them, so we do so unconditionally.
// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
if c == '\u2028' || c == '\u2029' {
if start < i {
e.WriteString(s[start:i])
}
e.WriteString(`\u202`)
e.WriteByte(hex[c&0xF])
i += size
start = i
continue
}
i += size
}
if start < len(s) {

View File

@ -27,6 +27,15 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error {
dst.WriteByte(hex[c&0xF])
start = i + 1
}
// Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
if start < i {
dst.Write(src[start:i])
}
dst.WriteString(`\u202`)
dst.WriteByte(hex[src[i+2]&0xF])
start = i + 3
}
v := scan.step(&scan, int(c))
if v >= scanSkipSpace {
if v == scanError {

View File

@ -63,6 +63,25 @@ func TestCompact(t *testing.T) {
}
}
func TestCompactSeparators(t *testing.T) {
// U+2028 and U+2029 should be escaped inside strings.
// They should not appear outside strings.
tests := []struct {
in, compact string
}{
{"{\"\u2028\": 1}", `{"\u2028":1}`},
{"{\"\u2029\" :2}", `{"\u2029":2}`},
}
for _, tt := range tests {
var buf bytes.Buffer
if err := Compact(&buf, []byte(tt.in)); err != nil {
t.Errorf("Compact(%q): %v", tt.in, err)
} else if s := buf.String(); s != tt.compact {
t.Errorf("Compact(%q) = %q, want %q", tt.in, s, tt.compact)
}
}
}
func TestIndent(t *testing.T) {
var buf bytes.Buffer
for _, tt := range examples {