diff --git a/src/pkg/encoding/json/decode.go b/src/pkg/encoding/json/decode.go index ffe9d77b7a..f2ec9cb672 100644 --- a/src/pkg/encoding/json/decode.go +++ b/src/pkg/encoding/json/decode.go @@ -55,6 +55,11 @@ import ( // If no more serious errors are encountered, Unmarshal returns // an UnmarshalTypeError describing the earliest such error. // +// When unmarshaling quoted strings, invalid UTF-8 or +// invalid UTF-16 surrogate pairs are not treated as an error. +// Instead, they are replaced by the Unicode replacement +// character U+FFFD. +// func Unmarshal(data []byte, v interface{}) error { // Check for well-formedness. // Avoids filling out half a data structure diff --git a/src/pkg/encoding/json/decode_test.go b/src/pkg/encoding/json/decode_test.go index 524a9989fe..1ce26f8fb3 100644 --- a/src/pkg/encoding/json/decode_test.go +++ b/src/pkg/encoding/json/decode_test.go @@ -330,6 +330,43 @@ var unmarshalTests = []unmarshalTest{ ptr: new(S10), out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}}, }, + + // invalid UTF-8 is coerced to valid UTF-8. + { + in: "\"hello\xffworld\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + in: "\"hello\xc2\xc2world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\xc2\xffworld\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\\ud800world\"", + ptr: new(string), + out: "hello\ufffdworld", + }, + { + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\\ud800\\ud800world\"", + ptr: new(string), + out: "hello\ufffd\ufffdworld", + }, + { + in: "\"hello\xed\xa0\x80\xed\xb0\x80world\"", + ptr: new(string), + out: "hello\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdworld", + }, } func TestMarshal(t *testing.T) {