diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index bdd94e34ce6..cbd71acfc6c 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -272,9 +272,6 @@ type decodeState struct { savedError error useNumber bool disallowUnknownFields bool - // safeUnquote is the number of current string literal bytes that don't - // need to be unquoted. When negative, no bytes need unquoting. - safeUnquote int } // readIndex returns the position of the last byte read. @@ -376,27 +373,13 @@ func (d *decodeState) rescanLiteral() { Switch: switch data[i-1] { case '"': // string - // safeUnquote is initialized at -1, which means that all bytes - // checked so far can be unquoted at a later time with no work - // at all. When reaching the closing '"', if safeUnquote is - // still -1, all bytes can be unquoted with no work. Otherwise, - // only those bytes up until the first '\\' or non-ascii rune - // can be safely unquoted. - safeUnquote := -1 for ; i < len(data); i++ { - if c := data[i]; c == '\\' { - if safeUnquote < 0 { // first unsafe byte - safeUnquote = int(i - d.off) - } + switch data[i] { + case '\\': i++ // escaped char - } else if c == '"' { - d.safeUnquote = safeUnquote + case '"': i++ // tokenize the closing quote too break Switch - } else if c >= utf8.RuneSelf { - if safeUnquote < 0 { // first unsafe byte - safeUnquote = int(i - d.off) - } } } case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number @@ -749,7 +732,7 @@ func (d *decodeState) object(v reflect.Value) error { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := d.unquoteBytes(item) + key, ok := unquoteBytes(item) if !ok { panic(phasePanicMsg) } @@ -950,7 +933,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) return nil } - s, ok := d.unquoteBytes(item) + s, ok := unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1001,7 +984,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool } case '"': // string - s, ok := d.unquoteBytes(item) + s, ok := unquoteBytes(item) if !ok { if fromQuoted { return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) @@ -1159,7 +1142,7 @@ func (d *decodeState) objectInterface() map[string]interface{} { start := d.readIndex() d.rescanLiteral() item := d.data[start:d.readIndex()] - key, ok := d.unquote(item) + key, ok := unquote(item) if !ok { panic(phasePanicMsg) } @@ -1208,7 +1191,7 @@ func (d *decodeState) literalInterface() interface{} { return c == 't' case '"': // string - s, ok := d.unquote(item) + s, ok := unquote(item) if !ok { panic(phasePanicMsg) } @@ -1251,21 +1234,38 @@ func getu4(s []byte) rune { // unquote converts a quoted JSON string literal s into an actual string t. // The rules are different than for Go, so cannot use strconv.Unquote. -func (d *decodeState) unquote(s []byte) (t string, ok bool) { - s, ok = d.unquoteBytes(s) +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) t = string(s) return } -func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) { - r := d.safeUnquote - // The bytes have been scanned, so we know that the first and last bytes - // are double quotes. +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } s = s[1 : len(s)-1] - // If there are no unusual characters, no unquoting is needed, so return - // a slice of the original bytes. - if r == -1 { + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { return s, true } diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go index 719a9fa290c..3f25893b419 100644 --- a/src/encoding/json/decode_test.go +++ b/src/encoding/json/decode_test.go @@ -1250,6 +1250,8 @@ var wrongStringTests = []wrongStringTest{ {`{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`}, {`{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`}, {`{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`}, + {`{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`}, + {`{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`}, } // If people misuse the ,string modifier, the error message should be