mirror of
https://github.com/golang/go
synced 2024-11-18 01:14:48 -07:00
encoding/json: Use a lookup table for safe characters
The previous check for characters inside of a JSON string that needed to be escaped performed seven different boolean comparisons before determining that a ASCII character did not need to be escaped. Most characters do not need to be escaped, so this check can be done in a more performant way. Use the same strategy as the unicode package for precomputing a range of characters that need to be escaped, then do a single lookup into a character array to determine whether the character needs escaping. On an AWS c4.large node: $ benchstat benchmarks/master-bench benchmarks/json-table-bench name old time/op new time/op delta CodeEncoder-2 19.0ms ± 0% 15.5ms ± 1% -18.16% (p=0.000 n=19+20) CodeMarshal-2 20.1ms ± 1% 16.8ms ± 2% -16.35% (p=0.000 n=20+21) CodeDecoder-2 49.3ms ± 1% 49.5ms ± 2% ~ (p=0.498 n=16+20) DecoderStream-2 416ns ± 0% 416ns ± 1% ~ (p=0.978 n=19+19) CodeUnmarshal-2 51.0ms ± 1% 50.9ms ± 1% ~ (p=0.490 n=19+17) CodeUnmarshalReuse-2 48.5ms ± 2% 48.5ms ± 2% ~ (p=0.989 n=20+19) UnmarshalString-2 541ns ± 1% 532ns ± 1% -1.75% (p=0.000 n=20+21) UnmarshalFloat64-2 485ns ± 1% 481ns ± 1% -0.92% (p=0.000 n=20+21) UnmarshalInt64-2 429ns ± 1% 427ns ± 1% -0.49% (p=0.000 n=19+20) Issue10335-2 631ns ± 1% 619ns ± 1% -1.84% (p=0.000 n=20+20) NumberIsValid-2 19.1ns ± 0% 19.1ns ± 0% ~ (all samples are equal) NumberIsValidRegexp-2 689ns ± 1% 690ns ± 0% ~ (p=0.150 n=20+20) SkipValue-2 14.0ms ± 0% 14.0ms ± 0% -0.05% (p=0.000 n=18+18) EncoderEncode-2 525ns ± 2% 512ns ± 1% -2.33% (p=0.000 n=20+18) name old speed new speed delta CodeEncoder-2 102MB/s ± 0% 125MB/s ± 1% +22.20% (p=0.000 n=19+20) CodeMarshal-2 96.6MB/s ± 1% 115.6MB/s ± 2% +19.56% (p=0.000 n=20+21) CodeDecoder-2 39.3MB/s ± 1% 39.2MB/s ± 2% ~ (p=0.464 n=16+20) CodeUnmarshal-2 38.1MB/s ± 1% 38.1MB/s ± 1% ~ (p=0.525 n=19+17) SkipValue-2 143MB/s ± 0% 143MB/s ± 0% +0.05% (p=0.000 n=18+18) I also took the data set reported in #5683 (browser telemetry data from Mozilla), added named structs for the data set, and turned it into a proper benchmark: https://github.com/kevinburke/jsonbench/blob/master/go/bench_test.go The results from that test are similarly encouraging. On a 64-bit Mac: $ benchstat benchmarks/master-benchmark benchmarks/json-table-benchmark name old time/op new time/op delta CodeMarshal-4 1.19ms ± 2% 1.08ms ± 2% -9.33% (p=0.000 n=21+17) Unmarshal-4 3.09ms ± 3% 3.06ms ± 1% -0.83% (p=0.027 n=22+17) UnmarshalReuse-4 3.04ms ± 1% 3.04ms ± 1% ~ (p=0.169 n=20+15) name old speed new speed delta CodeMarshal-4 80.3MB/s ± 1% 88.5MB/s ± 1% +10.29% (p=0.000 n=21+17) Unmarshal-4 31.0MB/s ± 2% 31.2MB/s ± 1% +0.83% (p=0.025 n=22+17) On the c4.large: $ benchstat benchmarks/master-bench benchmarks/json-table-bench name old time/op new time/op delta CodeMarshal-2 1.10ms ± 1% 0.98ms ± 1% -10.12% (p=0.000 n=20+54) Unmarshal-2 2.82ms ± 1% 2.79ms ± 0% -1.09% (p=0.000 n=20+51) UnmarshalReuse-2 2.80ms ± 0% 2.77ms ± 0% -1.03% (p=0.000 n=20+52) name old speed new speed delta CodeMarshal-2 87.3MB/s ± 1% 97.1MB/s ± 1% +11.27% (p=0.000 n=20+54) Unmarshal-2 33.9MB/s ± 1% 34.2MB/s ± 0% +1.10% (p=0.000 n=20+51) For what it's worth, I tried other heuristics - short circuiting the conditional for common ASCII characters, for example: if (b >= 63 && b != 92) || (b >= 39 && b <= 59) || (rest of the conditional) This offered a speedup around 7-9%, not as large as the submitted change. Change-Id: Idcf88f7b93bfcd1164cdd6a585160b7e407a0d9b Reviewed-on: https://go-review.googlesource.com/24466 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com> Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
2321895fe2
commit
ed8f207940
@ -850,8 +850,7 @@ func (e *encodeState) string(s string, escapeHTML bool) int {
|
||||
start := 0
|
||||
for i := 0; i < len(s); {
|
||||
if b := s[i]; b < utf8.RuneSelf {
|
||||
if 0x20 <= b && b != '\\' && b != '"' &&
|
||||
(!escapeHTML || b != '<' && b != '>' && b != '&') {
|
||||
if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
@ -928,8 +927,7 @@ func (e *encodeState) stringBytes(s []byte, escapeHTML bool) int {
|
||||
start := 0
|
||||
for i := 0; i < len(s); {
|
||||
if b := s[i]; b < utf8.RuneSelf {
|
||||
if 0x20 <= b && b != '\\' && b != '"' &&
|
||||
(!escapeHTML || b != '<' && b != '>' && b != '&') {
|
||||
if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
218
src/encoding/json/tables.go
Normal file
218
src/encoding/json/tables.go
Normal file
@ -0,0 +1,218 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package json
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// safeSet holds the value true if the ASCII character with the given array
|
||||
// position can be represented inside a JSON string without any further
|
||||
// escaping.
|
||||
//
|
||||
// All values are true except for the ASCII control characters (0-31), the
|
||||
// double quote ("), and the backslash character ("\").
|
||||
var safeSet = [utf8.RuneSelf]bool{
|
||||
' ': true,
|
||||
'!': true,
|
||||
'"': false,
|
||||
'#': true,
|
||||
'$': true,
|
||||
'%': true,
|
||||
'&': true,
|
||||
'\'': true,
|
||||
'(': true,
|
||||
')': true,
|
||||
'*': true,
|
||||
'+': true,
|
||||
',': true,
|
||||
'-': true,
|
||||
'.': true,
|
||||
'/': true,
|
||||
'0': true,
|
||||
'1': true,
|
||||
'2': true,
|
||||
'3': true,
|
||||
'4': true,
|
||||
'5': true,
|
||||
'6': true,
|
||||
'7': true,
|
||||
'8': true,
|
||||
'9': true,
|
||||
':': true,
|
||||
';': true,
|
||||
'<': true,
|
||||
'=': true,
|
||||
'>': true,
|
||||
'?': true,
|
||||
'@': true,
|
||||
'A': true,
|
||||
'B': true,
|
||||
'C': true,
|
||||
'D': true,
|
||||
'E': true,
|
||||
'F': true,
|
||||
'G': true,
|
||||
'H': true,
|
||||
'I': true,
|
||||
'J': true,
|
||||
'K': true,
|
||||
'L': true,
|
||||
'M': true,
|
||||
'N': true,
|
||||
'O': true,
|
||||
'P': true,
|
||||
'Q': true,
|
||||
'R': true,
|
||||
'S': true,
|
||||
'T': true,
|
||||
'U': true,
|
||||
'V': true,
|
||||
'W': true,
|
||||
'X': true,
|
||||
'Y': true,
|
||||
'Z': true,
|
||||
'[': true,
|
||||
'\\': false,
|
||||
']': true,
|
||||
'^': true,
|
||||
'_': true,
|
||||
'`': true,
|
||||
'a': true,
|
||||
'b': true,
|
||||
'c': true,
|
||||
'd': true,
|
||||
'e': true,
|
||||
'f': true,
|
||||
'g': true,
|
||||
'h': true,
|
||||
'i': true,
|
||||
'j': true,
|
||||
'k': true,
|
||||
'l': true,
|
||||
'm': true,
|
||||
'n': true,
|
||||
'o': true,
|
||||
'p': true,
|
||||
'q': true,
|
||||
'r': true,
|
||||
's': true,
|
||||
't': true,
|
||||
'u': true,
|
||||
'v': true,
|
||||
'w': true,
|
||||
'x': true,
|
||||
'y': true,
|
||||
'z': true,
|
||||
'{': true,
|
||||
'|': true,
|
||||
'}': true,
|
||||
'~': true,
|
||||
'\u007f': true,
|
||||
}
|
||||
|
||||
// htmlSafeSet holds the value true if the ASCII character with the given
|
||||
// array position can be safely represented inside a JSON string, embedded
|
||||
// inside of HTML <script> tags, without any additional escaping.
|
||||
//
|
||||
// All values are true except for the ASCII control characters (0-31), the
|
||||
// double quote ("), the backslash character ("\"), HTML opening and closing
|
||||
// tags ("<" and ">"), and the ampersand ("&").
|
||||
var htmlSafeSet = [utf8.RuneSelf]bool{
|
||||
' ': true,
|
||||
'!': true,
|
||||
'"': false,
|
||||
'#': true,
|
||||
'$': true,
|
||||
'%': true,
|
||||
'&': false,
|
||||
'\'': true,
|
||||
'(': true,
|
||||
')': true,
|
||||
'*': true,
|
||||
'+': true,
|
||||
',': true,
|
||||
'-': true,
|
||||
'.': true,
|
||||
'/': true,
|
||||
'0': true,
|
||||
'1': true,
|
||||
'2': true,
|
||||
'3': true,
|
||||
'4': true,
|
||||
'5': true,
|
||||
'6': true,
|
||||
'7': true,
|
||||
'8': true,
|
||||
'9': true,
|
||||
':': true,
|
||||
';': true,
|
||||
'<': false,
|
||||
'=': true,
|
||||
'>': false,
|
||||
'?': true,
|
||||
'@': true,
|
||||
'A': true,
|
||||
'B': true,
|
||||
'C': true,
|
||||
'D': true,
|
||||
'E': true,
|
||||
'F': true,
|
||||
'G': true,
|
||||
'H': true,
|
||||
'I': true,
|
||||
'J': true,
|
||||
'K': true,
|
||||
'L': true,
|
||||
'M': true,
|
||||
'N': true,
|
||||
'O': true,
|
||||
'P': true,
|
||||
'Q': true,
|
||||
'R': true,
|
||||
'S': true,
|
||||
'T': true,
|
||||
'U': true,
|
||||
'V': true,
|
||||
'W': true,
|
||||
'X': true,
|
||||
'Y': true,
|
||||
'Z': true,
|
||||
'[': true,
|
||||
'\\': false,
|
||||
']': true,
|
||||
'^': true,
|
||||
'_': true,
|
||||
'`': true,
|
||||
'a': true,
|
||||
'b': true,
|
||||
'c': true,
|
||||
'd': true,
|
||||
'e': true,
|
||||
'f': true,
|
||||
'g': true,
|
||||
'h': true,
|
||||
'i': true,
|
||||
'j': true,
|
||||
'k': true,
|
||||
'l': true,
|
||||
'm': true,
|
||||
'n': true,
|
||||
'o': true,
|
||||
'p': true,
|
||||
'q': true,
|
||||
'r': true,
|
||||
's': true,
|
||||
't': true,
|
||||
'u': true,
|
||||
'v': true,
|
||||
'w': true,
|
||||
'x': true,
|
||||
'y': true,
|
||||
'z': true,
|
||||
'{': true,
|
||||
'|': true,
|
||||
'}': true,
|
||||
'~': true,
|
||||
'\u007f': true,
|
||||
}
|
Loading…
Reference in New Issue
Block a user