bytes, strings: optimize for ASCII sets

In a large codebase within Google, there are thousands of uses of: ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight An analysis of their usage shows that over 97% of them only use character sets consisting of only ASCII symbols. Uses of ContainsAny|IndexAny|LastIndexAny: 6% are 1 character (e.g., "\n" or " ") 58% are 2-4 characters (e.g., "<>" or "\r\n\t ") 24% are 5-9 characters (e.g., "()[]*^$") 10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ") We optimize for ASCII sets, which are commonly used to search for "control" characters in some string. We don't optimize for the single character scenario since IndexRune or IndexByte could be used. Uses of Trim|TrimLeft|TrimRight: 71% are 1 character (e.g., "\n" or " ") 14% are 2 characters (e.g., "\r\n") 10% are 3-4 characters (e.g., " \t\r\n") 5% are 10+ characters (e.g., "0123456789abcdefABCDEF") We optimize for the single character case with a simple closured function that only checks for that character's value. We optimize for the medium and larger sets using a 16-byte bit-map representing a set of ASCII characters. The benchmarks below have the following suffix name "%d:%d" where the first number is the length of the input and the second number is the length of the charset. == bytes package == benchmark old ns/op new ns/op delta BenchmarkIndexAnyASCII/1:1-4 5.09 5.23 +2.75% BenchmarkIndexAnyASCII/1:2-4 5.81 5.85 +0.69% BenchmarkIndexAnyASCII/1:4-4 7.22 7.50 +3.88% BenchmarkIndexAnyASCII/1:8-4 11.0 11.1 +0.91% BenchmarkIndexAnyASCII/1:16-4 17.5 17.8 +1.71% BenchmarkIndexAnyASCII/16:1-4 36.0 34.0 -5.56% BenchmarkIndexAnyASCII/16:2-4 46.6 36.5 -21.67% BenchmarkIndexAnyASCII/16:4-4 78.0 40.4 -48.21% BenchmarkIndexAnyASCII/16:8-4 136 47.4 -65.15% BenchmarkIndexAnyASCII/16:16-4 254 61.5 -75.79% BenchmarkIndexAnyASCII/256:1-4 542 388 -28.41% BenchmarkIndexAnyASCII/256:2-4 705 382 -45.82% BenchmarkIndexAnyASCII/256:4-4 1089 386 -64.55% BenchmarkIndexAnyASCII/256:8-4 1994 394 -80.24% BenchmarkIndexAnyASCII/256:16-4 3843 411 -89.31% BenchmarkIndexAnyASCII/4096:1-4 8522 5873 -31.08% BenchmarkIndexAnyASCII/4096:2-4 11253 5861 -47.92% BenchmarkIndexAnyASCII/4096:4-4 17824 5883 -66.99% BenchmarkIndexAnyASCII/4096:8-4 32053 5871 -81.68% BenchmarkIndexAnyASCII/4096:16-4 60512 5888 -90.27% BenchmarkTrimASCII/1:1-4 79.5 70.8 -10.94% BenchmarkTrimASCII/1:2-4 79.0 105 +32.91% BenchmarkTrimASCII/1:4-4 79.6 109 +36.93% BenchmarkTrimASCII/1:8-4 78.8 118 +49.75% BenchmarkTrimASCII/1:16-4 80.2 132 +64.59% BenchmarkTrimASCII/16:1-4 243 116 -52.26% BenchmarkTrimASCII/16:2-4 243 171 -29.63% BenchmarkTrimASCII/16:4-4 243 176 -27.57% BenchmarkTrimASCII/16:8-4 241 184 -23.65% BenchmarkTrimASCII/16:16-4 238 199 -16.39% BenchmarkTrimASCII/256:1-4 2580 840 -67.44% BenchmarkTrimASCII/256:2-4 2603 1175 -54.86% BenchmarkTrimASCII/256:4-4 2572 1188 -53.81% BenchmarkTrimASCII/256:8-4 2550 1191 -53.29% BenchmarkTrimASCII/256:16-4 2585 1208 -53.27% BenchmarkTrimASCII/4096:1-4 39773 12181 -69.37% BenchmarkTrimASCII/4096:2-4 39946 17231 -56.86% BenchmarkTrimASCII/4096:4-4 39641 17179 -56.66% BenchmarkTrimASCII/4096:8-4 39835 17175 -56.88% BenchmarkTrimASCII/4096:16-4 40229 17215 -57.21% == strings package == benchmark old ns/op new ns/op delta BenchmarkIndexAnyASCII/1:1-4 5.94 4.97 -16.33% BenchmarkIndexAnyASCII/1:2-4 5.94 5.55 -6.57% BenchmarkIndexAnyASCII/1:4-4 7.45 7.21 -3.22% BenchmarkIndexAnyASCII/1:8-4 10.8 10.6 -1.85% BenchmarkIndexAnyASCII/1:16-4 17.4 17.2 -1.15% BenchmarkIndexAnyASCII/16:1-4 36.4 32.2 -11.54% BenchmarkIndexAnyASCII/16:2-4 49.6 34.6 -30.24% BenchmarkIndexAnyASCII/16:4-4 77.5 37.9 -51.10% BenchmarkIndexAnyASCII/16:8-4 138 45.5 -67.03% BenchmarkIndexAnyASCII/16:16-4 241 59.1 -75.48% BenchmarkIndexAnyASCII/256:1-4 509 378 -25.74% BenchmarkIndexAnyASCII/256:2-4 720 381 -47.08% BenchmarkIndexAnyASCII/256:4-4 1142 384 -66.37% BenchmarkIndexAnyASCII/256:8-4 1999 391 -80.44% BenchmarkIndexAnyASCII/256:16-4 3735 403 -89.21% BenchmarkIndexAnyASCII/4096:1-4 7973 5824 -26.95% BenchmarkIndexAnyASCII/4096:2-4 11432 5809 -49.19% BenchmarkIndexAnyASCII/4096:4-4 18327 5819 -68.25% BenchmarkIndexAnyASCII/4096:8-4 33059 5828 -82.37% BenchmarkIndexAnyASCII/4096:16-4 59703 5817 -90.26% BenchmarkTrimASCII/1:1-4 71.9 71.8 -0.14% BenchmarkTrimASCII/1:2-4 73.3 103 +40.52% BenchmarkTrimASCII/1:4-4 71.8 106 +47.63% BenchmarkTrimASCII/1:8-4 71.2 113 +58.71% BenchmarkTrimASCII/1:16-4 71.6 128 +78.77% BenchmarkTrimASCII/16:1-4 152 116 -23.68% BenchmarkTrimASCII/16:2-4 160 168 +5.00% BenchmarkTrimASCII/16:4-4 172 170 -1.16% BenchmarkTrimASCII/16:8-4 200 177 -11.50% BenchmarkTrimASCII/16:16-4 254 193 -24.02% BenchmarkTrimASCII/256:1-4 1438 864 -39.92% BenchmarkTrimASCII/256:2-4 1551 1195 -22.95% BenchmarkTrimASCII/256:4-4 1770 1200 -32.20% BenchmarkTrimASCII/256:8-4 2195 1216 -44.60% BenchmarkTrimASCII/256:16-4 3054 1224 -59.92% BenchmarkTrimASCII/4096:1-4 21726 12557 -42.20% BenchmarkTrimASCII/4096:2-4 23586 17508 -25.77% BenchmarkTrimASCII/4096:4-4 26898 17510 -34.90% BenchmarkTrimASCII/4096:8-4 33714 17595 -47.81% BenchmarkTrimASCII/4096:16-4 47429 17700 -62.68% The benchmarks added test the worst case. For IndexAny, that is when the charset matches none of the input. For Trim, it is when the charset matches all of the input. Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea Reviewed-on: https://go-review.googlesource.com/31593 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Martin Möhrmann <martisch@uos.de>
2024-11-18 00:04:43 -07:00 · 2016-10-20 03:16:22 -07:00 · 2016-10-20 03:16:22 -07:00 · 9a8c69539c
commit 9a8c69539c
parent ef3a9f2dd4
4 changed files with 201 additions and 11 deletions
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@ -160,10 +160,19 @@ func IndexRune(s []byte, r rune) int {
 // point in common.
 func IndexAny(s []byte, chars string) int {
 	if len(chars) > 0 {
-		var r rune
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i, c := range s {
+					if as.contains(c) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		var width int
 		for i := 0; i < len(s); i += width {
-			r = rune(s[i])
+			r := rune(s[i])
 			if r < utf8.RuneSelf {
 				width = 1
 			} else {
@ -185,11 +194,21 @@ func IndexAny(s []byte, chars string) int {
 // there is no code point in common.
 func LastIndexAny(s []byte, chars string) int {
 	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := len(s) - 1; i >= 0; i-- {
+					if as.contains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		for i := len(s); i > 0; {
-			r, size := utf8.DecodeLastRune(s[0:i])
+			r, size := utf8.DecodeLastRune(s[:i])
 			i -= size
-			for _, ch := range chars {
-				if r == ch {
+			for _, c := range chars {
+				if r == c {
 					return i
 				}
 			}
@ -573,7 +592,43 @@ func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
 	return -1
 }

+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+	for i := 0; i < len(chars); i++ {
+		c := chars[i]
+		if c >= utf8.RuneSelf {
+			return as, false
+		}
+		as[c>>5] |= 1 << uint(c&31)
+	}
+	return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+	return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
 func makeCutsetFunc(cutset string) func(r rune) bool {
+	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+		return func(r rune) bool {
+			return r == rune(cutset[0])
+		}
+	}
+	if as, isASCII := makeASCIISet(cutset); isASCII {
+		return func(r rune) bool {
+			return r < utf8.RuneSelf && as.contains(byte(r))
+		}
+	}
 	return func(r rune) bool {
 		for _, c := range cutset {
 			if c == r {
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@ -167,8 +167,12 @@ var indexAnyTests = []BinOpTest{
 	{"abc", "xyz", -1},
 	{"abc", "xcz", 2},
 	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"aRegExp*", ".(|)*+?^$[]", 7},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 4},
+	{"012\x80bcb\x80210", "\xffb", 3},
 }

 var lastIndexAnyTests = []BinOpTest{
@ -180,9 +184,13 @@ var lastIndexAnyTests = []BinOpTest{
 	{"aaa", "a", 2},
 	{"abc", "xyz", -1},
 	{"abc", "ab", 1},
-	{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"a.RegExp*", ".(|)*+?^$[]", 8},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 6},
+	{"012\x80bcb\x80210", "\xffb", 7},
 }

 // Execute f on each test case.  funcName should be the name of f; it's used
@ -1029,6 +1037,9 @@ var trimTests = []TrimTest{
 	{"Trim", "* listitem", " *", "listitem"},
 	{"Trim", `"quote"`, `"`, "quote"},
 	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+	{"Trim", "\x80test\xff", "\xff", "test"},
+	{"Trim", " Ġ ", " ", "Ġ"},
+	{"Trim", " Ġİ0", "0 ", "Ġİ"},
 	//empty string tests
 	{"Trim", "abba", "", "abba"},
 	{"Trim", "", "123", ""},
@ -1448,3 +1459,31 @@ func BenchmarkBytesCompare(b *testing.B) {
 		})
 	}
 }
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+	x := Repeat([]byte{'#'}, 4096) // Never matches set
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					IndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				x := Repeat([]byte(cs[:j]), k) // Always matches set
+				for i := 0; i < b.N; i++ {
+					Trim(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@ -169,6 +169,16 @@ func IndexRune(s string, r rune) int {
 // from chars in s, or -1 if no Unicode code point from chars is present in s.
 func IndexAny(s, chars string) int {
 	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := 0; i < len(s); i++ {
+					if as.contains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		for i, c := range s {
 			for _, m := range chars {
 				if c == m {
@ -185,11 +195,21 @@ func IndexAny(s, chars string) int {
 // present in s.
 func LastIndexAny(s, chars string) int {
 	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := len(s) - 1; i >= 0; i-- {
+					if as.contains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		for i := len(s); i > 0; {
-			rune, size := utf8.DecodeLastRuneInString(s[0:i])
+			r, size := utf8.DecodeLastRuneInString(s[:i])
 			i -= size
-			for _, m := range chars {
-				if rune == m {
+			for _, c := range chars {
+				if r == c {
 					return i
 				}
 			}
@ -570,7 +590,43 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
 	return -1
 }

+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+	for i := 0; i < len(chars); i++ {
+		c := chars[i]
+		if c >= utf8.RuneSelf {
+			return as, false
+		}
+		as[c>>5] |= 1 << uint(c&31)
+	}
+	return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+	return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
 func makeCutsetFunc(cutset string) func(rune) bool {
+	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+		return func(r rune) bool {
+			return r == rune(cutset[0])
+		}
+	}
+	if as, isASCII := makeASCIISet(cutset); isASCII {
+		return func(r rune) bool {
+			return r < utf8.RuneSelf && as.contains(byte(r))
+		}
+	}
 	return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
 }

--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@ -152,10 +152,15 @@ var indexAnyTests = []IndexTest{
 	{"aaa", "a", 0},
 	{"abc", "xyz", -1},
 	{"abc", "xcz", 2},
-	{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"aRegExp*", ".(|)*+?^$[]", 7},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 4},
+	{"012\x80bcb\x80210", "\xffb", 3},
 }
+
 var lastIndexAnyTests = []IndexTest{
 	{"", "", -1},
 	{"", "a", -1},
@ -165,9 +170,13 @@ var lastIndexAnyTests = []IndexTest{
 	{"aaa", "a", 2},
 	{"abc", "xyz", -1},
 	{"abc", "ab", 1},
-	{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"a.RegExp*", ".(|)*+?^$[]", 8},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 6},
+	{"012\x80bcb\x80210", "\xffb", 7},
 }

 // Execute f on each test case.  funcName should be the name of f; it's used
@ -668,6 +677,9 @@ var trimTests = []struct {
 	{"Trim", "* listitem", " *", "listitem"},
 	{"Trim", `"quote"`, `"`, "quote"},
 	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+	{"Trim", "\x80test\xff", "\xff", "test"},
+	{"Trim", " Ġ ", " ", "Ġ"},
+	{"Trim", " Ġİ0", "0 ", "Ġİ"},
 	//empty string tests
 	{"Trim", "abba", "", "abba"},
 	{"Trim", "", "123", ""},
@ -1487,3 +1499,31 @@ func BenchmarkRepeat(b *testing.B) {
 		Repeat("-", 80)
 	}
 }
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+	x := Repeat("#", 4096) // Never matches set
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					IndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				x := Repeat(cs[:j], k) // Always matches set
+				for i := 0; i < b.N; i++ {
+					Trim(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}