From 9a8c69539cbd052e2c4b55496d72ec8407c0af52 Mon Sep 17 00:00:00 2001
From: Joe Tsai <joetsai@digital-static.net>
Date: Thu, 20 Oct 2016 03:16:22 -0700
Subject: [PATCH] bytes, strings: optimize for ASCII sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In a large codebase within Google, there are thousands of uses of:
	ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
	 6% are 1   character  (e.g., "\n" or " ")
	58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
	24% are 5-9 characters (e.g., "()[]*^$")
	10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
	71% are 1   character  (e.g., "\n" or " ")
	14% are 2   characters (e.g., "\r\n")
	10% are 3-4 characters (e.g., " \t\r\n")
	 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
---
 src/bytes/bytes.go          | 65 ++++++++++++++++++++++++++++++++++---
 src/bytes/bytes_test.go     | 41 ++++++++++++++++++++++-
 src/strings/strings.go      | 62 +++++++++++++++++++++++++++++++++--
 src/strings/strings_test.go | 44 +++++++++++++++++++++++--
 4 files changed, 201 insertions(+), 11 deletions(-)

diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index 40c7c23cd7..406a38257a 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -160,10 +160,19 @@ func IndexRune(s []byte, r rune) int {
 // point in common.
 func IndexAny(s []byte, chars string) int {
 	if len(chars) > 0 {
-		var r rune
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i, c := range s {
+					if as.contains(c) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		var width int
 		for i := 0; i < len(s); i += width {
-			r = rune(s[i])
+			r := rune(s[i])
 			if r < utf8.RuneSelf {
 				width = 1
 			} else {
@@ -185,11 +194,21 @@ func IndexAny(s []byte, chars string) int {
 // there is no code point in common.
 func LastIndexAny(s []byte, chars string) int {
 	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := len(s) - 1; i >= 0; i-- {
+					if as.contains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		for i := len(s); i > 0; {
-			r, size := utf8.DecodeLastRune(s[0:i])
+			r, size := utf8.DecodeLastRune(s[:i])
 			i -= size
-			for _, ch := range chars {
-				if r == ch {
+			for _, c := range chars {
+				if r == c {
 					return i
 				}
 			}
@@ -573,7 +592,43 @@ func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
 	return -1
 }
 
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+	for i := 0; i < len(chars); i++ {
+		c := chars[i]
+		if c >= utf8.RuneSelf {
+			return as, false
+		}
+		as[c>>5] |= 1 << uint(c&31)
+	}
+	return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+	return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
 func makeCutsetFunc(cutset string) func(r rune) bool {
+	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+		return func(r rune) bool {
+			return r == rune(cutset[0])
+		}
+	}
+	if as, isASCII := makeASCIISet(cutset); isASCII {
+		return func(r rune) bool {
+			return r < utf8.RuneSelf && as.contains(byte(r))
+		}
+	}
 	return func(r rune) bool {
 		for _, c := range cutset {
 			if c == r {
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index 146dc42b0d..26eac5e08c 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -167,8 +167,12 @@ var indexAnyTests = []BinOpTest{
 	{"abc", "xyz", -1},
 	{"abc", "xcz", 2},
 	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"aRegExp*", ".(|)*+?^$[]", 7},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 4},
+	{"012\x80bcb\x80210", "\xffb", 3},
 }
 
 var lastIndexAnyTests = []BinOpTest{
@@ -180,9 +184,13 @@ var lastIndexAnyTests = []BinOpTest{
 	{"aaa", "a", 2},
 	{"abc", "xyz", -1},
 	{"abc", "ab", 1},
-	{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"a.RegExp*", ".(|)*+?^$[]", 8},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 6},
+	{"012\x80bcb\x80210", "\xffb", 7},
 }
 
 // Execute f on each test case.  funcName should be the name of f; it's used
@@ -1029,6 +1037,9 @@ var trimTests = []TrimTest{
 	{"Trim", "* listitem", " *", "listitem"},
 	{"Trim", `"quote"`, `"`, "quote"},
 	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+	{"Trim", "\x80test\xff", "\xff", "test"},
+	{"Trim", " Ġ ", " ", "Ġ"},
+	{"Trim", " Ġİ0", "0 ", "Ġİ"},
 	//empty string tests
 	{"Trim", "abba", "", "abba"},
 	{"Trim", "", "123", ""},
@@ -1448,3 +1459,31 @@ func BenchmarkBytesCompare(b *testing.B) {
 		})
 	}
 }
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+	x := Repeat([]byte{'#'}, 4096) // Never matches set
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					IndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				x := Repeat([]byte(cs[:j]), k) // Always matches set
+				for i := 0; i < b.N; i++ {
+					Trim(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 349989278d..60a281a6ac 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -169,6 +169,16 @@ func IndexRune(s string, r rune) int {
 // from chars in s, or -1 if no Unicode code point from chars is present in s.
 func IndexAny(s, chars string) int {
 	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := 0; i < len(s); i++ {
+					if as.contains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		for i, c := range s {
 			for _, m := range chars {
 				if c == m {
@@ -185,11 +195,21 @@ func IndexAny(s, chars string) int {
 // present in s.
 func LastIndexAny(s, chars string) int {
 	if len(chars) > 0 {
+		if len(s) > 8 {
+			if as, isASCII := makeASCIISet(chars); isASCII {
+				for i := len(s) - 1; i >= 0; i-- {
+					if as.contains(s[i]) {
+						return i
+					}
+				}
+				return -1
+			}
+		}
 		for i := len(s); i > 0; {
-			rune, size := utf8.DecodeLastRuneInString(s[0:i])
+			r, size := utf8.DecodeLastRuneInString(s[:i])
 			i -= size
-			for _, m := range chars {
-				if rune == m {
+			for _, c := range chars {
+				if r == c {
 					return i
 				}
 			}
@@ -570,7 +590,43 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
 	return -1
 }
 
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+	for i := 0; i < len(chars); i++ {
+		c := chars[i]
+		if c >= utf8.RuneSelf {
+			return as, false
+		}
+		as[c>>5] |= 1 << uint(c&31)
+	}
+	return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+	return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
 func makeCutsetFunc(cutset string) func(rune) bool {
+	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+		return func(r rune) bool {
+			return r == rune(cutset[0])
+		}
+	}
+	if as, isASCII := makeASCIISet(cutset); isASCII {
+		return func(r rune) bool {
+			return r < utf8.RuneSelf && as.contains(byte(r))
+		}
+	}
 	return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
 }
 
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
index 6815944899..68b5943c59 100644
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -152,10 +152,15 @@ var indexAnyTests = []IndexTest{
 	{"aaa", "a", 0},
 	{"abc", "xyz", -1},
 	{"abc", "xcz", 2},
-	{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"aRegExp*", ".(|)*+?^$[]", 7},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 4},
+	{"012\x80bcb\x80210", "\xffb", 3},
 }
+
 var lastIndexAnyTests = []IndexTest{
 	{"", "", -1},
 	{"", "a", -1},
@@ -165,9 +170,13 @@ var lastIndexAnyTests = []IndexTest{
 	{"aaa", "a", 2},
 	{"abc", "xyz", -1},
 	{"abc", "ab", 1},
-	{"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+	{"ab☺c", "x☺yz", 2},
+	{"a☺b☻c☹d", "cx", len("a☺b☻")},
+	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
 	{"a.RegExp*", ".(|)*+?^$[]", 8},
 	{dots + dots + dots, " ", -1},
+	{"012abcba210", "\xffb", 6},
+	{"012\x80bcb\x80210", "\xffb", 7},
 }
 
 // Execute f on each test case.  funcName should be the name of f; it's used
@@ -668,6 +677,9 @@ var trimTests = []struct {
 	{"Trim", "* listitem", " *", "listitem"},
 	{"Trim", `"quote"`, `"`, "quote"},
 	{"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+	{"Trim", "\x80test\xff", "\xff", "test"},
+	{"Trim", " Ġ ", " ", "Ġ"},
+	{"Trim", " Ġİ0", "0 ", "Ġİ"},
 	//empty string tests
 	{"Trim", "abba", "", "abba"},
 	{"Trim", "", "123", ""},
@@ -1487,3 +1499,31 @@ func BenchmarkRepeat(b *testing.B) {
 		Repeat("-", 80)
 	}
 }
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+	x := Repeat("#", 4096) // Never matches set
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				for i := 0; i < b.N; i++ {
+					IndexAny(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+	cs := "0123456789abcdef"
+	for k := 1; k <= 4096; k <<= 4 {
+		for j := 1; j <= 16; j <<= 1 {
+			b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+				x := Repeat(cs[:j], k) // Always matches set
+				for i := 0; i < b.N; i++ {
+					Trim(x[:k], cs[:j])
+				}
+			})
+		}
+	}
+}