[release-branch.go1.15] bytes, internal/bytealg: fix incorrect IndexString usage

The IndexString implementation in the bytealg package requires that the string passed into it be in the range '2 <= len(s) <= MaxLen' where MaxLen may be any value (including 0). CL 156998 added calls to bytealg.IndexString where MaxLen was not first checked. This led to an illegal instruction on s390x with the vector facility disabled. This CL guards the calls to bytealg.IndexString with a MaxLen check. If the check fails then the code now falls back to the pre CL 156998 implementation (a loop over the runes in the string). Since the MaxLen check is now in place the generic implementation is no longer called so I have returned it to its original unimplemented state. In future we may want to drop MaxLen to prevent this kind of confusion. Fixes #41595. Change-Id: I81d88cf8c5ae143a8f5f460d18f8269cb6c0f28c Reviewed-on: https://go-review.googlesource.com/c/go/+/256921 Trust: Michael Munday <mike.munday@ibm.com> Run-TryBot: Michael Munday <mike.munday@ibm.com> Reviewed-by: Keith Randall <khr@golang.org> TryBot-Result: Go Bot <gobot@golang.org>
2024-11-14 19:50:21 -07:00 · 2020-09-23 03:58:52 -07:00 · 2020-09-23 03:58:52 -07:00 · af06e65910
commit af06e65910
parent 884179022e
2 changed files with 34 additions and 54 deletions
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@ -227,19 +227,26 @@ func IndexAny(s []byte, chars string) int {
 			continue
 		}
 		r, width = utf8.DecodeRune(s[i:])
-		if r == utf8.RuneError {
-			for _, r = range chars {
-				if r == utf8.RuneError {
+		if r != utf8.RuneError {
+			// r is 2 to 4 bytes
+			if len(chars) == width {
+				if chars == string(r) {
 					return i
 				}
+				continue
+			}
+			// Use bytealg.IndexString for performance if available.
+			if bytealg.MaxLen >= width {
+				if bytealg.IndexString(chars, string(r)) >= 0 {
+					return i
+				}
+				continue
 			}
-			continue
 		}
-		// r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
-		// package should not import the strings package, use bytealg.IndexString
-		// instead. And this does not seem to lose much performance.
-		if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
-			return i
+		for _, ch := range chars {
+			if r == ch {
+				return i
+			}
 		}
 	}
 	return -1
@ -304,19 +311,26 @@ func LastIndexAny(s []byte, chars string) int {
 		}
 		r, size := utf8.DecodeLastRune(s[:i])
 		i -= size
-		if r == utf8.RuneError {
-			for _, r = range chars {
-				if r == utf8.RuneError {
+		if r != utf8.RuneError {
+			// r is 2 to 4 bytes
+			if len(chars) == size {
+				if chars == string(r) {
 					return i
 				}
+				continue
+			}
+			// Use bytealg.IndexString for performance if available.
+			if bytealg.MaxLen >= size {
+				if bytealg.IndexString(chars, string(r)) >= 0 {
+					return i
+				}
+				continue
 			}
-			continue
 		}
-		// r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
-		// package should not import the strings package, use bytealg.IndexString
-		// instead. And this does not seem to lose much performance.
-		if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
-			return i
+		for _, ch := range chars {
+			if r == ch {
+				return i
+			}
 		}
 	}
 	return -1
--- a/src/internal/bytealg/index_generic.go
+++ b/src/internal/bytealg/index_generic.go
@ -16,42 +16,8 @@ func Index(a, b []byte) int {

 // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
 // Requires 2 <= len(b) <= MaxLen.
-func IndexString(s, substr string) int {
-	// This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny
-	// call bytealg.IndexString. Some platforms have an optimized assembly version of this function.
-	// This implementation is used for those that do not. Although the pure Go implementation here
-	// works for the case of len(b) > MaxLen, we do not require that its assembly implementation also
-	// supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the
-	// case of len(b) > MaxLen.
-	n := len(substr)
-	c0 := substr[0]
-	c1 := substr[1]
-	i := 0
-	t := len(s) - n + 1
-	fails := 0
-	for i < t {
-		if s[i] != c0 {
-			o := IndexByteString(s[i:t], c0)
-			if o < 0 {
-				return -1
-			}
-			i += o
-		}
-		if s[i+1] == c1 && s[i:i+n] == substr {
-			return i
-		}
-		i++
-		fails++
-		if fails >= 4+i>>4 && i < t {
-			// See comment in src/bytes/bytes.go.
-			j := IndexRabinKarp(s[i:], substr)
-			if j < 0 {
-				return -1
-			}
-			return i + j
-		}
-	}
-	return -1
+func IndexString(a, b string) int {
+	panic("unimplemented")
 }

 // Cutover reports the number of failures of IndexByte we should tolerate