diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index e872cc20506..e7931387aa6 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -117,17 +117,17 @@ func LastIndex(s, sep []byte) int { return -1 } // Rabin-Karp search from the end of the string - hashss, pow := hashStrRev(sep) + hashss, pow := bytealg.HashStrRevBytes(sep) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { - h = h*primeRK + uint32(s[i]) + h = h*bytealg.PrimeRK + uint32(s[i]) } if h == hashss && Equal(s[last:], sep) { return last } for i := last - 1; i >= 0; i-- { - h *= primeRK + h *= bytealg.PrimeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && Equal(s[i:i+n], sep) { @@ -1068,7 +1068,7 @@ func Index(s, sep []byte) int { // we should cutover at even larger average skips, // because Equal becomes that much more expensive. // This code does not take that effect into account. - j := indexRabinKarp(s[i:], sep) + j := bytealg.IndexRabinKarpBytes(s[i:], sep) if j < 0 { return -1 } @@ -1077,63 +1077,3 @@ func Index(s, sep []byte) int { } return -1 } - -func indexRabinKarp(s, sep []byte) int { - // Rabin-Karp search - hashsep, pow := hashStr(sep) - n := len(sep) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashsep && Equal(s[:n], sep) { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashsep && Equal(s[i-n:i], sep) { - return i - n - } - } - return -1 -} - -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. 
-func hashStr(sep []byte) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// hashStrRev returns the hash of the reverse of sep and the -// appropriate multiplicative factor for use in Rabin-Karp algorithm. -func hashStrRev(sep []byte) (uint32, uint32) { - hash := uint32(0) - for i := len(sep) - 1; i >= 0; i-- { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index 2dbbb99f37a..a208d4ed763 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -141,9 +141,10 @@ var indexTests = []BinOpTest{ {"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33}, {"foofyfoobarfoobar", "y", 4}, {"oooooooooooooooooooooo", "r", -1}, - // test fallback to Rabin-Karp. {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22}, {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1}, + // test fallback to Rabin-Karp. + {"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5}, } var lastIndexTests = []BinOpTest{ @@ -209,6 +210,27 @@ func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, tes t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i) } } + var allocTests = []struct { + a []byte + b []byte + i int + }{ + // allocTests[0]: passed to LastIndex in the AllocsPerRun closure below. + {[]byte("000000000000000000000000000000000000000000000000000000000000000000000001"), []byte("0000000000000000000000000000000000000000000000000000000000000000001"), 5}, + // allocTests[1]: passed to Index in the AllocsPerRun closure below. 
+ {[]byte("000000000000000000000000000000000000000000000000000000000000000010000"), []byte("00000000000000000000000000000000000000000000000000000000000001"), 3}, + } + allocs := testing.AllocsPerRun(100, func() { + if i := Index(allocTests[1].a, allocTests[1].b); i != allocTests[1].i { + t.Errorf("Index([]byte(%q), []byte(%q)) = %v; want %v", allocTests[1].a, allocTests[1].b, i, allocTests[1].i) + } + if i := LastIndex(allocTests[0].a, allocTests[0].b); i != allocTests[0].i { + t.Errorf("LastIndex([]byte(%q), []byte(%q)) = %v; want %v", allocTests[0].a, allocTests[0].b, i, allocTests[0].i) + } + }) + if allocs != 0 { + t.Errorf("expected no allocations, got %f", allocs) + } } func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) { diff --git a/src/internal/bytealg/bytealg.go b/src/internal/bytealg/bytealg.go index 9ecd8eb004b..4c90cd3671e 100644 --- a/src/internal/bytealg/bytealg.go +++ b/src/internal/bytealg/bytealg.go @@ -21,3 +21,128 @@ const ( // MaxLen is the maximum length of the string to be searched for (argument b) in Index. var MaxLen int + +// FIXME: HashStrBytes, HashStrRevBytes and IndexRabinKarpBytes duplicate the logic of HashStr, +// HashStrRev and IndexRabinKarp exactly; only the argument types ([]byte vs. string) differ. Can we +// eliminate three of them without causing allocation? + +// PrimeRK is the prime base used in the Rabin-Karp algorithm. +const PrimeRK = 16777619 + +// HashStrBytes returns the hash of sep and the multiplicative factor +// PrimeRK**len(sep) (with uint32 wraparound) for use in the Rabin-Karp algorithm. +func HashStrBytes(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// HashStr returns the hash of sep and the multiplicative factor +// PrimeRK**len(sep) (with uint32 wraparound) for use in the Rabin-Karp algorithm. 
+func HashStr(sep string) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// HashStrRevBytes returns the hash of the reverse of sep and the +// appropriate multiplicative factor for use in the Rabin-Karp algorithm. +func HashStrRevBytes(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := len(sep) - 1; i >= 0; i-- { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// HashStrRev returns the hash of the reverse of sep and the +// appropriate multiplicative factor for use in the Rabin-Karp algorithm. +func HashStrRev(sep string) (uint32, uint32) { + hash := uint32(0) + for i := len(sep) - 1; i >= 0; i-- { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the first +// occurrence of sep in s, or -1 if not present. It indexes s[:len(sep)] unchecked, so len(s) >= len(sep) is required. +func IndexRabinKarpBytes(s, sep []byte) int { + // Rabin-Karp search: hash the first n bytes of s, then roll the window forward one byte at a time. + hashsep, pow := HashStrBytes(sep) + n := len(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*PrimeRK + uint32(s[i]) + } + if h == hashsep && Equal(s[:n], sep) { + return 0 + } + for i := n; i < len(s); { + h *= PrimeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && Equal(s[i-n:i], sep) { + return i - n + } + } + return -1 +} + +// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the first +// occurrence of substr in s, or -1 if not present. It indexes s[:len(substr)] unchecked, so len(s) >= len(substr) is required. 
+func IndexRabinKarp(s, substr string) int { + // Rabin-Karp search: hash the first n bytes of s, then roll the window forward one byte at a time. + hashss, pow := HashStr(substr) + n := len(substr) + var h uint32 + for i := 0; i < n; i++ { + h = h*PrimeRK + uint32(s[i]) + } + if h == hashss && s[:n] == substr { + return 0 + } + for i := n; i < len(s); { + h *= PrimeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashss && s[i-n:i] == substr { + return i - n + } + } + return -1 +} diff --git a/src/strings/strings.go b/src/strings/strings.go index 238d657f61b..7fb05b7d0eb 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -36,43 +36,6 @@ func explode(s string, n int) []string { return a } -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep string) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// hashStrRev returns the hash of the reverse of sep and the -// appropriate multiplicative factor for use in Rabin-Karp algorithm. -func hashStrRev(sep string) (uint32, uint32) { - hash := uint32(0) - for i := len(sep) - 1; i >= 0; i-- { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - // Count counts the number of non-overlapping instances of substr in s. // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. 
func Count(s, substr string) int { @@ -126,17 +89,17 @@ func LastIndex(s, substr string) int { return -1 } // Rabin-Karp search from the end of the string - hashss, pow := hashStrRev(substr) + hashss, pow := bytealg.HashStrRev(substr) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { - h = h*primeRK + uint32(s[i]) + h = h*bytealg.PrimeRK + uint32(s[i]) } if h == hashss && s[last:] == substr { return last } for i := last - 1; i >= 0; i-- { - h *= primeRK + h *= bytealg.PrimeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && s[i:i+n] == substr { @@ -1095,7 +1058,7 @@ func Index(s, substr string) int { fails++ if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes.go. - j := indexRabinKarp(s[i:], substr) + j := bytealg.IndexRabinKarp(s[i:], substr) if j < 0 { return -1 } @@ -1104,26 +1067,3 @@ func Index(s, substr string) int { } return -1 } - -func indexRabinKarp(s, substr string) int { - // Rabin-Karp search - hashss, pow := hashStr(substr) - n := len(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 -}