From 89ebdbb5fd548051339705687c25d1d89abc4539 Mon Sep 17 00:00:00 2001 From: Kevin Burke Date: Sun, 23 Apr 2017 22:19:35 -0700 Subject: [PATCH] regexp: speed up QuoteMeta with a lookup table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the same technique used in CL 24466. By adding a little bit of size to the binary, we can remove a function call and gain a lot of performance. A raw array ([128]bool) would be faster, but is also be 128 bytes instead of 16. Running tip on a Mac: name old time/op new time/op delta QuoteMetaAll-4 192ns ±12% 120ns ±11% -37.27% (p=0.000 n=10+10) QuoteMetaNone-4 186ns ± 6% 64ns ± 6% -65.52% (p=0.000 n=10+10) name old speed new speed delta QuoteMetaAll-4 73.2MB/s ±11% 116.6MB/s ±10% +59.21% (p=0.000 n=10+10) QuoteMetaNone-4 139MB/s ± 6% 405MB/s ± 6% +190.74% (p=0.000 n=10+10) Change-Id: I68ce9fe2ef1c28e2274157789b35b0dd6ae3efb5 Reviewed-on: https://go-review.googlesource.com/41495 Run-TryBot: Kevin Burke TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/regexp/all_test.go | 10 +++++++++- src/regexp/regexp.go | 12 ++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/regexp/all_test.go b/src/regexp/all_test.go index beb46e7099..28fe20c15d 100644 --- a/src/regexp/all_test.go +++ b/src/regexp/all_test.go @@ -9,6 +9,7 @@ import ( "regexp/syntax" "strings" "testing" + "unicode/utf8" ) var goodRe = []string{ @@ -354,6 +355,7 @@ type MetaTest struct { var metaTests = []MetaTest{ {``, ``, ``, true}, {`foo`, `foo`, `foo`, true}, + {`日本語+`, `日本語\+`, `日本語`, false}, {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false}, @@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) { var sink string func BenchmarkQuoteMetaAll(b *testing.B) { - s := string(specialBytes) + specials := make([]byte, 0) + for i := byte(0); i < utf8.RuneSelf; i++ { + if special(i) { + specials = append(specials, i) + } + } + s := string(specials) b.SetBytes(int64(len(s))) b.ResetTimer() for i := 0; i < b.N; i++ { diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go index 924b011991..b1af23e850 100644 --- a/src/regexp/regexp.go +++ b/src/regexp/regexp.go @@ -609,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { }) } -var specialBytes = []byte(`\.+*?()|[]{}^$`) +// Bitmap used by func special to check whether a character needs to be escaped. +var specialBytes [16]byte +// special reports whether byte b needs to be escaped by QuoteMeta. func special(b byte) bool { - return bytes.IndexByte(specialBytes, b) >= 0 + return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0 +} + +func init() { + for _, b := range []byte(`\.+*?()|[]{}^$`) { + specialBytes[b%16] |= 1 << (b / 16) + } } // QuoteMeta returns a string that quotes all regular expression metacharacters