mirror of
https://github.com/golang/go
synced 2024-11-17 02:04:48 -07:00
regexp: speed up QuoteMeta with a lookup table
This is the same technique used in CL 24466. By adding a little bit of size to the binary, we can remove a function call and gain a lot of performance. A raw array ([128]bool) would be faster, but is also be 128 bytes instead of 16. Running tip on a Mac: name old time/op new time/op delta QuoteMetaAll-4 192ns ±12% 120ns ±11% -37.27% (p=0.000 n=10+10) QuoteMetaNone-4 186ns ± 6% 64ns ± 6% -65.52% (p=0.000 n=10+10) name old speed new speed delta QuoteMetaAll-4 73.2MB/s ±11% 116.6MB/s ±10% +59.21% (p=0.000 n=10+10) QuoteMetaNone-4 139MB/s ± 6% 405MB/s ± 6% +190.74% (p=0.000 n=10+10) Change-Id: I68ce9fe2ef1c28e2274157789b35b0dd6ae3efb5 Reviewed-on: https://go-review.googlesource.com/41495 Run-TryBot: Kevin Burke <kev@inburke.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
642a1cc756
commit
89ebdbb5fd
@ -9,6 +9,7 @@ import (
|
||||
"regexp/syntax"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var goodRe = []string{
|
||||
@ -354,6 +355,7 @@ type MetaTest struct {
|
||||
var metaTests = []MetaTest{
|
||||
{``, ``, ``, true},
|
||||
{`foo`, `foo`, `foo`, true},
|
||||
{`日本語+`, `日本語\+`, `日本語`, false},
|
||||
{`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator
|
||||
{`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators
|
||||
{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
|
||||
@ -822,7 +824,13 @@ func BenchmarkMatchParallelCopied(b *testing.B) {
|
||||
var sink string
|
||||
|
||||
func BenchmarkQuoteMetaAll(b *testing.B) {
|
||||
s := string(specialBytes)
|
||||
specials := make([]byte, 0)
|
||||
for i := byte(0); i < utf8.RuneSelf; i++ {
|
||||
if special(i) {
|
||||
specials = append(specials, i)
|
||||
}
|
||||
}
|
||||
s := string(specials)
|
||||
b.SetBytes(int64(len(s)))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
@ -609,10 +609,18 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
|
||||
})
|
||||
}
|
||||
|
||||
var specialBytes = []byte(`\.+*?()|[]{}^$`)
|
||||
// Bitmap used by func special to check whether a character needs to be escaped.
|
||||
var specialBytes [16]byte
|
||||
|
||||
// special reports whether byte b needs to be escaped by QuoteMeta.
|
||||
func special(b byte) bool {
|
||||
return bytes.IndexByte(specialBytes, b) >= 0
|
||||
return b < utf8.RuneSelf && specialBytes[b%16]&(1<<(b/16)) != 0
|
||||
}
|
||||
|
||||
func init() {
|
||||
for _, b := range []byte(`\.+*?()|[]{}^$`) {
|
||||
specialBytes[b%16] |= 1 << (b / 16)
|
||||
}
|
||||
}
|
||||
|
||||
// QuoteMeta returns a string that quotes all regular expression metacharacters
|
||||
|
Loading…
Reference in New Issue
Block a user