mirror of
https://github.com/golang/go
synced 2024-11-21 20:04:44 -07:00
Optimization to regexp _CharClass: keep track of overall range of
charclass to avoid unnecessarily iterating over ranges. Also, use the fact that IntVector is an []int to avoid method calls. On my machine, this brings us from ~27500 ns/op to ~17500 ns/op in the benchmark I've added (it is also faster in the case where a range check doesn't help, added a benchmark for this too.) I'd also like to propose that "[]", and "[^]" be disallowed. They aren't useful as far as I can tell, they aren't widely supported, and they make reasoning about character classes a bit more complicated. R=r CC=golang-dev https://golang.org/cl/1495041
This commit is contained in:
parent
3d4c97b82c
commit
aae02a1855
@ -519,7 +519,7 @@ var numSubexpCases = []numSubexpCase{
|
||||
|
||||
func TestNumSubexp(t *testing.T) {
|
||||
for _, c := range numSubexpCases {
|
||||
re, _ := Compile(c.input)
|
||||
re := MustCompile(c.input)
|
||||
n := re.NumSubexp()
|
||||
if n != c.expected {
|
||||
t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected)
|
||||
@ -530,7 +530,7 @@ func TestNumSubexp(t *testing.T) {
|
||||
func BenchmarkLiteral(b *testing.B) {
|
||||
x := strings.Repeat("x", 50)
|
||||
b.StopTimer()
|
||||
re, _ := Compile(x)
|
||||
re := MustCompile(x)
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
@ -543,7 +543,35 @@ func BenchmarkLiteral(b *testing.B) {
|
||||
func BenchmarkNotLiteral(b *testing.B) {
|
||||
x := strings.Repeat("x", 49)
|
||||
b.StopTimer()
|
||||
re, _ := Compile("^" + x)
|
||||
re := MustCompile("^" + x)
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
println("no match!")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchClass(b *testing.B) {
|
||||
b.StopTimer()
|
||||
x := strings.Repeat("xxxx", 20) + "w"
|
||||
re := MustCompile("[abcdw]")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
println("no match!")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMatchClass_InRange(b *testing.B) {
|
||||
b.StopTimer()
|
||||
// 'b' is betwen 'a' and 'c', so the charclass
|
||||
// range checking is no help here.
|
||||
x := strings.Repeat("bbbb", 20) + "c"
|
||||
re := MustCompile("[ac]")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if !re.MatchString(x) {
|
||||
@ -556,7 +584,7 @@ func BenchmarkNotLiteral(b *testing.B) {
|
||||
func BenchmarkReplaceAll(b *testing.B) {
|
||||
x := "abcdefghijklmnopqrstuvwxyz"
|
||||
b.StopTimer()
|
||||
re, _ := Compile("[cjrw]")
|
||||
re := MustCompile("[cjrw]")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
re.ReplaceAllString(x, "")
|
||||
|
@ -152,10 +152,10 @@ func newChar(char int) *_Char {
|
||||
|
||||
type _CharClass struct {
|
||||
common
|
||||
char int
|
||||
negate bool // is character class negated? ([^a-z])
|
||||
// vector of int, stored pairwise: [a-z] is (a,z); x is (x,x):
|
||||
ranges *vector.IntVector
|
||||
ranges *vector.IntVector
|
||||
cmin, cmax int
|
||||
}
|
||||
|
||||
func (cclass *_CharClass) kind() int { return _CHARCLASS }
|
||||
@ -180,13 +180,21 @@ func (cclass *_CharClass) addRange(a, b int) {
|
||||
// range is a through b inclusive
|
||||
cclass.ranges.Push(a)
|
||||
cclass.ranges.Push(b)
|
||||
if a < cclass.cmin {
|
||||
cclass.cmin = a
|
||||
}
|
||||
if b > cclass.cmax {
|
||||
cclass.cmax = b
|
||||
}
|
||||
}
|
||||
|
||||
func (cclass *_CharClass) matches(c int) bool {
|
||||
for i := 0; i < cclass.ranges.Len(); i = i + 2 {
|
||||
min := cclass.ranges.At(i)
|
||||
max := cclass.ranges.At(i + 1)
|
||||
if min <= c && c <= max {
|
||||
if c < cclass.cmin || c > cclass.cmax {
|
||||
return cclass.negate
|
||||
}
|
||||
ranges := []int(*cclass.ranges)
|
||||
for i := 0; i < len(ranges); i = i + 2 {
|
||||
if ranges[i] <= c && c <= ranges[i+1] {
|
||||
return !cclass.negate
|
||||
}
|
||||
}
|
||||
@ -196,6 +204,8 @@ func (cclass *_CharClass) matches(c int) bool {
|
||||
func newCharClass() *_CharClass {
|
||||
c := new(_CharClass)
|
||||
c.ranges = new(vector.IntVector)
|
||||
c.cmin = 0x10FFFF + 1 // MaxRune + 1
|
||||
c.cmax = -1
|
||||
return c
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user