1
0
mirror of https://github.com/golang/go synced 2024-11-23 07:20:06 -07:00

regexp: use sync.Pool to cache regexp.machine objects

Performance optimization for the internals of the Regexp type. This adds
no features and has no user-visible impact beyond performance. Copy now
shares the cache, so memory usage for programs that use Copy a lot
should go down; Copy has effectively become a no-op.

The before v. after benchmark results show a lot of noise from run to
run, but there's a clear improvement to the Shared case and no detriment
to the Copied case.

BenchmarkMatchParallelShared-4                        361           77.9          -78.42%
BenchmarkMatchParallelCopied-4                        70.3          72.2          +2.70%

Macro benchmarks show that the lock contention in Regexp is gone, and my
server is now able to scale linearly 2.5x times more than before (and I
only stopped there because I ran out of CPU in my test machine).

Fixes #24411

Change-Id: Ib33abff2802f27599f5d09084775e95b54e3e1d7
Reviewed-on: https://go-review.googlesource.com/101715
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Javier Kohen 2018-03-20 14:35:37 -04:00 committed by Brad Fitzpatrick
parent daa2d54773
commit 7dbf9d43f5

View File

@ -79,15 +79,13 @@ import (
// A Regexp is safe for concurrent use by multiple goroutines,
// except for configuration methods, such as Longest.
type Regexp struct {
// read-only after Compile
regexpRO
// cache of machines for running regexp. This is a shared pointer across
// all copies of the original Regexp object to decrease the overall
// memory footprint of the regexps (since there will be one machine
// cached per thread instead of one per thread per copy).
machines *sync.Pool
// cache of machines for running regexp
mu sync.Mutex
machine []*machine
}
type regexpRO struct {
// everything below is read-only after Compile
expr string // as passed to Compile
prog *syntax.Prog // compiled program
onepass *onePassProg // onepass program or nil
@ -109,14 +107,10 @@ func (re *Regexp) String() string {
// Copy returns a new Regexp object copied from re.
//
// When using a Regexp in multiple goroutines, giving each goroutine
// its own copy helps to avoid lock contention.
// Deprecated: This exists for historical reasons.
func (re *Regexp) Copy() *Regexp {
// It is not safe to copy Regexp by value
// since it contains a sync.Mutex.
return &Regexp{
regexpRO: re.regexpRO,
}
re2 := *re
return &re2
}
// Compile parses a regular expression and returns, if successful,
@ -179,15 +173,21 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
if err != nil {
return nil, err
}
onepass := compileOnePass(prog)
regexp := &Regexp{
regexpRO: regexpRO{
expr: expr,
prog: prog,
onepass: compileOnePass(prog),
numSubexp: maxCap,
subexpNames: capNames,
cond: prog.StartCond(),
longest: longest,
expr: expr,
prog: prog,
onepass: onepass,
numSubexp: maxCap,
subexpNames: capNames,
cond: prog.StartCond(),
longest: longest,
}
regexp.machines = &sync.Pool{
New: func() interface{} {
z := progMachine(prog, onepass)
z.re = regexp
return z
},
}
if regexp.onepass == notOnePass {
@ -208,17 +208,7 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
// It uses the re's machine cache if possible, to avoid
// unnecessary allocation.
func (re *Regexp) get() *machine {
re.mu.Lock()
if n := len(re.machine); n > 0 {
z := re.machine[n-1]
re.machine = re.machine[:n-1]
re.mu.Unlock()
return z
}
re.mu.Unlock()
z := progMachine(re.prog, re.onepass)
z.re = re
return z
return re.machines.Get().(*machine)
}
// put returns a machine to the re's machine cache.
@ -231,9 +221,7 @@ func (re *Regexp) put(z *machine) {
z.inputString.str = ""
z.inputReader.r = nil
re.mu.Lock()
re.machine = append(re.machine, z)
re.mu.Unlock()
re.machines.Put(z)
}
// MustCompile is like Compile but panics if the expression cannot be parsed.