mirror of
https://github.com/golang/go
synced 2024-11-17 02:14:42 -07:00
regexp: split bit-state execution out of machine struct
This allows the bit-state executions to have their own pool of allocated structures. A step toward eliminating the per-Regexp machine cache. Note especially the -92% on MatchParallelShared. This is real but not a complete story: the other execution engines still need to be de-shared, but the benchmark was only using bit-state. The tiny slowdowns in unrelated code are noise. name old time/op new time/op delta Find-12 264ns ± 3% 254ns ± 0% -3.86% (p=0.000 n=10+9) FindAllNoMatches-12 140ns ± 2% 135ns ± 0% -3.91% (p=0.000 n=10+9) FindString-12 256ns ± 0% 247ns ± 0% -3.52% (p=0.000 n=8+8) FindSubmatch-12 339ns ± 1% 334ns ± 0% -1.41% (p=0.000 n=9+10) FindStringSubmatch-12 322ns ± 0% 321ns ± 0% -0.21% (p=0.005 n=8+9) Literal-12 100ns ± 2% 92ns ± 0% -8.10% (p=0.000 n=10+9) NotLiteral-12 1.50µs ± 0% 1.47µs ± 0% -1.91% (p=0.000 n=8+9) MatchClass-12 2.18µs ± 0% 2.17µs ± 0% -0.20% (p=0.001 n=10+7) MatchClass_InRange-12 2.12µs ± 0% 2.13µs ± 0% +0.23% (p=0.000 n=10+10) ReplaceAll-12 1.41µs ± 0% 1.39µs ± 0% -1.30% (p=0.000 n=7+10) AnchoredLiteralShortNonMatch-12 89.8ns ± 0% 83.2ns ± 0% -7.35% (p=0.000 n=8+8) AnchoredLiteralLongNonMatch-12 105ns ± 3% 105ns ± 0% ~ (p=0.186 n=10+10) AnchoredShortMatch-12 141ns ± 0% 131ns ± 0% -7.09% (p=0.000 n=9+10) AnchoredLongMatch-12 276ns ± 4% 267ns ± 0% -3.23% (p=0.000 n=10+10) OnePassShortA-12 620ns ± 0% 611ns ± 0% -1.39% (p=0.000 n=10+9) NotOnePassShortA-12 575ns ± 3% 552ns ± 0% -3.97% (p=0.000 n=10+8) OnePassShortB-12 493ns ± 0% 491ns ± 0% -0.33% (p=0.000 n=8+8) NotOnePassShortB-12 423ns ± 0% 412ns ± 0% -2.60% (p=0.000 n=8+9) OnePassLongPrefix-12 112ns ± 0% 112ns ± 0% ~ (all equal) OnePassLongNotPrefix-12 405ns ± 0% 410ns ± 0% +1.23% (p=0.000 n=8+9) MatchParallelShared-12 501ns ± 1% 39ns ± 1% -92.27% (p=0.000 n=10+10) MatchParallelCopied-12 39.1ns ± 0% 39.2ns ± 3% ~ (p=0.785 n=6+10) QuoteMetaAll-12 94.6ns ± 0% 94.6ns ± 0% ~ (p=0.439 n=10+8) QuoteMetaNone-12 52.7ns ± 0% 52.7ns ± 0% ~ (all equal) Match/Easy0/32-12 79.1ns ± 0% 72.9ns ± 0% -7.85% (p=0.000 n=9+9) Match/Easy0/1K-12 307ns ± 1% 298ns ± 0% -2.99% (p=0.000 n=10+6) Match/Easy0/32K-12 4.65µs ± 2% 4.60µs ± 2% ~ (p=0.159 n=10+10) Match/Easy0/1M-12 234µs ± 0% 235µs ± 0% +0.17% (p=0.003 n=10+10) Match/Easy0/32M-12 7.98ms ± 1% 7.96ms ± 0% ~ (p=0.278 n=9+10) Match/Easy0i/32-12 1.13µs ± 1% 1.09µs ± 0% -3.24% (p=0.000 n=9+8) Match/Easy0i/1K-12 32.5µs ± 0% 31.7µs ± 0% -2.66% (p=0.000 n=9+9) Match/Easy0i/32K-12 1.59ms ± 0% 1.61ms ± 0% +0.75% (p=0.000 n=9+9) Match/Easy0i/1M-12 51.0ms ± 0% 51.4ms ± 0% +0.77% (p=0.000 n=10+8) Match/Easy0i/32M-12 1.63s ± 0% 1.65s ± 1% +1.24% (p=0.000 n=7+9) Match/Easy1/32-12 75.1ns ± 1% 67.9ns ± 0% -9.54% (p=0.000 n=8+8) Match/Easy1/1K-12 861ns ± 0% 884ns ± 0% +2.71% (p=0.000 n=8+9) Match/Easy1/32K-12 39.2µs ± 1% 39.2µs ± 0% ~ (p=0.090 n=10+9) Match/Easy1/1M-12 1.38ms ± 0% 1.39ms ± 0% ~ (p=0.095 n=10+9) Match/Easy1/32M-12 44.2ms ± 1% 44.2ms ± 1% ~ (p=0.218 n=10+10) Match/Medium/32-12 1.04µs ± 1% 1.05µs ± 0% +1.05% (p=0.000 n=9+8) Match/Medium/1K-12 31.3µs ± 0% 31.3µs ± 0% -0.14% (p=0.004 n=9+9) Match/Medium/32K-12 1.44ms ± 0% 1.45ms ± 0% +0.18% (p=0.001 n=8+8) Match/Medium/1M-12 46.1ms ± 0% 46.2ms ± 0% +0.13% (p=0.003 n=6+9) Match/Medium/32M-12 1.48s ± 0% 1.48s ± 0% +0.20% (p=0.002 n=9+8) Match/Hard/32-12 1.54µs ± 1% 1.49µs ± 0% -3.60% (p=0.000 n=9+10) Match/Hard/1K-12 46.4µs ± 1% 45.1µs ± 1% -2.78% (p=0.000 n=9+10) Match/Hard/32K-12 2.19ms ± 0% 2.18ms ± 1% -0.51% (p=0.006 n=8+9) Match/Hard/1M-12 70.1ms ± 0% 69.7ms ± 1% -0.52% (p=0.006 n=8+9) Match/Hard/32M-12 2.24s ± 0% 2.23s ± 1% -0.42% (p=0.046 n=8+9) Match/Hard1/32-12 8.17µs ± 1% 7.89µs ± 0% -3.42% (p=0.000 n=8+9) Match/Hard1/1K-12 254µs ± 2% 244µs ± 0% -3.91% (p=0.000 n=9+9) Match/Hard1/32K-12 9.58ms ± 1% 10.35ms ± 0% +8.00% (p=0.000 n=10+10) Match/Hard1/1M-12 306ms ± 1% 331ms ± 0% +8.27% (p=0.000 n=9+8) Match/Hard1/32M-12 9.79s ± 1% 10.60s ± 0% +8.29% (p=0.000 n=9+8) Match_onepass_regex/32-12 808ns ± 0% 812ns ± 0% +0.47% (p=0.000 n=8+10) Match_onepass_regex/1K-12 27.8µs ± 0% 28.5µs ± 0% +2.32% (p=0.000 n=8+10) Match_onepass_regex/32K-12 925µs ± 0% 936µs ± 0% +1.24% (p=0.000 n=9+10) Match_onepass_regex/1M-12 29.5ms ± 0% 30.2ms ± 0% +2.38% (p=0.000 n=10+10) Match_onepass_regex/32M-12 945ms ± 0% 970ms ± 0% +2.60% (p=0.000 n=9+10) CompileOnepass-12 4.67µs ± 0% 4.63µs ± 1% -0.84% (p=0.000 n=10+10) [Geo mean] 24.5µs 23.3µs -5.04% https://perf.golang.org/search?q=upload:20181004.1 Change-Id: Idbc2b76223718265657819ff38be2d9aba1c54b4 Reviewed-on: https://go-review.googlesource.com/c/139779 Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
8e0aea162b
commit
2d4346b319
@ -14,7 +14,10 @@
|
||||
|
||||
package regexp
|
||||
|
||||
import "regexp/syntax"
|
||||
import (
|
||||
"regexp/syntax"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// A job is an entry on the backtracker's job stack. It holds
|
||||
// the instruction pc and the position in the input.
|
||||
@ -32,15 +35,29 @@ const (
|
||||
|
||||
// bitState holds state for the backtracker.
|
||||
type bitState struct {
|
||||
prog *syntax.Prog
|
||||
end int
|
||||
cap []int
|
||||
matchcap []int
|
||||
jobs []job
|
||||
visited []uint32
|
||||
|
||||
end int
|
||||
cap []int
|
||||
jobs []job
|
||||
visited []uint32
|
||||
inputs inputs
|
||||
}
|
||||
|
||||
var notBacktrack *bitState = nil
|
||||
var bitStatePool sync.Pool
|
||||
|
||||
func newBitState() *bitState {
|
||||
b, ok := bitStatePool.Get().(*bitState)
|
||||
if !ok {
|
||||
b = new(bitState)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func freeBitState(b *bitState) {
|
||||
b.inputs.clear()
|
||||
bitStatePool.Put(b)
|
||||
}
|
||||
|
||||
// maxBitStateLen returns the maximum length of a string to search with
|
||||
// the backtracker using prog.
|
||||
@ -51,18 +68,6 @@ func maxBitStateLen(prog *syntax.Prog) int {
|
||||
return maxBacktrackVector / len(prog.Inst)
|
||||
}
|
||||
|
||||
// newBitState returns a new bitState for the given prog,
|
||||
// or notBacktrack if the size of the prog exceeds the maximum size that
|
||||
// the backtracker will be run for.
|
||||
func newBitState(prog *syntax.Prog) *bitState {
|
||||
if !shouldBacktrack(prog) {
|
||||
return notBacktrack
|
||||
}
|
||||
return &bitState{
|
||||
prog: prog,
|
||||
}
|
||||
}
|
||||
|
||||
// shouldBacktrack reports whether the program is too
|
||||
// long for the backtracker to run.
|
||||
func shouldBacktrack(prog *syntax.Prog) bool {
|
||||
@ -72,7 +77,7 @@ func shouldBacktrack(prog *syntax.Prog) bool {
|
||||
// reset resets the state of the backtracker.
|
||||
// end is the end position in the input.
|
||||
// ncap is the number of captures.
|
||||
func (b *bitState) reset(end int, ncap int) {
|
||||
func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) {
|
||||
b.end = end
|
||||
|
||||
if cap(b.jobs) == 0 {
|
||||
@ -81,7 +86,7 @@ func (b *bitState) reset(end int, ncap int) {
|
||||
b.jobs = b.jobs[:0]
|
||||
}
|
||||
|
||||
visitedSize := (len(b.prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
|
||||
visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits
|
||||
if cap(b.visited) < visitedSize {
|
||||
b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits)
|
||||
} else {
|
||||
@ -99,6 +104,15 @@ func (b *bitState) reset(end int, ncap int) {
|
||||
for i := range b.cap {
|
||||
b.cap[i] = -1
|
||||
}
|
||||
|
||||
if cap(b.matchcap) < ncap {
|
||||
b.matchcap = make([]int, ncap)
|
||||
} else {
|
||||
b.matchcap = b.matchcap[:ncap]
|
||||
}
|
||||
for i := range b.matchcap {
|
||||
b.matchcap[i] = -1
|
||||
}
|
||||
}
|
||||
|
||||
// shouldVisit reports whether the combination of (pc, pos) has not
|
||||
@ -114,20 +128,19 @@ func (b *bitState) shouldVisit(pc uint32, pos int) bool {
|
||||
|
||||
// push pushes (pc, pos, arg) onto the job stack if it should be
|
||||
// visited.
|
||||
func (b *bitState) push(pc uint32, pos int, arg bool) {
|
||||
func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) {
|
||||
// Only check shouldVisit when arg is false.
|
||||
// When arg is true, we are continuing a previous visit.
|
||||
if b.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
|
||||
if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) {
|
||||
b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos})
|
||||
}
|
||||
}
|
||||
|
||||
// tryBacktrack runs a backtracking search starting at pos.
|
||||
func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
longest := m.re.longest
|
||||
m.matched = false
|
||||
func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
longest := re.longest
|
||||
|
||||
b.push(pc, pos, false)
|
||||
b.push(re, pc, pos, false)
|
||||
for len(b.jobs) > 0 {
|
||||
l := len(b.jobs) - 1
|
||||
// Pop job off the stack.
|
||||
@ -150,7 +163,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
}
|
||||
Skip:
|
||||
|
||||
inst := b.prog.Inst[pc]
|
||||
inst := re.prog.Inst[pc]
|
||||
|
||||
switch inst.Op {
|
||||
default:
|
||||
@ -172,23 +185,23 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
pc = inst.Arg
|
||||
goto CheckAndLoop
|
||||
} else {
|
||||
b.push(pc, pos, true)
|
||||
b.push(re, pc, pos, true)
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
}
|
||||
|
||||
case syntax.InstAltMatch:
|
||||
// One opcode consumes runes; the other leads to match.
|
||||
switch b.prog.Inst[inst.Out].Op {
|
||||
switch re.prog.Inst[inst.Out].Op {
|
||||
case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
|
||||
// inst.Arg is the match.
|
||||
b.push(inst.Arg, pos, false)
|
||||
b.push(re, inst.Arg, pos, false)
|
||||
pc = inst.Arg
|
||||
pos = b.end
|
||||
goto CheckAndLoop
|
||||
}
|
||||
// inst.Out is the match - non-greedy
|
||||
b.push(inst.Out, b.end, false)
|
||||
b.push(re, inst.Out, b.end, false)
|
||||
pc = inst.Out
|
||||
goto CheckAndLoop
|
||||
|
||||
@ -236,7 +249,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
} else {
|
||||
if 0 <= inst.Arg && inst.Arg < uint32(len(b.cap)) {
|
||||
// Capture pos to register, but save old value.
|
||||
b.push(pc, b.cap[inst.Arg], true) // come back when we're done.
|
||||
b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done.
|
||||
b.cap[inst.Arg] = pos
|
||||
}
|
||||
pc = inst.Out
|
||||
@ -258,8 +271,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
// We found a match. If the caller doesn't care
|
||||
// where the match is, no point going further.
|
||||
if len(b.cap) == 0 {
|
||||
m.matched = true
|
||||
return m.matched
|
||||
return true
|
||||
}
|
||||
|
||||
// Record best match so far.
|
||||
@ -268,19 +280,18 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
if len(b.cap) > 1 {
|
||||
b.cap[1] = pos
|
||||
}
|
||||
if !m.matched || (longest && pos > 0 && pos > m.matchcap[1]) {
|
||||
copy(m.matchcap, b.cap)
|
||||
if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) {
|
||||
copy(b.matchcap, b.cap)
|
||||
}
|
||||
m.matched = true
|
||||
|
||||
// If going for first match, we're done.
|
||||
if !longest {
|
||||
return m.matched
|
||||
return true
|
||||
}
|
||||
|
||||
// If we used the entire text, no longer match is possible.
|
||||
if pos == b.end {
|
||||
return m.matched
|
||||
return true
|
||||
}
|
||||
|
||||
// Otherwise, continue on in hope of a longer match.
|
||||
@ -288,65 +299,68 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
|
||||
}
|
||||
}
|
||||
|
||||
return m.matched
|
||||
return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0
|
||||
}
|
||||
|
||||
// backtrack runs a backtracking search of prog on the input starting at pos.
|
||||
func (m *machine) backtrack(i input, pos int, end int, ncap int) bool {
|
||||
if !i.canCheckPrefix() {
|
||||
panic("backtrack called for a RuneReader")
|
||||
}
|
||||
|
||||
startCond := m.re.cond
|
||||
func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int {
|
||||
startCond := re.cond
|
||||
if startCond == ^syntax.EmptyOp(0) { // impossible
|
||||
return false
|
||||
return nil
|
||||
}
|
||||
if startCond&syntax.EmptyBeginText != 0 && pos != 0 {
|
||||
// Anchored match, past beginning of text.
|
||||
return false
|
||||
return nil
|
||||
}
|
||||
|
||||
b := m.b
|
||||
b.reset(end, ncap)
|
||||
|
||||
m.matchcap = m.matchcap[:ncap]
|
||||
for i := range m.matchcap {
|
||||
m.matchcap[i] = -1
|
||||
}
|
||||
b := newBitState()
|
||||
i, end := b.inputs.init(nil, ib, is)
|
||||
b.reset(re.prog, end, ncap)
|
||||
|
||||
// Anchored search must start at the beginning of the input
|
||||
if startCond&syntax.EmptyBeginText != 0 {
|
||||
if len(b.cap) > 0 {
|
||||
b.cap[0] = pos
|
||||
}
|
||||
return m.tryBacktrack(b, i, uint32(m.p.Start), pos)
|
||||
}
|
||||
if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
|
||||
freeBitState(b)
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
|
||||
// Unanchored search, starting from each possible text position.
|
||||
// Notice that we have to try the empty string at the end of
|
||||
// the text, so the loop condition is pos <= end, not pos < end.
|
||||
// This looks like it's quadratic in the size of the text,
|
||||
// but we are not clearing visited between calls to TrySearch,
|
||||
// so no work is duplicated and it ends up still being linear.
|
||||
width := -1
|
||||
for ; pos <= end && width != 0; pos += width {
|
||||
if len(m.re.prefix) > 0 {
|
||||
// Match requires literal prefix; fast search for it.
|
||||
advance := i.index(m.re, pos)
|
||||
if advance < 0 {
|
||||
return false
|
||||
// Unanchored search, starting from each possible text position.
|
||||
// Notice that we have to try the empty string at the end of
|
||||
// the text, so the loop condition is pos <= end, not pos < end.
|
||||
// This looks like it's quadratic in the size of the text,
|
||||
// but we are not clearing visited between calls to TrySearch,
|
||||
// so no work is duplicated and it ends up still being linear.
|
||||
width := -1
|
||||
for ; pos <= end && width != 0; pos += width {
|
||||
if len(re.prefix) > 0 {
|
||||
// Match requires literal prefix; fast search for it.
|
||||
advance := i.index(re, pos)
|
||||
if advance < 0 {
|
||||
freeBitState(b)
|
||||
return nil
|
||||
}
|
||||
pos += advance
|
||||
}
|
||||
pos += advance
|
||||
}
|
||||
|
||||
if len(b.cap) > 0 {
|
||||
b.cap[0] = pos
|
||||
if len(b.cap) > 0 {
|
||||
b.cap[0] = pos
|
||||
}
|
||||
if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) {
|
||||
// Match must be leftmost; done.
|
||||
goto Match
|
||||
}
|
||||
_, width = i.step(pos)
|
||||
}
|
||||
if m.tryBacktrack(b, i, uint32(m.p.Start), pos) {
|
||||
// Match must be leftmost; done.
|
||||
return true
|
||||
}
|
||||
_, width = i.step(pos)
|
||||
freeBitState(b)
|
||||
return nil
|
||||
}
|
||||
return false
|
||||
|
||||
Match:
|
||||
dstCap = append(dstCap, b.matchcap...)
|
||||
freeBitState(b)
|
||||
return dstCap
|
||||
}
|
||||
|
@ -35,37 +35,61 @@ type thread struct {
|
||||
|
||||
// A machine holds all the state during an NFA simulation for p.
|
||||
type machine struct {
|
||||
re *Regexp // corresponding Regexp
|
||||
p *syntax.Prog // compiled program
|
||||
op *onePassProg // compiled onepass program, or notOnePass
|
||||
maxBitStateLen int // max length of string to search with bitstate
|
||||
b *bitState // state for backtracker, allocated lazily
|
||||
q0, q1 queue // two queues for runq, nextq
|
||||
pool []*thread // pool of available threads
|
||||
matched bool // whether a match was found
|
||||
matchcap []int // capture information for the match
|
||||
re *Regexp // corresponding Regexp
|
||||
p *syntax.Prog // compiled program
|
||||
op *onePassProg // compiled onepass program, or notOnePass
|
||||
q0, q1 queue // two queues for runq, nextq
|
||||
pool []*thread // pool of available threads
|
||||
matched bool // whether a match was found
|
||||
matchcap []int // capture information for the match
|
||||
|
||||
inputs inputs
|
||||
}
|
||||
|
||||
type inputs struct {
|
||||
// cached inputs, to avoid allocation
|
||||
inputBytes inputBytes
|
||||
inputString inputString
|
||||
inputReader inputReader
|
||||
bytes inputBytes
|
||||
string inputString
|
||||
reader inputReader
|
||||
}
|
||||
|
||||
func (m *machine) newInputBytes(b []byte) input {
|
||||
m.inputBytes.str = b
|
||||
return &m.inputBytes
|
||||
func (i *inputs) newBytes(b []byte) input {
|
||||
i.bytes.str = b
|
||||
return &i.bytes
|
||||
}
|
||||
|
||||
func (m *machine) newInputString(s string) input {
|
||||
m.inputString.str = s
|
||||
return &m.inputString
|
||||
func (i *inputs) newString(s string) input {
|
||||
i.string.str = s
|
||||
return &i.string
|
||||
}
|
||||
|
||||
func (m *machine) newInputReader(r io.RuneReader) input {
|
||||
m.inputReader.r = r
|
||||
m.inputReader.atEOT = false
|
||||
m.inputReader.pos = 0
|
||||
return &m.inputReader
|
||||
func (i *inputs) newReader(r io.RuneReader) input {
|
||||
i.reader.r = r
|
||||
i.reader.atEOT = false
|
||||
i.reader.pos = 0
|
||||
return &i.reader
|
||||
}
|
||||
|
||||
func (i *inputs) clear() {
|
||||
// We need to clear 1 of these.
|
||||
// Avoid the expense of clearing the others (pointer write barrier).
|
||||
if i.bytes.str != nil {
|
||||
i.bytes.str = nil
|
||||
} else if i.reader.r != nil {
|
||||
i.reader.r = nil
|
||||
} else {
|
||||
i.string.str = ""
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inputs) init(r io.RuneReader, b []byte, s string) (input, int) {
|
||||
if r != nil {
|
||||
return i.newReader(r), 0
|
||||
}
|
||||
if b != nil {
|
||||
return i.newBytes(b), len(b)
|
||||
}
|
||||
return i.newString(s), len(s)
|
||||
}
|
||||
|
||||
// progMachine returns a new machine running the prog p.
|
||||
@ -78,9 +102,6 @@ func progMachine(p *syntax.Prog, op *onePassProg) *machine {
|
||||
if ncap < 2 {
|
||||
ncap = 2
|
||||
}
|
||||
if op == notOnePass {
|
||||
m.maxBitStateLen = maxBitStateLen(p)
|
||||
}
|
||||
m.matchcap = make([]int, ncap)
|
||||
return m
|
||||
}
|
||||
@ -416,31 +437,23 @@ func (re *Regexp) doMatch(r io.RuneReader, b []byte, s string) bool {
|
||||
//
|
||||
// nil is returned if no matches are found and non-nil if matches are found.
|
||||
func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap int, dstCap []int) []int {
|
||||
m := re.get()
|
||||
var i input
|
||||
var size int
|
||||
if r != nil {
|
||||
i = m.newInputReader(r)
|
||||
} else if b != nil {
|
||||
i = m.newInputBytes(b)
|
||||
size = len(b)
|
||||
} else {
|
||||
i = m.newInputString(s)
|
||||
size = len(s)
|
||||
if dstCap == nil {
|
||||
// Make sure 'return dstCap' is non-nil.
|
||||
dstCap = arrayNoInts[:0:0]
|
||||
}
|
||||
|
||||
if re.onepass == notOnePass && r == nil && len(b)+len(s) < re.maxBitStateLen {
|
||||
return re.backtrack(b, s, pos, ncap, dstCap)
|
||||
}
|
||||
|
||||
m := re.get()
|
||||
i, _ := m.inputs.init(r, b, s)
|
||||
|
||||
if m.op != notOnePass {
|
||||
if !m.onepass(i, pos, ncap) {
|
||||
re.put(m)
|
||||
return nil
|
||||
}
|
||||
} else if size < m.maxBitStateLen && r == nil {
|
||||
if m.b == nil {
|
||||
m.b = newBitState(m.p)
|
||||
}
|
||||
if !m.backtrack(i, pos, size, ncap) {
|
||||
re.put(m)
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
m.init(ncap)
|
||||
if !m.match(i, pos) {
|
||||
@ -449,10 +462,6 @@ func (re *Regexp) doExecute(r io.RuneReader, b []byte, s string, pos int, ncap i
|
||||
}
|
||||
}
|
||||
dstCap = append(dstCap, m.matchcap...)
|
||||
if dstCap == nil {
|
||||
// Keep the promise of returning non-nil value on match.
|
||||
dstCap = arrayNoInts[:0]
|
||||
}
|
||||
re.put(m)
|
||||
return dstCap
|
||||
}
|
||||
|
@ -88,17 +88,18 @@ type Regexp struct {
|
||||
}
|
||||
|
||||
type regexpRO struct {
|
||||
expr string // as passed to Compile
|
||||
prog *syntax.Prog // compiled program
|
||||
onepass *onePassProg // onepass program or nil
|
||||
expr string // as passed to Compile
|
||||
prog *syntax.Prog // compiled program
|
||||
onepass *onePassProg // onepass program or nil
|
||||
numSubexp int
|
||||
maxBitStateLen int
|
||||
subexpNames []string
|
||||
prefix string // required prefix in unanchored matches
|
||||
prefixBytes []byte // prefix, as a []byte
|
||||
prefixComplete bool // prefix is the entire regexp
|
||||
prefixRune rune // first rune in prefix
|
||||
prefixEnd uint32 // pc for last rune in prefix
|
||||
prefixComplete bool // prefix is the entire regexp
|
||||
cond syntax.EmptyOp // empty-width conditions required at start of match
|
||||
numSubexp int
|
||||
subexpNames []string
|
||||
longest bool
|
||||
}
|
||||
|
||||
@ -192,6 +193,7 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
|
||||
}
|
||||
if regexp.onepass == notOnePass {
|
||||
regexp.prefix, regexp.prefixComplete = prog.Prefix()
|
||||
regexp.maxBitStateLen = maxBitStateLen(prog)
|
||||
} else {
|
||||
regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
|
||||
}
|
||||
@ -227,9 +229,7 @@ func (re *Regexp) get() *machine {
|
||||
// run using re. (The cache empties when re gets garbage collected.)
|
||||
func (re *Regexp) put(z *machine) {
|
||||
// Remove references to input data that we no longer need.
|
||||
z.inputBytes.str = nil
|
||||
z.inputString.str = ""
|
||||
z.inputReader.r = nil
|
||||
z.inputs.clear()
|
||||
|
||||
re.mu.Lock()
|
||||
re.machine = append(re.machine, z)
|
||||
|
Loading…
Reference in New Issue
Block a user