1
0
mirror of https://github.com/golang/go synced 2024-11-25 03:47:57 -07:00

exp/regexp: leftmost-longest matching

Not exposed in the API yet, but passes tests.

R=r
CC=golang-dev
https://golang.org/cl/4967059
This commit is contained in:
Russ Cox 2011-09-08 10:09:25 -04:00
parent e7af22a64e
commit 7df4322114
4 changed files with 93 additions and 33 deletions

View File

@ -128,6 +128,11 @@ func (m *machine) match(i input, pos int) bool {
if width == 0 { if width == 0 {
break break
} }
if len(m.matchcap) == 0 && m.matched {
// Found a match and not paying attention
// to where it is, so any match will do.
break
}
pos += width pos += width
rune, width = rune1, width1 rune, width = rune1, width1
if rune != endOfText { if rune != endOfText {
@ -155,37 +160,37 @@ func (m *machine) clear(q *queue) {
// which starts at position pos and ends at nextPos. // which starts at position pos and ends at nextPos.
// nextCond gives the setting for the empty-width flags after c. // nextCond gives the setting for the empty-width flags after c.
func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) { func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) {
longest := m.re.longest
for j := 0; j < len(runq.dense); j++ { for j := 0; j < len(runq.dense); j++ {
d := &runq.dense[j] d := &runq.dense[j]
t := d.t t := d.t
if t == nil { if t == nil {
continue continue
} }
/* if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
* If we support leftmost-longest matching: m.free(t)
if longest && matched && match[0] < t.cap[0] { continue
m.free(t) }
continue
}
*/
i := t.inst i := t.inst
switch i.Op { switch i.Op {
default: default:
panic("bad inst") panic("bad inst")
case syntax.InstMatch: case syntax.InstMatch:
if len(t.cap) > 0 { if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
t.cap[1] = pos t.cap[1] = pos
copy(m.matchcap, t.cap) copy(m.matchcap, t.cap)
} }
m.matched = true if !longest {
for _, d := range runq.dense[j+1:] { // First-match mode: cut off all lower-priority threads.
if d.t != nil { for _, d := range runq.dense[j+1:] {
m.free(d.t) if d.t != nil {
m.free(d.t)
}
} }
runq.dense = runq.dense[:0]
} }
runq.dense = runq.dense[:0] m.matched = true
case syntax.InstRune: case syntax.InstRune:
if i.MatchRune(c) { if i.MatchRune(c) {

View File

@ -164,29 +164,29 @@ func TestRE2(t *testing.T) {
continue continue
} }
res := strings.Split(line, ";") res := strings.Split(line, ";")
if len(res) != 2 { if len(res) != len(run) {
t.Fatalf("re2.txt:%d: have %d test results, want 2", lineno, len(res)) t.Fatalf("re2.txt:%d: have %d test results, want %d", lineno, len(res), len(run))
} }
// res[0] is full match for i := range res {
// res[1] is partial match have, suffix := run[i](re, refull, text)
// Run partial match first; don't bother with full if partial fails. want := parseResult(t, lineno, res[i])
have := re.FindStringSubmatchIndex(text) if !same(have, want) {
want := parseResult(t, lineno, res[1]) t.Errorf("re2.txt:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, suffix, text, have, want)
if !same(have, want) { if nfail++; nfail >= 100 {
t.Errorf("re2.txt:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, text, have, want) t.Fatalf("stopping after %d errors", nfail)
if nfail++; nfail >= 100 { }
t.Fatalf("stopping after %d errors", nfail) continue
} }
continue b, suffix := match[i](re, refull, text)
} if b != (want != nil) {
have = refull.FindStringSubmatchIndex(text) t.Errorf("re2.txt:%d: %#q%s.MatchString(%#q) = %v, want %v", lineno, re, suffix, text, b, !b)
want = parseResult(t, lineno, res[0]) if nfail++; nfail >= 100 {
if !same(have, want) { t.Fatalf("stopping after %d errors", nfail)
t.Errorf("re2.txt:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", lineno, refull, text, have, want) }
if nfail++; nfail >= 100 { continue
t.Fatalf("stopping after %d errors", nfail)
} }
} }
default: default:
t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line) t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line)
} }
@ -197,6 +197,60 @@ func TestRE2(t *testing.T) {
t.Logf("%d cases tested", ncase) t.Logf("%d cases tested", ncase)
} }
// run lists the submatch-index runners used by TestRE2. The test indexes it
// in parallel with the semicolon-separated result columns of a re2.txt line
// (run[i] produces the value compared against res[i]), and it requires
// len(res) == len(run), so the order of the entries is significant.
// Each runner receives (re, refull, text) and returns the submatch indexes
// plus a suffix that the test inserts into its failure message.
var run = []func(*Regexp, *Regexp, string) ([]int, string){
runFull,
runPartial,
runFullLongest,
runPartialLongest,
}
// runFull puts refull into first-match mode (longest = false) and returns the
// submatch indexes it finds in text, paired with the "[full]" label that the
// caller includes in failure messages.
// re is unused; it is accepted only so the function fits the run table.
func runFull(re, refull *Regexp, text string) ([]int, string) {
	const label = "[full]"
	refull.longest = false
	loc := refull.FindStringSubmatchIndex(text)
	return loc, label
}
// runPartial puts re into first-match mode (longest = false) and returns the
// submatch indexes it finds in text. The accompanying label is empty, so
// failure messages carry no suffix for this mode.
// refull is unused; it is accepted only so the function fits the run table.
func runPartial(re, refull *Regexp, text string) ([]int, string) {
	const label = ""
	re.longest = false
	loc := re.FindStringSubmatchIndex(text)
	return loc, label
}
// runFullLongest turns on leftmost-longest matching for refull
// (longest = true) and returns the submatch indexes it finds in text, paired
// with the "[full,longest]" label used in failure messages.
// re is unused; it is accepted only so the function fits the run table.
func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
	const label = "[full,longest]"
	refull.longest = true
	loc := refull.FindStringSubmatchIndex(text)
	return loc, label
}
// runPartialLongest turns on leftmost-longest matching for re
// (longest = true) and returns the submatch indexes it finds in text, paired
// with the "[longest]" label used in failure messages.
// refull is unused; it is accepted only so the function fits the run table.
func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
	const label = "[longest]"
	re.longest = true
	loc := re.FindStringSubmatchIndex(text)
	return loc, label
}
// match lists the boolean matchers used by TestRE2. Its entries are in the
// same order as those of run (full, partial, full+longest, partial+longest),
// and the test calls match[i] with the same index i it used for run[i], then
// checks that the reported bool agrees with whether a match was expected.
// Each matcher receives (re, refull, text) and returns the MatchString result
// plus a suffix that the test inserts into its failure message.
var match = []func(*Regexp, *Regexp, string) (bool, string){
matchFull,
matchPartial,
matchFullLongest,
matchPartialLongest,
}
// matchFull puts refull into first-match mode (longest = false) and reports
// whether it matches text, paired with the "[full]" label that the caller
// includes in failure messages.
// re is unused; it is accepted only so the function fits the match table.
func matchFull(re, refull *Regexp, text string) (bool, string) {
	const label = "[full]"
	refull.longest = false
	ok := refull.MatchString(text)
	return ok, label
}
// matchPartial puts re into first-match mode (longest = false) and reports
// whether it matches text. The accompanying label is empty, so failure
// messages carry no suffix for this mode.
// refull is unused; it is accepted only so the function fits the match table.
func matchPartial(re, refull *Regexp, text string) (bool, string) {
	const label = ""
	re.longest = false
	ok := re.MatchString(text)
	return ok, label
}
// matchFullLongest turns on leftmost-longest matching for refull
// (longest = true) and reports whether it matches text, paired with the
// "[full,longest]" label used in failure messages.
// re is unused; it is accepted only so the function fits the match table.
func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
	const label = "[full,longest]"
	refull.longest = true
	ok := refull.MatchString(text)
	return ok, label
}
// matchPartialLongest turns on leftmost-longest matching for re
// (longest = true) and reports whether it matches text, paired with the
// "[longest]" label used in failure messages.
// refull is unused; it is accepted only so the function fits the match table.
func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
	const label = "[longest]"
	re.longest = true
	ok := re.MatchString(text)
	return ok, label
}
func isSingleBytes(s string) bool { func isSingleBytes(s string) bool {
for _, c := range s { for _, c := range s {
if c >= utf8.RuneSelf { if c >= utf8.RuneSelf {

Binary file not shown.

View File

@ -85,6 +85,7 @@ type Regexp struct {
prefixRune int // first rune in prefix prefixRune int // first rune in prefix
cond syntax.EmptyOp // empty-width conditions required at start of match cond syntax.EmptyOp // empty-width conditions required at start of match
numSubexp int numSubexp int
longest bool
// cache of machines for running regexp // cache of machines for running regexp
mu sync.Mutex mu sync.Mutex