mirror of
https://github.com/golang/go
synced 2024-11-25 03:37:58 -07:00
exp/regexp: leftmost-longest matching
Not exposed in the API yet, but passes tests. R=r CC=golang-dev https://golang.org/cl/4967059
This commit is contained in:
parent
e7af22a64e
commit
7df4322114
@ -128,6 +128,11 @@ func (m *machine) match(i input, pos int) bool {
|
|||||||
if width == 0 {
|
if width == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
if len(m.matchcap) == 0 && m.matched {
|
||||||
|
// Found a match and not paying attention
|
||||||
|
// to where it is, so any match will do.
|
||||||
|
break
|
||||||
|
}
|
||||||
pos += width
|
pos += width
|
||||||
rune, width = rune1, width1
|
rune, width = rune1, width1
|
||||||
if rune != endOfText {
|
if rune != endOfText {
|
||||||
@ -155,37 +160,37 @@ func (m *machine) clear(q *queue) {
|
|||||||
// which starts at position pos and ends at nextPos.
|
// which starts at position pos and ends at nextPos.
|
||||||
// nextCond gives the setting for the empty-width flags after c.
|
// nextCond gives the setting for the empty-width flags after c.
|
||||||
func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) {
|
func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) {
|
||||||
|
longest := m.re.longest
|
||||||
for j := 0; j < len(runq.dense); j++ {
|
for j := 0; j < len(runq.dense); j++ {
|
||||||
d := &runq.dense[j]
|
d := &runq.dense[j]
|
||||||
t := d.t
|
t := d.t
|
||||||
if t == nil {
|
if t == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
/*
|
if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
|
||||||
* If we support leftmost-longest matching:
|
|
||||||
if longest && matched && match[0] < t.cap[0] {
|
|
||||||
m.free(t)
|
m.free(t)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
i := t.inst
|
i := t.inst
|
||||||
switch i.Op {
|
switch i.Op {
|
||||||
default:
|
default:
|
||||||
panic("bad inst")
|
panic("bad inst")
|
||||||
|
|
||||||
case syntax.InstMatch:
|
case syntax.InstMatch:
|
||||||
if len(t.cap) > 0 {
|
if len(t.cap) > 0 && (!longest || !m.matched || m.matchcap[1] < pos) {
|
||||||
t.cap[1] = pos
|
t.cap[1] = pos
|
||||||
copy(m.matchcap, t.cap)
|
copy(m.matchcap, t.cap)
|
||||||
}
|
}
|
||||||
m.matched = true
|
if !longest {
|
||||||
|
// First-match mode: cut off all lower-priority threads.
|
||||||
for _, d := range runq.dense[j+1:] {
|
for _, d := range runq.dense[j+1:] {
|
||||||
if d.t != nil {
|
if d.t != nil {
|
||||||
m.free(d.t)
|
m.free(d.t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
runq.dense = runq.dense[:0]
|
runq.dense = runq.dense[:0]
|
||||||
|
}
|
||||||
|
m.matched = true
|
||||||
|
|
||||||
case syntax.InstRune:
|
case syntax.InstRune:
|
||||||
if i.MatchRune(c) {
|
if i.MatchRune(c) {
|
||||||
|
@ -164,29 +164,29 @@ func TestRE2(t *testing.T) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
res := strings.Split(line, ";")
|
res := strings.Split(line, ";")
|
||||||
if len(res) != 2 {
|
if len(res) != len(run) {
|
||||||
t.Fatalf("re2.txt:%d: have %d test results, want 2", lineno, len(res))
|
t.Fatalf("re2.txt:%d: have %d test results, want %d", lineno, len(res), len(run))
|
||||||
}
|
}
|
||||||
// res[0] is full match
|
for i := range res {
|
||||||
// res[1] is partial match
|
have, suffix := run[i](re, refull, text)
|
||||||
// Run partial match first; don't bother with full if partial fails.
|
want := parseResult(t, lineno, res[i])
|
||||||
have := re.FindStringSubmatchIndex(text)
|
|
||||||
want := parseResult(t, lineno, res[1])
|
|
||||||
if !same(have, want) {
|
if !same(have, want) {
|
||||||
t.Errorf("re2.txt:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, text, have, want)
|
t.Errorf("re2.txt:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, suffix, text, have, want)
|
||||||
if nfail++; nfail >= 100 {
|
if nfail++; nfail >= 100 {
|
||||||
t.Fatalf("stopping after %d errors", nfail)
|
t.Fatalf("stopping after %d errors", nfail)
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
have = refull.FindStringSubmatchIndex(text)
|
b, suffix := match[i](re, refull, text)
|
||||||
want = parseResult(t, lineno, res[0])
|
if b != (want != nil) {
|
||||||
if !same(have, want) {
|
t.Errorf("re2.txt:%d: %#q%s.MatchString(%#q) = %v, want %v", lineno, re, suffix, text, b, !b)
|
||||||
t.Errorf("re2.txt:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", lineno, refull, text, have, want)
|
|
||||||
if nfail++; nfail >= 100 {
|
if nfail++; nfail >= 100 {
|
||||||
t.Fatalf("stopping after %d errors", nfail)
|
t.Fatalf("stopping after %d errors", nfail)
|
||||||
}
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line)
|
t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line)
|
||||||
}
|
}
|
||||||
@ -197,6 +197,60 @@ func TestRE2(t *testing.T) {
|
|||||||
t.Logf("%d cases tested", ncase)
|
t.Logf("%d cases tested", ncase)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var run = []func(*Regexp, *Regexp, string) ([]int, string){
|
||||||
|
runFull,
|
||||||
|
runPartial,
|
||||||
|
runFullLongest,
|
||||||
|
runPartialLongest,
|
||||||
|
}
|
||||||
|
|
||||||
|
func runFull(re, refull *Regexp, text string) ([]int, string) {
|
||||||
|
refull.longest = false
|
||||||
|
return refull.FindStringSubmatchIndex(text), "[full]"
|
||||||
|
}
|
||||||
|
|
||||||
|
func runPartial(re, refull *Regexp, text string) ([]int, string) {
|
||||||
|
re.longest = false
|
||||||
|
return re.FindStringSubmatchIndex(text), ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
|
||||||
|
refull.longest = true
|
||||||
|
return refull.FindStringSubmatchIndex(text), "[full,longest]"
|
||||||
|
}
|
||||||
|
|
||||||
|
func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
|
||||||
|
re.longest = true
|
||||||
|
return re.FindStringSubmatchIndex(text), "[longest]"
|
||||||
|
}
|
||||||
|
|
||||||
|
var match = []func(*Regexp, *Regexp, string) (bool, string){
|
||||||
|
matchFull,
|
||||||
|
matchPartial,
|
||||||
|
matchFullLongest,
|
||||||
|
matchPartialLongest,
|
||||||
|
}
|
||||||
|
|
||||||
|
func matchFull(re, refull *Regexp, text string) (bool, string) {
|
||||||
|
refull.longest = false
|
||||||
|
return refull.MatchString(text), "[full]"
|
||||||
|
}
|
||||||
|
|
||||||
|
func matchPartial(re, refull *Regexp, text string) (bool, string) {
|
||||||
|
re.longest = false
|
||||||
|
return re.MatchString(text), ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
|
||||||
|
refull.longest = true
|
||||||
|
return refull.MatchString(text), "[full,longest]"
|
||||||
|
}
|
||||||
|
|
||||||
|
func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
|
||||||
|
re.longest = true
|
||||||
|
return re.MatchString(text), "[longest]"
|
||||||
|
}
|
||||||
|
|
||||||
func isSingleBytes(s string) bool {
|
func isSingleBytes(s string) bool {
|
||||||
for _, c := range s {
|
for _, c := range s {
|
||||||
if c >= utf8.RuneSelf {
|
if c >= utf8.RuneSelf {
|
||||||
|
Binary file not shown.
@ -85,6 +85,7 @@ type Regexp struct {
|
|||||||
prefixRune int // first rune in prefix
|
prefixRune int // first rune in prefix
|
||||||
cond syntax.EmptyOp // empty-width conditions required at start of match
|
cond syntax.EmptyOp // empty-width conditions required at start of match
|
||||||
numSubexp int
|
numSubexp int
|
||||||
|
longest bool
|
||||||
|
|
||||||
// cache of machines for running regexp
|
// cache of machines for running regexp
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
Loading…
Reference in New Issue
Block a user