1
0
mirror of https://github.com/golang/go synced 2024-11-25 03:47:57 -07:00

regexp: bug fix: need to track whether match begins with fixed prefix.

Fixes #872.

R=rsc
CC=golang-dev
https://golang.org/cl/1731043
This commit is contained in:
Rob Pike 2010-06-22 16:02:14 -07:00
parent 537e5f4b7e
commit a8cd6c2012
2 changed files with 20 additions and 17 deletions

View File

@ -100,7 +100,8 @@ var matches = []tester{
// fixed bugs // fixed bugs
tester{`ab$`, "cab", vec{1, 3}}, tester{`ab$`, "cab", vec{1, 3}},
tester{`axxb$`, "axxcb", vec{}}, tester{`data`, "daXY data", vec{5, 9}},
tester{`da(.)a$`, "daXY data", vec{5, 9, 7, 8}},
// can backslash-escape any punctuation // can backslash-escape any punctuation
tester{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, tester{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,

View File

@ -728,33 +728,34 @@ func (a *matchArena) noMatch() *matchVec {
type state struct { type state struct {
inst instr // next instruction to execute inst instr // next instruction to execute
prefixed bool // this match began with a fixed prefix
match *matchVec match *matchVec
} }
// Append new state to to-do list. Leftmost-longest wins so avoid // Append new state to to-do list. Leftmost-longest wins so avoid
// adding a state that's already active. The matchVec will be inc-ref'ed // adding a state that's already active. The matchVec will be inc-ref'ed
// if it is assigned to a state. // if it is assigned to a state.
func (a *matchArena) addState(s []state, inst instr, match *matchVec, pos, end int) []state { func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *matchVec, pos, end int) []state {
switch inst.kind() { switch inst.kind() {
case _BOT: case _BOT:
if pos == 0 { if pos == 0 {
s = a.addState(s, inst.next(), match, pos, end) s = a.addState(s, inst.next(), prefixed, match, pos, end)
} }
return s return s
case _EOT: case _EOT:
if pos == end { if pos == end {
s = a.addState(s, inst.next(), match, pos, end) s = a.addState(s, inst.next(), prefixed, match, pos, end)
} }
return s return s
case _BRA: case _BRA:
n := inst.(*_Bra).n n := inst.(*_Bra).n
match.m[2*n] = pos match.m[2*n] = pos
s = a.addState(s, inst.next(), match, pos, end) s = a.addState(s, inst.next(), prefixed, match, pos, end)
return s return s
case _EBRA: case _EBRA:
n := inst.(*_Ebra).n n := inst.(*_Ebra).n
match.m[2*n+1] = pos match.m[2*n+1] = pos
s = a.addState(s, inst.next(), match, pos, end) s = a.addState(s, inst.next(), prefixed, match, pos, end)
return s return s
} }
index := inst.index() index := inst.index()
@ -773,12 +774,13 @@ func (a *matchArena) addState(s []state, inst instr, match *matchVec, pos, end i
} }
s = s[0 : l+1] s = s[0 : l+1]
s[l].inst = inst s[l].inst = inst
s[l].prefixed = prefixed
s[l].match = match s[l].match = match
match.ref++ match.ref++
if inst.kind() == _ALT { if inst.kind() == _ALT {
s = a.addState(s, inst.(*_Alt).left, a.copy(match), pos, end) s = a.addState(s, inst.(*_Alt).left, prefixed, a.copy(match), pos, end)
// give other branch a copy of this match vector // give other branch a copy of this match vector
s = a.addState(s, inst.next(), a.copy(match), pos, end) s = a.addState(s, inst.next(), prefixed, a.copy(match), pos, end)
} }
return s return s
} }
@ -818,10 +820,10 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
match := arena.noMatch() match := arena.noMatch()
match.m[0] = pos match.m[0] = pos
if prefixed { if prefixed {
s[out] = arena.addState(s[out], re.prefixStart, match, pos, end) s[out] = arena.addState(s[out], re.prefixStart, true, match, pos, end)
prefixed = false // next iteration should start at beginning of machine. prefixed = false // next iteration should start at beginning of machine.
} else { } else {
s[out] = arena.addState(s[out], re.start.next(), match, pos, end) s[out] = arena.addState(s[out], re.start.next(), false, match, pos, end)
} }
arena.free(match) // if addState saved it, ref was incremented arena.free(match) // if addState saved it, ref was incremented
} }
@ -852,19 +854,19 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
case _EOT: case _EOT:
case _CHAR: case _CHAR:
if c == st.inst.(*_Char).char { if c == st.inst.(*_Char).char {
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end) s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
} }
case _CHARCLASS: case _CHARCLASS:
if st.inst.(*_CharClass).matches(c) { if st.inst.(*_CharClass).matches(c) {
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end) s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
} }
case _ANY: case _ANY:
if c != endOfFile { if c != endOfFile {
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end) s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
} }
case _NOTNL: case _NOTNL:
if c != endOfFile && c != '\n' { if c != endOfFile && c != '\n' {
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end) s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
} }
case _BRA: case _BRA:
case _EBRA: case _EBRA:
@ -892,7 +894,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
return nil return nil
} }
// if match found, back up start of match by width of prefix. // if match found, back up start of match by width of prefix.
if re.prefix != "" && len(final.match.m) > 0 { if final.prefixed && len(final.match.m) > 0 {
final.match.m[0] -= len(re.prefix) final.match.m[0] -= len(re.prefix)
} }
return final.match.m return final.match.m