mirror of
https://github.com/golang/go
synced 2024-11-25 03:47:57 -07:00
regexp: bug fix: need to track whether match begins with fixed prefix.
Fixes #872. R=rsc CC=golang-dev https://golang.org/cl/1731043
This commit is contained in:
parent
537e5f4b7e
commit
a8cd6c2012
@ -100,7 +100,8 @@ var matches = []tester{
|
|||||||
|
|
||||||
// fixed bugs
|
// fixed bugs
|
||||||
tester{`ab$`, "cab", vec{1, 3}},
|
tester{`ab$`, "cab", vec{1, 3}},
|
||||||
tester{`axxb$`, "axxcb", vec{}},
|
tester{`data`, "daXY data", vec{5, 9}},
|
||||||
|
tester{`da(.)a$`, "daXY data", vec{5, 9, 7, 8}},
|
||||||
|
|
||||||
// can backslash-escape any punctuation
|
// can backslash-escape any punctuation
|
||||||
tester{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
|
tester{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
|
||||||
|
@ -728,33 +728,34 @@ func (a *matchArena) noMatch() *matchVec {
|
|||||||
|
|
||||||
type state struct {
|
type state struct {
|
||||||
inst instr // next instruction to execute
|
inst instr // next instruction to execute
|
||||||
|
prefixed bool // this match began with a fixed prefix
|
||||||
match *matchVec
|
match *matchVec
|
||||||
}
|
}
|
||||||
|
|
||||||
// Append new state to to-do list. Leftmost-longest wins so avoid
|
// Append new state to to-do list. Leftmost-longest wins so avoid
|
||||||
// adding a state that's already active. The matchVec will be inc-ref'ed
|
// adding a state that's already active. The matchVec will be inc-ref'ed
|
||||||
// if it is assigned to a state.
|
// if it is assigned to a state.
|
||||||
func (a *matchArena) addState(s []state, inst instr, match *matchVec, pos, end int) []state {
|
func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *matchVec, pos, end int) []state {
|
||||||
switch inst.kind() {
|
switch inst.kind() {
|
||||||
case _BOT:
|
case _BOT:
|
||||||
if pos == 0 {
|
if pos == 0 {
|
||||||
s = a.addState(s, inst.next(), match, pos, end)
|
s = a.addState(s, inst.next(), prefixed, match, pos, end)
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
case _EOT:
|
case _EOT:
|
||||||
if pos == end {
|
if pos == end {
|
||||||
s = a.addState(s, inst.next(), match, pos, end)
|
s = a.addState(s, inst.next(), prefixed, match, pos, end)
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
case _BRA:
|
case _BRA:
|
||||||
n := inst.(*_Bra).n
|
n := inst.(*_Bra).n
|
||||||
match.m[2*n] = pos
|
match.m[2*n] = pos
|
||||||
s = a.addState(s, inst.next(), match, pos, end)
|
s = a.addState(s, inst.next(), prefixed, match, pos, end)
|
||||||
return s
|
return s
|
||||||
case _EBRA:
|
case _EBRA:
|
||||||
n := inst.(*_Ebra).n
|
n := inst.(*_Ebra).n
|
||||||
match.m[2*n+1] = pos
|
match.m[2*n+1] = pos
|
||||||
s = a.addState(s, inst.next(), match, pos, end)
|
s = a.addState(s, inst.next(), prefixed, match, pos, end)
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
index := inst.index()
|
index := inst.index()
|
||||||
@ -773,12 +774,13 @@ func (a *matchArena) addState(s []state, inst instr, match *matchVec, pos, end i
|
|||||||
}
|
}
|
||||||
s = s[0 : l+1]
|
s = s[0 : l+1]
|
||||||
s[l].inst = inst
|
s[l].inst = inst
|
||||||
|
s[l].prefixed = prefixed
|
||||||
s[l].match = match
|
s[l].match = match
|
||||||
match.ref++
|
match.ref++
|
||||||
if inst.kind() == _ALT {
|
if inst.kind() == _ALT {
|
||||||
s = a.addState(s, inst.(*_Alt).left, a.copy(match), pos, end)
|
s = a.addState(s, inst.(*_Alt).left, prefixed, a.copy(match), pos, end)
|
||||||
// give other branch a copy of this match vector
|
// give other branch a copy of this match vector
|
||||||
s = a.addState(s, inst.next(), a.copy(match), pos, end)
|
s = a.addState(s, inst.next(), prefixed, a.copy(match), pos, end)
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
@ -818,10 +820,10 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||||||
match := arena.noMatch()
|
match := arena.noMatch()
|
||||||
match.m[0] = pos
|
match.m[0] = pos
|
||||||
if prefixed {
|
if prefixed {
|
||||||
s[out] = arena.addState(s[out], re.prefixStart, match, pos, end)
|
s[out] = arena.addState(s[out], re.prefixStart, true, match, pos, end)
|
||||||
prefixed = false // next iteration should start at beginning of machine.
|
prefixed = false // next iteration should start at beginning of machine.
|
||||||
} else {
|
} else {
|
||||||
s[out] = arena.addState(s[out], re.start.next(), match, pos, end)
|
s[out] = arena.addState(s[out], re.start.next(), false, match, pos, end)
|
||||||
}
|
}
|
||||||
arena.free(match) // if addState saved it, ref was incremented
|
arena.free(match) // if addState saved it, ref was incremented
|
||||||
}
|
}
|
||||||
@ -852,19 +854,19 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||||||
case _EOT:
|
case _EOT:
|
||||||
case _CHAR:
|
case _CHAR:
|
||||||
if c == st.inst.(*_Char).char {
|
if c == st.inst.(*_Char).char {
|
||||||
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
|
||||||
}
|
}
|
||||||
case _CHARCLASS:
|
case _CHARCLASS:
|
||||||
if st.inst.(*_CharClass).matches(c) {
|
if st.inst.(*_CharClass).matches(c) {
|
||||||
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
|
||||||
}
|
}
|
||||||
case _ANY:
|
case _ANY:
|
||||||
if c != endOfFile {
|
if c != endOfFile {
|
||||||
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
|
||||||
}
|
}
|
||||||
case _NOTNL:
|
case _NOTNL:
|
||||||
if c != endOfFile && c != '\n' {
|
if c != endOfFile && c != '\n' {
|
||||||
s[out] = arena.addState(s[out], st.inst.next(), st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next(), st.prefixed, st.match, pos, end)
|
||||||
}
|
}
|
||||||
case _BRA:
|
case _BRA:
|
||||||
case _EBRA:
|
case _EBRA:
|
||||||
@ -892,7 +894,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// if match found, back up start of match by width of prefix.
|
// if match found, back up start of match by width of prefix.
|
||||||
if re.prefix != "" && len(final.match.m) > 0 {
|
if final.prefixed && len(final.match.m) > 0 {
|
||||||
final.match.m[0] -= len(re.prefix)
|
final.match.m[0] -= len(re.prefix)
|
||||||
}
|
}
|
||||||
return final.match.m
|
return final.match.m
|
||||||
|
Loading…
Reference in New Issue
Block a user