mirror of
https://github.com/golang/go
synced 2024-11-22 06:24:38 -07:00
regexp: add support for matching text read from things that implement ReadRune.
(If you have a Reader but not a RuneReader, use bufio.)
The matching code is a few percent slower but significantly cleaner.
R=rsc
CC=golang-dev
https://golang.org/cl/4125046
This commit is contained in:
parent
63457d089e
commit
7db904c1f6
@ -6,6 +6,7 @@ package regexp
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -191,6 +192,12 @@ func TestFindStringIndex(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFindReaderIndex(t *testing.T) {
|
||||||
|
for _, test := range findTests {
|
||||||
|
testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Now come the simple All cases.
|
// Now come the simple All cases.
|
||||||
|
|
||||||
func TestFindAll(t *testing.T) {
|
func TestFindAll(t *testing.T) {
|
||||||
@ -387,6 +394,12 @@ func TestFindStringSubmatchIndex(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFindReaderSubmatchIndex(t *testing.T) {
|
||||||
|
for _, test := range findTests {
|
||||||
|
testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Now come the monster AllSubmatch cases.
|
// Now come the monster AllSubmatch cases.
|
||||||
|
|
||||||
func TestFindAllSubmatch(t *testing.T) {
|
func TestFindAllSubmatch(t *testing.T) {
|
||||||
|
@ -54,6 +54,16 @@
|
|||||||
// text of the match/submatch. If an index is negative, it means that
|
// text of the match/submatch. If an index is negative, it means that
|
||||||
// subexpression did not match any string in the input.
|
// subexpression did not match any string in the input.
|
||||||
//
|
//
|
||||||
|
// There is also a subset of the methods that can be applied to text read
|
||||||
|
// from a RuneReader:
|
||||||
|
//
|
||||||
|
// MatchReader, FindReaderIndex, FindReaderSubmatchIndex
|
||||||
|
//
|
||||||
|
// This set may grow. Note that regular expression matches may need to
|
||||||
|
// examine text beyond the text returned by a match, so the methods that
|
||||||
|
// match text from a RuneReader may read arbitrarily far into the input
|
||||||
|
// before returning.
|
||||||
|
//
|
||||||
// (There are a few other methods that do not match this pattern.)
|
// (There are a few other methods that do not match this pattern.)
|
||||||
//
|
//
|
||||||
package regexp
|
package regexp
|
||||||
@ -231,13 +241,13 @@ func (p *parser) error(err Error) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
const endOfFile = -1
|
const endOfText = -1
|
||||||
|
|
||||||
func (p *parser) c() int { return p.ch }
|
func (p *parser) c() int { return p.ch }
|
||||||
|
|
||||||
func (p *parser) nextc() int {
|
func (p *parser) nextc() int {
|
||||||
if p.pos >= len(p.re.expr) {
|
if p.pos >= len(p.re.expr) {
|
||||||
p.ch = endOfFile
|
p.ch = endOfText
|
||||||
} else {
|
} else {
|
||||||
c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:])
|
c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:])
|
||||||
p.ch = c
|
p.ch = c
|
||||||
@ -288,7 +298,7 @@ func (p *parser) checkBackslash() int {
|
|||||||
if c == '\\' {
|
if c == '\\' {
|
||||||
c = p.nextc()
|
c = p.nextc()
|
||||||
switch {
|
switch {
|
||||||
case c == endOfFile:
|
case c == endOfText:
|
||||||
p.error(ErrExtraneousBackslash)
|
p.error(ErrExtraneousBackslash)
|
||||||
case ispunct(c):
|
case ispunct(c):
|
||||||
// c is as delivered
|
// c is as delivered
|
||||||
@ -311,7 +321,7 @@ func (p *parser) charClass() *instr {
|
|||||||
left := -1
|
left := -1
|
||||||
for {
|
for {
|
||||||
switch c := p.c(); c {
|
switch c := p.c(); c {
|
||||||
case ']', endOfFile:
|
case ']', endOfText:
|
||||||
if left >= 0 {
|
if left >= 0 {
|
||||||
p.error(ErrBadRange)
|
p.error(ErrBadRange)
|
||||||
}
|
}
|
||||||
@ -356,7 +366,7 @@ func (p *parser) charClass() *instr {
|
|||||||
|
|
||||||
func (p *parser) term() (start, end *instr) {
|
func (p *parser) term() (start, end *instr) {
|
||||||
switch c := p.c(); c {
|
switch c := p.c(); c {
|
||||||
case '|', endOfFile:
|
case '|', endOfText:
|
||||||
return nil, nil
|
return nil, nil
|
||||||
case '*', '+', '?':
|
case '*', '+', '?':
|
||||||
p.error(ErrBareClosure)
|
p.error(ErrBareClosure)
|
||||||
@ -640,6 +650,9 @@ func (re *Regexp) NumSubexp() int { return re.nbra }
|
|||||||
type matchArena struct {
|
type matchArena struct {
|
||||||
head *matchVec
|
head *matchVec
|
||||||
len int // length of match vector
|
len int // length of match vector
|
||||||
|
pos int
|
||||||
|
atBOT bool // whether we're at beginning of text
|
||||||
|
atEOT bool // whether we're at end of text
|
||||||
}
|
}
|
||||||
|
|
||||||
type matchVec struct {
|
type matchVec struct {
|
||||||
@ -699,21 +712,21 @@ type state struct {
|
|||||||
// Append new state to to-do list. Leftmost-longest wins so avoid
|
// Append new state to to-do list. Leftmost-longest wins so avoid
|
||||||
// adding a state that's already active. The matchVec will be inc-ref'ed
|
// adding a state that's already active. The matchVec will be inc-ref'ed
|
||||||
// if it is assigned to a state.
|
// if it is assigned to a state.
|
||||||
func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matchVec, pos, end int) []state {
|
func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matchVec) []state {
|
||||||
switch inst.kind {
|
switch inst.kind {
|
||||||
case iBOT:
|
case iBOT:
|
||||||
if pos == 0 {
|
if a.atBOT {
|
||||||
s = a.addState(s, inst.next, prefixed, match, pos, end)
|
s = a.addState(s, inst.next, prefixed, match)
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
case iEOT:
|
case iEOT:
|
||||||
if pos == end {
|
if a.atEOT {
|
||||||
s = a.addState(s, inst.next, prefixed, match, pos, end)
|
s = a.addState(s, inst.next, prefixed, match)
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
case iBra:
|
case iBra:
|
||||||
match.m[inst.braNum] = pos
|
match.m[inst.braNum] = a.pos
|
||||||
s = a.addState(s, inst.next, prefixed, match, pos, end)
|
s = a.addState(s, inst.next, prefixed, match)
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
l := len(s)
|
l := len(s)
|
||||||
@ -727,62 +740,157 @@ func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matc
|
|||||||
s = append(s, state{inst, prefixed, match})
|
s = append(s, state{inst, prefixed, match})
|
||||||
match.ref++
|
match.ref++
|
||||||
if inst.kind == iAlt {
|
if inst.kind == iAlt {
|
||||||
s = a.addState(s, inst.left, prefixed, a.copy(match), pos, end)
|
s = a.addState(s, inst.left, prefixed, a.copy(match))
|
||||||
// give other branch a copy of this match vector
|
// give other branch a copy of this match vector
|
||||||
s = a.addState(s, inst.next, prefixed, a.copy(match), pos, end)
|
s = a.addState(s, inst.next, prefixed, a.copy(match))
|
||||||
}
|
}
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accepts either string or bytes - the logic is identical either way.
|
// input abstracts different representations of the input text. It provides
|
||||||
// If bytes == nil, scan str.
|
// one-character lookahead.
|
||||||
func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
type input interface {
|
||||||
|
step(pos int) (rune int, width int) // advance one rune
|
||||||
|
canCheckPrefix() bool // can we look ahead without losing info?
|
||||||
|
hasPrefix(re *Regexp) bool
|
||||||
|
index(re *Regexp, pos int) int
|
||||||
|
}
|
||||||
|
|
||||||
|
// inputString scans a string.
|
||||||
|
type inputString struct {
|
||||||
|
str string
|
||||||
|
}
|
||||||
|
|
||||||
|
func newInputString(str string) *inputString {
|
||||||
|
return &inputString{str: str}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputString) step(pos int) (int, int) {
|
||||||
|
if pos < len(i.str) {
|
||||||
|
return utf8.DecodeRuneInString(i.str[pos:len(i.str)])
|
||||||
|
}
|
||||||
|
return endOfText, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputString) canCheckPrefix() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputString) hasPrefix(re *Regexp) bool {
|
||||||
|
return strings.HasPrefix(i.str, re.prefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputString) index(re *Regexp, pos int) int {
|
||||||
|
return strings.Index(i.str[pos:], re.prefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
// inputBytes scans a byte slice.
|
||||||
|
type inputBytes struct {
|
||||||
|
str []byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func newInputBytes(str []byte) *inputBytes {
|
||||||
|
return &inputBytes{str: str}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputBytes) step(pos int) (int, int) {
|
||||||
|
if pos < len(i.str) {
|
||||||
|
return utf8.DecodeRune(i.str[pos:len(i.str)])
|
||||||
|
}
|
||||||
|
return endOfText, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputBytes) canCheckPrefix() bool {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputBytes) hasPrefix(re *Regexp) bool {
|
||||||
|
return bytes.HasPrefix(i.str, re.prefixBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputBytes) index(re *Regexp, pos int) int {
|
||||||
|
return bytes.Index(i.str[pos:], re.prefixBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// inputReader scans a RuneReader.
|
||||||
|
type inputReader struct {
|
||||||
|
r io.RuneReader
|
||||||
|
atEOT bool
|
||||||
|
pos int
|
||||||
|
}
|
||||||
|
|
||||||
|
func newInputReader(r io.RuneReader) *inputReader {
|
||||||
|
return &inputReader{r: r}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputReader) step(pos int) (int, int) {
|
||||||
|
if !i.atEOT && pos != i.pos {
|
||||||
|
return endOfText, 0
|
||||||
|
|
||||||
|
}
|
||||||
|
r, w, err := i.r.ReadRune()
|
||||||
|
if err != nil {
|
||||||
|
i.atEOT = true
|
||||||
|
return endOfText, 0
|
||||||
|
}
|
||||||
|
i.pos += w
|
||||||
|
return r, w
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputReader) canCheckPrefix() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputReader) hasPrefix(re *Regexp) bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *inputReader) index(re *Regexp, pos int) int {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Search match starting from pos bytes into the input.
|
||||||
|
func (re *Regexp) doExecute(i input, pos int) []int {
|
||||||
var s [2][]state
|
var s [2][]state
|
||||||
s[0] = make([]state, 0, 10)
|
s[0] = make([]state, 0, 10)
|
||||||
s[1] = make([]state, 0, 10)
|
s[1] = make([]state, 0, 10)
|
||||||
in, out := 0, 1
|
in, out := 0, 1
|
||||||
var final state
|
var final state
|
||||||
found := false
|
found := false
|
||||||
end := len(str)
|
|
||||||
if bytestr != nil {
|
|
||||||
end = len(bytestr)
|
|
||||||
}
|
|
||||||
anchored := re.inst[0].next.kind == iBOT
|
anchored := re.inst[0].next.kind == iBOT
|
||||||
if anchored && pos > 0 {
|
if anchored && pos > 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// fast check for initial plain substring
|
// fast check for initial plain substring
|
||||||
if re.prefix != "" {
|
if i.canCheckPrefix() && re.prefix != "" {
|
||||||
advance := 0
|
advance := 0
|
||||||
if anchored {
|
if anchored {
|
||||||
if bytestr == nil {
|
if !i.hasPrefix(re) {
|
||||||
if !strings.HasPrefix(str, re.prefix) {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if !bytes.HasPrefix(bytestr, re.prefixBytes) {
|
advance = i.index(re, pos)
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if bytestr == nil {
|
|
||||||
advance = strings.Index(str[pos:], re.prefix)
|
|
||||||
} else {
|
|
||||||
advance = bytes.Index(bytestr[pos:], re.prefixBytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if advance == -1 {
|
if advance == -1 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
}
|
||||||
pos += advance
|
pos += advance
|
||||||
}
|
}
|
||||||
arena := &matchArena{nil, 2 * (re.nbra + 1)}
|
// We look one character ahead so we can match $, which checks whether
|
||||||
for startPos := pos; pos <= end; {
|
// we are at EOT.
|
||||||
|
nextChar, nextWidth := i.step(pos)
|
||||||
|
arena := &matchArena{
|
||||||
|
len: 2 * (re.nbra + 1),
|
||||||
|
pos: pos,
|
||||||
|
atBOT: pos == 0,
|
||||||
|
atEOT: nextChar == endOfText,
|
||||||
|
}
|
||||||
|
for c, startPos := 0, pos; c != endOfText; {
|
||||||
if !found && (pos == startPos || !anchored) {
|
if !found && (pos == startPos || !anchored) {
|
||||||
// prime the pump if we haven't seen a match yet
|
// prime the pump if we haven't seen a match yet
|
||||||
match := arena.noMatch()
|
match := arena.noMatch()
|
||||||
match.m[0] = pos
|
match.m[0] = pos
|
||||||
s[out] = arena.addState(s[out], re.start.next, false, match, pos, end)
|
s[out] = arena.addState(s[out], re.start.next, false, match)
|
||||||
arena.free(match) // if addState saved it, ref was incremented
|
arena.free(match) // if addState saved it, ref was incremented
|
||||||
} else if len(s[out]) == 0 {
|
} else if len(s[out]) == 0 {
|
||||||
// machine has completed
|
// machine has completed
|
||||||
@ -795,35 +903,32 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||||||
arena.free(state.match)
|
arena.free(state.match)
|
||||||
}
|
}
|
||||||
s[out] = old[0:0] // truncate state vector
|
s[out] = old[0:0] // truncate state vector
|
||||||
charwidth := 1
|
c = nextChar
|
||||||
c := endOfFile
|
thisPos := pos
|
||||||
if pos < end {
|
pos += nextWidth
|
||||||
if bytestr == nil {
|
nextChar, nextWidth = i.step(pos)
|
||||||
c, charwidth = utf8.DecodeRuneInString(str[pos:end])
|
arena.atEOT = nextChar == endOfText
|
||||||
} else {
|
arena.atBOT = false
|
||||||
c, charwidth = utf8.DecodeRune(bytestr[pos:end])
|
arena.pos = pos
|
||||||
}
|
|
||||||
}
|
|
||||||
pos += charwidth
|
|
||||||
for _, st := range s[in] {
|
for _, st := range s[in] {
|
||||||
switch st.inst.kind {
|
switch st.inst.kind {
|
||||||
case iBOT:
|
case iBOT:
|
||||||
case iEOT:
|
case iEOT:
|
||||||
case iChar:
|
case iChar:
|
||||||
if c == st.inst.char {
|
if c == st.inst.char {
|
||||||
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
|
||||||
}
|
}
|
||||||
case iCharClass:
|
case iCharClass:
|
||||||
if st.inst.cclass.matches(c) {
|
if st.inst.cclass.matches(c) {
|
||||||
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
|
||||||
}
|
}
|
||||||
case iAny:
|
case iAny:
|
||||||
if c != endOfFile {
|
if c != endOfText {
|
||||||
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
|
||||||
}
|
}
|
||||||
case iNotNL:
|
case iNotNL:
|
||||||
if c != endOfFile && c != '\n' {
|
if c != endOfText && c != '\n' {
|
||||||
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match, pos, end)
|
s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
|
||||||
}
|
}
|
||||||
case iBra:
|
case iBra:
|
||||||
case iAlt:
|
case iAlt:
|
||||||
@ -831,13 +936,13 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
|
|||||||
// choose leftmost longest
|
// choose leftmost longest
|
||||||
if !found || // first
|
if !found || // first
|
||||||
st.match.m[0] < final.match.m[0] || // leftmost
|
st.match.m[0] < final.match.m[0] || // leftmost
|
||||||
(st.match.m[0] == final.match.m[0] && pos-charwidth > final.match.m[1]) { // longest
|
(st.match.m[0] == final.match.m[0] && thisPos > final.match.m[1]) { // longest
|
||||||
if final.match != nil {
|
if final.match != nil {
|
||||||
arena.free(final.match)
|
arena.free(final.match)
|
||||||
}
|
}
|
||||||
final = st
|
final = st
|
||||||
final.match.ref++
|
final.match.ref++
|
||||||
final.match.m[1] = pos - charwidth
|
final.match.m[1] = thisPos
|
||||||
}
|
}
|
||||||
found = true
|
found = true
|
||||||
default:
|
default:
|
||||||
@ -874,14 +979,31 @@ func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
|
|||||||
return string(c[:i]), true
|
return string(c[:i]), true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchReader returns whether the Regexp matches the text read by the
|
||||||
|
// RuneReader. The return value is a boolean: true for match, false for no
|
||||||
|
// match.
|
||||||
|
func (re *Regexp) MatchReader(r io.RuneReader) bool {
|
||||||
|
return len(re.doExecute(newInputReader(r), 0)) > 0
|
||||||
|
}
|
||||||
|
|
||||||
// MatchString returns whether the Regexp matches the string s.
|
// MatchString returns whether the Regexp matches the string s.
|
||||||
// The return value is a boolean: true for match, false for no match.
|
// The return value is a boolean: true for match, false for no match.
|
||||||
func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(s, nil, 0)) > 0 }
|
func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(newInputString(s), 0)) > 0 }
|
||||||
|
|
||||||
// Match returns whether the Regexp matches the byte slice b.
|
// Match returns whether the Regexp matches the byte slice b.
|
||||||
// The return value is a boolean: true for match, false for no match.
|
// The return value is a boolean: true for match, false for no match.
|
||||||
func (re *Regexp) Match(b []byte) bool { return len(re.doExecute("", b, 0)) > 0 }
|
func (re *Regexp) Match(b []byte) bool { return len(re.doExecute(newInputBytes(b), 0)) > 0 }
|
||||||
|
|
||||||
|
// MatchReader checks whether a textual regular expression matches the text
|
||||||
|
// read by the RuneReader. More complicated queries need to use Compile and
|
||||||
|
// the full Regexp interface.
|
||||||
|
func MatchReader(pattern string, r io.RuneReader) (matched bool, error os.Error) {
|
||||||
|
re, err := Compile(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return re.MatchReader(r), nil
|
||||||
|
}
|
||||||
|
|
||||||
// MatchString checks whether a textual regular expression
|
// MatchString checks whether a textual regular expression
|
||||||
// matches a string. More complicated queries need
|
// matches a string. More complicated queries need
|
||||||
@ -921,7 +1043,7 @@ func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) str
|
|||||||
searchPos := 0 // position where we next look for a match
|
searchPos := 0 // position where we next look for a match
|
||||||
buf := new(bytes.Buffer)
|
buf := new(bytes.Buffer)
|
||||||
for searchPos <= len(src) {
|
for searchPos <= len(src) {
|
||||||
a := re.doExecute(src, nil, searchPos)
|
a := re.doExecute(newInputString(src), searchPos)
|
||||||
if len(a) == 0 {
|
if len(a) == 0 {
|
||||||
break // no more matches
|
break // no more matches
|
||||||
}
|
}
|
||||||
@ -973,7 +1095,7 @@ func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
|
|||||||
searchPos := 0 // position where we next look for a match
|
searchPos := 0 // position where we next look for a match
|
||||||
buf := new(bytes.Buffer)
|
buf := new(bytes.Buffer)
|
||||||
for searchPos <= len(src) {
|
for searchPos <= len(src) {
|
||||||
a := re.doExecute("", src, searchPos)
|
a := re.doExecute(newInputBytes(src), searchPos)
|
||||||
if len(a) == 0 {
|
if len(a) == 0 {
|
||||||
break // no more matches
|
break // no more matches
|
||||||
}
|
}
|
||||||
@ -1038,7 +1160,13 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
|
for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
|
||||||
matches := re.doExecute(s, b, pos)
|
var in input
|
||||||
|
if b == nil {
|
||||||
|
in = newInputString(s)
|
||||||
|
} else {
|
||||||
|
in = newInputBytes(b)
|
||||||
|
}
|
||||||
|
matches := re.doExecute(in, pos)
|
||||||
if len(matches) == 0 {
|
if len(matches) == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -1052,6 +1180,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
|
|||||||
accept = false
|
accept = false
|
||||||
}
|
}
|
||||||
var width int
|
var width int
|
||||||
|
// TODO: use step()
|
||||||
if b == nil {
|
if b == nil {
|
||||||
_, width = utf8.DecodeRuneInString(s[pos:end])
|
_, width = utf8.DecodeRuneInString(s[pos:end])
|
||||||
} else {
|
} else {
|
||||||
@ -1077,7 +1206,7 @@ func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
|
|||||||
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
|
// Find returns a slice holding the text of the leftmost match in b of the regular expression.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) Find(b []byte) []byte {
|
func (re *Regexp) Find(b []byte) []byte {
|
||||||
a := re.doExecute("", b, 0)
|
a := re.doExecute(newInputBytes(b), 0)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -1089,7 +1218,7 @@ func (re *Regexp) Find(b []byte) []byte {
|
|||||||
// b[loc[0]:loc[1]].
|
// b[loc[0]:loc[1]].
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
||||||
a := re.doExecute("", b, 0)
|
a := re.doExecute(newInputBytes(b), 0)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -1102,7 +1231,7 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) {
|
|||||||
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
// an empty string. Use FindStringIndex or FindStringSubmatch if it is
|
||||||
// necessary to distinguish these cases.
|
// necessary to distinguish these cases.
|
||||||
func (re *Regexp) FindString(s string) string {
|
func (re *Regexp) FindString(s string) string {
|
||||||
a := re.doExecute(s, nil, 0)
|
a := re.doExecute(newInputString(s), 0)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
@ -1114,7 +1243,19 @@ func (re *Regexp) FindString(s string) string {
|
|||||||
// itself is at s[loc[0]:loc[1]].
|
// itself is at s[loc[0]:loc[1]].
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindStringIndex(s string) []int {
|
func (re *Regexp) FindStringIndex(s string) []int {
|
||||||
a := re.doExecute(s, nil, 0)
|
a := re.doExecute(newInputString(s), 0)
|
||||||
|
if a == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return a[0:2]
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindReaderIndex returns a two-element slice of integers defining the
|
||||||
|
// location of the leftmost match of the regular expression in text read from
|
||||||
|
// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return
|
||||||
|
// value of nil indicates no match.
|
||||||
|
func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
|
||||||
|
a := re.doExecute(newInputReader(r), 0)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -1127,7 +1268,7 @@ func (re *Regexp) FindStringIndex(s string) []int {
|
|||||||
// comment.
|
// comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
||||||
a := re.doExecute("", b, 0)
|
a := re.doExecute(newInputBytes(b), 0)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -1146,7 +1287,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
|||||||
// in the package comment.
|
// in the package comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
||||||
return re.doExecute("", b, 0)
|
return re.doExecute(newInputBytes(b), 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FindStringSubmatch returns a slice of strings holding the text of the
|
// FindStringSubmatch returns a slice of strings holding the text of the
|
||||||
@ -1155,7 +1296,7 @@ func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
|||||||
// package comment.
|
// package comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindStringSubmatch(s string) []string {
|
func (re *Regexp) FindStringSubmatch(s string) []string {
|
||||||
a := re.doExecute(s, nil, 0)
|
a := re.doExecute(newInputString(s), 0)
|
||||||
if a == nil {
|
if a == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -1174,7 +1315,16 @@ func (re *Regexp) FindStringSubmatch(s string) []string {
|
|||||||
// 'Index' descriptions in the package comment.
|
// 'Index' descriptions in the package comment.
|
||||||
// A return value of nil indicates no match.
|
// A return value of nil indicates no match.
|
||||||
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
||||||
return re.doExecute(s, nil, 0)
|
return re.doExecute(newInputString(s), 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindReaderSubmatchIndex returns a slice holding the index pairs
|
||||||
|
// identifying the leftmost match of the regular expression of text read by
|
||||||
|
// the RuneReader, and the matches, if any, of its subexpressions, as defined
|
||||||
|
// by the 'Submatch' and 'Index' descriptions in the package comment. A
|
||||||
|
// return value of nil indicates no match.
|
||||||
|
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
|
||||||
|
return re.doExecute(newInputReader(r), 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
const startSize = 10 // The size at which to start a slice in the 'All' routines.
|
const startSize = 10 // The size at which to start a slice in the 'All' routines.
|
||||||
|
Loading…
Reference in New Issue
Block a user