mirror of
https://github.com/golang/go
synced 2024-11-25 01:08:02 -07:00
regexp: add SubexpNames
Fixes #2440. R=r, dsymonds CC=golang-dev https://golang.org/cl/5559043
This commit is contained in:
parent
e3e93b0f43
commit
21d3721eb8
@ -289,30 +289,45 @@ func TestLiteralPrefix(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// subexpCase is one table entry for TestSubexp: a pattern together with
// the subexpression count and subexpression names expected for it.
type subexpCase struct {
	input string   // regular expression source passed to MustCompile
	num   int      // expected result of NumSubexp
	names []string // expected SubexpNames, indexed by group number; nil skips the per-name comparison
}
|
||||||
|
|
||||||
var numSubexpCases = []numSubexpCase{
|
var subexpCases = []subexpCase{
|
||||||
{``, 0},
|
{``, 0, nil},
|
||||||
{`.*`, 0},
|
{`.*`, 0, nil},
|
||||||
{`abba`, 0},
|
{`abba`, 0, nil},
|
||||||
{`ab(b)a`, 1},
|
{`ab(b)a`, 1, []string{"", ""}},
|
||||||
{`ab(.*)a`, 1},
|
{`ab(.*)a`, 1, []string{"", ""}},
|
||||||
{`(.*)ab(.*)a`, 2},
|
{`(.*)ab(.*)a`, 2, []string{"", "", ""}},
|
||||||
{`(.*)(ab)(.*)a`, 3},
|
{`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}},
|
||||||
{`(.*)((a)b)(.*)a`, 4},
|
{`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}},
|
||||||
{`(.*)(\(ab)(.*)a`, 3},
|
{`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}},
|
||||||
{`(.*)(\(a\)b)(.*)a`, 3},
|
{`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}},
|
||||||
|
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNumSubexp(t *testing.T) {
|
func TestSubexp(t *testing.T) {
|
||||||
for _, c := range numSubexpCases {
|
for _, c := range subexpCases {
|
||||||
re := MustCompile(c.input)
|
re := MustCompile(c.input)
|
||||||
n := re.NumSubexp()
|
n := re.NumSubexp()
|
||||||
if n != c.expected {
|
if n != c.num {
|
||||||
t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected)
|
t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
names := re.SubexpNames()
|
||||||
|
if len(names) != 1+n {
|
||||||
|
t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if c.names != nil {
|
||||||
|
for i := 0; i < 1+n; i++ {
|
||||||
|
if names[i] != c.names[i] {
|
||||||
|
t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -85,6 +85,7 @@ type Regexp struct {
|
|||||||
prefixRune rune // first rune in prefix
|
prefixRune rune // first rune in prefix
|
||||||
cond syntax.EmptyOp // empty-width conditions required at start of match
|
cond syntax.EmptyOp // empty-width conditions required at start of match
|
||||||
numSubexp int
|
numSubexp int
|
||||||
|
subexpNames []string
|
||||||
longest bool
|
longest bool
|
||||||
|
|
||||||
// cache of machines for running regexp
|
// cache of machines for running regexp
|
||||||
@ -140,17 +141,20 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
maxCap := re.MaxCap()
|
maxCap := re.MaxCap()
|
||||||
|
capNames := re.CapNames()
|
||||||
|
|
||||||
re = re.Simplify()
|
re = re.Simplify()
|
||||||
prog, err := syntax.Compile(re)
|
prog, err := syntax.Compile(re)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
regexp := &Regexp{
|
regexp := &Regexp{
|
||||||
expr: expr,
|
expr: expr,
|
||||||
prog: prog,
|
prog: prog,
|
||||||
numSubexp: maxCap,
|
numSubexp: maxCap,
|
||||||
cond: prog.StartCond(),
|
subexpNames: capNames,
|
||||||
longest: longest,
|
cond: prog.StartCond(),
|
||||||
|
longest: longest,
|
||||||
}
|
}
|
||||||
regexp.prefix, regexp.prefixComplete = prog.Prefix()
|
regexp.prefix, regexp.prefixComplete = prog.Prefix()
|
||||||
if regexp.prefix != "" {
|
if regexp.prefix != "" {
|
||||||
@ -223,6 +227,15 @@ func (re *Regexp) NumSubexp() int {
|
|||||||
return re.numSubexp
|
return re.numSubexp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SubexpNames returns the names of the parenthesized subexpressions
|
||||||
|
// in this Regexp. The name for the first sub-expression is names[1],
|
||||||
|
// so that if m is a match slice, the name for m[i] is SubexpNames()[i].
|
||||||
|
// Since the Regexp as a whole cannot be named, names[0] is always
|
||||||
|
// the empty string. The slice should not be modified.
|
||||||
|
func (re *Regexp) SubexpNames() []string {
|
||||||
|
return re.subexpNames
|
||||||
|
}
|
||||||
|
|
||||||
const endOfText rune = -1
|
const endOfText rune = -1
|
||||||
|
|
||||||
// input abstracts different representations of the input text. It provides
|
// input abstracts different representations of the input text. It provides
|
||||||
|
@ -303,3 +303,19 @@ func (re *Regexp) MaxCap() int {
|
|||||||
}
|
}
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CapNames walks the regexp to find the names of capturing groups.
|
||||||
|
func (re *Regexp) CapNames() []string {
|
||||||
|
names := make([]string, re.MaxCap()+1)
|
||||||
|
re.capNames(names)
|
||||||
|
return names
|
||||||
|
}
|
||||||
|
|
||||||
|
func (re *Regexp) capNames(names []string) {
|
||||||
|
if re.Op == OpCapture {
|
||||||
|
names[re.Cap] = re.Name
|
||||||
|
}
|
||||||
|
for _, sub := range re.Sub {
|
||||||
|
sub.capNames(names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user