mirror of
https://github.com/golang/go
synced 2024-11-22 03:14:41 -07:00
exp/regexp/syntax: import all RE2 parse tests + fix bugs
R=r CC=golang-dev https://golang.org/cl/4952061
This commit is contained in:
parent
940932056e
commit
177dca77e1
@ -181,11 +181,29 @@ func (p *parser) maybeConcat(r int, flags Flags) bool {
|
|||||||
func (p *parser) newLiteral(r int, flags Flags) *Regexp {
|
func (p *parser) newLiteral(r int, flags Flags) *Regexp {
|
||||||
re := p.newRegexp(OpLiteral)
|
re := p.newRegexp(OpLiteral)
|
||||||
re.Flags = flags
|
re.Flags = flags
|
||||||
|
if flags&FoldCase != 0 {
|
||||||
|
r = minFoldRune(r)
|
||||||
|
}
|
||||||
re.Rune0[0] = r
|
re.Rune0[0] = r
|
||||||
re.Rune = re.Rune0[:1]
|
re.Rune = re.Rune0[:1]
|
||||||
return re
|
return re
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// minFoldRune returns the minimum rune fold-equivalent to r.
|
||||||
|
func minFoldRune(r int) int {
|
||||||
|
if r < minFold || r > maxFold {
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
min := r
|
||||||
|
r0 := r
|
||||||
|
for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
|
||||||
|
if min > r {
|
||||||
|
min = r
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return min
|
||||||
|
}
|
||||||
|
|
||||||
// literal pushes a literal regexp for the rune r on the stack.
|
// literal pushes a literal regexp for the rune r on the stack.
|
||||||
func (p *parser) literal(r int) {
|
func (p *parser) literal(r int) {
|
||||||
@ -202,25 +220,29 @@ func (p *parser) op(op Op) *Regexp {
|
|||||||
|
|
||||||
// repeat replaces the top stack element with itself repeated
|
// repeat replaces the top stack element with itself repeated
|
||||||
// according to op.
|
// according to op.
|
||||||
func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (string, os.Error) {
|
func (p *parser) repeat(op Op, min, max int, whole, opstr, t, lastRepeat string) (string, string, os.Error) {
|
||||||
flags := p.flags
|
flags := p.flags
|
||||||
if p.flags&PerlX != 0 {
|
if p.flags&PerlX != 0 {
|
||||||
if len(t) > 0 && t[0] == '?' {
|
if len(t) > 0 && t[0] == '?' {
|
||||||
t = t[1:]
|
t = t[1:]
|
||||||
|
opstr = whole[:len(opstr)+1]
|
||||||
flags ^= NonGreedy
|
flags ^= NonGreedy
|
||||||
}
|
}
|
||||||
if lastRepeat != "" {
|
if lastRepeat != "" {
|
||||||
// In Perl it is not allowed to stack repetition operators:
|
// In Perl it is not allowed to stack repetition operators:
|
||||||
// a** is a syntax error, not a doubled star, and a++ means
|
// a** is a syntax error, not a doubled star, and a++ means
|
||||||
// something else entirely, which we don't support!
|
// something else entirely, which we don't support!
|
||||||
return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(t)]}
|
return "", "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(t)]}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
n := len(p.stack)
|
n := len(p.stack)
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
return "", &Error{ErrMissingRepeatArgument, opstr}
|
return "", "", &Error{ErrMissingRepeatArgument, opstr}
|
||||||
}
|
}
|
||||||
sub := p.stack[n-1]
|
sub := p.stack[n-1]
|
||||||
|
if sub.Op >= opPseudo {
|
||||||
|
return "", "", &Error{ErrMissingRepeatArgument, opstr}
|
||||||
|
}
|
||||||
re := p.newRegexp(op)
|
re := p.newRegexp(op)
|
||||||
re.Min = min
|
re.Min = min
|
||||||
re.Max = max
|
re.Max = max
|
||||||
@ -228,7 +250,7 @@ func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (strin
|
|||||||
re.Sub = re.Sub0[:1]
|
re.Sub = re.Sub0[:1]
|
||||||
re.Sub[0] = sub
|
re.Sub[0] = sub
|
||||||
p.stack[n-1] = re
|
p.stack[n-1] = re
|
||||||
return t, nil
|
return t, opstr, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
|
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
|
||||||
@ -712,7 +734,7 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
|
|||||||
case '?':
|
case '?':
|
||||||
op = OpQuest
|
op = OpQuest
|
||||||
}
|
}
|
||||||
if t, err = p.repeat(op, min, max, t[:1], t[1:], lastRepeat); err != nil {
|
if t, repeat, err = p.repeat(op, min, max, t, t[:1], t[1:], lastRepeat); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
case '{':
|
case '{':
|
||||||
@ -724,7 +746,12 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
|
|||||||
t = t[1:]
|
t = t[1:]
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if t, err = p.repeat(op, min, max, t[:len(t)-len(tt)], tt, lastRepeat); err != nil {
|
opstr := t[:len(t)-len(tt)]
|
||||||
|
if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
|
||||||
|
// Numbers were too big, or max is present and min > max.
|
||||||
|
return nil, &Error{ErrInvalidRepeatSize, opstr}
|
||||||
|
}
|
||||||
|
if t, repeat, err = p.repeat(op, min, max, t, opstr, tt, lastRepeat); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
case '\\':
|
case '\\':
|
||||||
@ -815,12 +842,14 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
|
|||||||
|
|
||||||
// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}.
|
// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}.
|
||||||
// If s is not of that form, it returns ok == false.
|
// If s is not of that form, it returns ok == false.
|
||||||
|
// If s has the right form but the values are too big, it returns min == -1, ok == true.
|
||||||
func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
|
func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
|
||||||
if s == "" || s[0] != '{' {
|
if s == "" || s[0] != '{' {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
s = s[1:]
|
s = s[1:]
|
||||||
if min, s, ok = p.parseInt(s); !ok {
|
var ok1 bool
|
||||||
|
if min, s, ok1 = p.parseInt(s); !ok1 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if s == "" {
|
if s == "" {
|
||||||
@ -835,8 +864,11 @@ func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
|
|||||||
}
|
}
|
||||||
if s[0] == '}' {
|
if s[0] == '}' {
|
||||||
max = -1
|
max = -1
|
||||||
} else if max, s, ok = p.parseInt(s); !ok {
|
} else if max, s, ok1 = p.parseInt(s); !ok1 {
|
||||||
return
|
return
|
||||||
|
} else if max < 0 {
|
||||||
|
// parseInt found too big a number
|
||||||
|
min = -1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if s == "" || s[0] != '}' {
|
if s == "" || s[0] != '}' {
|
||||||
@ -981,16 +1013,22 @@ func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
|
|||||||
if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
|
if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
t := s
|
||||||
for s != "" && '0' <= s[0] && s[0] <= '9' {
|
for s != "" && '0' <= s[0] && s[0] <= '9' {
|
||||||
// Avoid overflow.
|
|
||||||
if n >= 1e8 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
n = n*10 + int(s[0]) - '0'
|
|
||||||
s = s[1:]
|
s = s[1:]
|
||||||
}
|
}
|
||||||
rest = s
|
rest = s
|
||||||
ok = true
|
ok = true
|
||||||
|
// Have digits, compute value.
|
||||||
|
t = t[:len(t)-len(s)]
|
||||||
|
for i := 0; i < len(t); i++ {
|
||||||
|
// Avoid overflow.
|
||||||
|
if n >= 1e8 {
|
||||||
|
n = -1
|
||||||
|
break
|
||||||
|
}
|
||||||
|
n = n*10 + int(t[i]) - '0'
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1125,6 +1163,8 @@ func (p *parser) parseRightParen() os.Error {
|
|||||||
if re2.Op != opLeftParen {
|
if re2.Op != opLeftParen {
|
||||||
return &Error{ErrMissingParen, p.wholeRegexp}
|
return &Error{ErrMissingParen, p.wholeRegexp}
|
||||||
}
|
}
|
||||||
|
// Restore flags at time of paren.
|
||||||
|
p.flags = re2.Flags
|
||||||
if re2.Cap == 0 {
|
if re2.Cap == 0 {
|
||||||
// Just for grouping.
|
// Just for grouping.
|
||||||
p.push(re1)
|
p.push(re1)
|
||||||
@ -1330,9 +1370,18 @@ func (p *parser) appendGroup(r []int, g charGroup) []int {
|
|||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// anyTable is a range table that contains every code point: the 16-bit
// ranges cover 0 through 1<<16-1 and the 32-bit ranges cover 1<<16 through
// unicode.MaxRune, both with stride 1.
//
// The fields are keyed so the literal does not depend on the order or number
// of fields in unicode.RangeTable, unicode.Range16 and unicode.Range32.
var anyTable = &unicode.RangeTable{
	R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
	R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
}
|
||||||
|
|
||||||
// unicodeTable returns the unicode.RangeTable identified by name
|
// unicodeTable returns the unicode.RangeTable identified by name
|
||||||
// and the table of additional fold-equivalent code points.
|
// and the table of additional fold-equivalent code points.
|
||||||
func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
|
func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
|
||||||
|
// Special case: "Any" means any.
|
||||||
|
if name == "Any" {
|
||||||
|
return anyTable, anyTable
|
||||||
|
}
|
||||||
if t := unicode.Categories[name]; t != nil {
|
if t := unicode.Categories[name]; t != nil {
|
||||||
return t, unicode.FoldCategory[name]
|
return t, unicode.FoldCategory[name]
|
||||||
}
|
}
|
||||||
|
@ -11,10 +11,12 @@ import (
|
|||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
|
||||||
var parseTests = []struct {
|
type parseTest struct {
|
||||||
Regexp string
|
Regexp string
|
||||||
Dump string
|
Dump string
|
||||||
}{
|
}
|
||||||
|
|
||||||
|
var parseTests = []parseTest{
|
||||||
// Base cases
|
// Base cases
|
||||||
{`a`, `lit{a}`},
|
{`a`, `lit{a}`},
|
||||||
{`a.`, `cat{lit{a}dot{}}`},
|
{`a.`, `cat{lit{a}dot{}}`},
|
||||||
@ -38,6 +40,12 @@ var parseTests = []struct {
|
|||||||
{`a{2}?`, `nrep{2,2 lit{a}}`},
|
{`a{2}?`, `nrep{2,2 lit{a}}`},
|
||||||
{`a{2,3}?`, `nrep{2,3 lit{a}}`},
|
{`a{2,3}?`, `nrep{2,3 lit{a}}`},
|
||||||
{`a{2,}?`, `nrep{2,-1 lit{a}}`},
|
{`a{2,}?`, `nrep{2,-1 lit{a}}`},
|
||||||
|
// Malformed { } are treated as literals.
|
||||||
|
{`x{1001`, `str{x{1001}`},
|
||||||
|
{`x{9876543210`, `str{x{9876543210}`},
|
||||||
|
{`x{9876543210,`, `str{x{9876543210,}`},
|
||||||
|
{`x{2,1`, `str{x{2,1}`},
|
||||||
|
{`x{1,9876543210`, `str{x{1,9876543210}`},
|
||||||
{``, `emp{}`},
|
{``, `emp{}`},
|
||||||
{`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
|
{`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
|
||||||
{`|x|`, `alt{emp{}lit{x}emp{}}`},
|
{`|x|`, `alt{emp{}lit{x}emp{}}`},
|
||||||
@ -101,6 +109,8 @@ var parseTests = []struct {
|
|||||||
{`\p{Lu}`, mkCharClass(unicode.IsUpper)},
|
{`\p{Lu}`, mkCharClass(unicode.IsUpper)},
|
||||||
{`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
|
{`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
|
||||||
{`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
|
{`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
|
||||||
|
{`\p{Any}`, `dot{}`},
|
||||||
|
{`\p{^Any}`, `cc{}`},
|
||||||
|
|
||||||
// Hex, octal.
|
// Hex, octal.
|
||||||
{`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
|
{`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
|
||||||
@ -174,14 +184,80 @@ var parseTests = []struct {
|
|||||||
{`(?-s).`, `dnl{}`},
|
{`(?-s).`, `dnl{}`},
|
||||||
{`(?:(?:^).)`, `cat{bol{}dot{}}`},
|
{`(?:(?:^).)`, `cat{bol{}dot{}}`},
|
||||||
{`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
|
{`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
|
||||||
|
|
||||||
|
// RE2 prefix_tests
|
||||||
|
{`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
|
||||||
|
{`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`},
|
||||||
|
{`abc|abd|aef|bcx|bcy`,
|
||||||
|
`alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` +
|
||||||
|
`cat{str{bc}cc{0x78-0x79}}}`},
|
||||||
|
{`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`},
|
||||||
|
{`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`},
|
||||||
|
{`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`},
|
||||||
|
{`(?:xx|yy)c|(?:xx|yy)d`,
|
||||||
|
`cat{alt{str{xx}str{yy}}cc{0x63-0x64}}`},
|
||||||
|
{`x{2}|x{2}[0-9]`,
|
||||||
|
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
|
||||||
|
{`x{2}y|x{2}[0-9]y`,
|
||||||
|
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
|
||||||
}
|
}
|
||||||
|
|
||||||
const testFlags = MatchNL | PerlX | UnicodeGroups
|
const testFlags = MatchNL | PerlX | UnicodeGroups
|
||||||
|
|
||||||
|
func TestParseSimple(t *testing.T) {
|
||||||
|
testParseDump(t, parseTests, testFlags)
|
||||||
|
}
|
||||||
|
|
||||||
|
var foldcaseTests = []parseTest{
|
||||||
|
{`AbCdE`, `strfold{ABCDE}`},
|
||||||
|
{`[Aa]`, `litfold{A}`},
|
||||||
|
{`a`, `litfold{A}`},
|
||||||
|
|
||||||
|
// 0x17F is an old English long s (looks like an f) and folds to s.
|
||||||
|
// 0x212A is the Kelvin symbol and folds to k.
|
||||||
|
{`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...]
|
||||||
|
{`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||||||
|
{`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseFoldCase(t *testing.T) {
|
||||||
|
testParseDump(t, foldcaseTests, FoldCase)
|
||||||
|
}
|
||||||
|
|
||||||
|
var literalTests = []parseTest{
|
||||||
|
{"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseLiteral(t *testing.T) {
|
||||||
|
testParseDump(t, literalTests, Literal)
|
||||||
|
}
|
||||||
|
|
||||||
|
var matchnlTests = []parseTest{
|
||||||
|
{`.`, `dot{}`},
|
||||||
|
{"\n", "lit{\n}"},
|
||||||
|
{`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
|
||||||
|
{`[a\n]`, `cc{0xa 0x61}`},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseMatchNL(t *testing.T) {
|
||||||
|
testParseDump(t, matchnlTests, MatchNL)
|
||||||
|
}
|
||||||
|
|
||||||
|
var nomatchnlTests = []parseTest{
|
||||||
|
{`.`, `dnl{}`},
|
||||||
|
{"\n", "lit{\n}"},
|
||||||
|
{`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`},
|
||||||
|
{`[a\n]`, `cc{0xa 0x61}`},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseNoMatchNL(t *testing.T) {
|
||||||
|
testParseDump(t, nomatchnlTests, 0)
|
||||||
|
}
|
||||||
|
|
||||||
// Test Parse -> Dump.
|
// Test Parse -> Dump.
|
||||||
func TestParseDump(t *testing.T) {
|
func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
|
||||||
for _, tt := range parseTests {
|
for _, tt := range tests {
|
||||||
re, err := Parse(tt.Regexp, testFlags)
|
re, err := Parse(tt.Regexp, flags)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
|
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
|
||||||
continue
|
continue
|
||||||
@ -360,3 +436,115 @@ func TestAppendRangeCollapse(t *testing.T) {
|
|||||||
t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
|
t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var invalidRegexps = []string{
|
||||||
|
`(`,
|
||||||
|
`)`,
|
||||||
|
`(a`,
|
||||||
|
`(a|b|`,
|
||||||
|
`(a|b`,
|
||||||
|
`[a-z`,
|
||||||
|
`([a-z)`,
|
||||||
|
`x{1001}`,
|
||||||
|
`x{9876543210}`,
|
||||||
|
`x{2,1}`,
|
||||||
|
`x{1,9876543210}`,
|
||||||
|
"\xff", // Invalid UTF-8
|
||||||
|
"[\xff]",
|
||||||
|
"[\\\xff]",
|
||||||
|
"\\\xff",
|
||||||
|
`(?P<name>a`,
|
||||||
|
`(?P<name>`,
|
||||||
|
`(?P<name`,
|
||||||
|
`(?P<x y>a)`,
|
||||||
|
`(?P<>a)`,
|
||||||
|
`[a-Z]`,
|
||||||
|
`(?i)[a-Z]`,
|
||||||
|
`a{100000}`,
|
||||||
|
`a{100000,}`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var onlyPerl = []string{
|
||||||
|
`[a-b-c]`,
|
||||||
|
`\Qabc\E`,
|
||||||
|
`\Q*+?{[\E`,
|
||||||
|
`\Q\\E`,
|
||||||
|
`\Q\\\E`,
|
||||||
|
`\Q\\\\E`,
|
||||||
|
`\Q\\\\\E`,
|
||||||
|
`(?:a)`,
|
||||||
|
`(?P<name>a)`,
|
||||||
|
}
|
||||||
|
|
||||||
|
var onlyPOSIX = []string{
|
||||||
|
"a++",
|
||||||
|
"a**",
|
||||||
|
"a?*",
|
||||||
|
"a+*",
|
||||||
|
"a{1}*",
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseInvalidRegexps(t *testing.T) {
|
||||||
|
for _, regexp := range invalidRegexps {
|
||||||
|
if re, err := Parse(regexp, Perl); err == nil {
|
||||||
|
t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
|
||||||
|
}
|
||||||
|
if re, err := Parse(regexp, POSIX); err == nil {
|
||||||
|
t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, regexp := range onlyPerl {
|
||||||
|
if _, err := Parse(regexp, Perl); err != nil {
|
||||||
|
t.Errorf("Parse(%#q, Perl): %v", regexp, err)
|
||||||
|
}
|
||||||
|
if re, err := Parse(regexp, POSIX); err == nil {
|
||||||
|
t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, regexp := range onlyPOSIX {
|
||||||
|
if re, err := Parse(regexp, Perl); err == nil {
|
||||||
|
t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
|
||||||
|
}
|
||||||
|
if _, err := Parse(regexp, POSIX); err != nil {
|
||||||
|
t.Errorf("Parse(%#q, POSIX): %v", regexp, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestToStringEquivalentParse(t *testing.T) {
|
||||||
|
for _, tt := range parseTests {
|
||||||
|
re, err := Parse(tt.Regexp, testFlags)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
d := dump(re)
|
||||||
|
if d != tt.Dump {
|
||||||
|
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
s := re.String()
|
||||||
|
if s != tt.Regexp {
|
||||||
|
// If ToString didn't return the original regexp,
|
||||||
|
// it must have found one with fewer parens.
|
||||||
|
// Unfortunately we can't check the length here, because
|
||||||
|
// ToString produces "\\{" for a literal brace,
|
||||||
|
// but "{" is a shorter equivalent in some contexts.
|
||||||
|
nre, err := Parse(s, testFlags)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, t, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
nd := dump(nre)
|
||||||
|
if d != nd {
|
||||||
|
t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd)
|
||||||
|
}
|
||||||
|
|
||||||
|
ns := nre.String()
|
||||||
|
if s != ns {
|
||||||
|
t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -57,7 +57,7 @@ func EmptyOpContext(r1, r2 int) EmptyOp {
|
|||||||
op |= EmptyBeginLine
|
op |= EmptyBeginLine
|
||||||
}
|
}
|
||||||
if r2 < 0 {
|
if r2 < 0 {
|
||||||
op |= EmptyEndText
|
op |= EmptyEndText | EmptyEndLine
|
||||||
}
|
}
|
||||||
if r2 == '\n' {
|
if r2 == '\n' {
|
||||||
op |= EmptyEndLine
|
op |= EmptyEndLine
|
||||||
|
@ -164,9 +164,9 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
|||||||
}
|
}
|
||||||
b.WriteRune(']')
|
b.WriteRune(']')
|
||||||
case OpAnyCharNotNL:
|
case OpAnyCharNotNL:
|
||||||
b.WriteString(`[^\n]`)
|
b.WriteString(`(?-s:.)`)
|
||||||
case OpAnyChar:
|
case OpAnyChar:
|
||||||
b.WriteRune('.')
|
b.WriteString(`(?s:.)`)
|
||||||
case OpBeginLine:
|
case OpBeginLine:
|
||||||
b.WriteRune('^')
|
b.WriteRune('^')
|
||||||
case OpEndLine:
|
case OpEndLine:
|
||||||
@ -174,7 +174,11 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
|||||||
case OpBeginText:
|
case OpBeginText:
|
||||||
b.WriteString(`\A`)
|
b.WriteString(`\A`)
|
||||||
case OpEndText:
|
case OpEndText:
|
||||||
b.WriteString(`\z`)
|
if re.Flags&WasDollar != 0 {
|
||||||
|
b.WriteString(`(?-m:$)`)
|
||||||
|
} else {
|
||||||
|
b.WriteString(`\z`)
|
||||||
|
}
|
||||||
case OpWordBoundary:
|
case OpWordBoundary:
|
||||||
b.WriteString(`\b`)
|
b.WriteString(`\b`)
|
||||||
case OpNoWordBoundary:
|
case OpNoWordBoundary:
|
||||||
@ -192,7 +196,7 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
|||||||
}
|
}
|
||||||
b.WriteRune(')')
|
b.WriteRune(')')
|
||||||
case OpStar, OpPlus, OpQuest, OpRepeat:
|
case OpStar, OpPlus, OpQuest, OpRepeat:
|
||||||
if sub := re.Sub[0]; sub.Op > OpCapture {
|
if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
|
||||||
b.WriteString(`(?:`)
|
b.WriteString(`(?:`)
|
||||||
writeRegexp(b, sub)
|
writeRegexp(b, sub)
|
||||||
b.WriteString(`)`)
|
b.WriteString(`)`)
|
||||||
@ -217,6 +221,9 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
|
|||||||
}
|
}
|
||||||
b.WriteRune('}')
|
b.WriteRune('}')
|
||||||
}
|
}
|
||||||
|
if re.Flags&NonGreedy != 0 {
|
||||||
|
b.WriteRune('?')
|
||||||
|
}
|
||||||
case OpConcat:
|
case OpConcat:
|
||||||
for _, sub := range re.Sub {
|
for _, sub := range re.Sub {
|
||||||
if sub.Op == OpAlternate {
|
if sub.Op == OpAlternate {
|
||||||
|
@ -18,7 +18,7 @@ var simplifyTests = []struct {
|
|||||||
{`(ab)*`, `(ab)*`},
|
{`(ab)*`, `(ab)*`},
|
||||||
{`(ab)+`, `(ab)+`},
|
{`(ab)+`, `(ab)+`},
|
||||||
{`(ab)?`, `(ab)?`},
|
{`(ab)?`, `(ab)?`},
|
||||||
{`.`, `.`},
|
{`.`, `(?s:.)`},
|
||||||
{`^`, `^`},
|
{`^`, `^`},
|
||||||
{`$`, `$`},
|
{`$`, `$`},
|
||||||
{`[ac]`, `[ac]`},
|
{`[ac]`, `[ac]`},
|
||||||
@ -97,22 +97,22 @@ var simplifyTests = []struct {
|
|||||||
{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
|
{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
|
||||||
|
|
||||||
// Full character classes
|
// Full character classes
|
||||||
{`[[:cntrl:][:^cntrl:]]`, `.`},
|
{`[[:cntrl:][:^cntrl:]]`, `(?s:.)`},
|
||||||
|
|
||||||
// Unicode case folding.
|
// Unicode case folding.
|
||||||
{`(?i)A`, `(?i:A)`},
|
{`(?i)A`, `(?i:A)`},
|
||||||
{`(?i)a`, `(?i:a)`},
|
{`(?i)a`, `(?i:A)`},
|
||||||
{`(?i)[A]`, `(?i:A)`},
|
{`(?i)[A]`, `(?i:A)`},
|
||||||
{`(?i)[a]`, `(?i:A)`},
|
{`(?i)[a]`, `(?i:A)`},
|
||||||
{`(?i)K`, `(?i:K)`},
|
{`(?i)K`, `(?i:K)`},
|
||||||
{`(?i)k`, `(?i:k)`},
|
{`(?i)k`, `(?i:K)`},
|
||||||
{`(?i)\x{212a}`, "(?i:\u212A)"},
|
{`(?i)\x{212a}`, "(?i:K)"},
|
||||||
{`(?i)[K]`, "[Kk\u212A]"},
|
{`(?i)[K]`, "[Kk\u212A]"},
|
||||||
{`(?i)[k]`, "[Kk\u212A]"},
|
{`(?i)[k]`, "[Kk\u212A]"},
|
||||||
{`(?i)[\x{212a}]`, "[Kk\u212A]"},
|
{`(?i)[\x{212a}]`, "[Kk\u212A]"},
|
||||||
{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
|
{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
|
||||||
{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
|
{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
|
||||||
{`(?i)[\x00-\x{10FFFF}]`, `.`},
|
{`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`},
|
||||||
|
|
||||||
// Empty string as a regular expression.
|
// Empty string as a regular expression.
|
||||||
// The empty string must be preserved inside parens in order
|
// The empty string must be preserved inside parens in order
|
||||||
|
Loading…
Reference in New Issue
Block a user