1
0
mirror of https://github.com/golang/go synced 2024-11-21 21:54:40 -07:00

exp/regexp: add CompilePOSIX, more tests

R=r
CC=golang-dev
https://golang.org/cl/4967060
This commit is contained in:
Russ Cox 2011-09-08 14:49:51 -04:00
parent 177dca77e1
commit 21e671dee6
11 changed files with 6837 additions and 29 deletions

View File

@ -6,9 +6,12 @@ package regexp
import (
"bufio"
"compress/gzip"
"compress/bzip2"
"exp/regexp/syntax"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
@ -59,24 +62,34 @@ import (
// At time of writing, re2.txt is 32 MB but compresses to 760 kB,
// so we store re2.txt.gz in the repository and decompress it on the fly.
//
func TestRE2(t *testing.T) {
func TestRE2Search(t *testing.T) {
testRE2(t, "testdata/re2-search.txt")
}
func TestRE2Exhaustive(t *testing.T) {
if testing.Short() {
t.Log("skipping TestRE2 during short test")
t.Log("skipping TestRE2Exhaustive during short test")
return
}
testRE2(t, "testdata/re2-exhaustive.txt.bz2")
}
f, err := os.Open("re2.txt.gz")
func testRE2(t *testing.T, file string) {
f, err := os.Open(file)
if err != nil {
t.Fatal(err)
}
defer f.Close()
gz, err := gzip.NewReader(f)
if err != nil {
t.Fatalf("decompress re2.txt.gz: %v", err)
var txt io.Reader
if strings.HasSuffix(file, ".bz2") {
z := bzip2.NewReader(f)
txt = z
file = file[:len(file)-len(".bz2")] // for error messages
} else {
txt = f
}
defer gz.Close()
lineno := 0
r := bufio.NewReader(gz)
r := bufio.NewReader(txt)
var (
str []string
input []string
@ -92,13 +105,13 @@ func TestRE2(t *testing.T) {
if err == os.EOF {
break
}
t.Fatalf("re2.txt:%d: %v", lineno, err)
t.Fatalf("%s:%d: %v", file, lineno, err)
}
line = line[:len(line)-1] // chop \n
lineno++
switch {
case line == "":
t.Fatalf("re2.txt:%d: unexpected blank line", lineno)
t.Fatalf("%s:%d: unexpected blank line", file, lineno)
case line[0] == '#':
continue
case 'A' <= line[0] && line[0] <= 'Z':
@ -114,7 +127,7 @@ func TestRE2(t *testing.T) {
q, err := strconv.Unquote(line)
if err != nil {
// Fatal because we'll get out of sync.
t.Fatalf("re2.txt:%d: unquote %s: %v", lineno, line, err)
t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
}
if inStrings {
str = append(str, q)
@ -122,7 +135,7 @@ func TestRE2(t *testing.T) {
}
// Is a regexp.
if len(input) != 0 {
t.Fatalf("re2.txt:%d: out of sync: have %d strings left before %#q", lineno, len(input), q)
t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
}
re, err = tryCompile(q)
if err != nil {
@ -130,7 +143,7 @@ func TestRE2(t *testing.T) {
// We don't and likely never will support \C; keep going.
continue
}
t.Errorf("re2.txt:%d: compile %#q: %v", lineno, q, err)
t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
if nfail++; nfail >= 100 {
t.Fatalf("stopping after %d errors", nfail)
}
@ -140,7 +153,7 @@ func TestRE2(t *testing.T) {
refull, err = tryCompile(full)
if err != nil {
// Fatal because q worked, so this should always work.
t.Fatalf("re2.txt:%d: compile full %#q: %v", lineno, full, err)
t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
}
input = str
case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
@ -151,7 +164,7 @@ func TestRE2(t *testing.T) {
continue
}
if len(input) == 0 {
t.Fatalf("re2.txt:%d: out of sync: no input remaining", lineno)
t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
}
var text string
text, input = input[0], input[1:]
@ -165,13 +178,13 @@ func TestRE2(t *testing.T) {
}
res := strings.Split(line, ";")
if len(res) != len(run) {
t.Fatalf("re2.txt:%d: have %d test results, want %d", lineno, len(res), len(run))
t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
}
for i := range res {
have, suffix := run[i](re, refull, text)
want := parseResult(t, lineno, res[i])
want := parseResult(t, file, lineno, res[i])
if !same(have, want) {
t.Errorf("re2.txt:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, suffix, text, have, want)
t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
if nfail++; nfail >= 100 {
t.Fatalf("stopping after %d errors", nfail)
}
@ -179,7 +192,7 @@ func TestRE2(t *testing.T) {
}
b, suffix := match[i](re, refull, text)
if b != (want != nil) {
t.Errorf("re2.txt:%d: %#q%s.MatchString(%#q) = %v, want %v", lineno, re, suffix, text, b, !b)
t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
if nfail++; nfail >= 100 {
t.Fatalf("stopping after %d errors", nfail)
}
@ -188,11 +201,11 @@ func TestRE2(t *testing.T) {
}
default:
t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line)
t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
}
}
if len(input) != 0 {
t.Fatalf("re2.txt:%d: out of sync: have %d strings left at EOF", lineno, len(input))
t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
}
t.Logf("%d cases tested", ncase)
}
@ -270,7 +283,7 @@ func tryCompile(s string) (re *Regexp, err os.Error) {
return Compile(s)
}
func parseResult(t *testing.T, lineno int, res string) []int {
func parseResult(t *testing.T, file string, lineno int, res string) []int {
// A single - indicates no match.
if res == "-" {
return nil
@ -295,12 +308,12 @@ func parseResult(t *testing.T, lineno int, res string) []int {
} else {
k := strings.Index(pair, "-")
if k < 0 {
t.Fatalf("re2.txt:%d: invalid pair %s", lineno, pair)
t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
}
lo, err1 := strconv.Atoi(pair[:k])
hi, err2 := strconv.Atoi(pair[k+1:])
if err1 != nil || err2 != nil || lo > hi {
t.Fatalf("re2.txt:%d: invalid pair %s", lineno, pair)
t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
}
out[n] = lo
out[n+1] = hi
@ -323,3 +336,314 @@ func same(x, y []int) bool {
}
return true
}
// TestFowler exercises this package's regexp API with the POSIX
// regular expression test suites assembled by Glenn Fowler
// (http://www2.research.att.com/~gsf/testregex/).
//
// Every file matching testdata/*.dat is logged by name and then
// handed to testFowler. A failure to expand the glob pattern is fatal.
func TestFowler(t *testing.T) {
	paths, globErr := filepath.Glob("testdata/*.dat")
	if globErr != nil {
		t.Fatal(globErr)
	}
	for i := 0; i < len(paths); i++ {
		// Log the name first so later failures can be tied to a file.
		t.Log(paths[i])
		testFowler(t, paths[i])
	}
}
// notab matches a maximal run of non-tab characters; testFowler uses it
// to split each test line into its tab-separated fields.
var notab = MustCompile(`[^\t]+`)
// testFowler runs every test specification found in the named file,
// which must be in the testregex input format described below, and
// reports mismatches through t.
//
// Blank lines, comment lines, control lines and lines containing a NIL
// field are skipped. Each remaining specification is run once for every
// mode letter in its flag field that this package supports ('E' and
// 'L'); all other mode letters are ignored. If the file cannot be
// opened the error is reported with t.Error and the function returns.
func testFowler(t *testing.T, file string) {
	f, err := os.Open(file)
	if err != nil {
		t.Error(err)
		return
	}
	defer f.Close()
	b := bufio.NewReader(f)
	lineno := 0
	lastRegexp := ""
Reading:
	for {
		lineno++
		line, err := b.ReadString('\n')
		if err != nil {
			if err != os.EOF {
				t.Errorf("%s:%d: %v", file, lineno, err)
			}
			break Reading
		}

		// http://www2.research.att.com/~gsf/man/man1/testregex.html
		//
		// INPUT FORMAT
		// Input lines may be blank, a comment beginning with #, or a test
		// specification. A specification is five fields separated by one
		// or more tabs. NULL denotes the empty string and NIL denotes the
		// 0 pointer.
		if line[0] == '#' || line[0] == '\n' {
			continue Reading
		}
		line = line[:len(line)-1]
		field := notab.FindAllString(line, -1)
		for i, f := range field {
			if f == "NULL" {
				field[i] = ""
			}
			if f == "NIL" {
				t.Logf("%s:%d: skip: %s", file, lineno, line)
				continue Reading
			}
		}
		if len(field) == 0 {
			continue Reading
		}

		// Field 1: the regex(3) flags to apply, one character per REG_feature
		// flag. The test is skipped if REG_feature is not supported by the
		// implementation. If the first character is not [BEASKLP] then the
		// specification is a global control line. One or more of [BEASKLP] may be
		// specified; the test will be repeated for each mode.
		//
		// B basic BRE (grep, ed, sed)
		// E REG_EXTENDED ERE (egrep)
		// A REG_AUGMENTED ARE (egrep with negation)
		// S REG_SHELL SRE (sh glob)
		// K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
		// L REG_LITERAL LRE (fgrep)
		//
		// a REG_LEFT|REG_RIGHT implicit ^...$
		// b REG_NOTBOL lhs does not match ^
		// c REG_COMMENT ignore space and #...\n
		// d REG_SHELL_DOT explicit leading . match
		// e REG_NOTEOL rhs does not match $
		// f REG_MULTIPLE multiple \n separated patterns
		// g FNM_LEADING_DIR testfnmatch only -- match until /
		// h REG_MULTIREF multiple digit backref
		// i REG_ICASE ignore case
		// j REG_SPAN . matches \n
		// k REG_ESCAPE \ to escape [...] delimiter
		// l REG_LEFT implicit ^...
		// m REG_MINIMAL minimal match
		// n REG_NEWLINE explicit \n match
		// o REG_ENCLOSED (|&) magic inside [@|&](...)
		// p REG_SHELL_PATH explicit / match
		// q REG_DELIMITED delimited pattern
		// r REG_RIGHT implicit ...$
		// s REG_SHELL_ESCAPED \ not special
		// t REG_MUSTDELIM all delimiters must be specified
		// u standard unspecified behavior -- errors not counted
		// v REG_CLASS_ESCAPE \ special inside [...]
		// w REG_NOSUB no subexpression match array
		// x REG_LENIENT let some errors slide
		// y REG_LEFT regexec() implicit ^...
		// z REG_NULL NULL subexpressions ok
		// $ expand C \c escapes in fields 2 and 3
		// / field 2 is a regsubcomp() expression
		// = field 3 is a regdecomp() expression
		//
		// Field 1 control lines:
		//
		// C set LC_COLLATE and LC_CTYPE to locale in field 2
		//
		// ?test ... output field 5 if passed and != EXPECTED, silent otherwise
		// &test ... output field 5 if current and previous passed
		// |test ... output field 5 if current passed and previous failed
		// ; ... output field 2 if previous failed
		// {test ... skip if failed until }
		// } end of skip
		//
		// : comment comment copied as output NOTE
		// :comment:test :comment: ignored
		// N[OTE] comment comment copied as output NOTE
		// T[EST] comment comment
		//
		// number use number for nmatch (20 by default)
		flag := field[0]
		switch flag[0] {
		case '?', '&', '|', ';', '{', '}':
			// Ignore all the control operators.
			// Just run everything.
			flag = flag[1:]
			if flag == "" {
				continue Reading
			}
		case ':':
			// ":comment:test" - drop the leading ":comment:" and keep the test flags.
			i := strings.Index(flag[1:], ":")
			if i < 0 {
				t.Logf("skip: %s", line)
				continue Reading
			}
			flag = flag[1+i+1:]
		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			t.Logf("skip: %s", line)
			continue Reading
		}

		// Can check field count now that we've handled the myriad comment formats.
		if len(field) < 4 {
			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
			continue Reading
		}

		// Expand C escapes (a.k.a. Go escapes).
		if strings.Contains(flag, "$") {
			f := `"` + field[1] + `"`
			if field[1], err = strconv.Unquote(f); err != nil {
				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
			}
			f = `"` + field[2] + `"`
			if field[2], err = strconv.Unquote(f); err != nil {
				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
			}
		}

		// Field 2: the regular expression pattern; SAME uses the pattern from
		// the previous specification.
		//
		if field[1] == "SAME" {
			field[1] = lastRegexp
		}
		lastRegexp = field[1]

		// Field 3: the string to match.
		text := field[2]

		// Field 4: the test outcome...
		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
		if !ok {
			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
			continue Reading
		}

		// Field 5: optional comment appended to the report.

	Testing:
		// Run test once for each specified capital letter mode that we support.
		for _, c := range flag {
			pattern := field[1]
			syn := syntax.POSIX | syntax.ClassNL
			switch c {
			default:
				continue Testing
			case 'E':
				// extended regexp (what we support)
			case 'L':
				// literal
				pattern = QuoteMeta(pattern)
			}

			// Lower-case letters modify the mode; only 'i' (ignore case) is honored.
			for _, c := range flag {
				switch c {
				case 'i':
					syn |= syntax.FoldCase
				}
			}

			re, err := compile(pattern, syn, true)
			if err != nil {
				if shouldCompile {
					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
				}
				continue Testing
			}
			if !shouldCompile {
				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
				continue Testing
			}
			match := re.MatchString(text)
			if match != shouldMatch {
				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
				continue Testing
			}
			have := re.FindStringSubmatchIndex(text)
			if (len(have) > 0) != match {
				// The format has eight verbs; pattern must be passed a second
				// time as the receiver of FindSubmatchIndex.
				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
				continue Testing
			}
			// Compare only as many positions as the test data specifies.
			if len(have) > len(pos) {
				have = have[:len(pos)]
			}
			if !same(have, pos) {
				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
			}
		}
	}
}
// parseFowlerResult decodes field 4 of a Fowler test specification.
//
// The results are: ok, whether the field could be parsed; compiled,
// whether the pattern is expected to compile; matched, whether it is
// expected to match; and pos, the flattened start/end offsets when the
// field is a match array (an endpoint written as ? becomes -1).
//
// From the testregex documentation:
//
// Field 4: the test outcome. This is either one of the posix error
// codes (with REG_ omitted) or the match array, a list of (m,n)
// entries with m and n being first and last+1 positions in the
// field 3 string, or NULL if REG_NOSUB is in effect and success
// is expected. BADPAT is acceptable in place of any regcomp(3)
// error code. The match[] array is initialized to (-2,-2) before
// each test. All array elements from 0 to nmatch-1 must be specified
// in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
// Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
// matched (?{...}) expression, where x is the text enclosed by {...},
// o is the expression ordinal counting from 1, and n is the length of
// the unmatched portion of the subject string. If x starts with a
// number then that is the return value of re_execf(), otherwise 0 is
// returned.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	// Keyword forms carry no position list.
	if s == "" {
		// Match with no position information.
		return true, true, true, nil
	}
	if s == "NOMATCH" {
		// Match failure.
		return true, true, false, nil
	}
	if first := s[0]; 'A' <= first && first <= 'Z' {
		// All the other error codes are compile errors.
		return true, false, false, nil
	}

	// Anything else must be a run of (m,n) pairs. Every parse failure
	// below reports ok=false with compiled=true and no positions.
	var offsets []int
	rest := s
	for len(rest) > 0 {
		// An odd count means a pair is half read: the next number ends at ')'.
		// An even count means a new pair starts: consume '(' and read up to ','.
		delim := byte(')')
		if len(offsets)%2 == 0 {
			if rest[0] != '(' {
				return false, true, false, nil
			}
			rest = rest[1:]
			delim = ','
		}

		// Find the delimiter that terminates this number.
		n := 0
		for n < len(rest) && rest[n] != delim {
			n++
		}
		// Reject an empty number and a missing delimiter.
		if n == 0 || n == len(rest) {
			return false, true, false, nil
		}

		val := -1 // value used for an unspecified endpoint ("?")
		if tok := rest[:n]; tok != "?" {
			parsed, convErr := strconv.Atoi(tok)
			if convErr != nil {
				return false, true, false, nil
			}
			val = parsed
		}
		offsets = append(offsets, val)
		rest = rest[n+1:]
	}
	// Input that ends right after the ',' leaves a dangling start offset.
	if len(offsets)%2 != 0 {
		return false, true, false, nil
	}
	return true, true, true, offsets
}

View File

@ -98,6 +98,15 @@ var findTests = []FindTest{
{`\B`, "x y", nil},
{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
// RE2 tests
{`[^\S\s]`, "abcd", nil},
{`[^\S[:space:]]`, "abcd", nil},
{`[^\D\d]`, "abcd", nil},
{`[^\D[:digit:]]`, "abcd", nil},
{`(?i)\W`, "x", nil},
{`(?i)\W`, "k", nil},
{`(?i)\W`, "s", nil},
// can backslash-escape any punctuation
{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},

Binary file not shown.

View File

@ -97,10 +97,45 @@ func (re *Regexp) String() string {
return re.expr
}
// Compile parses a regular expression and returns, if successful, a Regexp
// object that can be used to match against text.
// Compile parses a regular expression and returns, if successful,
// a Regexp object that can be used to match against text.
//
// When matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses the one that a backtracking search would have found first.
// This so-called leftmost-first matching is the same semantics
// that Perl, Python, and other implementations use, although this
// package implements it without the expense of backtracking.
// For POSIX leftmost-longest matching, see CompilePOSIX.
func Compile(expr string) (*Regexp, os.Error) {
re, err := syntax.Parse(expr, syntax.Perl)
return compile(expr, syntax.Perl, false)
}
// CompilePOSIX is like Compile but restricts the regular expression
// to POSIX ERE (egrep) syntax and changes the match semantics to
// leftmost-longest.
//
// That is, when matching against text, the regexp returns a match that
// begins as early as possible in the input (leftmost), and among those
// it chooses a match that is as long as possible.
// This so-called leftmost-longest matching is the same semantics
// that early regular expression implementations used and that POSIX
// specifies.
//
// However, there can be multiple leftmost-longest matches, with different
// submatch choices, and here this package diverges from POSIX.
// Among the possible leftmost-longest matches, this package chooses
// the one that a backtracking search would have found first, while POSIX
// specifies that the match be chosen to maximize the length of the first
// subexpression, then the second, and so on from left to right.
// The POSIX rule is computationally prohibitive and not even well-defined.
// See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
//
// CompilePOSIX calls compile with the syntax.POSIX parse flags and with
// longest set to true, and returns compile's results unchanged.
func CompilePOSIX(expr string) (*Regexp, os.Error) {
return compile(expr, syntax.POSIX, true)
}
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, os.Error) {
re, err := syntax.Parse(expr, mode)
if err != nil {
return nil, err
}
@ -114,6 +149,8 @@ func Compile(expr string) (*Regexp, os.Error) {
expr: expr,
prog: prog,
numSubexp: maxCap,
cond: prog.StartCond(),
longest: longest,
}
regexp.prefix, regexp.prefixComplete = prog.Prefix()
if regexp.prefix != "" {
@ -122,7 +159,6 @@ func Compile(expr string) (*Regexp, os.Error) {
regexp.prefixBytes = []byte(regexp.prefix)
regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
}
regexp.cond = prog.StartCond()
return regexp, nil
}

23
src/pkg/exp/regexp/testdata/README vendored Normal file
View File

@ -0,0 +1,23 @@
AT&T POSIX Test Files
See testregex.c for copyright + license.
testregex.c http://www2.research.att.com/~gsf/testregex/testregex.c
basic.dat http://www2.research.att.com/~gsf/testregex/basic.dat
nullsubexpr.dat http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat
repetition.dat http://www2.research.att.com/~gsf/testregex/repetition.dat
The test data has been edited to reflect RE2/Go differences:
* In a star of a possibly empty match like (a*)* matching x,
the no match case runs the starred subexpression zero times,
not once. This is consistent with (a*)* matching a, which
runs the starred subexpression one time, not twice.
* The submatch choice is first match, not the POSIX rule.
Such changes are marked with 'RE2/Go'.
RE2 Test Files
re2-exhaustive.txt.bz2 and re2-search.txt are built by running
'make log' in the RE2 distribution. http://code.google.com/p/re2/.
The exhaustive file is compressed because it is huge.

221
src/pkg/exp/regexp/testdata/basic.dat vendored Normal file
View File

@ -0,0 +1,221 @@
NOTE all standard compliant implementations should pass these : 2002-05-31
BE abracadabra$ abracadabracadabra (7,18)
BE a...b abababbb (2,7)
BE XXXXXX ..XXXXXX (2,8)
E \) () (1,2)
BE a] a]a (0,2)
B } } (0,1)
E \} } (0,1)
BE \] ] (0,1)
B ] ] (0,1)
E ] ] (0,1)
B { { (0,1)
B } } (0,1)
BE ^a ax (0,1)
BE \^a a^a (1,3)
BE a\^ a^ (0,2)
BE a$ aa (1,2)
BE a\$ a$ (0,2)
BE ^$ NULL (0,0)
E $^ NULL (0,0)
E a($) aa (1,2)(2,2)
E a*(^a) aa (0,1)(0,1)
E (..)*(...)* a (0,0)
E (..)*(...)* abcd (0,4)(2,4)
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
E (ab)c|abc abc (0,3)(0,2)
E a{0}b ab (1,2)
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E a{9876543210} NULL BADBR
E ((a|a)|a) a (0,1)(0,1)(0,1)
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
E a*(a.|aa) aaaa (0,4)(2,4)
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
E (a|b)?.* b (0,1)(0,1)
E (a|b)c|a(b|c) ac (0,2)(0,1)
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
E (a|b)*c|(a|ab)*c xc (1,2)
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
E a?(ab|ba)ab abab (0,4)(0,2)
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
E ab|abab abbabab (0,2)
E aba|bab|bba baaabbbaba (5,8)
E aba|bab baaabbbaba (6,9)
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
E ab|a xabc (1,3)
E ab|a xxabc (2,4)
Ei (Ab|cD)* aBcD (0,4)(2,4)
BE [^-] --a (2,3)
BE [a-]* --a (0,3)
BE [a-m-]* --amoma-- (0,4)
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
{E [[:upper:]] A (0,1) [[<element>]] not supported
E [[:lower:]]+ `az{ (1,3)
E [[:upper:]]+ @AZ[ (1,3)
# No collation in Go
#BE [[-]] [[-]] (2,4)
#BE [[.NIL.]] NULL ECOLLATE
#BE [[=aleph=]] NULL ECOLLATE
}
BE$ \n \n (0,1)
BEn$ \n \n (0,1)
BE$ [^a] \n (0,1)
BE$ \na \na (0,2)
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
BE xxx xxx (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
BE$ .* \x01\xff (0,2)
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
E a*a*a*a*a*b aaaaaaaaab (0,10)
BE ^ NULL (0,0)
BE $ NULL (0,0)
BE ^$ NULL (0,0)
BE ^a$ a (0,1)
BE abc abc (0,3)
BE abc xabcy (1,4)
BE abc ababc (2,5)
BE ab*c abc (0,3)
BE ab*bc abc (0,3)
BE ab*bc abbc (0,4)
BE ab*bc abbbbc (0,6)
E ab+bc abbc (0,4)
E ab+bc abbbbc (0,6)
E ab?bc abbc (0,4)
E ab?bc abc (0,3)
E ab?c abc (0,3)
BE ^abc$ abc (0,3)
BE ^abc abcc (0,3)
BE abc$ aabc (1,4)
BE ^ abc (0,0)
BE $ abc (3,3)
BE a.c abc (0,3)
BE a.c axc (0,3)
BE a.*c axyzc (0,5)
BE a[bc]d abd (0,3)
BE a[b-d]e ace (0,3)
BE a[b-d] aac (1,3)
BE a[-b] a- (0,2)
BE a[b-] a- (0,2)
BE a] a] (0,2)
BE a[]]b a]b (0,3)
BE a[^bc]d aed (0,3)
BE a[^-b]c adc (0,3)
BE a[^]b]c adc (0,3)
E ab|cd abc (0,2)
E ab|cd abcd (0,2)
E a\(b a(b (0,3)
E a\(*b ab (0,2)
E a\(*b a((b (0,4)
E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
#E (a*)* - (0,0)(0,0)
E (a*)* - (0,0)(?,?) RE2/Go
E (a*)+ - (0,0)(0,0)
#E (a*|b)* - (0,0)(0,0)
E (a*|b)* - (0,0)(?,?) RE2/Go
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
#E (^)* - (0,0)(0,0)
E (^)* - (0,0)(?,?) RE2/Go
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
#E ((a*|b))* - (0,0)(0,0)(0,0)
E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
E (ab|cd)e abcde (2,5)(2,4)
BE [abhgefdc]ij hij (0,3)
E (a|b)c*d abcd (1,4)(1,2)
E (ab|ab*)bc abc (0,3)(0,1)
E a([bc]*)c* abc (0,3)(1,3)
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
BE multiple words multiple words yeah (0,14)
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
BE abcd abcd (0,4)
E a(bc)d abcd (0,4)(1,3)
E a[-]?c ac (0,3)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
E a+(b|c)*d+ aabcdd (0,6)(3,4)
E ^.+$ vivi (0,4)
E ^(.+)$ vivi (0,4)(0,4)
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
E (foo|(bar))!bas foo!bas (0,7)(0,3)
E (foo|bar)!bas bar!bas (0,7)(0,3)
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
E (foo|bar)!bas foo!bas (0,7)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
E .*(/XXX).* /XXX (0,4)(0,4)
E .*(\\XXX).* \XXX (0,4)(0,4)
E \\XXX \XXX (0,4)
E .*(/000).* /000 (0,4)(0,4)
E .*(\\000).* \000 (0,4)(0,4)
E \\000 \000 (0,4)

View File

@ -0,0 +1,79 @@
NOTE null subexpression matches : 2002-06-06
E (a*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)* a (0,1)(0,1)
E SAME x (0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)+ a (0,1)(0,1)
E SAME x NOMATCH
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
#E SAME b (0,0)(0,0)
E SAME b (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
E SAME aaaaaa (0,6)(0,6)
E SAME ababab (0,6)(0,6)
E SAME bababa (0,6)(0,6)
E SAME b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
#E SAME aaaaaa (0,0)(0,0)
E SAME aaaaaa (0,0)(?,?) RE2/Go
E ([^ab]*)* ccccxx (0,6)(0,6)
#E SAME ababab (0,0)(0,0)
E SAME ababab (0,0)(?,?) RE2/Go
E ((z)+|a)* zabcde (0,2)(1,2)
#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
#E (a) aaa (0,1)(0,1)
#E (a*?) aaa (0,0)(0,0)
#E (a)*? aaa (0,0)
#E (a*?)*? aaa (0,0)
#}
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
#E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)
E (a*)+(x) x (0,1)(0,0)(0,1)
E (a*)+(x) ax (0,2)(0,1)(1,2)
E (a*)+(x) axa (0,2)(0,1)(1,2)
E (a*){2}(x) x (0,1)(0,0)(0,1)
E (a*){2}(x) ax (0,2)(1,1)(1,2)
E (a*){2}(x) axa (0,2)(1,1)(1,2)

Binary file not shown.

3667
src/pkg/exp/regexp/testdata/re2-search.txt vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,163 @@
NOTE implicit vs. explicit repetitions : 2009-02-02
# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)
E ((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.)){1} NULL NOMATCH
E ((..)|(.)){2} NULL NOMATCH
E ((..)|(.)){3} NULL NOMATCH
E ((..)|(.))* NULL (0,0)
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)){2} a NOMATCH
E ((..)|(.)){3} a NOMATCH
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
E ((..)|(.)){3} aa NOMATCH
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
# Linux/GLIBC gets the {8,} and {8,8} wrong.
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
# These test a fixed bug in my regex-tdfa that did not keep the expanded
# form properly grouped, so right association did the wrong thing with
# these ambiguous patterns (crafted just to test my code when I became
# suspicious of my implementation). The first subexpression should use
# "ab" then "a" then "bcd".
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
# results like (0,6)(4,5)(6,6).
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
# The above worked on Linux/GLIBC but the following often fail.
# They also trip up OS X / FreeBSD / NetBSD:
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go

2286
src/pkg/exp/regexp/testdata/testregex.c vendored Normal file

File diff suppressed because it is too large Load Diff