mirror of
https://github.com/golang/go
synced 2024-11-21 21:54:40 -07:00
exp/regexp: add CompilePOSIX, more tests
R=r CC=golang-dev https://golang.org/cl/4967060
This commit is contained in:
parent
177dca77e1
commit
21e671dee6
@ -6,9 +6,12 @@ package regexp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/gzip"
|
||||
"compress/bzip2"
|
||||
"exp/regexp/syntax"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
@ -59,24 +62,34 @@ import (
|
||||
// At time of writing, re2.txt is 32 MB but compresses to 760 kB,
|
||||
// so we store re2.txt.gz in the repository and decompress it on the fly.
|
||||
//
|
||||
func TestRE2(t *testing.T) {
|
||||
func TestRE2Search(t *testing.T) {
|
||||
testRE2(t, "testdata/re2-search.txt")
|
||||
}
|
||||
|
||||
func TestRE2Exhaustive(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Log("skipping TestRE2 during short test")
|
||||
t.Log("skipping TestRE2Exhaustive during short test")
|
||||
return
|
||||
}
|
||||
testRE2(t, "testdata/re2-exhaustive.txt.bz2")
|
||||
}
|
||||
|
||||
f, err := os.Open("re2.txt.gz")
|
||||
func testRE2(t *testing.T, file string) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
gz, err := gzip.NewReader(f)
|
||||
if err != nil {
|
||||
t.Fatalf("decompress re2.txt.gz: %v", err)
|
||||
var txt io.Reader
|
||||
if strings.HasSuffix(file, ".bz2") {
|
||||
z := bzip2.NewReader(f)
|
||||
txt = z
|
||||
file = file[:len(file)-len(".bz2")] // for error messages
|
||||
} else {
|
||||
txt = f
|
||||
}
|
||||
defer gz.Close()
|
||||
lineno := 0
|
||||
r := bufio.NewReader(gz)
|
||||
r := bufio.NewReader(txt)
|
||||
var (
|
||||
str []string
|
||||
input []string
|
||||
@ -92,13 +105,13 @@ func TestRE2(t *testing.T) {
|
||||
if err == os.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatalf("re2.txt:%d: %v", lineno, err)
|
||||
t.Fatalf("%s:%d: %v", file, lineno, err)
|
||||
}
|
||||
line = line[:len(line)-1] // chop \n
|
||||
lineno++
|
||||
switch {
|
||||
case line == "":
|
||||
t.Fatalf("re2.txt:%d: unexpected blank line", lineno)
|
||||
t.Fatalf("%s:%d: unexpected blank line", file, lineno)
|
||||
case line[0] == '#':
|
||||
continue
|
||||
case 'A' <= line[0] && line[0] <= 'Z':
|
||||
@ -114,7 +127,7 @@ func TestRE2(t *testing.T) {
|
||||
q, err := strconv.Unquote(line)
|
||||
if err != nil {
|
||||
// Fatal because we'll get out of sync.
|
||||
t.Fatalf("re2.txt:%d: unquote %s: %v", lineno, line, err)
|
||||
t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
|
||||
}
|
||||
if inStrings {
|
||||
str = append(str, q)
|
||||
@ -122,7 +135,7 @@ func TestRE2(t *testing.T) {
|
||||
}
|
||||
// Is a regexp.
|
||||
if len(input) != 0 {
|
||||
t.Fatalf("re2.txt:%d: out of sync: have %d strings left before %#q", lineno, len(input), q)
|
||||
t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
|
||||
}
|
||||
re, err = tryCompile(q)
|
||||
if err != nil {
|
||||
@ -130,7 +143,7 @@ func TestRE2(t *testing.T) {
|
||||
// We don't and likely never will support \C; keep going.
|
||||
continue
|
||||
}
|
||||
t.Errorf("re2.txt:%d: compile %#q: %v", lineno, q, err)
|
||||
t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
|
||||
if nfail++; nfail >= 100 {
|
||||
t.Fatalf("stopping after %d errors", nfail)
|
||||
}
|
||||
@ -140,7 +153,7 @@ func TestRE2(t *testing.T) {
|
||||
refull, err = tryCompile(full)
|
||||
if err != nil {
|
||||
// Fatal because q worked, so this should always work.
|
||||
t.Fatalf("re2.txt:%d: compile full %#q: %v", lineno, full, err)
|
||||
t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
|
||||
}
|
||||
input = str
|
||||
case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
|
||||
@ -151,7 +164,7 @@ func TestRE2(t *testing.T) {
|
||||
continue
|
||||
}
|
||||
if len(input) == 0 {
|
||||
t.Fatalf("re2.txt:%d: out of sync: no input remaining", lineno)
|
||||
t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
|
||||
}
|
||||
var text string
|
||||
text, input = input[0], input[1:]
|
||||
@ -165,13 +178,13 @@ func TestRE2(t *testing.T) {
|
||||
}
|
||||
res := strings.Split(line, ";")
|
||||
if len(res) != len(run) {
|
||||
t.Fatalf("re2.txt:%d: have %d test results, want %d", lineno, len(res), len(run))
|
||||
t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
|
||||
}
|
||||
for i := range res {
|
||||
have, suffix := run[i](re, refull, text)
|
||||
want := parseResult(t, lineno, res[i])
|
||||
want := parseResult(t, file, lineno, res[i])
|
||||
if !same(have, want) {
|
||||
t.Errorf("re2.txt:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, suffix, text, have, want)
|
||||
t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
|
||||
if nfail++; nfail >= 100 {
|
||||
t.Fatalf("stopping after %d errors", nfail)
|
||||
}
|
||||
@ -179,7 +192,7 @@ func TestRE2(t *testing.T) {
|
||||
}
|
||||
b, suffix := match[i](re, refull, text)
|
||||
if b != (want != nil) {
|
||||
t.Errorf("re2.txt:%d: %#q%s.MatchString(%#q) = %v, want %v", lineno, re, suffix, text, b, !b)
|
||||
t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
|
||||
if nfail++; nfail >= 100 {
|
||||
t.Fatalf("stopping after %d errors", nfail)
|
||||
}
|
||||
@ -188,11 +201,11 @@ func TestRE2(t *testing.T) {
|
||||
}
|
||||
|
||||
default:
|
||||
t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line)
|
||||
t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
|
||||
}
|
||||
}
|
||||
if len(input) != 0 {
|
||||
t.Fatalf("re2.txt:%d: out of sync: have %d strings left at EOF", lineno, len(input))
|
||||
t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
|
||||
}
|
||||
t.Logf("%d cases tested", ncase)
|
||||
}
|
||||
@ -270,7 +283,7 @@ func tryCompile(s string) (re *Regexp, err os.Error) {
|
||||
return Compile(s)
|
||||
}
|
||||
|
||||
func parseResult(t *testing.T, lineno int, res string) []int {
|
||||
func parseResult(t *testing.T, file string, lineno int, res string) []int {
|
||||
// A single - indicates no match.
|
||||
if res == "-" {
|
||||
return nil
|
||||
@ -295,12 +308,12 @@ func parseResult(t *testing.T, lineno int, res string) []int {
|
||||
} else {
|
||||
k := strings.Index(pair, "-")
|
||||
if k < 0 {
|
||||
t.Fatalf("re2.txt:%d: invalid pair %s", lineno, pair)
|
||||
t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
|
||||
}
|
||||
lo, err1 := strconv.Atoi(pair[:k])
|
||||
hi, err2 := strconv.Atoi(pair[k+1:])
|
||||
if err1 != nil || err2 != nil || lo > hi {
|
||||
t.Fatalf("re2.txt:%d: invalid pair %s", lineno, pair)
|
||||
t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
|
||||
}
|
||||
out[n] = lo
|
||||
out[n+1] = hi
|
||||
@ -323,3 +336,314 @@ func same(x, y []int) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// TestFowler runs this package's regexp API against the
|
||||
// POSIX regular expression tests collected by Glenn Fowler
|
||||
// at http://www2.research.att.com/~gsf/testregex/.
|
||||
func TestFowler(t *testing.T) {
|
||||
files, err := filepath.Glob("testdata/*.dat")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, file := range files {
|
||||
t.Log(file)
|
||||
testFowler(t, file)
|
||||
}
|
||||
}
|
||||
|
||||
// notab matches a maximal run of non-tab characters. testFowler uses it
// to split a test specification line into its tab-separated fields.
var notab = MustCompile(`[^\t]+`)
|
||||
|
||||
func testFowler(t *testing.T, file string) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
b := bufio.NewReader(f)
|
||||
lineno := 0
|
||||
lastRegexp := ""
|
||||
Reading:
|
||||
for {
|
||||
lineno++
|
||||
line, err := b.ReadString('\n')
|
||||
if err != nil {
|
||||
if err != os.EOF {
|
||||
t.Errorf("%s:%d: %v", file, lineno, err)
|
||||
}
|
||||
break Reading
|
||||
}
|
||||
|
||||
// http://www2.research.att.com/~gsf/man/man1/testregex.html
|
||||
//
|
||||
// INPUT FORMAT
|
||||
// Input lines may be blank, a comment beginning with #, or a test
|
||||
// specification. A specification is five fields separated by one
|
||||
// or more tabs. NULL denotes the empty string and NIL denotes the
|
||||
// 0 pointer.
|
||||
if line[0] == '#' || line[0] == '\n' {
|
||||
continue Reading
|
||||
}
|
||||
line = line[:len(line)-1]
|
||||
field := notab.FindAllString(line, -1)
|
||||
for i, f := range field {
|
||||
if f == "NULL" {
|
||||
field[i] = ""
|
||||
}
|
||||
if f == "NIL" {
|
||||
t.Logf("%s:%d: skip: %s", file, lineno, line)
|
||||
continue Reading
|
||||
}
|
||||
}
|
||||
if len(field) == 0 {
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Field 1: the regex(3) flags to apply, one character per REG_feature
|
||||
// flag. The test is skipped if REG_feature is not supported by the
|
||||
// implementation. If the first character is not [BEASKLP] then the
|
||||
// specification is a global control line. One or more of [BEASKLP] may be
|
||||
// specified; the test will be repeated for each mode.
|
||||
//
|
||||
// B basic BRE (grep, ed, sed)
|
||||
// E REG_EXTENDED ERE (egrep)
|
||||
// A REG_AUGMENTED ARE (egrep with negation)
|
||||
// S REG_SHELL SRE (sh glob)
|
||||
// K REG_SHELL|REG_AUGMENTED KRE (ksh glob)
|
||||
// L REG_LITERAL LRE (fgrep)
|
||||
//
|
||||
// a REG_LEFT|REG_RIGHT implicit ^...$
|
||||
// b REG_NOTBOL lhs does not match ^
|
||||
// c REG_COMMENT ignore space and #...\n
|
||||
// d REG_SHELL_DOT explicit leading . match
|
||||
// e REG_NOTEOL rhs does not match $
|
||||
// f REG_MULTIPLE multiple \n separated patterns
|
||||
// g FNM_LEADING_DIR testfnmatch only -- match until /
|
||||
// h REG_MULTIREF multiple digit backref
|
||||
// i REG_ICASE ignore case
|
||||
// j REG_SPAN . matches \n
|
||||
// k REG_ESCAPE \ to ecape [...] delimiter
|
||||
// l REG_LEFT implicit ^...
|
||||
// m REG_MINIMAL minimal match
|
||||
// n REG_NEWLINE explicit \n match
|
||||
// o REG_ENCLOSED (|&) magic inside [@|&](...)
|
||||
// p REG_SHELL_PATH explicit / match
|
||||
// q REG_DELIMITED delimited pattern
|
||||
// r REG_RIGHT implicit ...$
|
||||
// s REG_SHELL_ESCAPED \ not special
|
||||
// t REG_MUSTDELIM all delimiters must be specified
|
||||
// u standard unspecified behavior -- errors not counted
|
||||
// v REG_CLASS_ESCAPE \ special inside [...]
|
||||
// w REG_NOSUB no subexpression match array
|
||||
// x REG_LENIENT let some errors slide
|
||||
// y REG_LEFT regexec() implicit ^...
|
||||
// z REG_NULL NULL subexpressions ok
|
||||
// $ expand C \c escapes in fields 2 and 3
|
||||
// / field 2 is a regsubcomp() expression
|
||||
// = field 3 is a regdecomp() expression
|
||||
//
|
||||
// Field 1 control lines:
|
||||
//
|
||||
// C set LC_COLLATE and LC_CTYPE to locale in field 2
|
||||
//
|
||||
// ?test ... output field 5 if passed and != EXPECTED, silent otherwise
|
||||
// &test ... output field 5 if current and previous passed
|
||||
// |test ... output field 5 if current passed and previous failed
|
||||
// ; ... output field 2 if previous failed
|
||||
// {test ... skip if failed until }
|
||||
// } end of skip
|
||||
//
|
||||
// : comment comment copied as output NOTE
|
||||
// :comment:test :comment: ignored
|
||||
// N[OTE] comment comment copied as output NOTE
|
||||
// T[EST] comment comment
|
||||
//
|
||||
// number use number for nmatch (20 by default)
|
||||
flag := field[0]
|
||||
switch flag[0] {
|
||||
case '?', '&', '|', ';', '{', '}':
|
||||
// Ignore all the control operators.
|
||||
// Just run everything.
|
||||
flag = flag[1:]
|
||||
if flag == "" {
|
||||
continue Reading
|
||||
}
|
||||
case ':':
|
||||
i := strings.Index(flag[1:], ":")
|
||||
if i < 0 {
|
||||
t.Logf("skip: %s", line)
|
||||
continue Reading
|
||||
}
|
||||
flag = flag[1+i+1:]
|
||||
case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
t.Logf("skip: %s", line)
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Can check field count now that we've handled the myriad comment formats.
|
||||
if len(field) < 4 {
|
||||
t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Expand C escapes (a.k.a. Go escapes).
|
||||
if strings.Contains(flag, "$") {
|
||||
f := `"` + field[1] + `"`
|
||||
if field[1], err = strconv.Unquote(f); err != nil {
|
||||
t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
|
||||
}
|
||||
f = `"` + field[2] + `"`
|
||||
if field[2], err = strconv.Unquote(f); err != nil {
|
||||
t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
|
||||
}
|
||||
}
|
||||
|
||||
// Field 2: the regular expression pattern; SAME uses the pattern from
|
||||
// the previous specification.
|
||||
//
|
||||
if field[1] == "SAME" {
|
||||
field[1] = lastRegexp
|
||||
}
|
||||
lastRegexp = field[1]
|
||||
|
||||
// Field 3: the string to match.
|
||||
text := field[2]
|
||||
|
||||
// Field 4: the test outcome...
|
||||
ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
|
||||
if !ok {
|
||||
t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
|
||||
continue Reading
|
||||
}
|
||||
|
||||
// Field 5: optional comment appended to the report.
|
||||
|
||||
Testing:
|
||||
// Run test once for each specified capital letter mode that we support.
|
||||
for _, c := range flag {
|
||||
pattern := field[1]
|
||||
syn := syntax.POSIX | syntax.ClassNL
|
||||
switch c {
|
||||
default:
|
||||
continue Testing
|
||||
case 'E':
|
||||
// extended regexp (what we support)
|
||||
case 'L':
|
||||
// literal
|
||||
pattern = QuoteMeta(pattern)
|
||||
}
|
||||
|
||||
for _, c := range flag {
|
||||
switch c {
|
||||
case 'i':
|
||||
syn |= syntax.FoldCase
|
||||
}
|
||||
}
|
||||
|
||||
re, err := compile(pattern, syn, true)
|
||||
if err != nil {
|
||||
if shouldCompile {
|
||||
t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
|
||||
}
|
||||
continue Testing
|
||||
}
|
||||
if !shouldCompile {
|
||||
t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
|
||||
continue Testing
|
||||
}
|
||||
match := re.MatchString(text)
|
||||
if match != shouldMatch {
|
||||
t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
|
||||
continue Testing
|
||||
}
|
||||
have := re.FindStringSubmatchIndex(text)
|
||||
if (len(have) > 0) != match {
|
||||
t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, text, have)
|
||||
continue Testing
|
||||
}
|
||||
if len(have) > len(pos) {
|
||||
have = have[:len(pos)]
|
||||
}
|
||||
if !same(have, pos) {
|
||||
t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseFowlerResult decodes field 4 of a Fowler test specification,
// the expected outcome of the test.
//
// The testregex documentation describes the field as follows:
//
//   Field 4: the test outcome. This is either one of the posix error
//     codes (with REG_ omitted) or the match array, a list of (m,n)
//     entries with m and n being first and last+1 positions in the
//     field 3 string, or NULL if REG_NOSUB is in effect and success
//     is expected. BADPAT is acceptable in place of any regcomp(3)
//     error code. The match[] array is initialized to (-2,-2) before
//     each test. All array elements from 0 to nmatch-1 must be specified
//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
//     matched (?{...}) expression, where x is the text enclosed by {...},
//     o is the expression ordinal counting from 1, and n is the length of
//     the unmatched portion of the subject string. If x starts with a
//     number then that is the return value of re_execf(), otherwise 0 is
//     returned.
//
// Results:
//   ok       reports whether s could be parsed at all.
//   compiled reports whether the pattern is expected to compile.
//   matched  reports whether the pattern is expected to match.
//   pos      is the flattened list of expected offsets, two per (m,n)
//            entry, with -1 standing for a ? endpoint. It is nil unless
//            s is a well-formed match array.
func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
	// The three outcomes that carry no match array.
	if s == "" {
		// Match with no position information.
		return true, true, true, nil
	}
	if s == "NOMATCH" {
		// Match failure.
		return true, true, false, nil
	}
	if first := s[0]; 'A' <= first && first <= 'Z' {
		// All the other error codes are compile errors.
		return true, false, false, nil
	}

	// Anything else must be a sequence of "(m,n)" entries. Each pass of
	// the outer loop consumes one entry: the opening parenthesis, the
	// number terminated by ',' and the number terminated by ')'.
	var offsets []int
	rest := s
	for rest != "" {
		if rest[0] != '(' {
			return false, true, false, nil
		}
		rest = rest[1:]
		for _, delim := range []byte{',', ')'} {
			n := 0
			for n < len(rest) && rest[n] != delim {
				n++
			}
			// Reject an empty number and a missing delimiter alike.
			if n == 0 || n == len(rest) {
				return false, true, false, nil
			}
			val := -1 // value recorded for an unspecified "?" endpoint
			if tok := rest[:n]; tok != "?" {
				num, err := strconv.Atoi(tok)
				if err != nil {
					return false, true, false, nil
				}
				val = num
			}
			offsets = append(offsets, val)
			rest = rest[n+1:]
		}
	}
	return true, true, true, offsets
}
|
||||
|
@ -98,6 +98,15 @@ var findTests = []FindTest{
|
||||
{`\B`, "x y", nil},
|
||||
{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
|
||||
|
||||
// RE2 tests
|
||||
{`[^\S\s]`, "abcd", nil},
|
||||
{`[^\S[:space:]]`, "abcd", nil},
|
||||
{`[^\D\d]`, "abcd", nil},
|
||||
{`[^\D[:digit:]]`, "abcd", nil},
|
||||
{`(?i)\W`, "x", nil},
|
||||
{`(?i)\W`, "k", nil},
|
||||
{`(?i)\W`, "s", nil},
|
||||
|
||||
// can backslash-escape any punctuation
|
||||
{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
|
||||
`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
|
||||
|
Binary file not shown.
@ -97,10 +97,45 @@ func (re *Regexp) String() string {
|
||||
return re.expr
|
||||
}
|
||||
|
||||
// Compile parses a regular expression and returns, if successful, a Regexp
|
||||
// object that can be used to match against text.
|
||||
// Compile parses a regular expression and returns, if successful,
|
||||
// a Regexp object that can be used to match against text.
|
||||
//
|
||||
// When matching against text, the regexp returns a match that
|
||||
// begins as early as possible in the input (leftmost), and among those
|
||||
// it chooses the one that a backtracking search would have found first.
|
||||
// This so-called leftmost-first matching is the same semantics
|
||||
// that Perl, Python, and other implementations use, although this
|
||||
// package implements it without the expense of backtracking.
|
||||
// For POSIX leftmost-longest matching, see CompilePOSIX.
|
||||
func Compile(expr string) (*Regexp, os.Error) {
|
||||
re, err := syntax.Parse(expr, syntax.Perl)
|
||||
return compile(expr, syntax.Perl, false)
|
||||
}
|
||||
|
||||
// CompilePOSIX is like Compile but restricts the regular expression
|
||||
// to POSIX ERE (egrep) syntax and changes the match semantics to
|
||||
// leftmost-longest.
|
||||
//
|
||||
// That is, when matching against text, the regexp returns a match that
|
||||
// begins as early as possible in the input (leftmost), and among those
|
||||
// it chooses a match that is as long as possible.
|
||||
// This so-called leftmost-longest matching is the same semantics
|
||||
// that early regular expression implementations used and that POSIX
|
||||
// specifies.
|
||||
//
|
||||
// However, there can be multiple leftmost-longest matches, with different
|
||||
// submatch choices, and here this package diverges from POSIX.
|
||||
// Among the possible leftmost-longest matches, this package chooses
|
||||
// the one that a backtracking search would have found first, while POSIX
|
||||
// specifies that the match be chosen to maximize the length of the first
|
||||
// subexpression, then the second, and so on from left to right.
|
||||
// The POSIX rule is computationally prohibitive and not even well-defined.
|
||||
// See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
|
||||
func CompilePOSIX(expr string) (*Regexp, os.Error) {
|
||||
return compile(expr, syntax.POSIX, true)
|
||||
}
|
||||
|
||||
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, os.Error) {
|
||||
re, err := syntax.Parse(expr, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -114,6 +149,8 @@ func Compile(expr string) (*Regexp, os.Error) {
|
||||
expr: expr,
|
||||
prog: prog,
|
||||
numSubexp: maxCap,
|
||||
cond: prog.StartCond(),
|
||||
longest: longest,
|
||||
}
|
||||
regexp.prefix, regexp.prefixComplete = prog.Prefix()
|
||||
if regexp.prefix != "" {
|
||||
@ -122,7 +159,6 @@ func Compile(expr string) (*Regexp, os.Error) {
|
||||
regexp.prefixBytes = []byte(regexp.prefix)
|
||||
regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
|
||||
}
|
||||
regexp.cond = prog.StartCond()
|
||||
return regexp, nil
|
||||
}
|
||||
|
||||
|
23
src/pkg/exp/regexp/testdata/README
vendored
Normal file
23
src/pkg/exp/regexp/testdata/README
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
AT&T POSIX Test Files
|
||||
See testregex.c for copyright + license.
|
||||
|
||||
testregex.c http://www2.research.att.com/~gsf/testregex/testregex.c
|
||||
basic.dat http://www2.research.att.com/~gsf/testregex/basic.dat
|
||||
nullsubexpr.dat http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat
|
||||
repetition.dat http://www2.research.att.com/~gsf/testregex/repetition.dat
|
||||
|
||||
The test data has been edited to reflect RE2/Go differences:
|
||||
* In a star of a possibly empty match like (a*)* matching x,
|
||||
the no match case runs the starred subexpression zero times,
|
||||
not once. This is consistent with (a*)* matching a, which
|
||||
runs the starred subexpression one time, not twice.
|
||||
* The submatch choice is first match, not the POSIX rule.
|
||||
|
||||
Such changes are marked with 'RE2/Go'.
|
||||
|
||||
|
||||
RE2 Test Files
|
||||
|
||||
re2-exhaustive.txt.bz2 and re2-search.txt are built by running
|
||||
'make log' in the RE2 distribution. http://code.google.com/p/re2/.
|
||||
The exhaustive file is compressed because it is huge.
|
221
src/pkg/exp/regexp/testdata/basic.dat
vendored
Normal file
221
src/pkg/exp/regexp/testdata/basic.dat
vendored
Normal file
@ -0,0 +1,221 @@
|
||||
NOTE all standard compliant implementations should pass these : 2002-05-31
|
||||
|
||||
BE abracadabra$ abracadabracadabra (7,18)
|
||||
BE a...b abababbb (2,7)
|
||||
BE XXXXXX ..XXXXXX (2,8)
|
||||
E \) () (1,2)
|
||||
BE a] a]a (0,2)
|
||||
B } } (0,1)
|
||||
E \} } (0,1)
|
||||
BE \] ] (0,1)
|
||||
B ] ] (0,1)
|
||||
E ] ] (0,1)
|
||||
B { { (0,1)
|
||||
B } } (0,1)
|
||||
BE ^a ax (0,1)
|
||||
BE \^a a^a (1,3)
|
||||
BE a\^ a^ (0,2)
|
||||
BE a$ aa (1,2)
|
||||
BE a\$ a$ (0,2)
|
||||
BE ^$ NULL (0,0)
|
||||
E $^ NULL (0,0)
|
||||
E a($) aa (1,2)(2,2)
|
||||
E a*(^a) aa (0,1)(0,1)
|
||||
E (..)*(...)* a (0,0)
|
||||
E (..)*(...)* abcd (0,4)(2,4)
|
||||
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
|
||||
E (ab)c|abc abc (0,3)(0,2)
|
||||
E a{0}b ab (1,2)
|
||||
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
|
||||
E a{9876543210} NULL BADBR
|
||||
E ((a|a)|a) a (0,1)(0,1)(0,1)
|
||||
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
|
||||
E a*(a.|aa) aaaa (0,4)(2,4)
|
||||
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
|
||||
E (a|b)?.* b (0,1)(0,1)
|
||||
E (a|b)c|a(b|c) ac (0,2)(0,1)
|
||||
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
|
||||
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
|
||||
E (a|b)*c|(a|ab)*c xc (1,2)
|
||||
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
|
||||
E a?(ab|ba)ab abab (0,4)(0,2)
|
||||
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
|
||||
E ab|abab abbabab (0,2)
|
||||
E aba|bab|bba baaabbbaba (5,8)
|
||||
E aba|bab baaabbbaba (6,9)
|
||||
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
|
||||
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
|
||||
E ab|a xabc (1,3)
|
||||
E ab|a xxabc (2,4)
|
||||
Ei (Ab|cD)* aBcD (0,4)(2,4)
|
||||
BE [^-] --a (2,3)
|
||||
BE [a-]* --a (0,3)
|
||||
BE [a-m-]* --amoma-- (0,4)
|
||||
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
|
||||
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
|
||||
{E [[:upper:]] A (0,1) [[<element>]] not supported
|
||||
E [[:lower:]]+ `az{ (1,3)
|
||||
E [[:upper:]]+ @AZ[ (1,3)
|
||||
# No collation in Go
|
||||
#BE [[-]] [[-]] (2,4)
|
||||
#BE [[.NIL.]] NULL ECOLLATE
|
||||
#BE [[=aleph=]] NULL ECOLLATE
|
||||
}
|
||||
BE$ \n \n (0,1)
|
||||
BEn$ \n \n (0,1)
|
||||
BE$ [^a] \n (0,1)
|
||||
BE$ \na \na (0,2)
|
||||
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
|
||||
BE xxx xxx (0,3)
|
||||
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
|
||||
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
|
||||
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
|
||||
E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
|
||||
E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
|
||||
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
|
||||
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
|
||||
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
|
||||
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
|
||||
BE$ .* \x01\xff (0,2)
|
||||
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
|
||||
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
|
||||
E a*a*a*a*a*b aaaaaaaaab (0,10)
|
||||
BE ^ NULL (0,0)
|
||||
BE $ NULL (0,0)
|
||||
BE ^$ NULL (0,0)
|
||||
BE ^a$ a (0,1)
|
||||
BE abc abc (0,3)
|
||||
BE abc xabcy (1,4)
|
||||
BE abc ababc (2,5)
|
||||
BE ab*c abc (0,3)
|
||||
BE ab*bc abc (0,3)
|
||||
BE ab*bc abbc (0,4)
|
||||
BE ab*bc abbbbc (0,6)
|
||||
E ab+bc abbc (0,4)
|
||||
E ab+bc abbbbc (0,6)
|
||||
E ab?bc abbc (0,4)
|
||||
E ab?bc abc (0,3)
|
||||
E ab?c abc (0,3)
|
||||
BE ^abc$ abc (0,3)
|
||||
BE ^abc abcc (0,3)
|
||||
BE abc$ aabc (1,4)
|
||||
BE ^ abc (0,0)
|
||||
BE $ abc (3,3)
|
||||
BE a.c abc (0,3)
|
||||
BE a.c axc (0,3)
|
||||
BE a.*c axyzc (0,5)
|
||||
BE a[bc]d abd (0,3)
|
||||
BE a[b-d]e ace (0,3)
|
||||
BE a[b-d] aac (1,3)
|
||||
BE a[-b] a- (0,2)
|
||||
BE a[b-] a- (0,2)
|
||||
BE a] a] (0,2)
|
||||
BE a[]]b a]b (0,3)
|
||||
BE a[^bc]d aed (0,3)
|
||||
BE a[^-b]c adc (0,3)
|
||||
BE a[^]b]c adc (0,3)
|
||||
E ab|cd abc (0,2)
|
||||
E ab|cd abcd (0,2)
|
||||
E a\(b a(b (0,3)
|
||||
E a\(*b ab (0,2)
|
||||
E a\(*b a((b (0,4)
|
||||
E ((a)) abc (0,1)(0,1)(0,1)
|
||||
E (a)b(c) abc (0,3)(0,1)(2,3)
|
||||
E a+b+c aabbabc (4,7)
|
||||
E a* aaa (0,3)
|
||||
#E (a*)* - (0,0)(0,0)
|
||||
E (a*)* - (0,0)(?,?) RE2/Go
|
||||
E (a*)+ - (0,0)(0,0)
|
||||
#E (a*|b)* - (0,0)(0,0)
|
||||
E (a*|b)* - (0,0)(?,?) RE2/Go
|
||||
E (a+|b)* ab (0,2)(1,2)
|
||||
E (a+|b)+ ab (0,2)(1,2)
|
||||
E (a+|b)? ab (0,1)(0,1)
|
||||
BE [^ab]* cde (0,3)
|
||||
#E (^)* - (0,0)(0,0)
|
||||
E (^)* - (0,0)(?,?) RE2/Go
|
||||
BE a* NULL (0,0)
|
||||
E ([abc])*d abbbcd (0,6)(4,5)
|
||||
E ([abc])*bcd abcd (0,4)(0,1)
|
||||
E a|b|c|d|e e (0,1)
|
||||
E (a|b|c|d|e)f ef (0,2)(0,1)
|
||||
#E ((a*|b))* - (0,0)(0,0)(0,0)
|
||||
E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
|
||||
BE abcd*efg abcdefg (0,7)
|
||||
BE ab* xabyabbbz (1,3)
|
||||
BE ab* xayabbbz (1,2)
|
||||
E (ab|cd)e abcde (2,5)(2,4)
|
||||
BE [abhgefdc]ij hij (0,3)
|
||||
E (a|b)c*d abcd (1,4)(1,2)
|
||||
E (ab|ab*)bc abc (0,3)(0,1)
|
||||
E a([bc]*)c* abc (0,3)(1,3)
|
||||
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
|
||||
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
|
||||
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
|
||||
E a[bcd]*dcdcde adcdcde (0,7)
|
||||
E (ab|a)b*c abc (0,3)(0,2)
|
||||
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
|
||||
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
|
||||
E ^a(bc+|b[eh])g|.h$ abh (1,3)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
|
||||
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
|
||||
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
|
||||
BE multiple words multiple words yeah (0,14)
|
||||
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
|
||||
BE abcd abcd (0,4)
|
||||
E a(bc)d abcd (0,4)(1,3)
|
||||
E a[-]?c ac (0,3)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
|
||||
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
|
||||
E a+(b|c)*d+ aabcdd (0,6)(3,4)
|
||||
E ^.+$ vivi (0,4)
|
||||
E ^(.+)$ vivi (0,4)(0,4)
|
||||
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
|
||||
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
|
||||
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
|
||||
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
|
||||
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
|
||||
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
|
||||
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
|
||||
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
|
||||
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
|
||||
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
|
||||
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
|
||||
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
|
||||
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
|
||||
E (foo|(bar))!bas foo!bas (0,7)(0,3)
|
||||
E (foo|bar)!bas bar!bas (0,7)(0,3)
|
||||
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
|
||||
E (foo|bar)!bas foo!bas (0,7)(0,3)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
|
||||
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
|
||||
E .*(/XXX).* /XXX (0,4)(0,4)
|
||||
E .*(\\XXX).* \XXX (0,4)(0,4)
|
||||
E \\XXX \XXX (0,4)
|
||||
E .*(/000).* /000 (0,4)(0,4)
|
||||
E .*(\\000).* \000 (0,4)(0,4)
|
||||
E \\000 \000 (0,4)
|
79
src/pkg/exp/regexp/testdata/nullsubexpr.dat
vendored
Normal file
79
src/pkg/exp/regexp/testdata/nullsubexpr.dat
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
NOTE null subexpression matches : 2002-06-06
|
||||
|
||||
E (a*)* a (0,1)(0,1)
|
||||
#E SAME x (0,0)(0,0)
|
||||
E SAME x (0,0)(?,?) RE2/Go
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a*)+ a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a+)* a (0,1)(0,1)
|
||||
E SAME x (0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E (a+)+ a (0,1)(0,1)
|
||||
E SAME x NOMATCH
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
|
||||
E ([a]*)* a (0,1)(0,1)
|
||||
#E SAME x (0,0)(0,0)
|
||||
E SAME x (0,0)(?,?) RE2/Go
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E ([a]*)+ a (0,1)(0,1)
|
||||
E SAME x (0,0)(0,0)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaax (0,6)(0,6)
|
||||
E ([^b]*)* a (0,1)(0,1)
|
||||
#E SAME b (0,0)(0,0)
|
||||
E SAME b (0,0)(?,?) RE2/Go
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME aaaaaab (0,6)(0,6)
|
||||
E ([ab]*)* a (0,1)(0,1)
|
||||
E SAME aaaaaa (0,6)(0,6)
|
||||
E SAME ababab (0,6)(0,6)
|
||||
E SAME bababa (0,6)(0,6)
|
||||
E SAME b (0,1)(0,1)
|
||||
E SAME bbbbbb (0,6)(0,6)
|
||||
E SAME aaaabcde (0,5)(0,5)
|
||||
E ([^a]*)* b (0,1)(0,1)
|
||||
E SAME bbbbbb (0,6)(0,6)
|
||||
#E SAME aaaaaa (0,0)(0,0)
|
||||
E SAME aaaaaa (0,0)(?,?) RE2/Go
|
||||
E ([^ab]*)* ccccxx (0,6)(0,6)
|
||||
#E SAME ababab (0,0)(0,0)
|
||||
E SAME ababab (0,0)(?,?) RE2/Go
|
||||
|
||||
E ((z)+|a)* zabcde (0,2)(1,2)
|
||||
|
||||
#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
|
||||
#E (a) aaa (0,1)(0,1)
|
||||
#E (a*?) aaa (0,0)(0,0)
|
||||
#E (a)*? aaa (0,0)
|
||||
#E (a*?)*? aaa (0,0)
|
||||
#}
|
||||
|
||||
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
|
||||
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
|
||||
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
|
||||
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
|
||||
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
|
||||
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
|
||||
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
|
||||
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
|
||||
|
||||
#E (a*)*(x) x (0,1)(0,0)(0,1)
|
||||
E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
|
||||
E (a*)*(x) ax (0,2)(0,1)(1,2)
|
||||
E (a*)*(x) axa (0,2)(0,1)(1,2)
|
||||
|
||||
E (a*)+(x) x (0,1)(0,0)(0,1)
|
||||
E (a*)+(x) ax (0,2)(0,1)(1,2)
|
||||
E (a*)+(x) axa (0,2)(0,1)(1,2)
|
||||
|
||||
E (a*){2}(x) x (0,1)(0,0)(0,1)
|
||||
E (a*){2}(x) ax (0,2)(1,1)(1,2)
|
||||
E (a*){2}(x) axa (0,2)(1,1)(1,2)
|
BIN
src/pkg/exp/regexp/testdata/re2-exhaustive.txt.bz2
vendored
Normal file
BIN
src/pkg/exp/regexp/testdata/re2-exhaustive.txt.bz2
vendored
Normal file
Binary file not shown.
3667
src/pkg/exp/regexp/testdata/re2-search.txt
vendored
Normal file
3667
src/pkg/exp/regexp/testdata/re2-search.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
163
src/pkg/exp/regexp/testdata/repetition.dat
vendored
Normal file
163
src/pkg/exp/regexp/testdata/repetition.dat
vendored
Normal file
@ -0,0 +1,163 @@
|
||||
NOTE implicit vs. explicit repetitions : 2009-02-02
|
||||
|
||||
# Glenn Fowler <gsf@research.att.com>
|
||||
# conforming matches (column 4) must match one of the following BREs
|
||||
# NOMATCH
|
||||
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
|
||||
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
|
||||
# i.e., each 3-tuple has two identical elements and one (?,?)
|
||||
|
||||
E ((..)|(.)) NULL NOMATCH
|
||||
E ((..)|(.))((..)|(.)) NULL NOMATCH
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
|
||||
|
||||
E ((..)|(.)){1} NULL NOMATCH
|
||||
E ((..)|(.)){2} NULL NOMATCH
|
||||
E ((..)|(.)){3} NULL NOMATCH
|
||||
|
||||
E ((..)|(.))* NULL (0,0)
|
||||
|
||||
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
|
||||
E ((..)|(.))((..)|(.)) a NOMATCH
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
|
||||
|
||||
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
|
||||
E ((..)|(.)){2} a NOMATCH
|
||||
E ((..)|(.)){3} a NOMATCH
|
||||
|
||||
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
|
||||
|
||||
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
|
||||
|
||||
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
|
||||
E ((..)|(.)){3} aa NOMATCH
|
||||
|
||||
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
|
||||
|
||||
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
|
||||
|
||||
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
|
||||
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
|
||||
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
|
||||
|
||||
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
|
||||
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
|
||||
|
||||
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
|
||||
|
||||
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
|
||||
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
|
||||
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
|
||||
|
||||
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
|
||||
|
||||
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
|
||||
|
||||
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
|
||||
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
|
||||
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
|
||||
|
||||
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
|
||||
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
|
||||
|
||||
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
|
||||
|
||||
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
|
||||
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
|
||||
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
|
||||
|
||||
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
|
||||
|
||||
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
|
||||
|
||||
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
|
||||
# Linux/GLIBC gets the {8,} and {8,8} wrong.
|
||||
|
||||
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
|
||||
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
|
||||
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
|
||||
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
|
||||
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
|
||||
|
||||
# These test a fixed bug in my regex-tdfa that did not keep the expanded
|
||||
# form properly grouped, so right association did the wrong thing with
|
||||
# these ambiguous patterns (crafted just to test my code when I became
|
||||
# suspicious of my implementation). The first subexpression should use
|
||||
# "ab" then "a" then "bcd".
|
||||
|
||||
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
|
||||
# results like (0,6)(4,5)(6,6).
|
||||
|
||||
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
|
||||
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
|
||||
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
|
||||
|
||||
# The above worked on Linux/GLIBC but the following often fail.
|
||||
# They also trip up OS X / FreeBSD / NetBSD:
|
||||
|
||||
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
|
||||
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
|
||||
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
||||
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
|
||||
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
|
2286
src/pkg/exp/regexp/testdata/testregex.c
vendored
Normal file
2286
src/pkg/exp/regexp/testdata/testregex.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user