exp/regexp: add CompilePOSIX, more tests

R=r CC=golang-dev https://golang.org/cl/4967060
2024-11-21 21:54:40 -07:00 · 2011-09-08 14:49:51 -04:00 · 2011-09-08 14:49:51 -04:00 · 21e671dee6
commit 21e671dee6
parent 177dca77e1
11 changed files with 6837 additions and 29 deletions
--- a/src/pkg/exp/regexp/exec_test.go
+++ b/src/pkg/exp/regexp/exec_test.go
@ -6,9 +6,12 @@ package regexp

 import (
 	"bufio"
-	"compress/gzip"
+	"compress/bzip2"
+	"exp/regexp/syntax"
 	"fmt"
+	"io"
 	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
 	"testing"
@ -59,24 +62,34 @@ import (
 // At time of writing, re2.txt is 32 MB but compresses to 760 kB,
 // so we store re2.txt.gz in the repository and decompress it on the fly.
 //
-func TestRE2(t *testing.T) {
+func TestRE2Search(t *testing.T) {
+	testRE2(t, "testdata/re2-search.txt")
+}
+
+func TestRE2Exhaustive(t *testing.T) {
 	if testing.Short() {
-		t.Log("skipping TestRE2 during short test")
+		t.Log("skipping TestRE2Exhaustive during short test")
 		return
 	}
+	testRE2(t, "testdata/re2-exhaustive.txt.bz2")
+}

-	f, err := os.Open("re2.txt.gz")
+func testRE2(t *testing.T, file string) {
+	f, err := os.Open(file)
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer f.Close()
-	gz, err := gzip.NewReader(f)
-	if err != nil {
-		t.Fatalf("decompress re2.txt.gz: %v", err)
+	var txt io.Reader
+	if strings.HasSuffix(file, ".bz2") {
+		z := bzip2.NewReader(f)
+		txt = z
+		file = file[:len(file)-len(".bz2")] // for error messages
+	} else {
+		txt = f
 	}
-	defer gz.Close()
 	lineno := 0
-	r := bufio.NewReader(gz)
+	r := bufio.NewReader(txt)
 	var (
 		str       []string
 		input     []string
@ -92,13 +105,13 @@ func TestRE2(t *testing.T) {
 			if err == os.EOF {
 				break
 			}
-			t.Fatalf("re2.txt:%d: %v", lineno, err)
+			t.Fatalf("%s:%d: %v", file, lineno, err)
 		}
 		line = line[:len(line)-1] // chop \n
 		lineno++
 		switch {
 		case line == "":
-			t.Fatalf("re2.txt:%d: unexpected blank line", lineno)
+			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
 		case line[0] == '#':
 			continue
 		case 'A' <= line[0] && line[0] <= 'Z':
@ -114,7 +127,7 @@ func TestRE2(t *testing.T) {
 			q, err := strconv.Unquote(line)
 			if err != nil {
 				// Fatal because we'll get out of sync.
-				t.Fatalf("re2.txt:%d: unquote %s: %v", lineno, line, err)
+				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
 			}
 			if inStrings {
 				str = append(str, q)
@ -122,7 +135,7 @@ func TestRE2(t *testing.T) {
 			}
 			// Is a regexp.
 			if len(input) != 0 {
-				t.Fatalf("re2.txt:%d: out of sync: have %d strings left before %#q", lineno, len(input), q)
+				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
 			}
 			re, err = tryCompile(q)
 			if err != nil {
@ -130,7 +143,7 @@ func TestRE2(t *testing.T) {
 					// We don't and likely never will support \C; keep going.
 					continue
 				}
-				t.Errorf("re2.txt:%d: compile %#q: %v", lineno, q, err)
+				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
 				if nfail++; nfail >= 100 {
 					t.Fatalf("stopping after %d errors", nfail)
 				}
@ -140,7 +153,7 @@ func TestRE2(t *testing.T) {
 			refull, err = tryCompile(full)
 			if err != nil {
 				// Fatal because q worked, so this should always work.
-				t.Fatalf("re2.txt:%d: compile full %#q: %v", lineno, full, err)
+				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
 			}
 			input = str
 		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
@ -151,7 +164,7 @@ func TestRE2(t *testing.T) {
 				continue
 			}
 			if len(input) == 0 {
-				t.Fatalf("re2.txt:%d: out of sync: no input remaining", lineno)
+				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
 			}
 			var text string
 			text, input = input[0], input[1:]
@ -165,13 +178,13 @@ func TestRE2(t *testing.T) {
 			}
 			res := strings.Split(line, ";")
 			if len(res) != len(run) {
-				t.Fatalf("re2.txt:%d: have %d test results, want %d", lineno, len(res), len(run))
+				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
 			}
 			for i := range res {
 				have, suffix := run[i](re, refull, text)
-				want := parseResult(t, lineno, res[i])
+				want := parseResult(t, file, lineno, res[i])
 				if !same(have, want) {
-					t.Errorf("re2.txt:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", lineno, re, suffix, text, have, want)
+					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
 					if nfail++; nfail >= 100 {
 						t.Fatalf("stopping after %d errors", nfail)
 					}
@ -179,7 +192,7 @@ func TestRE2(t *testing.T) {
 				}
 				b, suffix := match[i](re, refull, text)
 				if b != (want != nil) {
-					t.Errorf("re2.txt:%d: %#q%s.MatchString(%#q) = %v, want %v", lineno, re, suffix, text, b, !b)
+					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
 					if nfail++; nfail >= 100 {
 						t.Fatalf("stopping after %d errors", nfail)
 					}
@ -188,11 +201,11 @@ func TestRE2(t *testing.T) {
 			}

 		default:
-			t.Fatalf("re2.txt:%d: out of sync: %s\n", lineno, line)
+			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
 		}
 	}
 	if len(input) != 0 {
-		t.Fatalf("re2.txt:%d: out of sync: have %d strings left at EOF", lineno, len(input))
+		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
 	}
 	t.Logf("%d cases tested", ncase)
 }
@ -270,7 +283,7 @@ func tryCompile(s string) (re *Regexp, err os.Error) {
 	return Compile(s)
 }

-func parseResult(t *testing.T, lineno int, res string) []int {
+func parseResult(t *testing.T, file string, lineno int, res string) []int {
 	// A single - indicates no match.
 	if res == "-" {
 		return nil
@ -295,12 +308,12 @@ func parseResult(t *testing.T, lineno int, res string) []int {
 			} else {
 				k := strings.Index(pair, "-")
 				if k < 0 {
-					t.Fatalf("re2.txt:%d: invalid pair %s", lineno, pair)
+					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 				}
 				lo, err1 := strconv.Atoi(pair[:k])
 				hi, err2 := strconv.Atoi(pair[k+1:])
 				if err1 != nil || err2 != nil || lo > hi {
-					t.Fatalf("re2.txt:%d: invalid pair %s", lineno, pair)
+					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 				}
 				out[n] = lo
 				out[n+1] = hi
@ -323,3 +336,314 @@ func same(x, y []int) bool {
 	}
 	return true
 }
+
+// TestFowler runs this package's regexp API against the
+// POSIX regular expression tests collected by Glenn Fowler
+// at http://www2.research.att.com/~gsf/testregex/.
+func TestFowler(t *testing.T) {
+	// Each testdata/*.dat file is handed to testFowler in turn.
+	files, err := filepath.Glob("testdata/*.dat")
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, file := range files {
+		// Log the file name before running its tests.
+		t.Log(file)
+		testFowler(t, file)
+	}
+}
+
+var notab = MustCompile(`[^\t]+`)
+
+// testFowler runs every test specification found in one testregex-format
+// data file and reports mismatches through t. A file that cannot be
+// opened is reported with t.Error and the function returns. Lines that
+// are blank, comments, control lines, or that mention NIL are skipped.
+// Only the E (extended) and L (literal) modes are exercised, optionally
+// combined with the i (ignore case) flag.
+func testFowler(t *testing.T, file string) {
+	f, err := os.Open(file)
+	if err != nil {
+		t.Error(err)
+		return
+	}
+	defer f.Close()
+	b := bufio.NewReader(f)
+	lineno := 0
+	lastRegexp := ""
+Reading:
+	for {
+		lineno++
+		line, err := b.ReadString('\n')
+		if err != nil {
+			if err != os.EOF {
+				t.Errorf("%s:%d: %v", file, lineno, err)
+			}
+			break Reading
+		}
+
+		// http://www2.research.att.com/~gsf/man/man1/testregex.html
+		//
+		// INPUT FORMAT
+		//   Input lines may be blank, a comment beginning with #, or a test
+		//   specification. A specification is five fields separated by one
+		//   or more tabs. NULL denotes the empty string and NIL denotes the
+		//   0 pointer.
+		if line[0] == '#' || line[0] == '\n' {
+			continue Reading
+		}
+		line = line[:len(line)-1]
+		field := notab.FindAllString(line, -1)
+		for i, f := range field {
+			if f == "NULL" {
+				field[i] = ""
+			}
+			if f == "NIL" {
+				t.Logf("%s:%d: skip: %s", file, lineno, line)
+				continue Reading
+			}
+		}
+		if len(field) == 0 {
+			continue Reading
+		}
+
+		//   Field 1: the regex(3) flags to apply, one character per REG_feature
+		//   flag. The test is skipped if REG_feature is not supported by the
+		//   implementation. If the first character is not [BEASKLP] then the
+		//   specification is a global control line. One or more of [BEASKLP] may be
+		//   specified; the test will be repeated for each mode.
+		//
+		//     B 	basic			BRE	(grep, ed, sed)
+		//     E 	REG_EXTENDED		ERE	(egrep)
+		//     A	REG_AUGMENTED		ARE	(egrep with negation)
+		//     S	REG_SHELL		SRE	(sh glob)
+		//     K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
+		//     L	REG_LITERAL		LRE	(fgrep)
+		//
+		//     a	REG_LEFT|REG_RIGHT	implicit ^...$
+		//     b	REG_NOTBOL		lhs does not match ^
+		//     c	REG_COMMENT		ignore space and #...\n
+		//     d	REG_SHELL_DOT		explicit leading . match
+		//     e	REG_NOTEOL		rhs does not match $
+		//     f	REG_MULTIPLE		multiple \n separated patterns
+		//     g	FNM_LEADING_DIR		testfnmatch only -- match until /
+		//     h	REG_MULTIREF		multiple digit backref
+		//     i	REG_ICASE		ignore case
+		//     j	REG_SPAN		. matches \n
+		//     k	REG_ESCAPE		\ to escape [...] delimiter
+		//     l	REG_LEFT		implicit ^...
+		//     m	REG_MINIMAL		minimal match
+		//     n	REG_NEWLINE		explicit \n match
+		//     o	REG_ENCLOSED		(|&) magic inside [@|&](...)
+		//     p	REG_SHELL_PATH		explicit / match
+		//     q	REG_DELIMITED		delimited pattern
+		//     r	REG_RIGHT		implicit ...$
+		//     s	REG_SHELL_ESCAPED	\ not special
+		//     t	REG_MUSTDELIM		all delimiters must be specified
+		//     u	standard unspecified behavior -- errors not counted
+		//     v	REG_CLASS_ESCAPE	\ special inside [...]
+		//     w	REG_NOSUB		no subexpression match array
+		//     x	REG_LENIENT		let some errors slide
+		//     y	REG_LEFT		regexec() implicit ^...
+		//     z	REG_NULL		NULL subexpressions ok
+		//     $	                        expand C \c escapes in fields 2 and 3
+		//     /	                        field 2 is a regsubcomp() expression
+		//     =	                        field 3 is a regdecomp() expression
+		//
+		//   Field 1 control lines:
+		//
+		//     C		set LC_COLLATE and LC_CTYPE to locale in field 2
+		//
+		//     ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
+		//     &test ...	output field 5 if current and previous passed
+		//     |test ...	output field 5 if current passed and previous failed
+		//     ; ...	output field 2 if previous failed
+		//     {test ...	skip if failed until }
+		//     }		end of skip
+		//
+		//     : comment		comment copied as output NOTE
+		//     :comment:test	:comment: ignored
+		//     N[OTE] comment	comment copied as output NOTE
+		//     T[EST] comment	comment
+		//
+		//     number		use number for nmatch (20 by default)
+		flag := field[0]
+		switch flag[0] {
+		case '?', '&', '|', ';', '{', '}':
+			// Ignore all the control operators.
+			// Just run everything.
+			flag = flag[1:]
+			if flag == "" {
+				continue Reading
+			}
+		case ':':
+			i := strings.Index(flag[1:], ":")
+			if i < 0 {
+				t.Logf("skip: %s", line)
+				continue Reading
+			}
+			flag = flag[1+i+1:]
+		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			t.Logf("skip: %s", line)
+			continue Reading
+		}
+
+		// Can check field count now that we've handled the myriad comment formats.
+		if len(field) < 4 {
+			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
+			continue Reading
+		}
+
+		// Expand C escapes (a.k.a. Go escapes).
+		if strings.Contains(flag, "$") {
+			f := `"` + field[1] + `"`
+			if field[1], err = strconv.Unquote(f); err != nil {
+				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
+			}
+			f = `"` + field[2] + `"`
+			if field[2], err = strconv.Unquote(f); err != nil {
+				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
+			}
+		}
+
+		//   Field 2: the regular expression pattern; SAME uses the pattern from
+		//     the previous specification.
+		//
+		if field[1] == "SAME" {
+			field[1] = lastRegexp
+		}
+		lastRegexp = field[1]
+
+		//   Field 3: the string to match.
+		text := field[2]
+
+		//   Field 4: the test outcome...
+		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
+		if !ok {
+			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
+			continue Reading
+		}
+
+		//   Field 5: optional comment appended to the report.
+
+	Testing:
+		// Run test once for each specified capital letter mode that we support.
+		for _, c := range flag {
+			pattern := field[1]
+			syn := syntax.POSIX | syntax.ClassNL
+			switch c {
+			default:
+				continue Testing
+			case 'E':
+				// extended regexp (what we support)
+			case 'L':
+				// literal
+				pattern = QuoteMeta(pattern)
+			}
+
+			for _, c := range flag {
+				switch c {
+				case 'i':
+					syn |= syntax.FoldCase
+				}
+			}
+
+			re, err := compile(pattern, syn, true)
+			if err != nil {
+				if shouldCompile {
+					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
+				}
+				continue Testing
+			}
+			if !shouldCompile {
+				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
+				continue Testing
+			}
+			match := re.MatchString(text)
+			if match != shouldMatch {
+				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
+				continue Testing
+			}
+			have := re.FindStringSubmatchIndex(text)
+			if (len(have) > 0) != match {
+				// Both halves of the message name the pattern and the text,
+				// so each %#q pair needs its own pattern, text arguments.
+				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
+				continue Testing
+			}
+			if len(have) > len(pos) {
+				have = have[:len(pos)]
+			}
+			if !same(have, pos) {
+				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
+			}
+		}
+	}
+}
+
+// parseFowlerResult decodes field 4 of a testregex specification.
+// ok reports whether s was understood. compiled reports whether the
+// pattern is expected to compile and matched whether it is expected to
+// match. pos holds the expected submatch offsets as a flat list of
+// start, end values, with -1 standing for an endpoint written as "?".
+// pos is nil when s carries no position information.
+func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
+	//   Field 4: the test outcome. This is either one of the posix error
+	//     codes (with REG_ omitted) or the match array, a list of (m,n)
+	//     entries with m and n being first and last+1 positions in the
+	//     field 3 string, or NULL if REG_NOSUB is in effect and success
+	//     is expected. BADPAT is acceptable in place of any regcomp(3)
+	//     error code. The match[] array is initialized to (-2,-2) before
+	//     each test. All array elements from 0 to nmatch-1 must be specified
+	//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
+	//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
+	//     matched (?{...}) expression, where x is the text enclosed by {...},
+	//     o is the expression ordinal counting from 1, and n is the length of
+	//     the unmatched portion of the subject string. If x starts with a
+	//     number then that is the return value of re_execf(), otherwise 0 is
+	//     returned.
+	switch {
+	case s == "":
+		// Match with no position information.
+		ok = true
+		compiled = true
+		matched = true
+		return
+	case s == "NOMATCH":
+		// Match failure.
+		ok = true
+		compiled = true
+		matched = false
+		return
+	case 'A' <= s[0] && s[0] <= 'Z':
+		// All the other error codes are compile errors.
+		ok = true
+		compiled = false
+		return
+	}
+	compiled = true
+
+	// Parse a sequence of "(m,n)" entries, consuming one number per
+	// iteration and appending it to x.
+	var x []int
+	for s != "" {
+		// An even count means a new pair starts here: expect '(' and read
+		// up to ','. An odd count means the second number, read up to ')'.
+		var end byte = ')'
+		if len(x)%2 == 0 {
+			if s[0] != '(' {
+				ok = false
+				return
+			}
+			s = s[1:]
+			end = ','
+		}
+		i := 0
+		for i < len(s) && s[i] != end {
+			i++
+		}
+		// Reject an empty number or a missing terminator.
+		if i == 0 || i == len(s) {
+			ok = false
+			return
+		}
+		// "?" keeps the default of -1; anything else must be an integer.
+		var v = -1
+		var err os.Error
+		if s[:i] != "?" {
+			v, err = strconv.Atoi(s[:i])
+			if err != nil {
+				ok = false
+				return
+			}
+		}
+		x = append(x, v)
+		s = s[i+1:]
+	}
+	// Defensive check that the numbers pair up.
+	if len(x)%2 != 0 {
+		ok = false
+		return
+	}
+	ok = true
+	matched = true
+	pos = x
+	return
+}
--- a/src/pkg/exp/regexp/find_test.go
+++ b/src/pkg/exp/regexp/find_test.go
@ -98,6 +98,15 @@ var findTests = []FindTest{
 	{`\B`, "x y", nil},
 	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},

+	// RE2 tests
+	{`[^\S\s]`, "abcd", nil},
+	{`[^\S[:space:]]`, "abcd", nil},
+	{`[^\D\d]`, "abcd", nil},
+	{`[^\D[:digit:]]`, "abcd", nil},
+	{`(?i)\W`, "x", nil},
+	{`(?i)\W`, "k", nil},
+	{`(?i)\W`, "s", nil},
+
 	// can backslash-escape any punctuation
 	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
 		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
--- a/src/pkg/exp/regexp/re2.txt.gz
+++ b/src/pkg/exp/regexp/re2.txt.gz
--- a/src/pkg/exp/regexp/regexp.go
+++ b/src/pkg/exp/regexp/regexp.go
@ -97,10 +97,45 @@ func (re *Regexp) String() string {
 	return re.expr
 }

-// Compile parses a regular expression and returns, if successful, a Regexp
-// object that can be used to match against text.
+// Compile parses a regular expression and returns, if successful,
+// a Regexp object that can be used to match against text.
+//
+// When matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses the one that a backtracking search would have found first.
+// This so-called leftmost-first matching is the same semantics
+// that Perl, Python, and other implementations use, although this
+// package implements it without the expense of backtracking.
+// For POSIX leftmost-longest matching, see CompilePOSIX.
 func Compile(expr string) (*Regexp, os.Error) {
-	re, err := syntax.Parse(expr, syntax.Perl)
+	return compile(expr, syntax.Perl, false)
+}
+
+// CompilePOSIX is like Compile but restricts the regular expression
+// to POSIX ERE (egrep) syntax and changes the match semantics to
+// leftmost-longest.
+//
+// That is, when matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses a match that is as long as possible.
+// This so-called leftmost-longest matching is the same semantics
+// that early regular expression implementations used and that POSIX
+// specifies.
+//
+// However, there can be multiple leftmost-longest matches, with different
+// submatch choices, and here this package diverges from POSIX.
+// Among the possible leftmost-longest matches, this package chooses
+// the one that a backtracking search would have found first, while POSIX
+// specifies that the match be chosen to maximize the length of the first
+// subexpression, then the second, and so on from left to right.
+// The POSIX rule is computationally prohibitive and not even well-defined.
+// See http://swtch.com/~rsc/regexp/regexp2.html#posix for details.
+func CompilePOSIX(expr string) (*Regexp, os.Error) {
+	// The final argument is compile's longest flag; true requests
+	// leftmost-longest matching.
+	return compile(expr, syntax.POSIX, true)
+}
+
+func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, os.Error) {
+	re, err := syntax.Parse(expr, mode)
 	if err != nil {
 		return nil, err
 	}
@ -114,6 +149,8 @@ func Compile(expr string) (*Regexp, os.Error) {
 		expr:      expr,
 		prog:      prog,
 		numSubexp: maxCap,
+		cond:      prog.StartCond(),
+		longest:   longest,
 	}
 	regexp.prefix, regexp.prefixComplete = prog.Prefix()
 	if regexp.prefix != "" {
@ -122,7 +159,6 @@ func Compile(expr string) (*Regexp, os.Error) {
 		regexp.prefixBytes = []byte(regexp.prefix)
 		regexp.prefixRune, _ = utf8.DecodeRuneInString(regexp.prefix)
 	}
-	regexp.cond = prog.StartCond()
 	return regexp, nil
 }

--- a/src/pkg/exp/regexp/testdata/README
+++ b/src/pkg/exp/regexp/testdata/README
@ -0,0 +1,23 @@
+AT&T POSIX Test Files
+See testregex.c for copyright + license.
+
+testregex.c	http://www2.research.att.com/~gsf/testregex/testregex.c
+basic.dat	http://www2.research.att.com/~gsf/testregex/basic.dat
+nullsubexpr.dat	http://www2.research.att.com/~gsf/testregex/nullsubexpr.dat
+repetition.dat	http://www2.research.att.com/~gsf/testregex/repetition.dat
+
+The test data has been edited to reflect RE2/Go differences:
+  * In a star of a possibly empty match like (a*)* matching x,
+    the no match case runs the starred subexpression zero times,
+    not once.  This is consistent with (a*)* matching a, which
+    runs the starred subexpression one time, not twice.
+  * The submatch choice is first match, not the POSIX rule.
+
+Such changes are marked with 'RE2/Go'.
+
+
+RE2 Test Files
+
+re2-exhaustive.txt.bz2 and re2-search.txt are built by running
+'make log' in the RE2 distribution.  http://code.google.com/p/re2/.
+The exhaustive file is compressed because it is huge.
--- a/src/pkg/exp/regexp/testdata/basic.dat
+++ b/src/pkg/exp/regexp/testdata/basic.dat
@ -0,0 +1,221 @@
+NOTE	all standard compliant implementations should pass these : 2002-05-31
+
+BE	abracadabra$	abracadabracadabra	(7,18)
+BE	a...b		abababbb		(2,7)
+BE	XXXXXX		..XXXXXX		(2,8)
+E	\)		()	(1,2)
+BE	a]		a]a	(0,2)
+B	}		}	(0,1)
+E	\}		}	(0,1)
+BE	\]		]	(0,1)
+B	]		]	(0,1)
+E	]		]	(0,1)
+B	{		{	(0,1)
+B	}		}	(0,1)
+BE	^a		ax	(0,1)
+BE	\^a		a^a	(1,3)
+BE	a\^		a^	(0,2)
+BE	a$		aa	(1,2)
+BE	a\$		a$	(0,2)
+BE	^$		NULL	(0,0)
+E	$^		NULL	(0,0)
+E	a($)		aa	(1,2)(2,2)
+E	a*(^a)		aa	(0,1)(0,1)
+E	(..)*(...)*		a	(0,0)
+E	(..)*(...)*		abcd	(0,4)(2,4)
+E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
+E	(ab)c|abc		abc	(0,3)(0,2)
+E	a{0}b		ab			(1,2)
+E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	a{9876543210}	NULL	BADBR
+E	((a|a)|a)			a	(0,1)(0,1)(0,1)
+E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
+E	a*(a.|aa)			aaaa	(0,4)(2,4)
+E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
+E	(a|b)?.*			b	(0,1)(0,1)
+E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
+E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
+E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
+E	(a|b)*c|(a|ab)*c		xc	(1,2)
+E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
+E	a?(ab|ba)ab			abab	(0,4)(0,2)
+E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
+E	ab|abab				abbabab	(0,2)
+E	aba|bab|bba			baaabbbaba	(5,8)
+E	aba|bab				baaabbbaba	(6,9)
+E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
+E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
+E	ab|a				xabc	(1,3)
+E	ab|a				xxabc	(2,4)
+Ei	(Ab|cD)*			aBcD	(0,4)(2,4)
+BE	[^-]			--a		(2,3)
+BE	[a-]*			--a		(0,3)
+BE	[a-m-]*			--amoma--	(0,4)
+E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
+E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
+{E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
+E	[[:lower:]]+		`az{		(1,3)
+E	[[:upper:]]+		@AZ[		(1,3)
+# No collation in Go
+#BE	[[-]]			[[-]]		(2,4)
+#BE	[[.NIL.]]	NULL	ECOLLATE
+#BE	[[=aleph=]]	NULL	ECOLLATE
+}
+BE$	\n		\n	(0,1)
+BEn$	\n		\n	(0,1)
+BE$	[^a]		\n	(0,1)
+BE$	\na		\na	(0,2)
+E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
+BE	xxx		xxx	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
+E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
+E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
+BE$	.*			\x01\xff	(0,2)
+E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
+L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
+E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
+BE	^			NULL		(0,0)
+BE	$			NULL		(0,0)
+BE	^$			NULL		(0,0)
+BE	^a$			a		(0,1)
+BE	abc			abc		(0,3)
+BE	abc			xabcy		(1,4)
+BE	abc			ababc		(2,5)
+BE	ab*c			abc		(0,3)
+BE	ab*bc			abc		(0,3)
+BE	ab*bc			abbc		(0,4)
+BE	ab*bc			abbbbc		(0,6)
+E	ab+bc			abbc		(0,4)
+E	ab+bc			abbbbc		(0,6)
+E	ab?bc			abbc		(0,4)
+E	ab?bc			abc		(0,3)
+E	ab?c			abc		(0,3)
+BE	^abc$			abc		(0,3)
+BE	^abc			abcc		(0,3)
+BE	abc$			aabc		(1,4)
+BE	^			abc		(0,0)
+BE	$			abc		(3,3)
+BE	a.c			abc		(0,3)
+BE	a.c			axc		(0,3)
+BE	a.*c			axyzc		(0,5)
+BE	a[bc]d			abd		(0,3)
+BE	a[b-d]e			ace		(0,3)
+BE	a[b-d]			aac		(1,3)
+BE	a[-b]			a-		(0,2)
+BE	a[b-]			a-		(0,2)
+BE	a]			a]		(0,2)
+BE	a[]]b			a]b		(0,3)
+BE	a[^bc]d			aed		(0,3)
+BE	a[^-b]c			adc		(0,3)
+BE	a[^]b]c			adc		(0,3)
+E	ab|cd			abc		(0,2)
+E	ab|cd			abcd		(0,2)
+E	a\(b			a(b		(0,3)
+E	a\(*b			ab		(0,2)
+E	a\(*b			a((b		(0,4)
+E	((a))			abc		(0,1)(0,1)(0,1)
+E	(a)b(c)			abc		(0,3)(0,1)(2,3)
+E	a+b+c			aabbabc		(4,7)
+E	a*			aaa		(0,3)
+#E	(a*)*			-		(0,0)(0,0)
+E	(a*)*			-		(0,0)(?,?)	RE2/Go
+E	(a*)+			-		(0,0)(0,0)
+#E	(a*|b)*			-		(0,0)(0,0)
+E	(a*|b)*			-		(0,0)(?,?)	RE2/Go
+E	(a+|b)*			ab		(0,2)(1,2)
+E	(a+|b)+			ab		(0,2)(1,2)
+E	(a+|b)?			ab		(0,1)(0,1)
+BE	[^ab]*			cde		(0,3)
+#E	(^)*			-		(0,0)(0,0)
+E	(^)*			-		(0,0)(?,?)	RE2/Go
+BE	a*			NULL		(0,0)
+E	([abc])*d		abbbcd		(0,6)(4,5)
+E	([abc])*bcd		abcd		(0,4)(0,1)
+E	a|b|c|d|e		e		(0,1)
+E	(a|b|c|d|e)f		ef		(0,2)(0,1)
+#E	((a*|b))*		-		(0,0)(0,0)(0,0)
+E	((a*|b))*		-		(0,0)(?,?)(?,?)	RE2/Go
+BE	abcd*efg		abcdefg		(0,7)
+BE	ab*			xabyabbbz	(1,3)
+BE	ab*			xayabbbz	(1,2)
+E	(ab|cd)e		abcde		(2,5)(2,4)
+BE	[abhgefdc]ij		hij		(0,3)
+E	(a|b)c*d		abcd		(1,4)(1,2)
+E	(ab|ab*)bc		abc		(0,3)(0,1)
+E	a([bc]*)c*		abc		(0,3)(1,3)
+E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
+E	a[bcd]*dcdcde		adcdcde		(0,7)
+E	(ab|a)b*c		abc		(0,3)(0,2)
+E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
+BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
+E	^a(bc+|b[eh])g|.h$	abh		(1,3)
+E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
+E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
+E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
+E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE	multiple words		multiple words yeah	(0,14)
+E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
+BE	abcd			abcd		(0,4)
+E	a(bc)d			abcd		(0,4)(1,3)
+E	a[-]?c		ac		(0,3)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
+E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
+E	^.+$			vivi			(0,4)
+E	^(.+)$			vivi			(0,4)(0,4)
+E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
+E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
+E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
+E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
+E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
+E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
+E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
+E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
+E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
+E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
+E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
+E	.*(/XXX).*			/XXX			(0,4)(0,4)
+E	.*(\\XXX).*			\XXX			(0,4)(0,4)
+E	\\XXX				\XXX			(0,4)
+E	.*(/000).*			/000			(0,4)(0,4)
+E	.*(\\000).*			\000			(0,4)(0,4)
+E	\\000				\000			(0,4)
--- a/src/pkg/exp/regexp/testdata/nullsubexpr.dat
+++ b/src/pkg/exp/regexp/testdata/nullsubexpr.dat
@ -0,0 +1,79 @@
+NOTE	null subexpression matches : 2002-06-06
+
+E	(a*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)+		a		(0,1)(0,1)
+E	SAME		x		NOMATCH
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+
+E	([a]*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([a]*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([^b]*)*	a		(0,1)(0,1)
+#E	SAME		b		(0,0)(0,0)
+E	SAME		b		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaab		(0,6)(0,6)
+E	([ab]*)*	a		(0,1)(0,1)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		ababab		(0,6)(0,6)
+E	SAME		bababa		(0,6)(0,6)
+E	SAME		b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaabcde	(0,5)(0,5)
+E	([^a]*)*	b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+#E	SAME		aaaaaa		(0,0)(0,0)
+E	SAME		aaaaaa		(0,0)(?,?)	RE2/Go
+E	([^ab]*)*	ccccxx		(0,6)(0,6)
+#E	SAME		ababab		(0,0)(0,0)
+E	SAME		ababab		(0,0)(?,?)	RE2/Go
+
+E	((z)+|a)*	zabcde		(0,2)(1,2)
+
+#{E	a+?		aaaaaa		(0,1)	no *? +? mimimal match ops
+#E	(a)		aaa		(0,1)(0,1)
+#E	(a*?)		aaa		(0,0)(0,0)
+#E	(a)*?		aaa		(0,0)
+#E	(a*?)*?		aaa		(0,0)
+#}
+
+B	\(a*\)*\(x\)		x	(0,1)(0,0)(0,1)
+B	\(a*\)*\(x\)		ax	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)		axa	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)\(\1\)	x	(0,1)(0,0)(0,1)(1,1)
+B	\(a*\)*\(x\)\(\1\)	ax	(0,2)(1,1)(1,2)(2,2)
+B	\(a*\)*\(x\)\(\1\)	axa	(0,3)(0,1)(1,2)(2,3)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axax	(0,4)(0,1)(1,2)(2,3)(3,4)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axxa	(0,3)(1,1)(1,2)(2,2)(2,3)
+
+#E	(a*)*(x)		x	(0,1)(0,0)(0,1)
+E	(a*)*(x)		x	(0,1)(?,?)(0,1)	RE2/Go
+E	(a*)*(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)*(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*)+(x)		x	(0,1)(0,0)(0,1)
+E	(a*)+(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)+(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*){2}(x)		x	(0,1)(0,0)(0,1)
+E	(a*){2}(x)		ax	(0,2)(1,1)(1,2)
+E	(a*){2}(x)		axa	(0,2)(1,1)(1,2)
--- a/src/pkg/exp/regexp/testdata/re2-exhaustive.txt.bz2
+++ b/src/pkg/exp/regexp/testdata/re2-exhaustive.txt.bz2
--- a/src/pkg/exp/regexp/testdata/re2-search.txt
+++ b/src/pkg/exp/regexp/testdata/re2-search.txt
--- a/src/pkg/exp/regexp/testdata/repetition.dat
+++ b/src/pkg/exp/regexp/testdata/repetition.dat
@ -0,0 +1,163 @@
+NOTE	implicit vs. explicit repetitions : 2009-02-02
+
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+#	NOMATCH
+#	(0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+#	(0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+
+E	((..)|(.))				NULL		NOMATCH
+E	((..)|(.))((..)|(.))			NULL		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		NULL		NOMATCH
+
+E	((..)|(.)){1}				NULL		NOMATCH
+E	((..)|(.)){2}				NULL		NOMATCH
+E	((..)|(.)){3}				NULL		NOMATCH
+
+E	((..)|(.))*				NULL		(0,0)
+
+E	((..)|(.))				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.))((..)|(.))			a		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		a		NOMATCH
+
+E	((..)|(.)){1}				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.)){2}				a		NOMATCH
+E	((..)|(.)){3}				a		NOMATCH
+
+E	((..)|(.))*				a		(0,1)(0,1)(?,?)(0,1)
+
+E	((..)|(.))				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aa		(0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E	((..)|(.))((..)|(.))((..)|(.))		aa		NOMATCH
+
+E	((..)|(.)){1}				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aa		(0,2)(1,2)(?,?)(1,2)
+E	((..)|(.)){3}				aa		NOMATCH
+
+E	((..)|(.))*				aa		(0,2)(0,2)(0,2)(?,?)
+
+E	((..)|(.))				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaa		(0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E	((..)|(.))((..)|(.))((..)|(.))		aaa		(0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E	((..)|(.)){1}				aaa		(0,2)(0,2)(0,2)(?,?)
+#E	((..)|(.)){2}				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.)){2}				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+E	((..)|(.)){3}				aaa		(0,3)(2,3)(?,?)(2,3)
+
+#E	((..)|(.))*				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.))*				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+
+E	((..)|(.))				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaa		(0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E	((..)|(.)){1}				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaa		(0,4)(3,4)(?,?)(3,4)
+E	((..)|(.)){3}				aaaa		(0,4)(3,4)(0,2)(3,4)	RE2/Go
+
+E	((..)|(.))*				aaaa		(0,4)(2,4)(2,4)(?,?)
+
+E	((..)|(.))				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaa		(0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E	((..)|(.)){1}				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+#E	((..)|(.))*				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.))*				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+E	((..)|(.))				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaaa		(0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E	((..)|(.)){1}				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+E	((..)|(.))*				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+NOTE	additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
+
+# These test a bug in OS X / FreeBSD / NetBSD, and libtre.
+# Linux/GLIBC gets the {8,} and {8,8} wrong.
+
+:HA#100:E	X(.?){0,}Y	X1234567Y	(0,9)(7,8)
+:HA#101:E	X(.?){1,}Y	X1234567Y	(0,9)(7,8)
+:HA#102:E	X(.?){2,}Y	X1234567Y	(0,9)(7,8)
+:HA#103:E	X(.?){3,}Y	X1234567Y	(0,9)(7,8)
+:HA#104:E	X(.?){4,}Y	X1234567Y	(0,9)(7,8)
+:HA#105:E	X(.?){5,}Y	X1234567Y	(0,9)(7,8)
+:HA#106:E	X(.?){6,}Y	X1234567Y	(0,9)(7,8)
+:HA#107:E	X(.?){7,}Y	X1234567Y	(0,9)(7,8)
+:HA#108:E	X(.?){8,}Y	X1234567Y	(0,9)(8,8)
+#:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(7,8)
+:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(7,8)
+:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(7,8)
+:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(7,8)
+:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(7,8)
+:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(7,8)
+:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(7,8)
+:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(7,8)
+:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+:HA#118:E	X(.?){8,8}Y	X1234567Y	(0,9)(8,8)
+
+# These test a fixed bug in my regex-tdfa that did not keep the expanded
+# form properly grouped, so right association did the wrong thing with
+# these ambiguous patterns (crafted just to test my code when I became
+# suspicious of my implementation).  The first subexpression should use
+# "ab" then "a" then "bcd".
+
+# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
+# results like (0,6)(4,5)(6,6).
+
+:HA#260:E	(a|ab|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#261:E	(a|ab|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#262:E	(a|ab|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#263:E	(a|ab|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#264:E	(a|ab|c|bcd){4,}(d*)	ababcd	NOMATCH
+:HA#265:E	(a|ab|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#266:E	(a|ab|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#267:E	(a|ab|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#268:E	(a|ab|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#269:E	(a|ab|c|bcd){4,10}(d*)	ababcd	NOMATCH
+:HA#270:E	(a|ab|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#271:E	(a|ab|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+
+# The above worked on Linux/GLIBC but the following often fail.
+# They also trip up OS X / FreeBSD / NetBSD:
+
+#:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#284:E	(ab|a|c|bcd){4,}(d*)	ababcd	NOMATCH
+#:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#289:E	(ab|a|c|bcd){4,10}(d*)	ababcd	NOMATCH
+#:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
--- a/src/pkg/exp/regexp/testdata/testregex.c
+++ b/src/pkg/exp/regexp/testdata/testregex.c