From 3e52dadfd7b4c43c1d630d510eeb1b289d2ab422 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 25 Oct 2011 22:20:57 -0700 Subject: [PATCH] regexp: use rune Public API of syntax tree changes. R=golang-dev, r, gri CC=golang-dev https://golang.org/cl/5302046 --- src/pkg/regexp/exec.go | 28 ++++---- src/pkg/regexp/regexp.go | 22 +++--- src/pkg/regexp/syntax/compile.go | 16 ++--- src/pkg/regexp/syntax/make_perl_groups.pl | 2 +- src/pkg/regexp/syntax/parse.go | 88 +++++++++++------------ src/pkg/regexp/syntax/parse_test.go | 22 +++--- src/pkg/regexp/syntax/perl_groups.go | 34 ++++----- src/pkg/regexp/syntax/prog.go | 20 +++--- src/pkg/regexp/syntax/regexp.go | 10 +-- 9 files changed, 121 insertions(+), 121 deletions(-) diff --git a/src/pkg/regexp/exec.go b/src/pkg/regexp/exec.go index 3b0e3888524..d7057a191b7 100644 --- a/src/pkg/regexp/exec.go +++ b/src/pkg/regexp/exec.go @@ -90,15 +90,15 @@ func (m *machine) match(i input, pos int) bool { m.matchcap[i] = -1 } runq, nextq := &m.q0, &m.q1 - rune, rune1 := endOfText, endOfText + r, r1 := endOfText, endOfText width, width1 := 0, 0 - rune, width = i.step(pos) - if rune != endOfText { - rune1, width1 = i.step(pos + width) + r, width = i.step(pos) + if r != endOfText { + r1, width1 = i.step(pos + width) } var flag syntax.EmptyOp if pos == 0 { - flag = syntax.EmptyOpContext(-1, rune) + flag = syntax.EmptyOpContext(-1, r) } else { flag = i.context(pos) } @@ -112,15 +112,15 @@ func (m *machine) match(i input, pos int) bool { // Have match; finished exploring alternatives. break } - if len(m.re.prefix) > 0 && rune1 != m.re.prefixRune && i.canCheckPrefix() { + if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() { // Match requires literal prefix; fast search for it. advance := i.index(m.re, pos) if advance < 0 { break } pos += advance - rune, width = i.step(pos) - rune1, width1 = i.step(pos + width) + r, width = i.step(pos) + r1, width1 = i.step(pos + width) } } if !m.matched { @@ -129,8 +129,8 @@ func (m *machine) match(i input, pos int) bool { } m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil) } - flag = syntax.EmptyOpContext(rune, rune1) - m.step(runq, nextq, pos, pos+width, rune, flag) + flag = syntax.EmptyOpContext(r, r1) + m.step(runq, nextq, pos, pos+width, r, flag) if width == 0 { break } @@ -140,9 +140,9 @@ func (m *machine) match(i input, pos int) bool { break } pos += width - rune, width = rune1, width1 - if rune != endOfText { - rune1, width1 = i.step(pos + width) + r, width = r1, width1 + if r != endOfText { + r1, width1 = i.step(pos + width) } runq, nextq = nextq, runq } @@ -166,7 +166,7 @@ func (m *machine) clear(q *queue) { // The step processes the rune c (which may be endOfText), // which starts at position pos and ends at nextPos. // nextCond gives the setting for the empty-width flags after c. -func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) { +func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) { longest := m.re.longest for j := 0; j < len(runq.dense); j++ { d := &runq.dense[j] diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index 2325f6204b1..a1b7951bfe8 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -83,7 +83,7 @@ type Regexp struct { prefix string // required prefix in unanchored matches prefixBytes []byte // prefix, as a []byte prefixComplete bool // prefix is the entire regexp - prefixRune int // first rune in prefix + prefixRune rune // first rune in prefix cond syntax.EmptyOp // empty-width conditions required at start of match numSubexp int longest bool @@ -224,13 +224,13 @@ func (re *Regexp) NumSubexp() int { return re.numSubexp } -const endOfText = -1 +const endOfText rune = -1 // input abstracts different representations of the input text. It provides // one-character lookahead. type input interface { - step(pos int) (rune int, width int) // advance one rune - canCheckPrefix() bool // can we look ahead without losing info? + step(pos int) (r rune, width int) // advance one rune + canCheckPrefix() bool // can we look ahead without losing info? hasPrefix(re *Regexp) bool index(re *Regexp, pos int) int context(pos int) syntax.EmptyOp @@ -245,11 +245,11 @@ func newInputString(str string) *inputString { return &inputString{str: str} } -func (i *inputString) step(pos int) (int, int) { +func (i *inputString) step(pos int) (rune, int) { if pos < len(i.str) { c := i.str[pos] if c < utf8.RuneSelf { - return int(c), 1 + return rune(c), 1 } return utf8.DecodeRuneInString(i.str[pos:]) } @@ -269,7 +269,7 @@ func (i *inputString) index(re *Regexp, pos int) int { } func (i *inputString) context(pos int) syntax.EmptyOp { - r1, r2 := -1, -1 + r1, r2 := endOfText, endOfText if pos > 0 && pos <= len(i.str) { r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) } @@ -288,11 +288,11 @@ func newInputBytes(str []byte) *inputBytes { return &inputBytes{str: str} } -func (i *inputBytes) step(pos int) (int, int) { +func (i *inputBytes) step(pos int) (rune, int) { if pos < len(i.str) { c := i.str[pos] if c < utf8.RuneSelf { - return int(c), 1 + return rune(c), 1 } return utf8.DecodeRune(i.str[pos:]) } @@ -312,7 +312,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int { } func (i *inputBytes) context(pos int) syntax.EmptyOp { - r1, r2 := -1, -1 + r1, r2 := endOfText, endOfText if pos > 0 && pos <= len(i.str) { r1, _ = utf8.DecodeLastRune(i.str[:pos]) } @@ -333,7 +333,7 @@ func newInputReader(r io.RuneReader) *inputReader { return &inputReader{r: r} } -func (i *inputReader) step(pos int) (int, int) { +func (i *inputReader) step(pos int) (rune, int) { if !i.atEOT && pos != i.pos { return endOfText, 0 diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go index c415d39a57e..c90de3fe99d 100644 --- a/src/pkg/regexp/syntax/compile.go +++ b/src/pkg/regexp/syntax/compile.go @@ -91,8 +91,8 @@ func (c *compiler) init() { c.inst(InstFail) } -var anyRuneNotNL = []int{0, '\n' - 1, '\n' + 1, unicode.MaxRune} -var anyRune = []int{0, unicode.MaxRune} +var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune} +var anyRune = []rune{0, unicode.MaxRune} func (c *compiler) compile(re *Regexp) frag { switch re.Op { @@ -262,12 +262,12 @@ func (c *compiler) empty(op EmptyOp) frag { return f } -func (c *compiler) rune(rune []int, flags Flags) frag { +func (c *compiler) rune(r []rune, flags Flags) frag { f := c.inst(InstRune) i := &c.p.Inst[f.i] - i.Rune = rune + i.Rune = r flags &= FoldCase // only relevant flag is FoldCase - if len(rune) != 1 || unicode.SimpleFold(rune[0]) == rune[0] { + if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] { // and sometimes not even that flags &^= FoldCase } @@ -276,11 +276,11 @@ func (c *compiler) rune(rune []int, flags Flags) frag { // Special cases for exec machine. switch { - case flags&FoldCase == 0 && (len(rune) == 1 || len(rune) == 2 && rune[0] == rune[1]): + case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]): i.Op = InstRune1 - case len(rune) == 2 && rune[0] == 0 && rune[1] == unicode.MaxRune: + case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune: i.Op = InstRuneAny - case len(rune) == 4 && rune[0] == 0 && rune[1] == '\n'-1 && rune[2] == '\n'+1 && rune[3] == unicode.MaxRune: + case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune: i.Op = InstRuneAnyNotNL } diff --git a/src/pkg/regexp/syntax/make_perl_groups.pl b/src/pkg/regexp/syntax/make_perl_groups.pl index 6d1b84b1003..d024f5090e9 100755 --- a/src/pkg/regexp/syntax/make_perl_groups.pl +++ b/src/pkg/regexp/syntax/make_perl_groups.pl @@ -57,7 +57,7 @@ sub ComputeClass($) { sub PrintClass($$@) { my ($cname, $name, @ranges) = @_; - print "var code$cname = []int{ /* $name */\n"; + print "var code$cname = []rune{ /* $name */\n"; for (my $i=0; $i<@ranges; $i++) { my @a = @{$ranges[$i]}; printf "\t0x%x, 0x%x,\n", $a[0], $a[1]; diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go index 1165db3e221..f5602628e70 100644 --- a/src/pkg/regexp/syntax/parse.go +++ b/src/pkg/regexp/syntax/parse.go @@ -82,7 +82,7 @@ type parser struct { free *Regexp numCap int // number of capturing groups seen wholeRegexp string - tmpClass []int // temporary char class work space + tmpClass []rune // temporary char class work space } func (p *parser) newRegexp(op Op) *Regexp { @@ -149,7 +149,7 @@ func (p *parser) push(re *Regexp) *Regexp { // If r >= 0 and there's a node left over, maybeConcat uses it // to push r with the given flags. // maybeConcat reports whether r was pushed. -func (p *parser) maybeConcat(r int, flags Flags) bool { +func (p *parser) maybeConcat(r rune, flags Flags) bool { n := len(p.stack) if n < 2 { return false @@ -178,7 +178,7 @@ func (p *parser) maybeConcat(r int, flags Flags) bool { } // newLiteral returns a new OpLiteral Regexp with the given flags -func (p *parser) newLiteral(r int, flags Flags) *Regexp { +func (p *parser) newLiteral(r rune, flags Flags) *Regexp { re := p.newRegexp(OpLiteral) re.Flags = flags if flags&FoldCase != 0 { @@ -190,7 +190,7 @@ func (p *parser) newLiteral(r int, flags Flags) *Regexp { } // minFoldRune returns the minimum rune fold-equivalent to r. -func minFoldRune(r int) int { +func minFoldRune(r rune) rune { if r < minFold || r > maxFold { return r } @@ -206,7 +206,7 @@ func minFoldRune(r int) int { // literal pushes a literal regexp for the rune r on the stack // and returns that regexp. -func (p *parser) literal(r int) { +func (p *parser) literal(r rune) { p.push(p.newLiteral(r, p.flags)) } @@ -369,7 +369,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { } // Round 1: Factor out common literal prefixes. - var str []int + var str []rune var strflags Flags start := 0 out := sub[:0] @@ -380,7 +380,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { // // Invariant: sub[start:i] consists of regexps that all begin // with str as modified by strflags. - var istr []int + var istr []rune var iflags Flags if i < len(sub) { istr, iflags = p.leadingString(sub[i]) @@ -543,7 +543,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { // leadingString returns the leading literal string that re begins with. // The string refers to storage in re or its children. -func (p *parser) leadingString(re *Regexp) ([]int, Flags) { +func (p *parser) leadingString(re *Regexp) ([]rune, Flags) { if re.Op == OpConcat && len(re.Sub) > 0 { re = re.Sub[0] } @@ -639,7 +639,7 @@ func literalRegexp(s string, flags Flags) *Regexp { for _, c := range s { if len(re.Rune) >= cap(re.Rune) { // string is too long to fit in Rune0. let Go handle it - re.Rune = []int(s) + re.Rune = []rune(s) break } re.Rune = append(re.Rune, c) @@ -662,7 +662,7 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) { var ( p parser err os.Error - c int + c rune op Op lastRepeat string min, max int @@ -935,7 +935,7 @@ func (p *parser) parsePerlFlags(s string) (rest string, err os.Error) { } // Non-capturing group. Might also twiddle Perl flags. - var c int + var c rune t = t[2:] // skip (? flags := p.flags sign := +1 @@ -1049,7 +1049,7 @@ func isCharClass(re *Regexp) bool { } // does re match r? -func matchRune(re *Regexp, r int) bool { +func matchRune(re *Regexp, r rune) bool { switch re.Op { case OpLiteral: return len(re.Rune) == 1 && re.Rune[0] == r @@ -1186,7 +1186,7 @@ func (p *parser) parseRightParen() os.Error { // parseEscape parses an escape sequence at the beginning of s // and returns the rune. -func (p *parser) parseEscape(s string) (r int, rest string, err os.Error) { +func (p *parser) parseEscape(s string) (r rune, rest string, err os.Error) { t := s[1:] if t == "" { return 0, "", &Error{ErrTrailingBackslash, ""} @@ -1221,7 +1221,7 @@ Switch: if t == "" || t[0] < '0' || t[0] > '7' { break } - r = r*8 + int(t[0]) - '0' + r = r*8 + rune(t[0]) - '0' t = t[1:] } return r, t, nil @@ -1302,7 +1302,7 @@ Switch: // parseClassChar parses a character class character at the beginning of s // and returns it. -func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) { +func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err os.Error) { if s == "" { return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass} } @@ -1318,13 +1318,13 @@ func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err o type charGroup struct { sign int - class []int + class []rune } // parsePerlClassEscape parses a leading Perl character class escape like \d // from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. -func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string) { +func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) { if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' { return } @@ -1338,7 +1338,7 @@ func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string // parseNamedClass parses a leading POSIX named character class like [:alnum:] // from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. -func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err os.Error) { +func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err os.Error) { if len(s) < 2 || s[0] != '[' || s[1] != ':' { return } @@ -1356,7 +1356,7 @@ func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err return p.appendGroup(r, g), s, nil } -func (p *parser) appendGroup(r []int, g charGroup) []int { +func (p *parser) appendGroup(r []rune, g charGroup) []rune { if p.flags&FoldCase == 0 { if g.sign < 0 { r = appendNegatedClass(r, g.class) @@ -1401,7 +1401,7 @@ func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) { // parseUnicodeClass parses a leading Unicode character class like \p{Han} // from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. -func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, err os.Error) { +func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err os.Error) { if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' { return } @@ -1533,7 +1533,7 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) { // Single character or simple range. rng := t - var lo, hi int + var lo, hi rune if lo, t, err = p.parseClassChar(t, s); err != nil { return "", err } @@ -1570,7 +1570,7 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) { // cleanClass sorts the ranges (pairs of elements of r), // merges them, and eliminates duplicates. -func cleanClass(rp *[]int) []int { +func cleanClass(rp *[]rune) []rune { // Sort by lo increasing, hi decreasing to break ties. sort.Sort(ranges{rp}) @@ -1601,7 +1601,7 @@ func cleanClass(rp *[]int) []int { } // appendLiteral returns the result of appending the literal x to the class r. -func appendLiteral(r []int, x int, flags Flags) []int { +func appendLiteral(r []rune, x rune, flags Flags) []rune { if flags&FoldCase != 0 { return appendFoldedRange(r, x, x) } @@ -1609,7 +1609,7 @@ func appendLiteral(r []int, x int, flags Flags) []int { } // appendRange returns the result of appending the range lo-hi to the class r. -func appendRange(r []int, lo, hi int) []int { +func appendRange(r []rune, lo, hi rune) []rune { // Expand last range or next to last range if it overlaps or abuts. // Checking two ranges helps when appending case-folded // alphabets, so that one range can be expanding A-Z and the @@ -1642,7 +1642,7 @@ const ( // appendFoldedRange returns the result of appending the range lo-hi // and its case folding-equivalent runes to the class r. -func appendFoldedRange(r []int, lo, hi int) []int { +func appendFoldedRange(r []rune, lo, hi rune) []rune { // Optimizations. if lo <= minFold && hi >= maxFold { // Range is full: folding can't add more. @@ -1677,7 +1677,7 @@ func appendFoldedRange(r []int, lo, hi int) []int { // appendClass returns the result of appending the class x to the class r. // It assume x is clean. -func appendClass(r []int, x []int) []int { +func appendClass(r []rune, x []rune) []rune { for i := 0; i < len(x); i += 2 { r = appendRange(r, x[i], x[i+1]) } @@ -1685,7 +1685,7 @@ func appendClass(r []int, x []int) []int { } // appendFolded returns the result of appending the case folding of the class x to the class r. -func appendFoldedClass(r []int, x []int) []int { +func appendFoldedClass(r []rune, x []rune) []rune { for i := 0; i < len(x); i += 2 { r = appendFoldedRange(r, x[i], x[i+1]) } @@ -1694,8 +1694,8 @@ func appendFoldedClass(r []int, x []int) []int { // appendNegatedClass returns the result of appending the negation of the class x to the class r. // It assumes x is clean. -func appendNegatedClass(r []int, x []int) []int { - nextLo := 0 +func appendNegatedClass(r []rune, x []rune) []rune { + nextLo := rune('\u0000') for i := 0; i < len(x); i += 2 { lo, hi := x[i], x[i+1] if nextLo <= lo-1 { @@ -1710,9 +1710,9 @@ func appendNegatedClass(r []int, x []int) []int { } // appendTable returns the result of appending x to the class r. -func appendTable(r []int, x *unicode.RangeTable) []int { +func appendTable(r []rune, x *unicode.RangeTable) []rune { for _, xr := range x.R16 { - lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride) + lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) if stride == 1 { r = appendRange(r, lo, hi) continue @@ -1722,7 +1722,7 @@ func appendTable(r []int, x *unicode.RangeTable) []int { } } for _, xr := range x.R32 { - lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride) + lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) if stride == 1 { r = appendRange(r, lo, hi) continue @@ -1735,10 +1735,10 @@ func appendTable(r []int, x *unicode.RangeTable) []int { } // appendNegatedTable returns the result of appending the negation of x to the class r. -func appendNegatedTable(r []int, x *unicode.RangeTable) []int { - nextLo := 0 // lo end of next class to add +func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune { + nextLo := rune('\u0000') // lo end of next class to add for _, xr := range x.R16 { - lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride) + lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) if stride == 1 { if nextLo <= lo-1 { r = appendRange(r, nextLo, lo-1) @@ -1754,7 +1754,7 @@ func appendNegatedTable(r []int, x *unicode.RangeTable) []int { } } for _, xr := range x.R32 { - lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride) + lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride) if stride == 1 { if nextLo <= lo-1 { r = appendRange(r, nextLo, lo-1) @@ -1777,9 +1777,9 @@ func appendNegatedTable(r []int, x *unicode.RangeTable) []int { // negateClass overwrites r and returns r's negation. // It assumes the class r is already clean. -func negateClass(r []int) []int { - nextLo := 0 // lo end of next class to add - w := 0 // write index +func negateClass(r []rune) []rune { + nextLo := rune('\u0000') // lo end of next class to add + w := 0 // write index for i := 0; i < len(r); i += 2 { lo, hi := r[i], r[i+1] if nextLo <= lo-1 { @@ -1801,9 +1801,9 @@ func negateClass(r []int) []int { // ranges implements sort.Interface on a []rune. // The choice of receiver type definition is strange // but avoids an allocation since we already have -// a *[]int. +// a *[]rune. type ranges struct { - p *[]int + p *[]rune } func (ra ranges) Less(i, j int) bool { @@ -1835,7 +1835,7 @@ func checkUTF8(s string) os.Error { return nil } -func nextRune(s string) (c int, t string, err os.Error) { +func nextRune(s string) (c rune, t string, err os.Error) { c, size := utf8.DecodeRuneInString(s) if c == utf8.RuneError && size == 1 { return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} @@ -1843,11 +1843,11 @@ func nextRune(s string) (c int, t string, err os.Error) { return c, s[size:], nil } -func isalnum(c int) bool { +func isalnum(c rune) bool { return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' } -func unhex(c int) int { +func unhex(c rune) rune { if '0' <= c && c <= '9' { return c - '0' } diff --git a/src/pkg/regexp/syntax/parse_test.go b/src/pkg/regexp/syntax/parse_test.go index f20276c59ab..c6e63392c9c 100644 --- a/src/pkg/regexp/syntax/parse_test.go +++ b/src/pkg/regexp/syntax/parse_test.go @@ -371,10 +371,10 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) { b.WriteByte('}') } -func mkCharClass(f func(int) bool) string { +func mkCharClass(f func(rune) bool) string { re := &Regexp{Op: OpCharClass} - lo := -1 - for i := 0; i <= unicode.MaxRune; i++ { + lo := rune(-1) + for i := rune(0); i <= unicode.MaxRune; i++ { if f(i) { if lo < 0 { lo = i @@ -392,12 +392,12 @@ func mkCharClass(f func(int) bool) string { return dump(re) } -func isUpperFold(rune int) bool { - if unicode.IsUpper(rune) { +func isUpperFold(r rune) bool { + if unicode.IsUpper(r) { return true } - c := unicode.SimpleFold(rune) - for c != rune { + c := unicode.SimpleFold(r) + for c != r { if unicode.IsUpper(c) { return true } @@ -407,8 +407,8 @@ func isUpperFold(rune int) bool { } func TestFoldConstants(t *testing.T) { - last := -1 - for i := 0; i <= unicode.MaxRune; i++ { + last := rune(-1) + for i := rune(0); i <= unicode.MaxRune; i++ { if unicode.SimpleFold(i) == i { continue } @@ -427,8 +427,8 @@ func TestAppendRangeCollapse(t *testing.T) { // into the earlier ones (it looks back two ranges), so that // the slice never grows very large. // Note that we are not calling cleanClass. - var r []int - for i := 'A'; i <= 'Z'; i++ { + var r []rune + for i := rune('A'); i <= 'Z'; i++ { r = appendRange(r, i, i) r = appendRange(r, i+'a'-'A', i+'a'-'A') } diff --git a/src/pkg/regexp/syntax/perl_groups.go b/src/pkg/regexp/syntax/perl_groups.go index 05b392c40d8..1a11ca62f0c 100644 --- a/src/pkg/regexp/syntax/perl_groups.go +++ b/src/pkg/regexp/syntax/perl_groups.go @@ -3,17 +3,17 @@ package syntax -var code1 = []int{ /* \d */ +var code1 = []rune{ /* \d */ 0x30, 0x39, } -var code2 = []int{ /* \s */ +var code2 = []rune{ /* \s */ 0x9, 0xa, 0xc, 0xd, 0x20, 0x20, } -var code3 = []int{ /* \w */ +var code3 = []rune{ /* \w */ 0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, @@ -28,71 +28,71 @@ var perlGroup = map[string]charGroup{ `\w`: {+1, code3}, `\W`: {-1, code3}, } -var code4 = []int{ /* [:alnum:] */ +var code4 = []rune{ /* [:alnum:] */ 0x30, 0x39, 0x41, 0x5a, 0x61, 0x7a, } -var code5 = []int{ /* [:alpha:] */ +var code5 = []rune{ /* [:alpha:] */ 0x41, 0x5a, 0x61, 0x7a, } -var code6 = []int{ /* [:ascii:] */ +var code6 = []rune{ /* [:ascii:] */ 0x0, 0x7f, } -var code7 = []int{ /* [:blank:] */ +var code7 = []rune{ /* [:blank:] */ 0x9, 0x9, 0x20, 0x20, } -var code8 = []int{ /* [:cntrl:] */ +var code8 = []rune{ /* [:cntrl:] */ 0x0, 0x1f, 0x7f, 0x7f, } -var code9 = []int{ /* [:digit:] */ +var code9 = []rune{ /* [:digit:] */ 0x30, 0x39, } -var code10 = []int{ /* [:graph:] */ +var code10 = []rune{ /* [:graph:] */ 0x21, 0x7e, } -var code11 = []int{ /* [:lower:] */ +var code11 = []rune{ /* [:lower:] */ 0x61, 0x7a, } -var code12 = []int{ /* [:print:] */ +var code12 = []rune{ /* [:print:] */ 0x20, 0x7e, } -var code13 = []int{ /* [:punct:] */ +var code13 = []rune{ /* [:punct:] */ 0x21, 0x2f, 0x3a, 0x40, 0x5b, 0x60, 0x7b, 0x7e, } -var code14 = []int{ /* [:space:] */ +var code14 = []rune{ /* [:space:] */ 0x9, 0xd, 0x20, 0x20, } -var code15 = []int{ /* [:upper:] */ +var code15 = []rune{ /* [:upper:] */ 0x41, 0x5a, } -var code16 = []int{ /* [:word:] */ +var code16 = []rune{ /* [:word:] */ 0x30, 0x39, 0x41, 0x5a, 0x5f, 0x5f, 0x61, 0x7a, } -var code17 = []int{ /* [:xdigit:] */ +var code17 = []rune{ /* [:xdigit:] */ 0x30, 0x39, 0x41, 0x46, 0x61, 0x66, diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go index ced45da077b..f5b697a59ae 100644 --- a/src/pkg/regexp/syntax/prog.go +++ b/src/pkg/regexp/syntax/prog.go @@ -51,7 +51,7 @@ const ( // at the beginning of the text. // Passing r2 == -1 indicates that the position is // at the end of the text. -func EmptyOpContext(r1, r2 int) EmptyOp { +func EmptyOpContext(r1, r2 rune) EmptyOp { var op EmptyOp if r1 < 0 { op |= EmptyBeginText | EmptyBeginLine @@ -76,7 +76,7 @@ func EmptyOpContext(r1, r2 int) EmptyOp { // IsWordChar reports whether r is consider a ``word character'' // during the evaluation of the \b and \B zero-width assertions. // These assertions are ASCII-only: the word characters are [A-Za-z0-9_]. -func IsWordChar(r int) bool { +func IsWordChar(r rune) bool { return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_' } @@ -85,7 +85,7 @@ type Inst struct { Op InstOp Out uint32 // all but InstMatch, InstFail Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth - Rune []int + Rune []rune } func (p *Prog) String() string { @@ -161,7 +161,7 @@ Loop: // MatchRune returns true if the instruction matches (and consumes) r. // It should only be called when i.Op == InstRune. -func (i *Inst) MatchRune(r int) bool { +func (i *Inst) MatchRune(r rune) bool { rune := i.Rune // Special case: single-rune slice is from literal string, not char class. @@ -210,17 +210,17 @@ func (i *Inst) MatchRune(r int) bool { // As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char. // Since we act on runes, it would be easy to support Unicode here. -func wordRune(rune int) bool { - return rune == '_' || - ('A' <= rune && rune <= 'Z') || - ('a' <= rune && rune <= 'z') || - ('0' <= rune && rune <= '9') +func wordRune(r rune) bool { + return r == '_' || + ('A' <= r && r <= 'Z') || + ('a' <= r && r <= 'z') || + ('0' <= r && r <= '9') } // MatchEmptyWidth returns true if the instruction matches // an empty string between the runes before and after. // It should only be called when i.Op == InstEmptyWidth. -func (i *Inst) MatchEmptyWidth(before int, after int) bool { +func (i *Inst) MatchEmptyWidth(before rune, after rune) bool { switch EmptyOp(i.Arg) { case EmptyBeginLine: return before == '\n' || before == -1 diff --git a/src/pkg/regexp/syntax/regexp.go b/src/pkg/regexp/syntax/regexp.go index 033848df28a..b5ddab1d16b 100644 --- a/src/pkg/regexp/syntax/regexp.go +++ b/src/pkg/regexp/syntax/regexp.go @@ -22,8 +22,8 @@ type Regexp struct { Flags Flags Sub []*Regexp // subexpressions, if any Sub0 [1]*Regexp // storage for short Sub - Rune []int // matched runes, for OpLiteral, OpCharClass - Rune0 [2]int // storage for short Rune + Rune []rune // matched runes, for OpLiteral, OpCharClass + Rune0 [2]rune // storage for short Rune Min, Max int // min, max for OpRepeat Cap int // capturing index, for OpCapture Name string // capturing name, for OpCapture @@ -252,7 +252,7 @@ func (re *Regexp) String() string { const meta = `\.+*?()|[]{}^$` -func escape(b *bytes.Buffer, r int, force bool) { +func escape(b *bytes.Buffer, r rune, force bool) { if unicode.IsPrint(r) { if strings.IndexRune(meta, r) >= 0 || force { b.WriteRune('\\') @@ -277,7 +277,7 @@ func escape(b *bytes.Buffer, r int, force bool) { default: if r < 0x100 { b.WriteString(`\x`) - s := strconv.Itob(r, 16) + s := strconv.Itob(int(r), 16) if len(s) == 1 { b.WriteRune('0') } @@ -285,7 +285,7 @@ func escape(b *bytes.Buffer, r int, force bool) { break } b.WriteString(`\x{`) - b.WriteString(strconv.Itob(r, 16)) + b.WriteString(strconv.Itob(int(r), 16)) b.WriteString(`}`) } }