mirror of
https://github.com/golang/go
synced 2024-11-24 22:57:57 -07:00
regexp: use rune
Public API of syntax tree changes. R=golang-dev, r, gri CC=golang-dev https://golang.org/cl/5302046
This commit is contained in:
parent
81b014818c
commit
3e52dadfd7
@ -90,15 +90,15 @@ func (m *machine) match(i input, pos int) bool {
|
|||||||
m.matchcap[i] = -1
|
m.matchcap[i] = -1
|
||||||
}
|
}
|
||||||
runq, nextq := &m.q0, &m.q1
|
runq, nextq := &m.q0, &m.q1
|
||||||
rune, rune1 := endOfText, endOfText
|
r, r1 := endOfText, endOfText
|
||||||
width, width1 := 0, 0
|
width, width1 := 0, 0
|
||||||
rune, width = i.step(pos)
|
r, width = i.step(pos)
|
||||||
if rune != endOfText {
|
if r != endOfText {
|
||||||
rune1, width1 = i.step(pos + width)
|
r1, width1 = i.step(pos + width)
|
||||||
}
|
}
|
||||||
var flag syntax.EmptyOp
|
var flag syntax.EmptyOp
|
||||||
if pos == 0 {
|
if pos == 0 {
|
||||||
flag = syntax.EmptyOpContext(-1, rune)
|
flag = syntax.EmptyOpContext(-1, r)
|
||||||
} else {
|
} else {
|
||||||
flag = i.context(pos)
|
flag = i.context(pos)
|
||||||
}
|
}
|
||||||
@ -112,15 +112,15 @@ func (m *machine) match(i input, pos int) bool {
|
|||||||
// Have match; finished exploring alternatives.
|
// Have match; finished exploring alternatives.
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if len(m.re.prefix) > 0 && rune1 != m.re.prefixRune && i.canCheckPrefix() {
|
if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() {
|
||||||
// Match requires literal prefix; fast search for it.
|
// Match requires literal prefix; fast search for it.
|
||||||
advance := i.index(m.re, pos)
|
advance := i.index(m.re, pos)
|
||||||
if advance < 0 {
|
if advance < 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
pos += advance
|
pos += advance
|
||||||
rune, width = i.step(pos)
|
r, width = i.step(pos)
|
||||||
rune1, width1 = i.step(pos + width)
|
r1, width1 = i.step(pos + width)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !m.matched {
|
if !m.matched {
|
||||||
@ -129,8 +129,8 @@ func (m *machine) match(i input, pos int) bool {
|
|||||||
}
|
}
|
||||||
m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil)
|
m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil)
|
||||||
}
|
}
|
||||||
flag = syntax.EmptyOpContext(rune, rune1)
|
flag = syntax.EmptyOpContext(r, r1)
|
||||||
m.step(runq, nextq, pos, pos+width, rune, flag)
|
m.step(runq, nextq, pos, pos+width, r, flag)
|
||||||
if width == 0 {
|
if width == 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
@ -140,9 +140,9 @@ func (m *machine) match(i input, pos int) bool {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
pos += width
|
pos += width
|
||||||
rune, width = rune1, width1
|
r, width = r1, width1
|
||||||
if rune != endOfText {
|
if r != endOfText {
|
||||||
rune1, width1 = i.step(pos + width)
|
r1, width1 = i.step(pos + width)
|
||||||
}
|
}
|
||||||
runq, nextq = nextq, runq
|
runq, nextq = nextq, runq
|
||||||
}
|
}
|
||||||
@ -166,7 +166,7 @@ func (m *machine) clear(q *queue) {
|
|||||||
// The step processes the rune c (which may be endOfText),
|
// The step processes the rune c (which may be endOfText),
|
||||||
// which starts at position pos and ends at nextPos.
|
// which starts at position pos and ends at nextPos.
|
||||||
// nextCond gives the setting for the empty-width flags after c.
|
// nextCond gives the setting for the empty-width flags after c.
|
||||||
func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) {
|
func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) {
|
||||||
longest := m.re.longest
|
longest := m.re.longest
|
||||||
for j := 0; j < len(runq.dense); j++ {
|
for j := 0; j < len(runq.dense); j++ {
|
||||||
d := &runq.dense[j]
|
d := &runq.dense[j]
|
||||||
|
@ -83,7 +83,7 @@ type Regexp struct {
|
|||||||
prefix string // required prefix in unanchored matches
|
prefix string // required prefix in unanchored matches
|
||||||
prefixBytes []byte // prefix, as a []byte
|
prefixBytes []byte // prefix, as a []byte
|
||||||
prefixComplete bool // prefix is the entire regexp
|
prefixComplete bool // prefix is the entire regexp
|
||||||
prefixRune int // first rune in prefix
|
prefixRune rune // first rune in prefix
|
||||||
cond syntax.EmptyOp // empty-width conditions required at start of match
|
cond syntax.EmptyOp // empty-width conditions required at start of match
|
||||||
numSubexp int
|
numSubexp int
|
||||||
longest bool
|
longest bool
|
||||||
@ -224,13 +224,13 @@ func (re *Regexp) NumSubexp() int {
|
|||||||
return re.numSubexp
|
return re.numSubexp
|
||||||
}
|
}
|
||||||
|
|
||||||
const endOfText = -1
|
const endOfText rune = -1
|
||||||
|
|
||||||
// input abstracts different representations of the input text. It provides
|
// input abstracts different representations of the input text. It provides
|
||||||
// one-character lookahead.
|
// one-character lookahead.
|
||||||
type input interface {
|
type input interface {
|
||||||
step(pos int) (rune int, width int) // advance one rune
|
step(pos int) (r rune, width int) // advance one rune
|
||||||
canCheckPrefix() bool // can we look ahead without losing info?
|
canCheckPrefix() bool // can we look ahead without losing info?
|
||||||
hasPrefix(re *Regexp) bool
|
hasPrefix(re *Regexp) bool
|
||||||
index(re *Regexp, pos int) int
|
index(re *Regexp, pos int) int
|
||||||
context(pos int) syntax.EmptyOp
|
context(pos int) syntax.EmptyOp
|
||||||
@ -245,11 +245,11 @@ func newInputString(str string) *inputString {
|
|||||||
return &inputString{str: str}
|
return &inputString{str: str}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *inputString) step(pos int) (int, int) {
|
func (i *inputString) step(pos int) (rune, int) {
|
||||||
if pos < len(i.str) {
|
if pos < len(i.str) {
|
||||||
c := i.str[pos]
|
c := i.str[pos]
|
||||||
if c < utf8.RuneSelf {
|
if c < utf8.RuneSelf {
|
||||||
return int(c), 1
|
return rune(c), 1
|
||||||
}
|
}
|
||||||
return utf8.DecodeRuneInString(i.str[pos:])
|
return utf8.DecodeRuneInString(i.str[pos:])
|
||||||
}
|
}
|
||||||
@ -269,7 +269,7 @@ func (i *inputString) index(re *Regexp, pos int) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (i *inputString) context(pos int) syntax.EmptyOp {
|
func (i *inputString) context(pos int) syntax.EmptyOp {
|
||||||
r1, r2 := -1, -1
|
r1, r2 := endOfText, endOfText
|
||||||
if pos > 0 && pos <= len(i.str) {
|
if pos > 0 && pos <= len(i.str) {
|
||||||
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
|
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
|
||||||
}
|
}
|
||||||
@ -288,11 +288,11 @@ func newInputBytes(str []byte) *inputBytes {
|
|||||||
return &inputBytes{str: str}
|
return &inputBytes{str: str}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *inputBytes) step(pos int) (int, int) {
|
func (i *inputBytes) step(pos int) (rune, int) {
|
||||||
if pos < len(i.str) {
|
if pos < len(i.str) {
|
||||||
c := i.str[pos]
|
c := i.str[pos]
|
||||||
if c < utf8.RuneSelf {
|
if c < utf8.RuneSelf {
|
||||||
return int(c), 1
|
return rune(c), 1
|
||||||
}
|
}
|
||||||
return utf8.DecodeRune(i.str[pos:])
|
return utf8.DecodeRune(i.str[pos:])
|
||||||
}
|
}
|
||||||
@ -312,7 +312,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (i *inputBytes) context(pos int) syntax.EmptyOp {
|
func (i *inputBytes) context(pos int) syntax.EmptyOp {
|
||||||
r1, r2 := -1, -1
|
r1, r2 := endOfText, endOfText
|
||||||
if pos > 0 && pos <= len(i.str) {
|
if pos > 0 && pos <= len(i.str) {
|
||||||
r1, _ = utf8.DecodeLastRune(i.str[:pos])
|
r1, _ = utf8.DecodeLastRune(i.str[:pos])
|
||||||
}
|
}
|
||||||
@ -333,7 +333,7 @@ func newInputReader(r io.RuneReader) *inputReader {
|
|||||||
return &inputReader{r: r}
|
return &inputReader{r: r}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *inputReader) step(pos int) (int, int) {
|
func (i *inputReader) step(pos int) (rune, int) {
|
||||||
if !i.atEOT && pos != i.pos {
|
if !i.atEOT && pos != i.pos {
|
||||||
return endOfText, 0
|
return endOfText, 0
|
||||||
|
|
||||||
|
@ -91,8 +91,8 @@ func (c *compiler) init() {
|
|||||||
c.inst(InstFail)
|
c.inst(InstFail)
|
||||||
}
|
}
|
||||||
|
|
||||||
var anyRuneNotNL = []int{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
|
var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
|
||||||
var anyRune = []int{0, unicode.MaxRune}
|
var anyRune = []rune{0, unicode.MaxRune}
|
||||||
|
|
||||||
func (c *compiler) compile(re *Regexp) frag {
|
func (c *compiler) compile(re *Regexp) frag {
|
||||||
switch re.Op {
|
switch re.Op {
|
||||||
@ -262,12 +262,12 @@ func (c *compiler) empty(op EmptyOp) frag {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *compiler) rune(rune []int, flags Flags) frag {
|
func (c *compiler) rune(r []rune, flags Flags) frag {
|
||||||
f := c.inst(InstRune)
|
f := c.inst(InstRune)
|
||||||
i := &c.p.Inst[f.i]
|
i := &c.p.Inst[f.i]
|
||||||
i.Rune = rune
|
i.Rune = r
|
||||||
flags &= FoldCase // only relevant flag is FoldCase
|
flags &= FoldCase // only relevant flag is FoldCase
|
||||||
if len(rune) != 1 || unicode.SimpleFold(rune[0]) == rune[0] {
|
if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] {
|
||||||
// and sometimes not even that
|
// and sometimes not even that
|
||||||
flags &^= FoldCase
|
flags &^= FoldCase
|
||||||
}
|
}
|
||||||
@ -276,11 +276,11 @@ func (c *compiler) rune(rune []int, flags Flags) frag {
|
|||||||
|
|
||||||
// Special cases for exec machine.
|
// Special cases for exec machine.
|
||||||
switch {
|
switch {
|
||||||
case flags&FoldCase == 0 && (len(rune) == 1 || len(rune) == 2 && rune[0] == rune[1]):
|
case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]):
|
||||||
i.Op = InstRune1
|
i.Op = InstRune1
|
||||||
case len(rune) == 2 && rune[0] == 0 && rune[1] == unicode.MaxRune:
|
case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune:
|
||||||
i.Op = InstRuneAny
|
i.Op = InstRuneAny
|
||||||
case len(rune) == 4 && rune[0] == 0 && rune[1] == '\n'-1 && rune[2] == '\n'+1 && rune[3] == unicode.MaxRune:
|
case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune:
|
||||||
i.Op = InstRuneAnyNotNL
|
i.Op = InstRuneAnyNotNL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ sub ComputeClass($) {
|
|||||||
|
|
||||||
sub PrintClass($$@) {
|
sub PrintClass($$@) {
|
||||||
my ($cname, $name, @ranges) = @_;
|
my ($cname, $name, @ranges) = @_;
|
||||||
print "var code$cname = []int{ /* $name */\n";
|
print "var code$cname = []rune{ /* $name */\n";
|
||||||
for (my $i=0; $i<@ranges; $i++) {
|
for (my $i=0; $i<@ranges; $i++) {
|
||||||
my @a = @{$ranges[$i]};
|
my @a = @{$ranges[$i]};
|
||||||
printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
|
printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
|
||||||
|
@ -82,7 +82,7 @@ type parser struct {
|
|||||||
free *Regexp
|
free *Regexp
|
||||||
numCap int // number of capturing groups seen
|
numCap int // number of capturing groups seen
|
||||||
wholeRegexp string
|
wholeRegexp string
|
||||||
tmpClass []int // temporary char class work space
|
tmpClass []rune // temporary char class work space
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) newRegexp(op Op) *Regexp {
|
func (p *parser) newRegexp(op Op) *Regexp {
|
||||||
@ -149,7 +149,7 @@ func (p *parser) push(re *Regexp) *Regexp {
|
|||||||
// If r >= 0 and there's a node left over, maybeConcat uses it
|
// If r >= 0 and there's a node left over, maybeConcat uses it
|
||||||
// to push r with the given flags.
|
// to push r with the given flags.
|
||||||
// maybeConcat reports whether r was pushed.
|
// maybeConcat reports whether r was pushed.
|
||||||
func (p *parser) maybeConcat(r int, flags Flags) bool {
|
func (p *parser) maybeConcat(r rune, flags Flags) bool {
|
||||||
n := len(p.stack)
|
n := len(p.stack)
|
||||||
if n < 2 {
|
if n < 2 {
|
||||||
return false
|
return false
|
||||||
@ -178,7 +178,7 @@ func (p *parser) maybeConcat(r int, flags Flags) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// newLiteral returns a new OpLiteral Regexp with the given flags
|
// newLiteral returns a new OpLiteral Regexp with the given flags
|
||||||
func (p *parser) newLiteral(r int, flags Flags) *Regexp {
|
func (p *parser) newLiteral(r rune, flags Flags) *Regexp {
|
||||||
re := p.newRegexp(OpLiteral)
|
re := p.newRegexp(OpLiteral)
|
||||||
re.Flags = flags
|
re.Flags = flags
|
||||||
if flags&FoldCase != 0 {
|
if flags&FoldCase != 0 {
|
||||||
@ -190,7 +190,7 @@ func (p *parser) newLiteral(r int, flags Flags) *Regexp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// minFoldRune returns the minimum rune fold-equivalent to r.
|
// minFoldRune returns the minimum rune fold-equivalent to r.
|
||||||
func minFoldRune(r int) int {
|
func minFoldRune(r rune) rune {
|
||||||
if r < minFold || r > maxFold {
|
if r < minFold || r > maxFold {
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
@ -206,7 +206,7 @@ func minFoldRune(r int) int {
|
|||||||
|
|
||||||
// literal pushes a literal regexp for the rune r
|
// literal pushes a literal regexp for the rune r
|
||||||
// on the stack.
|
// on the stack.
|
||||||
func (p *parser) literal(r int) {
|
func (p *parser) literal(r rune) {
|
||||||
p.push(p.newLiteral(r, p.flags))
|
p.push(p.newLiteral(r, p.flags))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,7 +369,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Round 1: Factor out common literal prefixes.
|
// Round 1: Factor out common literal prefixes.
|
||||||
var str []int
|
var str []rune
|
||||||
var strflags Flags
|
var strflags Flags
|
||||||
start := 0
|
start := 0
|
||||||
out := sub[:0]
|
out := sub[:0]
|
||||||
@ -380,7 +380,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
|
|||||||
//
|
//
|
||||||
// Invariant: sub[start:i] consists of regexps that all begin
|
// Invariant: sub[start:i] consists of regexps that all begin
|
||||||
// with str as modified by strflags.
|
// with str as modified by strflags.
|
||||||
var istr []int
|
var istr []rune
|
||||||
var iflags Flags
|
var iflags Flags
|
||||||
if i < len(sub) {
|
if i < len(sub) {
|
||||||
istr, iflags = p.leadingString(sub[i])
|
istr, iflags = p.leadingString(sub[i])
|
||||||
@ -543,7 +543,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
|
|||||||
|
|
||||||
// leadingString returns the leading literal string that re begins with.
|
// leadingString returns the leading literal string that re begins with.
|
||||||
// The string refers to storage in re or its children.
|
// The string refers to storage in re or its children.
|
||||||
func (p *parser) leadingString(re *Regexp) ([]int, Flags) {
|
func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
|
||||||
if re.Op == OpConcat && len(re.Sub) > 0 {
|
if re.Op == OpConcat && len(re.Sub) > 0 {
|
||||||
re = re.Sub[0]
|
re = re.Sub[0]
|
||||||
}
|
}
|
||||||
@ -639,7 +639,7 @@ func literalRegexp(s string, flags Flags) *Regexp {
|
|||||||
for _, c := range s {
|
for _, c := range s {
|
||||||
if len(re.Rune) >= cap(re.Rune) {
|
if len(re.Rune) >= cap(re.Rune) {
|
||||||
// string is too long to fit in Rune0. let Go handle it
|
// string is too long to fit in Rune0. let Go handle it
|
||||||
re.Rune = []int(s)
|
re.Rune = []rune(s)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
re.Rune = append(re.Rune, c)
|
re.Rune = append(re.Rune, c)
|
||||||
@ -662,7 +662,7 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
|
|||||||
var (
|
var (
|
||||||
p parser
|
p parser
|
||||||
err os.Error
|
err os.Error
|
||||||
c int
|
c rune
|
||||||
op Op
|
op Op
|
||||||
lastRepeat string
|
lastRepeat string
|
||||||
min, max int
|
min, max int
|
||||||
@ -935,7 +935,7 @@ func (p *parser) parsePerlFlags(s string) (rest string, err os.Error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Non-capturing group. Might also twiddle Perl flags.
|
// Non-capturing group. Might also twiddle Perl flags.
|
||||||
var c int
|
var c rune
|
||||||
t = t[2:] // skip (?
|
t = t[2:] // skip (?
|
||||||
flags := p.flags
|
flags := p.flags
|
||||||
sign := +1
|
sign := +1
|
||||||
@ -1049,7 +1049,7 @@ func isCharClass(re *Regexp) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// does re match r?
|
// does re match r?
|
||||||
func matchRune(re *Regexp, r int) bool {
|
func matchRune(re *Regexp, r rune) bool {
|
||||||
switch re.Op {
|
switch re.Op {
|
||||||
case OpLiteral:
|
case OpLiteral:
|
||||||
return len(re.Rune) == 1 && re.Rune[0] == r
|
return len(re.Rune) == 1 && re.Rune[0] == r
|
||||||
@ -1186,7 +1186,7 @@ func (p *parser) parseRightParen() os.Error {
|
|||||||
|
|
||||||
// parseEscape parses an escape sequence at the beginning of s
|
// parseEscape parses an escape sequence at the beginning of s
|
||||||
// and returns the rune.
|
// and returns the rune.
|
||||||
func (p *parser) parseEscape(s string) (r int, rest string, err os.Error) {
|
func (p *parser) parseEscape(s string) (r rune, rest string, err os.Error) {
|
||||||
t := s[1:]
|
t := s[1:]
|
||||||
if t == "" {
|
if t == "" {
|
||||||
return 0, "", &Error{ErrTrailingBackslash, ""}
|
return 0, "", &Error{ErrTrailingBackslash, ""}
|
||||||
@ -1221,7 +1221,7 @@ Switch:
|
|||||||
if t == "" || t[0] < '0' || t[0] > '7' {
|
if t == "" || t[0] < '0' || t[0] > '7' {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
r = r*8 + int(t[0]) - '0'
|
r = r*8 + rune(t[0]) - '0'
|
||||||
t = t[1:]
|
t = t[1:]
|
||||||
}
|
}
|
||||||
return r, t, nil
|
return r, t, nil
|
||||||
@ -1302,7 +1302,7 @@ Switch:
|
|||||||
|
|
||||||
// parseClassChar parses a character class character at the beginning of s
|
// parseClassChar parses a character class character at the beginning of s
|
||||||
// and returns it.
|
// and returns it.
|
||||||
func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) {
|
func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err os.Error) {
|
||||||
if s == "" {
|
if s == "" {
|
||||||
return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
|
return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
|
||||||
}
|
}
|
||||||
@ -1318,13 +1318,13 @@ func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err o
|
|||||||
|
|
||||||
type charGroup struct {
|
type charGroup struct {
|
||||||
sign int
|
sign int
|
||||||
class []int
|
class []rune
|
||||||
}
|
}
|
||||||
|
|
||||||
// parsePerlClassEscape parses a leading Perl character class escape like \d
|
// parsePerlClassEscape parses a leading Perl character class escape like \d
|
||||||
// from the beginning of s. If one is present, it appends the characters to r
|
// from the beginning of s. If one is present, it appends the characters to r
|
||||||
// and returns the new slice r and the remainder of the string.
|
// and returns the new slice r and the remainder of the string.
|
||||||
func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string) {
|
func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
|
||||||
if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
|
if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -1338,7 +1338,7 @@ func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string
|
|||||||
// parseNamedClass parses a leading POSIX named character class like [:alnum:]
|
// parseNamedClass parses a leading POSIX named character class like [:alnum:]
|
||||||
// from the beginning of s. If one is present, it appends the characters to r
|
// from the beginning of s. If one is present, it appends the characters to r
|
||||||
// and returns the new slice r and the remainder of the string.
|
// and returns the new slice r and the remainder of the string.
|
||||||
func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err os.Error) {
|
func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err os.Error) {
|
||||||
if len(s) < 2 || s[0] != '[' || s[1] != ':' {
|
if len(s) < 2 || s[0] != '[' || s[1] != ':' {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -1356,7 +1356,7 @@ func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err
|
|||||||
return p.appendGroup(r, g), s, nil
|
return p.appendGroup(r, g), s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) appendGroup(r []int, g charGroup) []int {
|
func (p *parser) appendGroup(r []rune, g charGroup) []rune {
|
||||||
if p.flags&FoldCase == 0 {
|
if p.flags&FoldCase == 0 {
|
||||||
if g.sign < 0 {
|
if g.sign < 0 {
|
||||||
r = appendNegatedClass(r, g.class)
|
r = appendNegatedClass(r, g.class)
|
||||||
@ -1401,7 +1401,7 @@ func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
|
|||||||
// parseUnicodeClass parses a leading Unicode character class like \p{Han}
|
// parseUnicodeClass parses a leading Unicode character class like \p{Han}
|
||||||
// from the beginning of s. If one is present, it appends the characters to r
|
// from the beginning of s. If one is present, it appends the characters to r
|
||||||
// and returns the new slice r and the remainder of the string.
|
// and returns the new slice r and the remainder of the string.
|
||||||
func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, err os.Error) {
|
func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err os.Error) {
|
||||||
if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
|
if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -1533,7 +1533,7 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {
|
|||||||
|
|
||||||
// Single character or simple range.
|
// Single character or simple range.
|
||||||
rng := t
|
rng := t
|
||||||
var lo, hi int
|
var lo, hi rune
|
||||||
if lo, t, err = p.parseClassChar(t, s); err != nil {
|
if lo, t, err = p.parseClassChar(t, s); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@ -1570,7 +1570,7 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {
|
|||||||
|
|
||||||
// cleanClass sorts the ranges (pairs of elements of r),
|
// cleanClass sorts the ranges (pairs of elements of r),
|
||||||
// merges them, and eliminates duplicates.
|
// merges them, and eliminates duplicates.
|
||||||
func cleanClass(rp *[]int) []int {
|
func cleanClass(rp *[]rune) []rune {
|
||||||
|
|
||||||
// Sort by lo increasing, hi decreasing to break ties.
|
// Sort by lo increasing, hi decreasing to break ties.
|
||||||
sort.Sort(ranges{rp})
|
sort.Sort(ranges{rp})
|
||||||
@ -1601,7 +1601,7 @@ func cleanClass(rp *[]int) []int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// appendLiteral returns the result of appending the literal x to the class r.
|
// appendLiteral returns the result of appending the literal x to the class r.
|
||||||
func appendLiteral(r []int, x int, flags Flags) []int {
|
func appendLiteral(r []rune, x rune, flags Flags) []rune {
|
||||||
if flags&FoldCase != 0 {
|
if flags&FoldCase != 0 {
|
||||||
return appendFoldedRange(r, x, x)
|
return appendFoldedRange(r, x, x)
|
||||||
}
|
}
|
||||||
@ -1609,7 +1609,7 @@ func appendLiteral(r []int, x int, flags Flags) []int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// appendRange returns the result of appending the range lo-hi to the class r.
|
// appendRange returns the result of appending the range lo-hi to the class r.
|
||||||
func appendRange(r []int, lo, hi int) []int {
|
func appendRange(r []rune, lo, hi rune) []rune {
|
||||||
// Expand last range or next to last range if it overlaps or abuts.
|
// Expand last range or next to last range if it overlaps or abuts.
|
||||||
// Checking two ranges helps when appending case-folded
|
// Checking two ranges helps when appending case-folded
|
||||||
// alphabets, so that one range can be expanding A-Z and the
|
// alphabets, so that one range can be expanding A-Z and the
|
||||||
@ -1642,7 +1642,7 @@ const (
|
|||||||
|
|
||||||
// appendFoldedRange returns the result of appending the range lo-hi
|
// appendFoldedRange returns the result of appending the range lo-hi
|
||||||
// and its case folding-equivalent runes to the class r.
|
// and its case folding-equivalent runes to the class r.
|
||||||
func appendFoldedRange(r []int, lo, hi int) []int {
|
func appendFoldedRange(r []rune, lo, hi rune) []rune {
|
||||||
// Optimizations.
|
// Optimizations.
|
||||||
if lo <= minFold && hi >= maxFold {
|
if lo <= minFold && hi >= maxFold {
|
||||||
// Range is full: folding can't add more.
|
// Range is full: folding can't add more.
|
||||||
@ -1677,7 +1677,7 @@ func appendFoldedRange(r []int, lo, hi int) []int {
|
|||||||
|
|
||||||
// appendClass returns the result of appending the class x to the class r.
|
// appendClass returns the result of appending the class x to the class r.
|
||||||
// It assumes x is clean.
|
// It assumes x is clean.
|
||||||
func appendClass(r []int, x []int) []int {
|
func appendClass(r []rune, x []rune) []rune {
|
||||||
for i := 0; i < len(x); i += 2 {
|
for i := 0; i < len(x); i += 2 {
|
||||||
r = appendRange(r, x[i], x[i+1])
|
r = appendRange(r, x[i], x[i+1])
|
||||||
}
|
}
|
||||||
@ -1685,7 +1685,7 @@ func appendClass(r []int, x []int) []int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// appendFoldedClass returns the result of appending the case folding of the class x to the class r.
|
// appendFoldedClass returns the result of appending the case folding of the class x to the class r.
|
||||||
func appendFoldedClass(r []int, x []int) []int {
|
func appendFoldedClass(r []rune, x []rune) []rune {
|
||||||
for i := 0; i < len(x); i += 2 {
|
for i := 0; i < len(x); i += 2 {
|
||||||
r = appendFoldedRange(r, x[i], x[i+1])
|
r = appendFoldedRange(r, x[i], x[i+1])
|
||||||
}
|
}
|
||||||
@ -1694,8 +1694,8 @@ func appendFoldedClass(r []int, x []int) []int {
|
|||||||
|
|
||||||
// appendNegatedClass returns the result of appending the negation of the class x to the class r.
|
// appendNegatedClass returns the result of appending the negation of the class x to the class r.
|
||||||
// It assumes x is clean.
|
// It assumes x is clean.
|
||||||
func appendNegatedClass(r []int, x []int) []int {
|
func appendNegatedClass(r []rune, x []rune) []rune {
|
||||||
nextLo := 0
|
nextLo := rune('\u0000')
|
||||||
for i := 0; i < len(x); i += 2 {
|
for i := 0; i < len(x); i += 2 {
|
||||||
lo, hi := x[i], x[i+1]
|
lo, hi := x[i], x[i+1]
|
||||||
if nextLo <= lo-1 {
|
if nextLo <= lo-1 {
|
||||||
@ -1710,9 +1710,9 @@ func appendNegatedClass(r []int, x []int) []int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// appendTable returns the result of appending x to the class r.
|
// appendTable returns the result of appending x to the class r.
|
||||||
func appendTable(r []int, x *unicode.RangeTable) []int {
|
func appendTable(r []rune, x *unicode.RangeTable) []rune {
|
||||||
for _, xr := range x.R16 {
|
for _, xr := range x.R16 {
|
||||||
lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
|
lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
|
||||||
if stride == 1 {
|
if stride == 1 {
|
||||||
r = appendRange(r, lo, hi)
|
r = appendRange(r, lo, hi)
|
||||||
continue
|
continue
|
||||||
@ -1722,7 +1722,7 @@ func appendTable(r []int, x *unicode.RangeTable) []int {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, xr := range x.R32 {
|
for _, xr := range x.R32 {
|
||||||
lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
|
lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
|
||||||
if stride == 1 {
|
if stride == 1 {
|
||||||
r = appendRange(r, lo, hi)
|
r = appendRange(r, lo, hi)
|
||||||
continue
|
continue
|
||||||
@ -1735,10 +1735,10 @@ func appendTable(r []int, x *unicode.RangeTable) []int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// appendNegatedTable returns the result of appending the negation of x to the class r.
|
// appendNegatedTable returns the result of appending the negation of x to the class r.
|
||||||
func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
|
func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
|
||||||
nextLo := 0 // lo end of next class to add
|
nextLo := rune('\u0000') // lo end of next class to add
|
||||||
for _, xr := range x.R16 {
|
for _, xr := range x.R16 {
|
||||||
lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
|
lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
|
||||||
if stride == 1 {
|
if stride == 1 {
|
||||||
if nextLo <= lo-1 {
|
if nextLo <= lo-1 {
|
||||||
r = appendRange(r, nextLo, lo-1)
|
r = appendRange(r, nextLo, lo-1)
|
||||||
@ -1754,7 +1754,7 @@ func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, xr := range x.R32 {
|
for _, xr := range x.R32 {
|
||||||
lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
|
lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
|
||||||
if stride == 1 {
|
if stride == 1 {
|
||||||
if nextLo <= lo-1 {
|
if nextLo <= lo-1 {
|
||||||
r = appendRange(r, nextLo, lo-1)
|
r = appendRange(r, nextLo, lo-1)
|
||||||
@ -1777,9 +1777,9 @@ func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
|
|||||||
|
|
||||||
// negateClass overwrites r and returns r's negation.
|
// negateClass overwrites r and returns r's negation.
|
||||||
// It assumes the class r is already clean.
|
// It assumes the class r is already clean.
|
||||||
func negateClass(r []int) []int {
|
func negateClass(r []rune) []rune {
|
||||||
nextLo := 0 // lo end of next class to add
|
nextLo := rune('\u0000') // lo end of next class to add
|
||||||
w := 0 // write index
|
w := 0 // write index
|
||||||
for i := 0; i < len(r); i += 2 {
|
for i := 0; i < len(r); i += 2 {
|
||||||
lo, hi := r[i], r[i+1]
|
lo, hi := r[i], r[i+1]
|
||||||
if nextLo <= lo-1 {
|
if nextLo <= lo-1 {
|
||||||
@ -1801,9 +1801,9 @@ func negateClass(r []int) []int {
|
|||||||
// ranges implements sort.Interface on a []rune.
|
// ranges implements sort.Interface on a []rune.
|
||||||
// The choice of receiver type definition is strange
|
// The choice of receiver type definition is strange
|
||||||
// but avoids an allocation since we already have
|
// but avoids an allocation since we already have
|
||||||
// a *[]int.
|
// a *[]rune.
|
||||||
type ranges struct {
|
type ranges struct {
|
||||||
p *[]int
|
p *[]rune
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ra ranges) Less(i, j int) bool {
|
func (ra ranges) Less(i, j int) bool {
|
||||||
@ -1835,7 +1835,7 @@ func checkUTF8(s string) os.Error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func nextRune(s string) (c int, t string, err os.Error) {
|
func nextRune(s string) (c rune, t string, err os.Error) {
|
||||||
c, size := utf8.DecodeRuneInString(s)
|
c, size := utf8.DecodeRuneInString(s)
|
||||||
if c == utf8.RuneError && size == 1 {
|
if c == utf8.RuneError && size == 1 {
|
||||||
return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
|
return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
|
||||||
@ -1843,11 +1843,11 @@ func nextRune(s string) (c int, t string, err os.Error) {
|
|||||||
return c, s[size:], nil
|
return c, s[size:], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func isalnum(c int) bool {
|
func isalnum(c rune) bool {
|
||||||
return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
|
return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
|
||||||
}
|
}
|
||||||
|
|
||||||
func unhex(c int) int {
|
func unhex(c rune) rune {
|
||||||
if '0' <= c && c <= '9' {
|
if '0' <= c && c <= '9' {
|
||||||
return c - '0'
|
return c - '0'
|
||||||
}
|
}
|
||||||
|
@ -371,10 +371,10 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) {
|
|||||||
b.WriteByte('}')
|
b.WriteByte('}')
|
||||||
}
|
}
|
||||||
|
|
||||||
func mkCharClass(f func(int) bool) string {
|
func mkCharClass(f func(rune) bool) string {
|
||||||
re := &Regexp{Op: OpCharClass}
|
re := &Regexp{Op: OpCharClass}
|
||||||
lo := -1
|
lo := rune(-1)
|
||||||
for i := 0; i <= unicode.MaxRune; i++ {
|
for i := rune(0); i <= unicode.MaxRune; i++ {
|
||||||
if f(i) {
|
if f(i) {
|
||||||
if lo < 0 {
|
if lo < 0 {
|
||||||
lo = i
|
lo = i
|
||||||
@ -392,12 +392,12 @@ func mkCharClass(f func(int) bool) string {
|
|||||||
return dump(re)
|
return dump(re)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isUpperFold(rune int) bool {
|
func isUpperFold(r rune) bool {
|
||||||
if unicode.IsUpper(rune) {
|
if unicode.IsUpper(r) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
c := unicode.SimpleFold(rune)
|
c := unicode.SimpleFold(r)
|
||||||
for c != rune {
|
for c != r {
|
||||||
if unicode.IsUpper(c) {
|
if unicode.IsUpper(c) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
@ -407,8 +407,8 @@ func isUpperFold(rune int) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestFoldConstants(t *testing.T) {
|
func TestFoldConstants(t *testing.T) {
|
||||||
last := -1
|
last := rune(-1)
|
||||||
for i := 0; i <= unicode.MaxRune; i++ {
|
for i := rune(0); i <= unicode.MaxRune; i++ {
|
||||||
if unicode.SimpleFold(i) == i {
|
if unicode.SimpleFold(i) == i {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -427,8 +427,8 @@ func TestAppendRangeCollapse(t *testing.T) {
|
|||||||
// into the earlier ones (it looks back two ranges), so that
|
// into the earlier ones (it looks back two ranges), so that
|
||||||
// the slice never grows very large.
|
// the slice never grows very large.
|
||||||
// Note that we are not calling cleanClass.
|
// Note that we are not calling cleanClass.
|
||||||
var r []int
|
var r []rune
|
||||||
for i := 'A'; i <= 'Z'; i++ {
|
for i := rune('A'); i <= 'Z'; i++ {
|
||||||
r = appendRange(r, i, i)
|
r = appendRange(r, i, i)
|
||||||
r = appendRange(r, i+'a'-'A', i+'a'-'A')
|
r = appendRange(r, i+'a'-'A', i+'a'-'A')
|
||||||
}
|
}
|
||||||
|
@ -3,17 +3,17 @@
|
|||||||
|
|
||||||
package syntax
|
package syntax
|
||||||
|
|
||||||
var code1 = []int{ /* \d */
|
var code1 = []rune{ /* \d */
|
||||||
0x30, 0x39,
|
0x30, 0x39,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code2 = []int{ /* \s */
|
var code2 = []rune{ /* \s */
|
||||||
0x9, 0xa,
|
0x9, 0xa,
|
||||||
0xc, 0xd,
|
0xc, 0xd,
|
||||||
0x20, 0x20,
|
0x20, 0x20,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code3 = []int{ /* \w */
|
var code3 = []rune{ /* \w */
|
||||||
0x30, 0x39,
|
0x30, 0x39,
|
||||||
0x41, 0x5a,
|
0x41, 0x5a,
|
||||||
0x5f, 0x5f,
|
0x5f, 0x5f,
|
||||||
@ -28,71 +28,71 @@ var perlGroup = map[string]charGroup{
|
|||||||
`\w`: {+1, code3},
|
`\w`: {+1, code3},
|
||||||
`\W`: {-1, code3},
|
`\W`: {-1, code3},
|
||||||
}
|
}
|
||||||
var code4 = []int{ /* [:alnum:] */
|
var code4 = []rune{ /* [:alnum:] */
|
||||||
0x30, 0x39,
|
0x30, 0x39,
|
||||||
0x41, 0x5a,
|
0x41, 0x5a,
|
||||||
0x61, 0x7a,
|
0x61, 0x7a,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code5 = []int{ /* [:alpha:] */
|
var code5 = []rune{ /* [:alpha:] */
|
||||||
0x41, 0x5a,
|
0x41, 0x5a,
|
||||||
0x61, 0x7a,
|
0x61, 0x7a,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code6 = []int{ /* [:ascii:] */
|
var code6 = []rune{ /* [:ascii:] */
|
||||||
0x0, 0x7f,
|
0x0, 0x7f,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code7 = []int{ /* [:blank:] */
|
var code7 = []rune{ /* [:blank:] */
|
||||||
0x9, 0x9,
|
0x9, 0x9,
|
||||||
0x20, 0x20,
|
0x20, 0x20,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code8 = []int{ /* [:cntrl:] */
|
var code8 = []rune{ /* [:cntrl:] */
|
||||||
0x0, 0x1f,
|
0x0, 0x1f,
|
||||||
0x7f, 0x7f,
|
0x7f, 0x7f,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code9 = []int{ /* [:digit:] */
|
var code9 = []rune{ /* [:digit:] */
|
||||||
0x30, 0x39,
|
0x30, 0x39,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code10 = []int{ /* [:graph:] */
|
var code10 = []rune{ /* [:graph:] */
|
||||||
0x21, 0x7e,
|
0x21, 0x7e,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code11 = []int{ /* [:lower:] */
|
var code11 = []rune{ /* [:lower:] */
|
||||||
0x61, 0x7a,
|
0x61, 0x7a,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code12 = []int{ /* [:print:] */
|
var code12 = []rune{ /* [:print:] */
|
||||||
0x20, 0x7e,
|
0x20, 0x7e,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code13 = []int{ /* [:punct:] */
|
var code13 = []rune{ /* [:punct:] */
|
||||||
0x21, 0x2f,
|
0x21, 0x2f,
|
||||||
0x3a, 0x40,
|
0x3a, 0x40,
|
||||||
0x5b, 0x60,
|
0x5b, 0x60,
|
||||||
0x7b, 0x7e,
|
0x7b, 0x7e,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code14 = []int{ /* [:space:] */
|
var code14 = []rune{ /* [:space:] */
|
||||||
0x9, 0xd,
|
0x9, 0xd,
|
||||||
0x20, 0x20,
|
0x20, 0x20,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code15 = []int{ /* [:upper:] */
|
var code15 = []rune{ /* [:upper:] */
|
||||||
0x41, 0x5a,
|
0x41, 0x5a,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code16 = []int{ /* [:word:] */
|
var code16 = []rune{ /* [:word:] */
|
||||||
0x30, 0x39,
|
0x30, 0x39,
|
||||||
0x41, 0x5a,
|
0x41, 0x5a,
|
||||||
0x5f, 0x5f,
|
0x5f, 0x5f,
|
||||||
0x61, 0x7a,
|
0x61, 0x7a,
|
||||||
}
|
}
|
||||||
|
|
||||||
var code17 = []int{ /* [:xdigit:] */
|
var code17 = []rune{ /* [:xdigit:] */
|
||||||
0x30, 0x39,
|
0x30, 0x39,
|
||||||
0x41, 0x46,
|
0x41, 0x46,
|
||||||
0x61, 0x66,
|
0x61, 0x66,
|
||||||
|
@ -51,7 +51,7 @@ const (
|
|||||||
// at the beginning of the text.
|
// at the beginning of the text.
|
||||||
// Passing r2 == -1 indicates that the position is
|
// Passing r2 == -1 indicates that the position is
|
||||||
// at the end of the text.
|
// at the end of the text.
|
||||||
func EmptyOpContext(r1, r2 int) EmptyOp {
|
func EmptyOpContext(r1, r2 rune) EmptyOp {
|
||||||
var op EmptyOp
|
var op EmptyOp
|
||||||
if r1 < 0 {
|
if r1 < 0 {
|
||||||
op |= EmptyBeginText | EmptyBeginLine
|
op |= EmptyBeginText | EmptyBeginLine
|
||||||
@ -76,7 +76,7 @@ func EmptyOpContext(r1, r2 int) EmptyOp {
|
|||||||
// IsWordChar reports whether r is consider a ``word character''
|
// IsWordChar reports whether r is consider a ``word character''
|
||||||
// during the evaluation of the \b and \B zero-width assertions.
|
// during the evaluation of the \b and \B zero-width assertions.
|
||||||
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
|
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
|
||||||
func IsWordChar(r int) bool {
|
func IsWordChar(r rune) bool {
|
||||||
return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
|
return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,7 +85,7 @@ type Inst struct {
|
|||||||
Op InstOp
|
Op InstOp
|
||||||
Out uint32 // all but InstMatch, InstFail
|
Out uint32 // all but InstMatch, InstFail
|
||||||
Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
|
Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
|
||||||
Rune []int
|
Rune []rune
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Prog) String() string {
|
func (p *Prog) String() string {
|
||||||
@ -161,7 +161,7 @@ Loop:
|
|||||||
|
|
||||||
// MatchRune returns true if the instruction matches (and consumes) r.
|
// MatchRune returns true if the instruction matches (and consumes) r.
|
||||||
// It should only be called when i.Op == InstRune.
|
// It should only be called when i.Op == InstRune.
|
||||||
func (i *Inst) MatchRune(r int) bool {
|
func (i *Inst) MatchRune(r rune) bool {
|
||||||
rune := i.Rune
|
rune := i.Rune
|
||||||
|
|
||||||
// Special case: single-rune slice is from literal string, not char class.
|
// Special case: single-rune slice is from literal string, not char class.
|
||||||
@ -210,17 +210,17 @@ func (i *Inst) MatchRune(r int) bool {
|
|||||||
|
|
||||||
// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
|
// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
|
||||||
// Since we act on runes, it would be easy to support Unicode here.
|
// Since we act on runes, it would be easy to support Unicode here.
|
||||||
func wordRune(rune int) bool {
|
func wordRune(r rune) bool {
|
||||||
return rune == '_' ||
|
return r == '_' ||
|
||||||
('A' <= rune && rune <= 'Z') ||
|
('A' <= r && r <= 'Z') ||
|
||||||
('a' <= rune && rune <= 'z') ||
|
('a' <= r && r <= 'z') ||
|
||||||
('0' <= rune && rune <= '9')
|
('0' <= r && r <= '9')
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchEmptyWidth returns true if the instruction matches
|
// MatchEmptyWidth returns true if the instruction matches
|
||||||
// an empty string between the runes before and after.
|
// an empty string between the runes before and after.
|
||||||
// It should only be called when i.Op == InstEmptyWidth.
|
// It should only be called when i.Op == InstEmptyWidth.
|
||||||
func (i *Inst) MatchEmptyWidth(before int, after int) bool {
|
func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
|
||||||
switch EmptyOp(i.Arg) {
|
switch EmptyOp(i.Arg) {
|
||||||
case EmptyBeginLine:
|
case EmptyBeginLine:
|
||||||
return before == '\n' || before == -1
|
return before == '\n' || before == -1
|
||||||
|
@ -22,8 +22,8 @@ type Regexp struct {
|
|||||||
Flags Flags
|
Flags Flags
|
||||||
Sub []*Regexp // subexpressions, if any
|
Sub []*Regexp // subexpressions, if any
|
||||||
Sub0 [1]*Regexp // storage for short Sub
|
Sub0 [1]*Regexp // storage for short Sub
|
||||||
Rune []int // matched runes, for OpLiteral, OpCharClass
|
Rune []rune // matched runes, for OpLiteral, OpCharClass
|
||||||
Rune0 [2]int // storage for short Rune
|
Rune0 [2]rune // storage for short Rune
|
||||||
Min, Max int // min, max for OpRepeat
|
Min, Max int // min, max for OpRepeat
|
||||||
Cap int // capturing index, for OpCapture
|
Cap int // capturing index, for OpCapture
|
||||||
Name string // capturing name, for OpCapture
|
Name string // capturing name, for OpCapture
|
||||||
@ -252,7 +252,7 @@ func (re *Regexp) String() string {
|
|||||||
|
|
||||||
const meta = `\.+*?()|[]{}^$`
|
const meta = `\.+*?()|[]{}^$`
|
||||||
|
|
||||||
func escape(b *bytes.Buffer, r int, force bool) {
|
func escape(b *bytes.Buffer, r rune, force bool) {
|
||||||
if unicode.IsPrint(r) {
|
if unicode.IsPrint(r) {
|
||||||
if strings.IndexRune(meta, r) >= 0 || force {
|
if strings.IndexRune(meta, r) >= 0 || force {
|
||||||
b.WriteRune('\\')
|
b.WriteRune('\\')
|
||||||
@ -277,7 +277,7 @@ func escape(b *bytes.Buffer, r int, force bool) {
|
|||||||
default:
|
default:
|
||||||
if r < 0x100 {
|
if r < 0x100 {
|
||||||
b.WriteString(`\x`)
|
b.WriteString(`\x`)
|
||||||
s := strconv.Itob(r, 16)
|
s := strconv.Itob(int(r), 16)
|
||||||
if len(s) == 1 {
|
if len(s) == 1 {
|
||||||
b.WriteRune('0')
|
b.WriteRune('0')
|
||||||
}
|
}
|
||||||
@ -285,7 +285,7 @@ func escape(b *bytes.Buffer, r int, force bool) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
b.WriteString(`\x{`)
|
b.WriteString(`\x{`)
|
||||||
b.WriteString(strconv.Itob(r, 16))
|
b.WriteString(strconv.Itob(int(r), 16))
|
||||||
b.WriteString(`}`)
|
b.WriteString(`}`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user