From ca3b5222ebf8dce04d6ce484d7986f382927a168 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Mon, 30 Aug 2010 14:06:59 +1000 Subject: [PATCH] regexp: interpret all Go characer escapes \a \b \f \n \r \t \v R=rsc CC=golang-dev https://golang.org/cl/2042044 --- src/pkg/regexp/find_test.go | 2 ++ src/pkg/regexp/regexp.go | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/pkg/regexp/find_test.go b/src/pkg/regexp/find_test.go index d0aad82b71..6a34cabf5b 100644 --- a/src/pkg/regexp/find_test.go +++ b/src/pkg/regexp/find_test.go @@ -57,6 +57,8 @@ var findTests = []FindTest{ FindTest{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, FindTest{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, FindTest{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, + FindTest{`\a\b\f\n\r\t\v`, "\a\b\f\n\r\t\v", build(1, 0, 7)}, + FindTest{`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)}, FindTest{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)}, FindTest{`(.*).*`, "ab", build(1, 0, 2, 0, 2)}, diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index fce76953e3..f3e07d74a4 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -22,7 +22,8 @@ // character [ '-' character ] // // All characters are UTF-8-encoded code points. Backslashes escape special -// characters, including inside character classes. +// characters, including inside character classes. The standard Go character +// escapes are also recognized: \a \b \f \n \r \t \v. // // There are 16 methods of Regexp that match a regular expression and identify // the matched text. Their names are matched by this regular expression: @@ -353,6 +354,18 @@ func ispunct(c int) bool { return false } +var escapes = []byte("abfnrtv") +var escaped = []byte("\a\b\f\n\r\t\v") + +func escape(c int) int { + for i, b := range escapes { + if int(b) == c { + return i + } + } + return -1 +} + func (p *parser) charClass() instr { cc := newCharClass() if p.c() == '^' { @@ -388,10 +401,10 @@ func (p *parser) charClass() instr { switch { case c == endOfFile: p.error(ErrExtraneousBackslash) - case c == 'n': - c = '\n' case ispunct(c): // c is as delivered + case escape(c) >= 0: + c = int(escaped[escape(c)]) default: p.error(ErrBadBackslash) } @@ -483,10 +496,10 @@ func (p *parser) term() (start, end instr) { switch { case c == endOfFile: p.error(ErrExtraneousBackslash) - case c == 'n': - c = '\n' case ispunct(c): // c is as delivered + case escape(c) >= 0: + c = int(escaped[escape(c)]) default: p.error(ErrBadBackslash) }