1
0
mirror of https://github.com/golang/go synced 2024-11-25 07:37:57 -07:00

cmd/yacc: allow utf-8 token values

Also clean up the code and allow \U.
Fixes #3007.

R=golang-dev, rsc, 0xjnml
CC=golang-dev
https://golang.org/cl/6492105
This commit is contained in:
Rob Pike 2012-09-13 13:59:00 -07:00
parent f3fc0090f4
commit f269f9a3c7
2 changed files with 63 additions and 53 deletions

View File

@ -76,7 +76,7 @@ var vflag bool
%type <node> prog expr expr0 expr1 expr2 expr3 expr4 %type <node> prog expr expr0 expr1 expr2 expr3 expr4
%token <vval> VAL %token <vval> VÄL // dieresis to test UTF-8
%token <vvar> VAR %token <vvar> VAR
%token <numb> _SUP // tests leading underscore in token name %token <numb> _SUP // tests leading underscore in token name
%% %%
@ -199,7 +199,7 @@ expr0:
$$ = $1.node $$ = $1.node
} }
} }
| VAL | VÄL
{ {
$$ = one $$ = one
$$.vval = $1 $$.vval = $1
@ -275,7 +275,7 @@ numb:
f = 0 f = 0
} }
yylval.vval = f yylval.vval = f
return VAL return VÄL
} }
func (UnitsLex) Error(s string) { func (UnitsLex) Error(s string) {

View File

@ -52,6 +52,7 @@ import (
"os" "os"
"strings" "strings"
"unicode" "unicode"
"unicode/utf8"
) )
// the following are adjustable // the following are adjustable
@ -326,7 +327,6 @@ var resrv = []Resrv{
var zznewstate = 0 var zznewstate = 0
const EOF = -1 const EOF = -1
const UTFmax = 0x3f
func main() { func main() {
@ -719,8 +719,8 @@ func moreprod() {
} }
// //
// define s to be a terminal if t=0 // define s to be a terminal if nt==0
// or a nonterminal if t=1 // or a nonterminal if nt==1
// //
func defin(nt int, s string) int { func defin(nt int, s string) int {
val := 0 val := 0
@ -753,56 +753,66 @@ func defin(nt int, s string) int {
// establish value for token // establish value for token
// single character literal // single character literal
if s[0] == ' ' && len(s) == 1+1 { if s[0] == ' ' {
val = int(s[1]) s = s[1:]
} else if s[0] == ' ' && s[1] == '\\' { // escape sequence r, size := utf8.DecodeRuneInString(s)
if len(s) == 2+1 { if r == utf8.RuneError && size == 1 {
// single character escape sequence errorf("invalid UTF-8 sequence %q", s)
switch s[2] { }
case '\'': val = int(r)
val = '\'' if val == '\\' { // escape sequence
case '"': switch {
val = '"' case len(s) == 2:
case '\\': // single character escape sequence
val = '\\' switch s[1] {
case 'a': case '\'':
val = '\a' val = '\''
case 'b': case '"':
val = '\b' val = '"'
case 'n': case '\\':
val = '\n' val = '\\'
case 'r': case 'a':
val = '\r' val = '\a'
case 't': case 'b':
val = '\t' val = '\b'
case 'v': case 'f':
val = '\v' val = '\f'
default: case 'n':
errorf("invalid escape %v", s[1:3]) val = '\n'
} case 'r':
} else if s[2] == 'u' && len(s) == 2+1+4 { // \unnnn sequence val = '\r'
val = 0 case 't':
s = s[3:] val = '\t'
for s != "" { case 'v':
c := int(s[0]) val = '\v'
switch {
case c >= '0' && c <= '9':
c -= '0'
case c >= 'a' && c <= 'f':
c -= 'a' - 10
case c >= 'A' && c <= 'F':
c -= 'A' - 10
default: default:
errorf("illegal \\unnnn construction") errorf("invalid escape %s", s)
} }
val = val*16 + c case s[1] == 'u' && len(s) == 2+4, // \unnnn sequence
s = s[1:] s[1] == 'U' && len(s) == 2+8: // \Unnnnnnnn sequence
val = 0
s = s[2:]
for s != "" {
c := int(s[0])
switch {
case c >= '0' && c <= '9':
c -= '0'
case c >= 'a' && c <= 'f':
c -= 'a' - 10
case c >= 'A' && c <= 'F':
c -= 'A' - 10
default:
errorf(`illegal \u or \U construction`)
}
val = val*16 + c
s = s[1:]
}
default:
errorf("invalid escape %s", s)
} }
if val == 0 { }
errorf("'\\u0000' is illegal") if val == 0 {
} errorf("token value 0 is illegal")
} else {
errorf("unknown escape")
} }
} else { } else {
val = extval val = extval