1
0
mirror of https://github.com/golang/go synced 2024-11-22 00:34:40 -07:00

gc: disallow NUL byte, catch more invalid UTF-8, test

R=ken2, ken3
CC=golang-dev
https://golang.org/cl/209041
This commit is contained in:
Russ Cox 2010-02-16 16:47:39 -08:00
parent b7d9ffeecd
commit cfff862862
3 changed files with 89 additions and 31 deletions

View File

@ -520,18 +520,19 @@ l0:
ncp = 8; ncp = 8;
for(;;) { for(;;) {
if(clen == ncp) { if(clen+UTFmax > ncp) {
cp = remal(cp, clen, ncp); cp = remal(cp, ncp, ncp);
ncp += ncp; ncp += ncp;
} }
c = getc(); c = getr();
if(c == EOF) { if(c == EOF) {
yyerror("eof in string"); yyerror("eof in string");
break; break;
} }
if(c == '`') if(c == '`')
break; break;
cp[clen++] = c; rune = c;
clen += runetochar(cp+clen, &rune);
} }
strlit: strlit:
@ -821,28 +822,16 @@ talph:
*/ */
for(;;) { for(;;) {
if(c >= Runeself) { if(c >= Runeself) {
for(c1=0;;) { ungetc(c);
cp[c1++] = c; rune = getr();
if(fullrune(cp, c1)) { // 0xb7 · is used for internal names
chartorune(&rune, cp); if(!isalpharune(rune) && !isdigitrune(rune) && rune != 0xb7)
if(isfrog(rune)) { yyerror("invalid identifier character 0x%ux", rune);
yyerror("illegal character 0x%ux", rune); cp += runetochar(cp, &rune);
goto l0; } else if(!isalnum(c) && c != '_')
}
// 0xb7 · is used for internal names
if(!isalpharune(rune) && !isdigitrune(rune) && rune != 0xb7)
yyerror("invalid identifier character 0x%ux", rune);
break;
}
c = getc();
}
cp += c1;
c = getc();
continue;
}
if(!isalnum(c) && c != '_')
break; break;
*cp++ = c; else
*cp++ = c;
c = getc(); c = getc();
} }
*cp = 0; *cp = 0;
@ -1054,8 +1043,10 @@ getc(void)
switch(c) { switch(c) {
case 0: case 0:
if(curio.bin != nil) if(curio.bin != nil) {
yyerror("illegal NUL byte");
break; break;
}
case EOF: case EOF:
return EOF; return EOF;
@ -1097,10 +1088,11 @@ loop:
c = chartorune(&rune, str); c = chartorune(&rune, str);
if(rune == Runeerror && c == 1) { if(rune == Runeerror && c == 1) {
lineno = lexlineno; lineno = lexlineno;
yyerror("illegal UTF-8 sequence in comment or string"); yyerror("illegal UTF-8 sequence");
flusherrors(); flusherrors();
print("\t");
for(c=0; c<i; c++) for(c=0; c<i; c++)
print(" %.2x", *(uchar*)(str+c)); print("%s%.2x", c > 0 ? " " : "", *(uchar*)(str+c));
print("\n"); print("\n");
} }
return rune; return rune;
@ -1209,11 +1201,11 @@ oct:
l = l*8 + c-'0'; l = l*8 + c-'0';
continue; continue;
} }
yyerror("non-oct character in escape sequence: %c", c); yyerror("non-octal character in escape sequence: %c", c);
ungetc(c); ungetc(c);
} }
if(l > 255) if(l > 255)
yyerror("oct escape value > 255: %d", l); yyerror("octal escape value > 255: %d", l);
*val = l; *val = l;
return 0; return 0;

View File

@ -1525,6 +1525,7 @@ Zconv(Fmt *fp)
Rune r; Rune r;
Strlit *sp; Strlit *sp;
char *s, *se; char *s, *se;
int n;
sp = va_arg(fp->args, Strlit*); sp = va_arg(fp->args, Strlit*);
if(sp == nil) if(sp == nil)
@ -1533,8 +1534,15 @@ Zconv(Fmt *fp)
s = sp->s; s = sp->s;
se = s + sp->len; se = s + sp->len;
while(s < se) { while(s < se) {
s += chartorune(&r, s); n = chartorune(&r, s);
s += n;
switch(r) { switch(r) {
case Runeerror:
if(n == 1) {
fmtprint(fp, "\\x%02x", *(s-1));
break;
}
// fall through
default: default:
if(r < ' ') { if(r < ' ') {
fmtprint(fp, "\\x%02x", r); fmtprint(fp, "\\x%02x", r);

58
test/nul.go Normal file
View File

@ -0,0 +1,58 @@
// $G $D/$F.go && $L $F.$A && ./$A.out >tmp.go &&
// errchk $G -e tmp.go
// rm -f tmp.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test source files and strings containing NUL and invalid UTF-8.
package main
import (
"fmt"
"os"
)
// main checks that string literals containing bytes that are not valid
// UTF-8 (including an embedded NUL) keep exactly the bytes that were
// written, exiting with status 2 if any of them changed. It then prints a
// Go source file to standard output in which every line holding a NUL byte
// or a malformed UTF-8 sequence carries an ERROR annotation.
func main() {
	// Each literal is paired with the raw bytes it must contain.
	literals := []struct {
		got  string
		want []byte
	}{
		{"\xc2\xff", []byte{0xc2, 0xff}},
		{"\xd0\xfe", []byte{0xd0, 0xfe}},
		{"\xab\x00\xfc", []byte{0xab, 0x00, 0xfc}},
	}
	for _, lit := range literals {
		if lit.got != string(lit.want) {
			println("BUG: non-UTF-8 string mangled")
			os.Exit(2)
		}
	}

	// The generated program is assembled from raw-string pieces with the
	// offending bytes spliced in through interpreted literals, so this
	// file itself stays free of NUL bytes and invalid UTF-8. The raw
	// pieces are emitted verbatim; do not re-indent them.
	fmt.Print(`
package main
var x = "in string ` + "\x00" + `" // ERROR "NUL"
var y = ` + "`in raw string \x00 foo`" + ` // ERROR "NUL"
// in comment ` + "\x00" + ` // ERROR "NUL"
/* in other comment ` + "\x00" + ` */ // ERROR "NUL"
/* in source code */ ` + "\x00" + `// ERROR "NUL"
var xx = "in string ` + "\xc2\xff" + `" // ERROR "UTF-8"
var yy = ` + "`in raw string \xff foo`" + ` // ERROR "UTF-8"
// in comment ` + "\xe2\x80\x01" + ` // ERROR "UTF-8"
/* in other comment ` + "\xe0\x00\x00" + ` */ // ERROR "UTF-8"
/* in variable name */
var z` + "\xc1\x81" + ` int // ERROR "UTF-8"
/* in source code */ ` + "\xc2A" + `// ERROR "UTF-8"
`)
}