mirror of
https://github.com/golang/go
synced 2024-09-24 15:30:13 -06:00
scanner cleanup - getting it ready to as a library
- removed unneeded code that accumulated over time - change src from string to []byte (perhaps should be io.Read but that has some other disadvantages) - simplified interface R=r OCL=25615 CL=25615
This commit is contained in:
parent
da34bea950
commit
d169268a1a
@ -30,13 +30,12 @@ type Flags struct {
|
||||
Sixg bool;
|
||||
Deps bool;
|
||||
Columns bool;
|
||||
Testmode bool;
|
||||
}
|
||||
|
||||
|
||||
type errorHandler struct {
|
||||
filename string;
|
||||
src string;
|
||||
src []byte;
|
||||
nerrors int;
|
||||
nwarnings int;
|
||||
errpos int;
|
||||
@ -44,7 +43,7 @@ type errorHandler struct {
|
||||
}
|
||||
|
||||
|
||||
func (h *errorHandler) Init(filename, src string, columns bool) {
|
||||
func (h *errorHandler) Init(filename string, src []byte, columns bool) {
|
||||
h.filename = filename;
|
||||
h.src = src;
|
||||
h.nerrors = 0;
|
||||
@ -71,7 +70,7 @@ func (h *errorHandler) LineCol(pos int) (line, col int) {
|
||||
}
|
||||
}
|
||||
|
||||
return line, utf8.RuneCountInString(src, lpos, pos - lpos);
|
||||
return line, utf8.RuneCount(src[lpos : pos]);
|
||||
}
|
||||
|
||||
|
||||
@ -128,10 +127,10 @@ func Compile(src_file string, flags *Flags) (*AST.Program, int) {
|
||||
err.Init(src_file, src, flags.Columns);
|
||||
|
||||
var scanner Scanner.Scanner;
|
||||
scanner.Init(&err, src, true, flags.Testmode);
|
||||
scanner.Init(src, &err, true);
|
||||
|
||||
var parser Parser.Parser;
|
||||
parser.Open(flags.Verbose, flags.Sixg, flags.Deps, &scanner);
|
||||
parser.Open(&scanner, err, flags.Verbose, flags.Sixg, flags.Deps);
|
||||
|
||||
prog := parser.ParseProgram();
|
||||
|
||||
|
@ -14,13 +14,20 @@ import (
|
||||
)
|
||||
|
||||
|
||||
type ErrorHandler interface {
|
||||
Error(pos int, msg string);
|
||||
Warning(pos int, msg string);
|
||||
}
|
||||
|
||||
|
||||
type Parser struct {
|
||||
scanner *Scanner.Scanner;
|
||||
err ErrorHandler;
|
||||
|
||||
// Tracing/debugging
|
||||
trace, sixg, deps bool;
|
||||
indent uint;
|
||||
|
||||
// Scanner
|
||||
scanner *Scanner.Scanner;
|
||||
comments *vector.Vector;
|
||||
|
||||
// Scanner.Token
|
||||
@ -90,7 +97,10 @@ func un/*trace*/(P *Parser) {
|
||||
|
||||
|
||||
func (P *Parser) next0() {
|
||||
P.pos, P.tok, P.val = P.scanner.Scan();
|
||||
// TODO make P.val a []byte
|
||||
var val []byte;
|
||||
P.pos, P.tok, val = P.scanner.Scan();
|
||||
P.val = string(val);
|
||||
P.opt_semi = false;
|
||||
|
||||
if P.trace {
|
||||
@ -118,13 +128,15 @@ func (P *Parser) next() {
|
||||
}
|
||||
|
||||
|
||||
func (P *Parser) Open(trace, sixg, deps bool, scanner *Scanner.Scanner) {
|
||||
func (P *Parser) Open(scanner *Scanner.Scanner, err ErrorHandler, trace, sixg, deps bool) {
|
||||
P.scanner = scanner;
|
||||
P.err = err;
|
||||
|
||||
P.trace = trace;
|
||||
P.sixg = sixg;
|
||||
P.deps = deps;
|
||||
P.indent = 0;
|
||||
|
||||
P.scanner = scanner;
|
||||
P.comments = vector.New(0);
|
||||
|
||||
P.next();
|
||||
@ -133,7 +145,7 @@ func (P *Parser) Open(trace, sixg, deps bool, scanner *Scanner.Scanner) {
|
||||
|
||||
|
||||
func (P *Parser) error(pos int, msg string) {
|
||||
P.scanner.Error(pos, msg);
|
||||
P.err.Error(pos, msg);
|
||||
}
|
||||
|
||||
|
||||
|
@ -37,10 +37,10 @@ const (
|
||||
Obj_file_ext = ".7";
|
||||
)
|
||||
|
||||
func readfile(filename string) (string, *OS.Error) {
|
||||
func readfile(filename string) ([]byte, *OS.Error) {
|
||||
fd, err := OS.Open(filename, OS.O_RDONLY, 0);
|
||||
if err != nil {
|
||||
return "", err;
|
||||
return []byte(), err;
|
||||
}
|
||||
var buf [1<<20]byte;
|
||||
n, err1 := IO.Readn(fd, buf);
|
||||
@ -48,7 +48,7 @@ func readfile(filename string) (string, *OS.Error) {
|
||||
if err1 == IO.ErrEOF {
|
||||
err1 = nil;
|
||||
}
|
||||
return string(buf[0:n]), err1;
|
||||
return buf[0:n], err1;
|
||||
}
|
||||
|
||||
func writefile(name, data string) *OS.Error {
|
||||
@ -61,17 +61,17 @@ func writefile(name, data string) *OS.Error {
|
||||
return err1;
|
||||
}
|
||||
|
||||
func ReadObjectFile(filename string) (string, bool) {
|
||||
func ReadObjectFile(filename string) ([]byte, bool) {
|
||||
data, err := readfile(filename + Obj_file_ext);
|
||||
magic := MAGIC_obj_file; // TODO remove once len(constant) works
|
||||
if err == nil && len(data) >= len(magic) && data[0 : len(magic)] == magic {
|
||||
if err == nil && len(data) >= len(magic) && string(data[0 : len(magic)]) == magic {
|
||||
return data, true;
|
||||
}
|
||||
return "", false;
|
||||
return []byte(), false;
|
||||
}
|
||||
|
||||
|
||||
func ReadSourceFile(name string) (string, bool) {
|
||||
func ReadSourceFile(name string) ([]byte, bool) {
|
||||
name = Utils.TrimExt(name, Src_file_ext) + Src_file_ext;
|
||||
data, err := readfile(name);
|
||||
return data, err == nil;
|
||||
|
@ -24,7 +24,6 @@ func init() {
|
||||
flag.BoolVar(&flags.Sixg, "6g", true, "6g compatibility mode");
|
||||
flag.BoolVar(&flags.Deps, "d", false, "print dependency information only");
|
||||
flag.BoolVar(&flags.Columns, "columns", Platform.USER == "gri", "print column info in error messages");
|
||||
flag.BoolVar(&flags.Testmode, "t", false, "test mode: interprets /* ERROR */ and /* SYNC */ comments");
|
||||
}
|
||||
|
||||
|
||||
@ -52,12 +51,9 @@ func main() {
|
||||
} else {
|
||||
prog, nerrors := Compilation.Compile(src_file, &flags);
|
||||
if nerrors > 0 {
|
||||
if flags.Testmode {
|
||||
return; // TODO we shouldn't need this
|
||||
}
|
||||
sys.Exit(1);
|
||||
}
|
||||
if !*silent && !flags.Testmode {
|
||||
if !*silent {
|
||||
Printer.Print(os.Stdout, *html, prog);
|
||||
}
|
||||
}
|
||||
|
@ -2,23 +2,22 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package Scanner
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"utf8";
|
||||
"unicode";
|
||||
"utils";
|
||||
"strconv";
|
||||
)
|
||||
|
||||
const (
|
||||
ILLEGAL = iota;
|
||||
|
||||
IDENT;
|
||||
EOF;
|
||||
|
||||
INT;
|
||||
FLOAT;
|
||||
STRING;
|
||||
EOF;
|
||||
|
||||
IDENT;
|
||||
COMMENT;
|
||||
|
||||
ADD;
|
||||
@ -52,28 +51,28 @@ const (
|
||||
DEC;
|
||||
|
||||
EQL;
|
||||
NEQ;
|
||||
LSS;
|
||||
LEQ;
|
||||
GTR;
|
||||
GEQ;
|
||||
|
||||
ASSIGN;
|
||||
DEFINE;
|
||||
NOT;
|
||||
|
||||
NEQ;
|
||||
LEQ;
|
||||
GEQ;
|
||||
DEFINE;
|
||||
ELLIPSIS;
|
||||
|
||||
LPAREN;
|
||||
RPAREN;
|
||||
LBRACK;
|
||||
RBRACK;
|
||||
LBRACE;
|
||||
RBRACE;
|
||||
|
||||
COMMA;
|
||||
PERIOD;
|
||||
|
||||
RPAREN;
|
||||
RBRACK;
|
||||
RBRACE;
|
||||
SEMICOLON;
|
||||
COLON;
|
||||
PERIOD;
|
||||
|
||||
// keywords
|
||||
keywords_beg;
|
||||
@ -113,13 +112,12 @@ const (
|
||||
func TokenString(tok int) string {
|
||||
switch tok {
|
||||
case ILLEGAL: return "ILLEGAL";
|
||||
case EOF: return "EOF";
|
||||
|
||||
case IDENT: return "IDENT";
|
||||
case INT: return "INT";
|
||||
case FLOAT: return "FLOAT";
|
||||
case STRING: return "STRING";
|
||||
case EOF: return "EOF";
|
||||
|
||||
case IDENT: return "IDENT";
|
||||
case COMMENT: return "COMMENT";
|
||||
|
||||
case ADD: return "+";
|
||||
@ -153,28 +151,28 @@ func TokenString(tok int) string {
|
||||
case DEC: return "--";
|
||||
|
||||
case EQL: return "==";
|
||||
case NEQ: return "!=";
|
||||
case LSS: return "<";
|
||||
case LEQ: return "<=";
|
||||
case GTR: return ">";
|
||||
case GEQ: return ">=";
|
||||
|
||||
case ASSIGN: return "=";
|
||||
case DEFINE: return ":=";
|
||||
case NOT: return "!";
|
||||
|
||||
case NEQ: return "!=";
|
||||
case LEQ: return "<=";
|
||||
case GEQ: return ">=";
|
||||
case DEFINE: return ":=";
|
||||
case ELLIPSIS: return "...";
|
||||
|
||||
case LPAREN: return "(";
|
||||
case RPAREN: return ")";
|
||||
case LBRACK: return "[";
|
||||
case RBRACK: return "]";
|
||||
case LBRACE: return "{";
|
||||
case RBRACE: return "}";
|
||||
|
||||
case COMMA: return ",";
|
||||
case PERIOD: return ".";
|
||||
|
||||
case RPAREN: return ")";
|
||||
case RBRACK: return "]";
|
||||
case RBRACE: return "}";
|
||||
case SEMICOLON: return ";";
|
||||
case COLON: return ":";
|
||||
case PERIOD: return ".";
|
||||
|
||||
case BREAK: return "break";
|
||||
case CASE: return "case";
|
||||
@ -207,7 +205,7 @@ func TokenString(tok int) string {
|
||||
case VAR: return "var";
|
||||
}
|
||||
|
||||
return "token(" + Utils.IntToString(tok, 10) + ")";
|
||||
return "token(" + strconv.Itoa(tok) + ")";
|
||||
}
|
||||
|
||||
|
||||
@ -258,6 +256,7 @@ func is_letter(ch int) bool {
|
||||
|
||||
|
||||
func digit_val(ch int) int {
|
||||
// TODO: spec permits other Unicode digits as well
|
||||
if '0' <= ch && ch <= '9' {
|
||||
return ch - '0';
|
||||
}
|
||||
@ -273,25 +272,19 @@ func digit_val(ch int) int {
|
||||
|
||||
type ErrorHandler interface {
|
||||
Error(pos int, msg string);
|
||||
Warning(pos int, msg string);
|
||||
}
|
||||
|
||||
|
||||
type Scanner struct {
|
||||
// setup
|
||||
src []byte; // source
|
||||
err ErrorHandler;
|
||||
src string; // source
|
||||
scan_comments bool;
|
||||
|
||||
// scanning
|
||||
pos int; // current reading position
|
||||
ch int; // one char look-ahead
|
||||
chpos int; // position of ch
|
||||
linepos int; // position of beginning of line
|
||||
|
||||
// testmode
|
||||
testmode bool;
|
||||
testpos int;
|
||||
}
|
||||
|
||||
|
||||
@ -303,7 +296,7 @@ func (S *Scanner) next() {
|
||||
r, w := int(S.src[S.pos]), 1;
|
||||
if r >= 0x80 {
|
||||
// not ascii
|
||||
r, w = utf8.DecodeRuneInString(S.src, S.pos);
|
||||
r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
|
||||
}
|
||||
S.ch = r;
|
||||
S.chpos = S.pos;
|
||||
@ -315,38 +308,16 @@ func (S *Scanner) next() {
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) Error(pos int, msg string) {
|
||||
// check for expected errors (test mode)
|
||||
if S.testpos < 0 || pos == S.testpos {
|
||||
// test mode:
|
||||
// S.testpos < 0: // follow-up errors are expected and ignored
|
||||
// S.testpos == 0: // an error is expected at S.testpos and ignored
|
||||
S.testpos = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
func (S *Scanner) error(pos int, msg string) {
|
||||
S.err.Error(pos, msg);
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) expectNoErrors() {
|
||||
// set the next expected error position to one after eof
|
||||
// (the eof position is a legal error position!)
|
||||
S.testpos = len(S.src) + 1;
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) Init(err ErrorHandler, src string, scan_comments, testmode bool) {
|
||||
S.err = err;
|
||||
func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
|
||||
S.src = src;
|
||||
S.err = err;
|
||||
S.scan_comments = scan_comments;
|
||||
|
||||
S.pos = 0;
|
||||
S.linepos = 0;
|
||||
|
||||
S.testmode = testmode;
|
||||
S.expectNoErrors(); // S.src must be set
|
||||
S.next(); // S.expectNoErrrors() must be called before
|
||||
S.next();
|
||||
}
|
||||
|
||||
|
||||
@ -363,13 +334,13 @@ func charString(ch int) string {
|
||||
case '\\': s = `\\`;
|
||||
case '\'': s = `\'`;
|
||||
}
|
||||
return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")";
|
||||
return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")";
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) expect(ch int) {
|
||||
if S.ch != ch {
|
||||
S.Error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
|
||||
S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
|
||||
}
|
||||
S.next(); // make always progress
|
||||
}
|
||||
@ -393,7 +364,7 @@ func (S *Scanner) skipWhitespace() {
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanComment() string {
|
||||
func (S *Scanner) scanComment() []byte {
|
||||
// first '/' already consumed
|
||||
pos := S.chpos - 1;
|
||||
|
||||
@ -422,37 +393,14 @@ func (S *Scanner) scanComment() string {
|
||||
}
|
||||
}
|
||||
|
||||
S.Error(pos, "comment not terminated");
|
||||
S.error(pos, "comment not terminated");
|
||||
|
||||
exit:
|
||||
comment := S.src[pos : S.chpos];
|
||||
|
||||
if S.testmode {
|
||||
// interpret ERROR and SYNC comments
|
||||
oldpos := -1;
|
||||
switch {
|
||||
case len(comment) >= 8 && comment[3 : 8] == "ERROR" :
|
||||
// an error is expected at the next token position
|
||||
oldpos = S.testpos;
|
||||
S.skipWhitespace();
|
||||
S.testpos = S.chpos;
|
||||
case len(comment) >= 7 && comment[3 : 7] == "SYNC" :
|
||||
// scanning/parsing synchronized again - no (follow-up) errors expected
|
||||
oldpos = S.testpos;
|
||||
S.expectNoErrors();
|
||||
}
|
||||
|
||||
if 0 <= oldpos && oldpos <= len(S.src) {
|
||||
// the previous error was not found
|
||||
S.Error(oldpos, "ERROR not found"); // TODO this should call ErrorMsg
|
||||
}
|
||||
}
|
||||
|
||||
return comment;
|
||||
return S.src[pos : S.chpos];
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanIdentifier() (tok int, val string) {
|
||||
func (S *Scanner) scanIdentifier() (tok int, val []byte) {
|
||||
pos := S.chpos;
|
||||
for is_letter(S.ch) || digit_val(S.ch) < 10 {
|
||||
S.next();
|
||||
@ -460,7 +408,7 @@ func (S *Scanner) scanIdentifier() (tok int, val string) {
|
||||
val = S.src[pos : S.chpos];
|
||||
|
||||
var present bool;
|
||||
tok, present = keywords[val];
|
||||
tok, present = keywords[string(val)];
|
||||
if !present {
|
||||
tok = IDENT;
|
||||
}
|
||||
@ -476,7 +424,7 @@ func (S *Scanner) scanMantissa(base int) {
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, val string) {
|
||||
func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, val []byte) {
|
||||
pos := S.chpos;
|
||||
tok = INT;
|
||||
|
||||
@ -540,50 +488,33 @@ func (S *Scanner) scanDigits(n int, base int) {
|
||||
n--;
|
||||
}
|
||||
if n > 0 {
|
||||
S.Error(S.chpos, "illegal char escape");
|
||||
S.error(S.chpos, "illegal char escape");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanEscape(quote int) string {
|
||||
// TODO: fix this routine
|
||||
|
||||
func (S *Scanner) scanEscape(quote int) {
|
||||
ch := S.ch;
|
||||
pos := S.chpos;
|
||||
S.next();
|
||||
switch ch {
|
||||
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
|
||||
return string(ch);
|
||||
|
||||
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
|
||||
// nothing to do
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
S.scanDigits(3 - 1, 8); // 1 char already read
|
||||
return ""; // TODO fix this
|
||||
|
||||
S.scanDigits(3 - 1, 8); // 1 char read already
|
||||
case 'x':
|
||||
S.scanDigits(2, 16);
|
||||
return ""; // TODO fix this
|
||||
|
||||
case 'u':
|
||||
S.scanDigits(4, 16);
|
||||
return ""; // TODO fix this
|
||||
|
||||
case 'U':
|
||||
S.scanDigits(8, 16);
|
||||
return ""; // TODO fix this
|
||||
|
||||
default:
|
||||
// check for quote outside the switch for better generated code (eventually)
|
||||
if ch == quote {
|
||||
return string(quote);
|
||||
}
|
||||
S.Error(pos, "illegal char escape");
|
||||
S.error(pos, "illegal char escape");
|
||||
}
|
||||
|
||||
return ""; // TODO fix this
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanChar() string {
|
||||
func (S *Scanner) scanChar() []byte {
|
||||
// '\'' already consumed
|
||||
|
||||
pos := S.chpos - 1;
|
||||
@ -598,7 +529,7 @@ func (S *Scanner) scanChar() string {
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanString() string {
|
||||
func (S *Scanner) scanString() []byte {
|
||||
// '"' already consumed
|
||||
|
||||
pos := S.chpos - 1;
|
||||
@ -606,7 +537,7 @@ func (S *Scanner) scanString() string {
|
||||
ch := S.ch;
|
||||
S.next();
|
||||
if ch == '\n' || ch < 0 {
|
||||
S.Error(pos, "string not terminated");
|
||||
S.error(pos, "string not terminated");
|
||||
break;
|
||||
}
|
||||
if ch == '\\' {
|
||||
@ -619,7 +550,7 @@ func (S *Scanner) scanString() string {
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) scanRawString() string {
|
||||
func (S *Scanner) scanRawString() []byte {
|
||||
// '`' already consumed
|
||||
|
||||
pos := S.chpos - 1;
|
||||
@ -627,7 +558,7 @@ func (S *Scanner) scanRawString() string {
|
||||
ch := S.ch;
|
||||
S.next();
|
||||
if ch == '\n' || ch < 0 {
|
||||
S.Error(pos, "string not terminated");
|
||||
S.error(pos, "string not terminated");
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -676,7 +607,7 @@ func (S *Scanner) select4(tok0, tok1, ch2, tok2, tok3 int) int {
|
||||
}
|
||||
|
||||
|
||||
func (S *Scanner) Scan() (pos, tok int, val string) {
|
||||
func (S *Scanner) Scan() (pos, tok int, val []byte) {
|
||||
loop:
|
||||
S.skipWhitespace();
|
||||
|
||||
@ -689,7 +620,7 @@ loop:
|
||||
S.next(); // always make progress
|
||||
switch ch {
|
||||
case -1: tok = EOF;
|
||||
case '\n': tok, val = COMMENT, "\n";
|
||||
case '\n': tok, val = COMMENT, []byte('\n');
|
||||
case '"': tok, val = STRING, S.scanString();
|
||||
case '\'': tok, val = INT, S.scanChar();
|
||||
case '`': tok, val = STRING, S.scanRawString();
|
||||
@ -741,7 +672,7 @@ loop:
|
||||
case '&': tok = S.select3(AND, AND_ASSIGN, '&', LAND);
|
||||
case '|': tok = S.select3(OR, OR_ASSIGN, '|', LOR);
|
||||
default:
|
||||
S.Error(pos, "illegal character " + charString(ch));
|
||||
S.error(pos, "illegal character " + charString(ch));
|
||||
tok = ILLEGAL;
|
||||
}
|
||||
}
|
||||
|
@ -119,16 +119,6 @@ runtests() {
|
||||
}
|
||||
|
||||
|
||||
# run selftest1 always
|
||||
$CMD -t selftest1.go > $TMP1
|
||||
if [ $? != 0 ]; then
|
||||
cat $TMP1
|
||||
echo "Error (selftest1): $CMD -t selftest1.go"
|
||||
exit 1
|
||||
fi
|
||||
count selftest1.go
|
||||
|
||||
|
||||
# run over all .go files
|
||||
runtests $*
|
||||
cleanup
|
||||
|
Loading…
Reference in New Issue
Block a user