1
0
mirror of https://github.com/golang/go synced 2024-11-14 07:50:21 -07:00

minor tweaks:

- permit scanner to run without an error handler
- provide an error counter

R=iant
DELTA=43  (25 added, 0 deleted, 18 changed)
OCL=26804
CL=26812
This commit is contained in:
Robert Griesemer 2009-03-26 22:13:49 -07:00
parent 8e39472e3b
commit 3f9da82904
2 changed files with 40 additions and 15 deletions

View File

@ -16,10 +16,10 @@ import (
) )
// An implementation of an ErrorHandler must be provided to the Scanner. // An implementation of an ErrorHandler may be provided to the Scanner.
// If a syntax error is encountered, Error is called with a position and // If a syntax error is encountered and a handler was installed, Error
// an error message. The position points to the beginning of the offending // is called with a position and an error message. The position points
// token. // to the beginning of the offending token.
// //
type ErrorHandler interface { type ErrorHandler interface {
Error(pos token.Position, msg string); Error(pos token.Position, msg string);
@ -34,13 +34,16 @@ type ErrorHandler interface {
type Scanner struct { type Scanner struct {
// immutable state // immutable state
src []byte; // source src []byte; // source
err ErrorHandler; // error reporting err ErrorHandler; // error reporting; or nil
scan_comments bool; // if set, comments are reported as tokens scan_comments bool; // if set, comments are reported as tokens
// scanning state // scanning state
pos token.Position; // previous reading position (position before ch) pos token.Position; // previous reading position (position before ch)
offset int; // current reading offset (position after ch) offset int; // current reading offset (position after ch)
ch int; // one char look-ahead ch int; // one char look-ahead
// public state - ok to modify
ErrorCount int; // number of errors encountered
} }
@ -70,10 +73,12 @@ func (S *Scanner) next() {
// Init prepares the scanner S to tokenize the text src. Calls to Scan // Init prepares the scanner S to tokenize the text src. Calls to Scan
// will use the error handler err if they encounter a syntax error. The boolean // will use the error handler err if they encounter a syntax error and
// scan_comments specifies whether comments should be recognized and returned // err is not nil. Also, for each error encountered, the Scanner field
// by Scan as token.COMMENT. If scan_comments is false, they are treated as // ErrorCount is incremented by one. The boolean scan_comments specifies
// white space and ignored. // whether comments should be recognized and returned by Scan as COMMENT
// tokens. If scan_comments is false, they are treated as white space and
// ignored.
// //
func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) { func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
// Explicitly initialize all fields since a scanner may be reused. // Explicitly initialize all fields since a scanner may be reused.
@ -82,6 +87,7 @@ func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
S.scan_comments = scan_comments; S.scan_comments = scan_comments;
S.pos = token.Position{0, 1, 0}; S.pos = token.Position{0, 1, 0};
S.offset = 0; S.offset = 0;
S.ErrorCount = 0;
S.next(); S.next();
} }
@ -105,8 +111,11 @@ func charString(ch int) string {
func (S *Scanner) error(pos token.Position, msg string) { func (S *Scanner) error(pos token.Position, msg string) {
if S.err != nil {
S.err.Error(pos, msg); S.err.Error(pos, msg);
} }
S.ErrorCount++;
}
func (S *Scanner) expect(ch int) { func (S *Scanner) expect(ch int) {
@ -374,6 +383,13 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
// the token tok, and the literal text lit corresponding to the // the token tok, and the literal text lit corresponding to the
// token. The source end is indicated by token.EOF. // token. The source end is indicated by token.EOF.
// //
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
scan_again: scan_again:
// skip white space // skip white space
@ -462,12 +478,14 @@ scan_again:
// Tokenize calls a function f with the token position, token value, and token // Tokenize calls a function f with the token position, token value, and token
// text for each token in the source src. The other parameters have the same // text for each token in the source src. The other parameters have the same
// meaning as for the Init function. Tokenize keeps scanning until f returns // meaning as for the Init function. Tokenize keeps scanning until f returns
// false (usually when the token value is token.EOF). // false (usually when the token value is token.EOF). The result is the number
// of errors encountered.
// //
func Tokenize(src []byte, err ErrorHandler, scan_comments bool, f func (pos token.Position, tok token.Token, lit []byte) bool) { func Tokenize(src []byte, err ErrorHandler, scan_comments bool, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
var s Scanner; var s Scanner;
s.Init(src, err, scan_comments); s.Init(src, err, scan_comments);
for f(s.Scan()) { for f(s.Scan()) {
// action happens in f // action happens in f
} }
return s.ErrorCount;
} }

View File

@ -188,7 +188,7 @@ func TestScan(t *testing.T) {
// verify scan // verify scan
index := 0; index := 0;
eloc := token.Position{0, 1, 1}; eloc := token.Position{0, 1, 1};
scanner.Tokenize(io.StringBytes(src), &TestErrorHandler{t}, true, nerrors := scanner.Tokenize(io.StringBytes(src), &TestErrorHandler{t}, true,
func (pos token.Position, tok token.Token, litb []byte) bool { func (pos token.Position, tok token.Token, litb []byte) bool {
e := elt{token.EOF, "", special}; e := elt{token.EOF, "", special};
if index < len(tokens) { if index < len(tokens) {
@ -223,6 +223,9 @@ func TestScan(t *testing.T) {
return tok != token.EOF; return tok != token.EOF;
} }
); );
if nerrors != 0 {
t.Errorf("found %d errors", nerrors);
}
} }
@ -231,7 +234,7 @@ func TestInit(t *testing.T) {
var s scanner.Scanner; var s scanner.Scanner;
// 1st init // 1st init
s.Init(io.StringBytes("if true { }"), &TestErrorHandler{t}, false); s.Init(io.StringBytes("if true { }"), nil, false);
s.Scan(); // if s.Scan(); // if
s.Scan(); // true s.Scan(); // true
pos, tok, lit := s.Scan(); // { pos, tok, lit := s.Scan(); // {
@ -240,9 +243,13 @@ func TestInit(t *testing.T) {
} }
// 2nd init // 2nd init
s.Init(io.StringBytes("go true { ]"), &TestErrorHandler{t}, false); s.Init(io.StringBytes("go true { ]"), nil, false);
pos, tok, lit = s.Scan(); // go pos, tok, lit = s.Scan(); // go
if tok != token.GO { if tok != token.GO {
t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO); t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO);
} }
if s.ErrorCount != 0 {
t.Errorf("found %d errors", s.ErrorCount);
}
} }