1
0
mirror of https://github.com/golang/go synced 2024-11-26 17:46:57 -07:00

- added Filename field to token.Position

- handle //line filename:line comments in scanner
- moved error handling code used by various scanner clients
  to errors.go
- added extra tests

R=rsc
DELTA=385  (343 added, 18 deleted, 24 changed)
OCL=31551
CL=31601
This commit is contained in:
Robert Griesemer 2009-07-14 10:44:57 -07:00
parent 10e995fba8
commit 14228f3898
5 changed files with 366 additions and 40 deletions

View File

@ -2,8 +2,9 @@
# Use of this source code is governed by a BSD-style # Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file. # license that can be found in the LICENSE file.
# DO NOT EDIT. Automatically generated by gobuild. # DO NOT EDIT. Automatically generated by gobuild.
# gobuild -m >Makefile # gobuild -m scanner.go errors.go >Makefile
D=/go/ D=/go/
@ -20,7 +21,7 @@ test: packages
coverage: packages coverage: packages
gotest gotest
6cov -g `pwd` | grep -v '_test\.go:' 6cov -g $$(pwd) | grep -v '_test\.go:'
%.$O: %.go %.$O: %.go
$(GC) -I_obj $*.go $(GC) -I_obj $*.go
@ -32,16 +33,23 @@ coverage: packages
$(AS) $*.s $(AS) $*.s
O1=\ O1=\
errors.$O\
O2=\
scanner.$O\ scanner.$O\
phases: a1 phases: a1 a2
_obj$D/scanner.a: phases _obj$D/scanner.a: phases
a1: $(O1) a1: $(O1)
$(AR) grc _obj$D/scanner.a scanner.$O $(AR) grc _obj$D/scanner.a errors.$O
rm -f $(O1) rm -f $(O1)
a2: $(O2)
$(AR) grc _obj$D/scanner.a scanner.$O
rm -f $(O2)
newpkg: clean newpkg: clean
mkdir -p _obj$D mkdir -p _obj$D
@ -49,6 +57,7 @@ newpkg: clean
$(O1): newpkg $(O1): newpkg
$(O2): a1 $(O2): a1
$(O3): a2
nuke: clean nuke: clean
rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a

View File

@ -0,0 +1,203 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package scanner
import (
"container/vector";
"fmt";
"go/token";
"io";
"os";
"sort";
)
// ErrorHandler is the error-reporting hook that may be provided to the
// Scanner. If a syntax error is encountered and a handler was installed,
// the handler's Error method is called with a position and an error
// message. The position points to the beginning of the offending token.
//
type ErrorHandler interface {
// Error reports the error described by msg at position pos.
Error(pos token.Position, msg string);
}
// ErrorVector implements the ErrorHandler interface. It must be
// initialized with Init() before use; NewErrorVector returns a value
// that has already been initialized. It maintains a list of errors
// which can be retrieved with GetErrorList and GetError.
//
// A common usage pattern is to embed an ErrorVector alongside a
// scanner in a data structure that uses the scanner. By passing a
// reference to an ErrorVector to the scanner's Init call, default
// error handling is obtained.
//
type ErrorVector struct {
errors vector.Vector; // collected errors in reporting order; each element is a *Error
}
// Init initializes an ErrorVector. It must be called before the
// ErrorVector is used, unless the value was obtained from
// NewErrorVector, which calls Init itself.
func (h *ErrorVector) Init() {
// NOTE(review): presumably Init(0) leaves the vector with length 0,
// i.e. without any recorded errors - confirm against container/vector.
h.errors.Init(0);
}
// NewErrorVector allocates an ErrorVector, initializes it with Init,
// and returns a pointer to it.
func NewErrorVector() *ErrorVector {
	var v ErrorVector;
	v.Init();
	return &v;
}
// ErrorCount reports how many errors have been recorded in h so far,
// i.e. the current length of the underlying error vector.
func (h *ErrorVector) ErrorCount() int {
	n := h.errors.Len();
	return n;
}
// Within ErrorVector, an error is represented by an Error node. The
// position Pos, if valid, points to the beginning of the offending
// token, and the error condition is described by Msg.
//
type Error struct {
	Pos token.Position;
	Msg string;
}


// String formats the error as "filename:line:column: msg". The filename
// part is omitted if Pos.Filename is empty, the line:column part is
// omitted if Pos is not valid, and if both are missing the result is
// just Msg.
func (e *Error) String() string {
	prefix := "";
	if e.Pos.Filename != "" {
		prefix = e.Pos.Filename + ":";
	}
	if e.Pos.IsValid() {
		prefix += fmt.Sprintf("%d:%d:", e.Pos.Line, e.Pos.Column);
	}
	if prefix == "" {
		// no location information at all: no separator either
		return e.Msg;
	}
	return prefix + " " + e.Msg;
}
// An ErrorList is a (possibly sorted) list of Errors.
type ErrorList []*Error


// ErrorList implements the SortInterface.

// Len returns the number of errors in the list.
func (p ErrorList) Len() int {
	return len(p);
}


// Swap exchanges the errors at indices i and j.
func (p ErrorList) Swap(i, j int) {
	tmp := p[i];
	p[i] = p[j];
	p[j] = tmp;
}


// Less reports whether the error at index i is ordered before the error
// at index j. Errors are ordered by filename, then line, then column.
func (p ErrorList) Less(i, j int) bool {
	a, b := &p[i].Pos, &p[j].Pos;
	// Note that it is not sufficient to simply compare file offsets because
	// the offsets do not reflect modified line information (through //line
	// comments).
	if a.Filename != b.Filename {
		return a.Filename < b.Filename;
	}
	if a.Line != b.Line {
		return a.Line < b.Line;
	}
	return a.Column < b.Column;
}


// String returns the first error of the list, followed by a count of
// the remaining errors if there is more than one. An empty list yields
// "unspecified error".
func (p ErrorList) String() string {
	n := len(p);
	if n == 0 {
		return "unspecified error";
	}
	first := p[0].String();
	if n == 1 {
		return first;
	}
	return fmt.Sprintf("%s (and %d more errors)", first, n - 1);
}
// These constants control the construction of the ErrorList
// returned by GetErrorList (and therefore by GetError). The modes
// are ordered: GetErrorList compares its mode argument with >=, so
// each mode includes the effect of the ones before it.
//
const (
Raw = iota; // leave error list unchanged
Sorted; // sort error list by file, line, and column number
NoMultiples; // sort error list and leave only the first error per file and line
)
// GetErrorList returns the list of errors collected by an ErrorVector.
// The construction of the ErrorList returned is controlled by the mode
// parameter: Raw returns the errors in the order they were reported,
// Sorted sorts them by file, line, and column, and NoMultiples sorts
// them and keeps only the first error for each file and line.
// If there are no errors, the result is nil; otherwise the result
// contains at least one error.
//
func (h *ErrorVector) GetErrorList(mode int) ErrorList {
	n := h.errors.Len();
	if n == 0 {
		return nil;
	}

	// copy the collected errors so that sorting and filtering do not
	// affect the ErrorVector itself
	list := make(ErrorList, n);
	for i := 0; i < n; i++ {
		list[i] = h.errors.At(i).(*Error);
	}

	if mode >= Sorted {
		sort.Sort(list);
	}

	if mode >= NoMultiples {
		// Compact the sorted list in place: i is the number of errors
		// kept so far and never exceeds the index being read.
		var last token.Position;
		i := 0;
		for _, e := range list {
			// The first error is always kept. Without the i == 0 test an
			// error without filename and with an invalid position (line 0)
			// would compare equal to the zero value of last and be dropped,
			// which could produce an empty, non-nil list.
			if i == 0 || e.Pos.Filename != last.Filename || e.Pos.Line != last.Line {
				last = e.Pos;
				list[i] = e;
				i++;
			}
		}
		list = list[0 : i];
	}

	return list;
}
// GetError returns the same errors as GetErrorList, but typed as an
// os.Error. The no-error case is handled here explicitly so that the
// result is a nil os.Error that stays nil when assigned to an os.Error
// variable.
//
func (h *ErrorVector) GetError(mode int) os.Error {
	if h.errors.Len() != 0 {
		return h.GetErrorList(mode);
	}
	return nil;
}
// Error records an error with the given position and message. It is
// the method that makes ErrorVector implement the ErrorHandler
// interface.
func (h *ErrorVector) Error(pos token.Position, msg string) {
	e := &Error{pos, msg};
	h.errors.Push(e);
}
// PrintError is a utility function that writes err to w. If err is an
// ErrorList, each error of the list is printed on a line of its own;
// any other err is printed as a single line.
//
func PrintError(w io.Writer, err os.Error) {
	list, ok := err.(ErrorList);
	if !ok {
		// not an error list: print the error as is
		fmt.Fprintf(w, "%s\n", err);
		return;
	}
	for i := range list {
		fmt.Fprintf(w, "%s\n", list[i]);
	}
}

View File

@ -9,23 +9,15 @@
package scanner package scanner
import ( import (
"bytes";
"go/token"; "go/token";
"go/scanner";
"strconv"; "strconv";
"unicode"; "unicode";
"utf8"; "utf8";
) )
// An implementation of an ErrorHandler may be provided to the Scanner.
// If a syntax error is encountered and a handler was installed, Error
// is called with a position and an error message. The position points
// to the beginning of the offending token.
//
type ErrorHandler interface {
Error(pos token.Position, msg string);
}
// A Scanner holds the scanner's internal state while processing // A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data // a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use. For // structure but must be initialized via Init before use. For
@ -84,15 +76,17 @@ const (
// Init prepares the scanner S to tokenize the text src. Calls to Scan // Init prepares the scanner S to tokenize the text src. Calls to Scan
// will use the error handler err if they encounter a syntax error and // will use the error handler err if they encounter a syntax error and
// err is not nil. Also, for each error encountered, the Scanner field // err is not nil. Also, for each error encountered, the Scanner field
// ErrorCount is incremented by one. The mode parameter determines how // ErrorCount is incremented by one. The filename parameter is used as
// comments and illegal characters are handled. // filename in the token.Position returned by Scan for each token. The
// mode parameter determines how comments and illegal characters are
// handled.
// //
func (S *Scanner) Init(src []byte, err ErrorHandler, mode uint) { func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) {
// Explicitly initialize all fields since a scanner may be reused. // Explicitly initialize all fields since a scanner may be reused.
S.src = src; S.src = src;
S.err = err; S.err = err;
S.mode = mode; S.mode = mode;
S.pos = token.Position{0, 1, 0}; S.pos = token.Position{filename, 0, 1, 0};
S.offset = 0; S.offset = 0;
S.ErrorCount = 0; S.ErrorCount = 0;
S.next(); S.next();
@ -133,6 +127,8 @@ func (S *Scanner) expect(ch int) {
} }
var prefix = []byte{'l', 'i', 'n', 'e', ' '}; // "line "
func (S *Scanner) scanComment(pos token.Position) { func (S *Scanner) scanComment(pos token.Position) {
// first '/' already consumed // first '/' already consumed
@ -143,6 +139,22 @@ func (S *Scanner) scanComment(pos token.Position) {
if S.ch == '\n' { if S.ch == '\n' {
// '\n' is not part of the comment // '\n' is not part of the comment
// (the comment ends on the same line where it started) // (the comment ends on the same line where it started)
if pos.Column == 1 {
text := S.src[pos.Offset+2 : S.pos.Offset];
if bytes.HasPrefix(text, prefix) {
// comment starts at beginning of line with "//line ";
// get filename and line number, if any
i := bytes.Index(text, []byte{':'});
if i >= 0 {
if line, err := strconv.Atoi(string(text[i+1 : len(text)])); err == nil && line > 0 {
// valid //line filename:line comment;
// update scanner position
S.pos.Filename = string(text[len(prefix) : i]);
S.pos.Line = line;
}
}
}
}
return; return;
} }
} }
@ -492,9 +504,9 @@ scan_again:
// false (usually when the token value is token.EOF). The result is the number // false (usually when the token value is token.EOF). The result is the number
// of errors encountered. // of errors encountered.
// //
func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int { func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
var s Scanner; var s Scanner;
s.Init(src, err, mode); s.Init(filename, src, err, mode);
for f(s.Scan()) { for f(s.Scan()) {
// action happens in f // action happens in f
} }

View File

@ -7,6 +7,7 @@ package scanner
import ( import (
"go/scanner"; "go/scanner";
"go/token"; "go/token";
"os";
"strings"; "strings";
"testing"; "testing";
) )
@ -178,19 +179,35 @@ func NewlineCount(s string) int {
} }
// checkPos compares the position pos obtained for the token lit with
// the expected position and reports each differing field (filename,
// offset, line, column) as a separate test error.
func checkPos(t *testing.T, lit string, pos, expected token.Position) {
	if got, want := pos.Filename, expected.Filename; got != want {
		t.Errorf("bad filename for %s: got %s, expected %s", lit, got, want);
	}
	if got, want := pos.Offset, expected.Offset; got != want {
		t.Errorf("bad position for %s: got %d, expected %d", lit, got, want);
	}
	if got, want := pos.Line, expected.Line; got != want {
		t.Errorf("bad line for %s: got %d, expected %d", lit, got, want);
	}
	if got, want := pos.Column, expected.Column; got != want {
		t.Errorf("bad column for %s: got %d, expected %d", lit, got, want);
	}
}
// Verify that calling Scan() provides the correct results. // Verify that calling Scan() provides the correct results.
func TestScan(t *testing.T) { func TestScan(t *testing.T) {
// make source // make source
var src string; var src string;
for i, e := range tokens { for _, e := range tokens {
src += e.lit + whitespace; src += e.lit + whitespace;
} }
whitespace_linecount := NewlineCount(whitespace); whitespace_linecount := NewlineCount(whitespace);
// verify scan // verify scan
index := 0; index := 0;
eloc := token.Position{0, 1, 1}; epos := token.Position{"", 0, 1, 1};
nerrors := scanner.Tokenize(strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments, nerrors := scanner.Tokenize("", strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
func (pos token.Position, tok token.Token, litb []byte) bool { func (pos token.Position, tok token.Token, litb []byte) bool {
e := elt{token.EOF, "", special}; e := elt{token.EOF, "", special};
if index < len(tokens) { if index < len(tokens) {
@ -199,17 +216,9 @@ func TestScan(t *testing.T) {
lit := string(litb); lit := string(litb);
if tok == token.EOF { if tok == token.EOF {
lit = "<EOF>"; lit = "<EOF>";
eloc.Column = 0; epos.Column = 0;
}
if pos.Offset != eloc.Offset {
t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset);
}
if pos.Line != eloc.Line {
t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line);
}
if pos.Column!= eloc.Column {
t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column);
} }
checkPos(t, lit, pos, epos);
if tok != e.tok { if tok != e.tok {
t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String()); t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String());
} }
@ -219,12 +228,12 @@ func TestScan(t *testing.T) {
if tokenclass(tok) != e.class { if tokenclass(tok) != e.class {
t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class); t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class);
} }
eloc.Offset += len(lit) + len(whitespace); epos.Offset += len(lit) + len(whitespace);
eloc.Line += NewlineCount(lit) + whitespace_linecount; epos.Line += NewlineCount(lit) + whitespace_linecount;
if tok == token.COMMENT && litb[1] == '/' { if tok == token.COMMENT && litb[1] == '/' {
// correct for unaccounted '\n' in //-style comment // correct for unaccounted '\n' in //-style comment
eloc.Offset++; epos.Offset++;
eloc.Line++; epos.Line++;
} }
index++; index++;
return tok != token.EOF; return tok != token.EOF;
@ -236,12 +245,60 @@ func TestScan(t *testing.T) {
} }
// A seg is one entry of the table used by TestLineComments: a piece of
// source text together with the filename and line number expected in
// the position of the token scanned from it.
type seg struct {
srcline string; // a line of source text
filename string; // filename for current token
line int; // line number for current token
}
// segments is the input for TestLineComments. The srcline fields are
// concatenated to form the source text; the scanner is initialized with
// the filename "TestLineComments", which therefore is the expected
// filename until the first valid //line comment changes it.
var segments = []seg{
// exactly one token per line since the test consumes one token per segment
seg{ " line1", "TestLineComments", 1 },
seg{ "\nline2", "TestLineComments", 2 },
seg{ "\nline3 //line File1.go:100", "TestLineComments", 3 }, // bad line comment (does not start in column 1), ignored
seg{ "\nline4", "TestLineComments", 4 },
seg{ "\n//line File1.go:100\n line100", "File1.go", 100 },
seg{ "\n//line File2.go:200\n line200", "File2.go", 200 },
seg{ "\n//line :1\n line1", "", 1 },
seg{ "\n//line foo:42\n line42", "foo", 42 },
seg{ "\n //line foo:42\n line44", "foo", 44 }, // bad line comment (does not start in column 1), ignored
seg{ "\n//line foo 42\n line46", "foo", 46 }, // bad line comment (no ':' separator), ignored
seg{ "\n//line foo:42 extra text\n line48", "foo", 48 }, // bad line comment (text after ':' is not a number), ignored
seg{ "\n//line foo:42\n line42", "foo", 42 },
seg{ "\n//line foo:42\n line42", "foo", 42 },
seg{ "\n//line File1.go:100\n line100", "File1.go", 100 },
}
// Verify that comments of the form "//line filename:line" are interpreted correctly.
// The source is the concatenation of all segments; one token is scanned
// per segment and its filename and line are compared with the segment's
// expected values.
func TestLineComments(t *testing.T) {
	// make source
	var src string;
	for _, s := range segments {
		src += s.srcline;
	}

	// verify scan
	var S scanner.Scanner;
	S.Init("TestLineComments", strings.Bytes(src), nil, 0);
	for _, s := range segments {
		// the token kind is not needed for this test
		pos, _, lit := S.Scan();
		// Only filename and line are under test: offset and column of the
		// expected position are taken from pos itself so they always match.
		checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column});
	}

	if S.ErrorCount != 0 {
		t.Errorf("found %d errors", S.ErrorCount);
	}
}
// Verify that initializing the same scanner more than once works correctly. // Verify that initializing the same scanner more than once works correctly.
func TestInit(t *testing.T) { func TestInit(t *testing.T) {
var s scanner.Scanner; var s scanner.Scanner;
// 1st init // 1st init
s.Init(strings.Bytes("if true { }"), nil, 0); s.Init("", strings.Bytes("if true { }"), nil, 0);
s.Scan(); // if s.Scan(); // if
s.Scan(); // true s.Scan(); // true
pos, tok, lit := s.Scan(); // { pos, tok, lit := s.Scan(); // {
@ -250,7 +307,7 @@ func TestInit(t *testing.T) {
} }
// 2nd init // 2nd init
s.Init(strings.Bytes("go true { ]"), nil, 0); s.Init("", strings.Bytes("go true { ]"), nil, 0);
pos, tok, lit = s.Scan(); // go pos, tok, lit = s.Scan(); // go
if tok != token.GO { if tok != token.GO {
t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO); t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO);
@ -266,7 +323,7 @@ func TestIllegalChars(t *testing.T) {
var s scanner.Scanner; var s scanner.Scanner;
const src = "*?*$*@*"; const src = "*?*$*@*";
s.Init(strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars); s.Init("", strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
for offs, ch := range src { for offs, ch := range src {
pos, tok, lit := s.Scan(); pos, tok, lit := s.Scan();
if pos.Offset != offs { if pos.Offset != offs {
@ -281,3 +338,47 @@ func TestIllegalChars(t *testing.T) {
t.Errorf("found %d errors", s.ErrorCount); t.Errorf("found %d errors", s.ErrorCount);
} }
} }
func TestStdErrorHander(t *testing.T) {
const src =
"@\n" // illegal character, cause an error
"@ @\n" // two errors on the same line
"//line File2:20\n"
"@\n" // different file, but same line
"//line File2:1\n"
"@ @\n" // same file, decreasing line number
"//line File1:1\n"
"@ @ @" // original file, line 1 again
;
var s scanner.Scanner;
v := NewErrorVector();
nerrors := scanner.Tokenize("File1", strings.Bytes(src), v, 0,
func (pos token.Position, tok token.Token, litb []byte) bool {
return tok != token.EOF;
}
);
list := v.GetErrorList(Raw);
if len(list) != 9 {
t.Errorf("found %d raw errors, expected 9", len(list));
PrintError(os.Stderr, list);
}
list = v.GetErrorList(Sorted);
if len(list) != 9 {
t.Errorf("found %d sorted errors, expected 9", len(list));
PrintError(os.Stderr, list);
}
list = v.GetErrorList(NoMultiples);
if len(list) != 4 {
t.Errorf("found %d one-per-line errors, expected 4", len(list));
PrintError(os.Stderr, list);
}
if v.ErrorCount() != nerrors {
t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors);
}
}

View File

@ -327,6 +327,7 @@ func (tok Token) IsKeyword() bool {
// A Position is valid if the line number is > 0. // A Position is valid if the line number is > 0.
// //
type Position struct { type Position struct {
Filename string; // filename, if any
Offset int; // byte offset, starting at 0 Offset int; // byte offset, starting at 0
Line int; // line number, starting at 1 Line int; // line number, starting at 1
Column int; // column number, starting at 1 (character count) Column int; // column number, starting at 1 (character count)