- added Filename field to token.Position

- handle //line filename:line comments in scanner - moved error handling code used by various scanner clients to errors.go - added extra tests R=rsc DELTA=385 (343 added, 18 deleted, 24 changed) OCL=31551 CL=31601
2024-11-20 06:44:40 -07:00 · 2009-07-14 10:44:57 -07:00 · 2009-07-14 10:44:57 -07:00 · 14228f3898
commit 14228f3898
parent 10e995fba8
5 changed files with 366 additions and 40 deletions
--- a/src/pkg/go/scanner/Makefile
+++ b/src/pkg/go/scanner/Makefile
@ -2,8 +2,9 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.

+
 # DO NOT EDIT.  Automatically generated by gobuild.
-# gobuild -m >Makefile
+# gobuild -m scanner.go errors.go >Makefile

 D=/go/

@ -20,7 +21,7 @@ test: packages

 coverage: packages
 	gotest
-	6cov -g `pwd` | grep -v '_test\.go:'
+	6cov -g $$(pwd) | grep -v '_test\.go:'

 %.$O: %.go
 	$(GC) -I_obj $*.go
@ -32,16 +33,23 @@ coverage: packages
 	$(AS) $*.s

 O1=\
+	errors.$O\
+
+O2=\
 	scanner.$O\


-phases: a1
+phases: a1 a2
 _obj$D/scanner.a: phases

 a1: $(O1)
-	$(AR) grc _obj$D/scanner.a scanner.$O
+	$(AR) grc _obj$D/scanner.a errors.$O
 	rm -f $(O1)

+a2: $(O2)
+	$(AR) grc _obj$D/scanner.a scanner.$O
+	rm -f $(O2)
+

 newpkg: clean
 	mkdir -p _obj$D
@ -49,6 +57,7 @@ newpkg: clean

 $(O1): newpkg
 $(O2): a1
+$(O3): a2

 nuke: clean
 	rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a
--- a/src/pkg/go/scanner/errors.go
+++ b/src/pkg/go/scanner/errors.go
@ -0,0 +1,203 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+import (
+	"container/vector";
+	"fmt";
+	"go/token";
+	"io";
+	"os";
+	"sort";
+)
+
+
+// An implementation of an ErrorHandler may be provided to the Scanner.
+// If a syntax error is encountered and a handler was installed, Error
+// is called with a position and an error message. The position points
+// to the beginning of the offending token.
+//
+type ErrorHandler interface {
+	Error(pos token.Position, msg string);
+}
+
+
+// ErrorVector implements the ErrorHandler interface. It must be
+// initialized with Init(). It maintains a list of errors which can
+// be retrieved with GetErrorList and GetError.
+//
+// A common usage pattern is to embed an ErrorVector alongside a
+// scanner in a data structure that uses the scanner. By passing a
+// reference to an ErrorVector to the scanner's Init call, the errors
+// reported by the scanner are recorded in the ErrorVector.
+//
+type ErrorVector struct {
+	errors vector.Vector;  // the *Error values pushed by Error
+}
+
+
+// Init initializes an ErrorVector. It must be called before h is used.
+func (h *ErrorVector) Init() {
+	h.errors.Init(0);
+}
+
+
+// NewErrorVector allocates a new ErrorVector and initializes it with Init.
+func NewErrorVector() *ErrorVector {
+	h := new(ErrorVector);
+	h.Init();
+	return h;
+}
+
+
+// ErrorCount returns the number of errors recorded by calls to Error.
+func (h *ErrorVector) ErrorCount() int {
+	return h.errors.Len();
+}
+
+
+// Within ErrorVector, an error is represented by an Error node. The
+// position Pos, if valid, points to the beginning of the offending
+// token, and the error condition is described by Msg.
+//
+type Error struct {
+	Pos token.Position;
+	Msg string;
+}
+
+
+func (e *Error) String() string {  // formats e as "filename:line:column: msg", omitting absent parts
+	s := e.Pos.Filename;
+	if s != "" {
+		s += ":";
+	}
+	if e.Pos.IsValid() {  // line and column are left out for an invalid position
+		s += fmt.Sprintf("%d:%d:", e.Pos.Line, e.Pos.Column);
+	}
+	if s != "" {  // separate a non-empty position prefix from the message
+		s += " ";
+	}
+	return s + e.Msg;
+}
+
+
+// An ErrorList is a (possibly sorted) list of Errors.
+type ErrorList []*Error
+
+
+// ErrorList implements the SortInterface; Less orders by filename, line, column.
+func (p ErrorList) Len() int  { return len(p); }
+func (p ErrorList) Swap(i, j int)  { p[i], p[j] = p[j], p[i]; }
+
+
+func (p ErrorList) Less(i, j int) bool  {
+	e := &p[i].Pos;
+	f := &p[j].Pos;
+	// Note that it is not sufficient to simply compare file offsets because
+	// the offsets do not reflect modified line information (through //line
+	// comments).
+	if e.Filename < f.Filename {
+		return true;
+	}
+	if e.Filename == f.Filename {
+		if e.Line < f.Line {
+			return true;
+		}
+		if e.Line == f.Line {
+			return e.Column < f.Column;
+		}
+	}
+	return false;  // e's filename, or its line within the same file, sorts after f's
+}
+
+
+func (p ErrorList) String() string {  // describes the list by its first error and a count of the rest
+	switch len(p) {
+	case 0:
+		return "unspecified error";
+	case 1:
+		return p[0].String();
+	}
+	return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1);
+}
+
+
+// These constants control the construction of the ErrorList
+// returned by GetErrorList and GetError.
+//
+const (
+	Raw = iota;  // leave error list unchanged
+	Sorted;  // sort error list by file, line, and column number
+	NoMultiples;  // sort error list and leave only the first error per line
+)
+
+
+// GetErrorList returns the list of errors collected by an ErrorVector.
+// The construction of the ErrorList returned is controlled by the mode
+// parameter. If there are no errors, the result is nil.
+//
+func (h *ErrorVector) GetErrorList(mode int) ErrorList {
+	if h.errors.Len() == 0 {
+		return nil;
+	}
+
+	list := make(ErrorList, h.errors.Len());  // a copy, so sorting below does not reorder h.errors
+	for i := 0; i < h.errors.Len(); i++ {
+		list[i] = h.errors.At(i).(*Error);
+	}
+
+	if mode >= Sorted {  // also taken for NoMultiples, which relies on sorted input
+		sort.Sort(list);
+	}
+
+	if mode >= NoMultiples {
+		var last token.Position;  // zero value: last.Line == 0, which no valid error position has
+		i := 0;
+		for _, e := range list {
+			if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line {
+				last = e.Pos;
+				list[i] = e;  // compact the kept errors toward the front, in place
+				i++;
+			}
+		}
+		list = list[0 : i];
+	}
+
+	return list;
+}
+
+
+// GetError is like GetErrorList, but it returns an os.Error instead
+// so that a nil result can be assigned to an os.Error variable and
+// remains nil.
+//
+func (h *ErrorVector) GetError(mode int) os.Error {
+	if h.errors.Len() == 0 {
+		return nil;  // returned directly rather than via GetErrorList's nil ErrorList
+	}
+
+	return h.GetErrorList(mode);
+}
+
+
+// Error implements the ErrorHandler interface by recording an Error{pos, msg}.
+func (h *ErrorVector) Error(pos token.Position, msg string) {
+	h.errors.Push(&Error{pos, msg});
+}
+
+
+// PrintError is a utility function that prints a list of errors to w,
+// one error per line, if the err parameter is an ErrorList. Otherwise
+// it prints the err string.
+//
+func PrintError(w io.Writer, err os.Error) {
+	if list, ok := err.(ErrorList); ok {
+		for _, e := range list {
+			fmt.Fprintf(w, "%s\n", e);
+		}
+	} else {
+		fmt.Fprintf(w, "%s\n", err);
+	}
+}
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@ -9,23 +9,15 @@
 package scanner

 import (
+	"bytes";
 	"go/token";
+	"go/scanner";
 	"strconv";
 	"unicode";
 	"utf8";
 )


-// An implementation of an ErrorHandler may be provided to the Scanner.
-// If a syntax error is encountered and a handler was installed, Error
-// is called with a position and an error message. The position points
-// to the beginning of the offending token.
-//
-type ErrorHandler interface {
-	Error(pos token.Position, msg string);
-}
-
-
 // A Scanner holds the scanner's internal state while processing
 // a given text.  It can be allocated as part of another data
 // structure but must be initialized via Init before use. For
@ -84,15 +76,17 @@ const (
 // Init prepares the scanner S to tokenize the text src. Calls to Scan
 // will use the error handler err if they encounter a syntax error and
 // err is not nil. Also, for each error encountered, the Scanner field
-// ErrorCount is incremented by one. The mode parameter determines how
-// comments and illegal characters are handled.
+// ErrorCount is incremented by one. The filename parameter is used as
+// filename in the token.Position returned by Scan for each token. The
+// mode parameter determines how comments and illegal characters are
+// handled.
 //
-func (S *Scanner) Init(src []byte, err ErrorHandler, mode uint) {
+func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) {
 	// Explicitly initialize all fields since a scanner may be reused.
 	S.src = src;
 	S.err = err;
 	S.mode = mode;
-	S.pos = token.Position{0, 1, 0};
+	S.pos = token.Position{filename, 0, 1, 0};
 	S.offset = 0;
 	S.ErrorCount = 0;
 	S.next();
@ -133,6 +127,8 @@ func (S *Scanner) expect(ch int) {
 }


+var prefix = []byte{'l', 'i', 'n', 'e', ' '};  // "line "
+
 func (S *Scanner) scanComment(pos token.Position) {
 	// first '/' already consumed

@ -143,6 +139,22 @@ func (S *Scanner) scanComment(pos token.Position) {
 			if S.ch == '\n' {
 				// '\n' is not part of the comment
 				// (the comment ends on the same line where it started)
+				if pos.Column == 1 {
+					text := S.src[pos.Offset+2 : S.pos.Offset];
+					if bytes.HasPrefix(text, prefix) {
+						// comment starts at beginning of line with "//line ";
+						// get filename and line number, if any
+						i := bytes.Index(text, []byte{':'});
+						if i >= 0 {
+							if line, err := strconv.Atoi(string(text[i+1 : len(text)])); err == nil && line > 0 {
+								// valid //line filename:line comment;
+								// update scanner position
+								S.pos.Filename = string(text[len(prefix) : i]);
+								S.pos.Line = line;
+							}
+						}
+					}
+				}
 				return;
 			}
 		}
@ -492,9 +504,9 @@ scan_again:
 // false (usually when the token value is token.EOF). The result is the number
 // of errors encountered.
 //
-func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
+func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
 	var s Scanner;
-	s.Init(src, err, mode);
+	s.Init(filename, src, err, mode);
 	for f(s.Scan()) {
 		// action happens in f
 	}
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@ -7,6 +7,7 @@ package scanner
 import (
 	"go/scanner";
 	"go/token";
+	"os";
 	"strings";
 	"testing";
 )
@ -178,19 +179,35 @@ func NewlineCount(s string) int {
 }


+func checkPos(t *testing.T, lit string, pos, expected token.Position) {  // reports each field of pos that differs from expected
+	if pos.Filename != expected.Filename {
+		t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename);
+	}
+	if pos.Offset != expected.Offset {  // the "position" reported below is the byte offset
+		t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset);
+	}
+	if pos.Line != expected.Line {
+		t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line);
+	}
+	if pos.Column!= expected.Column {
+		t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column);
+	}
+}
+
+
 // Verify that calling Scan() provides the correct results.
 func TestScan(t *testing.T) {
 	// make source
 	var src string;
-	for i, e := range tokens {
+	for _, e := range tokens {
 		src += e.lit + whitespace;
 	}
 	whitespace_linecount := NewlineCount(whitespace);

 	// verify scan
 	index := 0;
-	eloc := token.Position{0, 1, 1};
-	nerrors := scanner.Tokenize(strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
+	epos := token.Position{"", 0, 1, 1};
+	nerrors := scanner.Tokenize("", strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
 		func (pos token.Position, tok token.Token, litb []byte) bool {
 			e := elt{token.EOF, "", special};
 			if index < len(tokens) {
@ -199,17 +216,9 @@ func TestScan(t *testing.T) {
 			lit := string(litb);
 			if tok == token.EOF {
 				lit = "<EOF>";
-				eloc.Column = 0;
-			}
-			if pos.Offset != eloc.Offset {
-				t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset);
-			}
-			if pos.Line != eloc.Line {
-				t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line);
-			}
-			if pos.Column!= eloc.Column {
-				t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column);
+				epos.Column = 0;
 			}
+			checkPos(t, lit, pos, epos);
 			if tok != e.tok {
 				t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String());
 			}
@ -219,12 +228,12 @@ func TestScan(t *testing.T) {
 			if tokenclass(tok) != e.class {
 				t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class);
 			}
-			eloc.Offset += len(lit) + len(whitespace);
-			eloc.Line += NewlineCount(lit) + whitespace_linecount;
+			epos.Offset += len(lit) + len(whitespace);
+			epos.Line += NewlineCount(lit) + whitespace_linecount;
 			if tok == token.COMMENT && litb[1] == '/' {
 				// correct for unaccounted '/n' in //-style comment
-				eloc.Offset++;
-				eloc.Line++;
+				epos.Offset++;
+				epos.Line++;
 			}
 			index++;
 			return tok != token.EOF;
@ -236,12 +245,60 @@ func TestScan(t *testing.T) {
 }


+type seg struct {
+	srcline string;  // a line of source text
+	filename string;  // filename for current token
+	line int;  // line number for current token
+}
+
+
+var segments = []seg{
+	// exactly one token per line since the test consumes one token per segment
+	seg{ "  line1", "TestLineComments", 1 },
+	seg{ "\nline2", "TestLineComments", 2 },
+	seg{ "\nline3  //line File1.go:100", "TestLineComments", 3 },  // bad line comment, ignored
+	seg{ "\nline4", "TestLineComments", 4 },
+	seg{ "\n//line File1.go:100\n  line100", "File1.go", 100 },
+	seg{ "\n//line File2.go:200\n  line200", "File2.go", 200 },
+	seg{ "\n//line :1\n  line1", "", 1 },
+	seg{ "\n//line foo:42\n  line42", "foo", 42 },
+	seg{ "\n //line foo:42\n  line44", "foo", 44 },  // bad line comment, ignored
+	seg{ "\n//line foo 42\n  line46", "foo", 46 },  // bad line comment, ignored
+	seg{ "\n//line foo:42 extra text\n  line48", "foo", 48 },  // bad line comment, ignored
+	seg{ "\n//line foo:42\n  line42", "foo", 42 },
+	seg{ "\n//line foo:42\n  line42", "foo", 42 },
+	seg{ "\n//line File1.go:100\n  line100", "File1.go", 100 },
+}
+
+
+// Verify that comments of the form "//line filename:line" are interpreted correctly.
+func TestLineComments(t *testing.T) {
+	// make source by concatenating the source text of all segments
+	var src string;
+	for _, e := range segments {
+		src += e.srcline;
+	}
+
+	// verify scan: one Scan call per segment
+	var S scanner.Scanner;
+	S.Init("TestLineComments", strings.Bytes(src), nil, 0);
+	for _, s := range segments {
+		pos, tok, lit := S.Scan();
+		checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column});  // Offset and Column come from pos itself, so only Filename and Line are checked
+	}
+
+	if S.ErrorCount != 0 {
+		t.Errorf("found %d errors", S.ErrorCount);
+	}
+}
+
+
 // Verify that initializing the same scanner more then once works correctly.
 func TestInit(t *testing.T) {
 	var s scanner.Scanner;

 	// 1st init
-	s.Init(strings.Bytes("if true { }"), nil, 0);
+	s.Init("", strings.Bytes("if true { }"), nil, 0);
 	s.Scan();  // if
 	s.Scan();  // true
 	pos, tok, lit := s.Scan();  // {
@ -250,7 +307,7 @@ func TestInit(t *testing.T) {
 	}

 	// 2nd init
-	s.Init(strings.Bytes("go true { ]"), nil, 0);
+	s.Init("", strings.Bytes("go true { ]"), nil, 0);
 	pos, tok, lit = s.Scan();  // go
 	if tok != token.GO {
 		t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO);
@ -266,7 +323,7 @@ func TestIllegalChars(t *testing.T) {
 	var s scanner.Scanner;

 	const src = "*?*$*@*";
-	s.Init(strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
+	s.Init("", strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
 	for offs, ch := range src {
 		pos, tok, lit := s.Scan();
 		if pos.Offset != offs {
@ -281,3 +338,47 @@ func TestIllegalChars(t *testing.T) {
 		t.Errorf("found %d errors", s.ErrorCount);
 	}
 }
+
+
+func TestStdErrorHander(t *testing.T) {
+	const src =
+		"@\n"  // illegal character, cause an error
+		"@ @\n"  // two errors on the same line
+		"//line File2:20\n"
+		"@\n"  // different file, but same line
+		"//line File2:1\n"
+		"@ @\n"  // same file, decreasing line number
+		"//line File1:1\n"
+		"@ @ @"  // original file, line 1 again
+	;
+
+	var s scanner.Scanner;
+	v := NewErrorVector();
+	nerrors := scanner.Tokenize("File1", strings.Bytes(src), v, 0,
+		func (pos token.Position, tok token.Token, litb []byte) bool {
+			return tok != token.EOF;
+		}
+	);
+
+	list := v.GetErrorList(Raw);
+	if len(list) != 9 {  // src contains 1 + 2 + 1 + 2 + 3 illegal '@' characters
+		t.Errorf("found %d raw errors, expected 9", len(list));
+		PrintError(os.Stderr, list);
+	}
+
+	list = v.GetErrorList(Sorted);
+	if len(list) != 9 {  // sorting alone must not drop any error
+		t.Errorf("found %d sorted errors, expected 9", len(list));
+		PrintError(os.Stderr, list);
+	}
+
+	list = v.GetErrorList(NoMultiples);
+	if len(list) != 4 {  // presumably one each for File1:1, File1:2, File2:1 and File2:20
+		t.Errorf("found %d one-per-line errors, expected 4", len(list));
+		PrintError(os.Stderr, list);
+	}
+
+	if v.ErrorCount() != nerrors {
+		t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors);
+	}
+}
--- a/src/pkg/go/token/token.go
+++ b/src/pkg/go/token/token.go
@ -327,6 +327,7 @@ func (tok Token) IsKeyword() bool {
 // A Position is valid if the line number is > 0.
 //
 type Position struct {
+	Filename string;  // filename, if any
 	Offset int;  // byte offset, starting at 0
 	Line int;  // line number, starting at 1
 	Column int;  // column number, starting at 1 (character count)