- added Filename field to token.Position

- handle //line filename:line comments in scanner - moved error handling code used by various scanner clients to errors.go - added extra tests R=rsc DELTA=385 (343 added, 18 deleted, 24 changed) OCL=31551 CL=31601
2024-11-20 08:34:41 -07:00 · 2009-07-14 10:44:57 -07:00 · 2009-07-14 10:44:57 -07:00 · 14228f3898
commit 14228f3898
parent 10e995fba8
5 changed files with 366 additions and 40 deletions
--- a/src/pkg/go/scanner/Makefile
+++ b/src/pkg/go/scanner/Makefile
@ -2,8 +2,9 @@
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 # DO NOT EDIT.  Automatically generated by gobuild.
-# gobuild -m >Makefile
+# gobuild -m scanner.go errors.go >Makefile
 D=/go/
@ -20,7 +21,7 @@ test: packages
 coverage: packages
 	gotest
-	6cov -g `pwd` | grep -v '_test\.go:'
+	6cov -g $$(pwd) | grep -v '_test\.go:'
 %.$O: %.go
 	$(GC) -I_obj $*.go
@ -32,16 +33,23 @@ coverage: packages
 	$(AS) $*.s
 O1=\
 	errors.$O\
 O2=\
 	scanner.$O\
-phases: a1
+phases: a1 a2
 _obj$D/scanner.a: phases
 a1: $(O1)
-	$(AR) grc _obj$D/scanner.a scanner.$O
+	$(AR) grc _obj$D/scanner.a errors.$O
 	rm -f $(O1)
 a2: $(O2)
 	$(AR) grc _obj$D/scanner.a scanner.$O
 	rm -f $(O2)
 newpkg: clean
 	mkdir -p _obj$D
@ -49,6 +57,7 @@ newpkg: clean
 $(O1): newpkg
 $(O2): a1
 $(O3): a2
 nuke: clean
 	rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a
--- a/src/pkg/go/scanner/errors.go
+++ b/src/pkg/go/scanner/errors.go
@ -0,0 +1,203 @@
 // Copyright 2009 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package scanner
 import (
 	"container/vector";
 	"fmt";
 	"go/token";
 	"io";
 	"os";
 	"sort";
 )
 // An implementation of an ErrorHandler may be provided to the Scanner.
 // If a syntax error is encountered and a handler was installed, Error
 // is called with a position and an error message. The position points
 // to the beginning of the offending token.
 //
 type ErrorHandler interface {
 	// Error is invoked once per reported error; pos is the position of
 	// the error and msg is the text describing it.
 	Error(pos token.Position, msg string);
 }
 // ErrorVector implements the ErrorHandler interface. It must be
 // initialized with Init(). It maintains a list of errors which can
 // be retrieved with GetErrorList and GetError.
 //
 // A common usage pattern is to embed an ErrorVector alongside a
 // scanner in a data structure that uses the scanner. By passing a
 // reference to an ErrorVector to the scanner's Init call, default
 // error handling is obtained.
 //
 type ErrorVector struct {
 	errors vector.Vector;  // the collected *Error values, in the order they were reported
 }
 // Init initializes an ErrorVector by initializing its underlying
 // vector of errors. It must be called before the ErrorVector is used.
 func (h *ErrorVector) Init() {
 	h.errors.Init(0);
 }
 // NewErrorVector allocates a fresh ErrorVector, initializes it via
 // Init, and returns a pointer to it.
 func NewErrorVector() *ErrorVector {
 	v := &ErrorVector{};
 	v.Init();
 	return v;
 }
 // ErrorCount returns the number of errors collected, that is, the
 // current length of the underlying error vector.
 func (h *ErrorVector) ErrorCount() int {
 	return h.errors.Len();
 }
 // An Error is the node type an ErrorVector stores for each reported
 // error. Pos, when valid, is the position at which the offending
 // token begins; Msg is the text describing the error condition.
 //
 type Error struct {
 	Pos token.Position;
 	Msg string;
 }
 // String formats the error as "filename:line:column: msg". The
 // filename part is omitted if Pos.Filename is empty, the line:column
 // part is omitted if Pos is not valid, and if both are missing the
 // result is just Msg.
 //
 func (e *Error) String() string {
 	prefix := "";
 	if e.Pos.Filename != "" {
 		prefix = e.Pos.Filename + ":";
 	}
 	if e.Pos.IsValid() {
 		prefix += fmt.Sprintf("%d:%d:", e.Pos.Line, e.Pos.Column);
 	}
 	if prefix == "" {
 		// no position information at all: the message stands alone
 		return e.Msg;
 	}
 	return prefix + " " + e.Msg;
 }
 // An ErrorList is a list of *Error values; it may or may not be sorted.
 type ErrorList []*Error
 // Len, Swap, and Less implement the SortInterface so that an
 // ErrorList can be handed to sort.Sort.
 func (p ErrorList) Len() int {
 	return len(p);
 }
 func (p ErrorList) Swap(i, j int) {
 	p[j], p[i] = p[i], p[j];
 }
 // Less orders errors by filename first, then by line, then by column.
 func (p ErrorList) Less(i, j int) bool {
 	a, b := &p[i].Pos, &p[j].Pos;
 	// File offsets alone cannot be used for the comparison: they do
 	// not reflect filename and line information that was modified
 	// through //line comments.
 	if a.Filename != b.Filename {
 		return a.Filename < b.Filename;
 	}
 	if a.Line != b.Line {
 		return a.Line < b.Line;
 	}
 	return a.Column < b.Column;
 }
 // String returns the text of the first error in the list, followed
 // by a count of the remaining errors if there is more than one. For
 // an empty list the result is "unspecified error".
 func (p ErrorList) String() string {
 	n := len(p);
 	if n == 0 {
 		return "unspecified error";
 	}
 	first := p[0].String();
 	if n == 1 {
 		return first;
 	}
 	return fmt.Sprintf("%s (and %d more errors)", first, n - 1);
 }
 // These constants control the construction of the ErrorList
 // returned by GetErrorList. The modes are cumulative: GetErrorList
 // compares its mode argument using >=, so each mode includes the
 // processing of the modes listed before it.
 //
 const (
 	Raw = iota;  // leave error list unchanged
 	Sorted;  // sort error list by file, line, and column number
 	NoMultiples;  // sort error list and leave only the first error per line
 )
 // GetErrorList returns the errors collected by an ErrorVector as a
 // newly allocated ErrorList. The mode parameter (Raw, Sorted, or
 // NoMultiples) selects how the list is post-processed. The result
 // is nil if no errors were collected.
 //
 func (h *ErrorVector) GetErrorList(mode int) ErrorList {
 	n := h.errors.Len();
 	if n == 0 {
 		return nil;
 	}
 	// copy the collected errors so that the vector itself stays untouched
 	list := make(ErrorList, n);
 	for i := 0; i < n; i++ {
 		list[i] = h.errors.At(i).(*Error);
 	}
 	if mode < Sorted {
 		return list;
 	}
 	sort.Sort(list);
 	if mode < NoMultiples {
 		return list;
 	}
 	// Compact the sorted list in place, keeping only the first error
 	// for each (filename, line) pair. The zero Position has Line 0,
 	// which differs from any legal error line.
 	var prev token.Position;
 	kept := 0;
 	for _, e := range list {
 		if e.Pos.Filename == prev.Filename && e.Pos.Line == prev.Line {
 			continue;
 		}
 		prev = e.Pos;
 		list[kept] = e;
 		kept++;
 	}
 	return list[0 : kept];
 }
 // GetError behaves like GetErrorList but has result type os.Error.
 // When no errors were collected it returns a plain nil, so that the
 // result assigned to an os.Error variable compares equal to nil.
 //
 func (h *ErrorVector) GetError(mode int) os.Error {
 	if h.errors.Len() > 0 {
 		return h.GetErrorList(mode);
 	}
 	// Do not route the empty case through GetErrorList: a nil
 	// ErrorList wrapped in an os.Error would no longer be nil.
 	return nil;
 }
 // ErrorVector implements the ErrorHandler interface.
 // Error records the reported error by appending a new *Error holding
 // pos and msg to the list of collected errors.
 func (h *ErrorVector) Error(pos token.Position, msg string) {
 	h.errors.Push(&Error{pos, msg});
 }
 // PrintError is a utility function that writes err to w. If err is
 // an ErrorList, every error in the list is printed on a line of its
 // own; for any other error the err string is printed on one line.
 //
 func PrintError(w io.Writer, err os.Error) {
 	list, isList := err.(ErrorList);
 	if !isList {
 		fmt.Fprintf(w, "%s\n", err);
 		return;
 	}
 	for _, e := range list {
 		fmt.Fprintf(w, "%s\n", e);
 	}
 }
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@ -9,23 +9,15 @@
 package scanner
 import (
 	"bytes";
 	"go/token";
 	"go/scanner";
 	"strconv";
 	"unicode";
 	"utf8";
 )
 // An implementation of an ErrorHandler may be provided to the Scanner.
 // If a syntax error is encountered and a handler was installed, Error
 // is called with a position and an error message. The position points
 // to the beginning of the offending token.
 //
 type ErrorHandler interface {
 	Error(pos token.Position, msg string);
 }
 // A Scanner holds the scanner's internal state while processing
 // a given text.  It can be allocated as part of another data
 // structure but must be initialized via Init before use. For
@ -84,15 +76,17 @@ const (
 // Init prepares the scanner S to tokenize the text src. Calls to Scan
 // will use the error handler err if they encounter a syntax error and
 // err is not nil. Also, for each error encountered, the Scanner field
-// ErrorCount is incremented by one. The mode parameter determines how
+// ErrorCount is incremented by one. The filename parameter is used as
-// comments and illegal characters are handled.
+// filename in the token.Position returned by Scan for each token. The
 // mode parameter determines how comments and illegal characters are
 // handled.
 //
-func (S *Scanner) Init(src []byte, err ErrorHandler, mode uint) {
+func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) {
 	// Explicitly initialize all fields since a scanner may be reused.
 	S.src = src;
 	S.err = err;
 	S.mode = mode;
-	S.pos = token.Position{0, 1, 0};
+	S.pos = token.Position{filename, 0, 1, 0};
 	S.offset = 0;
 	S.ErrorCount = 0;
 	S.next();
@ -133,6 +127,8 @@ func (S *Scanner) expect(ch int) {
 }
 var prefix = []byte{'l', 'i', 'n', 'e', ' '};  // "line "
 func (S *Scanner) scanComment(pos token.Position) {
 	// first '/' already consumed
@ -143,6 +139,22 @@ func (S *Scanner) scanComment(pos token.Position) {
 			if S.ch == '\n' {
 				// '\n' is not part of the comment
 				// (the comment ends on the same line where it started)
 				if pos.Column == 1 {
 					text := S.src[pos.Offset+2 : S.pos.Offset];
 					if bytes.HasPrefix(text, prefix) {
 						// comment starts at beginning of line with "//line ";
 						// get filename and line number, if any
 						i := bytes.Index(text, []byte{':'});
 						if i >= 0 {
 							if line, err := strconv.Atoi(string(text[i+1 : len(text)])); err == nil && line > 0 {
 								// valid //line filename:line comment;
 								// update scanner position
 								S.pos.Filename = string(text[len(prefix) : i]);
 								S.pos.Line = line;
 							}
 						}
 					}
 				}
 				return;
 			}
 		}
@ -492,9 +504,9 @@ scan_again:
 // false (usually when the token value is token.EOF). The result is the number
 // of errors encountered.
 //
-func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
+func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
 	var s Scanner;
-	s.Init(src, err, mode);
+	s.Init(filename, src, err, mode);
 	for f(s.Scan()) {
 		// action happens in f
 	}
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@ -7,6 +7,7 @@ package scanner
 import (
 	"go/scanner";
 	"go/token";
 	"os";
 	"strings";
 	"testing";
 )
@ -178,19 +179,35 @@ func NewlineCount(s string) int {
 }
 // checkPos reports a test error through t for every field of pos
 // (Filename, Offset, Line, and Column, in that order) that differs
 // from the corresponding field of expected. The literal lit is used
 // to identify the token in the error messages.
 func checkPos(t *testing.T, lit string, pos, expected token.Position) {
 	if got, want := pos.Filename, expected.Filename; got != want {
 		t.Errorf("bad filename for %s: got %s, expected %s", lit, got, want);
 	}
 	if got, want := pos.Offset, expected.Offset; got != want {
 		t.Errorf("bad position for %s: got %d, expected %d", lit, got, want);
 	}
 	if got, want := pos.Line, expected.Line; got != want {
 		t.Errorf("bad line for %s: got %d, expected %d", lit, got, want);
 	}
 	if got, want := pos.Column, expected.Column; got != want {
 		t.Errorf("bad column for %s: got %d, expected %d", lit, got, want);
 	}
 }
 // Verify that calling Scan() provides the correct results.
 func TestScan(t *testing.T) {
 	// make source
 	var src string;
-	for i, e := range tokens {
+	for _, e := range tokens {
 		src += e.lit + whitespace;
 	}
 	whitespace_linecount := NewlineCount(whitespace);
 	// verify scan
 	index := 0;
-	eloc := token.Position{0, 1, 1};
+	epos := token.Position{"", 0, 1, 1};
-	nerrors := scanner.Tokenize(strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
+	nerrors := scanner.Tokenize("", strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
 		func (pos token.Position, tok token.Token, litb []byte) bool {
 			e := elt{token.EOF, "", special};
 			if index < len(tokens) {
@ -199,17 +216,9 @@ func TestScan(t *testing.T) {
 			lit := string(litb);
 			if tok == token.EOF {
 				lit = "<EOF>";
-				eloc.Column = 0;
+				epos.Column = 0;
 			}
 			if pos.Offset != eloc.Offset {
 				t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset);
 			}
 			if pos.Line != eloc.Line {
 				t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line);
 			}
 			if pos.Column!= eloc.Column {
 				t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column);
 			}
 			checkPos(t, lit, pos, epos);
 			if tok != e.tok {
 				t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String());
 			}
@ -219,12 +228,12 @@ func TestScan(t *testing.T) {
 			if tokenclass(tok) != e.class {
 				t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class);
 			}
-			eloc.Offset += len(lit) + len(whitespace);
+			epos.Offset += len(lit) + len(whitespace);
-			eloc.Line += NewlineCount(lit) + whitespace_linecount;
+			epos.Line += NewlineCount(lit) + whitespace_linecount;
 			if tok == token.COMMENT && litb[1] == '/' {
 				// correct for unaccounted '\n' in //-style comment
-				eloc.Offset++;
+				epos.Offset++;
-				eloc.Line++;
+				epos.Line++;
 			}
 			index++;
 			return tok != token.EOF;
@ -236,12 +245,60 @@ func TestScan(t *testing.T) {
 }
 // A seg is one entry of the segments table used by TestLineComments:
 // a piece of source text together with the filename and line number
 // expected for the token scanned from it.
 type seg struct {
 	srcline string;  // a line of source text
 	filename string;  // filename for current token
 	line int;  // line number for current token
 }
 // segments is the test table for TestLineComments. The srcline fields
 // are concatenated to form the scanned source, so the expected line
 // numbers of later entries depend on the entries before them.
 var segments = []seg{
 	// exactly one token per line since the test consumes one token per segment
 	seg{ "  line1", "TestLineComments", 1 },
 	seg{ "\nline2", "TestLineComments", 2 },
 	seg{ "\nline3  //line File1.go:100", "TestLineComments", 3 },  // bad line comment, ignored
 	seg{ "\nline4", "TestLineComments", 4 },
 	seg{ "\n//line File1.go:100\n  line100", "File1.go", 100 },
 	seg{ "\n//line File2.go:200\n  line200", "File2.go", 200 },
 	seg{ "\n//line :1\n  line1", "", 1 },
 	seg{ "\n//line foo:42\n  line42", "foo", 42 },
 	seg{ "\n //line foo:42\n  line44", "foo", 44 },  // bad line comment, ignored
 	seg{ "\n//line foo 42\n  line46", "foo", 46 },  // bad line comment, ignored
 	seg{ "\n//line foo:42 extra text\n  line48", "foo", 48 },  // bad line comment, ignored
 	seg{ "\n//line foo:42\n  line42", "foo", 42 },
 	seg{ "\n//line foo:42\n  line42", "foo", 42 },
 	seg{ "\n//line File1.go:100\n  line100", "File1.go", 100 },
 }
 // Verify that comments of the form "//line filename:line" are interpreted correctly.
 // The scanned source is the concatenation of all segments; one token is scanned per
 // segment and its filename and line are compared against the segment's expectations.
 func TestLineComments(t *testing.T) {
 	// make source
 	var src string;
 	for _, e := range segments {
 		src += e.srcline;
 	}
 	// verify scan
 	var S scanner.Scanner;
 	S.Init("TestLineComments", strings.Bytes(src), nil, 0);
 	for _, s := range segments {
 		pos, tok, lit := S.Scan();
 		// Only filename and line are under test here: offset and column are
 		// copied from the scanned position so that checkPos accepts them.
 		checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column});
 	}
 	if S.ErrorCount != 0 {
 		t.Errorf("found %d errors", S.ErrorCount);
 	}
 }
 // Verify that initializing the same scanner more than once works correctly.
 func TestInit(t *testing.T) {
 	var s scanner.Scanner;
 	// 1st init
-	s.Init(strings.Bytes("if true { }"), nil, 0);
+	s.Init("", strings.Bytes("if true { }"), nil, 0);
 	s.Scan();  // if
 	s.Scan();  // true
 	pos, tok, lit := s.Scan();  // {
@ -250,7 +307,7 @@ func TestInit(t *testing.T) {
 	}
 	// 2nd init
-	s.Init(strings.Bytes("go true { ]"), nil, 0);
+	s.Init("", strings.Bytes("go true { ]"), nil, 0);
 	pos, tok, lit = s.Scan();  // go
 	if tok != token.GO {
 		t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO);
@ -266,7 +323,7 @@ func TestIllegalChars(t *testing.T) {
 	var s scanner.Scanner;
 	const src = "*?*$*@*";
-	s.Init(strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
+	s.Init("", strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
 	for offs, ch := range src {
 		pos, tok, lit := s.Scan();
 		if pos.Offset != offs {
@ -281,3 +338,47 @@ func TestIllegalChars(t *testing.T) {
 		t.Errorf("found %d errors", s.ErrorCount);
 	}
 }
 // Verify that an ErrorVector installed as error handler collects the
 // scanner's errors and that GetErrorList honors the Raw, Sorted, and
 // NoMultiples modes.
 func TestStdErrorHander(t *testing.T) {
 	// The source contains nine '@' characters in total, spread over
 	// lines whose filename and line number are changed by //line comments.
 	const src =
 		"@\n"  // illegal character, cause an error
 		"@ @\n"  // two errors on the same line
 		"//line File2:20\n"
 		"@\n"  // different file, but same line
 		"//line File2:1\n"
 		"@ @\n"  // same file, decreasing line number
 		"//line File1:1\n"
 		"@ @ @"  // original file, line 1 again
 	;
 	var s scanner.Scanner;
 	v := NewErrorVector();
 	nerrors := scanner.Tokenize("File1", strings.Bytes(src), v, 0,
 		func (pos token.Position, tok token.Token, litb []byte) bool {
 			return tok != token.EOF;
 		}
 	);
 	// Raw and Sorted must both return every collected error.
 	list := v.GetErrorList(Raw);
 	if len(list) != 9 {
 		t.Errorf("found %d raw errors, expected 9", len(list));
 		PrintError(os.Stderr, list);
 	}
 	list = v.GetErrorList(Sorted);
 	if len(list) != 9 {
 		t.Errorf("found %d sorted errors, expected 9", len(list));
 		PrintError(os.Stderr, list);
 	}
 	// NoMultiples keeps one error per (filename, line) pair; presumably
 	// these are File1:1, File1:2, File2:1, and File2:20.
 	list = v.GetErrorList(NoMultiples);
 	if len(list) != 4 {
 		t.Errorf("found %d one-per-line errors, expected 4", len(list));
 		PrintError(os.Stderr, list);
 	}
 	// The handler must have seen as many errors as Tokenize reported.
 	if v.ErrorCount() != nerrors {
 		t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors);
 	}
 }
--- a/src/pkg/go/token/token.go
+++ b/src/pkg/go/token/token.go
@ -327,6 +327,7 @@ func (tok Token) IsKeyword() bool {
 // A Position is valid if the line number is > 0.
 //
 type Position struct {
 	Filename string;  // filename, if any
 	Offset int;  // byte offset, starting at 0
 	Line int;  // line number, starting at 1
 	Column int;  // column number, starting at 1 (character count)