diff --git a/src/pkg/go/scanner/Makefile b/src/pkg/go/scanner/Makefile index d47fecb7c1..7845fe5a7d 100644 --- a/src/pkg/go/scanner/Makefile +++ b/src/pkg/go/scanner/Makefile @@ -2,8 +2,9 @@ # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. + # DO NOT EDIT. Automatically generated by gobuild. -# gobuild -m >Makefile +# gobuild -m scanner.go errors.go >Makefile D=/go/ @@ -20,7 +21,7 @@ test: packages coverage: packages gotest - 6cov -g `pwd` | grep -v '_test\.go:' + 6cov -g $$(pwd) | grep -v '_test\.go:' %.$O: %.go $(GC) -I_obj $*.go @@ -32,16 +33,23 @@ coverage: packages $(AS) $*.s O1=\ + errors.$O\ + +O2=\ scanner.$O\ -phases: a1 +phases: a1 a2 _obj$D/scanner.a: phases a1: $(O1) - $(AR) grc _obj$D/scanner.a scanner.$O + $(AR) grc _obj$D/scanner.a errors.$O rm -f $(O1) +a2: $(O2) + $(AR) grc _obj$D/scanner.a scanner.$O + rm -f $(O2) + newpkg: clean mkdir -p _obj$D @@ -49,6 +57,7 @@ newpkg: clean $(O1): newpkg $(O2): a1 +$(O3): a2 nuke: clean rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a diff --git a/src/pkg/go/scanner/errors.go b/src/pkg/go/scanner/errors.go new file mode 100644 index 0000000000..54770f0201 --- /dev/null +++ b/src/pkg/go/scanner/errors.go @@ -0,0 +1,203 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package scanner + +import ( + "container/vector"; + "fmt"; + "go/token"; + "io"; + "os"; + "sort"; +) + + +// An implementation of an ErrorHandler may be provided to the Scanner. +// If a syntax error is encountered and a handler was installed, Error +// is called with a position and an error message. The position points +// to the beginning of the offending token. +// +type ErrorHandler interface { + Error(pos token.Position, msg string); +} + + +// ErrorVector implements the ErrorHandler interface. It must be +// initialized with Init(). 
It maintains a list of errors which can +// be retrieved with GetErrorList and GetError. +// +// A common usage pattern is to embed an ErrorVector alongside a +// scanner in a data structure that uses the scanner. By passing a +// reference to an ErrorVector to the scanner's Init call, default +// error handling is obtained. +// +type ErrorVector struct { + errors vector.Vector; +} + + +// Init initializes an ErrorVector. +func (h *ErrorVector) Init() { + h.errors.Init(0); +} + + +// NewErrorVector creates a new ErrorVector. +func NewErrorVector() *ErrorVector { + h := new(ErrorVector); + h.Init(); + return h; +} + + +// ErrorCount returns the number of errors collected. +func (h *ErrorVector) ErrorCount() int { + return h.errors.Len(); +} + + +// Within ErrorVector, an error is represented by an Error node. The +// position Pos, if valid, points to the beginning of the offending +// token, and the error condition is described by Msg. +// +type Error struct { + Pos token.Position; + Msg string; +} + + +func (e *Error) String() string { + s := e.Pos.Filename; + if s != "" { + s += ":"; + } + if e.Pos.IsValid() { + s += fmt.Sprintf("%d:%d:", e.Pos.Line, e.Pos.Column); + } + if s != "" { + s += " "; + } + return s + e.Msg; +} + + +// An ErrorList is a (possibly sorted) list of Errors. +type ErrorList []*Error + + +// ErrorList implements the SortInterface. +func (p ErrorList) Len() int { return len(p); } +func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; } + + +func (p ErrorList) Less(i, j int) bool { + e := &p[i].Pos; + f := &p[j].Pos; + // Note that it is not sufficient to simply compare file offsets because + // the offsets do not reflect modified line information (through //line + // comments). 
+ if e.Filename < f.Filename { + return true; + } + if e.Filename == f.Filename { + if e.Line < f.Line { + return true; + } + if e.Line == f.Line { + return e.Column < f.Column; + } + } + return false; +} + + +func (p ErrorList) String() string { + switch len(p) { + case 0: + return "unspecified error"; + case 1: + return p[0].String(); + } + return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1); +} + + +// These constants control the construction of the ErrorList +// returned by GetErrorList. +// +const ( + Raw = iota; // leave error list unchanged + Sorted; // sort error list by file, line, and column number + NoMultiples; // sort error list and leave only the first error per line +) + + +// GetErrorList returns the list of errors collected by an ErrorVector. +// The construction of the ErrorList returned is controlled by the mode +// parameter. If there are no errors, the result is nil. +// +func (h *ErrorVector) GetErrorList(mode int) ErrorList { + if h.errors.Len() == 0 { + return nil; + } + + list := make(ErrorList, h.errors.Len()); + for i := 0; i < h.errors.Len(); i++ { + list[i] = h.errors.At(i).(*Error); + } + + if mode >= Sorted { + sort.Sort(list); + } + + if mode >= NoMultiples { + var last token.Position; // initial last.Line is != any legal error line + i := 0; + for _, e := range list { + if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line { + last = e.Pos; + list[i] = e; + i++; + } + } + list = list[0 : i]; + } + + return list; +} + + +// GetError is like GetErrorList, but it returns an os.Error instead +// so that a nil result can be assigned to an os.Error variable and +// remains nil. +// +func (h *ErrorVector) GetError(mode int) os.Error { + if h.errors.Len() == 0 { + return nil; + } + + return h.GetErrorList(mode); +} + + +// ErrorVector implements the ErrorHandler interface. 
+func (h *ErrorVector) Error(pos token.Position, msg string) { + h.errors.Push(&Error{pos, msg}); +} + + +// PrintError is a utility function that prints a list of errors to w, +// one error per line, if the err parameter is an ErrorList. Otherwise +// it prints the err string. +// +func PrintError(w io.Writer, err os.Error) { + if list, ok := err.(ErrorList); ok { + for _, e := range list { + fmt.Fprintf(w, "%s\n", e); + } + } else { + fmt.Fprintf(w, "%s\n", err); + } +} diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 795d56f8ba..3a2d985144 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -9,23 +9,15 @@ package scanner import ( + "bytes"; "go/token"; + "go/scanner"; "strconv"; "unicode"; "utf8"; ) -// An implementation of an ErrorHandler may be provided to the Scanner. -// If a syntax error is encountered and a handler was installed, Error -// is called with a position and an error message. The position points -// to the beginning of the offending token. -// -type ErrorHandler interface { - Error(pos token.Position, msg string); -} - - // A Scanner holds the scanner's internal state while processing // a given text. It can be allocated as part of another data // structure but must be initialized via Init before use. For @@ -84,15 +76,17 @@ const ( // Init prepares the scanner S to tokenize the text src. Calls to Scan // will use the error handler err if they encounter a syntax error and // err is not nil. Also, for each error encountered, the Scanner field -// ErrorCount is incremented by one. The mode parameter determines how -// comments and illegal characters are handled. +// ErrorCount is incremented by one. The filename parameter is used as +// the filename in the token.Position returned by Scan for each token. The +// mode parameter determines how comments and illegal characters are +// handled. 
// +func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) { // Explicitly initialize all fields since a scanner may be reused. S.src = src; S.err = err; S.mode = mode; - S.pos = token.Position{0, 1, 0}; + S.pos = token.Position{filename, 0, 1, 0}; S.offset = 0; S.ErrorCount = 0; S.next(); @@ -133,6 +127,8 @@ func (S *Scanner) expect(ch int) { } +var prefix = []byte{'l', 'i', 'n', 'e', ' '}; // "line " + func (S *Scanner) scanComment(pos token.Position) { // first '/' already consumed @@ -143,6 +139,22 @@ func (S *Scanner) scanComment(pos token.Position) { if S.ch == '\n' { // '\n' is not part of the comment // (the comment ends on the same line where it started) + if pos.Column == 1 { + text := S.src[pos.Offset+2 : S.pos.Offset]; + if bytes.HasPrefix(text, prefix) { + // comment starts at beginning of line with "//line "; + // get filename and line number, if any + i := bytes.Index(text, []byte{':'}); + if i >= 0 { + if line, err := strconv.Atoi(string(text[i+1 : len(text)])); err == nil && line > 0 { + // valid //line filename:line comment; + // update scanner position + S.pos.Filename = string(text[len(prefix) : i]); + S.pos.Line = line; + } + } + } + } return; } } @@ -492,9 +504,9 @@ scan_again: // false (usually when the token value is token.EOF). The result is the number // of errors encountered. 
// -func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int { +func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int { var s Scanner; - s.Init(src, err, mode); + s.Init(filename, src, err, mode); for f(s.Scan()) { // action happens in f } diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index 18dae19cc2..0cb200b48f 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -7,6 +7,7 @@ package scanner import ( "go/scanner"; "go/token"; + "os"; "strings"; "testing"; ) @@ -178,19 +179,35 @@ func NewlineCount(s string) int { } +func checkPos(t *testing.T, lit string, pos, expected token.Position) { + if pos.Filename != expected.Filename { + t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename); + } + if pos.Offset != expected.Offset { + t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset); + } + if pos.Line != expected.Line { + t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line); + } + if pos.Column!= expected.Column { + t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column); + } +} + + // Verify that calling Scan() provides the correct results. 
func TestScan(t *testing.T) { // make source var src string; - for i, e := range tokens { + for _, e := range tokens { src += e.lit + whitespace; } whitespace_linecount := NewlineCount(whitespace); // verify scan index := 0; - eloc := token.Position{0, 1, 1}; - nerrors := scanner.Tokenize(strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments, + epos := token.Position{"", 0, 1, 1}; + nerrors := scanner.Tokenize("", strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments, func (pos token.Position, tok token.Token, litb []byte) bool { e := elt{token.EOF, "", special}; if index < len(tokens) { @@ -199,17 +216,9 @@ func TestScan(t *testing.T) { lit := string(litb); if tok == token.EOF { lit = ""; - eloc.Column = 0; - } - if pos.Offset != eloc.Offset { - t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset); - } - if pos.Line != eloc.Line { - t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line); - } - if pos.Column!= eloc.Column { - t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column); + epos.Column = 0; } + checkPos(t, lit, pos, epos); if tok != e.tok { t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String()); } @@ -219,12 +228,12 @@ func TestScan(t *testing.T) { if tokenclass(tok) != e.class { t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class); } - eloc.Offset += len(lit) + len(whitespace); - eloc.Line += NewlineCount(lit) + whitespace_linecount; + epos.Offset += len(lit) + len(whitespace); + epos.Line += NewlineCount(lit) + whitespace_linecount; if tok == token.COMMENT && litb[1] == '/' { // correct for unaccounted '/n' in //-style comment - eloc.Offset++; - eloc.Line++; + epos.Offset++; + epos.Line++; } index++; return tok != token.EOF; @@ -236,12 +245,60 @@ func TestScan(t *testing.T) { } +type seg struct { + srcline string; // source text of this segment + filename string; // filename for current token + line int; // 
line number for current token +} + + +var segments = []seg{ + // exactly one token per line since the test consumes one token per segment + seg{ " line1", "TestLineComments", 1 }, + seg{ "\nline2", "TestLineComments", 2 }, + seg{ "\nline3 //line File1.go:100", "TestLineComments", 3 }, // bad line comment, ignored + seg{ "\nline4", "TestLineComments", 4 }, + seg{ "\n//line File1.go:100\n line100", "File1.go", 100 }, + seg{ "\n//line File2.go:200\n line200", "File2.go", 200 }, + seg{ "\n//line :1\n line1", "", 1 }, + seg{ "\n//line foo:42\n line42", "foo", 42 }, + seg{ "\n //line foo:42\n line44", "foo", 44 }, // bad line comment, ignored + seg{ "\n//line foo 42\n line46", "foo", 46 }, // bad line comment, ignored + seg{ "\n//line foo:42 extra text\n line48", "foo", 48 }, // bad line comment, ignored + seg{ "\n//line foo:42\n line42", "foo", 42 }, + seg{ "\n//line foo:42\n line42", "foo", 42 }, + seg{ "\n//line File1.go:100\n line100", "File1.go", 100 }, +} + + +// Verify that comments of the form "//line filename:line" are interpreted correctly. +func TestLineComments(t *testing.T) { + // make source + var src string; + for _, e := range segments { + src += e.srcline; + } + + // verify scan + var S scanner.Scanner; + S.Init("TestLineComments", strings.Bytes(src), nil, 0); + for _, s := range segments { + pos, tok, lit := S.Scan(); + checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column}); + } + + if S.ErrorCount != 0 { + t.Errorf("found %d errors", S.ErrorCount); + } +} + + // Verify that initializing the same scanner more then once works correctly. 
func TestInit(t *testing.T) { var s scanner.Scanner; // 1st init - s.Init(strings.Bytes("if true { }"), nil, 0); + s.Init("", strings.Bytes("if true { }"), nil, 0); s.Scan(); // if s.Scan(); // true pos, tok, lit := s.Scan(); // { @@ -250,7 +307,7 @@ func TestInit(t *testing.T) { } // 2nd init - s.Init(strings.Bytes("go true { ]"), nil, 0); + s.Init("", strings.Bytes("go true { ]"), nil, 0); pos, tok, lit = s.Scan(); // go if tok != token.GO { t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO); @@ -266,7 +323,7 @@ func TestIllegalChars(t *testing.T) { var s scanner.Scanner; const src = "*?*$*@*"; - s.Init(strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars); + s.Init("", strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars); for offs, ch := range src { pos, tok, lit := s.Scan(); if pos.Offset != offs { @@ -281,3 +338,47 @@ func TestIllegalChars(t *testing.T) { t.Errorf("found %d errors", s.ErrorCount); } } + + +func TestStdErrorHander(t *testing.T) { + const src = + "@\n" // illegal character, causes an error + "@ @\n" // two errors on the same line + "//line File2:20\n" + "@\n" // different file, but same line + "//line File2:1\n" + "@ @\n" // same file, decreasing line number + "//line File1:1\n" + "@ @ @" // original file, line 1 again + ; + + var s scanner.Scanner; + v := NewErrorVector(); + nerrors := scanner.Tokenize("File1", strings.Bytes(src), v, 0, + func (pos token.Position, tok token.Token, litb []byte) bool { + return tok != token.EOF; + } + ); + + list := v.GetErrorList(Raw); + if len(list) != 9 { + t.Errorf("found %d raw errors, expected 9", len(list)); + PrintError(os.Stderr, list); + } + + list = v.GetErrorList(Sorted); + if len(list) != 9 { + t.Errorf("found %d sorted errors, expected 9", len(list)); + PrintError(os.Stderr, list); + } + + list = v.GetErrorList(NoMultiples); + if len(list) != 4 { + t.Errorf("found %d one-per-line errors, expected 4", len(list)); + PrintError(os.Stderr, list); + } + + if 
v.ErrorCount() != nerrors { + t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors); + } +} diff --git a/src/pkg/go/token/token.go b/src/pkg/go/token/token.go index a70a75a540..3197b6637c 100644 --- a/src/pkg/go/token/token.go +++ b/src/pkg/go/token/token.go @@ -327,6 +327,7 @@ func (tok Token) IsKeyword() bool { // A Position is valid if the line number is > 0. // type Position struct { + Filename string; // filename, if any Offset int; // byte offset, starting at 0 Line int; // line number, starting at 1 Column int; // column number, starting at 1 (character count)