From ab224094d0bf035aff5e70cbd818fe29666cc0d1 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 14 Sep 2012 12:22:45 -0400 Subject: [PATCH] go/build: use custom file readers to avoid I/O When reading Go files, read through import block. When reading non-Go files, read only leading comments. R=nigeltao, adg, r CC=golang-dev https://golang.org/cl/6493068 --- src/pkg/go/build/build.go | 8 +- src/pkg/go/build/read.go | 238 ++++++++++++++++++++++++++++++++++ src/pkg/go/build/read_test.go | 226 ++++++++++++++++++++++++++++++++ 3 files changed, 471 insertions(+), 1 deletion(-) create mode 100644 src/pkg/go/build/read.go create mode 100644 src/pkg/go/build/read_test.go diff --git a/src/pkg/go/build/build.go b/src/pkg/go/build/build.go index 53daa6db2fb..43ad4531ed2 100644 --- a/src/pkg/go/build/build.go +++ b/src/pkg/go/build/build.go @@ -512,7 +512,13 @@ Found: if err != nil { return p, err } - data, err := ioutil.ReadAll(f) + + var data []byte + if strings.HasSuffix(filename, ".go") { + data, err = readImports(f, false) + } else { + data, err = readComments(f) + } f.Close() if err != nil { return p, fmt.Errorf("read %s: %v", filename, err) diff --git a/src/pkg/go/build/read.go b/src/pkg/go/build/read.go new file mode 100644 index 00000000000..c8079dfd15d --- /dev/null +++ b/src/pkg/go/build/read.go @@ -0,0 +1,238 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// Import-section reader of package build (src/pkg/go/build/read.go).
// The declarations below use the standard-library packages bufio, errors and io.

// importReader reads the beginning of a source file one byte at a time,
// remembering every byte it consumes, so that a caller can obtain just the
// prefix of the file it needs instead of reading the whole file.
type importReader struct {
	b    *bufio.Reader // source of bytes
	buf  []byte        // every byte consumed from b so far
	peek byte          // one byte of lookahead; 0 means nothing is buffered
	err  error         // first I/O, NUL, or syntax error encountered, if any
	eof  bool          // whether b has reported io.EOF
	nerr int           // calls to peekByte made since err was set
}

// isIdent reports whether c may appear in an identifier: an ASCII letter or
// digit, an underscore, or any byte >= 0x80 (non-ASCII bytes are accepted
// without being decoded).
func isIdent(c byte) bool {
	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= 0x80
}

var (
	errSyntax = errors.New("syntax error")
	errNUL    = errors.New("unexpected NUL in input")
)

// syntaxError records a syntax error, but only if no error (I/O, NUL or
// syntax) has been recorded already.
func (r *importReader) syntaxError() {
	if r.err == nil {
		r.err = errSyntax
	}
}

// readByte reads the next byte from the input, saves it in r.buf, and returns it.
// On failure it returns 0: end of input sets r.eof, any other error is stored
// in r.err unless an earlier error is already there.
// A NUL byte in the input is saved in r.buf but reported as errNUL, because
// 0 is the value this reader uses to mean "no byte".
func (r *importReader) readByte() byte {
	c, err := r.b.ReadByte()
	if err == nil {
		r.buf = append(r.buf, c)
		if c == 0 {
			err = errNUL
		}
	}
	if err != nil {
		if err == io.EOF {
			r.eof = true
		} else if r.err == nil {
			r.err = err
		}
		c = 0
	}
	return c
}

// peekByte returns the next byte from the input reader but does not advance beyond it.
// If skipSpace is set, peekByte first skips spaces, semicolons and comments.
// It returns 0 at end of input or once an error has been recorded.
func (r *importReader) peekByte(skipSpace bool) byte {
	if r.err != nil {
		// Callers are expected to stop once r.err is set; a very large
		// number of further calls means some loop is not checking it.
		if r.nerr++; r.nerr > 10000 {
			panic("go/build: import reader looping")
		}
		return 0
	}

	// Use r.peek as first input byte.
	// Don't just return r.peek here: it might have been left by peekByte(false)
	// and this might be peekByte(true).
	c := r.peek
	if c == 0 {
		c = r.readByte()
	}
	for r.err == nil && !r.eof {
		if skipSpace {
			// For the purposes of this reader, semicolons are never necessary to
			// understand the input and are treated as spaces.
			switch c {
			case ' ', '\f', '\t', '\r', '\n', ';':
				c = r.readByte()
				continue

			case '/':
				c = r.readByte()
				if c == '/' {
					// Line comment: consume through the newline (or EOF).
					for c != '\n' && r.err == nil && !r.eof {
						c = r.readByte()
					}
				} else if c == '*' {
					// Block comment: slide a two-byte window (c, c1) over
					// the input until it holds "*/". Starting with c1 == 0
					// keeps the '*' of the opening "/*" from being paired
					// with a following '/', so "/*/" does not close.
					var c1 byte
					for (c != '*' || c1 != '/') && r.err == nil {
						if r.eof {
							// Unterminated comment.
							r.syntaxError()
						}
						c, c1 = c1, r.readByte()
					}
				} else {
					// A lone '/' cannot start anything this reader accepts.
					r.syntaxError()
				}
				c = r.readByte()
				continue
			}
		}
		break
	}
	r.peek = c
	return r.peek
}

// nextByte is like peekByte but advances beyond the returned byte.
func (r *importReader) nextByte(skipSpace bool) byte {
	c := r.peekByte(skipSpace)
	r.peek = 0
	return c
}

// readKeyword reads the given keyword from the input, skipping leading
// spaces and comments.
// If the keyword is not present, or is immediately followed by another
// identifier byte, readKeyword records a syntax error.
func (r *importReader) readKeyword(kw string) {
	r.peekByte(true)
	for i := 0; i < len(kw); i++ {
		if r.nextByte(false) != kw[i] {
			r.syntaxError()
			return
		}
	}
	if isIdent(r.peekByte(false)) {
		r.syntaxError()
	}
}

// readIdent reads an identifier from the input, skipping leading spaces
// and comments.
// If an identifier is not present, readIdent records a syntax error.
func (r *importReader) readIdent() {
	c := r.peekByte(true)
	if !isIdent(c) {
		r.syntaxError()
		return
	}
	for isIdent(r.peekByte(false)) {
		r.peek = 0
	}
}

// readString reads a quoted string literal (raw `...` or interpreted "...")
// from the input, skipping leading spaces and comments.
// If a string literal is not present, or is not terminated, readString
// records a syntax error.
func (r *importReader) readString() {
	switch r.nextByte(true) {
	case '`':
		for r.err == nil {
			if r.nextByte(false) == '`' {
				break
			}
			if r.eof {
				r.syntaxError()
			}
		}
	case '"':
		for r.err == nil {
			c := r.nextByte(false)
			if c == '"' {
				break
			}
			if r.eof || c == '\n' {
				r.syntaxError()
			}
			if c == '\\' {
				// Skip the escaped byte so that \" does not end the string.
				r.nextByte(false)
			}
		}
	default:
		r.syntaxError()
	}
}

// readImport reads an import clause - optional identifier or '.' followed by
// quoted string - from the input.
func (r *importReader) readImport() {
	c := r.peekByte(true)
	if c == '.' {
		r.peek = 0
	} else if isIdent(c) {
		r.readIdent()
	}
	r.readString()
}

// readComments is like ioutil.ReadAll, except that it only reads the leading
// block of comments (and spaces) in the file.
// It returns the bytes read, along with any error recorded while skipping.
func readComments(f io.Reader) ([]byte, error) {
	r := &importReader{b: bufio.NewReader(f)}
	r.peekByte(true)
	if r.err == nil && !r.eof {
		// Didn't reach EOF, so must have found a non-space byte. Remove it.
		r.buf = r.buf[:len(r.buf)-1]
	}
	return r.buf, r.err
}

// readImports is like ioutil.ReadAll, except that it expects a Go file as input
// and stops reading the input once the imports have completed.
//
// If reportSyntaxError is false, a syntax error is not returned: the rest of
// the input is read and the entire input is returned with a nil error.
// Other errors (I/O errors, NUL bytes) are returned either way, together
// with the bytes read so far.
func readImports(f io.Reader, reportSyntaxError bool) ([]byte, error) {
	r := &importReader{b: bufio.NewReader(f)}

	r.readKeyword("package")
	r.readIdent()
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		return r.buf[:len(r.buf)-1], nil
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax && !reportSyntaxError {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
	}

	return r.buf, r.err
}

// The patch continues with the new file src/pkg/go/build/read_test.go,
// which opens with the same Go Authors BSD-style license header.
+ +package build + +import ( + "io" + "strings" + "testing" +) + +const quote = "`" + +type readTest struct { + // Test input contains ℙ where the reader under test (readImports or readComments) should stop; testRead removes ℙ before reading. + in string + err string // substring expected in the returned error; empty means success is expected +} + +var readImportsTests = []readTest{ + { + `package p`, + "", + }, + { + `package p; import "x"`, + "", + }, + { + `package p; import . "x"`, + "", + }, + { + `package p; import "x";ℙvar x = 1`, + "", + }, + { + `package p + + // comment + + import "x" + import _ "x" + import a "x" + + /* comment */ + + import ( + "x" /* comment */ + _ "x" + a "x" // comment + ` + quote + `x` + quote + ` + _ /*comment*/ ` + quote + `x` + quote + ` + a ` + quote + `x` + quote + ` + ) + import ( + ) + import () + import()import()import() + import();import();import() + + ℙvar x = 1 + `, + "", + }, +} + +var readCommentsTests = []readTest{ + { + `ℙpackage p`, + "", + }, + { + `ℙpackage p; import "x"`, + "", + }, + { + `ℙpackage p; import . "x"`, + "", + }, + { + `// foo + + /* bar */ + + /* quux */ // baz + + /*/ zot */ + + // asdf + ℙHello, world`, + "", + }, +} + +// testRead runs read on every test input after removing the ℙ marker. +// If read fails, the error must contain tt.err (and tt.err must be non-empty). +// If read succeeds, tt.err must be empty and the bytes returned must equal the text before ℙ, +// or the whole input when it contains no ℙ. +func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, error)) { + for i, tt := range tests { + var in, testOut string + j := strings.Index(tt.in, "ℙ") + if j < 0 { + in = tt.in + testOut = tt.in + } else { + in = tt.in[:j] + tt.in[j+len("ℙ"):] + testOut = tt.in[:j] + } + r := strings.NewReader(in) + buf, err := read(r) + if err != nil { + if tt.err == "" { + t.Errorf("#%d: err=%q, expected success (%q)", i, err, string(buf)) + continue + } + if !strings.Contains(err.Error(), tt.err) { + t.Errorf("#%d: err=%q, expected %q", i, err, tt.err) + continue + } + continue + } + if err == nil && tt.err != "" { + t.Errorf("#%d: success, expected %q", i, tt.err) + continue + } + + out := string(buf) + if out != testOut { + t.Errorf("#%d: wrong output:\nhave %q\nwant %q\n", i, out, testOut) + } + } +} + +func TestReadImports(t *testing.T) { + testRead(t, readImportsTests, func(r io.Reader) ([]byte, error) { return 
readImports(r, true) }) +} + +func TestReadComments(t *testing.T) { + testRead(t, readCommentsTests, readComments) +} + +var readFailuresTests = []readTest{ + { + `package`, + "syntax error", + }, + { + "package p\n\x00\nimport `math`\n", + "unexpected NUL in input", + }, + { + `package p; import`, + "syntax error", + }, + { + `package p; import "`, + "syntax error", + }, + { + "package p; import ` \n\n", + "syntax error", + }, + { + `package p; import "x`, + "syntax error", + }, + { + `package p; import _`, + "syntax error", + }, + { + `package p; import _ "`, + "syntax error", + }, + { + `package p; import _ "x`, + "syntax error", + }, + { + `package p; import .`, + "syntax error", + }, + { + `package p; import . "`, + "syntax error", + }, + { + `package p; import . "x`, + "syntax error", + }, + { + `package p; import (`, + "syntax error", + }, + { + `package p; import ("`, + "syntax error", + }, + { + `package p; import ("x`, + "syntax error", + }, + { + `package p; import ("x"`, + "syntax error", + }, +} + +func TestReadFailures(t *testing.T) { + // Errors should be reported (true arg to readImports). + testRead(t, readFailuresTests, func(r io.Reader) ([]byte, error) { return readImports(r, true) }) +} + +func TestReadFailuresIgnored(t *testing.T) { + // Syntax errors should not be reported (false arg to readImports). + // Instead, entire file should be the output and no error. + // Work on a copy of readFailuresTests and clear the expected error of every test + // except the NUL one, whose error is still expected to be reported. + tests := make([]readTest, len(readFailuresTests)) + copy(tests, readFailuresTests) + for i := range tests { + tt := &tests[i] + if !strings.Contains(tt.err, "NUL") { + tt.err = "" + } + } + testRead(t, tests, func(r io.Reader) ([]byte, error) { return readImports(r, false) }) +}