go/build: use custom file readers to avoid I/O

When reading Go files, read through import block. When reading non-Go files, read only leading comments. R=nigeltao, adg, r CC=golang-dev https://golang.org/cl/6493068
2024-11-22 07:34:40 -07:00 · 2012-09-14 12:22:45 -04:00 · 2012-09-14 12:22:45 -04:00 · ab224094d0
commit ab224094d0
parent ae42beafd3
3 changed files with 471 additions and 1 deletions
--- a/src/pkg/go/build/build.go
+++ b/src/pkg/go/build/build.go
@ -512,7 +512,13 @@ Found:
 		if err != nil {
 			return p, err
 		}
-		data, err := ioutil.ReadAll(f)
+
 		var data []byte
 		if strings.HasSuffix(filename, ".go") {
 			data, err = readImports(f, false)
 		} else {
 			data, err = readComments(f)
 		}
 		f.Close()
 		if err != nil {
 			return p, fmt.Errorf("read %s: %v", filename, err)
--- a/src/pkg/go/build/read.go
+++ b/src/pkg/go/build/read.go
@ -0,0 +1,238 @@
 // Copyright 2012 The Go Authors.  All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package build
 import (
 	"bufio"
 	"errors"
 	"io"
 )
 // importReader holds the state of a single scan over one input.
 // Its methods record the first failure in err instead of returning it,
 // so callers inspect err (and eof) once scanning is done.
 type importReader struct {
 	// b is the buffered input being scanned.
 	b    *bufio.Reader
 	// buf accumulates every byte that readByte has read successfully.
 	buf  []byte
 	// peek is the byte most recently returned by peekByte and not yet
 	// consumed; 0 means no byte is pending.
 	peek byte
 	// err is the first error recorded: a read error, errNUL or errSyntax.
 	err  error
 	// eof is set once the underlying reader has returned io.EOF.
 	eof  bool
 	// nerr counts peekByte calls made after err was set; peekByte panics
 	// when it exceeds 10000.
 	nerr int
 }
 // isIdent reports whether c may appear in an identifier: an ASCII letter,
 // an ASCII digit, an underscore, or any byte >= 0x80 (part of a multi-byte
 // UTF-8 sequence). Digits are accepted in every position.
 func isIdent(c byte) bool {
 	switch {
 	case c >= 0x80, c == '_':
 		return true
 	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9':
 		return true
 	}
 	return false
 }
 // Errors that the import reader records in importReader.err.
 var (
 	// errSyntax is recorded by syntaxError when the input does not have the expected form.
 	errSyntax = errors.New("syntax error")
 	// errNUL is recorded by readByte when the input contains a zero byte.
 	errNUL    = errors.New("unexpected NUL in input")
 )
 // syntaxError marks the input as malformed by storing errSyntax in r.err.
 // Any error recorded earlier wins: an existing r.err is left untouched.
 func (r *importReader) syntaxError() {
 	if r.err != nil {
 		return
 	}
 	r.err = errSyntax
 }
 // readByte reads one byte from the input and appends it to r.buf.
 // A non-zero byte is returned as is. In every other case readByte returns 0:
 //   - at end of input it sets r.eof (r.err is not touched);
 //   - on a zero byte (which is still appended to r.buf) it records errNUL;
 //   - on any other read failure it records that error.
 // An error is only recorded if r.err is still nil.
 func (r *importReader) readByte() byte {
 	c, err := r.b.ReadByte()
 	if err == nil {
 		r.buf = append(r.buf, c)
 		if c != 0 {
 			return c
 		}
 		err = errNUL
 	}
 	switch {
 	case err == io.EOF:
 		r.eof = true
 	case r.err == nil:
 		r.err = err
 	}
 	return 0
 }
 // peekByte returns the next byte from the input reader but does not advance beyond it.
 // If skipSpace is set, peekByte skips leading spaces and comments.
 //
 // Once an error has been recorded in r.err, peekByte returns 0 without
 // reading any further input.
 func (r *importReader) peekByte(skipSpace bool) byte {
 	if r.err != nil {
 		// Count the calls made after an error. A caller that keeps asking
 		// for bytes without ever checking r.err trips the panic below
 		// rather than looping forever.
 		if r.nerr++; r.nerr > 10000 {
 			panic("go/build: import reader looping")
 		}
 		return 0
 	}
 	// Use r.peek as first input byte.
 	// Don't just return r.peek here: it might have been left by peekByte(false)
 	// and this might be peekByte(true).
 	c := r.peek
 	if c == 0 {
 		c = r.readByte()
 	}
 	for r.err == nil && !r.eof {
 		if skipSpace {
 			// For the purposes of this reader, semicolons are never necessary to
 			// understand the input and are treated as spaces.
 			switch c {
 			case ' ', '\f', '\t', '\r', '\n', ';':
 				c = r.readByte()
 				continue
 			case '/':
 				c = r.readByte()
 				if c == '/' {
 					// Line comment: read up to and including the newline
 					// (or until EOF or an error).
 					for c != '\n' && r.err == nil && !r.eof {
 						c = r.readByte()
 					}
 				} else if c == '*' {
 					// Block comment: slide a two-byte window (c, c1) over the
 					// input until it holds "*/". c1 starts at 0 and the first
 					// shift replaces the opening '*' held in c, so "/*/" is
 					// not taken for a complete comment.
 					var c1 byte
 					for (c != '*' || c1 != '/') && r.err == nil {
 						if r.eof {
 							// Unterminated comment. Recording the error makes
 							// the loop condition fail after this iteration.
 							r.syntaxError()
 						}
 						c, c1 = c1, r.readByte()
 					}
 				} else {
 					// A '/' that does not open a comment is not expected here.
 					r.syntaxError()
 				}
 				// Read the byte following the comment and rescan from there.
 				c = r.readByte()
 				continue
 			}
 		}
 		break
 	}
 	// Keep the byte pending so that the next peekByte or nextByte sees it again.
 	r.peek = c
 	return r.peek
 }
 // nextByte returns the same byte peekByte(skipSpace) would, and then
 // consumes it by clearing the pending byte in r.peek.
 func (r *importReader) nextByte(skipSpace bool) byte {
 	next := r.peekByte(skipSpace)
 	r.peek = 0 // nothing pending any more
 	return next
 }
 // readKeyword consumes the keyword kw from the input, after skipping any
 // leading spaces and comments. A syntax error is recorded if the input does
 // not spell kw byte for byte, or if kw is immediately followed by another
 // identifier byte (so "packages" does not match "package").
 func (r *importReader) readKeyword(kw string) {
 	// Position on the first non-space byte; it stays pending in r.peek.
 	r.peekByte(true)
 	for _, want := range []byte(kw) {
 		if got := r.nextByte(false); got != want {
 			r.syntaxError()
 			return
 		}
 	}
 	if following := r.peekByte(false); isIdent(following) {
 		r.syntaxError()
 	}
 }
 // readIdent consumes one identifier from the input, after skipping any
 // leading spaces and comments. If the first non-space byte cannot be part of
 // an identifier, readIdent records a syntax error and consumes nothing more.
 func (r *importReader) readIdent() {
 	if first := r.peekByte(true); !isIdent(first) {
 		r.syntaxError()
 		return
 	}
 	for {
 		if !isIdent(r.peekByte(false)) {
 			return
 		}
 		// Consume the identifier byte that was just peeked.
 		r.peek = 0
 	}
 }
 // readString consumes a quoted string literal from the input, after skipping
 // any leading spaces and comments. Both raw (backquoted) and interpreted
 // (double-quoted) literals are accepted.
 // A syntax error is recorded if the next byte does not open a string
 // literal, if the input ends before the closing quote, or if an interpreted
 // literal contains a newline.
 func (r *importReader) readString() {
 	quote := r.nextByte(true)
 	if quote != '`' && quote != '"' {
 		r.syntaxError()
 		return
 	}
 	// Raw literals have no escapes and may span lines.
 	raw := quote == '`'
 	for r.err == nil {
 		c := r.nextByte(false)
 		if c == quote {
 			return
 		}
 		if r.eof || (!raw && c == '\n') {
 			r.syntaxError()
 		}
 		if !raw && c == '\\' {
 			// Skip the escaped byte so that \" does not end the literal.
 			r.nextByte(false)
 		}
 	}
 }
 // readImport consumes a single import spec: an optional name (either "." or
 // an identifier) followed by a quoted import path.
 func (r *importReader) readImport() {
 	switch c := r.peekByte(true); {
 	case c == '.':
 		// Dot import: consume the '.' that was just peeked.
 		r.peek = 0
 	case isIdent(c):
 		r.readIdent()
 	}
 	r.readString()
 }
 // readComments reads from f only as far as the leading run of spaces and
 // comments extends, and returns exactly those bytes. If that run reaches the
 // end of the input, or an error was recorded, everything read so far is
 // returned together with the error (nil at plain end of input).
 func readComments(f io.Reader) ([]byte, error) {
 	r := &importReader{b: bufio.NewReader(f)}
 	r.peekByte(true)
 	if r.err != nil || r.eof {
 		return r.buf, r.err
 	}
 	// The scan stopped on a non-space byte, which is not part of the
 	// leading comments: leave it out of the result.
 	r.buf = r.buf[:len(r.buf)-1]
 	return r.buf, nil
 }
 // readImports reads a Go source file from f, stopping as soon as the package
 // clause and all import declarations have been read, and returns the bytes
 // read up to that point.
 //
 // When the input is not well formed, the outcome depends on
 // reportSyntaxError: if it is set, the syntax error is returned along with
 // the bytes read so far; otherwise the rest of the input is read and
 // returned, so that a later full parse sees the whole file. Other errors
 // (read failures, a NUL byte) are always returned.
 func readImports(f io.Reader, reportSyntaxError bool) ([]byte, error) {
 	r := &importReader{b: bufio.NewReader(f)}
 	r.readKeyword("package")
 	r.readIdent()
 	for r.peekByte(true) == 'i' {
 		r.readKeyword("import")
 		if r.peekByte(true) != '(' {
 			// Single import: import "x"
 			r.readImport()
 			continue
 		}
 		// Grouped imports: import ( ... )
 		r.nextByte(false)
 		for r.peekByte(true) != ')' && r.err == nil {
 			r.readImport()
 		}
 		r.nextByte(false)
 	}
 	switch {
 	case r.err == nil && !r.eof:
 		// Stopped cleanly before EOF. The last byte read is the one that
 		// showed the imports were over; it belongs to whatever follows and
 		// would be a syntax error on its own, so it is dropped.
 		return r.buf[:len(r.buf)-1], nil
 	case r.err == errSyntax && !reportSyntaxError:
 		// Swallow the syntax error but take in the rest of the file, so
 		// that the errors go/parser reports are not changed.
 		r.err = nil
 		for r.err == nil && !r.eof {
 			r.readByte()
 		}
 	}
 	return r.buf, r.err
 }
--- a/src/pkg/go/build/read_test.go
+++ b/src/pkg/go/build/read_test.go
@ -0,0 +1,226 @@
 // Copyright 2012 The Go Authors.  All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package build
 import (
 	"io"
 	"strings"
 	"testing"
 )
 // quote is a single backquote. A raw string literal cannot contain a
 // backquote, so the test inputs below splice it in by concatenation.
 const quote = "`"
 // readTest is one entry of the tables run by testRead.
 type readTest struct {
 	// Test input contains ℙ where readImports should stop.
 	// testRead removes the marker before reading; when there is no marker
 	// the whole input is the expected output.
 	in  string
 	// err is a substring that the returned error must contain.
 	// An empty err means the read is expected to succeed.
 	err string
 }
 // readImportsTests holds inputs that readImports must accept without error.
 // Entries without a ℙ marker are expected to be returned whole.
 var readImportsTests = []readTest{
 	{
 		`package p`,
 		"",
 	},
 	{
 		`package p; import "x"`,
 		"",
 	},
 	{
 		`package p; import . "x"`,
 		"",
 	},
 	{
 		`package p; import "x";ℙvar x = 1`,
 		"",
 	},
 	{
 		`package p
 		// comment
 		import "x"
 		import _ "x"
 		import a "x"
 		/* comment */
 		import (
 			"x" /* comment */
 			_ "x"
 			a "x" // comment
 			` + quote + `x` + quote + `
 			_ /*comment*/ ` + quote + `x` + quote + `
 			a ` + quote + `x` + quote + `
 		)
 		import (
 		)
 		import ()
 		import()import()import()
 		import();import();import()
 		ℙvar x = 1
 		`,
 		"",
 	},
 }
 // readCommentsTests holds inputs for readComments. The ℙ marker sits at the
 // first byte that is neither space nor comment, which is where the returned
 // data must end.
 var readCommentsTests = []readTest{
 	{
 		`ℙpackage p`,
 		"",
 	},
 	{
 		`ℙpackage p; import "x"`,
 		"",
 	},
 	{
 		`ℙpackage p; import . "x"`,
 		"",
 	},
 	{
 		`// foo
 		/* bar */
 		/* quux */ // baz
 		/*/ zot */
 		// asdf
 		ℙHello, world`,
 		"",
 	},
 }
 // testRead runs read over every entry of tests. The ℙ marker, if present,
 // is removed from the input and everything before it becomes the expected
 // output; without a marker the whole input is expected back. An entry with a
 // non-empty err must fail with an error containing that text (its output is
 // not checked); an entry with an empty err must succeed.
 func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, error)) {
 	const marker = "ℙ"
 	for i, tt := range tests {
 		in, want := tt.in, tt.in
 		if j := strings.Index(tt.in, marker); j >= 0 {
 			in = tt.in[:j] + tt.in[j+len(marker):]
 			want = tt.in[:j]
 		}
 		buf, err := read(strings.NewReader(in))
 		switch {
 		case err != nil && tt.err == "":
 			t.Errorf("#%d: err=%q, expected success (%q)", i, err, string(buf))
 		case err != nil:
 			if !strings.Contains(err.Error(), tt.err) {
 				t.Errorf("#%d: err=%q, expected %q", i, err, tt.err)
 			}
 		case tt.err != "":
 			t.Errorf("#%d: success, expected %q", i, tt.err)
 		default:
 			if out := string(buf); out != want {
 				t.Errorf("#%d: wrong output:\nhave %q\nwant %q\n", i, out, want)
 			}
 		}
 	}
 }
 func TestReadImports(t *testing.T) {
 	testRead(t, readImportsTests, func(r io.Reader) ([]byte, error) { return readImports(r, true) })
 }
 func TestReadComments(t *testing.T) {
 	testRead(t, readCommentsTests, readComments)
 }
 // readFailuresTests holds inputs that readImports must reject when syntax
 // errors are reported. The second field is the text the error must contain.
 // TestReadFailuresIgnored reuses the same inputs with syntax errors ignored.
 var readFailuresTests = []readTest{
 	{
 		`package`,
 		"syntax error",
 	},
 	{
 		"package p\n\x00\nimport `math`\n",
 		"unexpected NUL in input",
 	},
 	{
 		`package p; import`,
 		"syntax error",
 	},
 	{
 		`package p; import "`,
 		"syntax error",
 	},
 	{
 		"package p; import ` \n\n",
 		"syntax error",
 	},
 	{
 		`package p; import "x`,
 		"syntax error",
 	},
 	{
 		`package p; import _`,
 		"syntax error",
 	},
 	{
 		`package p; import _ "`,
 		"syntax error",
 	},
 	{
 		`package p; import _ "x`,
 		"syntax error",
 	},
 	{
 		`package p; import .`,
 		"syntax error",
 	},
 	{
 		`package p; import . "`,
 		"syntax error",
 	},
 	{
 		`package p; import . "x`,
 		"syntax error",
 	},
 	{
 		`package p; import (`,
 		"syntax error",
 	},
 	{
 		`package p; import ("`,
 		"syntax error",
 	},
 	{
 		`package p; import ("x`,
 		"syntax error",
 	},
 	{
 		`package p; import ("x"`,
 		"syntax error",
 	},
 }
 func TestReadFailures(t *testing.T) {
 	// Errors should be reported (true arg to readImports).
 	testRead(t, readFailuresTests, func(r io.Reader) ([]byte, error) { return readImports(r, true) })
 }
 // TestReadFailuresIgnored reruns the failure inputs with syntax errors
 // suppressed (second argument to readImports false). Each input must then
 // come back whole and without error. The NUL case is the exception: that
 // error is not a syntax error and is still expected.
 func TestReadFailuresIgnored(t *testing.T) {
 	// Work on a copy so that the shared table keeps its expected errors.
 	tests := append([]readTest(nil), readFailuresTests...)
 	for i := range tests {
 		if strings.Contains(tests[i].err, "NUL") {
 			continue
 		}
 		tests[i].err = ""
 	}
 	testRead(t, tests, func(r io.Reader) ([]byte, error) { return readImports(r, false) })
 }