1
0
mirror of https://github.com/golang/go synced 2024-11-11 20:40:21 -07:00

cmd/go: ignore UTF8 BOM when reading source code

Fix a problem where a leading UTF-8 BOM causes the parsing of import paths and directives to fail.
This commit is contained in:
unbyte 2021-06-08 12:38:00 +08:00
parent 39c39ae52f
commit 98abf91377
5 changed files with 117 additions and 3 deletions

View File

@ -8,6 +8,7 @@ package imports
import ( import (
"bufio" "bufio"
"bytes"
"errors" "errors"
"io" "io"
"unicode/utf8" "unicode/utf8"
@ -22,6 +23,19 @@ type importReader struct {
nerr int nerr int
} }
// bom is the UTF-8 encoding of the byte order mark (U+FEFF).
var bom = []byte{0xef, 0xbb, 0xbf}
// newImportReader returns an importReader that reads from b, first
// discarding a leading UTF-8 byte order mark if b begins with one.
func newImportReader(b *bufio.Reader) *importReader {
	// Per https://golang.org/ref/spec#Source_code_representation:
	// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
	// if it is the first Unicode code point in the source text.
	head, err := b.Peek(3)
	if err != nil || !bytes.Equal(head, bom) {
		// Peek does not consume input, so b is handed over as it was given.
		return &importReader{b: b}
	}
	// The three peeked bytes are the BOM; drop them before parsing starts.
	b.Discard(3)
	return &importReader{b: b}
}
func isIdent(c byte) bool { func isIdent(c byte) bool {
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
} }
@ -201,7 +215,7 @@ func (r *importReader) readImport(imports *[]string) {
// ReadComments is like io.ReadAll, except that it only reads the leading // ReadComments is like io.ReadAll, except that it only reads the leading
// block of comments in the file. // block of comments in the file.
func ReadComments(f io.Reader) ([]byte, error) { func ReadComments(f io.Reader) ([]byte, error) {
r := &importReader{b: bufio.NewReader(f)} r := newImportReader(bufio.NewReader(f))
r.peekByte(true) r.peekByte(true)
if r.err == nil && !r.eof { if r.err == nil && !r.eof {
// Didn't reach EOF, so must have found a non-space byte. Remove it. // Didn't reach EOF, so must have found a non-space byte. Remove it.
@ -213,7 +227,7 @@ func ReadComments(f io.Reader) ([]byte, error) {
// ReadImports is like io.ReadAll, except that it expects a Go file as input // ReadImports is like io.ReadAll, except that it expects a Go file as input
// and stops reading the input once the imports have completed. // and stops reading the input once the imports have completed.
func ReadImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) { func ReadImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) {
r := &importReader{b: bufio.NewReader(f)} r := newImportReader(bufio.NewReader(f))
r.readKeyword("package") r.readKeyword("package")
r.readIdent() r.readIdent()

View File

@ -66,6 +66,10 @@ var readImportsTests = []readTest{
`, `,
"", "",
}, },
{
"\ufeff𝔻" + `package p; import "x";var x = 1`,
"",
},
} }
var readCommentsTests = []readTest{ var readCommentsTests = []readTest{
@ -81,6 +85,10 @@ var readCommentsTests = []readTest{
`package p; import . "x"`, `package p; import . "x"`,
"", "",
}, },
{
"\ufeff𝔻" + `package p; import . "x"`,
"",
},
{ {
`// foo `// foo
@ -90,6 +98,19 @@ var readCommentsTests = []readTest{
/*/ zot */ /*/ zot */
// asdf
Hello, world`,
"",
},
{
"\ufeff𝔻" + `// foo
/* bar */
/* quux */ // baz
/*/ zot */
// asdf // asdf
Hello, world`, Hello, world`,
"", "",
@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
in = tt.in[:j] + tt.in[j+len(""):] in = tt.in[:j] + tt.in[j+len(""):]
testOut = tt.in[:j] testOut = tt.in[:j]
} }
d := strings.Index(tt.in, "𝔻")
if d >= 0 {
in = in[:d] + in[d+len("𝔻"):]
testOut = testOut[d+len("𝔻"):]
}
r := strings.NewReader(in) r := strings.NewReader(in)
buf, err := read(r) buf, err := read(r)
if err != nil { if err != nil {

View File

@ -0,0 +1,27 @@
# Per https://golang.org/ref/spec#Source_code_representation:
# a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
# if it is the first Unicode code point in the source text.
go list -f 'Imports: {{.Imports}} EmbedFiles: {{.EmbedFiles}}' .
stdout '^Imports: \[embed m/hello\] EmbedFiles: \[.*file\]$'
-- go.mod --
module m
go 1.16
-- m.go --
package main
import (
_ "embed"
"m/hello"
)
//go:embed file
var s string
-- hello/hello.go --
package hello
-- file --

View File

@ -6,6 +6,7 @@ package build
import ( import (
"bufio" "bufio"
"bytes"
"errors" "errors"
"fmt" "fmt"
"go/ast" "go/ast"
@ -28,9 +29,19 @@ type importReader struct {
pos token.Position pos token.Position
} }
// bom is the UTF-8 encoding of the byte order mark (U+FEFF).
var bom = []byte{0xef, 0xbb, 0xbf}
func newImportReader(name string, r io.Reader) *importReader { func newImportReader(name string, r io.Reader) *importReader {
b := bufio.NewReader(r)
// Remove leading UTF-8 BOM.
// Per https://golang.org/ref/spec#Source_code_representation:
// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
// if it is the first Unicode code point in the source text.
if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
b.Discard(3)
}
return &importReader{ return &importReader{
b: bufio.NewReader(r), b: b,
pos: token.Position{ pos: token.Position{
Filename: name, Filename: name,
Line: 1, Line: 1,

View File

@ -66,6 +66,10 @@ var readGoInfoTests = []readTest{
`, `,
"", "",
}, },
{
"\ufeff𝔻" + `package p; import "x";var x = 1`,
"",
},
} }
var readCommentsTests = []readTest{ var readCommentsTests = []readTest{
@ -81,6 +85,10 @@ var readCommentsTests = []readTest{
`package p; import . "x"`, `package p; import . "x"`,
"", "",
}, },
{
"\ufeff𝔻" + `package p; import . "x"`,
"",
},
{ {
`// foo `// foo
@ -90,6 +98,19 @@ var readCommentsTests = []readTest{
/*/ zot */ /*/ zot */
// asdf
Hello, world`,
"",
},
{
"\ufeff𝔻" + `// foo
/* bar */
/* quux */ // baz
/*/ zot */
// asdf // asdf
Hello, world`, Hello, world`,
"", "",
@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
in = tt.in[:j] + tt.in[j+len(""):] in = tt.in[:j] + tt.in[j+len(""):]
testOut = tt.in[:j] testOut = tt.in[:j]
} }
d := strings.Index(tt.in, "𝔻")
if d >= 0 {
in = in[:d] + in[d+len("𝔻"):]
testOut = testOut[d+len("𝔻"):]
}
r := strings.NewReader(in) r := strings.NewReader(in)
buf, err := read(r) buf, err := read(r)
if err != nil { if err != nil {
@ -264,6 +290,12 @@ var readEmbedTests = []struct {
test:3:14:y test:3:14:y
test:3:16:z`, test:3:16:z`,
}, },
{
"\ufeffpackage p\nimport \"embed\"\n//go:embed x y z\nvar files embed.FS",
`test:3:12:x
test:3:14:y
test:3:16:z`,
},
{ {
"package p\nimport \"embed\"\nvar s = \"/*\"\n//go:embed x\nvar files embed.FS", "package p\nimport \"embed\"\nvar s = \"/*\"\n//go:embed x\nvar files embed.FS",
`test:4:12:x`, `test:4:12:x`,
@ -292,6 +324,10 @@ var readEmbedTests = []struct {
"package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan "package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
"", "",
}, },
{
"\ufeffpackage p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
"",
},
} }
func TestReadEmbed(t *testing.T) { func TestReadEmbed(t *testing.T) {