mirror of
https://github.com/golang/go
synced 2024-11-11 20:40:21 -07:00
cmd/go: ignore UTF-8 BOM when reading source code
Fix a bug where a leading UTF-8 BOM causes the parsing of import paths and directives to fail.
This commit is contained in:
parent
39c39ae52f
commit
98abf91377
@ -8,6 +8,7 @@ package imports
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
@ -22,6 +23,19 @@ type importReader struct {
|
|||||||
nerr int
|
nerr int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var bom = []byte{0xef, 0xbb, 0xbf}
|
||||||
|
|
||||||
|
func newImportReader(b *bufio.Reader) *importReader {
|
||||||
|
// Remove leading UTF-8 BOM.
|
||||||
|
// Per https://golang.org/ref/spec#Source_code_representation:
|
||||||
|
// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
|
||||||
|
// if it is the first Unicode code point in the source text.
|
||||||
|
if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
|
||||||
|
b.Discard(3)
|
||||||
|
}
|
||||||
|
return &importReader{b: b}
|
||||||
|
}
|
||||||
|
|
||||||
func isIdent(c byte) bool {
|
func isIdent(c byte) bool {
|
||||||
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
|
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
|
||||||
}
|
}
|
||||||
@ -201,7 +215,7 @@ func (r *importReader) readImport(imports *[]string) {
|
|||||||
// ReadComments is like io.ReadAll, except that it only reads the leading
|
// ReadComments is like io.ReadAll, except that it only reads the leading
|
||||||
// block of comments in the file.
|
// block of comments in the file.
|
||||||
func ReadComments(f io.Reader) ([]byte, error) {
|
func ReadComments(f io.Reader) ([]byte, error) {
|
||||||
r := &importReader{b: bufio.NewReader(f)}
|
r := newImportReader(bufio.NewReader(f))
|
||||||
r.peekByte(true)
|
r.peekByte(true)
|
||||||
if r.err == nil && !r.eof {
|
if r.err == nil && !r.eof {
|
||||||
// Didn't reach EOF, so must have found a non-space byte. Remove it.
|
// Didn't reach EOF, so must have found a non-space byte. Remove it.
|
||||||
@ -213,7 +227,7 @@ func ReadComments(f io.Reader) ([]byte, error) {
|
|||||||
// ReadImports is like io.ReadAll, except that it expects a Go file as input
|
// ReadImports is like io.ReadAll, except that it expects a Go file as input
|
||||||
// and stops reading the input once the imports have completed.
|
// and stops reading the input once the imports have completed.
|
||||||
func ReadImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) {
|
func ReadImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) {
|
||||||
r := &importReader{b: bufio.NewReader(f)}
|
r := newImportReader(bufio.NewReader(f))
|
||||||
|
|
||||||
r.readKeyword("package")
|
r.readKeyword("package")
|
||||||
r.readIdent()
|
r.readIdent()
|
||||||
|
@ -66,6 +66,10 @@ var readImportsTests = []readTest{
|
|||||||
`,
|
`,
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"\ufeff𝔻" + `package p; import "x";ℙvar x = 1`,
|
||||||
|
"",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var readCommentsTests = []readTest{
|
var readCommentsTests = []readTest{
|
||||||
@ -81,6 +85,10 @@ var readCommentsTests = []readTest{
|
|||||||
`ℙpackage p; import . "x"`,
|
`ℙpackage p; import . "x"`,
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"\ufeff𝔻" + `ℙpackage p; import . "x"`,
|
||||||
|
"",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
`// foo
|
`// foo
|
||||||
|
|
||||||
@ -90,6 +98,19 @@ var readCommentsTests = []readTest{
|
|||||||
|
|
||||||
/*/ zot */
|
/*/ zot */
|
||||||
|
|
||||||
|
// asdf
|
||||||
|
ℙHello, world`,
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"\ufeff𝔻" + `// foo
|
||||||
|
|
||||||
|
/* bar */
|
||||||
|
|
||||||
|
/* quux */ // baz
|
||||||
|
|
||||||
|
/*/ zot */
|
||||||
|
|
||||||
// asdf
|
// asdf
|
||||||
ℙHello, world`,
|
ℙHello, world`,
|
||||||
"",
|
"",
|
||||||
@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
|
|||||||
in = tt.in[:j] + tt.in[j+len("ℙ"):]
|
in = tt.in[:j] + tt.in[j+len("ℙ"):]
|
||||||
testOut = tt.in[:j]
|
testOut = tt.in[:j]
|
||||||
}
|
}
|
||||||
|
d := strings.Index(tt.in, "𝔻")
|
||||||
|
if d >= 0 {
|
||||||
|
in = in[:d] + in[d+len("𝔻"):]
|
||||||
|
testOut = testOut[d+len("𝔻"):]
|
||||||
|
}
|
||||||
r := strings.NewReader(in)
|
r := strings.NewReader(in)
|
||||||
buf, err := read(r)
|
buf, err := read(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
27
src/cmd/go/testdata/script/build_ignore_leading_bom.txt
vendored
Normal file
27
src/cmd/go/testdata/script/build_ignore_leading_bom.txt
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# Per https://golang.org/ref/spec#Source_code_representation:
|
||||||
|
# a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
|
||||||
|
# if it is the first Unicode code point in the source text.
|
||||||
|
|
||||||
|
go list -f 'Imports: {{.Imports}} EmbedFiles: {{.EmbedFiles}}' .
|
||||||
|
stdout '^Imports: \[embed m/hello\] EmbedFiles: \[.*file\]$'
|
||||||
|
|
||||||
|
-- go.mod --
|
||||||
|
module m
|
||||||
|
|
||||||
|
go 1.16
|
||||||
|
-- m.go --
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
_ "embed"
|
||||||
|
|
||||||
|
"m/hello"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed file
|
||||||
|
var s string
|
||||||
|
|
||||||
|
-- hello/hello.go --
|
||||||
|
package hello
|
||||||
|
|
||||||
|
-- file --
|
@ -6,6 +6,7 @@ package build
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"go/ast"
|
"go/ast"
|
||||||
@ -28,9 +29,19 @@ type importReader struct {
|
|||||||
pos token.Position
|
pos token.Position
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var bom = []byte{0xef, 0xbb, 0xbf}
|
||||||
|
|
||||||
func newImportReader(name string, r io.Reader) *importReader {
|
func newImportReader(name string, r io.Reader) *importReader {
|
||||||
|
b := bufio.NewReader(r)
|
||||||
|
// Remove leading UTF-8 BOM.
|
||||||
|
// Per https://golang.org/ref/spec#Source_code_representation:
|
||||||
|
// a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
|
||||||
|
// if it is the first Unicode code point in the source text.
|
||||||
|
if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
|
||||||
|
b.Discard(3)
|
||||||
|
}
|
||||||
return &importReader{
|
return &importReader{
|
||||||
b: bufio.NewReader(r),
|
b: b,
|
||||||
pos: token.Position{
|
pos: token.Position{
|
||||||
Filename: name,
|
Filename: name,
|
||||||
Line: 1,
|
Line: 1,
|
||||||
|
@ -66,6 +66,10 @@ var readGoInfoTests = []readTest{
|
|||||||
`,
|
`,
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"\ufeff𝔻" + `package p; import "x";ℙvar x = 1`,
|
||||||
|
"",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var readCommentsTests = []readTest{
|
var readCommentsTests = []readTest{
|
||||||
@ -81,6 +85,10 @@ var readCommentsTests = []readTest{
|
|||||||
`ℙpackage p; import . "x"`,
|
`ℙpackage p; import . "x"`,
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"\ufeff𝔻" + `ℙpackage p; import . "x"`,
|
||||||
|
"",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
`// foo
|
`// foo
|
||||||
|
|
||||||
@ -90,6 +98,19 @@ var readCommentsTests = []readTest{
|
|||||||
|
|
||||||
/*/ zot */
|
/*/ zot */
|
||||||
|
|
||||||
|
// asdf
|
||||||
|
ℙHello, world`,
|
||||||
|
"",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"\ufeff𝔻" + `// foo
|
||||||
|
|
||||||
|
/* bar */
|
||||||
|
|
||||||
|
/* quux */ // baz
|
||||||
|
|
||||||
|
/*/ zot */
|
||||||
|
|
||||||
// asdf
|
// asdf
|
||||||
ℙHello, world`,
|
ℙHello, world`,
|
||||||
"",
|
"",
|
||||||
@ -107,6 +128,11 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
|
|||||||
in = tt.in[:j] + tt.in[j+len("ℙ"):]
|
in = tt.in[:j] + tt.in[j+len("ℙ"):]
|
||||||
testOut = tt.in[:j]
|
testOut = tt.in[:j]
|
||||||
}
|
}
|
||||||
|
d := strings.Index(tt.in, "𝔻")
|
||||||
|
if d >= 0 {
|
||||||
|
in = in[:d] + in[d+len("𝔻"):]
|
||||||
|
testOut = testOut[d+len("𝔻"):]
|
||||||
|
}
|
||||||
r := strings.NewReader(in)
|
r := strings.NewReader(in)
|
||||||
buf, err := read(r)
|
buf, err := read(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -264,6 +290,12 @@ var readEmbedTests = []struct {
|
|||||||
test:3:14:y
|
test:3:14:y
|
||||||
test:3:16:z`,
|
test:3:16:z`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"\ufeffpackage p\nimport \"embed\"\n//go:embed x y z\nvar files embed.FS",
|
||||||
|
`test:3:12:x
|
||||||
|
test:3:14:y
|
||||||
|
test:3:16:z`,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"package p\nimport \"embed\"\nvar s = \"/*\"\n//go:embed x\nvar files embed.FS",
|
"package p\nimport \"embed\"\nvar s = \"/*\"\n//go:embed x\nvar files embed.FS",
|
||||||
`test:4:12:x`,
|
`test:4:12:x`,
|
||||||
@ -292,6 +324,10 @@ var readEmbedTests = []struct {
|
|||||||
"package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
|
"package p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
|
||||||
"",
|
"",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"\ufeffpackage p\n//go:embed x y z\nvar files embed.FS", // no import, no scan
|
||||||
|
"",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestReadEmbed(t *testing.T) {
|
func TestReadEmbed(t *testing.T) {
|
||||||
|
Loading…
Reference in New Issue
Block a user