1
0
mirror of https://github.com/golang/go synced 2024-11-25 08:57:58 -07:00

scanner: correct error position for illegal UTF-8 encodings

- added corresponding test cases
- re-grouped tests for better readability

Fixes #2138.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4815091
This commit is contained in:
Robert Griesemer 2011-08-08 13:54:32 -07:00
parent ffc0830c75
commit 5987a4ad29
2 changed files with 38 additions and 14 deletions

View File

@ -265,7 +265,12 @@ func (s *Scanner) next() int {
// uncommon case: not ASCII // uncommon case: not ASCII
ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd]) ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
if ch == utf8.RuneError && width == 1 { if ch == utf8.RuneError && width == 1 {
// advance for correct error position
s.srcPos += width
s.lastCharLen = width
s.column++
s.error("illegal UTF-8 encoding") s.error("illegal UTF-8 encoding")
return ch
} }
} }
} }

View File

@ -401,12 +401,15 @@ func TestScanWhitespace(t *testing.T) {
} }
} }
func testError(t *testing.T, src, msg string, tok int) { func testError(t *testing.T, src, pos, msg string, tok int) {
s := new(Scanner).Init(bytes.NewBufferString(src)) s := new(Scanner).Init(bytes.NewBufferString(src))
errorCalled := false errorCalled := false
s.Error = func(_ *Scanner, m string) { s.Error = func(s *Scanner, m string) {
if !errorCalled { if !errorCalled {
// only look at first error // only look at first error
if p := s.Pos().String(); p != pos {
t.Errorf("pos = %q, want %q for %q", p, pos, src)
}
if m != msg { if m != msg {
t.Errorf("msg = %q, want %q for %q", m, msg, src) t.Errorf("msg = %q, want %q for %q", m, msg, src)
} }
@ -426,18 +429,34 @@ func testError(t *testing.T, src, msg string, tok int) {
} }
func TestError(t *testing.T) { func TestError(t *testing.T) {
testError(t, "\x00", "illegal character NUL", 0) testError(t, "\x00", "1:1", "illegal character NUL", 0)
testError(t, "\xff", "illegal UTF-8 encoding", utf8.RuneError) testError(t, "\x80", "1:1", "illegal UTF-8 encoding", utf8.RuneError)
testError(t, `01238`, "illegal octal number", Int) testError(t, "\xff", "1:1", "illegal UTF-8 encoding", utf8.RuneError)
testError(t, `'\"'`, "illegal char escape", Char)
testError(t, `'aa'`, "illegal char literal", Char) testError(t, "a\x00", "1:2", "illegal character NUL", Ident)
testError(t, `'`, "literal not terminated", Char) testError(t, "ab\x80", "1:3", "illegal UTF-8 encoding", Ident)
testError(t, `"\'"`, "illegal char escape", String) testError(t, "abc\xff", "1:4", "illegal UTF-8 encoding", Ident)
testError(t, `"abc`, "literal not terminated", String)
testError(t, "`abc", "literal not terminated", String) testError(t, `"a`+"\x00", "1:3", "illegal character NUL", String)
testError(t, `/*/`, "comment not terminated", EOF) testError(t, `"ab`+"\x80", "1:4", "illegal UTF-8 encoding", String)
testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String) testError(t, `"abc`+"\xff", "1:5", "illegal UTF-8 encoding", String)
testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
testError(t, "`a"+"\x00", "1:3", "illegal character NUL", String)
testError(t, "`ab"+"\x80", "1:4", "illegal UTF-8 encoding", String)
testError(t, "`abc"+"\xff", "1:5", "illegal UTF-8 encoding", String)
testError(t, `'\"'`, "1:3", "illegal char escape", Char)
testError(t, `"\'"`, "1:3", "illegal char escape", String)
testError(t, `01238`, "1:6", "illegal octal number", Int)
testError(t, `'aa'`, "1:4", "illegal char literal", Char)
testError(t, `'`, "1:2", "literal not terminated", Char)
testError(t, `'`+"\n", "1:2", "literal not terminated", Char)
testError(t, `"abc`, "1:5", "literal not terminated", String)
testError(t, `"abc`+"\n", "1:5", "literal not terminated", String)
testError(t, "`abc\n", "2:1", "literal not terminated", String)
testError(t, `/*/`, "1:4", "comment not terminated", EOF)
} }
func checkPos(t *testing.T, got, want Position) { func checkPos(t *testing.T, got, want Position) {