mirror of
https://github.com/golang/go
synced 2024-11-18 10:54:40 -07:00
text/scanner: Fix EOF reporting on strange Readers
Currently, scanner uses -1 to represent 2 different states: 1. I haven't yet scanned anything, call it "Beginning of File" 2. I've reached the end of the input, ie EOF The result of this behavior is that calling Peek() when next() has detected the end of the input and set s.ch to scanner.EOF, is that Peek() things "oh, s.ch is < 0, which to me means that I haven't scanned any next yet, let me try and clear the BOM marker." When this behavior is run on a typical IO, next() will issue a Read and get (0, io.EOF) back for the second time without blocking and Peek() will return scanner.EOF. The bug comes into play when, inside a terminal, hitting Control-D. This causes the terminal to return a EOF condition to the reader but it does not actually close the fd. So, combining these 2 situations, we arrive at the bug: What is expected: hitting Control-D in a terminal will make Peek() return scanner.EOF instantly. What actually happens: 0. Code waiting in Next() 1. User hits Control-D 2. fd returns EOF condition 3. EOF bubbles it's way out to line 249 in scanner.go 4. next() returns scanner.EOF 5. Next() saves the scanner.EOF to s.ch and returns the previous value 6. Peek() runs, sees s.ch < 0, mistakenly thinks it hasn't run yet and tries to read the BOM marker. 7. next() sees the buffer is empty and tries to fill it again, blocking on line 249. The fix is simple: use a different code to indicate that no data has been scanned. Change-Id: Iee8f4da5881682c4d4c36b93b9bf397ac5798179 Reviewed-on: https://go-review.googlesource.com/7913 Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
parent
15e66f9d01
commit
9c0375277c
@ -208,7 +208,7 @@ func (s *Scanner) Init(src io.Reader) *Scanner {
|
||||
s.tokPos = -1
|
||||
|
||||
// initialize one character look-ahead
|
||||
s.ch = -1 // no char read yet
|
||||
s.ch = -2 // no char read yet, not EOF
|
||||
|
||||
// initialize public fields
|
||||
s.Error = nil
|
||||
@ -322,7 +322,7 @@ func (s *Scanner) Next() rune {
|
||||
// the scanner. It returns EOF if the scanner's position is at the last
|
||||
// character of the source.
|
||||
func (s *Scanner) Peek() rune {
|
||||
if s.ch < 0 {
|
||||
if s.ch == -2 {
|
||||
// this code is only run for the very first character
|
||||
s.ch = s.next()
|
||||
if s.ch == '\uFEFF' {
|
||||
|
@ -616,3 +616,32 @@ func TestPos(t *testing.T) {
|
||||
t.Errorf("%d errors", s.ErrorCount)
|
||||
}
|
||||
}
|
||||
|
||||
type countReader int
|
||||
|
||||
func (c *countReader) Read([]byte) (int, error) {
|
||||
*c++
|
||||
|
||||
return 0, io.EOF
|
||||
}
|
||||
|
||||
func TestPeekEOFHandling(t *testing.T) {
|
||||
var r countReader
|
||||
|
||||
// corner case: empty source
|
||||
s := new(Scanner).Init(&r)
|
||||
|
||||
tok := s.Next()
|
||||
if tok != EOF {
|
||||
t.Errorf("EOF not reported")
|
||||
}
|
||||
|
||||
tok = s.Peek()
|
||||
if tok != EOF {
|
||||
t.Errorf("EOF not reported")
|
||||
}
|
||||
|
||||
if r != 2 {
|
||||
t.Errorf("scanner called Read %d times, not twice", r)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user