1
0
mirror of https://github.com/golang/go synced 2024-11-23 05:30:07 -07:00

bufio: don't loop generating empty tokens

The new rules for split functions mean that we are exposed
to the common bug of a function that loops forever at EOF.
Pick these off by shutting down the scanner if too many
consecutive empty tokens are delivered.

Fixes #9020.

LGTM=rsc, adg
R=golang-codereviews, rsc, adg, bradfitz
CC=golang-codereviews
https://golang.org/cl/169970043
This commit is contained in:
Rob Pike 2014-11-06 09:57:46 +11:00
parent bb4a358af3
commit 590f528376
2 changed files with 70 additions and 0 deletions

View File

@ -36,6 +36,7 @@ type Scanner struct {
start int // First non-processed byte in buf.
end int // End of data in buf.
err error // Sticky error.
empties int // Count of successive empty tokens.
}
// SplitFunc is the signature of the split function used to tokenize the
@ -108,6 +109,8 @@ func (s *Scanner) Text() string {
// After Scan returns false, the Err method will return any error that
// occurred during scanning, except that if it was io.EOF, Err
// will return nil.
// Scan panics if the split function returns more than 100 consecutive empty
// tokens without advancing the input. This is a common error mode for scanners.
func (s *Scanner) Scan() bool {
// Loop until we have a token.
for {
@ -125,6 +128,14 @@ func (s *Scanner) Scan() bool {
}
s.token = token
if token != nil {
if len(token) > 0 {
s.empties = 0
} else {
s.empties++
if s.empties > 100 {
panic("bufio.Scan: 100 empty tokens without progressing")
}
}
return true
}
}
@ -172,6 +183,7 @@ func (s *Scanner) Scan() bool {
break
}
if n > 0 {
s.empties = 0
break
}
loop++

View File

@ -455,3 +455,61 @@ func TestEmptyTokens(t *testing.T) {
t.Fatal(err)
}
}
// loopAtEOFSplit is a deliberately misbehaving split function used to
// exercise the scanner's empty-token guard. While data is non-empty it
// consumes one byte and returns it as the token. Once data is empty it
// returns data itself (a zero-length token) with advance 0 and a nil error,
// without consulting atEOF, so it never signals that it is finished.
func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 {
		// No progress and no error: hand back the empty input unchanged.
		return 0, data, nil
	}
	return 1, data[:1], nil
}
// TestDontLoopForever checks that a Scanner driven by loopAtEOFSplit, which
// keeps returning empty tokens without advancing, panics with a message
// mentioning "empty tokens" instead of looping indefinitely.
func TestDontLoopForever(t *testing.T) {
	s := NewScanner(strings.NewReader("abc"))
	s.Split(loopAtEOFSplit)
	// Expect a panic
	defer func() {
		err := recover()
		if err == nil {
			t.Fatal("should have panicked")
		}
		// Swallow only the expected panic; anything else is re-raised so an
		// unrelated failure is not hidden.
		if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") {
			panic(err)
		}
	}()
	for count := 0; s.Scan(); count++ {
		// Safety net: fail rather than spin if the guard never fires.
		if count > 1000 {
			t.Fatal("looping")
		}
	}
	if s.Err() != nil {
		t.Fatal("after scan:", s.Err())
	}
}
// countdown is a counter of how many more one-byte tokens its split method
// will hand out.
type countdown int

// split is a split function that, while the counter is positive, decrements
// it and returns the first byte of data as a token with advance 1. Once the
// counter is no longer positive it returns advance 0, a nil token and a nil
// error. atEOF is not consulted.
func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if *c <= 0 {
		return 0, nil, nil
	}
	*c--
	return 1, data[:1], nil
}
// TestEmptyLinesOK checks that the looping-at-EOF guard does not fire on
// input made only of empty lines: 10000 newlines are scanned with a countdown
// split function primed for 10000 tokens, and the scan must end with no error
// and the countdown fully consumed.
func TestEmptyLinesOK(t *testing.T) {
	const lines = 10000
	remaining := countdown(lines)
	sc := NewScanner(strings.NewReader(strings.Repeat("\n", lines)))
	sc.Split(remaining.split)
	// Drain the scanner; only the final state matters.
	for sc.Scan() {
	}
	if err := sc.Err(); err != nil {
		t.Fatal("after scan:", err)
	}
	if remaining != 0 {
		t.Fatalf("stopped with %d left to process", remaining)
	}
}