mirror of
https://github.com/golang/go
synced 2024-11-23 05:30:07 -07:00
bufio: don't loop generating empty tokens
The new rules for split functions mean that we are exposed to the common bug of a function that loops forever at EOF. Pick these off by shutting down the scanner if too many consecutive empty tokens are delivered. Fixes #9020. LGTM=rsc, adg R=golang-codereviews, rsc, adg, bradfitz CC=golang-codereviews https://golang.org/cl/169970043
This commit is contained in:
parent
bb4a358af3
commit
590f528376
@ -36,6 +36,7 @@ type Scanner struct {
|
||||
start int // First non-processed byte in buf.
|
||||
end int // End of data in buf.
|
||||
err error // Sticky error.
|
||||
empties int // Count of successive empty tokens.
|
||||
}
|
||||
|
||||
// SplitFunc is the signature of the split function used to tokenize the
|
||||
@ -108,6 +109,8 @@ func (s *Scanner) Text() string {
|
||||
// After Scan returns false, the Err method will return any error that
|
||||
// occurred during scanning, except that if it was io.EOF, Err
|
||||
// will return nil.
|
||||
// Split panics if the split function returns 100 empty tokens without
|
||||
// advancing the input. This is a common error mode for scanners.
|
||||
func (s *Scanner) Scan() bool {
|
||||
// Loop until we have a token.
|
||||
for {
|
||||
@ -125,6 +128,14 @@ func (s *Scanner) Scan() bool {
|
||||
}
|
||||
s.token = token
|
||||
if token != nil {
|
||||
if len(token) > 0 {
|
||||
s.empties = 0
|
||||
} else {
|
||||
s.empties++
|
||||
if s.empties > 100 {
|
||||
panic("bufio.Scan: 100 empty tokens without progressing")
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
@ -172,6 +183,7 @@ func (s *Scanner) Scan() bool {
|
||||
break
|
||||
}
|
||||
if n > 0 {
|
||||
s.empties = 0
|
||||
break
|
||||
}
|
||||
loop++
|
||||
|
@ -455,3 +455,61 @@ func TestEmptyTokens(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if len(data) > 0 {
|
||||
return 1, data[:1], nil
|
||||
}
|
||||
return 0, data, nil
|
||||
}
|
||||
|
||||
func TestDontLoopForever(t *testing.T) {
|
||||
s := NewScanner(strings.NewReader("abc"))
|
||||
s.Split(loopAtEOFSplit)
|
||||
// Expect a panic
|
||||
panicked := true
|
||||
defer func() {
|
||||
err := recover()
|
||||
if err == nil {
|
||||
t.Fatal("should have panicked")
|
||||
}
|
||||
if msg, ok := err.(string); ok && strings.Contains(msg, "empty tokens") {
|
||||
panicked = true
|
||||
} else {
|
||||
panic(err)
|
||||
}
|
||||
}()
|
||||
for count := 0; s.Scan(); count++ {
|
||||
if count > 1000 {
|
||||
t.Fatal("looping")
|
||||
}
|
||||
}
|
||||
if s.Err() != nil {
|
||||
t.Fatal("after scan:", s.Err())
|
||||
}
|
||||
}
|
||||
|
||||
type countdown int
|
||||
|
||||
func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
||||
if *c > 0 {
|
||||
*c--
|
||||
return 1, data[:1], nil
|
||||
}
|
||||
return 0, nil, nil
|
||||
}
|
||||
|
||||
// Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
|
||||
func TestEmptyLinesOK(t *testing.T) {
|
||||
c := countdown(10000)
|
||||
s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
|
||||
s.Split(c.split)
|
||||
for s.Scan() {
|
||||
}
|
||||
if s.Err() != nil {
|
||||
t.Fatal("after scan:", s.Err())
|
||||
}
|
||||
if c != 0 {
|
||||
t.Fatalf("stopped with %d left to process", c)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user