bufio: don't loop generating empty tokens

The new rules for split functions mean that we are exposed to the common bug of a function that loops forever at EOF. Pick these off by shutting down the scanner if too many consecutive empty tokens are delivered. Fixes #9020. LGTM=rsc, adg R=golang-codereviews, rsc, adg, bradfitz CC=golang-codereviews https://golang.org/cl/169970043
2024-11-23 05:30:07 -07:00 · 2014-11-06 09:57:46 +11:00 · 2014-11-06 09:57:46 +11:00 · 590f528376
commit 590f528376
parent bb4a358af3
2 changed files with 70 additions and 0 deletions
--- a/src/bufio/scan.go
+++ b/src/bufio/scan.go
@ -36,6 +36,7 @@ type Scanner struct {
 	start        int       // First non-processed byte in buf.
 	end          int       // End of data in buf.
 	err          error     // Sticky error.
+	empties      int       // Count of successive empty tokens.
 }

 // SplitFunc is the signature of the split function used to tokenize the
@ -108,6 +109,8 @@ func (s *Scanner) Text() string {
 // After Scan returns false, the Err method will return any error that
 // occurred during scanning, except that if it was io.EOF, Err
 // will return nil.
+// Split panics if the split function returns 100 empty tokens without
+// advancing the input. This is a common error mode for scanners.
 func (s *Scanner) Scan() bool {
 	// Loop until we have a token.
 	for {
@ -125,6 +128,14 @@ func (s *Scanner) Scan() bool {
 			}
 			s.token = token
 			if token != nil {
+				if len(token) > 0 {
+					s.empties = 0
+				} else {
+					s.empties++
+					if s.empties > 100 {
+						panic("bufio.Scan: 100 empty tokens without progressing")
+					}
+				}
 				return true
 			}
 		}
@ -172,6 +183,7 @@ func (s *Scanner) Scan() bool {
 				break
 			}
 			if n > 0 {
+				s.empties = 0
 				break
 			}
 			loop++
--- a/src/bufio/scan_test.go
+++ b/src/bufio/scan_test.go
@ -455,3 +455,61 @@ func TestEmptyTokens(t *testing.T) {
 		t.Fatal(err)
 	}
 }
+
+func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	if len(data) > 0 {
+		return 1, data[:1], nil
+	}
+	return 0, data, nil
+}
+
+func TestDontLoopForever(t *testing.T) {
+	s := NewScanner(strings.NewReader("abc"))
+	s.Split(loopAtEOFSplit)
+	// Expect a panic
+	panicked := true
+	defer func() {
+		err := recover()
+		if err == nil {
+			t.Fatal("should have panicked")
+		}
+		if msg, ok := err.(string); ok && strings.Contains(msg, "empty tokens") {
+			panicked = true
+		} else {
+			panic(err)
+		}
+	}()
+	for count := 0; s.Scan(); count++ {
+		if count > 1000 {
+			t.Fatal("looping")
+		}
+	}
+	if s.Err() != nil {
+		t.Fatal("after scan:", s.Err())
+	}
+}
+
+type countdown int
+
+func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	if *c > 0 {
+		*c--
+		return 1, data[:1], nil
+	}
+	return 0, nil, nil
+}
+
+// Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
+func TestEmptyLinesOK(t *testing.T) {
+	c := countdown(10000)
+	s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
+	s.Split(c.split)
+	for s.Scan() {
+	}
+	if s.Err() != nil {
+		t.Fatal("after scan:", s.Err())
+	}
+	if c != 0 {
+		t.Fatalf("stopped with %d left to process", c)
+	}
+}