From 6ee7997366ef4c1f666790d65b039e76407327f3 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Tue, 14 Sep 2010 11:06:17 +1000 Subject: [PATCH] fmt.Fscan: use UnreadRune to preserve data across calls. Fixes #1055. (About as well as it can be fixed without changing the API.) R=rsc CC=golang-dev https://golang.org/cl/2156047 --- src/pkg/fmt/doc.go | 12 ++++++++---- src/pkg/fmt/scan.go | 38 +++++++++++++++++++++++++++++--------- src/pkg/fmt/scan_test.go | 22 ++++++++++++++++++++++ 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/src/pkg/fmt/doc.go b/src/pkg/fmt/doc.go index 101e242c398..03343ea5d4b 100644 --- a/src/pkg/fmt/doc.go +++ b/src/pkg/fmt/doc.go @@ -129,9 +129,13 @@ All arguments to be scanned must be either pointers to basic types or implementations of the Scanner interface. - Note: Fscan etc. can read one character past the input - they return, which means that a loop calling a scan routine - may skip some of the input. This is usually a problem only - when there is no space between input values. + Note: Fscan etc. can read one character (rune) past the + input they return, which means that a loop calling a scan + routine may skip some of the input. This is usually a + problem only when there is no space between input values. + However, if the reader provided to Fscan implements UnreadRune, + that method will be used to save the character and successive + calls will not lose data. To attach an UnreadRune method + to a reader without that capability, use bufio.NewReader. */ package fmt diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go index bd8af50069f..fefd556c7e2 100644 --- a/src/pkg/fmt/scan.go +++ b/src/pkg/fmt/scan.go @@ -22,6 +22,14 @@ type readRuner interface { ReadRune() (rune int, size int, err os.Error) } +// unreadRuner is the interface to something that can unread runes. +// If the object provided to Scan does not satisfy this interface, +// a local buffer will be used to back up the input, but its contents +// will be lost when Scan returns. +type unreadRuner interface { + UnreadRune() os.Error +} + // ScanState represents the scanner state passed to custom scanners. // Scanners may do rune-at-a-time scanning or ask the ScanState // to discover the next space-delimited token. @@ -29,7 +37,7 @@ type ScanState interface { // GetRune reads the next rune (Unicode code point) from the input. GetRune() (rune int, err os.Error) // UngetRune causes the next call to GetRune to return the rune. - UngetRune(rune int) + UngetRune() // Width returns the value of the width option and whether it has been set. // The unit is Unicode code points. Width() (wid int, ok bool) @@ -133,6 +141,7 @@ type ss struct { buf bytes.Buffer // token accumulator nlIsSpace bool // whether newline counts as white space peekRune int // one-rune lookahead + prevRune int // last rune returned by GetRune atEOF bool // already read EOF maxWid int // max width of field, in runes widPresent bool // width was specified @@ -142,10 +151,14 @@ type ss struct { func (s *ss) GetRune() (rune int, err os.Error) { if s.peekRune >= 0 { rune = s.peekRune + s.prevRune = rune s.peekRune = -1 return } rune, _, err = s.rr.ReadRune() + if err == nil { + s.prevRune = rune + } return } @@ -161,11 +174,14 @@ func (s *ss) getRune() (rune int) { } if s.peekRune >= 0 { rune = s.peekRune + s.prevRune = rune s.peekRune = -1 return } rune, _, err := s.rr.ReadRune() - if err != nil { + if err == nil { + s.prevRune = rune + } else if err != nil { if err == os.EOF { s.atEOF = true return EOF @@ -198,8 +214,12 @@ func (s *ss) mustGetRune() (rune int) { } -func (s *ss) UngetRune(rune int) { - s.peekRune = rune +func (s *ss) UngetRune() { + if u, ok := s.rr.(unreadRuner); ok { + u.UnreadRune() + } else { + s.peekRune = s.prevRune + } } func (s *ss) error(err os.Error) { @@ -334,7 +354,7 @@ func (s *ss) skipSpace(stopAtNewline bool) { return } if !unicode.IsSpace(rune) { - s.UngetRune(rune) + s.UngetRune() break } } @@ -352,7 +372,7 @@ func (s *ss) token() string { break } if unicode.IsSpace(rune) { - s.UngetRune(rune) + s.UngetRune() break } s.buf.WriteRune(rune) @@ -386,7 +406,7 @@ func (s *ss) accept(ok string) bool { } } if rune != EOF { - s.UngetRune(rune) + s.UngetRune() } return false } @@ -681,7 +701,7 @@ func (s *ss) hexByte() (b byte, ok bool) { return } if unicode.IsSpace(rune1) { - s.UngetRune(rune1) + s.UngetRune() return } rune2 := s.mustGetRune() @@ -892,7 +912,7 @@ func (s *ss) advance(format string) (i int) { } inputc := s.mustGetRune() if fmtc != inputc { - s.UngetRune(inputc) + s.UngetRune() return -1 } i += w diff --git a/src/pkg/fmt/scan_test.go b/src/pkg/fmt/scan_test.go index 569d2f55f39..075cdaa1ca5 100644 --- a/src/pkg/fmt/scan_test.go +++ b/src/pkg/fmt/scan_test.go @@ -5,6 +5,7 @@ package fmt_test import ( + "bufio" . "fmt" "io" "os" @@ -582,3 +583,24 @@ func TestEOF(t *testing.T) { t.Error("expected one EOF, got", ec.eofCount) } } + +// Verify that, at least when using bufio, successive calls to Fscan do not lose runes. +func TestUnreadRuneWithBufio(t *testing.T) { + r := bufio.NewReader(strings.NewReader("123αb")) + var i int + var a string + n, err := Fscanf(r, "%d", &i) + if n != 1 || err != nil { + t.Errorf("reading int expected one item, no errors; got %d %q", n, err) + } + if i != 123 { + t.Errorf("expected 123; got %d", i) + } + n, err = Fscanf(r, "%s", &a) + if n != 1 || err != nil { + t.Errorf("reading string expected one item, no errors; got %d %q", n, err) + } + if a != "αb" { + t.Errorf("expected αb; got %q", a) + } +}