1
0
mirror of https://github.com/golang/go synced 2024-11-20 00:44:45 -07:00

fmt: make ScanState.Token more general.

When writing custom scanners, I found that
Token itself was rarely useful, as I did not always
want to stop at white space. This change makes
it possible to stop at any class of characters
while reusing the buffer within the ScanState.
(This change also fixes a bug in Token.)

R=r, r2
CC=golang-dev
https://golang.org/cl/4243055
This commit is contained in:
Roger Peppe 2011-03-09 10:01:47 -08:00 committed by Rob Pike
parent 2c420ece67
commit 3a95587e01
2 changed files with 36 additions and 16 deletions

View File

@ -35,10 +35,15 @@ type ScanState interface {
ReadRune() (rune int, size int, err os.Error) ReadRune() (rune int, size int, err os.Error)
// UnreadRune causes the next call to ReadRune to return the same rune. // UnreadRune causes the next call to ReadRune to return the same rune.
UnreadRune() os.Error UnreadRune() os.Error
// Token returns the next space-delimited token from the input. If // Token skips space in the input if skipSpace is true, then returns the
// a width has been specified, the returned token will be no longer // run of Unicode code points c satisfying f(c). If f is nil,
// than the width. // !unicode.IsSpace(c) is used; that is, the token will hold non-space
Token() (token string, err os.Error) // characters. Newlines are treated as space unless the scan operation
// is Scanln, Fscanln or Sscanln, in which case a newline is treated as
// EOF. The returned slice points to shared data that may be overwritten
// by the next call to Token, a call to a Scan function using the ScanState
// as input, or when the calling Scan method returns.
Token(skipSpace bool, f func(int) bool) (token []byte, err os.Error)
// Width returns the value of the width option and whether it has been set. // Width returns the value of the width option and whether it has been set.
// The unit is Unicode code points. // The unit is Unicode code points.
Width() (wid int, ok bool) Width() (wid int, ok bool)
@ -238,7 +243,7 @@ func (s *ss) errorString(err string) {
panic(scanError{os.ErrorString(err)}) panic(scanError{os.ErrorString(err)})
} }
func (s *ss) Token() (tok string, err os.Error) { func (s *ss) Token(skipSpace bool, f func(int) bool) (tok []byte, err os.Error) {
defer func() { defer func() {
if e := recover(); e != nil { if e := recover(); e != nil {
if se, ok := e.(scanError); ok { if se, ok := e.(scanError); ok {
@ -248,10 +253,19 @@ func (s *ss) Token() (tok string, err os.Error) {
} }
} }
}() }()
tok = s.token() if f == nil {
f = notSpace
}
s.buf.Reset()
tok = s.token(skipSpace, f)
return return
} }
// notSpace reports whether r is not a white space character, as decided by
// unicode.IsSpace. It is the default scanning function used in Token when
// the caller passes a nil f.
func notSpace(r int) bool {
return !unicode.IsSpace(r)
}
// readRune is a structure to enable reading UTF-8 encoded code points // readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does // from an io.Reader. It is used if the Reader given to the scanner does
// not already implement io.RuneReader. // not already implement io.RuneReader.
@ -384,24 +398,27 @@ func (s *ss) skipSpace(stopAtNewline bool) {
} }
} }
// token returns the next space-delimited string from the input. It // token returns the next space-delimited string from the input. It
// skips white space. For Scanln, it stops at newlines. For Scan, // skips white space. For Scanln, it stops at newlines. For Scan,
// newlines are treated as spaces. // newlines are treated as spaces.
func (s *ss) token() string { func (s *ss) token(skipSpace bool, f func(int) bool) []byte {
if skipSpace {
s.skipSpace(false) s.skipSpace(false)
}
// read until white space or newline // read until white space or newline
for { for {
rune := s.getRune() rune := s.getRune()
if rune == EOF { if rune == EOF {
break break
} }
if unicode.IsSpace(rune) { if !f(rune) {
s.UnreadRune() s.UnreadRune()
break break
} }
s.buf.WriteRune(rune) s.buf.WriteRune(rune)
} }
return s.buf.String() return s.buf.Bytes()
} }
// typeError indicates that the type of the operand did not match the format // typeError indicates that the type of the operand did not match the format
@ -729,7 +746,7 @@ func (s *ss) convertString(verb int) (str string) {
case 'x': case 'x':
str = s.hexString() str = s.hexString()
default: default:
str = s.token() // %s and %v just return the next word str = string(s.token(true, notSpace)) // %s and %v just return the next word
} }
// Empty strings other than with %q are not OK. // Empty strings other than with %q are not OK.
if len(str) == 0 && verb != 'q' && s.maxWid > 0 { if len(str) == 0 && verb != 'q' && s.maxWid > 0 {

View File

@ -88,14 +88,15 @@ type FloatTest struct {
type Xs string type Xs string
func (x *Xs) Scan(state ScanState, verb int) os.Error { func (x *Xs) Scan(state ScanState, verb int) os.Error {
tok, err := state.Token() tok, err := state.Token(true, func(r int) bool { return r == verb })
if err != nil { if err != nil {
return err return err
} }
if !regexp.MustCompile("^" + string(verb) + "+$").MatchString(tok) { s := string(tok)
if !regexp.MustCompile("^" + string(verb) + "+$").MatchString(s) {
return os.ErrorString("syntax error for xs") return os.ErrorString("syntax error for xs")
} }
*x = Xs(tok) *x = Xs(s)
return nil return nil
} }
@ -113,9 +114,11 @@ func (s *IntString) Scan(state ScanState, verb int) os.Error {
return err return err
} }
if _, err := Fscan(state, &s.s); err != nil { tok, err := state.Token(true, nil)
if err != nil {
return err return err
} }
s.s = string(tok)
return nil return nil
} }
@ -331,7 +334,7 @@ var multiTests = []ScanfMultiTest{
{"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""}, {"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""},
// Custom scanners. // Custom scanners.
{"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""}, {"%e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""},
{"%4v%s", "12abcd", args(&z, &s), args(IntString{12, "ab"}, "cd"), ""}, {"%4v%s", "12abcd", args(&z, &s), args(IntString{12, "ab"}, "cd"), ""},
// Errors // Errors