mirror of
https://github.com/golang/go
synced 2024-11-11 23:40:22 -07:00
fmt: require newlines to match when scanning with a format
The documentation says that newlines behave like this: Scan etc.: newlines are spaces. Scanln etc.: newlines terminate the scan. Scanf etc.: newlines must match in input and format. The code did not implement this behavior in all cases, especially for Scanf. Make it behave: - Fix the handling of spaces and newlines in ss.Advance. The code is longer but now behaves as it should. - Delete the reuse of the current ss in newScanState. There is really no need, since it's only used in recursive calls to Scan etc., and the flags are likely wrong. Simpler just to allocate a new one every time, and likelier to be correct. Fixes #10862. Change-Id: If060ac021017346723b0d62de4e5a305da898f68 Reviewed-on: https://go-review.googlesource.com/10991 Reviewed-by: Andrew Gerrand <adg@golang.org>
This commit is contained in:
parent
ab89378cb7
commit
57f4b43078
@ -237,11 +237,24 @@
|
||||
An analogous set of functions scans formatted text to yield
|
||||
values. Scan, Scanf and Scanln read from os.Stdin; Fscan,
|
||||
Fscanf and Fscanln read from a specified io.Reader; Sscan,
|
||||
Sscanf and Sscanln read from an argument string. Scanln,
|
||||
Fscanln and Sscanln stop scanning at a newline and require that
|
||||
the items be followed by one; Scanf, Fscanf and Sscanf require
|
||||
newlines in the input to match newlines in the format; the other
|
||||
routines treat newlines as spaces.
|
||||
Sscanf and Sscanln read from an argument string.
|
||||
|
||||
Scan, Fscan, Sscan treat newlines in the input as spaces.
|
||||
|
||||
Scanln, Fscanln and Sscanln stop scanning at a newline and
|
||||
require that the items be followed by a newline or EOF.
|
||||
|
||||
Scanf, Fscanf and Sscanf require that (after skipping spaces)
|
||||
newlines in the format are matched by newlines in the input
|
||||
and vice versa. This behavior differs from the corresponding
|
||||
routines in C, which uniformly treat newlines as spaces.
|
||||
|
||||
When scanning with Scanf, Fscanf, and Sscanf, all non-empty
|
||||
runs of space characters (except newline) are equivalent
|
||||
to a single space in both the format and the input. With
|
||||
that proviso, text in the format string must match the input
|
||||
text; scanning stops if it does not, with the return value
|
||||
of the function indicating the number of arguments scanned.
|
||||
|
||||
Scanf, Fscanf, and Sscanf parse the arguments according to a
|
||||
format string, analogous to that of Printf. For example, %x
|
||||
@ -266,13 +279,6 @@
|
||||
is no syntax for scanning with a precision (no %5.2f, just
|
||||
%5f).
|
||||
|
||||
When scanning with a format, all non-empty runs of space
|
||||
characters (except newline) are equivalent to a single
|
||||
space in both the format and the input. With that proviso,
|
||||
text in the format string must match the input text; scanning
|
||||
stops if it does not, with the return value of the function
|
||||
indicating the number of arguments scanned.
|
||||
|
||||
In all the scanning functions, a carriage return followed
|
||||
immediately by a newline is treated as a plain newline
|
||||
(\r\n means the same as \n).
|
||||
|
@ -34,16 +34,16 @@ type ScanState interface {
|
||||
ReadRune() (r rune, size int, err error)
|
||||
// UnreadRune causes the next call to ReadRune to return the same rune.
|
||||
UnreadRune() error
|
||||
// SkipSpace skips space in the input. Newlines are treated as space
|
||||
// unless the scan operation is Scanln, Fscanln or Sscanln, in which case
|
||||
// a newline is treated as EOF.
|
||||
// SkipSpace skips space in the input. Newlines are treated appropriately
|
||||
// for the operation being performed; see the package documentation
|
||||
// for more information.
|
||||
SkipSpace()
|
||||
// Token skips space in the input if skipSpace is true, then returns the
|
||||
// run of Unicode code points c satisfying f(c). If f is nil,
|
||||
// !unicode.IsSpace(c) is used; that is, the token will hold non-space
|
||||
// characters. Newlines are treated as space unless the scan operation
|
||||
// is Scanln, Fscanln or Sscanln, in which case a newline is treated as
|
||||
// EOF. The returned slice points to shared data that may be overwritten
|
||||
// characters. Newlines are treated appropriately for the operation being
|
||||
// performed; see the package documentation for more information.
|
||||
// The returned slice points to shared data that may be overwritten
|
||||
// by the next call to Token, a call to a Scan function using the ScanState
|
||||
// as input, or when the calling Scan method returns.
|
||||
Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
|
||||
@ -82,6 +82,7 @@ func Scanln(a ...interface{}) (n int, err error) {
|
||||
// space-separated values into successive arguments as determined by
|
||||
// the format. It returns the number of items successfully scanned.
|
||||
// If that is less than the number of arguments, err will report why.
|
||||
// Newlines in the input must match newlines in the format.
|
||||
func Scanf(format string, a ...interface{}) (n int, err error) {
|
||||
return Fscanf(os.Stdin, format, a...)
|
||||
}
|
||||
@ -114,6 +115,7 @@ func Sscanln(str string, a ...interface{}) (n int, err error) {
|
||||
// Sscanf scans the argument string, storing successive space-separated
|
||||
// values into successive arguments as determined by the format. It
|
||||
// returns the number of items successfully parsed.
|
||||
// Newlines in the input must match newlines in the format.
|
||||
func Sscanf(str string, format string, a ...interface{}) (n int, err error) {
|
||||
return Fscanf((*stringReader)(&str), format, a...)
|
||||
}
|
||||
@ -141,6 +143,7 @@ func Fscanln(r io.Reader, a ...interface{}) (n int, err error) {
|
||||
// Fscanf scans text read from r, storing successive space-separated
|
||||
// values into successive arguments as determined by the format. It
|
||||
// returns the number of items successfully parsed.
|
||||
// Newlines in the input must match newlines in the format.
|
||||
func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err error) {
|
||||
s, old := newScanState(r, false, false)
|
||||
n, err = s.doScanf(format, a)
|
||||
@ -388,17 +391,6 @@ var ssFree = sync.Pool{
|
||||
|
||||
// newScanState allocates a new ss struct or grab a cached one.
|
||||
func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
|
||||
// If the reader is a *ss, then we've got a recursive
|
||||
// call to Scan, so re-use the scan state.
|
||||
s, ok := r.(*ss)
|
||||
if ok {
|
||||
old = s.ssave
|
||||
s.limit = s.argLimit
|
||||
s.nlIsEnd = nlIsEnd || s.nlIsEnd
|
||||
s.nlIsSpace = nlIsSpace
|
||||
return
|
||||
}
|
||||
|
||||
s = ssFree.Get().(*ss)
|
||||
if rr, ok := r.(io.RuneReader); ok {
|
||||
s.rr = rr
|
||||
@ -1057,8 +1049,8 @@ func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
|
||||
s.scanOne('v', arg)
|
||||
numProcessed++
|
||||
}
|
||||
// Check for newline if required.
|
||||
if !s.nlIsSpace {
|
||||
// Check for newline (or EOF) if required (Scanln etc.).
|
||||
if s.nlIsEnd {
|
||||
for {
|
||||
r := s.getRune()
|
||||
if r == '\n' || r == eof {
|
||||
@ -1074,12 +1066,13 @@ func (s *ss) doScan(a []interface{}) (numProcessed int, err error) {
|
||||
}
|
||||
|
||||
// advance determines whether the next characters in the input match
|
||||
// those of the format. It returns the number of bytes (sic) consumed
|
||||
// in the format. Newlines included, all runs of space characters in
|
||||
// either input or format behave as a single space. This routine also
|
||||
// handles the %% case. If the return value is zero, either format
|
||||
// starts with a % (with no following %) or the input is empty.
|
||||
// If it is negative, the input did not match the string.
|
||||
// those of the format. It returns the number of bytes (sic) consumed
|
||||
// in the format. All runs of space characters in either input or
|
||||
// format behave as a single space. Newlines are special, though:
|
||||
// newlines in the format must match those in the input and vice versa.
|
||||
// This routine also handles the %% case. If the return value is zero,
|
||||
// either format starts with a % (with no following %) or the input
|
||||
// is empty. If it is negative, the input did not match the string.
|
||||
func (s *ss) advance(format string) (i int) {
|
||||
for i < len(format) {
|
||||
fmtc, w := utf8.DecodeRuneInString(format[i:])
|
||||
@ -1092,24 +1085,45 @@ func (s *ss) advance(format string) (i int) {
|
||||
i += w // skip the first %
|
||||
}
|
||||
sawSpace := false
|
||||
wasNewline := false
|
||||
// Skip spaces in format but absorb at most one newline.
|
||||
for isSpace(fmtc) && i < len(format) {
|
||||
if fmtc == '\n' {
|
||||
if wasNewline { // Already saw one; stop here.
|
||||
break
|
||||
}
|
||||
wasNewline = true
|
||||
}
|
||||
sawSpace = true
|
||||
i += w
|
||||
fmtc, w = utf8.DecodeRuneInString(format[i:])
|
||||
}
|
||||
if sawSpace {
|
||||
// There was space in the format, so there should be space (EOF)
|
||||
// There was space in the format, so there should be space
|
||||
// in the input.
|
||||
inputc := s.getRune()
|
||||
if inputc == eof || inputc == '\n' {
|
||||
// If we've reached a newline, stop now; don't read ahead.
|
||||
if inputc == eof {
|
||||
return
|
||||
}
|
||||
if !isSpace(inputc) {
|
||||
// Space in format but not in input: error
|
||||
// Space in format but not in input.
|
||||
s.errorString("expected space in input to match format")
|
||||
}
|
||||
s.skipSpace(true)
|
||||
// Skip spaces but stop at newline.
|
||||
for inputc != '\n' && isSpace(inputc) {
|
||||
inputc = s.getRune()
|
||||
}
|
||||
if inputc == '\n' {
|
||||
if !wasNewline {
|
||||
s.errorString("newline in input does not match format")
|
||||
}
|
||||
// We've reached a newline, stop now; don't read further.
|
||||
return
|
||||
}
|
||||
s.UnreadRune()
|
||||
if wasNewline {
|
||||
s.errorString("newline in format does not match input")
|
||||
}
|
||||
continue
|
||||
}
|
||||
inputc := s.mustReadRune()
|
||||
|
@ -1044,3 +1044,85 @@ func TestHexBytes(t *testing.T) {
|
||||
t.Errorf("odd count: got count, err = %d, %v; expected 0, error", n, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScanNewlinesAreSpaces(t *testing.T) {
|
||||
var a, b int
|
||||
var tests = []struct {
|
||||
name string
|
||||
text string
|
||||
count int
|
||||
}{
|
||||
{"newlines", "1\n2\n", 2},
|
||||
{"no final newline", "1\n2", 2},
|
||||
{"newlines with spaces ", "1 \n 2 \n", 2},
|
||||
{"no final newline with spaces", "1 \n 2", 2},
|
||||
}
|
||||
for _, test := range tests {
|
||||
n, err := Sscan(test.text, &a, &b)
|
||||
if n != test.count {
|
||||
t.Errorf("%s: expected to scan %d item(s), scanned %d", test.name, test.count, n)
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("%s: unexpected error: %s", test.name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScanlnNewlinesTerminate(t *testing.T) {
|
||||
var a, b int
|
||||
var tests = []struct {
|
||||
name string
|
||||
text string
|
||||
count int
|
||||
ok bool
|
||||
}{
|
||||
{"one line one item", "1\n", 1, false},
|
||||
{"one line two items with spaces ", " 1 2 \n", 2, true},
|
||||
{"one line two items no newline", " 1 2", 2, true},
|
||||
{"two lines two items", "1\n2\n", 1, false},
|
||||
}
|
||||
for _, test := range tests {
|
||||
n, err := Sscanln(test.text, &a, &b)
|
||||
if n != test.count {
|
||||
t.Errorf("%s: expected to scan %d item(s), scanned %d", test.name, test.count, n)
|
||||
}
|
||||
if test.ok && err != nil {
|
||||
t.Errorf("%s: unexpected error: %s", test.name, err)
|
||||
}
|
||||
if !test.ok && err == nil {
|
||||
t.Errorf("%s: expected error; got none", test.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScanfNewlineMatchFormat(t *testing.T) {
|
||||
var a, b int
|
||||
var tests = []struct {
|
||||
name string
|
||||
text string
|
||||
format string
|
||||
count int
|
||||
ok bool
|
||||
}{
|
||||
{"newline in both", "1\n2", "%d\n%d\n", 2, true},
|
||||
{"newline in input", "1\n2", "%d %d", 1, false},
|
||||
{"space-newline in input", "1 \n2", "%d %d", 1, false},
|
||||
{"newline in format", "1 2", "%d\n%d", 1, false},
|
||||
{"space-newline in format", "1 2", "%d \n%d", 1, false},
|
||||
{"space-newline in both", "1 \n2", "%d \n%d", 2, true},
|
||||
{"extra space in format", "1\n2", "%d\n %d", 2, true},
|
||||
{"two extra spaces in format", "1\n2", "%d \n %d", 2, true},
|
||||
}
|
||||
for _, test := range tests {
|
||||
n, err := Sscanf(test.text, test.format, &a, &b)
|
||||
if n != test.count {
|
||||
t.Errorf("%s: expected to scan %d item(s), scanned %d", test.name, test.count, n)
|
||||
}
|
||||
if test.ok && err != nil {
|
||||
t.Errorf("%s: unexpected error: %s", test.name, err)
|
||||
}
|
||||
if !test.ok && err == nil {
|
||||
t.Errorf("%s: expected error; got none", test.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user