From 55ad7b9bfe86c90cff55e0e8926fd8ff6b3b5182 Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Wed, 20 Feb 2013 12:14:31 -0800 Subject: [PATCH] bufio: new Scanner interface Add a new, simple interface for scanning (probably textual) data, based on a new type called Scanner. It does its own internal buffering, so should be plausibly efficient even without injecting a bufio.Reader. The format of the input is defined by a "split function", by default splitting into lines. Other implemented split functions include single bytes, single runes, and space-separated words. Here's the loop to scan stdin as a file of lines: s := bufio.NewScanner(os.Stdin) for s.Scan() { fmt.Printf("%s\n", s.Bytes()) } if s.Err() != nil { log.Fatal(s.Err()) } While we're dealing with spaces, define what space means to strings.Fields. Fixes #4802. R=adg, rogpeppe, bradfitz, rsc CC=golang-dev https://golang.org/cl/7322088 --- src/pkg/bufio/bufio_test.go | 2 +- src/pkg/bufio/export_test.go | 27 +++ src/pkg/bufio/scan.go | 338 ++++++++++++++++++++++++++++++++ src/pkg/bufio/scan_test.go | 370 +++++++++++++++++++++++++++++++++++ src/pkg/strings/strings.go | 3 +- 5 files changed, 738 insertions(+), 2 deletions(-) create mode 100644 src/pkg/bufio/export_test.go create mode 100644 src/pkg/bufio/scan.go create mode 100644 src/pkg/bufio/scan_test.go diff --git a/src/pkg/bufio/bufio_test.go b/src/pkg/bufio/bufio_test.go index 418690aa45..b0e8114431 100644 --- a/src/pkg/bufio/bufio_test.go +++ b/src/pkg/bufio/bufio_test.go @@ -953,7 +953,7 @@ func TestNegativeRead(t *testing.T) { t.Fatal("read did not panic") case error: if !strings.Contains(err.Error(), "reader returned negative count from Read") { - t.Fatal("wrong panic: %v", err) + t.Fatalf("wrong panic: %v", err) } default: t.Fatalf("unexpected panic value: %T(%v)", err, err) diff --git a/src/pkg/bufio/export_test.go b/src/pkg/bufio/export_test.go new file mode 100644 index 0000000000..3d3bb27d8d --- /dev/null +++ b/src/pkg/bufio/export_test.go @@ -0,0 
+1,27 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio + +// Exported for testing only. +import ( + "unicode/utf8" +) + +var IsSpace = isSpace + +func (s *Scanner) MaxTokenSize(n int) { + if n < utf8.UTFMax || n > 1e9 { + panic("bad max token size") + } + if n < len(s.buf) { + s.buf = make([]byte, n) + } + s.maxTokenSize = n +} + +// ErrOrEOF is like Err, but returns the stored error as is, so an end-of-input EOF is reported too. Used to test a corner case. +func (s *Scanner) ErrOrEOF() error { + return s.err +} diff --git a/src/pkg/bufio/scan.go b/src/pkg/bufio/scan.go new file mode 100644 index 0000000000..268ce6d1d3 --- /dev/null +++ b/src/pkg/bufio/scan.go @@ -0,0 +1,338 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio + +import ( + "bytes" + "errors" + "io" + "unicode/utf8" +) + +// Scanner provides a convenient interface for reading data such as +// a file of newline-delimited lines of text. Successive calls to +// the Scan method will step through the 'tokens' of a file, skipping +// the bytes between the tokens. The specification of a token is +// defined by a split function of type SplitFunc; the default split +// function breaks the input into lines with newlines stripped. Split +// functions are defined in this package for scanning a file into +// lines, bytes, UTF-8-encoded runes, and space-delimited words. The +// client may instead provide a custom split function. +// +// Scanning stops unrecoverably at EOF, the first I/O error, or a token too +// large to fit in the buffer. When a scan stops, the reader may have +// advanced arbitrarily far past the last token. Programs that need more +// control over error handling or large tokens, or must run sequential scans +// on a reader, should use bufio.Reader instead.
+// +// TODO(r): Provide executable examples. +// +type Scanner struct { + r io.Reader // The reader provided by the client. + split SplitFunc // The function to split the tokens. + maxTokenSize int // Maximum size of a token; modified by tests. + token []byte // Last token returned by split. + buf []byte // Buffer used as argument to split. + start int // First non-processed byte in buf. + end int // End of data in buf. + err error // Sticky error. +} + +// SplitFunc is the signature of the split function used to tokenize the +// input. The arguments are an initial substring of the remaining unprocessed +// data and a flag, atEOF, that reports whether the Reader has no more data +// to give. The return values are the number of bytes to advance the input +// and the next token to return to the user, plus an error, if any. If the +// data does not yet hold a complete token, for instance if it has no newline +// while scanning lines, SplitFunc can return (0, nil, nil) to signal the Scanner +// to read more data into the slice and try again with a longer slice +// starting at the same point in the input. +// +// If the returned error is non-nil, scanning stops and the error +// is returned to the client. +// +// The function is never called with an empty data slice unless atEOF +// is true. If atEOF is true, however, data may be non-empty and, +// as always, holds unprocessed text. +type SplitFunc func(data []byte, atEOF bool) (advance int, token []byte, err error) + +// Errors returned by Scanner. +var ( + ErrTooLong = errors.New("bufio.Scanner: token too long") + ErrNegativeAdvance = errors.New("bufio.Scanner: SplitFunc returns negative advance count") + ErrAdvanceTooFar = errors.New("bufio.Scanner: SplitFunc returns advance count beyond input") +) + +const ( + // Maximum size used to buffer a token. The actual maximum token size + // may be smaller as the buffer may need to include, for instance, a newline.
+ MaxScanTokenSize = 64 * 1024 +) + +// NewScanner returns a new Scanner to read from r. +func NewScanner(r io.Reader) *Scanner { + return &Scanner{ + r: r, + split: ScanLines, + maxTokenSize: MaxScanTokenSize, + buf: make([]byte, 4096), // Plausible starting size; needn't be large. + } +} + +// Err returns the first non-EOF error that was encountered by the Scanner. +func (s *Scanner) Err() error { + if s.err == io.EOF { + return nil + } + return s.err +} + +// Bytes returns the most recent token generated by a call to Scan. +// The underlying array may point to data that will be overwritten +// by a subsequent call to Scan. It does no allocation. +func (s *Scanner) Bytes() []byte { + return s.token +} + +// Text returns the most recent token generated by a call to Scan +// as a newly allocated string holding its bytes. +func (s *Scanner) Text() string { + return string(s.token) +} + +// Scan advances the Scanner to the next token, which will then be +// available through the Bytes or Text method. It returns false when the +// scan stops, either by reaching the end of the input or an error. +// After Scan returns false, the Err method will return any error that +// occurred during scanning, except that if it was io.EOF, Err +// will return nil. +func (s *Scanner) Scan() bool { + // Loop until we have a token. + for { + // See if we can get a token with what we already have. + if s.end > s.start { + advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil) + if err != nil { + s.setErr(err) + return false + } + if !s.advance(advance) { + return false + } + s.token = token + if token != nil { + return true + } + } + // We cannot generate a token with what we are holding. + // If we've already hit EOF or an I/O error, we are done. + if s.err != nil { + // Shut it down. + s.start = 0 + s.end = 0 + return false + } + // Must read more data. + // First, shift data to beginning of buffer if there's lots of empty space + // or space is needed.
+ if s.start > 0 && (s.end == len(s.buf) || s.start > len(s.buf)/2) { + copy(s.buf, s.buf[s.start:s.end]) + s.end -= s.start + s.start = 0 + } + // Is the buffer full? If so, resize. + if s.end == len(s.buf) { + if len(s.buf) >= s.maxTokenSize { + s.setErr(ErrTooLong) + return false + } + newSize := len(s.buf) * 2 + if newSize > s.maxTokenSize { + newSize = s.maxTokenSize + } + newBuf := make([]byte, newSize) + copy(newBuf, s.buf[s.start:s.end]) + s.buf = newBuf + s.end -= s.start + s.start = 0 + continue + } + // Finally we can read some input. + n, err := s.r.Read(s.buf[s.end:len(s.buf)]) + if err != nil { + s.setErr(err) + } + if n == 0 { // Don't loop forever if Reader doesn't deliver EOF; setErr will not mask a real I/O error. + s.setErr(io.EOF) + } + s.end += n + } + panic("not reached") +} + +// advance consumes n bytes of the buffer. It reports whether the advance was legal. +func (s *Scanner) advance(n int) bool { + if n < 0 { + s.setErr(ErrNegativeAdvance) + return false + } + if n > s.end-s.start { + s.setErr(ErrAdvanceTooFar) + return false + } + s.start += n + return true +} + +// setErr records the first error encountered. +func (s *Scanner) setErr(err error) { + if s.err == nil || s.err == io.EOF { + s.err = err + } +} + +// Split sets the split function for the Scanner. If called, it must be +// called before Scan. The default split function is ScanLines. +func (s *Scanner) Split(split SplitFunc) { + s.split = split +} + +// Split functions + +// ScanBytes is a split function for a Scanner that returns each byte as a token. +func ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + return 1, data[0:1], nil +} + +var errorRune = []byte(string(utf8.RuneError)) + +// ScanRunes is a split function for a Scanner that returns each +// UTF-8-encoded rune as a token.
The sequence of runes returned is +// equivalent to that from a range loop over the input as a string, which +// means that erroneous UTF-8 encodings translate to U+FFFD = "\xef\xbf\xbd". +// Because of the Scan interface, this makes it impossible for the client to +// distinguish correctly encoded replacement runes from encoding errors. +func ScanRunes(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + // Fast path 1: ASCII. + if data[0] < utf8.RuneSelf { + return 1, data[0:1], nil + } + + // Fast path 2: Correct UTF-8 decode without error. + _, width := utf8.DecodeRune(data) + if width > 1 { + // It's a valid encoding. Width cannot be one for a correctly encoded + // non-ASCII rune. + return width, data[0:width], nil + } + + // We know it's an error: we have width==1 and implicitly r==utf8.RuneError. + // Is the error because there wasn't a full rune to be decoded? + // FullRune distinguishes correctly between erroneous and incomplete encodings. + if !atEOF && !utf8.FullRune(data) { + // Incomplete; get more bytes. + return 0, nil, nil + } + + // We have a real UTF-8 encoding error. Return a properly encoded error rune + // but advance only one byte. This matches the behavior of a range loop over + // an incorrectly encoded string. + return 1, errorRune, nil +} + +// dropCR drops a terminal \r from the data. +func dropCR(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[0 : len(data)-1] + } + return data +} + +// ScanLines is a split function for a Scanner that returns each line of +// text, stripped of any trailing end-of-line marker. The returned line may +// be empty. The end-of-line marker is one optional carriage return followed +// by one mandatory newline. In regular expression notation, it is `\r?\n`. +// The last non-empty line of input will be returned even if it has no +// newline.
+func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + // We have a full newline-terminated line. + return i + 1, dropCR(data[0:i]), nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), dropCR(data), nil + } + // Request more data. + return 0, nil, nil +} + +// isSpace returns whether the character is a Unicode white space character. +// We avoid dependency on the unicode package, but check validity of the implementation +// in the tests. +func isSpace(r rune) bool { + if r <= '\u00FF' { + // Obvious ASCII ones: \t through \r plus space. Plus two Latin-1 oddballs. + switch r { + case ' ', '\t', '\n', '\v', '\f', '\r': + return true + case '\u0085', '\u00A0': + return true + } + return false + } + // High-valued ones. + if '\u2000' <= r && r <= '\u200a' { + return true + } + switch r { + case '\u1680', '\u180e', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000': + return true + } + return false +} + +// ScanWords is a split function for a Scanner that returns each +// space-separated word of text, with surrounding spaces deleted. It will +// never return an empty string. The definition of space is set by +// unicode.IsSpace. +func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { + // Skip leading spaces. + start := 0 + for width := 0; start < len(data); start += width { + var r rune + r, width = utf8.DecodeRune(data[start:]) + if !isSpace(r) { + break + } + } + if atEOF && len(data) == 0 { + return 0, nil, nil + } + // Scan until space, marking end of word. + for width, i := 0, start; i < len(data); i += width { + var r rune + r, width = utf8.DecodeRune(data[i:]) + if isSpace(r) { + return i + width, data[start:i], nil + } + } + // If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
+ if atEOF && len(data) > start { + return len(data), data[start:], nil + } + // Request more data, consuming the leading spaces so they cannot fill the buffer. + return start, nil, nil +} diff --git a/src/pkg/bufio/scan_test.go b/src/pkg/bufio/scan_test.go new file mode 100644 index 0000000000..48729aabb1 --- /dev/null +++ b/src/pkg/bufio/scan_test.go @@ -0,0 +1,370 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio_test + +import ( + . "bufio" + "bytes" + "errors" + "io" + "strings" + "testing" + "unicode" + "unicode/utf8" +) + +// Test white space table matches the Unicode definition. +func TestSpace(t *testing.T) { + for r := rune(0); r <= utf8.MaxRune; r++ { + if IsSpace(r) != unicode.IsSpace(r) { + t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r)) + } + } +} + +var scanTests = []string{ + "", + "a", + "¼", + "☹", + "\x81", // UTF-8 error + "\uFFFD", // correctly encoded RuneError + "abcdefgh", + "abc def\n\t\tgh ", + "abc¼☹\x81\uFFFD日本語\x82abc", +} + +func TestScanByte(t *testing.T) { + for n, test := range scanTests { + buf := bytes.NewBufferString(test) + s := NewScanner(buf) + s.Split(ScanBytes) + var i int + for i = 0; s.Scan(); i++ { + if b := s.Bytes(); len(b) != 1 || b[0] != test[i] { + t.Errorf("#%d: %d: expected %q got %q", n, i, test[i], b) + } + } + if i != len(test) { + t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +// Test that the rune splitter returns same sequence of runes (not bytes) as for range string. +func TestScanRune(t *testing.T) { + for n, test := range scanTests { + buf := bytes.NewBufferString(test) + s := NewScanner(buf) + s.Split(ScanRunes) + var i, runeCount int + var expect rune + // Use a string range loop to validate the sequence of runes.
+ for i, expect = range string(test) { + if !s.Scan() { + break + } + runeCount++ + got, _ := utf8.DecodeRune(s.Bytes()) + if got != expect { + t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got) + } + } + if s.Scan() { + t.Errorf("#%d: scan ran too long, got %q", n, s.Text()) + } + testRuneCount := utf8.RuneCountInString(test) + if runeCount != testRuneCount { + t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +var wordScanTests = []string{ + "", + " ", + "\n", + "a", + " a ", + "abc def", + " abc def ", + " abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n", +} + +// Test that the word splitter returns the same data as strings.Fields. +func TestScanWords(t *testing.T) { + for n, test := range wordScanTests { + buf := bytes.NewBufferString(test) + s := NewScanner(buf) + s.Split(ScanWords) + words := strings.Fields(test) + var wordCount int + for wordCount = 0; wordCount < len(words); wordCount++ { + if !s.Scan() { + break + } + got := s.Text() + if got != words[wordCount] { + t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got) + } + } + if s.Scan() { + t.Errorf("#%d: scan ran too long, got %q", n, s.Text()) + } + if wordCount != len(words) { + t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +// slowReader is a reader that returns only a few bytes at a time, to test the incremental +// reads in Scanner.Scan. +type slowReader struct { + max int + buf *bytes.Buffer +} + +func (sr *slowReader) Read(p []byte) (n int, err error) { + if len(p) > sr.max { + p = p[0:sr.max] + } + return sr.buf.Read(p) +} + +// genLine writes to buf a predictable but non-trivial line of text of length +// n, including the terminal newline and an occasional carriage return. +// If addNewline is false, the \r and \n are not emitted.
+func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { + buf.Reset() + doCR := lineNum%5 == 0 + if doCR { + n-- + } + for i := 0; i < n-1; i++ { // Stop early for \n. + c := 'a' + byte(lineNum+i) + if c == '\n' || c == '\r' { // Don't confuse us. + c = 'N' + } + buf.WriteByte(c) + } + if addNewline { + if doCR { + buf.WriteByte('\r') + } + buf.WriteByte('\n') + } + return +} + +// Test the line splitter, including some carriage returns but no long lines. +func TestScanLongLines(t *testing.T) { + const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. + tmp := new(bytes.Buffer) + buf := new(bytes.Buffer) + lineNum := 0 + j := 0 + for i := 0; i < 2*smallMaxTokenSize; i++ { + genLine(tmp, lineNum, j, true) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + buf.Write(tmp.Bytes()) + lineNum++ + } + s := NewScanner(&slowReader{1, buf}) + s.Split(ScanLines) + s.MaxTokenSize(smallMaxTokenSize) + j = 0 + for lineNum := 0; s.Scan(); lineNum++ { + genLine(tmp, lineNum, j, false) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + line := tmp.String() // We use the string-valued token here, for variety. + if s.Text() != line { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line) + } + } + err := s.Err() + if err != nil { + t.Fatal(err) + } +} + +// Test that the line splitter errors out on a long line. +func TestScanLineTooLong(t *testing.T) { + const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Build a buffer of lots of line lengths that keep growing past smallMaxTokenSize.
+ tmp := new(bytes.Buffer) + buf := new(bytes.Buffer) + lineNum := 0 + j := 0 + for i := 0; i < 2*smallMaxTokenSize; i++ { + genLine(tmp, lineNum, j, true) + j++ + buf.Write(tmp.Bytes()) + lineNum++ + } + s := NewScanner(&slowReader{3, buf}) + s.Split(ScanLines) + s.MaxTokenSize(smallMaxTokenSize) + j = 0 + for lineNum := 0; s.Scan(); lineNum++ { + genLine(tmp, lineNum, j, false) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + line := tmp.Bytes() + if !bytes.Equal(s.Bytes(), line) { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line) + } + } + err := s.Err() + if err != ErrTooLong { + t.Fatalf("expected ErrTooLong; got %s", err) + } +} + +// testNoNewline scans text by lines through a slow reader and checks each token against lines. +func testNoNewline(text string, lines []string, t *testing.T) { + buf := bytes.NewBufferString(text) + s := NewScanner(&slowReader{7, buf}) + s.Split(ScanLines) + for lineNum := 0; s.Scan(); lineNum++ { + line := lines[lineNum] + if s.Text() != line { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line) + } + } + err := s.Err() + if err != nil { + t.Fatal(err) + } +} + +var noNewlineLines = []string{ + "abcdefghijklmn\nopqrstuvwxyz", +} + +// Test that the line splitter handles a final line without a newline. +func TestScanLineNoNewline(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final line with a carriage return but no newline. +func TestScanLineReturnButNoNewline(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\r" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final empty line.
+func TestScanLineEmptyFinalLine(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\n\n" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + "", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final empty line with a carriage return but no newline. +func TestScanLineEmptyFinalLineWithCR(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\n\r" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + "", + } + testNoNewline(text, lines, t) +} + +var testError = errors.New("testError") + +// Test the correct error is returned when the split function errors out. +func TestSplitError(t *testing.T) { + // Create a split function that delivers a little data, then a predictable error. + numSplits := 0 + const okCount = 7 + errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF { + panic("didn't get enough data") + } + if numSplits >= okCount { + return 0, nil, testError + } + numSplits++ + return 1, data[0:1], nil + } + // Read the data. + const text = "abcdefghijklmnopqrstuvwxyz" + buf := bytes.NewBufferString(text) + s := NewScanner(&slowReader{1, buf}) + s.Split(errorSplit) + var i int + for i = 0; s.Scan(); i++ { + if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] { + t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0]) + } + } + // Check correct termination location and error. + if i != okCount { + t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i) + } + err := s.Err() + if err != testError { + t.Fatalf("expected %q got %v", testError, err) + } +} + +// Test that an EOF is overridden by a user-generated scan error. +func TestErrAtEOF(t *testing.T) { + s := NewScanner(strings.NewReader("1 2 33")) + // This splitter will fail on last entry, after s.err==EOF.
+ split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + advance, token, err = ScanWords(data, atEOF) + if len(token) > 1 { + if s.ErrOrEOF() != io.EOF { + t.Fatal("not testing EOF") + } + err = testError + } + return + } + s.Split(split) + for s.Scan() { + } + if s.Err() != testError { + t.Fatal("wrong error:", s.Err()) + } +} diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 9203fc5140..ccf415e694 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -305,7 +305,8 @@ func SplitAfter(s, sep string) []string { } // Fields splits the string s around each instance of one or more consecutive white space -// characters, returning an array of substrings of s or an empty list if s contains only white space. +// characters, as defined by unicode.IsSpace, returning an array of substrings of s or an +// empty list if s contains only white space. func Fields(s string) []string { return FieldsFunc(s, unicode.IsSpace) }