1
0
mirror of https://github.com/golang/go synced 2024-11-20 05:04:43 -07:00

fmt.Scan, fmt.Scanln: Start of a simple scanning API in the fmt package.

Still to do:
	- composite types
	- user-defined scanners
	- format-driven scanning
The package comment will be updated when more of the functionality is in place.

R=rsc
CC=golang-dev
https://golang.org/cl/1252045
This commit is contained in:
Rob Pike 2010-05-25 21:02:35 -07:00
parent 67d30bb696
commit 71f130f55c
3 changed files with 595 additions and 0 deletions

View File

@ -8,5 +8,6 @@ TARG=fmt
GOFILES=\ GOFILES=\
format.go\ format.go\
print.go\ print.go\
scan.go\
include ../../Make.pkg include ../../Make.pkg

413
src/pkg/fmt/scan.go Normal file
View File

@ -0,0 +1,413 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fmt
import (
"bytes"
"io"
"os"
"reflect"
"strconv"
"unicode"
"utf8"
)
// readRuner is the interface to something that can read runes. If
// the object provided to Scan does not satisfy this interface, the
// object will be wrapped by a readRune object.
type readRuner interface {
ReadRune() (rune int, size int, err os.Error)
}
type ss struct {
rr readRuner // where to read input
buf bytes.Buffer // token accumulator
nlIsSpace bool // whether newline counts as white space
peekRune int // one-rune lookahead
err os.Error
}
// readRune is a structure to enable reading UTF-8 encoded code points
// from an io.Reader. It is used if the Reader given to the scanner does
// not already implement readRuner.
// TODO: readByteRune for things that can read bytes.
type readRune struct {
reader io.Reader
buf [utf8.UTFMax]byte
}
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r.
func (r readRune) ReadRune() (rune int, size int, err os.Error) {
_, err = r.reader.Read(r.buf[0:1])
if err != nil {
return 0, 0, err
}
if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
rune = int(r.buf[0])
return
}
for size := 1; size < utf8.UTFMax; size++ {
_, err = r.reader.Read(r.buf[size : size+1])
if err != nil {
break
}
if !utf8.FullRune(&r.buf) {
continue
}
if c, w := utf8.DecodeRune(r.buf[0:size]); w == size {
rune = c
return
}
}
return utf8.RuneError, 1, err
}
// A leaky bucket of reusable ss structures.
var ssFree = make(chan *ss, 100)
// Allocate a new ss struct. Probably can grab the previous one from ssFree.
func newScanState(r io.Reader, nlIsSpace bool) *ss {
s, ok := <-ssFree
if !ok {
s = new(ss)
}
if rr, ok := r.(readRuner); ok {
s.rr = rr
} else {
s.rr = readRune{reader: r}
}
s.nlIsSpace = nlIsSpace
s.peekRune = -1
s.err = nil
return s
}
// Save used ss structs in ssFree; avoid an allocation per invocation.
func (s *ss) free() {
// Don't hold on to ss structs with large buffers.
if cap(s.buf.Bytes()) > 1024 {
return
}
s.buf.Reset()
s.rr = nil
_ = ssFree <- s
}
// readRune reads the next rune, but checks the peeked item first.
func (s *ss) readRune() (rune int, err os.Error) {
if s.peekRune >= 0 {
rune = s.peekRune
s.peekRune = -1
return
}
rune, _, err = s.rr.ReadRune()
return
}
// token returns the next space-delimited string from the input.
// For Scanln, it stops at newlines. For Scan, newlines are treated as
// spaces.
func (s *ss) token() string {
s.buf.Reset()
// skip white space and maybe newline
for {
rune, err := s.readRune()
if err != nil {
s.err = err
return ""
}
if rune == '\n' {
if s.nlIsSpace {
continue
}
s.err = os.ErrorString("unexpected newline")
return ""
}
if !unicode.IsSpace(rune) {
s.buf.WriteRune(rune)
break
}
}
// read until white space or newline
for {
rune, err := s.readRune()
if err != nil {
if err == os.EOF {
break
}
s.err = err
return ""
}
if unicode.IsSpace(rune) {
s.peekRune = rune
break
}
s.buf.WriteRune(rune)
}
return s.buf.String()
}
// Scan parses text read from r, storing successive space-separated
// values into successive arguments. Newlines count as space. Each
// argument must be a pointer to a basic type. It returns the number of
// items successfully parsed. If that is less than the number of arguments,
// err will report why.
func Scan(r io.Reader, a ...interface{}) (n int, err os.Error) {
s := newScanState(r, true)
n = s.doScan(a)
err = s.err
s.free()
return
}
// Scanln parses text read from r, storing successive space-separated
// values into successive arguments. Scanning stops at a newline and after
// the final item there must be a newline or EOF. Each argument must be a
// pointer to a basic type. It returns the number of items successfully
// parsed. If that is less than the number of arguments, err will report
// why.
func Scanln(r io.Reader, a ...interface{}) (n int, err os.Error) {
s := newScanState(r, false)
n = s.doScan(a)
err = s.err
s.free()
return
}
var intBits = uint(reflect.Typeof(int(0)).Size() * 8)
var uintptrBits = uint(reflect.Typeof(int(0)).Size() * 8)
var complexError = os.ErrorString("syntax error scanning complex number")
// scanBool converts the token to a boolean value.
func (s *ss) scanBool(tok string) bool {
if s.err != nil {
return false
}
var b bool
b, s.err = strconv.Atob(tok)
return b
}
// complexParts returns the strings representing the real and imaginary parts of the string.
func (s *ss) complexParts(str string) (real, imag string) {
if len(str) > 2 && str[0] == '(' && str[len(str)-1] == ')' {
str = str[1 : len(str)-1]
}
real, str = floatPart(str)
// Must now have a sign.
if len(str) == 0 || (str[0] != '+' && str[0] != '-') {
s.err = complexError
return "", ""
}
imag, str = floatPart(str)
if str != "i" {
s.err = complexError
return "", ""
}
return real, imag
}
// floatPart returns strings holding the floating point value in the string, followed
// by the remainder of the string. That is, it splits str into (number,rest-of-string).
func floatPart(str string) (first, last string) {
i := 0
// leading sign?
if len(str) > 0 && (str[0] == '+' || str[0] == '-') {
i++
}
// digits?
for len(str) > 0 && '0' <= str[i] && str[i] <= '9' {
i++
}
// period?
if str[i] == '.' {
i++
}
// fraction?
for len(str) > 0 && '0' <= str[i] && str[i] <= '9' {
i++
}
// exponent?
if len(str) > 0 && (str[i] == 'e' || str[i] == 'E') {
i++
// leading sign?
if str[0] == '+' || str[0] == '-' {
i++
}
// digits?
for len(str) > 0 && '0' <= str[i] && str[i] <= '9' {
i++
}
}
return str[0:i], str[i:]
}
// scanFloat converts the string to a float value.
func (s *ss) scanFloat(str string) float64 {
var f float
f, s.err = strconv.Atof(str)
return float64(f)
}
// scanFloat32 converts the string to a float32 value.
func (s *ss) scanFloat32(str string) float64 {
var f float32
f, s.err = strconv.Atof32(str)
return float64(f)
}
// scanFloat64 converts the string to a float64 value.
func (s *ss) scanFloat64(str string) float64 {
var f float64
f, s.err = strconv.Atof64(str)
return f
}
// scanComplex converts the token to a complex128 value.
// The atof argument is a type-specific reader for the underlying type.
// If we're reading complex64, atof will parse float32s and convert them
// to float64's to avoid reproducing this code for each complex type.
func (s *ss) scanComplex(tok string, atof func(*ss, string) float64) complex128 {
if s.err != nil {
return 0
}
sreal, simag := s.complexParts(tok)
if s.err != nil {
return 0
}
var real, imag float64
real = atof(s, sreal)
if s.err != nil {
return 0
}
imag = atof(s, simag)
if s.err != nil {
return 0
}
return cmplx(real, imag)
}
// scanInt converts the token to an int64, but checks that it fits into the
// specified number of bits.
func (s *ss) scanInt(tok string, bitSize uint) int64 {
if s.err != nil {
return 0
}
var i int64
i, s.err = strconv.Atoi64(tok)
x := (i << (64 - bitSize)) >> (64 - bitSize)
if i != x {
s.err = os.ErrorString("integer overflow on token " + tok)
}
return i
}
// scanUint converts the token to a uint64, but checks that it fits into the
// specified number of bits.
func (s *ss) scanUint(tok string, bitSize uint) uint64 {
if s.err != nil {
return 0
}
var i uint64
i, s.err = strconv.Atoui64(tok)
x := (i << (64 - bitSize)) >> (64 - bitSize)
if i != x {
s.err = os.ErrorString("unsigned integer overflow on token " + tok)
}
return i
}
// doScan does the real work. At the moment, it handles only pointers to basic types.
func (s *ss) doScan(a []interface{}) int {
for n, param := range a {
tok := s.token()
switch v := param.(type) {
case *bool:
*v = s.scanBool(tok)
case *complex:
*v = complex(s.scanComplex(tok, (*ss).scanFloat))
case *complex64:
*v = complex64(s.scanComplex(tok, (*ss).scanFloat32))
case *complex128:
*v = s.scanComplex(tok, (*ss).scanFloat64)
case *int:
*v = int(s.scanInt(tok, intBits))
case *int8:
*v = int8(s.scanInt(tok, 8))
case *int16:
*v = int16(s.scanInt(tok, 16))
case *int32:
*v = int32(s.scanInt(tok, 32))
case *int64:
*v = s.scanInt(tok, 64)
case *uint:
*v = uint(s.scanUint(tok, intBits))
case *uint8:
*v = uint8(s.scanUint(tok, 8))
case *uint16:
*v = uint16(s.scanUint(tok, 16))
case *uint32:
*v = uint32(s.scanUint(tok, 32))
case *uint64:
*v = s.scanUint(tok, 64)
case *uintptr:
*v = uintptr(s.scanUint(tok, uintptrBits))
case *float:
if s.err == nil {
*v, s.err = strconv.Atof(tok)
} else {
*v = 0
}
case *float32:
if s.err == nil {
*v, s.err = strconv.Atof32(tok)
} else {
*v = 0
}
case *float64:
if s.err == nil {
*v, s.err = strconv.Atof64(tok)
} else {
*v = 0
}
case *string:
*v = tok
default:
t := reflect.Typeof(v)
str := t.String()
if _, ok := t.(*reflect.PtrType); !ok {
s.err = os.ErrorString("Scan: type not a pointer: " + str)
} else {
s.err = os.ErrorString("Scan: can't handle type: " + str)
}
}
if s.err != nil {
return n
}
}
// Check for newline if required.
if !s.nlIsSpace {
for {
rune, err := s.readRune()
if err != nil {
if err == os.EOF {
break
}
s.err = err
break
}
if rune == '\n' {
break
}
if !unicode.IsSpace(rune) {
s.err = os.ErrorString("Scan: expected newline")
break
}
}
}
return len(a)
}

181
src/pkg/fmt/scan_test.go Normal file
View File

@ -0,0 +1,181 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fmt_test
import (
. "fmt"
"io"
"os"
"reflect"
"strings"
"testing"
)
type ScanTest struct {
text string
in interface{}
out interface{}
}
var boolVal bool
var intVal int
var int8Val int8
var int16Val int16
var int32Val int32
var int64Val int64
var uintVal uint
var uint8Val uint8
var uint16Val uint16
var uint32Val uint32
var uint64Val uint64
var floatVal float
var float32Val float32
var float64Val float64
var stringVal string
var complexVal complex
var complex64Val complex64
var complex128Val complex128
var scanTests = []ScanTest{
ScanTest{"T\n", &boolVal, true},
ScanTest{"21\n", &intVal, 21},
ScanTest{"22\n", &int8Val, int8(22)},
ScanTest{"23\n", &int16Val, int16(23)},
ScanTest{"24\n", &int32Val, int32(24)},
ScanTest{"25\n", &int64Val, int64(25)},
ScanTest{"127\n", &int8Val, int8(127)},
ScanTest{"-21\n", &intVal, -21},
ScanTest{"-22\n", &int8Val, int8(-22)},
ScanTest{"-23\n", &int16Val, int16(-23)},
ScanTest{"-24\n", &int32Val, int32(-24)},
ScanTest{"-25\n", &int64Val, int64(-25)},
ScanTest{"-128\n", &int8Val, int8(-128)},
ScanTest{"+21\n", &intVal, +21},
ScanTest{"+22\n", &int8Val, int8(+22)},
ScanTest{"+23\n", &int16Val, int16(+23)},
ScanTest{"+24\n", &int32Val, int32(+24)},
ScanTest{"+25\n", &int64Val, int64(+25)},
ScanTest{"+127\n", &int8Val, int8(+127)},
ScanTest{"26\n", &uintVal, uint(26)},
ScanTest{"27\n", &uint8Val, uint8(27)},
ScanTest{"28\n", &uint16Val, uint16(28)},
ScanTest{"29\n", &uint32Val, uint32(29)},
ScanTest{"30\n", &uint64Val, uint64(30)},
ScanTest{"255\n", &uint8Val, uint8(255)},
ScanTest{"32767\n", &int16Val, int16(32767)},
ScanTest{"2.3\n", &floatVal, 2.3},
ScanTest{"2.3e1\n", &float32Val, float32(2.3e1)},
ScanTest{"2.3e2\n", &float64Val, float64(2.3e2)},
ScanTest{"2.35\n", &stringVal, "2.35"},
ScanTest{"(3.4e1-2i)\n", &complexVal, 3.4e1 - 2i},
ScanTest{"-3.45e1-3i\n", &complex64Val, complex64(-3.45e1 - 3i)},
ScanTest{"-.45e1-1e2i\n", &complex128Val, complex128(-.45e1 - 100i)},
}
var overflowTests = []ScanTest{
ScanTest{"128", &int8Val, 0},
ScanTest{"32768", &int16Val, 0},
ScanTest{"-129", &int8Val, 0},
ScanTest{"-32769", &int16Val, 0},
ScanTest{"256", &uint8Val, 0},
ScanTest{"65536", &uint16Val, 0},
ScanTest{"1e100", &float32Val, 0},
ScanTest{"1e500", &float64Val, 0},
ScanTest{"(1e100+0i)", &complexVal, 0},
ScanTest{"(1+1e100i)", &complex64Val, 0},
ScanTest{"(1-1e500i)", &complex128Val, 0},
}
func testScan(t *testing.T, scan func(r io.Reader, a ...interface{}) (int, os.Error)) {
for _, test := range scanTests {
r := strings.NewReader(test.text)
n, err := scan(r, test.in)
if err != nil {
t.Errorf("got error scanning %q: %s", test.text, err)
continue
}
if n != 1 {
t.Errorf("count error on entry %q: got %d", test.text, n)
continue
}
// The incoming value may be a pointer
v := reflect.NewValue(test.in)
if p, ok := v.(*reflect.PtrValue); ok {
v = p.Elem()
}
val := v.Interface()
if !reflect.DeepEqual(val, test.out) {
t.Errorf("scanning %q: expected %v got %v, type %T", test.text, test.out, val, val)
}
}
}
func TestScan(t *testing.T) {
testScan(t, Scan)
}
func TestScanln(t *testing.T) {
testScan(t, Scanln)
}
func TestScanOverflow(t *testing.T) {
for _, test := range overflowTests {
r := strings.NewReader(test.text)
_, err := Scan(r, test.in)
if err == nil {
t.Errorf("expected overflow scanning %q", test.text)
continue
}
if strings.Index(err.String(), "overflow") < 0 && strings.Index(err.String(), "too large") < 0 {
t.Errorf("expected overflow error scanning %q: %s", test.text, err)
}
}
}
func TestScanMultiple(t *testing.T) {
text := "1 2 3 x"
r := strings.NewReader(text)
var a, b, c, d int
n, err := Scan(r, &a, &b, &c, &d)
if n != 3 {
t.Errorf("count error: expected 3: got %d", n)
}
if err == nil {
t.Errorf("expected error scanning ", text)
}
}
func TestScanNotPointer(t *testing.T) {
r := strings.NewReader("1")
var a int
_, err := Scan(r, a)
if err == nil {
t.Error("expected error scanning non-pointer")
} else if strings.Index(err.String(), "pointer") < 0 {
t.Errorf("expected pointer error scanning non-pointer, got: %s", err)
}
}
func TestScanlnNoNewline(t *testing.T) {
r := strings.NewReader("1 x\n")
var a int
_, err := Scanln(r, &a)
if err == nil {
t.Error("expected error scanning string missing newline")
} else if strings.Index(err.String(), "newline") < 0 {
t.Errorf("expected newline error scanning string missing newline, got: %s", err)
}
}
func TestScanlnWithMiddleNewline(t *testing.T) {
r := strings.NewReader("123\n456\n")
var a, b int
_, err := Scanln(r, &a, &b)
if err == nil {
t.Error("expected error scanning string with extra newline")
} else if strings.Index(err.String(), "newline") < 0 {
t.Errorf("expected newline error scanning string with extra newline, got: %s", err)
}
}