mirror of
https://github.com/golang/go
synced 2024-11-22 01:54:42 -07:00
csv: new package
csv reader/writer based on RFC 4180 R=rsc, mattn.jp, r, dchest CC=golang-dev https://golang.org/cl/4629085
This commit is contained in:
parent
21752bc130
commit
00f7cd4b36
@ -62,6 +62,7 @@ DIRS=\
|
|||||||
crypto/x509\
|
crypto/x509\
|
||||||
crypto/x509/pkix\
|
crypto/x509/pkix\
|
||||||
crypto/xtea\
|
crypto/xtea\
|
||||||
|
csv\
|
||||||
debug/dwarf\
|
debug/dwarf\
|
||||||
debug/macho\
|
debug/macho\
|
||||||
debug/elf\
|
debug/elf\
|
||||||
|
12
src/pkg/csv/Makefile
Normal file
12
src/pkg/csv/Makefile
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
# Use of this source code is governed by a BSD-style
|
||||||
|
# license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
include ../../Make.inc
|
||||||
|
|
||||||
|
TARG=csv
|
||||||
|
GOFILES=\
|
||||||
|
reader.go\
|
||||||
|
writer.go\
|
||||||
|
|
||||||
|
include ../../Make.pkg
|
373
src/pkg/csv/reader.go
Normal file
373
src/pkg/csv/reader.go
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package csv reads and writes comma-separated values (CSV) files.
|
||||||
|
//
|
||||||
|
// A csv file contains zero or more records of one or more fields per record.
|
||||||
|
// Each record is separated by the newline character. The final record may
|
||||||
|
// optionally be followed by a newline character.
|
||||||
|
//
|
||||||
|
// field1,field2,field3
|
||||||
|
//
|
||||||
|
// White space is considered part of a field.
|
||||||
|
//
|
||||||
|
// Carriage returns before newline characters are silently removed.
|
||||||
|
//
|
||||||
|
// Blank lines are ignored. A line with only whitespace characters (excluding
|
||||||
|
// the ending newline character) is not considered a blank line.
|
||||||
|
//
|
||||||
|
// Fields which start and stop with the quote character " are called
|
||||||
|
// quoted-fields. The beginning and ending quote are not part of the
|
||||||
|
// field.
|
||||||
|
//
|
||||||
|
// The source:
|
||||||
|
//
|
||||||
|
// normal string,"quoted-field"
|
||||||
|
//
|
||||||
|
// results in the fields
|
||||||
|
//
|
||||||
|
// {`normal string`, `quoted-field`}
|
||||||
|
//
|
||||||
|
// Within a quoted-field a quote character followed by a second quote
|
||||||
|
// character is considered a single quote.
|
||||||
|
//
|
||||||
|
// "the ""word"" is true","a ""quoted-field"""
|
||||||
|
//
|
||||||
|
// results in
|
||||||
|
//
|
||||||
|
// {`the "word" is true`, `a "quoted-field"`}
|
||||||
|
//
|
||||||
|
// Newlines and commas may be included in a quoted-field
|
||||||
|
//
|
||||||
|
// "Multi-line
|
||||||
|
// field","comma is ,"
|
||||||
|
//
|
||||||
|
// results in
|
||||||
|
//
|
||||||
|
// {`Multi-line
|
||||||
|
// field`, `comma is ,`}
|
||||||
|
package csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A ParseError is returned for parsing errors.
|
||||||
|
// The first line is 1. The first column is 0.
|
||||||
|
type ParseError struct {
|
||||||
|
Line int // Line where the error occurred
|
||||||
|
Column int // Column (rune index) where the error occurred
|
||||||
|
Error os.Error // The actual error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *ParseError) String() string {
|
||||||
|
return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// These are the errors that can be returned in ParseError.Error
|
||||||
|
var (
|
||||||
|
ErrTrailingComma = os.NewError("extra delimiter at end of line")
|
||||||
|
ErrBareQuote = os.NewError("bare \" in non-quoted-field")
|
||||||
|
ErrQuote = os.NewError("extraneous \" in field")
|
||||||
|
ErrFieldCount = os.NewError("wrong number of fields in line")
|
||||||
|
)
|
||||||
|
|
||||||
|
// A Reader decodes records from CSV-encoded input.
//
// A Reader obtained from NewReader expects input that conforms to RFC 4180.
// Its exported fields may be adjusted to change the accepted dialect, but
// only before the first call to Read or ReadAll.
//
// Comma is the field delimiter; NewReader sets it to ','.
//
// Comment, when non-zero, is the comment character. Lines that begin with
// the Comment character are ignored.
//
// FieldsPerRecord controls field-count checking. A positive value makes Read
// require exactly that many fields in every record. A value of 0 makes Read
// store the field count of the first record, so that all later records must
// match it. A negative value disables the check.
//
// LazyQuotes, when true, allows a quote to appear in an unquoted field and
// a non-doubled quote to appear in a quoted field.
//
// TrailingComma, when true, allows the last field of a record to be an
// unquoted empty field.
//
// TrimLeadingSpace, when true, causes leading white space in a field to be
// ignored.
type Reader struct {
	Comma            int  // field delimiter (NewReader sets ',')
	Comment          int  // comment character; 0 disables comment handling
	FieldsPerRecord  int  // expected number of fields per record
	LazyQuotes       bool // tolerate stray quotes
	TrailingComma    bool // tolerate an empty last field
	TrimLeadingSpace bool // drop white space at the start of a field

	line   int           // line of the record being parsed (1-based)
	column int           // rune column of the most recently read rune
	r      *bufio.Reader // buffered source of the input
	field  bytes.Buffer  // scratch buffer holding the field being parsed
}
|
||||||
|
|
||||||
|
// NewReader returns a new Reader that reads from r.
|
||||||
|
func NewReader(r io.Reader) *Reader {
|
||||||
|
return &Reader{
|
||||||
|
Comma: ',',
|
||||||
|
r: bufio.NewReader(r),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// error creates a new ParseError based on err.
|
||||||
|
func (r *Reader) error(err os.Error) os.Error {
|
||||||
|
return &ParseError{
|
||||||
|
Line: r.line,
|
||||||
|
Column: r.column,
|
||||||
|
Error: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read reads one record from r. The record is a slice of strings with each
|
||||||
|
// string representing one field.
|
||||||
|
func (r *Reader) Read() (record []string, err os.Error) {
|
||||||
|
for {
|
||||||
|
record, err = r.parseRecord()
|
||||||
|
if record != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.FieldsPerRecord > 0 {
|
||||||
|
if len(record) != r.FieldsPerRecord {
|
||||||
|
r.column = 0 // report at start of record
|
||||||
|
return record, r.error(ErrFieldCount)
|
||||||
|
}
|
||||||
|
} else if r.FieldsPerRecord == 0 {
|
||||||
|
r.FieldsPerRecord = len(record)
|
||||||
|
}
|
||||||
|
return record, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadAll reads all the remaining records from r.
|
||||||
|
// Each record is a slice of fields.
|
||||||
|
func (r *Reader) ReadAll() (records [][]string, err os.Error) {
|
||||||
|
for {
|
||||||
|
record, err := r.Read()
|
||||||
|
if err == os.EOF {
|
||||||
|
return records, nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
records = append(records, record)
|
||||||
|
}
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
// readRune reads one rune from r, folding \r\n to \n and keeping track
|
||||||
|
// of our far into the line we have read. r.column will point to the start
|
||||||
|
// of this rune, not the end of this rune.
|
||||||
|
func (r *Reader) readRune() (int, os.Error) {
|
||||||
|
rune, _, err := r.r.ReadRune()
|
||||||
|
|
||||||
|
// Handle \r\n here. We make the simplifying assumption that
|
||||||
|
// anytime \r is followed by \n that it can be folded to \n.
|
||||||
|
// We will not detect files which contain both \r\n and bare \n.
|
||||||
|
if rune == '\r' {
|
||||||
|
rune, _, err = r.r.ReadRune()
|
||||||
|
if err == nil {
|
||||||
|
if rune != '\n' {
|
||||||
|
r.r.UnreadRune()
|
||||||
|
rune = '\r'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.column++
|
||||||
|
return rune, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// unreadRune puts the last rune read from r back.
|
||||||
|
func (r *Reader) unreadRune() {
|
||||||
|
r.r.UnreadRune()
|
||||||
|
r.column--
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip reads runes up to and including the rune delim or until error.
|
||||||
|
func (r *Reader) skip(delim int) os.Error {
|
||||||
|
for {
|
||||||
|
rune, err := r.readRune()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if rune == delim {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseRecord reads and parses a single csv record from r.
|
||||||
|
func (r *Reader) parseRecord() (fields []string, err os.Error) {
|
||||||
|
// Each record starts on a new line. We increment our line
|
||||||
|
// number (lines start at 1, not 0) and set column to -1
|
||||||
|
// so as we increment in readRune it points to the character we read.
|
||||||
|
r.line++
|
||||||
|
r.column = -1
|
||||||
|
|
||||||
|
// Peek at the first rune. If it is an error we are done.
|
||||||
|
// If we are support comments and it is the comment character
|
||||||
|
// the skip to the end of line.
|
||||||
|
|
||||||
|
rune, _, err := r.r.ReadRune()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Comment != 0 && rune == r.Comment {
|
||||||
|
return nil, r.skip('\n')
|
||||||
|
}
|
||||||
|
r.r.UnreadRune()
|
||||||
|
|
||||||
|
// At this point we have at least one field.
|
||||||
|
for {
|
||||||
|
haveField, delim, err := r.parseField()
|
||||||
|
if haveField {
|
||||||
|
fields = append(fields, r.field.String())
|
||||||
|
}
|
||||||
|
if delim == '\n' || err == os.EOF {
|
||||||
|
return fields, err
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
panic("unreachable")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// parseField parses the next field in the record. The read field is
|
||||||
|
// located in r.field. Delim is the first character not part of the field
|
||||||
|
// (r.Comma or '\n').
|
||||||
|
func (r *Reader) parseField() (haveField bool, delim int, err os.Error) {
|
||||||
|
r.field.Reset()
|
||||||
|
|
||||||
|
rune, err := r.readRune()
|
||||||
|
if err != nil {
|
||||||
|
// If we have EOF and are not at the start of a line
|
||||||
|
// then we return the empty field. We have already
|
||||||
|
// checked for trailing commas if needed.
|
||||||
|
if err == os.EOF && r.column != 0 {
|
||||||
|
return true, 0, err
|
||||||
|
}
|
||||||
|
return false, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.TrimLeadingSpace {
|
||||||
|
for unicode.IsSpace(rune) {
|
||||||
|
rune, err = r.readRune()
|
||||||
|
if err != nil {
|
||||||
|
return false, 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch rune {
|
||||||
|
case r.Comma:
|
||||||
|
// will check below
|
||||||
|
|
||||||
|
case '\n':
|
||||||
|
// We are a trailing empty field or a blank linke
|
||||||
|
if r.column == 0 {
|
||||||
|
return false, rune, nil
|
||||||
|
}
|
||||||
|
return true, rune, nil
|
||||||
|
|
||||||
|
case '"':
|
||||||
|
// quoted field
|
||||||
|
Quoted:
|
||||||
|
for {
|
||||||
|
rune, err = r.readRune()
|
||||||
|
if err != nil {
|
||||||
|
if err == os.EOF {
|
||||||
|
if r.LazyQuotes {
|
||||||
|
return true, 0, err
|
||||||
|
}
|
||||||
|
return false, 0, r.error(ErrQuote)
|
||||||
|
}
|
||||||
|
return false, 0, err
|
||||||
|
}
|
||||||
|
switch rune {
|
||||||
|
case '"':
|
||||||
|
rune, err = r.readRune()
|
||||||
|
if err != nil || rune == r.Comma {
|
||||||
|
break Quoted
|
||||||
|
}
|
||||||
|
if rune == '\n' {
|
||||||
|
return true, rune, nil
|
||||||
|
}
|
||||||
|
if rune != '"' {
|
||||||
|
if !r.LazyQuotes {
|
||||||
|
r.column--
|
||||||
|
return false, 0, r.error(ErrQuote)
|
||||||
|
}
|
||||||
|
// accept the bare quote
|
||||||
|
r.field.WriteRune('"')
|
||||||
|
}
|
||||||
|
case '\n':
|
||||||
|
r.line++
|
||||||
|
r.column = -1
|
||||||
|
}
|
||||||
|
r.field.WriteRune(rune)
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
// unquoted field
|
||||||
|
for {
|
||||||
|
r.field.WriteRune(rune)
|
||||||
|
rune, err = r.readRune()
|
||||||
|
if err != nil || rune == r.Comma {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if rune == '\n' {
|
||||||
|
return true, rune, nil
|
||||||
|
}
|
||||||
|
if !r.LazyQuotes && rune == '"' {
|
||||||
|
return false, 0, r.error(ErrBareQuote)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
if err == os.EOF {
|
||||||
|
return true, 0, err
|
||||||
|
}
|
||||||
|
return false, 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !r.TrailingComma {
|
||||||
|
// We don't allow trailing commas. See if we
|
||||||
|
// are at the end of the line (being mindful
|
||||||
|
// of triming spaces
|
||||||
|
c := r.column
|
||||||
|
rune, err = r.readRune()
|
||||||
|
if r.TrimLeadingSpace {
|
||||||
|
for unicode.IsSpace(rune) {
|
||||||
|
rune, err = r.readRune()
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err == os.EOF || rune == '\n' {
|
||||||
|
r.column = c // report the comma
|
||||||
|
return false, 0, r.error(ErrTrailingComma)
|
||||||
|
}
|
||||||
|
r.unreadRune()
|
||||||
|
}
|
||||||
|
return true, rune, nil
|
||||||
|
}
|
265
src/pkg/csv/reader_test.go
Normal file
265
src/pkg/csv/reader_test.go
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
var readTests = []struct {
|
||||||
|
Name string
|
||||||
|
Input string
|
||||||
|
Output [][]string
|
||||||
|
UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
|
||||||
|
|
||||||
|
// These fields are copied into the Reader
|
||||||
|
Comma int
|
||||||
|
Comment int
|
||||||
|
FieldsPerRecord int
|
||||||
|
LazyQuotes bool
|
||||||
|
TrailingComma bool
|
||||||
|
TrimLeadingSpace bool
|
||||||
|
|
||||||
|
Error string
|
||||||
|
Line int // Expected error line if != 0
|
||||||
|
Column int // Expected error column if line != 0
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Name: "Simple",
|
||||||
|
Input: "a,b,c\n",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "CRLF",
|
||||||
|
Input: "a,b\r\nc,d\r\n",
|
||||||
|
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BareCR",
|
||||||
|
Input: "a,b\rc,d\r\n",
|
||||||
|
Output: [][]string{{"a", "b\rc", "d"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "RFC4180test",
|
||||||
|
UseFieldsPerRecord: true,
|
||||||
|
Input: `#field1,field2,field3
|
||||||
|
"aaa","bb
|
||||||
|
b","ccc"
|
||||||
|
"a,a","b""bb","ccc"
|
||||||
|
zzz,yyy,xxx
|
||||||
|
`,
|
||||||
|
Output: [][]string{
|
||||||
|
{"#field1", "field2", "field3"},
|
||||||
|
{"aaa", "bb\nb", "ccc"},
|
||||||
|
{"a,a", `b"bb`, "ccc"},
|
||||||
|
{"zzz", "yyy", "xxx"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "NoEOLTest",
|
||||||
|
Input: "a,b,c",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "Semicolon",
|
||||||
|
Comma: ';',
|
||||||
|
Input: "a;b;c\n",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "MultiLine",
|
||||||
|
Input: `"two
|
||||||
|
line","one line","three
|
||||||
|
line
|
||||||
|
field"`,
|
||||||
|
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BlankLine",
|
||||||
|
Input: "a,b,c\n\nd,e,f\n\n",
|
||||||
|
Output: [][]string{
|
||||||
|
{"a", "b", "c"},
|
||||||
|
{"d", "e", "f"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "TrimSpace",
|
||||||
|
Input: " a, b, c\n",
|
||||||
|
TrimLeadingSpace: true,
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "LeadingSpace",
|
||||||
|
Input: " a, b, c\n",
|
||||||
|
Output: [][]string{{" a", " b", " c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "Comment",
|
||||||
|
Comment: '#',
|
||||||
|
Input: "#1,2,3\na,b,c\n#comment",
|
||||||
|
Output: [][]string{{"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "NoComment",
|
||||||
|
Input: "#1,2,3\na,b,c",
|
||||||
|
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "LazyQuotes",
|
||||||
|
LazyQuotes: true,
|
||||||
|
Input: `a "word","1"2",a","b`,
|
||||||
|
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BareQuotes",
|
||||||
|
LazyQuotes: true,
|
||||||
|
Input: `a "word","1"2",a"`,
|
||||||
|
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BareDoubleQuotes",
|
||||||
|
LazyQuotes: true,
|
||||||
|
Input: `a""b,c`,
|
||||||
|
Output: [][]string{{`a""b`, `c`}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadDoubleQuotes",
|
||||||
|
Input: `a""b,c`,
|
||||||
|
Output: [][]string{{`a""b`, `c`}},
|
||||||
|
Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "TrimQuote",
|
||||||
|
Input: ` "a"," b",c`,
|
||||||
|
TrimLeadingSpace: true,
|
||||||
|
Output: [][]string{{"a", " b", "c"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadBareQuote",
|
||||||
|
Input: `a "word","b"`,
|
||||||
|
Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadTrailingQuote",
|
||||||
|
Input: `"a word",b"`,
|
||||||
|
Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "ExtraneousQuote",
|
||||||
|
Input: `"a "word","b"`,
|
||||||
|
Error: `extraneous " in field`, Line: 1, Column: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadFieldCount",
|
||||||
|
UseFieldsPerRecord: true,
|
||||||
|
Input: "a,b,c\nd,e",
|
||||||
|
Error: "wrong number of fields", Line: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadFieldCount1",
|
||||||
|
UseFieldsPerRecord: true,
|
||||||
|
FieldsPerRecord: 2,
|
||||||
|
Input: `a,b,c`,
|
||||||
|
Error: "wrong number of fields", Line: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "FieldCount",
|
||||||
|
Input: "a,b,c\nd,e",
|
||||||
|
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadTrailingCommaEOF",
|
||||||
|
Input: "a,b,c,",
|
||||||
|
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadTrailingCommaEOL",
|
||||||
|
Input: "a,b,c,\n",
|
||||||
|
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadTrailingCommaSpaceEOF",
|
||||||
|
TrimLeadingSpace: true,
|
||||||
|
Input: "a,b,c, ",
|
||||||
|
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadTrailingCommaSpaceEOL",
|
||||||
|
TrimLeadingSpace: true,
|
||||||
|
Input: "a,b,c, \n",
|
||||||
|
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "BadTrailingCommaLine3",
|
||||||
|
TrimLeadingSpace: true,
|
||||||
|
Input: "a,b,c\nd,e,f\ng,hi,",
|
||||||
|
Error: "extra delimiter at end of line", Line: 3, Column: 4,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "NotTrailingComma3",
|
||||||
|
Input: "a,b,c, \n",
|
||||||
|
Output: [][]string{{"a", "b", "c", " "}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "CommaFieldTest",
|
||||||
|
TrailingComma: true,
|
||||||
|
Input: `x,y,z,w
|
||||||
|
x,y,z,
|
||||||
|
x,y,,
|
||||||
|
x,,,
|
||||||
|
,,,
|
||||||
|
"x","y","z","w"
|
||||||
|
"x","y","z",""
|
||||||
|
"x","y","",""
|
||||||
|
"x","","",""
|
||||||
|
"","","",""
|
||||||
|
`,
|
||||||
|
Output: [][]string{
|
||||||
|
{"x", "y", "z", "w"},
|
||||||
|
{"x", "y", "z", ""},
|
||||||
|
{"x", "y", "", ""},
|
||||||
|
{"x", "", "", ""},
|
||||||
|
{"", "", "", ""},
|
||||||
|
{"x", "y", "z", "w"},
|
||||||
|
{"x", "y", "z", ""},
|
||||||
|
{"x", "y", "", ""},
|
||||||
|
{"x", "", "", ""},
|
||||||
|
{"", "", "", ""},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRead(t *testing.T) {
|
||||||
|
for _, tt := range readTests {
|
||||||
|
r := NewReader(strings.NewReader(tt.Input))
|
||||||
|
r.Comment = tt.Comment
|
||||||
|
if tt.UseFieldsPerRecord {
|
||||||
|
r.FieldsPerRecord = tt.FieldsPerRecord
|
||||||
|
} else {
|
||||||
|
r.FieldsPerRecord = -1
|
||||||
|
}
|
||||||
|
r.LazyQuotes = tt.LazyQuotes
|
||||||
|
r.TrailingComma = tt.TrailingComma
|
||||||
|
r.TrimLeadingSpace = tt.TrimLeadingSpace
|
||||||
|
if tt.Comma != 0 {
|
||||||
|
r.Comma = tt.Comma
|
||||||
|
}
|
||||||
|
out, err := r.ReadAll()
|
||||||
|
perr, _ := err.(*ParseError)
|
||||||
|
if tt.Error != "" {
|
||||||
|
if err == nil || !strings.Contains(err.String(), tt.Error) {
|
||||||
|
t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
|
||||||
|
} else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
|
||||||
|
t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
|
||||||
|
}
|
||||||
|
} else if err != nil {
|
||||||
|
t.Errorf("%s: unexpected error %v", tt.Name, err)
|
||||||
|
} else if !reflect.DeepEqual(out, tt.Output) {
|
||||||
|
t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
123
src/pkg/csv/writer.go
Normal file
123
src/pkg/csv/writer.go
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
"utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A Writer encodes records as CSV.
//
// A Writer obtained from NewWriter terminates each record with a newline
// and separates fields with ','. Its exported fields may be adjusted to
// change the output format, but only before the first call to Write or
// WriteAll.
//
// Comma is the field delimiter.
//
// If UseCRLF is true, the Writer ends each record with \r\n; otherwise just
// \n is written.
type Writer struct {
	Comma   int  // field delimiter (NewWriter sets ',')
	UseCRLF bool // terminate lines with \r\n rather than \n

	w *bufio.Writer // buffered destination; drained by Flush
}
|
||||||
|
|
||||||
|
// NewWriter returns a new Writer that writes to w.
|
||||||
|
func NewWriter(w io.Writer) *Writer {
|
||||||
|
return &Writer{
|
||||||
|
Comma: ',',
|
||||||
|
w: bufio.NewWriter(w),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writer writes a single CSV record to w along with any necessary quoting.
|
||||||
|
// A record is a slice of strings with each string being one field.
|
||||||
|
func (w *Writer) Write(record []string) (err os.Error) {
|
||||||
|
for n, field := range record {
|
||||||
|
if n > 0 {
|
||||||
|
if _, err = w.w.WriteRune(w.Comma); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we don't have to have a quoted field then just
|
||||||
|
// write out the field and continue to the next field.
|
||||||
|
if !w.fieldNeedsQuotes(field) {
|
||||||
|
if _, err = w.w.WriteString(field); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err = w.w.WriteByte('"'); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, rune := range field {
|
||||||
|
switch rune {
|
||||||
|
case '"':
|
||||||
|
_, err = w.w.WriteString(`""`)
|
||||||
|
case '\r':
|
||||||
|
if !w.UseCRLF {
|
||||||
|
err = w.w.WriteByte('\r')
|
||||||
|
}
|
||||||
|
case '\n':
|
||||||
|
if w.UseCRLF {
|
||||||
|
_, err = w.w.WriteString("\r\n")
|
||||||
|
} else {
|
||||||
|
err = w.w.WriteByte('\n')
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
_, err = w.w.WriteRune(rune)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = w.w.WriteByte('"'); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if w.UseCRLF {
|
||||||
|
_, err = w.w.WriteString("\r\n")
|
||||||
|
} else {
|
||||||
|
err = w.w.WriteByte('\n')
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flush writes any buffered data to the underlying io.Writer.
|
||||||
|
func (w *Writer) Flush() {
|
||||||
|
w.w.Flush()
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteAll writes multiple CSV records to w using Write and then calls Flush.
|
||||||
|
func (w *Writer) WriteAll(records [][]string) (err os.Error) {
|
||||||
|
for _, record := range records {
|
||||||
|
err = w.Write(record)
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.Flush()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fieldNeedsQuotes returns true if our field must be enclosed in quotes.
|
||||||
|
// Empty fields, files with a Comma, fields with a quote or newline, and
|
||||||
|
// fields which start with a space must be enclosed in quotes.
|
||||||
|
func (w *Writer) fieldNeedsQuotes(field string) bool {
|
||||||
|
if len(field) == 0 || strings.IndexRune(field, w.Comma) >= 0 || strings.IndexAny(field, "\"\r\n") >= 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
rune, _ := utf8.DecodeRuneInString(field)
|
||||||
|
return unicode.IsSpace(rune)
|
||||||
|
}
|
44
src/pkg/csv/writer_test.go
Normal file
44
src/pkg/csv/writer_test.go
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
// Copyright 2011 The Go Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package csv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
var writeTests = []struct {
|
||||||
|
Input [][]string
|
||||||
|
Output string
|
||||||
|
UseCRLF bool
|
||||||
|
}{
|
||||||
|
{Input: [][]string{{"abc"}}, Output: "abc\n"},
|
||||||
|
{Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true},
|
||||||
|
{Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"},
|
||||||
|
{Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"},
|
||||||
|
{Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"},
|
||||||
|
{Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"},
|
||||||
|
{Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"},
|
||||||
|
{Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"},
|
||||||
|
{Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"},
|
||||||
|
{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"},
|
||||||
|
{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWrite(t *testing.T) {
|
||||||
|
for n, tt := range writeTests {
|
||||||
|
b := &bytes.Buffer{}
|
||||||
|
f := NewWriter(b)
|
||||||
|
f.UseCRLF = tt.UseCRLF
|
||||||
|
err := f.WriteAll(tt.Input)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Unexpected error: %s\n", err)
|
||||||
|
}
|
||||||
|
out := b.String()
|
||||||
|
if out != tt.Output {
|
||||||
|
t.Errorf("#%d: out=%q want %q", n, out, tt.Output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user