mirror of
https://github.com/golang/go
synced 2024-11-21 20:24:50 -07:00
csv: new package
csv reader/writer based on RFC 4180 R=rsc, mattn.jp, r, dchest CC=golang-dev https://golang.org/cl/4629085
This commit is contained in:
parent
21752bc130
commit
00f7cd4b36
@ -62,6 +62,7 @@ DIRS=\
|
||||
crypto/x509\
|
||||
crypto/x509/pkix\
|
||||
crypto/xtea\
|
||||
csv\
|
||||
debug/dwarf\
|
||||
debug/macho\
|
||||
debug/elf\
|
||||
|
12
src/pkg/csv/Makefile
Normal file
12
src/pkg/csv/Makefile
Normal file
@ -0,0 +1,12 @@
|
||||
# Copyright 2011 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include ../../Make.inc
|
||||
|
||||
TARG=csv
|
||||
GOFILES=\
|
||||
reader.go\
|
||||
writer.go\
|
||||
|
||||
include ../../Make.pkg
|
373
src/pkg/csv/reader.go
Normal file
373
src/pkg/csv/reader.go
Normal file
@ -0,0 +1,373 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package csv reads and writes comma-separated values (CSV) files.
|
||||
//
|
||||
// A csv file contains zero or more records of one or more fields per record.
|
||||
// Each record is separated by the newline character. The final record may
|
||||
// optionally be followed by a newline character.
|
||||
//
|
||||
// field1,field2,field3
|
||||
//
|
||||
// White space is considered part of a field.
|
||||
//
|
||||
// Carriage returns before newline characters are silently removed.
|
||||
//
|
||||
// Blank lines are ignored. A line with only whitespace characters (excluding
|
||||
// the ending newline character) is not considered a blank line.
|
||||
//
|
||||
// Fields which start and stop with the quote character " are called
|
||||
// quoted-fields. The beginning and ending quote are not part of the
|
||||
// field.
|
||||
//
|
||||
// The source:
|
||||
//
|
||||
// normal string,"quoted-field"
|
||||
//
|
||||
// results in the fields
|
||||
//
|
||||
// {`normal string`, `quoted-field`}
|
||||
//
|
||||
// Within a quoted-field a quote character followed by a second quote
|
||||
// character is considered a single quote.
|
||||
//
|
||||
// "the ""word"" is true","a ""quoted-field"""
|
||||
//
|
||||
// results in
|
||||
//
|
||||
// {`the "word" is true`, `a "quoted-field"`}
|
||||
//
|
||||
// Newlines and commas may be included in a quoted-field
|
||||
//
|
||||
// "Multi-line
|
||||
// field","comma is ,"
|
||||
//
|
||||
// results in
|
||||
//
|
||||
// {`Multi-line
|
||||
// field`, `comma is ,`}
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// A ParseError is returned for parsing errors.
|
||||
// The first line is 1. The first column is 0.
|
||||
type ParseError struct {
|
||||
Line int // Line where the error occurred
|
||||
Column int // Column (rune index) where the error occurred
|
||||
Error os.Error // The actual error
|
||||
}
|
||||
|
||||
func (e *ParseError) String() string {
|
||||
return fmt.Sprintf("line %d, column %d: %s", e.Line, e.Column, e.Error)
|
||||
}
|
||||
|
||||
// These are the errors that can be returned in ParseError.Error
|
||||
var (
|
||||
ErrTrailingComma = os.NewError("extra delimiter at end of line")
|
||||
ErrBareQuote = os.NewError("bare \" in non-quoted-field")
|
||||
ErrQuote = os.NewError("extraneous \" in field")
|
||||
ErrFieldCount = os.NewError("wrong number of fields in line")
|
||||
)
|
||||
|
||||
// A Reader reads records from a CSV-encoded file.
|
||||
//
|
||||
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
|
||||
// The exported fields can be changed to customize the details before the
|
||||
// first call to Read or ReadAll.
|
||||
//
|
||||
// Comma is the field delimiter. It defaults to ','.
|
||||
//
|
||||
// Comment, if not 0, is the comment character. Lines beginning with the
|
||||
// Comment character is ignored.
|
||||
//
|
||||
// If FieldsPerRecord is positive, Read requires each record to
|
||||
// have the given number of fields. If FieldsPerRecord is 0, Read sets it to
|
||||
// the number of fields in the first record, so that future records must
|
||||
// have the same field count.
|
||||
//
|
||||
// If LazyQuotes is true, a quote may appear in an unquoted field and a
|
||||
// non-doubled quote may appear in a quoted field.
|
||||
//
|
||||
// If TrailingComma is true, the last field may be a unquoted empty field.
|
||||
//
|
||||
// If TrimLeadingSpace is true, leading white space in a field is ignored.
|
||||
type Reader struct {
|
||||
Comma int // Field delimiter (set to ',' by NewReader)
|
||||
Comment int // Comment character for start of line
|
||||
FieldsPerRecord int // Number of expected fields per record
|
||||
LazyQuotes bool // Allow lazy quotes
|
||||
TrailingComma bool // Allow trailing comma
|
||||
TrimLeadingSpace bool // Trim leading space
|
||||
line int
|
||||
column int
|
||||
r *bufio.Reader
|
||||
field bytes.Buffer
|
||||
}
|
||||
|
||||
// NewReader returns a new Reader that reads from r.
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return &Reader{
|
||||
Comma: ',',
|
||||
r: bufio.NewReader(r),
|
||||
}
|
||||
}
|
||||
|
||||
// error creates a new ParseError based on err.
|
||||
func (r *Reader) error(err os.Error) os.Error {
|
||||
return &ParseError{
|
||||
Line: r.line,
|
||||
Column: r.column,
|
||||
Error: err,
|
||||
}
|
||||
}
|
||||
|
||||
// Read reads one record from r. The record is a slice of strings with each
|
||||
// string representing one field.
|
||||
func (r *Reader) Read() (record []string, err os.Error) {
|
||||
for {
|
||||
record, err = r.parseRecord()
|
||||
if record != nil {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if r.FieldsPerRecord > 0 {
|
||||
if len(record) != r.FieldsPerRecord {
|
||||
r.column = 0 // report at start of record
|
||||
return record, r.error(ErrFieldCount)
|
||||
}
|
||||
} else if r.FieldsPerRecord == 0 {
|
||||
r.FieldsPerRecord = len(record)
|
||||
}
|
||||
return record, nil
|
||||
}
|
||||
|
||||
// ReadAll reads all the remaining records from r.
|
||||
// Each record is a slice of fields.
|
||||
func (r *Reader) ReadAll() (records [][]string, err os.Error) {
|
||||
for {
|
||||
record, err := r.Read()
|
||||
if err == os.EOF {
|
||||
return records, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
records = append(records, record)
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// readRune reads one rune from r, folding \r\n to \n and keeping track
|
||||
// of our far into the line we have read. r.column will point to the start
|
||||
// of this rune, not the end of this rune.
|
||||
func (r *Reader) readRune() (int, os.Error) {
|
||||
rune, _, err := r.r.ReadRune()
|
||||
|
||||
// Handle \r\n here. We make the simplifying assumption that
|
||||
// anytime \r is followed by \n that it can be folded to \n.
|
||||
// We will not detect files which contain both \r\n and bare \n.
|
||||
if rune == '\r' {
|
||||
rune, _, err = r.r.ReadRune()
|
||||
if err == nil {
|
||||
if rune != '\n' {
|
||||
r.r.UnreadRune()
|
||||
rune = '\r'
|
||||
}
|
||||
}
|
||||
}
|
||||
r.column++
|
||||
return rune, err
|
||||
}
|
||||
|
||||
// unreadRune puts the last rune read from r back.
|
||||
func (r *Reader) unreadRune() {
|
||||
r.r.UnreadRune()
|
||||
r.column--
|
||||
}
|
||||
|
||||
// skip reads runes up to and including the rune delim or until error.
|
||||
func (r *Reader) skip(delim int) os.Error {
|
||||
for {
|
||||
rune, err := r.readRune()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if rune == delim {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// parseRecord reads and parses a single csv record from r.
|
||||
func (r *Reader) parseRecord() (fields []string, err os.Error) {
|
||||
// Each record starts on a new line. We increment our line
|
||||
// number (lines start at 1, not 0) and set column to -1
|
||||
// so as we increment in readRune it points to the character we read.
|
||||
r.line++
|
||||
r.column = -1
|
||||
|
||||
// Peek at the first rune. If it is an error we are done.
|
||||
// If we are support comments and it is the comment character
|
||||
// the skip to the end of line.
|
||||
|
||||
rune, _, err := r.r.ReadRune()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if r.Comment != 0 && rune == r.Comment {
|
||||
return nil, r.skip('\n')
|
||||
}
|
||||
r.r.UnreadRune()
|
||||
|
||||
// At this point we have at least one field.
|
||||
for {
|
||||
haveField, delim, err := r.parseField()
|
||||
if haveField {
|
||||
fields = append(fields, r.field.String())
|
||||
}
|
||||
if delim == '\n' || err == os.EOF {
|
||||
return fields, err
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
|
||||
// parseField parses the next field in the record. The read field is
|
||||
// located in r.field. Delim is the first character not part of the field
|
||||
// (r.Comma or '\n').
|
||||
func (r *Reader) parseField() (haveField bool, delim int, err os.Error) {
|
||||
r.field.Reset()
|
||||
|
||||
rune, err := r.readRune()
|
||||
if err != nil {
|
||||
// If we have EOF and are not at the start of a line
|
||||
// then we return the empty field. We have already
|
||||
// checked for trailing commas if needed.
|
||||
if err == os.EOF && r.column != 0 {
|
||||
return true, 0, err
|
||||
}
|
||||
return false, 0, err
|
||||
}
|
||||
|
||||
if r.TrimLeadingSpace {
|
||||
for unicode.IsSpace(rune) {
|
||||
rune, err = r.readRune()
|
||||
if err != nil {
|
||||
return false, 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch rune {
|
||||
case r.Comma:
|
||||
// will check below
|
||||
|
||||
case '\n':
|
||||
// We are a trailing empty field or a blank linke
|
||||
if r.column == 0 {
|
||||
return false, rune, nil
|
||||
}
|
||||
return true, rune, nil
|
||||
|
||||
case '"':
|
||||
// quoted field
|
||||
Quoted:
|
||||
for {
|
||||
rune, err = r.readRune()
|
||||
if err != nil {
|
||||
if err == os.EOF {
|
||||
if r.LazyQuotes {
|
||||
return true, 0, err
|
||||
}
|
||||
return false, 0, r.error(ErrQuote)
|
||||
}
|
||||
return false, 0, err
|
||||
}
|
||||
switch rune {
|
||||
case '"':
|
||||
rune, err = r.readRune()
|
||||
if err != nil || rune == r.Comma {
|
||||
break Quoted
|
||||
}
|
||||
if rune == '\n' {
|
||||
return true, rune, nil
|
||||
}
|
||||
if rune != '"' {
|
||||
if !r.LazyQuotes {
|
||||
r.column--
|
||||
return false, 0, r.error(ErrQuote)
|
||||
}
|
||||
// accept the bare quote
|
||||
r.field.WriteRune('"')
|
||||
}
|
||||
case '\n':
|
||||
r.line++
|
||||
r.column = -1
|
||||
}
|
||||
r.field.WriteRune(rune)
|
||||
}
|
||||
|
||||
default:
|
||||
// unquoted field
|
||||
for {
|
||||
r.field.WriteRune(rune)
|
||||
rune, err = r.readRune()
|
||||
if err != nil || rune == r.Comma {
|
||||
break
|
||||
}
|
||||
if rune == '\n' {
|
||||
return true, rune, nil
|
||||
}
|
||||
if !r.LazyQuotes && rune == '"' {
|
||||
return false, 0, r.error(ErrBareQuote)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if err == os.EOF {
|
||||
return true, 0, err
|
||||
}
|
||||
return false, 0, err
|
||||
}
|
||||
|
||||
if !r.TrailingComma {
|
||||
// We don't allow trailing commas. See if we
|
||||
// are at the end of the line (being mindful
|
||||
// of triming spaces
|
||||
c := r.column
|
||||
rune, err = r.readRune()
|
||||
if r.TrimLeadingSpace {
|
||||
for unicode.IsSpace(rune) {
|
||||
rune, err = r.readRune()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if err == os.EOF || rune == '\n' {
|
||||
r.column = c // report the comma
|
||||
return false, 0, r.error(ErrTrailingComma)
|
||||
}
|
||||
r.unreadRune()
|
||||
}
|
||||
return true, rune, nil
|
||||
}
|
265
src/pkg/csv/reader_test.go
Normal file
265
src/pkg/csv/reader_test.go
Normal file
@ -0,0 +1,265 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var readTests = []struct {
|
||||
Name string
|
||||
Input string
|
||||
Output [][]string
|
||||
UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
|
||||
|
||||
// These fields are copied into the Reader
|
||||
Comma int
|
||||
Comment int
|
||||
FieldsPerRecord int
|
||||
LazyQuotes bool
|
||||
TrailingComma bool
|
||||
TrimLeadingSpace bool
|
||||
|
||||
Error string
|
||||
Line int // Expected error line if != 0
|
||||
Column int // Expected error column if line != 0
|
||||
}{
|
||||
{
|
||||
Name: "Simple",
|
||||
Input: "a,b,c\n",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "CRLF",
|
||||
Input: "a,b\r\nc,d\r\n",
|
||||
Output: [][]string{{"a", "b"}, {"c", "d"}},
|
||||
},
|
||||
{
|
||||
Name: "BareCR",
|
||||
Input: "a,b\rc,d\r\n",
|
||||
Output: [][]string{{"a", "b\rc", "d"}},
|
||||
},
|
||||
{
|
||||
Name: "RFC4180test",
|
||||
UseFieldsPerRecord: true,
|
||||
Input: `#field1,field2,field3
|
||||
"aaa","bb
|
||||
b","ccc"
|
||||
"a,a","b""bb","ccc"
|
||||
zzz,yyy,xxx
|
||||
`,
|
||||
Output: [][]string{
|
||||
{"#field1", "field2", "field3"},
|
||||
{"aaa", "bb\nb", "ccc"},
|
||||
{"a,a", `b"bb`, "ccc"},
|
||||
{"zzz", "yyy", "xxx"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "NoEOLTest",
|
||||
Input: "a,b,c",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "Semicolon",
|
||||
Comma: ';',
|
||||
Input: "a;b;c\n",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "MultiLine",
|
||||
Input: `"two
|
||||
line","one line","three
|
||||
line
|
||||
field"`,
|
||||
Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
|
||||
},
|
||||
{
|
||||
Name: "BlankLine",
|
||||
Input: "a,b,c\n\nd,e,f\n\n",
|
||||
Output: [][]string{
|
||||
{"a", "b", "c"},
|
||||
{"d", "e", "f"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "TrimSpace",
|
||||
Input: " a, b, c\n",
|
||||
TrimLeadingSpace: true,
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "LeadingSpace",
|
||||
Input: " a, b, c\n",
|
||||
Output: [][]string{{" a", " b", " c"}},
|
||||
},
|
||||
{
|
||||
Name: "Comment",
|
||||
Comment: '#',
|
||||
Input: "#1,2,3\na,b,c\n#comment",
|
||||
Output: [][]string{{"a", "b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "NoComment",
|
||||
Input: "#1,2,3\na,b,c",
|
||||
Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "LazyQuotes",
|
||||
LazyQuotes: true,
|
||||
Input: `a "word","1"2",a","b`,
|
||||
Output: [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
|
||||
},
|
||||
{
|
||||
Name: "BareQuotes",
|
||||
LazyQuotes: true,
|
||||
Input: `a "word","1"2",a"`,
|
||||
Output: [][]string{{`a "word"`, `1"2`, `a"`}},
|
||||
},
|
||||
{
|
||||
Name: "BareDoubleQuotes",
|
||||
LazyQuotes: true,
|
||||
Input: `a""b,c`,
|
||||
Output: [][]string{{`a""b`, `c`}},
|
||||
},
|
||||
{
|
||||
Name: "BadDoubleQuotes",
|
||||
Input: `a""b,c`,
|
||||
Output: [][]string{{`a""b`, `c`}},
|
||||
Error: `bare " in non-quoted-field`, Line: 1, Column: 1,
|
||||
},
|
||||
{
|
||||
Name: "TrimQuote",
|
||||
Input: ` "a"," b",c`,
|
||||
TrimLeadingSpace: true,
|
||||
Output: [][]string{{"a", " b", "c"}},
|
||||
},
|
||||
{
|
||||
Name: "BadBareQuote",
|
||||
Input: `a "word","b"`,
|
||||
Error: `bare " in non-quoted-field`, Line: 1, Column: 2,
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingQuote",
|
||||
Input: `"a word",b"`,
|
||||
Error: `bare " in non-quoted-field`, Line: 1, Column: 10,
|
||||
},
|
||||
{
|
||||
Name: "ExtraneousQuote",
|
||||
Input: `"a "word","b"`,
|
||||
Error: `extraneous " in field`, Line: 1, Column: 3,
|
||||
},
|
||||
{
|
||||
Name: "BadFieldCount",
|
||||
UseFieldsPerRecord: true,
|
||||
Input: "a,b,c\nd,e",
|
||||
Error: "wrong number of fields", Line: 2,
|
||||
},
|
||||
{
|
||||
Name: "BadFieldCount1",
|
||||
UseFieldsPerRecord: true,
|
||||
FieldsPerRecord: 2,
|
||||
Input: `a,b,c`,
|
||||
Error: "wrong number of fields", Line: 1,
|
||||
},
|
||||
{
|
||||
Name: "FieldCount",
|
||||
Input: "a,b,c\nd,e",
|
||||
Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingCommaEOF",
|
||||
Input: "a,b,c,",
|
||||
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingCommaEOL",
|
||||
Input: "a,b,c,\n",
|
||||
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingCommaSpaceEOF",
|
||||
TrimLeadingSpace: true,
|
||||
Input: "a,b,c, ",
|
||||
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingCommaSpaceEOL",
|
||||
TrimLeadingSpace: true,
|
||||
Input: "a,b,c, \n",
|
||||
Error: "extra delimiter at end of line", Line: 1, Column: 5,
|
||||
},
|
||||
{
|
||||
Name: "BadTrailingCommaLine3",
|
||||
TrimLeadingSpace: true,
|
||||
Input: "a,b,c\nd,e,f\ng,hi,",
|
||||
Error: "extra delimiter at end of line", Line: 3, Column: 4,
|
||||
},
|
||||
{
|
||||
Name: "NotTrailingComma3",
|
||||
Input: "a,b,c, \n",
|
||||
Output: [][]string{{"a", "b", "c", " "}},
|
||||
},
|
||||
{
|
||||
Name: "CommaFieldTest",
|
||||
TrailingComma: true,
|
||||
Input: `x,y,z,w
|
||||
x,y,z,
|
||||
x,y,,
|
||||
x,,,
|
||||
,,,
|
||||
"x","y","z","w"
|
||||
"x","y","z",""
|
||||
"x","y","",""
|
||||
"x","","",""
|
||||
"","","",""
|
||||
`,
|
||||
Output: [][]string{
|
||||
{"x", "y", "z", "w"},
|
||||
{"x", "y", "z", ""},
|
||||
{"x", "y", "", ""},
|
||||
{"x", "", "", ""},
|
||||
{"", "", "", ""},
|
||||
{"x", "y", "z", "w"},
|
||||
{"x", "y", "z", ""},
|
||||
{"x", "y", "", ""},
|
||||
{"x", "", "", ""},
|
||||
{"", "", "", ""},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestRead(t *testing.T) {
|
||||
for _, tt := range readTests {
|
||||
r := NewReader(strings.NewReader(tt.Input))
|
||||
r.Comment = tt.Comment
|
||||
if tt.UseFieldsPerRecord {
|
||||
r.FieldsPerRecord = tt.FieldsPerRecord
|
||||
} else {
|
||||
r.FieldsPerRecord = -1
|
||||
}
|
||||
r.LazyQuotes = tt.LazyQuotes
|
||||
r.TrailingComma = tt.TrailingComma
|
||||
r.TrimLeadingSpace = tt.TrimLeadingSpace
|
||||
if tt.Comma != 0 {
|
||||
r.Comma = tt.Comma
|
||||
}
|
||||
out, err := r.ReadAll()
|
||||
perr, _ := err.(*ParseError)
|
||||
if tt.Error != "" {
|
||||
if err == nil || !strings.Contains(err.String(), tt.Error) {
|
||||
t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
|
||||
} else if tt.Line != 0 && (tt.Line != perr.Line || tt.Column != perr.Column) {
|
||||
t.Errorf("%s: error at %d:%d expected %d:%d", tt.Name, perr.Line, perr.Column, tt.Line, tt.Column)
|
||||
}
|
||||
} else if err != nil {
|
||||
t.Errorf("%s: unexpected error %v", tt.Name, err)
|
||||
} else if !reflect.DeepEqual(out, tt.Output) {
|
||||
t.Errorf("%s: out=%q want %q", tt.Name, out, tt.Output)
|
||||
}
|
||||
}
|
||||
}
|
123
src/pkg/csv/writer.go
Normal file
123
src/pkg/csv/writer.go
Normal file
@ -0,0 +1,123 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"unicode"
|
||||
"utf8"
|
||||
)
|
||||
|
||||
// A Writer writes records to a CSV encoded file.
|
||||
//
|
||||
// As returned by NewWriter, a Writer writes records terminated by a
|
||||
// newline and uses ',' as the field delimiter. The exported fields can be
|
||||
// changed to customize the details before the first call to Write or WriteAll.
|
||||
//
|
||||
// Comma is the field delimiter.
|
||||
//
|
||||
// If UseCRLF is true, the Writer ends each record with \r\n instead of \n.
|
||||
// just \n is written.
|
||||
type Writer struct {
|
||||
Comma int // Field delimiter (set to to ',' by NewWriter)
|
||||
UseCRLF bool // True to use \r\n as the line terminator
|
||||
w *bufio.Writer
|
||||
}
|
||||
|
||||
// NewWriter returns a new Writer that writes to w.
|
||||
func NewWriter(w io.Writer) *Writer {
|
||||
return &Writer{
|
||||
Comma: ',',
|
||||
w: bufio.NewWriter(w),
|
||||
}
|
||||
}
|
||||
|
||||
// Writer writes a single CSV record to w along with any necessary quoting.
|
||||
// A record is a slice of strings with each string being one field.
|
||||
func (w *Writer) Write(record []string) (err os.Error) {
|
||||
for n, field := range record {
|
||||
if n > 0 {
|
||||
if _, err = w.w.WriteRune(w.Comma); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// If we don't have to have a quoted field then just
|
||||
// write out the field and continue to the next field.
|
||||
if !w.fieldNeedsQuotes(field) {
|
||||
if _, err = w.w.WriteString(field); err != nil {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err = w.w.WriteByte('"'); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for _, rune := range field {
|
||||
switch rune {
|
||||
case '"':
|
||||
_, err = w.w.WriteString(`""`)
|
||||
case '\r':
|
||||
if !w.UseCRLF {
|
||||
err = w.w.WriteByte('\r')
|
||||
}
|
||||
case '\n':
|
||||
if w.UseCRLF {
|
||||
_, err = w.w.WriteString("\r\n")
|
||||
} else {
|
||||
err = w.w.WriteByte('\n')
|
||||
}
|
||||
default:
|
||||
_, err = w.w.WriteRune(rune)
|
||||
}
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if err = w.w.WriteByte('"'); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
if w.UseCRLF {
|
||||
_, err = w.w.WriteString("\r\n")
|
||||
} else {
|
||||
err = w.w.WriteByte('\n')
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Flush writes any buffered data to the underlying io.Writer.
|
||||
func (w *Writer) Flush() {
|
||||
w.w.Flush()
|
||||
}
|
||||
|
||||
// WriteAll writes multiple CSV records to w using Write and then calls Flush.
|
||||
func (w *Writer) WriteAll(records [][]string) (err os.Error) {
|
||||
for _, record := range records {
|
||||
err = w.Write(record)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
w.Flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
// fieldNeedsQuotes returns true if our field must be enclosed in quotes.
|
||||
// Empty fields, files with a Comma, fields with a quote or newline, and
|
||||
// fields which start with a space must be enclosed in quotes.
|
||||
func (w *Writer) fieldNeedsQuotes(field string) bool {
|
||||
if len(field) == 0 || strings.IndexRune(field, w.Comma) >= 0 || strings.IndexAny(field, "\"\r\n") >= 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
rune, _ := utf8.DecodeRuneInString(field)
|
||||
return unicode.IsSpace(rune)
|
||||
}
|
44
src/pkg/csv/writer_test.go
Normal file
44
src/pkg/csv/writer_test.go
Normal file
@ -0,0 +1,44 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var writeTests = []struct {
|
||||
Input [][]string
|
||||
Output string
|
||||
UseCRLF bool
|
||||
}{
|
||||
{Input: [][]string{{"abc"}}, Output: "abc\n"},
|
||||
{Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true},
|
||||
{Input: [][]string{{`"abc"`}}, Output: `"""abc"""` + "\n"},
|
||||
{Input: [][]string{{`a"b`}}, Output: `"a""b"` + "\n"},
|
||||
{Input: [][]string{{`"a"b"`}}, Output: `"""a""b"""` + "\n"},
|
||||
{Input: [][]string{{" abc"}}, Output: `" abc"` + "\n"},
|
||||
{Input: [][]string{{"abc,def"}}, Output: `"abc,def"` + "\n"},
|
||||
{Input: [][]string{{"abc", "def"}}, Output: "abc,def\n"},
|
||||
{Input: [][]string{{"abc"}, {"def"}}, Output: "abc\ndef\n"},
|
||||
{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\ndef\"\n"},
|
||||
{Input: [][]string{{"abc\ndef"}}, Output: "\"abc\r\ndef\"\r\n", UseCRLF: true},
|
||||
}
|
||||
|
||||
func TestWrite(t *testing.T) {
|
||||
for n, tt := range writeTests {
|
||||
b := &bytes.Buffer{}
|
||||
f := NewWriter(b)
|
||||
f.UseCRLF = tt.UseCRLF
|
||||
err := f.WriteAll(tt.Input)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %s\n", err)
|
||||
}
|
||||
out := b.String()
|
||||
if out != tt.Output {
|
||||
t.Errorf("#%d: out=%q want %q", n, out, tt.Output)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user