mirror of
https://github.com/golang/go
synced 2024-11-19 23:24:45 -07:00
encoding/csv: restore Go 1.9 quoted \r\n handling in Reader
CL 52810 changed Reader to interpret a quoted \r\n as a raw \r\n when reading fields. This seems likely to break existing users, and discussion on both #21201 (the original issue that triggered the change) and #22746 (discussing whether to revert the change) failed to identify a single motivating example for this change. To avoid breaking existing users for no clear reason, revert the change. The Reader has been rewritten in the interim so this is not a git revert but instead and adjustment (and slight simplification) of the new Reader. Fixes #22746. Change-Id: Ie857b2f4b1359a207d085b6d3c3a6d440a997d12 Reviewed-on: https://go-review.googlesource.com/78295 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
This commit is contained in:
parent
918b98ca70
commit
1d47a14591
@ -99,15 +99,24 @@ func validDelim(r rune) bool {
|
||||
// As returned by NewReader, a Reader expects input conforming to RFC 4180.
|
||||
// The exported fields can be changed to customize the details before the
|
||||
// first call to Read or ReadAll.
|
||||
//
|
||||
// The Reader converts all \r\n sequences in its input to plain \n,
|
||||
// including in multiline field values, so that the returned data does
|
||||
// not depend on which line-ending convention an input file uses.
|
||||
type Reader struct {
|
||||
// Comma is the field delimiter.
|
||||
// It is set to comma (',') by NewReader.
|
||||
// Comma must be a valid rune and must not be \r, \n,
|
||||
// or the Unicode replacement character (0xFFFD).
|
||||
Comma rune
|
||||
|
||||
// Comment, if not 0, is the comment character. Lines beginning with the
|
||||
// Comment character without preceding whitespace are ignored.
|
||||
// With leading whitespace the Comment character becomes part of the
|
||||
// field, even if TrimLeadingSpace is true.
|
||||
// Comment must be a valid rune and must not be \r, \n,
|
||||
// or the Unicode replacement character (0xFFFD).
|
||||
// It must also not be equal to Comma.
|
||||
Comment rune
|
||||
|
||||
// FieldsPerRecord is the number of expected fields per record.
|
||||
@ -217,15 +226,17 @@ func (r *Reader) readLine() ([]byte, error) {
|
||||
err = nil
|
||||
}
|
||||
r.numLine++
|
||||
// Normalize \r\n to \n on all input lines.
|
||||
if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
|
||||
line[n-2] = '\n'
|
||||
line = line[:n-1]
|
||||
}
|
||||
return line, err
|
||||
}
|
||||
|
||||
// lengthCRLF reports the number of bytes for a trailing "\r\n".
|
||||
func lengthCRLF(b []byte) int {
|
||||
if j := len(b) - 1; j >= 0 && b[j] == '\n' {
|
||||
if j := len(b) - 2; j >= 0 && b[j] == '\r' {
|
||||
return 2
|
||||
}
|
||||
// lengthNL reports the number of bytes for the trailing \n.
|
||||
func lengthNL(b []byte) int {
|
||||
if len(b) > 0 && b[len(b)-1] == '\n' {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
@ -251,7 +262,7 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||
line = nil
|
||||
continue // Skip comment lines
|
||||
}
|
||||
if errRead == nil && len(line) == lengthCRLF(line) {
|
||||
if errRead == nil && len(line) == lengthNL(line) {
|
||||
line = nil
|
||||
continue // Skip empty lines
|
||||
}
|
||||
@ -281,7 +292,7 @@ parseField:
|
||||
if i >= 0 {
|
||||
field = field[:i]
|
||||
} else {
|
||||
field = field[:len(field)-lengthCRLF(field)]
|
||||
field = field[:len(field)-lengthNL(field)]
|
||||
}
|
||||
// Check to make sure a quote does not appear in field.
|
||||
if !r.LazyQuotes {
|
||||
@ -317,7 +328,7 @@ parseField:
|
||||
line = line[commaLen:]
|
||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||
continue parseField
|
||||
case lengthCRLF(line) == len(line):
|
||||
case lengthNL(line) == len(line):
|
||||
// `"\n` sequence (end of line).
|
||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||
break parseField
|
||||
|
@ -235,9 +235,9 @@ x,,,
|
||||
Error: &ParseError{StartLine: 2, Line: 5, Column: 0, Err: ErrQuote},
|
||||
}, {
|
||||
Name: "CRLFInQuotedField", // Issue 21201
|
||||
Input: "\"Hello\r\nHi\"",
|
||||
Input: "A,\"Hello\r\nHi\",B\r\n",
|
||||
Output: [][]string{
|
||||
{"Hello\r\nHi"},
|
||||
{"A", "Hello\nHi", "B"},
|
||||
},
|
||||
}, {
|
||||
Name: "BinaryBlobField", // Issue 19410
|
||||
|
@ -20,7 +20,7 @@ import (
|
||||
//
|
||||
// Comma is the field delimiter.
|
||||
//
|
||||
// If UseCRLF is true, the Writer ends each record with \r\n instead of \n.
|
||||
// If UseCRLF is true, the Writer ends each output line with \r\n instead of \n.
|
||||
type Writer struct {
|
||||
Comma rune // Field delimiter (set to ',' by NewWriter)
|
||||
UseCRLF bool // True to use \r\n as the line terminator
|
||||
|
Loading…
Reference in New Issue
Block a user