1
0
mirror of https://github.com/golang/go synced 2024-11-17 00:14:50 -07:00

encoding/csv: avoid mangling invalid UTF-8 in Writer

In the situation where a quoted field is necessary, avoid processing
each UTF-8 rune one-by-one, which causes mangling of invalid sequences
into utf8.RuneError, causing a loss of information.
Instead, search only for the escaped characters, handle those specially
and copy everything else in between verbatim.

This symmetrically matches the behavior of Reader.

Fixes #24298

Change-Id: I9276f64891084ce8487678f663fad711b4095dbb
Reviewed-on: https://go-review.googlesource.com/99297
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Joe Tsai 2018-03-07 14:14:19 -08:00 committed by Joe Tsai
parent 88466e93a4
commit 0add9a4dcf
2 changed files with 35 additions and 20 deletions

View File

@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error {
}
continue
}
if err := w.w.WriteByte('"'); err != nil {
return err
}
for _, r1 := range field {
var err error
switch r1 {
case '"':
_, err = w.w.WriteString(`""`)
case '\r':
if !w.UseCRLF {
err = w.w.WriteByte('\r')
}
case '\n':
if w.UseCRLF {
_, err = w.w.WriteString("\r\n")
} else {
err = w.w.WriteByte('\n')
}
default:
_, err = w.w.WriteRune(r1)
for len(field) > 0 {
// Search for special characters.
i := strings.IndexAny(field, "\"\r\n")
if i < 0 {
i = len(field)
}
if err != nil {
// Copy verbatim everything before the special character.
if _, err := w.w.WriteString(field[:i]); err != nil {
return err
}
}
field = field[i:]
// Encode the special character.
if len(field) > 0 {
var err error
switch field[0] {
case '"':
_, err = w.w.WriteString(`""`)
case '\r':
if !w.UseCRLF {
err = w.w.WriteByte('\r')
}
case '\n':
if w.UseCRLF {
_, err = w.w.WriteString("\r\n")
} else {
err = w.w.WriteByte('\n')
}
}
field = field[1:]
if err != nil {
return err
}
}
}
if err := w.w.WriteByte('"'); err != nil {
return err
}

View File

@ -39,6 +39,8 @@ var writeTests = []struct {
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
}
func TestWrite(t *testing.T) {