mirror of
https://github.com/golang/go
synced 2024-11-17 00:14:50 -07:00
encoding/csv: avoid mangling invalid UTF-8 in Writer
In the situation where a quoted field is necessary, avoid processing each UTF-8 rune one-by-one, which causes mangling of invalid sequences into utf8.RuneError, causing a loss of information. Instead, search only for the escaped characters, handle those specially and copy everything else in between verbatim. This symmetrically matches the behavior of Reader. Fixes #24298 Change-Id: I9276f64891084ce8487678f663fad711b4095dbb Reviewed-on: https://go-review.googlesource.com/99297 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
88466e93a4
commit
0add9a4dcf
@ -57,33 +57,46 @@ func (w *Writer) Write(record []string) error {
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if err := w.w.WriteByte('"'); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, r1 := range field {
|
||||
var err error
|
||||
switch r1 {
|
||||
case '"':
|
||||
_, err = w.w.WriteString(`""`)
|
||||
case '\r':
|
||||
if !w.UseCRLF {
|
||||
err = w.w.WriteByte('\r')
|
||||
}
|
||||
case '\n':
|
||||
if w.UseCRLF {
|
||||
_, err = w.w.WriteString("\r\n")
|
||||
} else {
|
||||
err = w.w.WriteByte('\n')
|
||||
}
|
||||
default:
|
||||
_, err = w.w.WriteRune(r1)
|
||||
for len(field) > 0 {
|
||||
// Search for special characters.
|
||||
i := strings.IndexAny(field, "\"\r\n")
|
||||
if i < 0 {
|
||||
i = len(field)
|
||||
}
|
||||
if err != nil {
|
||||
|
||||
// Copy verbatim everything before the special character.
|
||||
if _, err := w.w.WriteString(field[:i]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
field = field[i:]
|
||||
|
||||
// Encode the special character.
|
||||
if len(field) > 0 {
|
||||
var err error
|
||||
switch field[0] {
|
||||
case '"':
|
||||
_, err = w.w.WriteString(`""`)
|
||||
case '\r':
|
||||
if !w.UseCRLF {
|
||||
err = w.w.WriteByte('\r')
|
||||
}
|
||||
case '\n':
|
||||
if w.UseCRLF {
|
||||
_, err = w.w.WriteString("\r\n")
|
||||
} else {
|
||||
err = w.w.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
field = field[1:]
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := w.w.WriteByte('"'); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -39,6 +39,8 @@ var writeTests = []struct {
|
||||
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
|
||||
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
|
||||
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
|
||||
{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
|
||||
{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
|
||||
}
|
||||
|
||||
func TestWrite(t *testing.T) {
|
||||
|
Loading…
Reference in New Issue
Block a user