1
0
mirror of https://github.com/golang/go synced 2024-11-17 08:24:43 -07:00

strings: avoid utf8.RuneError mangling in Split

Split should only split strings and not perform mangling
of invalid UTF-8 into utf8.RuneError.
The prior behavior is clearly a bug since mangling is not
performed in any other situation (e.g., when the separator is non-empty).

Fixes #53511

Change-Id: I112a2ef15ee46ddecda015ee14bca04cd76adfbf
Reviewed-on: https://go-review.googlesource.com/c/go/+/413715
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Joe Tsai 2022-06-22 20:57:50 -07:00 committed by Joseph Tsai
parent ced4d6fd2d
commit 9a4685f220
3 changed files with 6 additions and 5 deletions

View File

@ -755,6 +755,8 @@ var splittests = []SplitTest{
{"123", "", 2, []string{"1", "23"}},
{"123", "", 17, []string{"1", "2", "3"}},
{"bT", "T", math.MaxInt / 4, []string{"b", ""}},
{"\xff-\xff", "", -1, []string{"\xff", "-", "\xff"}},
{"\xff-\xff", "-", -1, []string{"\xff", "\xff"}},
}
func TestSplit(t *testing.T) {

View File

@ -15,7 +15,7 @@ import (
// explode splits s into a slice of UTF-8 strings,
// one string per Unicode character up to a maximum of n (n < 0 means no limit).
// Invalid UTF-8 sequences become correct encodings of U+FFFD.
// Invalid UTF-8 bytes are sliced individually.
func explode(s string, n int) []string {
l := utf8.RuneCountInString(s)
if n < 0 || n > l {
@ -23,12 +23,9 @@ func explode(s string, n int) []string {
}
a := make([]string, n)
for i := 0; i < n-1; i++ {
ch, size := utf8.DecodeRuneInString(s)
_, size := utf8.DecodeRuneInString(s)
a[i] = s[:size]
s = s[size:]
if ch == utf8.RuneError {
a[i] = string(utf8.RuneError)
}
}
if n > 0 {
a[n-1] = s

View File

@ -406,6 +406,8 @@ var splittests = []SplitTest{
{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
{"1 2", " ", 3, []string{"1", "2"}},
{"", "T", math.MaxInt / 4, []string{""}},
{"\xff-\xff", "", -1, []string{"\xff", "-", "\xff"}},
{"\xff-\xff", "-", -1, []string{"\xff", "\xff"}},
}
func TestSplit(t *testing.T) {