mirror of
https://github.com/golang/go
synced 2024-11-18 15:04:44 -07:00
strings: fix encoding of \u0080 in Map
strings.Map incorrectly encoded PAD (U+0080), which has the same value as utf8.RuneSelf, as the single byte \x80. The checks for one-byte UTF-8 encodings compared the rune against utf8.RuneSelf with <= instead of <. Use < so that U+0080 is no longer written as a single raw byte.
Fixes #25242
Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
98409a44d5
commit
8c62fc0ca3
@ -479,7 +479,7 @@ func Map(mapping func(rune) rune, s string) string {
|
|||||||
b = make([]byte, len(s)+utf8.UTFMax)
|
b = make([]byte, len(s)+utf8.UTFMax)
|
||||||
nbytes = copy(b, s[:i])
|
nbytes = copy(b, s[:i])
|
||||||
if r >= 0 {
|
if r >= 0 {
|
||||||
if r <= utf8.RuneSelf {
|
if r < utf8.RuneSelf {
|
||||||
b[nbytes] = byte(r)
|
b[nbytes] = byte(r)
|
||||||
nbytes++
|
nbytes++
|
||||||
} else {
|
} else {
|
||||||
@ -509,7 +509,7 @@ func Map(mapping func(rune) rune, s string) string {
|
|||||||
r := mapping(c)
|
r := mapping(c)
|
||||||
|
|
||||||
// common case
|
// common case
|
||||||
if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
|
if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
|
||||||
b[nbytes] = byte(r)
|
b[nbytes] = byte(r)
|
||||||
nbytes++
|
nbytes++
|
||||||
continue
|
continue
|
||||||
|
@ -528,6 +528,7 @@ var upperTests = []StringTest{
|
|||||||
{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
|
{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
|
||||||
{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
|
{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
|
||||||
{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
|
{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
|
||||||
|
{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
|
||||||
}
|
}
|
||||||
|
|
||||||
var lowerTests = []StringTest{
|
var lowerTests = []StringTest{
|
||||||
@ -538,6 +539,7 @@ var lowerTests = []StringTest{
|
|||||||
{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
|
{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
|
||||||
{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
|
{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
|
||||||
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
|
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
|
||||||
|
{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
|
||||||
}
|
}
|
||||||
|
|
||||||
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
|
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
|
||||||
@ -650,6 +652,27 @@ func TestMap(t *testing.T) {
|
|||||||
if m != expect {
|
if m != expect {
|
||||||
t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
|
t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
|
||||||
|
encode := func(r rune) rune {
|
||||||
|
switch r {
|
||||||
|
case utf8.RuneSelf:
|
||||||
|
return unicode.MaxRune
|
||||||
|
case unicode.MaxRune:
|
||||||
|
return utf8.RuneSelf
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
s := string(utf8.RuneSelf) + string(utf8.MaxRune)
|
||||||
|
r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
|
||||||
|
m = Map(encode, s)
|
||||||
|
if m != r {
|
||||||
|
t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
|
||||||
|
}
|
||||||
|
m = Map(encode, r)
|
||||||
|
if m != s {
|
||||||
|
t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
|
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
|
||||||
|
Loading…
Reference in New Issue
Block a user