mirror of
https://github.com/golang/go
synced 2024-11-12 07:00:21 -07:00
bytes: optimize ToLower and ToUpper for ASCII-only case
Follow what CL 68370 and CL 76470 did for the respective functions in
package strings.

Also adjust godoc strings to match the respective strings functions and
mention the special case for ASCII-only byte slices which don't need
conversion.

name                                         old time/op  new time/op  delta
ToUpper/#00-8                                9.35ns ± 3%  6.08ns ± 2%  -35.04%  (p=0.000 n=9+9)
ToUpper/ONLYUPPER-8                          77.7ns ± 1%  16.9ns ± 2%  -78.22%  (p=0.000 n=10+10)
ToUpper/abc-8                                36.5ns ± 1%  22.1ns ± 1%  -39.43%  (p=0.000 n=10+8)
ToUpper/AbC123-8                             56.9ns ± 2%  28.2ns ± 2%  -50.54%  (p=0.000 n=8+10)
ToUpper/azAZ09_-8                            62.3ns ± 1%  26.9ns ± 1%  -56.82%  (p=0.000 n=9+10)
ToUpper/longStrinGwitHmixofsmaLLandcAps-8     219ns ± 2%    63ns ± 2%  -71.17%  (p=0.000 n=10+10)
ToUpper/longɐstringɐwithɐnonasciiⱯchars-8     367ns ± 2%   374ns ± 3%   +2.05%  (p=0.000 n=9+10)
ToUpper/ɐɐɐɐɐ-8                               200ns ± 1%   206ns ± 1%   +2.49%  (p=0.000 n=10+10)
ToUpper/a\u0080\U0010ffff-8                  90.4ns ± 1%  93.8ns ± 0%   +3.82%  (p=0.000 n=10+7)
ToLower/#00-8                                9.59ns ± 1%  6.13ns ± 2%  -36.08%  (p=0.000 n=10+10)
ToLower/abc-8                                36.4ns ± 1%  10.4ns ± 1%  -71.50%  (p=0.000 n=10+10)
ToLower/AbC123-8                             55.8ns ± 1%  27.5ns ± 1%  -50.61%  (p=0.000 n=10+10)
ToLower/azAZ09_-8                            61.7ns ± 1%  30.2ns ± 1%  -50.98%  (p=0.000 n=8+10)
ToLower/longStrinGwitHmixofsmaLLandcAps-8     226ns ± 1%    64ns ± 1%  -71.53%  (p=0.000 n=10+9)
ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS-8     354ns ± 0%   361ns ± 0%   +2.18%  (p=0.000 n=10+10)
ToLower/ⱭⱭⱭⱭⱭ-8                               180ns ± 1%   186ns ± 0%   +3.45%  (p=0.000 n=10+9)
ToLower/A\u0080\U0010ffff-8                  91.7ns ± 0%  94.5ns ± 0%   +2.99%  (p=0.000 n=10+10)

Change-Id: Ifdb8ae328ff9feacd1c170db8eebbf98c399e204
Reviewed-on: https://go-review.googlesource.com/c/go/+/170954
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
2ab75c0f40
commit
08e1823a63
@ -521,11 +521,66 @@ func Repeat(b []byte, count int) []byte {
|
|||||||
return nb
|
return nb
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
|
// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
|
||||||
func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
|
// their upper case.
|
||||||
|
func ToUpper(s []byte) []byte {
|
||||||
|
isASCII, hasLower := true, false
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if c >= utf8.RuneSelf {
|
||||||
|
isASCII = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
hasLower = hasLower || ('a' <= c && c <= 'z')
|
||||||
|
}
|
||||||
|
|
||||||
// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
|
if isASCII { // optimize for ASCII-only byte slices.
|
||||||
func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
|
if !hasLower {
|
||||||
|
// Just return a copy.
|
||||||
|
return append([]byte(""), s...)
|
||||||
|
}
|
||||||
|
b := make([]byte, len(s))
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if 'a' <= c && c <= 'z' {
|
||||||
|
c -= 'a' - 'A'
|
||||||
|
}
|
||||||
|
b[i] = c
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
return Map(unicode.ToUpper, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToLower returns a copy of the byte slice s with all Unicode letters mapped to
|
||||||
|
// their lower case.
|
||||||
|
func ToLower(s []byte) []byte {
|
||||||
|
isASCII, hasUpper := true, false
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if c >= utf8.RuneSelf {
|
||||||
|
isASCII = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
hasUpper = hasUpper || ('A' <= c && c <= 'Z')
|
||||||
|
}
|
||||||
|
|
||||||
|
if isASCII { // optimize for ASCII-only byte slices.
|
||||||
|
if !hasUpper {
|
||||||
|
return append([]byte(""), s...)
|
||||||
|
}
|
||||||
|
b := make([]byte, len(s))
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if 'A' <= c && c <= 'Z' {
|
||||||
|
c += 'a' - 'A'
|
||||||
|
}
|
||||||
|
b[i] = c
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
return Map(unicode.ToLower, s)
|
||||||
|
}
|
||||||
|
|
||||||
// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
|
// ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
|
||||||
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
|
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
|
||||||
|
@ -891,10 +891,14 @@ type StringTest struct {
|
|||||||
|
|
||||||
var upperTests = []StringTest{
|
var upperTests = []StringTest{
|
||||||
{"", []byte("")},
|
{"", []byte("")},
|
||||||
|
{"ONLYUPPER", []byte("ONLYUPPER")},
|
||||||
{"abc", []byte("ABC")},
|
{"abc", []byte("ABC")},
|
||||||
{"AbC123", []byte("ABC123")},
|
{"AbC123", []byte("ABC123")},
|
||||||
{"azAZ09_", []byte("AZAZ09_")},
|
{"azAZ09_", []byte("AZAZ09_")},
|
||||||
|
{"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")},
|
||||||
|
{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")},
|
||||||
{"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
|
{"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
|
||||||
|
{"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune
|
||||||
}
|
}
|
||||||
|
|
||||||
var lowerTests = []StringTest{
|
var lowerTests = []StringTest{
|
||||||
@ -902,7 +906,10 @@ var lowerTests = []StringTest{
|
|||||||
{"abc", []byte("abc")},
|
{"abc", []byte("abc")},
|
||||||
{"AbC123", []byte("abc123")},
|
{"AbC123", []byte("abc123")},
|
||||||
{"azAZ09_", []byte("azaz09_")},
|
{"azAZ09_", []byte("azaz09_")},
|
||||||
|
{"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")},
|
||||||
|
{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")},
|
||||||
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
|
{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
|
||||||
|
{"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune
|
||||||
}
|
}
|
||||||
|
|
||||||
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
|
const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
|
||||||
@ -1029,6 +1036,34 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
|
|||||||
|
|
||||||
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
|
func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
|
||||||
|
|
||||||
|
func BenchmarkToUpper(b *testing.B) {
|
||||||
|
for _, tc := range upperTests {
|
||||||
|
tin := []byte(tc.in)
|
||||||
|
b.Run(tc.in, func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
actual := ToUpper(tin)
|
||||||
|
if !Equal(actual, tc.out) {
|
||||||
|
b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkToLower(b *testing.B) {
|
||||||
|
for _, tc := range lowerTests {
|
||||||
|
tin := []byte(tc.in)
|
||||||
|
b.Run(tc.in, func(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
actual := ToLower(tin)
|
||||||
|
if !Equal(actual, tc.out) {
|
||||||
|
b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
|
func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
|
||||||
|
|
||||||
type RepeatTest struct {
|
type RepeatTest struct {
|
||||||
|
Loading…
Reference in New Issue
Block a user