1
0
mirror of https://github.com/golang/go synced 2024-11-23 05:00:07 -07:00

utf16: reduce utf16.Decode allocations

This CL avoids allocating in utf16.Decode for code point sequences
with less than 64 elements. It does so by splitting the function in two,
one that can be inlined that preallocates a buffer and the other that
does the heavy-lifting.

The mid-stack inliner will allocate the buffer in the caller stack,
and in many cases this will be enough to avoid the allocation.

unicode/utf16 benchmarks:

name                         old time/op    new time/op    delta
DecodeValidASCII-12            60.1ns ± 3%    16.0ns ±20%   -73.40%  (p=0.000 n=8+10)
DecodeValidJapaneseChars-12    61.3ns ±10%    14.9ns ±39%   -75.71%  (p=0.000 n=10+10)

name                         old alloc/op   new alloc/op   delta
DecodeValidASCII-12             48.0B ± 0%      0.0B       -100.00%  (p=0.000 n=10+10)
DecodeValidJapaneseChars-12     48.0B ± 0%      0.0B       -100.00%  (p=0.000 n=10+10)

name                         old allocs/op  new allocs/op  delta
DecodeValidASCII-12              1.00 ± 0%      0.00       -100.00%  (p=0.000 n=10+10)
DecodeValidJapaneseChars-12      1.00 ± 0%      0.00       -100.00%  (p=0.000 n=10+10)

I've also benchmarked os.File.ReadDir with this change applied
to demonstrate that it does make a difference in the caller site, in this
case via syscall.UTF16ToString:

name        old time/op    new time/op    delta
ReadDir-12     592µs ± 8%     620µs ±16%     ~     (p=0.280 n=10+10)

name        old alloc/op   new alloc/op   delta
ReadDir-12    30.4kB ± 0%    22.4kB ± 0%  -26.10%  (p=0.000 n=8+10)

name        old allocs/op  new allocs/op  delta
ReadDir-12       402 ± 0%       272 ± 0%  -32.34%  (p=0.000 n=10+10)

Change-Id: I65cf5caa3fd3b3a466c0ed837a50a96e975bbe6b
Reviewed-on: https://go-review.googlesource.com/c/go/+/453415
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>
This commit is contained in:
qmuntal 2022-11-25 14:22:36 +01:00 committed by Quim Muntal
parent 0c8480acfe
commit 2423370136
3 changed files with 35 additions and 7 deletions

View File

@ -123,6 +123,9 @@ func TestIntendedInlining(t *testing.T) {
"AppendRune",
"ValidRune",
},
"unicode/utf16": {
"Decode",
},
"reflect": {
"Value.Bool",
"Value.Bytes",

View File

@ -103,23 +103,31 @@ func AppendRune(a []uint16, r rune) []uint16 {
// Decode returns the Unicode code point sequence represented
// by the UTF-16 encoding s.
func Decode(s []uint16) []rune {
a := make([]rune, len(s))
n := 0
// Preallocate capacity to hold up to 64 runes.
// Decode inlines, so the allocation can live on the stack.
buf := make([]rune, 0, 64)
return decode(s, buf)
}
// decode appends to buf the Unicode code point sequence represented
// by the UTF-16 encoding s and return the extended buffer.
func decode(s []uint16, buf []rune) []rune {
for i := 0; i < len(s); i++ {
var ar rune
switch r := s[i]; {
case r < surr1, surr3 <= r:
// normal rune
a[n] = rune(r)
ar = rune(r)
case surr1 <= r && r < surr2 && i+1 < len(s) &&
surr2 <= s[i+1] && s[i+1] < surr3:
// valid surrogate sequence
a[n] = DecodeRune(rune(r), rune(s[i+1]))
ar = DecodeRune(rune(r), rune(s[i+1]))
i++
default:
// invalid surrogate sequence
a[n] = replacementChar
ar = replacementChar
}
n++
buf = append(buf, ar)
}
return a[:n]
return buf
}

View File

@ -5,6 +5,7 @@
package utf16_test
import (
"internal/testenv"
"reflect"
"testing"
"unicode"
@ -103,6 +104,22 @@ var decodeTests = []decodeTest{
{[]uint16{0xdfff}, []rune{0xfffd}},
}
func TestAllocationsDecode(t *testing.T) {
testenv.SkipIfOptimizationOff(t)
for _, tt := range decodeTests {
allocs := testing.AllocsPerRun(10, func() {
out := Decode(tt.in)
if out == nil {
t.Errorf("Decode(%x) = nil", tt.in)
}
})
if allocs > 0 {
t.Errorf("Decode allocated %v times", allocs)
}
}
}
func TestDecode(t *testing.T) {
for _, tt := range decodeTests {
out := Decode(tt.in)