From c1e267cc734135a66af8a1a5015e572cbb598d44 Mon Sep 17 00:00:00 2001 From: Carl Mastrangelo Date: Mon, 3 Oct 2016 12:45:12 -0700 Subject: [PATCH] runtime: make append only clear uncopied memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also add a benchmark that shows off the new behavior. The existing benchmarks reuse the same slice, and thus don't ever have to clear memory. Running the Append|Grow benchmarks in runtime: name old time/op new time/op delta AppendSliceLarge/1024Bytes-12 265ns ± 1% 265ns ± 3% ~ (p=0.524 n=17+20) AppendSliceLarge/4096Bytes-12 807ns ± 3% 772ns ± 1% -4.38% (p=0.000 n=20+20) AppendSliceLarge/16384Bytes-12 3.20µs ± 4% 2.82µs ± 4% -11.93% (p=0.000 n=19+20) AppendSliceLarge/65536Bytes-12 13.0µs ± 4% 11.0µs ± 3% -15.22% (p=0.000 n=20+20) AppendSliceLarge/262144Bytes-12 62.7µs ± 1% 51.6µs ± 1% -17.67% (p=0.000 n=19+20) AppendSliceLarge/1048576Bytes-12 337µs ± 3% 289µs ± 3% -14.36% (p=0.000 n=20+20) GrowSliceBytes-12 31.2ns ± 4% 31.4ns ±11% ~ (p=0.308 n=19+18) GrowSliceInts-12 53.4ns ±14% 45.0ns ± 6% -15.74% (p=0.000 n=20+19) GrowSlicePtr-12 87.0ns ± 3% 83.3ns ± 3% -4.26% (p=0.000 n=18+17) GrowSliceStruct24Bytes-12 88.9ns ± 5% 77.8ns ± 2% -12.45% (p=0.000 n=20+19) Append-12 17.2ns ± 1% 17.3ns ± 2% ~ (p=0.464 n=18+17) AppendGrowByte-12 2.28ms ± 1% 1.92ms ± 2% -15.65% (p=0.000 n=20+18) AppendGrowString-12 255ms ± 3% 253ms ± 4% ~ (p=0.065 n=19+19) AppendSlice/1Bytes-12 3.13ns ± 0% 3.11ns ± 1% -0.65% (p=0.000 n=17+18) AppendSlice/4Bytes-12 3.02ns ± 2% 3.11ns ± 1% +3.27% (p=0.000 n=18+17) AppendSlice/7Bytes-12 4.14ns ± 3% 4.13ns ± 2% ~ (p=0.380 n=19+18) AppendSlice/8Bytes-12 3.74ns ± 3% 3.68ns ± 1% -1.76% (p=0.000 n=19+18) AppendSlice/15Bytes-12 4.03ns ± 2% 4.04ns ± 2% ~ (p=0.261 n=19+20) AppendSlice/16Bytes-12 4.03ns ± 2% 4.03ns ± 0% ~ (p=0.062 n=18+17) AppendSlice/32Bytes-12 3.23ns ± 4% 3.43ns ± 1% +6.10% (p=0.000 n=17+18) AppendStr/1Bytes-12 3.51ns ± 1% 3.52ns ± 1% ~ (p=0.321 n=18+19) AppendStr/4Bytes-12 3.46ns ± 1% 3.46ns ± 1% ~ (p=0.977 n=18+20) AppendStr/8Bytes-12 3.18ns ± 1% 3.19ns ± 1% ~ (p=0.650 n=16+17) AppendStr/16Bytes-12 6.08ns ±27% 5.52ns ± 3% -9.16% (p=0.002 n=18+19) AppendStr/32Bytes-12 3.71ns ± 1% 3.53ns ± 1% -4.73% (p=0.000 n=20+19) AppendSpecialCase-12 17.7ns ± 1% 17.8ns ± 3% +0.86% (p=0.045 n=17+18) AppendInPlace/NoGrow/Byte-12 375ns ± 1% 376ns ± 1% +0.35% (p=0.021 n=20+18) AppendInPlace/NoGrow/1Ptr-12 1.01µs ± 1% 1.10µs ± 1% +9.28% (p=0.000 n=18+20) AppendInPlace/NoGrow/2Ptr-12 1.85µs ± 2% 1.71µs ± 1% -7.51% (p=0.000 n=19+18) AppendInPlace/NoGrow/3Ptr-12 2.57µs ± 2% 2.44µs ± 1% -5.08% (p=0.000 n=19+19) AppendInPlace/NoGrow/4Ptr-12 3.52µs ± 2% 3.35µs ± 2% -4.70% (p=0.000 n=20+19) AppendInPlace/Grow/Byte-12 212ns ± 1% 217ns ± 8% +2.57% (p=0.000 n=20+20) AppendInPlace/Grow/1Ptr-12 214ns ± 2% 217ns ± 3% +1.23% (p=0.001 n=18+19) AppendInPlace/Grow/2Ptr-12 298ns ± 2% 300ns ± 2% +0.55% (p=0.038 n=19+20) AppendInPlace/Grow/3Ptr-12 367ns ± 2% 366ns ± 2% ~ (p=0.452 n=20+18) AppendInPlace/Grow/4Ptr-12 416ns ± 2% 411ns ± 2% -1.18% (p=0.000 n=20+19) StackGrowth-12 43.4ns ± 1% 43.4ns ± 0% ~ (p=1.000 n=16+16) StackGrowthDeep-12 11.4µs ± 4% 10.3µs ± 4% -9.65% (p=0.000 n=20+19) Change-Id: I69a8afbd942c787c591d95b9d9439bd6db4d1e49 Reviewed-on: https://go-review.googlesource.com/30192 Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot --- src/runtime/append_test.go | 16 ++++++++++++++++ src/runtime/slice.go | 9 +++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/runtime/append_test.go b/src/runtime/append_test.go index 6b8968e382..6bd8f3bd95 100644 --- a/src/runtime/append_test.go +++ b/src/runtime/append_test.go @@ -100,6 +100,22 @@ func BenchmarkAppendSlice(b *testing.B) { } } +var ( + blackhole []byte +) + +func BenchmarkAppendSliceLarge(b *testing.B) { + for _, length := range []int{1 << 10, 4 << 10, 16 << 10, 64 << 10, 256 << 10, 1024 << 10} { + y := make([]byte, length) + b.Run(fmt.Sprint(length, "Bytes"), func(b *testing.B) { + for i := 0; i < b.N; i++ { + blackhole = nil + blackhole = append(blackhole, y...) + } + }) + } +} + func BenchmarkAppendStr(b *testing.B) { for _, str := range []string{ "1", diff --git a/src/runtime/slice.go b/src/runtime/slice.go index dd8dcb1873..7f4de450d2 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -111,19 +111,22 @@ func growslice(et *_type, old slice, cap int) slice { } } - var lenmem, capmem uintptr + var lenmem, newlenmem, capmem uintptr const ptrSize = unsafe.Sizeof((*byte)(nil)) switch et.size { case 1: lenmem = uintptr(old.len) + newlenmem = uintptr(cap) capmem = roundupsize(uintptr(newcap)) newcap = int(capmem) case ptrSize: lenmem = uintptr(old.len) * ptrSize + newlenmem = uintptr(cap) * ptrSize capmem = roundupsize(uintptr(newcap) * ptrSize) newcap = int(capmem / ptrSize) default: lenmem = uintptr(old.len) * et.size + newlenmem = uintptr(cap) * et.size capmem = roundupsize(uintptr(newcap) * et.size) newcap = int(capmem / et.size) } @@ -136,7 +139,9 @@ func growslice(et *_type, old slice, cap int) slice { if et.kind&kindNoPointers != 0 { p = mallocgc(capmem, nil, false) memmove(p, old.array, lenmem) - memclr(add(p, lenmem), capmem-lenmem) + // The append() that calls growslice is going to overwrite from old.len to cap (which will be the new length). + // Only clear the part that will not be overwritten. + memclr(add(p, newlenmem), capmem-newlenmem) } else { // Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan uninitialized memory. p = mallocgc(capmem, et, true)