go/src/sync/pool_test.go
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Pool is no-op under race detector, so all these tests do not work.
//
//go:build !race

package sync_test

import (
	"runtime"
	"runtime/debug"
	"sort"
	. "sync"
	"sync/atomic"
	"testing"
	"time"
)
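
// The dot import of "sync" above brings in test-only hooks that package sync
// exports from its export_test.go (Runtime_procPin, Runtime_procUnpin,
// NewPoolDequeue, NewPoolChain, and the PoolDequeue interface); they let these
// tests reach into the Pool implementation.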

func TestPool(t *testing.T) {
	// disable GC so we can control when it happens.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
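	// SetGCPercent returns the previous setting, so the deferred call
	// restores the old percentage (re-enabling GC) when the test returns.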
	var p Pool
	if p.Get() != nil {
		t.Fatal("expected empty")
	}

	// Make sure that the goroutine doesn't migrate to another P
	// between Put and Get calls.
	Runtime_procPin()
	p.Put("a")
	p.Put("b")
	if g := p.Get(); g != "a" {
		t.Fatalf("got %#v; want a", g)
	}
	if g := p.Get(); g != "b" {
		t.Fatalf("got %#v; want b", g)
	}
	if g := p.Get(); g != nil {
		t.Fatalf("got %#v; want nil", g)
	}
	Runtime_procUnpin()

	// Put in a large number of objects so they spill into
	// stealable space.
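	// (Each P holds at most one item privately; anything beyond that
	// lands in the shared, stealable per-P queue.)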
	for i := 0; i < 100; i++ {
		p.Put("c")
	}
	// After one GC, the victim cache should keep them alive.
	runtime.GC()
	if g := p.Get(); g != "c" {
		t.Fatalf("got %#v; want c after GC", g)
	}
	// A second GC should drop the victim cache.
	runtime.GC()
	if g := p.Get(); g != nil {
t.Fatalf("got %#v; want nil after second GC", g)
}
}

func TestPoolNew(t *testing.T) {
	// disable GC so we can control when it happens.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))

	i := 0
	p := Pool{
		New: func() any {
			i++
			return i
		},
	}
	if v := p.Get(); v != 1 {
		t.Fatalf("got %v; want 1", v)
	}
	if v := p.Get(); v != 2 {
		t.Fatalf("got %v; want 2", v)
	}

	// Make sure that the goroutine doesn't migrate to another P
	// between Put and Get calls.
	Runtime_procPin()
	p.Put(42)
	if v := p.Get(); v != 42 {
		t.Fatalf("got %v; want 42", v)
	}
	Runtime_procUnpin()

	if v := p.Get(); v != 3 {
		t.Fatalf("got %v; want 3", v)
	}
}

// Test that Pool does not hold pointers to previously cached resources.
func TestPoolGC(t *testing.T) {
	testPool(t, true)
}

// Test that Pool releases resources on GC.
func TestPoolRelease(t *testing.T) {
	testPool(t, false)
}
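
// testPool fills a Pool with finalizer-tracked objects and then checks that,
// after a few GC cycles, (nearly) all of them have been finalized, i.e. that
// the Pool is not retaining references. With drain=true the objects are taken
// back out before the GCs; with drain=false the Pool itself must release them.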
func testPool(t *testing.T, drain bool) {
	var p Pool
	const N = 100
loop:
	for try := 0; try < 3; try++ {
		if try == 1 && testing.Short() {
			break
		}
		var fin, fin1 uint32
		for i := 0; i < N; i++ {
			v := new(string)
			runtime.SetFinalizer(v, func(vv *string) {
				atomic.AddUint32(&fin, 1)
			})
			p.Put(v)
		}
		if drain {
			for i := 0; i < N; i++ {
				p.Get()
			}
		}
		for i := 0; i < 5; i++ {
			runtime.GC()
			time.Sleep(time.Duration(i*100+10) * time.Millisecond)
			// 1 pointer can remain on stack or elsewhere
			if fin1 = atomic.LoadUint32(&fin); fin1 >= N-1 {
				continue loop
			}
		}
		t.Fatalf("only %v out of %v resources are finalized on try %v", fin1, N, try)
	}
}

func TestPoolStress(t *testing.T) {
	const P = 10
	N := int(1e6)
	if testing.Short() {
		N /= 100
	}
	var p Pool
	done := make(chan bool)
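	// Each worker repeatedly Puts and Gets an int 0. The only values a Get
	// can legitimately return are nil or 0, so anything else means the Pool
	// handed back an object it was never given.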
	for i := 0; i < P; i++ {
		go func() {
			var v any = 0
			for j := 0; j < N; j++ {
				if v == nil {
					v = 0
				}
				p.Put(v)
				v = p.Get()
				if v != nil && v.(int) != 0 {
					t.Errorf("expect 0, got %v", v)
					break
				}
			}
			done <- true
		}()
	}
	for i := 0; i < P; i++ {
		<-done
	}
}

func TestPoolDequeue(t *testing.T) {
	testPoolDequeue(t, NewPoolDequeue(16))
}

func TestPoolChain(t *testing.T) {
	testPoolDequeue(t, NewPoolChain())
}
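
// testPoolDequeue exercises the single-producer, multi-consumer contract of
// the underlying dequeue: only the one producer goroutine calls PushHead and
// PopHead, while the P-1 consumer goroutines are restricted to PopTail.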
func testPoolDequeue(t *testing.T, d PoolDequeue) {
	const P = 10
	var N int = 2e6
	if testing.Short() {
		N = 1e3
	}
	have := make([]int32, N)
	var stop int32
	var wg WaitGroup
	record := func(val int) {
		atomic.AddInt32(&have[val], 1)
		if val == N-1 {
			atomic.StoreInt32(&stop, 1)
		}
	}

	// Start P-1 consumers.
	for i := 1; i < P; i++ {
		wg.Add(1)
		go func() {
			fail := 0
			for atomic.LoadInt32(&stop) == 0 {
				val, ok := d.PopTail()
				if ok {
					fail = 0
					record(val.(int))
				} else {
					// Speed up the test by
					// allowing the pusher to run.
					if fail++; fail%100 == 0 {
						runtime.Gosched()
					}
				}
			}
			wg.Done()
		}()
	}

	// Start 1 producer.
	nPopHead := 0
	wg.Add(1)
	go func() {
		for j := 0; j < N; j++ {
			for !d.PushHead(j) {
				// Allow a popper to run.
				runtime.Gosched()
			}
			if j%10 == 0 {
				val, ok := d.PopHead()
				if ok {
					nPopHead++
					record(val.(int))
				}
			}
		}
		wg.Done()
	}()
	wg.Wait()

	// Check results.
	for i, count := range have {
		if count != 1 {
			t.Errorf("expected have[%d] = 1, got %d", i, count)
		}
	}
	// Check that at least some PopHeads succeeded. We skip this
	// check in short mode because it's common enough that the
	// queue will stay nearly empty all the time and a PopTail
	// will happen during the window between every PushHead and
	// PopHead.
	if !testing.Short() && nPopHead == 0 {
		t.Errorf("popHead never succeeded")
	}
}

func TestNilPool(t *testing.T) {
	catch := func() {
		if recover() == nil {
			t.Error("expected panic")
		}
	}

	var p *Pool

	t.Run("Get", func(t *testing.T) {
		defer catch()
		if p.Get() != nil {
			t.Error("expected empty")
		}
		t.Error("should have panicked already")
	})

	t.Run("Put", func(t *testing.T) {
		defer catch()
		p.Put("a")
		t.Error("should have panicked already")
	})
}

func BenchmarkPool(b *testing.B) {
	var p Pool
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			p.Put(1)
			p.Get()
		}
	})
}

func BenchmarkPoolOverflow(b *testing.B) {
	var p Pool
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for b := 0; b < 100; b++ {
				p.Put(1)
			}
			for b := 0; b < 100; b++ {
				p.Get()
			}
		}
	})
}

// Simulate object starvation in order to force Ps to steal objects
// from other Ps.
func BenchmarkPoolStarvation(b *testing.B) {
	var p Pool
	count := 100
	// Put 33% fewer objects than we Get. The resulting starvation forces
	// P-local storage to steal objects from other Ps.
	countStarved := count - int(float32(count)*0.33)
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			for b := 0; b < countStarved; b++ {
				p.Put(1)
			}
			for b := 0; b < count; b++ {
				p.Get()
			}
		}
	})
}
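
// globalSink keeps the ballast allocated by BenchmarkPoolExpensiveNew
// reachable, so neither the compiler nor the GC can discard it.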
var globalSink any

func BenchmarkPoolSTW(b *testing.B) {
	// Take control of GC.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))

	var mstats runtime.MemStats
	var pauses []uint64

	var p Pool
	for i := 0; i < b.N; i++ {
		// Put a large number of items into a pool.
		const N = 100000
		var item any = 42
		for i := 0; i < N; i++ {
			p.Put(item)
		}
		// Do a GC.
		runtime.GC()
		// Record pause time.
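		// (PauseNs is a circular buffer of recent stop-the-world pause
		// times; the most recent entry is at index (NumGC+255)%256.)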
runtime.ReadMemStats(&mstats)
pauses = append(pauses, mstats.PauseNs[(mstats.NumGC+255)%256])
}
// Get pause time stats.
sort.Slice(pauses, func(i, j int) bool { return pauses[i] < pauses[j] })
var total uint64
for _, ns := range pauses {
total += ns
}
// ns/op for this benchmark is average STW time.
b.ReportMetric(float64(total)/float64(b.N), "ns/op")
b.ReportMetric(float64(pauses[len(pauses)*95/100]), "p95-ns/STW")
b.ReportMetric(float64(pauses[len(pauses)*50/100]), "p50-ns/STW")
}

func BenchmarkPoolExpensiveNew(b *testing.B) {
	// Populate a pool with items that are expensive to construct
	// to stress pool cleanup and subsequent reconstruction.

	// Create a ballast so the GC has a non-zero heap size and
	// runs at reasonable times.
	globalSink = make([]byte, 8<<20)
	defer func() { globalSink = nil }()

	// Create a pool that's "expensive" to fill.
	var p Pool
	var nNew uint64
	p.New = func() any {
		atomic.AddUint64(&nNew, 1)
		time.Sleep(time.Millisecond)
		return 42
	}
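	// nNew counts how many times the Pool had to construct a fresh item;
	// it is reported as New/op below.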
	var mstats1, mstats2 runtime.MemStats
	runtime.ReadMemStats(&mstats1)
	b.RunParallel(func(pb *testing.PB) {
		// Simulate 100X the number of goroutines having items
		// checked out from the Pool simultaneously.
		items := make([]any, 100)
		var sink []byte
		for pb.Next() {
			// Stress the pool.
			for i := range items {
				items[i] = p.Get()
				// Simulate doing some work with this
				// item checked out.
				sink = make([]byte, 32<<10)
			}
			for i, v := range items {
				p.Put(v)
				items[i] = nil
			}
		}
		_ = sink
	})
	runtime.ReadMemStats(&mstats2)

	b.ReportMetric(float64(mstats2.NumGC-mstats1.NumGC)/float64(b.N), "GCs/op")
	b.ReportMetric(float64(nNew)/float64(b.N), "New/op")
}