runtime: add ReadMemStats latency benchmark

This change adds a benchmark to the runtime which measures ReadMemStats latencies. It generates allocations with lots of pointers to keep the GC busy while hitting ReadMemStats and measuring the time it takes to complete. Updates #19812. Change-Id: I7a76aaf497ba5324d3c7a7b3df32461b3e6c3ac8 Reviewed-on: https://go-review.googlesource.com/c/go/+/220177 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com> Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
2024-11-23 12:00:14 -07:00 · 2020-02-19 20:54:25 +00:00 · 2020-02-19 20:54:25 +00:00 · 7cfb814b0a
commit 7cfb814b0a
parent 79b43fa819
1 changed files with 85 additions and 0 deletions
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@ -10,6 +10,7 @@ import (
 	"reflect"
 	"runtime"
 	"runtime/debug"
+	"sort"
 	"sync"
 	"sync/atomic"
 	"testing"
@ -506,6 +507,90 @@ func BenchmarkReadMemStats(b *testing.B) {
 	hugeSink = nil
 }

+func BenchmarkReadMemStatsLatency(b *testing.B) {
+	// We’ll apply load to the runtime with maxProcs-1 goroutines
+	// and use one more to actually benchmark. It doesn't make sense
+	// to try to run this test with only 1 P (that's what
+	// BenchmarkReadMemStats is for).
+	maxProcs := runtime.GOMAXPROCS(-1)
+	if maxProcs == 1 {
+		b.Skip("This benchmark can only be run with GOMAXPROCS > 1")
+	}
+
+	// Code to build a big tree with lots of pointers.
+	type node struct {
+		children [16]*node
+	}
+	var buildTree func(depth int) *node
+	buildTree = func(depth int) *node {
+		tree := new(node)
+		if depth != 0 {
+			for i := range tree.children {
+				tree.children[i] = buildTree(depth - 1)
+			}
+		}
+		return tree
+	}
+
+	// Keep the GC busy by continuously generating large trees.
+	done := make(chan struct{})
+	var wg sync.WaitGroup
+	for i := 0; i < maxProcs-1; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			var hold *node
+		loop:
+			for {
+				hold = buildTree(5)
+				select {
+				case <-done:
+					break loop
+				default:
+				}
+			}
+			runtime.KeepAlive(hold)
+		}()
+	}
+
+	// Spend this much time measuring latencies.
+	latencies := make([]time.Duration, 0, 1024)
+
+	// Run for timeToBench hitting ReadMemStats continuously
+	// and measuring the latency.
+	b.ResetTimer()
+	var ms runtime.MemStats
+	for i := 0; i < b.N; i++ {
+		// Sleep for a bit, otherwise we're just going to keep
+		// stopping the world and no one will get to do anything.
+		time.Sleep(100 * time.Millisecond)
+		start := time.Now()
+		runtime.ReadMemStats(&ms)
+		latencies = append(latencies, time.Now().Sub(start))
+	}
+	close(done)
+	// Make sure to stop the timer before we wait! The goroutines above
+	// are very heavy-weight and not easy to stop, so we could end up
+	// confusing the benchmarking framework for small b.N.
+	b.StopTimer()
+	wg.Wait()
+
+	// Disable the default */op metrics.
+	// ns/op doesn't mean anything because it's an average, but we
+	// have a sleep in our b.N loop above which skews this significantly.
+	b.ReportMetric(0, "ns/op")
+	b.ReportMetric(0, "B/op")
+	b.ReportMetric(0, "allocs/op")
+
+	// Sort latencies then report percentiles.
+	sort.Slice(latencies, func(i, j int) bool {
+		return latencies[i] < latencies[j]
+	})
+	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
+	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
+	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
+}
+
 func TestUserForcedGC(t *testing.T) {
 	// Test that runtime.GC() triggers a GC even if GOGC=off.
 	defer debug.SetGCPercent(debug.SetGCPercent(-1))