From 948b21a3d7419ba42c574bde89c199b522221dc6 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Fri, 10 Feb 2017 11:44:19 -0800 Subject: [PATCH] testing: only call ReadMemStats if necessary when benchmarking When running benchmarks with -cpuprofile, the entire process gets profiled, and ReadMemStats is surprisingly expensive. Running the sort benchmarks right now with -cpuprofile shows almost half of all execution time in ReadMemStats. Since ReadMemStats is not required if the benchmark does not need allocation stats, simply skip it. This will make cpu profiles nicer to read and significantly speed up the process of running benchmarks. It might also make sense to toggle cpu profiling on/off as we begin/end individual benchmarks, but that wouldn't get us the time savings of skipping ReadMemStats, so this CL is useful in itself. Change-Id: I425197b1ee11be4bc91d22b929e2caf648ebd7c5 Reviewed-on: https://go-review.googlesource.com/36791 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/testing/benchmark.go | 29 ++++++++++++++++++++--------- src/testing/sub_test.go | 10 ++++++++-- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/src/testing/benchmark.go b/src/testing/benchmark.go index 8d3f63d232..cac6e9fb41 100644 --- a/src/testing/benchmark.go +++ b/src/testing/benchmark.go @@ -73,9 +73,11 @@ type B struct { // a call to StopTimer. func (b *B) StartTimer() { if !b.timerOn { - runtime.ReadMemStats(&memStats) - b.startAllocs = memStats.Mallocs - b.startBytes = memStats.TotalAlloc + if *benchmarkMemory || b.showAllocResult { + runtime.ReadMemStats(&memStats) + b.startAllocs = memStats.Mallocs + b.startBytes = memStats.TotalAlloc + } b.start = time.Now() b.timerOn = true } @@ -87,9 +89,11 @@ func (b *B) StartTimer() { func (b *B) StopTimer() { if b.timerOn { b.duration += time.Now().Sub(b.start) - runtime.ReadMemStats(&memStats) - b.netAllocs += memStats.Mallocs - b.startAllocs - b.netBytes += memStats.TotalAlloc - b.startBytes + if *benchmarkMemory || b.showAllocResult { + runtime.ReadMemStats(&memStats) + b.netAllocs += memStats.Mallocs - b.startAllocs + b.netBytes += memStats.TotalAlloc - b.startBytes + } b.timerOn = false } } @@ -98,9 +102,11 @@ func (b *B) StopTimer() { // It does not affect whether the timer is running. func (b *B) ResetTimer() { if b.timerOn { - runtime.ReadMemStats(&memStats) - b.startAllocs = memStats.Mallocs - b.startBytes = memStats.TotalAlloc + if *benchmarkMemory || b.showAllocResult { + runtime.ReadMemStats(&memStats) + b.startAllocs = memStats.Mallocs + b.startBytes = memStats.TotalAlloc + } b.start = time.Now() } b.duration = 0 @@ -294,6 +300,8 @@ func (b *B) launch() { } // The results of a benchmark run. +// MemAllocs and MemBytes may be zero if memory benchmarking is not requested +// using B.ReportAllocs or the -benchmem command line flag. type BenchmarkResult struct { N int // The number of iterations. T time.Duration // The total time taken. @@ -316,6 +324,7 @@ func (r BenchmarkResult) mbPerSec() float64 { return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() } +// AllocsPerOp returns r.MemAllocs / r.N. func (r BenchmarkResult) AllocsPerOp() int64 { if r.N <= 0 { return 0 @@ -323,6 +332,7 @@ func (r BenchmarkResult) AllocsPerOp() int64 { return int64(r.MemAllocs) / int64(r.N) } +// AllocsPerOp returns r.MemBytes / r.N. func (r BenchmarkResult) AllocedBytesPerOp() int64 { if r.N <= 0 { return 0 @@ -350,6 +360,7 @@ func (r BenchmarkResult) String() string { return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) } +// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'. func (r BenchmarkResult) MemString() string { return fmt.Sprintf("%8d B/op\t%8d allocs/op", r.AllocedBytesPerOp(), r.AllocsPerOp()) diff --git a/src/testing/sub_test.go b/src/testing/sub_test.go index 1d1092c979..c12a2c807a 100644 --- a/src/testing/sub_test.go +++ b/src/testing/sub_test.go @@ -457,8 +457,14 @@ func TestBRun(t *T) { _ = append([]byte(nil), buf[:]...) } } - b.Run("", func(b *B) { alloc(b) }) - b.Run("", func(b *B) { alloc(b) }) + b.Run("", func(b *B) { + alloc(b) + b.ReportAllocs() + }) + b.Run("", func(b *B) { + alloc(b) + b.ReportAllocs() + }) // runtime.MemStats sometimes reports more allocations than the // benchmark is responsible for. Luckily the point of this test is // to ensure that the results are not underreported, so we can