From b116404444addc69b5ec987a2a64b92d4956eab0 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Tue, 22 Dec 2020 17:47:43 +0000 Subject: [PATCH] runtime: shift timeHistogram buckets and allow negative durations Today, timeHistogram, when copied, has the wrong set of counts for the bucket that should represent (-inf, 0), when in fact it contains [0, 1). In essence, the buckets are all shifted over by one from where they're supposed to be. But this also means that the existence of the overflow bucket is wrong: the top bucket is supposed to extend to infinity, and what we're really missing is an underflow bucket to represent the range (-inf, 0). We could just always zero this bucket and continue ignoring negative durations, but that likely isn't prudent. timeHistogram is intended to be used with differences in nanotime, but depending on how a platform is implemented (or due to a bug in that platform) it's possible to get a negative duration without having done anything wrong. We should just be resilient to that and be able to detect it. So this change removes the overflow bucket and replaces it with an underflow bucket, and timeHistogram no longer panics when faced with a negative duration. Fixes #43328. Fixes #43329. Change-Id: If336425d7d080fd37bf071e18746800e22d38108 Reviewed-on: https://go-review.googlesource.com/c/go/+/279468 Run-TryBot: Michael Knyszek TryBot-Result: Go Bot Trust: Michael Knyszek Reviewed-by: Michael Pratt --- src/runtime/export_test.go | 4 ++-- src/runtime/histogram.go | 30 +++++++++++++++--------------- src/runtime/histogram_test.go | 22 +++++++++++++++++----- src/runtime/metrics.go | 4 ++-- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 44551dcaf14..22fef3134f5 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1201,12 +1201,12 @@ type TimeHistogram timeHistogram // Counts returns the counts for the given bucket, subBucket indices. // Returns true if the bucket was valid, otherwise returns the counts -// for the overflow bucket and false. +// for the underflow bucket and false. func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) { t := (*timeHistogram)(th) i := bucket*TimeHistNumSubBuckets + subBucket if i >= uint(len(t.counts)) { - return t.overflow, false + return t.underflow, false } return t.counts[i], true } diff --git a/src/runtime/histogram.go b/src/runtime/histogram.go index 4020969eb99..d48e856cd0d 100644 --- a/src/runtime/histogram.go +++ b/src/runtime/histogram.go @@ -69,17 +69,15 @@ const ( // for concurrent use. It is also safe to read all the values // atomically. type timeHistogram struct { - counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64 - overflow uint64 + counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64 + underflow uint64 } // record adds the given duration to the distribution. -// -// Although the duration is an int64 to facilitate ease-of-use -// with e.g. nanotime, the duration must be non-negative. func (h *timeHistogram) record(duration int64) { if duration < 0 { - throw("timeHistogram encountered negative duration") + atomic.Xadd64(&h.underflow, 1) + return } // The index of the exponential bucket is just the index // of the highest set bit adjusted for how many bits we @@ -92,15 +90,17 @@ func (h *timeHistogram) record(duration int64) { superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) { // The bucket index we got is larger than what we support, so - // add into the special overflow bucket. - atomic.Xadd64(&h.overflow, 1) - return + // include this count in the highest bucket, which extends to + // infinity. + superBucket = timeHistNumSuperBuckets - 1 + subBucket = timeHistNumSubBuckets - 1 + } else { + // The linear subbucket index is just the timeHistSubBucketsBits + // bits after the top bit. To extract that value, shift down + // the duration such that we leave the top bit and the next bits + // intact, then extract the index. + subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets) } - // The linear subbucket index is just the timeHistSubBucketsBits - // bits after the top bit. To extract that value, shift down - // the duration such that we leave the top bit and the next bits - // intact, then extract the index. - subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets) } else { subBucket = uint(duration) } @@ -128,7 +128,7 @@ func timeHistogramMetricsBuckets() []float64 { // index to combine it with the bucketMin. subBucketShift := uint(0) if i > 1 { - // The first two buckets are exact with respect to integers, + // The first two super buckets are exact with respect to integers, // so we'll never have to shift the sub-bucket index. Thereafter, // we shift up by 1 with each subsequent bucket. subBucketShift = uint(i - 2) diff --git a/src/runtime/histogram_test.go b/src/runtime/histogram_test.go index 5f5b28f7848..dbc64fa5592 100644 --- a/src/runtime/histogram_test.go +++ b/src/runtime/histogram_test.go @@ -5,6 +5,7 @@ package runtime_test import ( + "math" . "runtime" "testing" ) @@ -32,8 +33,8 @@ func TestTimeHistogram(t *testing.T) { h.Record(base + v) } } - // Hit the overflow bucket. - h.Record(int64(^uint64(0) >> 1)) + // Hit the underflow bucket. + h.Record(int64(-1)) // Check to make sure there's exactly one count in each // bucket. @@ -41,7 +42,7 @@ func TestTimeHistogram(t *testing.T) { for j := uint(0); j < TimeHistNumSubBuckets; j++ { c, ok := h.Count(i, j) if !ok { - t.Errorf("hit overflow bucket unexpectedly: (%d, %d)", i, j) + t.Errorf("hit underflow bucket unexpectedly: (%d, %d)", i, j) } else if c != 1 { t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c) } @@ -49,10 +50,21 @@ func TestTimeHistogram(t *testing.T) { } c, ok := h.Count(TimeHistNumSuperBuckets, 0) if ok { - t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0) + t.Errorf("expected to hit underflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0) } if c != 1 { - t.Errorf("overflow bucket has count that is not 1: %d", c) + t.Errorf("underflow bucket has count that is not 1: %d", c) } + + // Check overflow behavior. + // By hitting a high value, we should just be adding into the highest bucket. + h.Record(math.MaxInt64) + c, ok = h.Count(TimeHistNumSuperBuckets-1, TimeHistNumSubBuckets-1) + if !ok { + t.Error("hit underflow bucket in highest bucket unexpectedly") + } else if c != 2 { + t.Errorf("highest has count that is not 2: %d", c) + } + dummyTimeHistogram = TimeHistogram{} } diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go index af86a0f03aa..1d191e62985 100644 --- a/src/runtime/metrics.go +++ b/src/runtime/metrics.go @@ -116,9 +116,9 @@ func initMetrics() { "/gc/pauses:seconds": { compute: func(_ *statAggregate, out *metricValue) { hist := out.float64HistOrInit(timeHistBuckets) - hist.counts[len(hist.counts)-1] = atomic.Load64(&memstats.gcPauseDist.overflow) + hist.counts[0] = atomic.Load64(&memstats.gcPauseDist.underflow) for i := range hist.buckets { - hist.counts[i] = atomic.Load64(&memstats.gcPauseDist.counts[i]) + hist.counts[i+1] = atomic.Load64(&memstats.gcPauseDist.counts[i]) } }, },