runtime: shift timeHistogram buckets and allow negative durations

Today, timeHistogram, when copied, has the wrong set of counts for the bucket that should represent (-inf, 0), when in fact it contains [0, 1). In essence, the buckets are all shifted over by one from where they're supposed to be. But this also means that the existence of the overflow bucket is wrong: the top bucket is supposed to extend to infinity, and what we're really missing is an underflow bucket to represent the range (-inf, 0). We could just always zero this bucket and continue ignoring negative durations, but that likely isn't prudent. timeHistogram is intended to be used with differences in nanotime, but depending on how a platform is implemented (or due to a bug in that platform) it's possible to get a negative duration without having done anything wrong. We should just be resilient to that and be able to detect it. So this change removes the overflow bucket and replaces it with an underflow bucket, and timeHistogram no longer panics when faced with a negative duration. Fixes #43328. Fixes #43329. Change-Id: If336425d7d080fd37bf071e18746800e22d38108 Reviewed-on: https://go-review.googlesource.com/c/go/+/279468 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Go Bot <gobot@golang.org> Trust: Michael Knyszek <mknyszek@google.com> Reviewed-by: Michael Pratt <mpratt@google.com>
2024-11-22 08:54:39 -07:00 · 2020-12-22 17:47:43 +00:00 · 2020-12-22 17:47:43 +00:00 · b116404444
commit b116404444
parent 8db7e2fecd
4 changed files with 36 additions and 24 deletions
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@ -1201,12 +1201,12 @@ type TimeHistogram timeHistogram

 // Counts returns the counts for the given bucket, subBucket indices.
 // Returns true if the bucket was valid, otherwise returns the counts
-// for the overflow bucket and false.
+// for the underflow bucket and false.
 func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) {
 	t := (*timeHistogram)(th)
 	i := bucket*TimeHistNumSubBuckets + subBucket
 	if i >= uint(len(t.counts)) {
-		return t.overflow, false
+		return t.underflow, false
 	}
 	return t.counts[i], true
 }
--- a/src/runtime/histogram.go
+++ b/src/runtime/histogram.go
@ -69,17 +69,15 @@ const (
 // for concurrent use. It is also safe to read all the values
 // atomically.
 type timeHistogram struct {
-	counts   [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64
-	overflow uint64
+	counts    [timeHistNumSuperBuckets * timeHistNumSubBuckets]uint64
+	underflow uint64
 }

 // record adds the given duration to the distribution.
-//
-// Although the duration is an int64 to facilitate ease-of-use
-// with e.g. nanotime, the duration must be non-negative.
 func (h *timeHistogram) record(duration int64) {
 	if duration < 0 {
-		throw("timeHistogram encountered negative duration")
+		atomic.Xadd64(&h.underflow, 1)
+		return
 	}
 	// The index of the exponential bucket is just the index
 	// of the highest set bit adjusted for how many bits we
@ -92,15 +90,17 @@ func (h *timeHistogram) record(duration int64) {
 		superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits
 		if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) {
 			// The bucket index we got is larger than what we support, so
-			// add into the special overflow bucket.
-			atomic.Xadd64(&h.overflow, 1)
-			return
+			// include this count in the highest bucket, which extends to
+			// infinity.
+			superBucket = timeHistNumSuperBuckets - 1
+			subBucket = timeHistNumSubBuckets - 1
+		} else {
+			// The linear subbucket index is just the timeHistSubBucketsBits
+			// bits after the top bit. To extract that value, shift down
+			// the duration such that we leave the top bit and the next bits
+			// intact, then extract the index.
+			subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets)
 		}
-		// The linear subbucket index is just the timeHistSubBucketsBits
-		// bits after the top bit. To extract that value, shift down
-		// the duration such that we leave the top bit and the next bits
-		// intact, then extract the index.
-		subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets)
 	} else {
 		subBucket = uint(duration)
 	}
@ -128,7 +128,7 @@ func timeHistogramMetricsBuckets() []float64 {
 		// index to combine it with the bucketMin.
 		subBucketShift := uint(0)
 		if i > 1 {
-			// The first two buckets are exact with respect to integers,
+			// The first two super buckets are exact with respect to integers,
 			// so we'll never have to shift the sub-bucket index. Thereafter,
 			// we shift up by 1 with each subsequent bucket.
 			subBucketShift = uint(i - 2)
--- a/src/runtime/histogram_test.go
+++ b/src/runtime/histogram_test.go
@ -5,6 +5,7 @@
 package runtime_test

 import (
+	"math"
 	. "runtime"
 	"testing"
 )
@ -32,8 +33,8 @@ func TestTimeHistogram(t *testing.T) {
 			h.Record(base + v)
 		}
 	}
-	// Hit the overflow bucket.
-	h.Record(int64(^uint64(0) >> 1))
+	// Hit the underflow bucket.
+	h.Record(int64(-1))

 	// Check to make sure there's exactly one count in each
 	// bucket.
@ -41,7 +42,7 @@ func TestTimeHistogram(t *testing.T) {
 		for j := uint(0); j < TimeHistNumSubBuckets; j++ {
 			c, ok := h.Count(i, j)
 			if !ok {
-				t.Errorf("hit overflow bucket unexpectedly: (%d, %d)", i, j)
+				t.Errorf("hit underflow bucket unexpectedly: (%d, %d)", i, j)
 			} else if c != 1 {
 				t.Errorf("bucket (%d, %d) has count that is not 1: %d", i, j, c)
 			}
@ -49,10 +50,21 @@ func TestTimeHistogram(t *testing.T) {
 	}
 	c, ok := h.Count(TimeHistNumSuperBuckets, 0)
 	if ok {
-		t.Errorf("expected to hit overflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0)
+		t.Errorf("expected to hit underflow bucket: (%d, %d)", TimeHistNumSuperBuckets, 0)
 	}
 	if c != 1 {
-		t.Errorf("overflow bucket has count that is not 1: %d", c)
+		t.Errorf("underflow bucket has count that is not 1: %d", c)
 	}
+
+	// Check overflow behavior.
+	// By hitting a high value, we should just be adding into the highest bucket.
+	h.Record(math.MaxInt64)
+	c, ok = h.Count(TimeHistNumSuperBuckets-1, TimeHistNumSubBuckets-1)
+	if !ok {
+		t.Error("hit underflow bucket in highest bucket unexpectedly")
+	} else if c != 2 {
+		t.Errorf("highest has count that is not 2: %d", c)
+	}
+
 	dummyTimeHistogram = TimeHistogram{}
 }
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@ -116,9 +116,9 @@ func initMetrics() {
 		"/gc/pauses:seconds": {
 			compute: func(_ *statAggregate, out *metricValue) {
 				hist := out.float64HistOrInit(timeHistBuckets)
-				hist.counts[len(hist.counts)-1] = atomic.Load64(&memstats.gcPauseDist.overflow)
+				hist.counts[0] = atomic.Load64(&memstats.gcPauseDist.underflow)
 				for i := range hist.buckets {
-					hist.counts[i] = atomic.Load64(&memstats.gcPauseDist.counts[i])
+					hist.counts[i+1] = atomic.Load64(&memstats.gcPauseDist.counts[i])
 				}
 			},
 		},