From cec01395c5df103f8e359027fd80c8070ce41506 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 25 Sep 2019 15:55:29 +0000 Subject: [PATCH] runtime: add packed bitmap summaries This change adds the concept of summaries and of summarizing a set of pallocBits, a core concept in the new page allocator. These summaries are really just three integers packed into a uint64. This change also adds tests and a benchmark for generating these summaries. Updates #35112. Change-Id: I69686316086c820c792b7a54235859c2105e5fee Reviewed-on: https://go-review.googlesource.com/c/go/+/190621 Run-TryBot: Michael Knyszek TryBot-Result: Gobot Gobot Reviewed-by: Austin Clements Reviewed-by: Cherry Zhang --- src/runtime/export_test.go | 35 +++++++++ src/runtime/mpagealloc.go | 58 +++++++++++++++ src/runtime/mpallocbits.go | 74 +++++++++++++++++++ src/runtime/mpallocbits_test.go | 126 ++++++++++++++++++++++++++++++++ 4 files changed, 293 insertions(+) diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 27692791107..c00180c9fc9 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -735,6 +735,14 @@ const ( PallocChunkPages = pallocChunkPages ) +// Expose pallocSum for testing. +type PallocSum pallocSum + +func PackPallocSum(start, max, end uint) PallocSum { return PallocSum(packPallocSum(start, max, end)) } +func (m PallocSum) Start() uint { return pallocSum(m).start() } +func (m PallocSum) Max() uint { return pallocSum(m).max() } +func (m PallocSum) End() uint { return pallocSum(m).end() } + // Expose pallocBits for testing. type PallocBits pallocBits @@ -743,6 +751,33 @@ func (b *PallocBits) Find(npages uintptr, searchIdx uint) (uint, uint) { } func (b *PallocBits) AllocRange(i, n uint) { (*pallocBits)(b).allocRange(i, n) } func (b *PallocBits) Free(i, n uint) { (*pallocBits)(b).free(i, n) } +func (b *PallocBits) Summarize() PallocSum { return PallocSum((*pallocBits)(b).summarize()) } + +// SummarizeSlow is a slow but more obviously correct implementation +// of (*pallocBits).summarize. Used for testing. +func SummarizeSlow(b *PallocBits) PallocSum { + var start, max, end uint + + const N = uint(len(b)) * 64 + for start < N && (*pageBits)(b).get(start) == 0 { + start++ + } + for end < N && (*pageBits)(b).get(N-end-1) == 0 { + end++ + } + run := uint(0) + for i := uint(0); i < N; i++ { + if (*pageBits)(b).get(i) == 0 { + run++ + } else { + run = 0 + } + if run > max { + max = run + } + } + return PackPallocSum(start, max, end) +} // Expose non-trivial helpers for testing. func FindBitRange64(c uint64, n uint) uint { return findBitRange64(c, n) } diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go index 1818c7a3532..0f4ded05f02 100644 --- a/src/runtime/mpagealloc.go +++ b/src/runtime/mpagealloc.go @@ -70,3 +70,61 @@ const ( summaryLevelBits = 3 summaryL0Bits = heapAddrBits - logPallocChunkBytes - (summaryLevels-1)*summaryLevelBits ) + +const ( + // maxPackedValue is the maximum value that any of the three fields in + // the pallocSum may take on. + maxPackedValue = 1 << logMaxPackedValue + logMaxPackedValue = logPallocChunkPages + (summaryLevels-1)*summaryLevelBits +) + +// pallocSum is a packed summary type which packs three numbers: start, max, +// and end into a single 8-byte value. Each of these values are a summary of +// a bitmap and are thus counts, each of which may have a maximum value of +// 2^21 - 1, or all three may be equal to 2^21. The latter case is represented +// by just setting the 64th bit. +type pallocSum uint64 + +// packPallocSum takes a start, max, and end value and produces a pallocSum. +func packPallocSum(start, max, end uint) pallocSum { + if max == maxPackedValue { + return pallocSum(uint64(1 << 63)) + } + return pallocSum((uint64(start) & (maxPackedValue - 1)) | + ((uint64(max) & (maxPackedValue - 1)) << logMaxPackedValue) | + ((uint64(end) & (maxPackedValue - 1)) << (2 * logMaxPackedValue))) +} + +// start extracts the start value from a packed sum. +func (p pallocSum) start() uint { + if uint64(p)&uint64(1<<63) != 0 { + return maxPackedValue + } + return uint(uint64(p) & (maxPackedValue - 1)) +} + +// max extracts the max value from a packed sum. +func (p pallocSum) max() uint { + if uint64(p)&uint64(1<<63) != 0 { + return maxPackedValue + } + return uint((uint64(p) >> logMaxPackedValue) & (maxPackedValue - 1)) +} + +// end extracts the end value from a packed sum. +func (p pallocSum) end() uint { + if uint64(p)&uint64(1<<63) != 0 { + return maxPackedValue + } + return uint((uint64(p) >> (2 * logMaxPackedValue)) & (maxPackedValue - 1)) +} + +// unpack unpacks all three values from the summary. +func (p pallocSum) unpack() (uint, uint, uint) { + if uint64(p)&uint64(1<<63) != 0 { + return maxPackedValue, maxPackedValue, maxPackedValue + } + return uint(uint64(p) & (maxPackedValue - 1)), + uint((uint64(p) >> logMaxPackedValue) & (maxPackedValue - 1)), + uint((uint64(p) >> (2 * logMaxPackedValue)) & (maxPackedValue - 1)) +} diff --git a/src/runtime/mpallocbits.go b/src/runtime/mpallocbits.go index fe8cde92256..117a59bb3d9 100644 --- a/src/runtime/mpallocbits.go +++ b/src/runtime/mpallocbits.go @@ -95,6 +95,80 @@ func (b *pageBits) clearAll() { // sake of documentation, 0s are free pages and 1s are allocated pages. type pallocBits pageBits +// consec8tab is a table containing the number of consecutive +// zero bits for any uint8 value. +// +// The table is generated by calling consec8(i) for each +// possible uint8 value, which is defined as: +// +// // consec8 counts the maximum number of consecutive 0 bits +// // in a uint8. +// func consec8(n uint8) int { +// n = ^n +// i := 0 +// for n != 0 { +// n &= (n << 1) +// i++ +// } +// return i +// } +var consec8tab = [256]uint{ + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, + 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, + 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, + 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, + 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, + 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, + 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, + 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, + 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, + 6, 5, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, + 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 1, + 5, 4, 3, 3, 2, 2, 2, 2, 3, 2, 1, 1, 2, 1, 1, 1, + 4, 3, 2, 2, 2, 1, 1, 1, 3, 2, 1, 1, 2, 1, 1, 0, +} + +// summarize returns a packed summary of the bitmap in mallocBits. +func (b *pallocBits) summarize() pallocSum { + // TODO(mknyszek): There may be something more clever to be done + // here to make the summarize operation more efficient. For example, + // we can compute start and end with 64-bit wide operations easily, + // but max is a bit more complex. Perhaps there exists some way to + // leverage the 64-bit start and end to our advantage? + var start, max, end uint + for i := 0; i < len(b); i++ { + a := b[i] + for j := 0; j < 64; j += 8 { + k := uint8(a >> j) + + // Compute start. + si := uint(bits.TrailingZeros8(k)) + if start == uint(i*64+j) { + start += si + } + + // Compute max. + if end+si > max { + max = end + si + } + if mi := consec8tab[k]; mi > max { + max = mi + } + + // Compute end. + if k == 0 { + end += 8 + } else { + end = uint(bits.LeadingZeros8(k)) + } + } + } + return packPallocSum(start, max, end) +} + // find searches for npages contiguous free pages in pallocBits and returns // the index where that run starts, as well as the index of the first free page // it found in the search. searchIdx represents the first known free page and diff --git a/src/runtime/mpallocbits_test.go b/src/runtime/mpallocbits_test.go index 2ac7899c36b..668ec12b058 100644 --- a/src/runtime/mpallocbits_test.go +++ b/src/runtime/mpallocbits_test.go @@ -5,6 +5,8 @@ package runtime_test import ( + "fmt" + "math/rand" . "runtime" "testing" ) @@ -95,6 +97,130 @@ func invertPallocBits(b *PallocBits) { } } +// Ensures two packed summaries are identical, and reports a detailed description +// of the difference if they're not. +func checkPallocSum(t *testing.T, got, want PallocSum) { + if got.Start() != want.Start() { + t.Errorf("inconsistent start: got %d, want %d", got.Start(), want.Start()) + } + if got.Max() != want.Max() { + t.Errorf("inconsistent max: got %d, want %d", got.Max(), want.Max()) + } + if got.End() != want.End() { + t.Errorf("inconsistent end: got %d, want %d", got.End(), want.End()) + } +} + +// Ensures computing bit summaries works as expected by generating random +// bitmaps and checking against a reference implementation. +func TestPallocBitsSummarizeRandom(t *testing.T) { + b := new(PallocBits) + for i := 0; i < 1000; i++ { + // Randomize bitmap. + for i := range b { + b[i] = rand.Uint64() + } + // Check summary against reference implementation. + checkPallocSum(t, b.Summarize(), SummarizeSlow(b)) + } +} + +// Ensures computing bit summaries works as expected. +func TestPallocBitsSummarize(t *testing.T) { + var emptySum = PackPallocSum(PallocChunkPages, PallocChunkPages, PallocChunkPages) + type test struct { + free []BitRange // Ranges of free (zero) bits. + hits []PallocSum + } + tests := make(map[string]test) + tests["NoneFree"] = test{ + free: []BitRange{}, + hits: []PallocSum{ + PackPallocSum(0, 0, 0), + }, + } + tests["OnlyStart"] = test{ + free: []BitRange{{0, 10}}, + hits: []PallocSum{ + PackPallocSum(10, 10, 0), + }, + } + tests["OnlyEnd"] = test{ + free: []BitRange{{PallocChunkPages - 40, 40}}, + hits: []PallocSum{ + PackPallocSum(0, 40, 40), + }, + } + tests["StartAndEnd"] = test{ + free: []BitRange{{0, 11}, {PallocChunkPages - 23, 23}}, + hits: []PallocSum{ + PackPallocSum(11, 23, 23), + }, + } + tests["StartMaxEnd"] = test{ + free: []BitRange{{0, 4}, {50, 100}, {PallocChunkPages - 4, 4}}, + hits: []PallocSum{ + PackPallocSum(4, 100, 4), + }, + } + tests["OnlyMax"] = test{ + free: []BitRange{{1, 20}, {35, 241}, {PallocChunkPages - 50, 30}}, + hits: []PallocSum{ + PackPallocSum(0, 241, 0), + }, + } + tests["MultiMax"] = test{ + free: []BitRange{{35, 2}, {40, 5}, {100, 5}}, + hits: []PallocSum{ + PackPallocSum(0, 5, 0), + }, + } + tests["One"] = test{ + free: []BitRange{{2, 1}}, + hits: []PallocSum{ + PackPallocSum(0, 1, 0), + }, + } + tests["AllFree"] = test{ + free: []BitRange{{0, PallocChunkPages}}, + hits: []PallocSum{ + emptySum, + }, + } + for name, v := range tests { + v := v + t.Run(name, func(t *testing.T) { + b := makePallocBits(v.free) + // In the PallocBits we create 1's represent free spots, but in our actual + // PallocBits 1 means not free, so invert. + invertPallocBits(b) + for _, h := range v.hits { + checkPallocSum(t, b.Summarize(), h) + } + }) + } +} + +// Benchmarks how quickly we can summarize a PallocBits. +func BenchmarkPallocBitsSummarize(b *testing.B) { + buf0 := new(PallocBits) + buf1 := new(PallocBits) + for i := 0; i < len(buf1); i++ { + buf1[i] = ^uint64(0) + } + bufa := new(PallocBits) + for i := 0; i < len(bufa); i++ { + bufa[i] = 0xaa + } + for _, buf := range []*PallocBits{buf0, buf1, bufa} { + b.Run(fmt.Sprintf("Unpacked%02X", buf[0]), func(b *testing.B) { + for i := 0; i < b.N; i++ { + buf.Summarize() + } + }) + } +} + // Ensures page allocation works. func TestPallocBitsAlloc(t *testing.T) { tests := map[string]struct {