From 29e9c4d4a4064fcd5edcb47d4782bd96082a068e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 23 Jun 2016 14:25:50 -0600 Subject: [PATCH] runtime: lay out heap bitmap forward in memory Currently the heap bitamp is laid in reverse order in memory relative to the heap itself. This was originally done out of "excessive cleverness" so that computing a bitmap pointer could load only the arena_start field and so that heaps could be more contiguous by growing the arena and the bitmap out from a common center point. However, this appears to have no actual performance benefit, it complicates nearly every use of the bitmap, and it makes already confusing code more confusing. Furthermore, it's still possible to use a single field (the new bitmap_delta) for the bitmap pointer computation by employing slightly different excessive cleverness. Hence, this CL puts the bitmap into forward order. This is a (very) updated version of CL 9404. Change-Id: I743587cc626c4ecd81e660658bad85b54584108c Reviewed-on: https://go-review.googlesource.com/85881 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Rick Hudson --- src/runtime/malloc.go | 15 +++++++- src/runtime/mbitmap.go | 87 ++++++++++++++++++++---------------------- src/runtime/mheap.go | 5 ++- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 72b8f40b96..4122b7ba23 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -369,7 +369,7 @@ func mallocinit() { spansStart := p1 p1 += spansSize - mheap_.bitmap = p1 + bitmapSize + mheap_.bitmap_start = p1 p1 += bitmapSize if sys.PtrSize == 4 { // Set arena_start such that we can accept memory @@ -383,6 +383,19 @@ func mallocinit() { mheap_.arena_alloc = p1 mheap_.arena_reserved = reserved + // Pre-compute the value heapBitsForAddr can use to directly + // map a heap address to a bitmap address. The obvious + // computation is: + // + // bitp = bitmap_start + (addr - arena_start)/ptrSize/4 + // + // We can shuffle this to + // + // bitp = (bitmap_start - arena_start/ptrSize/4) + addr/ptrSize/4 + // + // bitmap_delta is the value of the first term. + mheap_.bitmap_delta = mheap_.bitmap_start - mheap_.arena_start/heapBitmapScale + if mheap_.arena_start&(_PageSize-1) != 0 { println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(bitmapSize), hex(_PageSize), "start", hex(mheap_.arena_start)) throw("misrounded allocation in mallocinit") diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go index 35c81e4bd9..8e414ecaf3 100644 --- a/src/runtime/mbitmap.go +++ b/src/runtime/mbitmap.go @@ -16,10 +16,11 @@ // The allocated heap comes from a subset of the memory in the range [start, used), // where start == mheap_.arena_start and used == mheap_.arena_used. // The heap bitmap comprises 2 bits for each pointer-sized word in that range, -// stored in bytes indexed backward in memory from start. -// That is, the byte at address start-1 holds the 2-bit entries for the four words -// start through start+3*ptrSize, the byte at start-2 holds the entries for -// start+4*ptrSize through start+7*ptrSize, and so on. +// stored in bytes indexed forward in memory from bitmap_start. +// That is, the byte at address bitmap holds the 2-bit entries for the +// four words start through start+3*ptrSize, the byte at +// bitmap_start+1 holds the entries for start+4*ptrSize through +// start+7*ptrSize, and so on. // // In each 2-bit entry, the lower bit holds the same information as in the 1-bit // bitmaps: 0 means uninteresting and 1 means live pointer to be visited during GC. @@ -104,8 +105,6 @@ func addb(p *byte, n uintptr) *byte { } // subtractb returns the byte pointer p-n. -// subtractb is typically used when traversing the pointer tables referred to by hbits -// which are arranged in reverse order. //go:nowritebarrier //go:nosplit func subtractb(p *byte, n uintptr) *byte { @@ -126,8 +125,6 @@ func add1(p *byte) *byte { } // subtract1 returns the byte pointer p-1. -// subtract1 is typically used when traversing the pointer tables referred to by hbits -// which are arranged in reverse order. //go:nowritebarrier // // nosplit because it is used during write barriers and must not be preempted. @@ -157,7 +154,7 @@ func (h *mheap) mapBits(arena_used uintptr) { return } - sysMap(unsafe.Pointer(h.bitmap-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) + sysMap(unsafe.Pointer(h.bitmap_start+h.bitmap_mapped), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys) h.bitmap_mapped = n } @@ -356,9 +353,9 @@ func (m *markBits) advance() { // nosplit because it is used during write barriers and must not be preempted. //go:nosplit func heapBitsForAddr(addr uintptr) heapBits { - // 2 bits per work, 4 pairs per byte, and a mask is hard coded. - off := (addr - mheap_.arena_start) / sys.PtrSize - return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap - off/4 - 1)), uint32(off & 3)} + // 2 bits per word, 4 pairs per byte, and a mask is hard coded. + off := addr / sys.PtrSize + return heapBits{(*uint8)(unsafe.Pointer(mheap_.bitmap_delta + off/4)), uint32(off & 3)} } // heapBitsForSpan returns the heapBits for the span base address base. @@ -450,7 +447,7 @@ func (h heapBits) next() heapBits { if h.shift < 3*heapBitsShift { return heapBits{h.bitp, h.shift + heapBitsShift} } - return heapBits{subtract1(h.bitp), 0} + return heapBits{add1(h.bitp), 0} } // forward returns the heapBits describing n pointer-sized words ahead of h in memory. @@ -460,7 +457,7 @@ func (h heapBits) next() heapBits { // bits returns the heap bits for the current word. func (h heapBits) forward(n uintptr) heapBits { n += uintptr(h.shift) / heapBitsShift - return heapBits{subtractb(h.bitp, n/4), uint32(n%4) * heapBitsShift} + return heapBits{addb(h.bitp, n/4), uint32(n%4) * heapBitsShift} } // The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer. @@ -723,20 +720,20 @@ func (h heapBits) initSpan(s *mspan) { if total%heapBitmapScale != 0 { throw("initSpan: unaligned length") } + if h.shift != 0 { + throw("initSpan: unaligned base") + } nbyte := total / heapBitmapScale if sys.PtrSize == 8 && size == sys.PtrSize { - end := h.bitp - bitp := subtractb(end, nbyte-1) - for { + bitp := h.bitp + end := addb(bitp, nbyte) + for bitp != end { *bitp = bitPointerAll | bitScanAll - if bitp == end { - break - } bitp = add1(bitp) } return } - memclrNoHeapPointers(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte) + memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte) } // initCheckmarkSpan initializes a span for being checkmarked. @@ -751,7 +748,7 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) { bitp := h.bitp for i := uintptr(0); i < n; i += 4 { *bitp &^= bitPointerAll - bitp = subtract1(bitp) + bitp = add1(bitp) } return } @@ -775,7 +772,7 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) { bitp := h.bitp for i := uintptr(0); i < n; i += 4 { *bitp |= bitPointerAll - bitp = subtract1(bitp) + bitp = add1(bitp) } } } @@ -1130,7 +1127,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { goto Phase3 } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 nb -= 4 @@ -1151,7 +1148,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { // the checkmark. *hbitp &^= uint8((bitPointer | bitScan | (bitPointer << heapBitsShift)) << (2 * heapBitsShift)) *hbitp |= uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) if w += 2; w >= nw { // We know that there is more data, because we handled 2-word objects above. // This must be at least a 6-word object. If we're out of pointer words, @@ -1181,7 +1178,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 // Load more bits. b has nb right now. @@ -1229,7 +1226,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) { break } *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) b >>= 4 } @@ -1250,11 +1247,11 @@ Phase3: // The first is hb, the rest are zero. if w <= nw { *hbitp = uint8(hb) - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) hb = 0 // for possible final half-byte below for w += 4; w <= nw; w += 4 { *hbitp = 0 - hbitp = subtract1(hbitp) + hbitp = add1(hbitp) } } @@ -1420,9 +1417,9 @@ func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize u // so that scanobject can stop early in the final element. totalBits = (elemSize*(count-1) + progSize) / sys.PtrSize } - endProg := unsafe.Pointer(subtractb(h.bitp, (totalBits+3)/4)) - endAlloc := unsafe.Pointer(subtractb(h.bitp, allocSize/heapBitmapScale)) - memclrNoHeapPointers(add(endAlloc, 1), uintptr(endProg)-uintptr(endAlloc)) + endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4)) + endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/heapBitmapScale)) + memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg)) } // progToPointerMask returns the 1-bit pointer mask output by the GC program prog. @@ -1481,11 +1478,11 @@ Run: } else { v := bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&bitPointerAll | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1519,11 +1516,11 @@ Run: } else { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 v = bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } @@ -1583,11 +1580,11 @@ Run: npattern += 8 } } else { - src = add1(src) + src = subtract1(src) for npattern < n { pattern <<= 4 pattern |= uintptr(*src) & 0xf - src = add1(src) + src = subtract1(src) npattern += 4 } } @@ -1649,7 +1646,7 @@ Run: } else { for nbits >= 4 { *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 nbits -= 4 } @@ -1694,10 +1691,10 @@ Run: } } else { // Leading src fragment. - src = addb(src, (off+3)/4) + src = subtractb(src, (off+3)/4) if frag := off & 3; frag != 0 { bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits - src = subtract1(src) + src = add1(src) nbits += frag c -= frag } @@ -1705,9 +1702,9 @@ Run: // The bits are rotating through the bit buffer. for i := c / 4; i > 0; i-- { bits |= (uintptr(*src) & 0xf) << nbits - src = subtract1(src) + src = add1(src) *dst = uint8(bits&0xf | bitScanAll) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } // Final src fragment. @@ -1729,12 +1726,12 @@ Run: bits >>= 8 } } else { - totalBits = (uintptr(unsafe.Pointer(dstStart))-uintptr(unsafe.Pointer(dst)))*4 + nbits + totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits nbits += -nbits & 3 for ; nbits > 0; nbits -= 4 { v := bits&0xf | bitScanAll *dst = uint8(v) - dst = subtract1(dst) + dst = add1(dst) bits >>= 4 } } diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 832ac2a5e1..737161dfee 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -114,8 +114,9 @@ type mheap struct { nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize) // range of addresses we might see in the heap - bitmap uintptr // Points to one byte past the end of the bitmap + bitmap_start uintptr // Points to first byte of bitmap bitmap_mapped uintptr + bitmap_delta uintptr // Used to map heap address to bitmap address // The arena_* fields indicate the addresses of the Go heap. // @@ -142,7 +143,7 @@ type mheap struct { // here and *must* clobber it to use it. arena_reserved bool - _ uint32 // ensure 64-bit alignment + //_ uint32 // ensure 64-bit alignment // central free lists for small size classes. // the padding makes sure that the MCentrals are