
runtime: Remove boundary bit logic.

This is an experiment to see if removing the boundary bit logic will
lead to fewer cache misses and improved performance. Instead of using
boundary bits, we use the span information to get the element size and
use some bit whacking to compute the object boundary without having to
touch the random heap bits, which cause cache misses.
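To make that concrete, here is a minimal, self-contained Go sketch of
the span-based lookup (the mspan here is pared down to two fields, and
objectBase and the fixed pageShift are illustrative assumptions, not
code from this CL; compare the "base += n * s.elemsize" arithmetic in
the diff below):

	package main

	import "fmt"

	const pageShift = 13 // stand-in for the runtime's _PageShift

	// mspan is reduced to the two fields the lookup needs.
	type mspan struct {
		start    uintptr // first page number covered by the span
		elemsize uintptr // size in bytes of each object in the span
	}

	// objectBase recovers the base of the object containing p from
	// span metadata alone: compute the object's index from its offset
	// into the span, then round down. No heap bitmap bytes are read,
	// so no extra cache lines are touched.
	func objectBase(p uintptr, s *mspan) uintptr {
		spanBase := s.start << pageShift
		n := (p - spanBase) / s.elemsize
		return spanBase + n*s.elemsize
	}

	func main() {
		s := &mspan{start: 1, elemsize: 48}
		p := uintptr(1)<<pageShift + 2*48 + 5 // 5 bytes into object #2
		fmt.Printf("%#x\n", objectBase(p, s)) // 0x2060, the base of object #2
	}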

Furthermore, once the boundary bit is removed we can either use that
bit for a simpler checkmark routine or reduce the number of bits in
the GC bitmap to 2 bits per pointer-sized word. For example, the 2
bits at the boundary can be used for marking and pointer/scalar
differentiation. Since the mark bit is only needed in the boundary
nibble of the object, the other nibbles can use this bit as a noscan
bit to indicate that there are no more pointers in the object.
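As a purely hypothetical illustration of that 2-bit layout (entryFor
and its constants are assumptions made up for this sketch; this CL
does not implement any of it), the indexing arithmetic would be:

	package main

	import "fmt"

	const ptrSize = 8 // pointer-sized word on a 64-bit system

	// entryFor maps the byte offset of a heap word to the bitmap
	// byte that describes it and the shift of its 2-bit entry:
	// four 2-bit entries per bitmap byte instead of the current
	// two 4-bit nibbles.
	func entryFor(off uintptr) (byteIdx uintptr, shift uint) {
		word := off / ptrSize
		return word / 4, uint(word%4) * 2
	}

	func main() {
		for _, off := range []uintptr{0, 8, 16, 24, 32} {
			b, s := entryFor(off)
			fmt.Printf("word at +%d -> bitmap byte %d, shift %d\n", off, b, s)
		}
	}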

Currently the changes included in this CL slow down the garbage
benchmark. With the boundary bits the garbage benchmark gives 5.78,
and without them (this CL) it gives 5.88, which is about a 2% slowdown.
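(For reference, the measured difference works out to
(5.88 - 5.78) / 5.78 ≈ 1.7%, i.e. roughly the quoted 2%.)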

Change-Id: Id68f831ad668176f7dc9f7b57b339e4ebb6dc4c2
Reviewed-on: https://go-review.googlesource.com/6665
Reviewed-by: Austin Clements <austin@google.com>
Rick Hudson 2015-03-03 16:55:14 -05:00
parent 7be32d038a
commit 122384e489
4 changed files with 34 additions and 130 deletions

View File

@@ -32,7 +32,7 @@
 // describe p and the high 4 bits describe p+ptrSize.
 //
 // The 4 bits for each word are:
-// 0001 - bitBoundary: this is the start of an object
+// 0001 - not used
 // 0010 - bitMarked: this object has been marked by GC
 // tt00 - word type bits, as in a type bitmap.
 //
@@ -77,7 +77,6 @@ const (
 	heapBitsWidth   = 4
 	heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes per heap bitmap byte
-	bitBoundary     = 1
 	bitMarked       = 2
 	typeShift       = 2
 )
@@ -151,30 +150,21 @@ func heapBitsForSpan(base uintptr) (hbits heapBits) {
 // heapBitsForObject returns the base address for the heap object
 // containing the address p, along with the heapBits for base.
-// If p does not point into a heap object, heapBitsForObject returns base == 0.
-func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits) {
+// If p does not point into a heap object,
+// return base == 0
+// otherwise return the base of the object.
+func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) {
 	if p < mheap_.arena_start || p >= mheap_.arena_used {
 		return
 	}
-	// If heap bits for the pointer-sized word containing p have bitBoundary set,
-	// then we know this is the base of the object, and we can stop now.
-	// This handles the case where p is the base and, due to rounding
-	// when looking up the heap bits, also the case where p points beyond
-	// the base but still into the first pointer-sized word of the object.
-	hbits = heapBitsForAddr(p)
-	if hbits.isBoundary() {
-		base = p &^ (ptrSize - 1)
-		return
-	}
-	// Otherwise, p points into the middle of an object.
+	// p points into the heap, but possibly to the middle of an object.
 	// Consult the span table to find the block beginning.
 	// TODO(rsc): Factor this out.
 	k := p >> _PageShift
 	x := k
 	x -= mheap_.arena_start >> _PageShift
-	s := h_spans[x]
+	s = h_spans[x]
 	if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
 		if s == nil || s.state == _MSpanStack {
 			// If s is nil, the virtual address has never been part of the heap.
@@ -216,19 +206,16 @@ func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits) {
 		base += n * s.elemsize
 	}
-	if base == p {
-		print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
-		throw("failed to find block beginning")
-	}
 	// Now that we know the actual base, compute heapBits to return to caller.
 	hbits = heapBitsForAddr(base)
-	if !hbits.isBoundary() {
-		throw("missing boundary at computed object start")
-	}
 	return
 }
 
+// prefetch the bits.
+func (h heapBits) prefetch() {
+	prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
+}
+
 // next returns the heapBits describing the next pointer-sized word in memory.
 // That is, if h describes address p, h.next() describes p+ptrSize.
 // Note that next does not modify h. The caller must record the result.
@@ -258,14 +245,6 @@ func (h heapBits) setMarkedNonAtomic() {
 	*h.bitp |= bitMarked << h.shift
 }
 
-// isBoundary reports whether the heap bits have the boundary bit set.
-func (h heapBits) isBoundary() bool {
-	return *h.bitp&(bitBoundary<<h.shift) != 0
-}
-
-// Note that there is no setBoundary or setBoundaryNonAtomic.
-// Boundaries are always in bulk, for the entire span.
-
 // typeBits returns the heap bits' type bits.
 func (h heapBits) typeBits() uint8 {
 	return (*h.bitp >> (h.shift + typeShift)) & typeMask
@@ -299,60 +278,8 @@ func (h heapBits) setCheckmarked() {
 // initSpan initializes the heap bitmap for a span.
 func (h heapBits) initSpan(size, n, total uintptr) {
-	if size == ptrSize {
-		// Only possible on 64-bit system, since minimum size is 8.
-		// Set all nibbles to bitBoundary using uint64 writes.
-		nbyte := n * ptrSize / heapBitmapScale
-		nuint64 := nbyte / 8
-		bitp := subtractb(h.bitp, nbyte-1)
-		for i := uintptr(0); i < nuint64; i++ {
-			const boundary64 = bitBoundary |
-				bitBoundary<<4 |
-				bitBoundary<<8 |
-				bitBoundary<<12 |
-				bitBoundary<<16 |
-				bitBoundary<<20 |
-				bitBoundary<<24 |
-				bitBoundary<<28 |
-				bitBoundary<<32 |
-				bitBoundary<<36 |
-				bitBoundary<<40 |
-				bitBoundary<<44 |
-				bitBoundary<<48 |
-				bitBoundary<<52 |
-				bitBoundary<<56 |
-				bitBoundary<<60
-			*(*uint64)(unsafe.Pointer(bitp)) = boundary64
-			bitp = addb(bitp, 8)
-		}
-		return
-	}
-	if size*n < total {
-		// To detect end of object during GC object scan,
-		// add boundary just past end of last block.
-		// The object scan knows to stop when it reaches
-		// the end of the span, but in this case the object
-		// ends before the end of the span.
-		//
-		// TODO(rsc): If the bitmap bits were going to be typeDead
-		// otherwise, what's the point of this?
-		// Can we delete this logic?
-		n++
-	}
-	step := size / heapBitmapScale
-	bitp := h.bitp
-	for i := uintptr(0); i < n; i++ {
-		*bitp = bitBoundary
-		bitp = subtractb(bitp, step)
-	}
-}
-
-// clearSpan clears the heap bitmap bytes for the span.
-func (h heapBits) clearSpan(size, n, total uintptr) {
 	if total%heapBitmapScale != 0 {
-		throw("clearSpan: unaligned length")
+		throw("initSpan: unaligned length")
 	}
 	nbyte := total / heapBitmapScale
 	memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
@@ -371,9 +298,7 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
 		bitp := h.bitp
 		for i := uintptr(0); i < n; i += 2 {
 			x := int(*bitp)
-			if x&0x11 != 0x11 {
-				throw("missing bitBoundary")
-			}
 			if (x>>typeShift)&typeMask == typeDead {
 				x += (typeScalar - typeDead) << typeShift
 			}
@@ -392,9 +317,6 @@ func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
 	bitp := h.bitp
 	step := size / heapBitmapScale
 	for i := uintptr(0); i < n; i++ {
-		if *bitp&bitBoundary == 0 {
-			throw("missing bitBoundary")
-		}
 		x := *bitp
 		if (x>>typeShift)&typeMask == typeDead {
 			x += (typeScalar - typeDead) << typeShift
@@ -416,10 +338,6 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
 		bitp := h.bitp
 		for i := uintptr(0); i < n; i += 2 {
 			x := int(*bitp)
-			if x&(bitBoundary|bitBoundary<<4) != (bitBoundary | bitBoundary<<4) {
-				throw("missing bitBoundary")
-			}
 			switch typ := (x >> typeShift) & typeMask; typ {
 			case typeScalar:
 				x += (typeDead - typeScalar) << typeShift
@@ -448,10 +366,6 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
 	step := size / heapBitmapScale
 	for i := uintptr(0); i < n; i++ {
 		x := int(*bitp)
-		if x&bitBoundary == 0 {
-			throw("missing bitBoundary")
-		}
 		switch typ := (x >> typeShift) & typeMask; {
 		case typ == typeScalarCheckmarked && (x>>(4+typeShift))&typeMask != typeDead:
 			x += (typeScalar - typeScalarCheckmarked) << typeShift
@@ -503,7 +417,7 @@ func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
 			if x&bitMarked != 0 {
 				x &^= bitMarked
 			} else {
-				x = bitBoundary // clear marked bit, set type bits to typeDead
+				x = 0
 				f(base + i*size)
 			}
 			*bitp = uint8(x)
@@ -522,10 +436,6 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	// From here till marked label marking the object as allocated
 	// and storing type info in the GC bitmap.
 	h := heapBitsForAddr(x)
-	if debugMalloc && (*h.bitp>>h.shift)&0x0f != bitBoundary {
-		println("runtime: bits =", (*h.bitp>>h.shift)&0x0f)
-		throw("bad bits in markallocated")
-	}
 
 	var ti, te uintptr
 	var ptrmask *uint8
@@ -572,7 +482,8 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
 	}
 	if size == 2*ptrSize {
-		*h.bitp = *ptrmask | bitBoundary
+		// h.shift is 0 for all sizes > ptrSize.
+		*h.bitp = *ptrmask
 		return
 	}
 	te = uintptr(typ.size) / ptrSize
@@ -581,15 +492,17 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 		te /= 2
 	}
 	// Copy pointer bitmask into the bitmap.
+	// TODO(rlh): add comment addressing the following concerns:
+	// If size > 2*ptrSize, is x guaranteed to be at least 2*ptrSize-aligned?
+	// And if type occupies and odd number of words, why are we only going through half
+	// of ptrmask and why don't we have to shift everything by 4 on odd iterations?
 	for i := uintptr(0); i < dataSize; i += 2 * ptrSize {
 		v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
 		ti++
 		if ti == te {
 			ti = 0
 		}
-		if i == 0 {
-			v |= bitBoundary
-		}
 		if i+ptrSize == dataSize {
 			v &^= typeMask << (4 + typeShift)
 		}
@@ -783,12 +696,6 @@ func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
 	// Mark first word as bitAllocated.
 	// Mark word after last as typeDead.
-	// TODO(rsc): Explain why we need to set this boundary.
-	// Aren't the boundaries always set for the whole span?
-	// Did unrollgcproc1 overwrite the boundary bit?
-	// Is that okay?
-	h := heapBitsForAddr(uintptr(v))
-	*h.bitp |= bitBoundary << h.shift
 	if size0 < size {
 		h := heapBitsForAddr(uintptr(v) + size0)
 		*h.bitp &^= typeMask << typeShift

View File

@@ -173,7 +173,7 @@ func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start gclinkptr, end gcli
 	s.needzero = 1
 	s.freelist = 0
 	unlock(&c.lock)
-	heapBitsForSpan(s.base()).clearSpan(s.layout())
+	heapBitsForSpan(s.base()).initSpan(s.layout())
 	mHeap_Free(&mheap_, s, 0)
 	return true
 }

View File

@@ -404,7 +404,7 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
 	}
 }
 
-// Scan the object b of size n, adding pointers to wbuf.
+// Scan the object b of size n bytes, adding pointers to wbuf.
 // Return possibly new wbuf to use.
 // If ptrmask != nil, it specifies where pointers are in b.
 // If ptrmask == nil, the GC bitmap should be consulted.
@@ -417,13 +417,16 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
 	// Find bits of the beginning of the object.
 	var hbits heapBits
 	if ptrmask == nil {
-		b, hbits = heapBitsForObject(b)
+		var s *mspan
+		b, hbits, s = heapBitsForObject(b)
 		if b == 0 {
 			return
 		}
+		n = s.elemsize
 		if n == 0 {
-			n = mheap_.arena_used - b
+			throw("scanobject n == 0")
 		}
 	}
 	for i := uintptr(0); i < n; i += ptrSize {
for i := uintptr(0); i < n; i += ptrSize { for i := uintptr(0); i < n; i += ptrSize {
@ -433,15 +436,9 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
// dense mask (stack or data) // dense mask (stack or data)
bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
} else { } else {
// Check if we have reached end of span.
// n is an overestimate of the size of the object.
if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
break
}
bits = uintptr(hbits.typeBits()) bits = uintptr(hbits.typeBits())
if i > 0 && (hbits.isBoundary() || bits == typeDead) { if bits == typeDead {
break // reached beginning of the next object break // no more pointers in this object
} }
hbits = hbits.next() hbits = hbits.next()
} }
@@ -468,7 +465,7 @@ func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWorkProducer) {
 		}
 
 		// Mark the object.
-		if obj, hbits := heapBitsForObject(obj); obj != 0 {
+		if obj, hbits, _ := heapBitsForObject(obj); obj != 0 {
 			greyobject(obj, b, i, hbits, gcw)
 		}
 	}
@@ -481,7 +478,7 @@ func shade(b uintptr) {
 	if !inheap(b) {
 		throw("shade: passed an address not in the heap")
 	}
-	if obj, hbits := heapBitsForObject(b); obj != 0 {
+	if obj, hbits, _ := heapBitsForObject(b); obj != 0 {
 		// TODO: this would be a great place to put a check to see
 		// if we are harvesting and if we are then we should
 		// figure out why there is a call to shade when the

View File

@@ -218,7 +218,7 @@ func mSpan_Sweep(s *mspan, preserve bool) bool {
 		if preserve {
 			throw("can't preserve large span")
 		}
-		heapBitsForSpan(p).clearSpan(s.layout())
+		heapBitsForSpan(p).initSpan(s.layout())
 		s.needzero = 1
 		// important to set sweepgen before returning it to heap