From 5380b22991dfb5f3bad25cd2e29f59fd07716581 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Sun, 23 Oct 2016 11:03:56 -0400
Subject: [PATCH] runtime: implement unconditional hybrid barrier

This implements the unconditional version of the hybrid deletion
write barrier, which always shades both the old and new pointer. It's
unconditional for now because barriers on channel operations require
checking both the source and destination stacks, and we don't
currently have a way to funnel this information into the write
barrier.

As part of this change, we modify the typed memclr operations
introduced earlier to invoke the write barrier.

This has basically no overall effect on benchmark performance, which
is good: it indicates that neither the extra shade nor the new bulk
clear barriers cost much. It also has little effect on latency, which
is expected, since we haven't yet modified mark termination to take
advantage of the hybrid barrier.

Updates #17503.

Change-Id: Iebedf84af2f0e857bd5d3a2d525f760b5cf7224b
Reviewed-on: https://go-review.googlesource.com/31765
Reviewed-by: Rick Hudson
---
 src/runtime/mbarrier.go | 75 +++++++++++++++++++++++++++++++++--------
 src/runtime/mbitmap.go  | 34 ++++++++++++++-----
 2 files changed, 87 insertions(+), 22 deletions(-)

diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index 888dfc465d..5848b43eb0 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -21,26 +21,57 @@ import (
 // gcmarkwb_m is the mark-phase write barrier, the only barrier we have.
 // The rest of this file exists only to make calls to this function.
 //
-// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object.
-// The original Dijkstra barrier only shaded ptrs being placed in black slots.
+// This is a hybrid barrier that combines a Yuasa-style deletion
+// barrier—which shades the object whose reference is being
+// overwritten—with a Dijkstra-style insertion barrier—which shades
+// the object whose reference is being written. The insertion part of
+// the barrier is necessary while the calling goroutine's stack is
+// grey. In pseudocode, the barrier is:
+//
+//     writePointer(slot, ptr):
+//         shade(*slot)
+//         if current stack is grey:
+//             shade(ptr)
+//         *slot = ptr
+//
+// slot is the destination in Go code.
+// ptr is the value that goes into the slot in Go code.
 //
 // Shade indicates that it has seen a white pointer by adding the referent
 // to wbuf as well as marking it.
 //
-// slot is the destination (dst) in go code
-// ptr is the value that goes into the slot (src) in the go code
+// The two shades and the condition work together to prevent a mutator
+// from hiding an object from the garbage collector:
+//
+// 1. shade(*slot) prevents a mutator from hiding an object by moving
+// the sole pointer to it from the heap to its stack. If it attempts
+// to unlink an object from the heap, this will shade it.
+//
+// 2. shade(ptr) prevents a mutator from hiding an object by moving
+// the sole pointer to it from its stack into a black object in the
+// heap. If it attempts to install the pointer into a black object,
+// this will shade it.
+//
+// 3. Once a goroutine's stack is black, the shade(ptr) becomes
+// unnecessary. shade(ptr) prevents hiding an object by moving it from
+// the stack to the heap, but this requires first having a pointer
+// hidden on the stack. Immediately after a stack is scanned, it only
+// points to shaded objects, so it's not hiding anything, and the
+// shade(*slot) prevents it from hiding any other pointers on its
+// stack.
+//
+// For a detailed description of this barrier and proof of
+// correctness, see https://github.com/golang/proposal/blob/master/design/17503-eliminate-rescan.md
+//
 //
 //
 // Dealing with memory ordering:
 //
-// Dijkstra pointed out that maintaining the no black to white
-// pointers means that white to white pointers do not need
-// to be noted by the write barrier. Furthermore if either
-// white object dies before it is reached by the
-// GC then the object can be collected during this GC cycle
-// instead of waiting for the next cycle. Unfortunately the cost of
-// ensuring that the object holding the slot doesn't concurrently
-// change to black without the mutator noticing seems prohibitive.
+// Both the Yuasa and Dijkstra barriers can be made conditional on the
+// color of the object containing the slot. We chose not to make these
+// conditional because the cost of ensuring that the object holding
+// the slot doesn't concurrently change color without the mutator
+// noticing seems prohibitive.
 //
 // Consider the following example where the mutator writes into
 // a slot and then loads the slot's mark bit while the GC thread
@@ -110,6 +141,20 @@ import (
 //go:systemstack
 func gcmarkwb_m(slot *uintptr, ptr uintptr) {
 	if writeBarrier.needed {
+		// Note: This turns bad pointer writes into bad
+		// pointer reads, which could be confusing. We avoid
+		// reading from obviously bad pointers, which should
+		// take care of the vast majority of these. We could
+		// patch this up in the signal handler, or use XCHG to
+		// combine the read and the write. Checking inheap is
+		// insufficient since we need to track changes to
+		// roots outside the heap.
+		if slot1 := uintptr(unsafe.Pointer(slot)); slot1 >= minPhysPageSize {
+			if optr := *slot; optr != 0 {
+				shade(optr)
+			}
+		}
+		// TODO: Make this conditional on the caller's stack color.
 		if ptr != 0 && inheap(ptr) {
 			shade(ptr)
 		}
@@ -365,7 +410,9 @@ func reflect_typedslicecopy(elemType *_type, dst, src slice) int {
 //
 //go:nosplit
 func typedmemclr(typ *_type, ptr unsafe.Pointer) {
-	// TODO(austin): Call the hybrid barrier.
+	if typ.kind&kindNoPointers == 0 {
+		bulkBarrierPreWrite(uintptr(ptr), 0, typ.size)
+	}
 	memclrNoHeapPointers(ptr, typ.size)
 }
 
@@ -376,6 +423,6 @@ func typedmemclr(typ *_type, ptr unsafe.Pointer) {
 //
 //go:nosplit
 func memclrHasPointers(ptr unsafe.Pointer, n uintptr) {
-	// TODO(austin): Call the hybrid barrier.
+	bulkBarrierPreWrite(uintptr(ptr), 0, n)
 	memclrNoHeapPointers(ptr, n)
 }
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index ddbe3efc96..b6d31055b5 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -553,6 +553,10 @@ func (h heapBits) setCheckmarked(size uintptr) {
 // src, dst, and size must be pointer-aligned.
 // The range [dst, dst+size) must lie within a single object.
 //
+// As a special case, src == 0 indicates that this is being used for a
+// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
+// barrier.
+//
 // Callers should call bulkBarrierPreWrite immediately before
 // calling memmove(dst, src, size). This function is marked nosplit
 // to avoid being preempted; the GC must not stop the goroutine
@@ -618,13 +622,23 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 	}
 
 	h := heapBitsForAddr(dst)
-	for i := uintptr(0); i < size; i += sys.PtrSize {
-		if h.isPointer() {
-			dstx := (*uintptr)(unsafe.Pointer(dst + i))
-			srcx := (*uintptr)(unsafe.Pointer(src + i))
-			writebarrierptr_prewrite1(dstx, *srcx)
+	if src == 0 {
+		for i := uintptr(0); i < size; i += sys.PtrSize {
+			if h.isPointer() {
+				dstx := (*uintptr)(unsafe.Pointer(dst + i))
+				writebarrierptr_prewrite1(dstx, 0)
+			}
+			h = h.next()
+		}
+	} else {
+		for i := uintptr(0); i < size; i += sys.PtrSize {
+			if h.isPointer() {
+				dstx := (*uintptr)(unsafe.Pointer(dst + i))
+				srcx := (*uintptr)(unsafe.Pointer(src + i))
+				writebarrierptr_prewrite1(dstx, *srcx)
+			}
+			h = h.next()
 		}
-		h = h.next()
 	}
 }
 
@@ -653,8 +667,12 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 		}
 		if *bits&mask != 0 {
 			dstx := (*uintptr)(unsafe.Pointer(dst + i))
-			srcx := (*uintptr)(unsafe.Pointer(src + i))
-			writebarrierptr_prewrite1(dstx, *srcx)
+			if src == 0 {
+				writebarrierptr_prewrite1(dstx, 0)
+			} else {
+				srcx := (*uintptr)(unsafe.Pointer(src + i))
+				writebarrierptr_prewrite1(dstx, *srcx)
+			}
 		}
 		mask <<= 1
 	}
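
Note for readers following the writePointer pseudocode in the mbarrier.go
comment: the sketch below restates that logic as ordinary Go. It is a toy
model only, not the runtime implementation; gcmarkwb_m works on raw
uintptrs, checks inheap and minPhysPageSize, and pushes marked objects
onto a work buffer, whereas the object, color, shade, and writePointer
names here are invented for illustration. As the commit message says,
this CL installs the unconditional form, so the new pointer is shaded
regardless of the caller's stack color (the stack-color check is left as
a TODO in gcmarkwb_m).

// Toy model of the hybrid barrier described in the mbarrier.go
// comment. Illustration only: the types and helpers below are made up
// and do not exist in the runtime.
package main

import "fmt"

type color int

const (
	white color = iota // not yet reached by the GC
	grey               // reached, but its referents are not yet scanned
	black              // reached and fully scanned
)

type object struct {
	name string
	col  color
	ref  *object // a single pointer slot, for simplicity
}

// shade models the runtime's shade(): it greys a white object, which
// in the real runtime also queues it on a work buffer for scanning.
func shade(obj *object) {
	if obj != nil && obj.col == white {
		obj.col = grey
	}
}

// writePointer is the barrier pseudocode:
//
//	shade(*slot)
//	if current stack is grey:
//		shade(ptr)
//	*slot = ptr
//
// This CL's gcmarkwb_m is the unconditional variant: it behaves as if
// stackIsGrey were always true.
func writePointer(slot **object, ptr *object, stackIsGrey bool) {
	shade(*slot) // Yuasa-style deletion barrier: shade the old referent.
	if stackIsGrey {
		shade(ptr) // Dijkstra-style insertion barrier: shade the new referent.
	}
	*slot = ptr
}

func main() {
	// A black heap object whose slot is about to be overwritten, and a
	// white object reachable only through that slot.
	victim := &object{name: "victim", col: white}
	holder := &object{name: "holder", col: black, ref: victim}

	// Without the deletion barrier, clearing holder.ref could hide
	// victim from the GC. shade(*slot) greys it first.
	writePointer(&holder.ref, nil, true)

	fmt.Printf("%s is now color %d (grey), so it will still be scanned\n",
		victim.name, victim.col)
}

The same shade-the-old-value step is what bulkBarrierPreWrite performs
for every pointer word when typedmemclr and memclrHasPointers pass
src == 0, which is why a typed memclr no longer hides the objects it
overwrites.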