From 877387e38a734db8a2a151ddd4af7ba53bcf6460 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Fri, 27 Oct 2017 13:48:08 -0400
Subject: [PATCH] runtime: use buffered write barrier for bulkBarrierPreWrite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This modifies bulkBarrierPreWrite to use the buffered write barrier
instead of the eager write barrier. This reduces the number of system
stack switches and sanity checks by a factor of the buffer size
(currently 256). This affects both typedmemmove and typedmemclr.

Since this is purely a runtime change, it applies to all arches
(unlike the pointer write barrier).

name                 old time/op  new time/op  delta
BulkWriteBarrier-12  7.33ns ± 6%  4.46ns ± 9%  -39.10%  (p=0.000 n=20+19)

Updates #22460.

Change-Id: I6a686a63bbf08be02b9b97250e37163c5a90cdd8
Reviewed-on: https://go-review.googlesource.com/73832
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
Reviewed-by: Rick Hudson
---
 src/runtime/mbarrier.go |  4 ++++
 src/runtime/mbitmap.go  | 21 ++++++++++++++++-----
 src/runtime/mwbbuf.go   | 34 +++++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go
index 1183fa91b8..cb2959fbc3 100644
--- a/src/runtime/mbarrier.go
+++ b/src/runtime/mbarrier.go
@@ -239,6 +239,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
 
 // typedmemmove copies a value of type t to dst from src.
 // Must be nosplit, see #16026.
+//
+// TODO: Perfect for go:nosplitrec since we can't have a safe point
+// anywhere in the bulk barrier or memmove.
+//
 //go:nosplit
 func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
 	if typ.kind&kindNoPointers == 0 {
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 23afc7dd04..6e2f12db15 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -523,12 +523,13 @@ func (h heapBits) setCheckmarked(size uintptr) {
 	atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
 }
 
-// bulkBarrierPreWrite executes writebarrierptr_prewrite1
+// bulkBarrierPreWrite executes a write barrier
 // for every pointer slot in the memory range [src, src+size),
 // using pointer/scalar information from [dst, dst+size).
 // This executes the write barriers necessary before a memmove.
 // src, dst, and size must be pointer-aligned.
 // The range [dst, dst+size) must lie within a single object.
+// It does not perform the actual writes.
 //
 // As a special case, src == 0 indicates that this is being used for a
 // memclr. bulkBarrierPreWrite will pass 0 for the src of each write
@@ -578,12 +579,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 		return
 	}
 
+	buf := &getg().m.p.ptr().wbBuf
 	h := heapBitsForAddr(dst)
 	if src == 0 {
 		for i := uintptr(0); i < size; i += sys.PtrSize {
 			if h.isPointer() {
 				dstx := (*uintptr)(unsafe.Pointer(dst + i))
-				writebarrierptr_prewrite1(dstx, 0)
+				if !buf.putFast(*dstx, 0) {
+					wbBufFlush(nil, 0)
+				}
 			}
 			h = h.next()
 		}
@@ -592,7 +596,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 			if h.isPointer() {
 				dstx := (*uintptr)(unsafe.Pointer(dst + i))
 				srcx := (*uintptr)(unsafe.Pointer(src + i))
-				writebarrierptr_prewrite1(dstx, *srcx)
+				if !buf.putFast(*dstx, *srcx) {
+					wbBufFlush(nil, 0)
+				}
 			}
 			h = h.next()
 		}
@@ -612,6 +618,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 	bits = addb(bits, word/8)
 	mask := uint8(1) << (word % 8)
 
+	buf := &getg().m.p.ptr().wbBuf
 	for i := uintptr(0); i < size; i += sys.PtrSize {
 		if mask == 0 {
 			bits = addb(bits, 1)
@@ -625,10 +632,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 		if *bits&mask != 0 {
 			dstx := (*uintptr)(unsafe.Pointer(dst + i))
 			if src == 0 {
-				writebarrierptr_prewrite1(dstx, 0)
+				if !buf.putFast(*dstx, 0) {
+					wbBufFlush(nil, 0)
+				}
 			} else {
 				srcx := (*uintptr)(unsafe.Pointer(src + i))
-				writebarrierptr_prewrite1(dstx, *srcx)
+				if !buf.putFast(*dstx, *srcx) {
+					wbBufFlush(nil, 0)
+				}
 			}
 		}
 		mask <<= 1
diff --git a/src/runtime/mwbbuf.go b/src/runtime/mwbbuf.go
index d1cd193665..2c06996210 100644
--- a/src/runtime/mwbbuf.go
+++ b/src/runtime/mwbbuf.go
@@ -20,6 +20,7 @@
 package runtime
 
 import (
+	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -94,6 +95,37 @@ func (b *wbBuf) reset() {
 	}
 }
 
+// putFast adds old and new to the write barrier buffer and returns
+// false if a flush is necessary. Callers should use this as:
+//
+//     buf := &getg().m.p.ptr().wbBuf
+//     if !buf.putFast(old, new) {
+//         wbBufFlush(...)
+//     }
+//
+// The arguments to wbBufFlush depend on whether the caller is doing
+// its own cgo pointer checks. If it is, then this can be
+// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
+// new.
+//
+// Since buf is a per-P resource, the caller must ensure there are no
+// preemption points while buf is in use.
+//
+// It must be nowritebarrierrec because write barriers here would
+// corrupt the write barrier buffer. It (and everything it calls, if
+// it called anything) has to be nosplit to avoid scheduling on to a
+// different P and a different buffer.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) putFast(old, new uintptr) bool {
+	p := (*[2]uintptr)(unsafe.Pointer(b.next))
+	p[0] = old
+	p[1] = new
+	b.next += 2 * sys.PtrSize
+	return b.next != b.end
+}
+
 // wbBufFlush flushes the current P's write barrier buffer to the GC
 // workbufs. It is passed the slot and value of the write barrier that
 // caused the flush so that it can implement cgocheck.
@@ -118,7 +150,7 @@ func wbBufFlush(dst *uintptr, src uintptr) {
 		return
 	}
 
-	if writeBarrier.cgo {
+	if writeBarrier.cgo && dst != nil {
 		// This must be called from the stack that did the
 		// write. It's nosplit all the way down.
 		cgoCheckWriteBarrier(dst, src)
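The putFast doc comment above prescribes the put-then-flush pattern that bulkBarrierPreWrite and bulkBarrierBitmap now follow. Below is a minimal, standalone Go sketch of that contract; the wbBuf type, the 256-entry size, and the flush counter are simplified stand-ins for illustration, not the runtime's actual implementation.

package main

import "fmt"

// wbBuf is a standalone analogue of the runtime's per-P write
// barrier buffer: a fixed array of (old, new) pointer pairs plus
// next/end cursors. Field names and sizes are illustrative only.
type wbBuf struct {
	next int
	end  int
	buf  [256 * 2]uintptr // 256 entries of two words each
}

func (b *wbBuf) reset() {
	b.next = 0
	b.end = len(b.buf)
}

// putFast records one (old, new) pair and reports whether the buffer
// still has room. A false return means the caller must flush before
// recording more pairs, mirroring the pattern in the patch's doc
// comment: if !buf.putFast(old, new) { wbBufFlush(...) }.
func (b *wbBuf) putFast(old, new uintptr) bool {
	b.buf[b.next] = old
	b.buf[b.next+1] = new
	b.next += 2
	return b.next != b.end
}

func main() {
	var flushes int
	flush := func(b *wbBuf) {
		// The real wbBufFlush hands the buffered pointers to the GC
		// work queues on the system stack; here we only count calls.
		flushes++
		b.reset()
	}

	var b wbBuf
	b.reset()
	for i := 0; i < 10000; i++ {
		if !b.putFast(uintptr(i), uintptr(i+1)) {
			flush(&b)
		}
	}
	fmt.Println("flushes:", flushes)
}

With a 256-entry buffer, roughly one flush (and, in the real runtime, one system stack switch) replaces 256 individual barrier calls, which is the factor-of-buffer-size reduction the commit message cites.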
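The bulkBarrierBitmap hunks leave the bitmap walk itself unchanged: the mask advances one bit per word, the bitmap byte advances every eight words, and whole bytes with no pointer bits are skipped. The self-contained sketch below mirrors that walk, collecting pointer-slot offsets instead of enqueuing barriers; the function name, the slice-based bitmap, and the fixed 8-byte word size are assumptions for illustration only.

package main

import "fmt"

// pointerSlots walks a 1-bit-per-word pointer bitmap in the style of
// bulkBarrierBitmap and returns the byte offsets of the words that
// hold pointers. This is not the runtime's API.
func pointerSlots(size, maskOffset uintptr, bits []uint8) []uintptr {
	const ptrSize = 8 // assume a 64-bit platform for the sketch

	word := maskOffset / ptrSize
	idx := word / 8
	mask := uint8(1) << (word % 8)

	var offsets []uintptr
	for i := uintptr(0); i < size; i += ptrSize {
		if mask == 0 {
			idx++
			if bits[idx] == 0 {
				// Skip 8 words at once when the whole byte is scalar.
				i += 7 * ptrSize
				continue
			}
			mask = 1
		}
		if bits[idx]&mask != 0 {
			offsets = append(offsets, i)
		}
		mask <<= 1
	}
	return offsets
}

func main() {
	// Bitmap 0b00000101: words 0 and 2 are pointers, word 1 is scalar.
	fmt.Println(pointerSlots(3*8, 0, []uint8{0x05})) // prints [0 16]
}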