
runtime: use buffered write barrier for bulkBarrierPreWrite

This modifies bulkBarrierPreWrite to use the buffered write barrier
instead of the eager write barrier. This reduces the number of system
stack switches and sanity checks by a factor of the buffer size
(currently 256). This affects both typedmemmove and typedmemclr.

Since this is purely a runtime change, it applies to all arches
(unlike the pointer write barrier).

name                 old time/op  new time/op  delta
BulkWriteBarrier-12  7.33ns ± 6%  4.46ns ± 9%  -39.10%  (p=0.000 n=20+19)

Updates #22460.

Change-Id: I6a686a63bbf08be02b9b97250e37163c5a90cdd8
Reviewed-on: https://go-review.googlesource.com/73832
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Austin Clements 2017-10-27 13:48:08 -04:00
parent 6a5f1e58ed
commit 877387e38a
3 changed files with 53 additions and 6 deletions
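For readers who want to reproduce a number like the BulkWriteBarrier result above, the sketch below is a hypothetical, self-contained benchmark, not the runtime's own BulkWriteBarrier benchmark; the package name, slice length, and element type are assumptions. Copying a slice whose elements contain pointers goes through typedslicecopy, which calls bulkBarrierPreWrite for every pointer slot before the underlying memmove, so the per-iteration cost is dominated by the bulk barrier this CL speeds up.

// bulkbarrier_test.go: hypothetical benchmark sketch; all names here are
// illustrative and not taken from the CL.
package bulkbarrier_test

import "testing"

func BenchmarkBulkWriteBarrier(b *testing.B) {
    const n = 1024 // number of pointer slots copied per iteration (assumed)
    src := make([]*int, n)
    dst := make([]*int, n)
    for i := range src {
        src[i] = new(int)
    }
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        // copy of a []*int is compiled to typedslicecopy, which runs
        // bulkBarrierPreWrite over all n slots before the memmove.
        copy(dst, src)
    }
}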

src/runtime/mbarrier.go

@@ -239,6 +239,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
 // typedmemmove copies a value of type t to dst from src.
 // Must be nosplit, see #16026.
+//
+// TODO: Perfect for go:nosplitrec since we can't have a safe point
+// anywhere in the bulk barrier or memmove.
+//
 //go:nosplit
 func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
     if typ.kind&kindNoPointers == 0 {
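As context for the hunk above, here is a simplified sketch of the shape of typedmemmove, using the runtime-internal identifiers that appear in the diff; it omits the race-detector and msan hooks of the real function. The point of the nosplit requirement and the TODO is visible here: the pre-write barriers must be queued while dst still holds its old pointers, and no safe point may occur between queuing them and the memmove.

// Simplified sketch, not the actual runtime implementation.
//go:nosplit
func typedmemmoveSketch(typ *_type, dst, src unsafe.Pointer) {
    if typ.kind&kindNoPointers == 0 {
        // Queue a pre-write barrier for every pointer slot, recording the
        // old values in dst and the new values coming from src.
        bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size)
    }
    // Only after the barriers are queued is the memory actually copied.
    memmove(dst, src, typ.size)
}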

src/runtime/mbitmap.go

@@ -523,12 +523,13 @@ func (h heapBits) setCheckmarked(size uintptr) {
     atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
 }
 
-// bulkBarrierPreWrite executes writebarrierptr_prewrite1
+// bulkBarrierPreWrite executes a write barrier
 // for every pointer slot in the memory range [src, src+size),
 // using pointer/scalar information from [dst, dst+size).
 // This executes the write barriers necessary before a memmove.
 // src, dst, and size must be pointer-aligned.
 // The range [dst, dst+size) must lie within a single object.
+// It does not perform the actual writes.
 //
 // As a special case, src == 0 indicates that this is being used for a
 // memclr. bulkBarrierPreWrite will pass 0 for the src of each write
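The src == 0 special case above is what the memclr path uses. A simplified sketch of how typedmemclr (the other caller named in the commit message) is assumed to drive it, again using runtime-internal identifiers:

// Simplified sketch, not the actual runtime implementation.
//go:nosplit
func typedmemclrSketch(typ *_type, ptr unsafe.Pointer) {
    if typ.kind&kindNoPointers == 0 {
        // src == 0: each pointer slot gets a pre-write barrier whose new
        // value is 0, since the slot is about to be cleared.
        bulkBarrierPreWrite(uintptr(ptr), 0, typ.size)
    }
    memclrNoHeapPointers(ptr, typ.size)
}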
@@ -578,12 +579,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
         return
     }
 
+    buf := &getg().m.p.ptr().wbBuf
     h := heapBitsForAddr(dst)
     if src == 0 {
         for i := uintptr(0); i < size; i += sys.PtrSize {
             if h.isPointer() {
                 dstx := (*uintptr)(unsafe.Pointer(dst + i))
-                writebarrierptr_prewrite1(dstx, 0)
+                if !buf.putFast(*dstx, 0) {
+                    wbBufFlush(nil, 0)
+                }
             }
             h = h.next()
         }
@@ -592,7 +596,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
             if h.isPointer() {
                 dstx := (*uintptr)(unsafe.Pointer(dst + i))
                 srcx := (*uintptr)(unsafe.Pointer(src + i))
-                writebarrierptr_prewrite1(dstx, *srcx)
+                if !buf.putFast(*dstx, *srcx) {
+                    wbBufFlush(nil, 0)
+                }
             }
             h = h.next()
         }
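Note that buf := &getg().m.p.ptr().wbBuf is read once and reused across the whole loop. That is safe only because these functions have no preemption points, so the goroutine cannot migrate to a different P (and therefore a different per-P buffer) between putFast calls; the putFast documentation added below makes both requirements explicit.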
@@ -612,6 +618,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
     bits = addb(bits, word/8)
     mask := uint8(1) << (word % 8)
 
+    buf := &getg().m.p.ptr().wbBuf
     for i := uintptr(0); i < size; i += sys.PtrSize {
         if mask == 0 {
             bits = addb(bits, 1)
@@ -625,10 +632,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
         if *bits&mask != 0 {
             dstx := (*uintptr)(unsafe.Pointer(dst + i))
             if src == 0 {
-                writebarrierptr_prewrite1(dstx, 0)
+                if !buf.putFast(*dstx, 0) {
+                    wbBufFlush(nil, 0)
+                }
             } else {
                 srcx := (*uintptr)(unsafe.Pointer(src + i))
-                writebarrierptr_prewrite1(dstx, *srcx)
+                if !buf.putFast(*dstx, *srcx) {
+                    wbBufFlush(nil, 0)
+                }
             }
         }
         mask <<= 1

src/runtime/mwbbuf.go

@@ -20,6 +20,7 @@
 package runtime
 
 import (
+    "runtime/internal/sys"
     "unsafe"
 )
@@ -94,6 +95,37 @@ func (b *wbBuf) reset() {
     }
 }
 
+// putFast adds old and new to the write barrier buffer and returns
+// false if a flush is necessary. Callers should use this as:
+//
+//     buf := &getg().m.p.ptr().wbBuf
+//     if !buf.putFast(old, new) {
+//         wbBufFlush(...)
+//     }
+//
+// The arguments to wbBufFlush depend on whether the caller is doing
+// its own cgo pointer checks. If it is, then this can be
+// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
+// new.
+//
+// Since buf is a per-P resource, the caller must ensure there are no
+// preemption points while buf is in use.
+//
+// It must be nowritebarrierrec because write barriers here would
+// corrupt the write barrier buffer. It (and everything it calls, if
+// it called anything) has to be nosplit to avoid scheduling on to a
+// different P and a different buffer.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) putFast(old, new uintptr) bool {
+    p := (*[2]uintptr)(unsafe.Pointer(b.next))
+    p[0] = old
+    p[1] = new
+    b.next += 2 * sys.PtrSize
+    return b.next != b.end
+}
+
 // wbBufFlush flushes the current P's write barrier buffer to the GC
 // workbufs. It is passed the slot and value of the write barrier that
 // caused the flush so that it can implement cgocheck.
@@ -118,7 +150,7 @@ func wbBufFlush(dst *uintptr, src uintptr) {
         return
     }
 
-    if writeBarrier.cgo {
+    if writeBarrier.cgo && dst != nil {
         // This must be called from the stack that did the
         // write. It's nosplit all the way down.
         cgoCheckWriteBarrier(dst, src)
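To make the putFast contract concrete, here is a self-contained toy, not the runtime's wbBuf: the entry is always stored, and the returned boolean only reports whether there is still room, so a false return means "flush now, before the next put". Names and sizes are illustrative.

package main

import "fmt"

// toyBuf mimics the shape of the write barrier buffer: a fixed-size
// array of (old, new) pairs plus a next/end cursor pair.
type toyBuf struct {
    next int
    end  int
    buf  [256 * 2]uintptr // 256 entries of two words each (size assumed)
}

func (b *toyBuf) reset() { b.next, b.end = 0, len(b.buf) }

// putFast stores the pair and reports whether the buffer still has room,
// mirroring the real putFast's "false means a flush is necessary" contract.
func (b *toyBuf) putFast(old, new uintptr) bool {
    b.buf[b.next] = old
    b.buf[b.next+1] = new
    b.next += 2
    return b.next != b.end
}

// flush stands in for wbBufFlush: the runtime hands the recorded pointers
// to the GC work buffers; the toy just counts and resets.
func (b *toyBuf) flush() {
    fmt.Printf("flushing %d entries\n", b.next/2)
    b.reset()
}

func main() {
    var b toyBuf
    b.reset()
    for i := uintptr(1); i <= 1000; i++ {
        if !b.putFast(i, 0) {
            b.flush()
        }
    }
    b.flush() // drain whatever is left
}

In the runtime, the flush still switches to the system stack, but only once per 256 entries instead of once per pointer, which is where the speedup reported in the commit message comes from.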