Mirror of https://github.com/golang/go (synced 2024-11-19 13:04:45 -07:00)
runtime: use buffered write barrier for bulkBarrierPreWrite
This modifies bulkBarrierPreWrite to use the buffered write barrier
instead of the eager write barrier. This reduces the number of system
stack switches and sanity checks by a factor of the buffer size
(currently 256). This affects both typedmemmove and typedmemclr.

Since this is purely a runtime change, it applies to all arches
(unlike the pointer write barrier).

name                 old time/op  new time/op  delta
BulkWriteBarrier-12  7.33ns ± 6%  4.46ns ± 9%  -39.10%  (p=0.000 n=20+19)

Updates #22460.

Change-Id: I6a686a63bbf08be02b9b97250e37163c5a90cdd8
Reviewed-on: https://go-review.googlesource.com/73832
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
This commit is contained in:
parent 6a5f1e58ed
commit 877387e38a
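Where the old code made one eager write-barrier call per pointer slot, the new code appends each slot to a per-P buffer and only does the expensive work (which involves switching to the system stack) when the buffer fills, once per 256 slots. Below is a minimal, self-contained sketch of that batching pattern; wbBufSketch and every name in it are hypothetical stand-ins, not the runtime's actual types:

package main

import "fmt"

const bufEntries = 256 // the runtime's write barrier buffer currently holds 256 entries

// wbBufSketch is a stand-in for the per-P buffer: a fixed array of
// (old, new) pointer pairs plus a count of how many are filled.
type wbBufSketch struct {
	n       int
	entries [bufEntries][2]uintptr
	flushes int
}

// putFast records one pair and reports whether the buffer still has room;
// false means the caller must flush before putting anything else.
func (b *wbBufSketch) putFast(old, new uintptr) bool {
	b.entries[b.n] = [2]uintptr{old, new}
	b.n++
	return b.n != len(b.entries)
}

// flush stands in for the expensive per-batch work (wbBufFlush in the patch).
func (b *wbBufSketch) flush() {
	b.flushes++
	b.n = 0
}

func main() {
	var buf wbBufSketch
	const slots = 1 << 16 // pretend a bulk copy touches this many pointer slots
	for i := 0; i < slots; i++ {
		if !buf.putFast(uintptr(i), 0) { // same shape as the calls added in this commit
			buf.flush()
		}
	}
	fmt.Printf("%d slots -> %d flushes\n", slots, buf.flushes)
}

Running this reports 65536 slots producing 256 flushes, the factor-of-the-buffer-size reduction in expensive calls that the commit message cites.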
@@ -239,6 +239,10 @@ func writebarrierptr_prewrite(dst *uintptr, src uintptr) {
 // typedmemmove copies a value of type t to dst from src.
 // Must be nosplit, see #16026.
+//
+// TODO: Perfect for go:nosplitrec since we can't have a safe point
+// anywhere in the bulk barrier or memmove.
+//
 //go:nosplit
 func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
 	if typ.kind&kindNoPointers == 0 {
@@ -523,12 +523,13 @@ func (h heapBits) setCheckmarked(size uintptr) {
 	atomic.Or8(h.bitp, bitScan<<(heapBitsShift+h.shift))
 }
 
-// bulkBarrierPreWrite executes writebarrierptr_prewrite1
+// bulkBarrierPreWrite executes a write barrier
 // for every pointer slot in the memory range [src, src+size),
 // using pointer/scalar information from [dst, dst+size).
 // This executes the write barriers necessary before a memmove.
 // src, dst, and size must be pointer-aligned.
 // The range [dst, dst+size) must lie within a single object.
+// It does not perform the actual writes.
 //
 // As a special case, src == 0 indicates that this is being used for a
 // memclr. bulkBarrierPreWrite will pass 0 for the src of each write
@@ -578,12 +579,15 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 		return
 	}
 
+	buf := &getg().m.p.ptr().wbBuf
 	h := heapBitsForAddr(dst)
 	if src == 0 {
 		for i := uintptr(0); i < size; i += sys.PtrSize {
 			if h.isPointer() {
 				dstx := (*uintptr)(unsafe.Pointer(dst + i))
-				writebarrierptr_prewrite1(dstx, 0)
+				if !buf.putFast(*dstx, 0) {
+					wbBufFlush(nil, 0)
+				}
 			}
 			h = h.next()
 		}
@@ -592,7 +596,9 @@ func bulkBarrierPreWrite(dst, src, size uintptr) {
 			if h.isPointer() {
 				dstx := (*uintptr)(unsafe.Pointer(dst + i))
 				srcx := (*uintptr)(unsafe.Pointer(src + i))
-				writebarrierptr_prewrite1(dstx, *srcx)
+				if !buf.putFast(*dstx, *srcx) {
+					wbBufFlush(nil, 0)
+				}
 			}
 			h = h.next()
 		}
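For context, the commit message names typedmemmove and typedmemclr as the affected callers: roughly, they issue the bulk barrier first and then perform the actual copy or clear. The sketch below is simplified and written from memory (typ.size, kindNoPointers, memmove, and memclrNoHeapPointers are runtime internals of that era), so treat it as an illustration of the call pattern rather than the verbatim source:

// Simplified, assumed shape of the two callers inside package runtime.
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
	if typ.kind&kindNoPointers == 0 {
		// Barrier every pointer slot in dst before the copy overwrites it.
		bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.size)
	}
	memmove(dst, src, typ.size)
}

func typedmemclr(typ *_type, ptr unsafe.Pointer) {
	if typ.kind&kindNoPointers == 0 {
		// src == 0 is the memclr special case described in the doc comment.
		bulkBarrierPreWrite(uintptr(ptr), 0, typ.size)
	}
	memclrNoHeapPointers(ptr, typ.size)
}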
@@ -612,6 +618,7 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 	bits = addb(bits, word/8)
 	mask := uint8(1) << (word % 8)
 
+	buf := &getg().m.p.ptr().wbBuf
 	for i := uintptr(0); i < size; i += sys.PtrSize {
 		if mask == 0 {
 			bits = addb(bits, 1)
@@ -625,10 +632,14 @@ func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
 		if *bits&mask != 0 {
 			dstx := (*uintptr)(unsafe.Pointer(dst + i))
 			if src == 0 {
-				writebarrierptr_prewrite1(dstx, 0)
+				if !buf.putFast(*dstx, 0) {
+					wbBufFlush(nil, 0)
+				}
 			} else {
 				srcx := (*uintptr)(unsafe.Pointer(src + i))
-				writebarrierptr_prewrite1(dstx, *srcx)
+				if !buf.putFast(*dstx, *srcx) {
+					wbBufFlush(nil, 0)
+				}
 			}
 		}
 		mask <<= 1
@@ -20,6 +20,7 @@
 package runtime
 
 import (
+	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -94,6 +95,37 @@ func (b *wbBuf) reset() {
 	}
 }
 
+// putFast adds old and new to the write barrier buffer and returns
+// false if a flush is necessary. Callers should use this as:
+//
+//     buf := &getg().m.p.ptr().wbBuf
+//     if !buf.putFast(old, new) {
+//         wbBufFlush(...)
+//     }
+//
+// The arguments to wbBufFlush depend on whether the caller is doing
+// its own cgo pointer checks. If it is, then this can be
+// wbBufFlush(nil, 0). Otherwise, it must pass the slot address and
+// new.
+//
+// Since buf is a per-P resource, the caller must ensure there are no
+// preemption points while buf is in use.
+//
+// It must be nowritebarrierrec to because write barriers here would
+// corrupt the write barrier buffer. It (and everything it calls, if
+// it called anything) has to be nosplit to avoid scheduling on to a
+// different P and a different buffer.
+//
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) putFast(old, new uintptr) bool {
+	p := (*[2]uintptr)(unsafe.Pointer(b.next))
+	p[0] = old
+	p[1] = new
+	b.next += 2 * sys.PtrSize
+	return b.next != b.end
+}
+
 // wbBufFlush flushes the current P's write barrier buffer to the GC
 // workbufs. It is passed the slot and value of the write barrier that
 // caused the flush so that it can implement cgocheck.
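The capacity check in putFast is pure address arithmetic: next and end point into a fixed array of (old, new) pairs, each put advances next by two words, and a flush is needed exactly when next reaches end. Below is a self-contained toy version of that arithmetic; bufSketch and its names are hypothetical, and the buffer lives in a package-level variable so its backing array never moves (ordinary Go code should not juggle uintptrs like this; the runtime can because it controls its own memory):

package main

import (
	"fmt"
	"unsafe"
)

// bufSketch mimics the layout used above: next and end are addresses into
// a fixed array holding 256 entries of two uintptrs each.
type bufSketch struct {
	next uintptr
	end  uintptr
	buf  [256 * 2]uintptr
}

// Package-level so the backing array stays at a fixed address.
var sketch bufSketch

func (b *bufSketch) reset() {
	start := uintptr(unsafe.Pointer(&b.buf[0]))
	b.next = start
	b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
}

// putFast stores the pair at next, advances next by two words, and reports
// whether there is still room, mirroring the runtime function.
func (b *bufSketch) putFast(old, new uintptr) bool {
	p := (*[2]uintptr)(unsafe.Pointer(b.next))
	p[0] = old
	p[1] = new
	b.next += 2 * unsafe.Sizeof(old)
	return b.next != b.end
}

func main() {
	sketch.reset()
	n := 1
	for sketch.putFast(uintptr(n), 0) {
		n++
	}
	fmt.Println("puts before a flush is required:", n) // 256
}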
@@ -118,7 +150,7 @@ func wbBufFlush(dst *uintptr, src uintptr) {
 		return
 	}
 
-	if writeBarrier.cgo {
+	if writeBarrier.cgo && dst != nil {
 		// This must be called from the stack that did the
 		// write. It's nosplit all the way down.
 		cgoCheckWriteBarrier(dst, src)