1
0
mirror of https://github.com/golang/go synced 2024-11-14 05:50:27 -07:00

runtime: specialize heapSetType

Last CL we separated mallocgc into several specialized paths. Let's
split up heapSetType too. This will make the specialized heapSetType
functions inlineable and cut out some branches as well as a function
call.

Microbenchmark results at this point in the stack:

                   │ before.out  │            after-5.out             │
                   │   sec/op    │   sec/op     vs base               │
Malloc8-4            13.52n ± 3%   12.15n ± 2%  -10.13% (p=0.002 n=6)
Malloc16-4           21.49n ± 2%   18.32n ± 4%  -14.75% (p=0.002 n=6)
MallocTypeInfo8-4    27.12n ± 1%   18.64n ± 2%  -31.30% (p=0.002 n=6)
MallocTypeInfo16-4   28.71n ± 3%   21.63n ± 5%  -24.65% (p=0.002 n=6)
geomean              21.81n        17.31n       -20.64%

Change-Id: I5de9ac5089b9eb49bf563af2a74e6dc564420e05
Reviewed-on: https://go-review.googlesource.com/c/go/+/614795
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
Michael Anthony Knyszek 2024-09-20 20:56:40 +00:00 committed by Gopher Robot
parent 8730fcf885
commit a1c4fb4361
3 changed files with 92 additions and 77 deletions

View File

@@ -45,6 +45,8 @@ func TestIntendedInlining(t *testing.T) {
"funcspdelta",
"getm",
"getMCache",
"heapSetTypeNoHeader",
"heapSetTypeSmallHeader",
"isDirectIface",
"itabHashFunc",
"nextslicecap",

View File

@@ -1356,7 +1356,7 @@ func mallocgcSmallScanNoHeader(size uintptr, typ *_type, needzero bool) (unsafe.
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
c.scanAlloc += heapSetType(uintptr(x), size, typ, nil, span)
c.scanAlloc += heapSetTypeNoHeader(uintptr(x), size, typ, span)
size = uintptr(class_to_size[sizeclass])
// Ensure that the stores above that initialize x to
@@ -1450,7 +1450,7 @@ func mallocgcSmallScanHeader(size uintptr, typ *_type, needzero bool) (unsafe.Po
}
header := (**_type)(x)
x = add(x, mallocHeaderSize)
c.scanAlloc += heapSetType(uintptr(x), size-mallocHeaderSize, typ, header, span)
c.scanAlloc += heapSetTypeSmallHeader(uintptr(x), size-mallocHeaderSize, typ, header, span)
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
@@ -1583,7 +1583,7 @@ func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uin
// Finish storing the type information for this case.
if !noscan {
mp := acquirem()
getMCache(mp).scanAlloc += heapSetType(uintptr(x), size, typ, &span.largeType, span)
getMCache(mp).scanAlloc += heapSetTypeLarge(uintptr(x), size, typ, span)
// Publish the type information with the zeroed memory.
publicationBarrier()

View File

@@ -694,33 +694,46 @@ func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize
return
}
// heapSetType records that the new allocation [x, x+size)
// heapSetType* functions record that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.Size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapSetType when there are no pointers.
// malloc does not call heapSetType* when there are no pointers.
//
// There can be read-write races between heapSetType and things
// There can be read-write races between heapSetType* and things
// that read the heap metadata like scanobject. However, since
// heapSetType is only used for objects that have not yet been
// heapSetType* is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// shared memory that belongs to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapSetType(x, dataSize uintptr, typ *_type, header **_type, span *mspan) (scanSize uintptr) {
const doubleCheck = false
gctyp := typ
if header == nil {
if doubleCheck && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(span.elemsize)) {
const doubleCheckHeapSetType = doubleCheckMalloc
func heapSetTypeNoHeader(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(span.elemsize)) {
throw("tried to write heap bits, but no heap bits in span")
}
// Handle the case where we have no malloc header.
scanSize = span.writeHeapBitsSmall(x, dataSize, typ)
} else {
scanSize := span.writeHeapBitsSmall(x, dataSize, typ)
if doubleCheckHeapSetType {
doubleCheckHeapType(x, dataSize, typ, nil, span)
}
return scanSize
}
// heapSetTypeSmallHeader records the type of an allocation that carries a
// malloc header: it stores typ into *header and returns span.elemsize,
// which the caller adds to its scanAlloc count. When doubleCheckHeapSetType
// is enabled, it also runs doubleCheckHeapType on the result.
func heapSetTypeSmallHeader(x, dataSize uintptr, typ *_type, header **_type, span *mspan) uintptr {
*header = typ
if doubleCheckHeapSetType {
doubleCheckHeapType(x, dataSize, typ, header, span)
}
return span.elemsize
}
func heapSetTypeLarge(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
gctyp := typ
if typ.Kind_&abi.KindGCProg != 0 {
// Allocate space to unroll the gcprog. This space will consist of
// a dummy _type value and the unrolled gcprog. The dummy _type will
@@ -750,13 +763,15 @@ func heapSetType(x, dataSize uintptr, typ *_type, header **_type, span *mspan) (
// Expand the GC program into space reserved at the end of the new span.
runGCProg(addb(typ.GCData, 4), gctyp.GCData)
}
// Write out the header.
*header = gctyp
scanSize = span.elemsize
span.largeType = gctyp
if doubleCheckHeapSetType {
doubleCheckHeapType(x, dataSize, typ, &span.largeType, span)
}
return span.elemsize
}
if doubleCheck {
func doubleCheckHeapType(x, dataSize uintptr, gctyp *_type, header **_type, span *mspan) {
doubleCheckHeapPointers(x, dataSize, gctyp, header, span)
// To exercise the less common path more often, generate
@@ -783,8 +798,6 @@ func heapSetType(x, dataSize uintptr, typ *_type, header **_type, span *mspan) (
size = x + maxIterBytes - interior
}
doubleCheckHeapPointersInterior(x, interior, size, dataSize, gctyp, header, span)
}
return
}
func doubleCheckHeapPointers(x, dataSize uintptr, typ *_type, header **_type, span *mspan) {