
runtime: refactor mallocgc into several independent codepaths

Right now mallocgc is a monster of a function. In real programs, we see
that a substantial amount of time in mallocgc is spent in mallocgc
itself. It's very branch-y, holds a lot of state, and handles quite a few
disparate cases, trying to merge them together.

This change breaks apart mallocgc into separate, leaner functions.
There's some duplication now, but there are a lot of branches that can
be pruned as a result.
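
For orientation, the new top-level shape of mallocgc is a single dispatch over the
allocation's size and type (excerpted from the diff below): noscan objects split into
tiny and small paths, scannable objects split by whether their heap bits fit in the
span, and everything else takes the large-object path.

    var x unsafe.Pointer
    var elemsize uintptr
    if size <= maxSmallSize-mallocHeaderSize {
        if typ == nil || !typ.Pointers() {
            if size < maxTinySize {
                x, elemsize = mallocgcTiny(size, typ, needzero)
            } else {
                x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
            }
        } else if heapBitsInSpan(size) {
            x, elemsize = mallocgcSmallScanNoHeader(size, typ, needzero)
        } else {
            x, elemsize = mallocgcSmallScanHeader(size, typ, needzero)
        }
    } else {
        x, elemsize = mallocgcLarge(size, typ, needzero)
    }

Each leaf function then carries only the state its own case needs.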

There's definitely still more we can do here. heapSetType can be inlined
and broken down for each case, since its internals roughly map to each
case anyway (done in a follow-up CL). We can probably also do more with
the size class lookups, since we know more about the size of the object
in each case than before.
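
For reference, the size class lookup in question is the table lookup that each small
path still performs, as it appears in the diff below; each path now knows tighter
bounds on size up front, which is what would allow this lookup to be specialized.

    var sizeclass uint8
    if size <= smallSizeMax-8 {
        sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
    } else {
        sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
    }
    size = uintptr(class_to_size[sizeclass])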

Below are the savings for the full stack up until now.

                    │ after-3.out │              after-4.out              │
                    │   sec/op    │     sec/op      vs base               │
Malloc8-4             13.32n ± 2%   12.17n ±  1%     -8.63% (p=0.002 n=6)
Malloc16-4            21.64n ± 3%   19.38n ± 10%    -10.47% (p=0.002 n=6)
MallocTypeInfo8-4     23.15n ± 2%   19.91n ±  2%    -14.00% (p=0.002 n=6)
MallocTypeInfo16-4    25.86n ± 4%   22.48n ±  5%    -13.11% (p=0.002 n=6)
MallocLargeStruct-4                 270.0n ±   ∞ ¹
geomean               20.38n        30.97n          -11.58%

Change-Id: I681029c0b442f9221c4429950626f06299a5cfe4
Reviewed-on: https://go-review.googlesource.com/c/go/+/614257
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Michael Anthony Knyszek 2024-09-18 21:17:03 +00:00 committed by Gopher Robot
parent 60ee99cf5d
commit 8730fcf885
3 changed files with 492 additions and 208 deletions


@@ -1005,23 +1005,16 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
}
// Short-circuit zero-sized allocation requests.
if size == 0 {
return unsafe.Pointer(&zerobase)
}
// It's possible for any malloc to trigger sweeping, which may in
// turn queue finalizers. Record this dynamic lock edge.
// N.B. Compiled away if lockrank experiment is not enabled.
lockRankMayQueueFinalizer()
-userSize := size
-if asanenabled {
-// Refer to ASAN runtime library, the malloc() function allocates extra memory,
-// the redzone, around the user requested memory region. And the redzones are marked
-// as unaddressable. We perform the same operations in Go to detect the overflows or
-// underflows.
-size += computeRZlog(size)
-}
// Pre-malloc debug hooks.
if debug.malloc {
if x := preMallocgcDebug(size, typ); x != nil {
@@ -1029,11 +1022,67 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
}
// For ASAN, we allocate extra memory around each allocation called the "redzone."
// These "redzones" are marked as unaddressable.
var asanRZ uintptr
if asanenabled {
asanRZ = computeRZlog(size)
size += asanRZ
}
// Assist the GC if needed.
if gcBlackenEnabled != 0 {
deductAssistCredit(size)
}
// Actually do the allocation.
var x unsafe.Pointer
var elemsize uintptr
if size <= maxSmallSize-mallocHeaderSize {
if typ == nil || !typ.Pointers() {
if size < maxTinySize {
x, elemsize = mallocgcTiny(size, typ, needzero)
} else {
x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
}
} else if heapBitsInSpan(size) {
x, elemsize = mallocgcSmallScanNoHeader(size, typ, needzero)
} else {
x, elemsize = mallocgcSmallScanHeader(size, typ, needzero)
}
} else {
x, elemsize = mallocgcLarge(size, typ, needzero)
}
// Notify sanitizers, if enabled.
if raceenabled {
racemalloc(x, size-asanRZ)
}
if msanenabled {
msanmalloc(x, size-asanRZ)
}
if asanenabled {
// Poison the space between the end of the requested size of x
// and the end of the slot. Unpoison the requested allocation.
asanpoison(unsafe.Add(x, size-asanRZ), asanRZ+(elemsize-size))
asanunpoison(x, size-asanRZ)
}
// Adjust our GC assist debt to account for internal fragmentation.
if gcBlackenEnabled != 0 && elemsize != 0 {
if assistG := getg().m.curg; assistG != nil {
assistG.gcAssistBytes -= int64(elemsize - size)
}
}
// Post-malloc debug hooks.
if debug.malloc {
postMallocgcDebug(x, elemsize, typ)
}
return x
}
func mallocgcTiny(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
@@ -1043,153 +1092,84 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ != nil && typ.Pointers() {
throw("expected noscan for tiny alloc")
}
}
mp.mallocing = 1
-checkGCTrigger := false
-dataSize := userSize
-c := getMCache(mp)
-if c == nil {
-throw("mallocgc called without a P or outside bootstrapping")
-}
-var span *mspan
-var header **_type
-var x unsafe.Pointer
-noscan := typ == nil || !typ.Pointers()
-// In some cases block zeroing can profitably (for latency reduction purposes)
-// be delayed till preemption is possible; delayedZeroing tracks that state.
-delayedZeroing := false
-// Determine if it's a 'small' object that goes into a size-classed span.
-//
-// Note: This comparison looks a little strange, but it exists to smooth out
-// the crossover between the largest size class and large objects that have
-// their own spans. The small window of object sizes between maxSmallSize-mallocHeaderSize
-// and maxSmallSize will be considered large, even though they might fit in
-// a size class. In practice this is completely fine, since the largest small
-// size class has a single object in it already, precisely to make the transition
-// to large objects smooth.
-if size <= maxSmallSize-mallocHeaderSize {
-if noscan && size < maxTinySize {
-// Tiny allocator.
-//
-// Tiny allocator combines several tiny allocation requests
-// into a single memory block. The resulting memory block
-// is freed when all subobjects are unreachable. The subobjects
-// must be noscan (don't have pointers), this ensures that
-// the amount of potentially wasted memory is bounded.
-//
-// Size of the memory block used for combining (maxTinySize) is tunable.
-// Current setting is 16 bytes, which relates to 2x worst case memory
-// wastage (when all but one subobjects are unreachable).
-// 8 bytes would result in no wastage at all, but provides less
-// opportunities for combining.
-// 32 bytes provides more opportunities for combining,
-// but can lead to 4x worst case wastage.
-// The best case winning is 8x regardless of block size.
-//
-// Objects obtained from tiny allocator must not be freed explicitly.
-// So when an object will be freed explicitly, we ensure that
-// its size >= maxTinySize.
-//
-// SetFinalizer has a special case for objects potentially coming
-// from tiny allocator, it such case it allows to set finalizers
-// for an inner byte of a memory block.
-//
-// The main targets of tiny allocator are small strings and
-// standalone escaping variables. On a json benchmark
-// the allocator reduces number of allocations by ~12% and
-// reduces heap size by ~20%.
-off := c.tinyoffset
-// Align tiny pointer for required (conservative) alignment.
-if size&7 == 0 {
-off = alignUp(off, 8)
-} else if goarch.PtrSize == 4 && size == 12 {
-// Conservatively align 12-byte objects to 8 bytes on 32-bit
-// systems so that objects whose first field is a 64-bit
-// value is aligned to 8 bytes and does not cause a fault on
-// atomic access. See issue 37262.
-// TODO(mknyszek): Remove this workaround if/when issue 36606
-// is resolved.
-off = alignUp(off, 8)
-} else if size&3 == 0 {
-off = alignUp(off, 4)
-} else if size&1 == 0 {
-off = alignUp(off, 2)
-}
-if off+size <= maxTinySize && c.tiny != 0 {
-// The object fits into existing tiny block.
-x = unsafe.Pointer(c.tiny + off)
-c.tinyoffset = off + size
-c.tinyAllocs++
-mp.mallocing = 0
-releasem(mp)
-return x
-}
-// Allocate a new maxTinySize block.
-span = c.alloc[tinySpanClass]
-v := nextFreeFast(span)
-if v == 0 {
-v, span, checkGCTrigger = c.nextFree(tinySpanClass)
-}
-x = unsafe.Pointer(v)
-(*[2]uint64)(x)[0] = 0
-(*[2]uint64)(x)[1] = 0
-// See if we need to replace the existing tiny block with the new one
-// based on amount of remaining free space.
-if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
-// Note: disabled when race detector is on, see comment near end of this function.
-c.tiny = uintptr(x)
-c.tinyoffset = size
-}
-size = maxTinySize
-} else {
-hasHeader := !noscan && !heapBitsInSpan(size)
-if hasHeader {
-size += mallocHeaderSize
-}
-var sizeclass uint8
-if size <= smallSizeMax-8 {
-sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
-} else {
-sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
-}
-size = uintptr(class_to_size[sizeclass])
-spc := makeSpanClass(sizeclass, noscan)
-span = c.alloc[spc]
-v := nextFreeFast(span)
-if v == 0 {
-v, span, checkGCTrigger = c.nextFree(spc)
-}
-x = unsafe.Pointer(v)
-if needzero && span.needzero != 0 {
-memclrNoHeapPointers(x, size)
-}
-if hasHeader {
-header = (**_type)(x)
-x = add(x, mallocHeaderSize)
-size -= mallocHeaderSize
-}
-}
-} else {
-checkGCTrigger = true
-// For large allocations, keep track of zeroed state so that
-// bulk zeroing can be happen later in a preemptible context.
-span = c.allocLarge(size, noscan)
-span.freeindex = 1
-span.allocCount = 1
-size = span.elemsize
-x = unsafe.Pointer(span.base())
-if needzero && span.needzero != 0 {
-delayedZeroing = true
-}
-if !noscan {
-// Tell the GC not to look at this yet.
-span.largeType = nil
-header = &span.largeType
-}
-}
-if !noscan && !delayedZeroing {
-c.scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
-}
// Tiny allocator.
//
// Tiny allocator combines several tiny allocation requests
// into a single memory block. The resulting memory block
// is freed when all subobjects are unreachable. The subobjects
// must be noscan (don't have pointers), this ensures that
// the amount of potentially wasted memory is bounded.
//
// Size of the memory block used for combining (maxTinySize) is tunable.
// Current setting is 16 bytes, which relates to 2x worst case memory
// wastage (when all but one subobjects are unreachable).
// 8 bytes would result in no wastage at all, but provides less
// opportunities for combining.
// 32 bytes provides more opportunities for combining,
// but can lead to 4x worst case wastage.
// The best case winning is 8x regardless of block size.
//
// Objects obtained from tiny allocator must not be freed explicitly.
// So when an object will be freed explicitly, we ensure that
// its size >= maxTinySize.
//
// SetFinalizer has a special case for objects potentially coming
// from tiny allocator, it such case it allows to set finalizers
// for an inner byte of a memory block.
//
// The main targets of tiny allocator are small strings and
// standalone escaping variables. On a json benchmark
// the allocator reduces number of allocations by ~12% and
// reduces heap size by ~20%.
c := getMCache(mp)
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
off = alignUp(off, 8)
} else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
// atomic access. See issue 37262.
// TODO(mknyszek): Remove this workaround if/when issue 36606
// is resolved.
off = alignUp(off, 8)
} else if size&3 == 0 {
off = alignUp(off, 4)
} else if size&1 == 0 {
off = alignUp(off, 2)
}
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
x := unsafe.Pointer(c.tiny + off)
c.tinyoffset = off + size
c.tinyAllocs++
mp.mallocing = 0
releasem(mp)
return x, 0
}
// Allocate a new maxTinySize block.
checkGCTrigger := false
span := c.alloc[tinySpanClass]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(tinySpanClass)
}
x := unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
// Note: disabled when race detector is on, see comment near end of this function.
c.tiny = uintptr(x)
c.tinyoffset = size
}
// Ensure that the stores above that initialize x to
@@ -1218,25 +1198,6 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
gcmarknewobject(span, uintptr(x))
}
-if raceenabled {
-racemalloc(x, size)
-}
-if msanenabled {
-msanmalloc(x, size)
-}
-if asanenabled {
-// We should only read/write the memory with the size asked by the user.
-// The rest of the allocated memory should be poisoned, so that we can report
-// errors when accessing poisoned memory.
-// The allocated memory is larger than required userSize, it will also include
-// redzone and some other padding bytes.
-rzBeg := unsafe.Add(x, userSize)
-asanpoison(rzBeg, size-userSize)
-asanunpoison(x, userSize)
-}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
@@ -1246,52 +1207,20 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
-fullSize := span.elemsize
-c.nextSample -= int64(fullSize)
c.nextSample -= int64(span.elemsize)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
-profilealloc(mp, x, fullSize)
profilealloc(mp, x, span.elemsize)
}
mp.mallocing = 0
releasem(mp)
-// Objects can be zeroed late in a context where preemption can occur.
-// If the object contains pointers, its pointer data must be cleared
-// or otherwise indicate that the GC shouldn't scan it.
-// x will keep the memory alive.
-if delayedZeroing {
-// N.B. size == fullSize always in this case.
-memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
-// Finish storing the type information for this case.
-if !noscan {
-mp := acquirem()
-getMCache(mp).scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
-// Publish the type information with the zeroed memory.
-publicationBarrier()
-releasem(mp)
-}
-}
-// Post-malloc debug hooks.
-if debug.malloc {
-postMallocgcDebug(x, fullSize, typ)
-}
-// Adjust our GC assist debt to account for internal fragmentation.
-if gcBlackenEnabled != 0 {
-if assistG := getg().m.curg; assistG != nil {
-assistG.gcAssistBytes -= int64(fullSize - size)
-}
-}
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
-if raceenabled && noscan && dataSize < maxTinySize {
if raceenabled {
// Pad tinysize allocations so they are aligned with the end
// of the tinyalloc region. This ensures that any arithmetic
// that goes off the top end of the object will be detectable
@@ -1304,10 +1233,364 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// TODO: enable this padding for all allocations, not just
// tinyalloc ones. It's tricky because of pointer maps.
// Maybe just all noscan objects?
-x = add(x, size-dataSize)
x = add(x, span.elemsize-size)
}
return x, span.elemsize
}
func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ != nil && typ.Pointers() {
throw("expected noscan type for noscan alloc")
}
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, true)
span := c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
-return x
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, size
}
func mallocgcSmallScanNoHeader(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ == nil || !typ.Pointers() {
throw("noscan allocated in scan-only path")
}
if !heapBitsInSpan(size) {
throw("heap bits in not in span for non-header-only path")
}
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
sizeclass := size_to_class8[divRoundUp(size, smallSizeDiv)]
spc := makeSpanClass(sizeclass, false)
span := c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
c.scanAlloc += heapSetType(uintptr(x), size, typ, nil, span)
size = uintptr(class_to_size[sizeclass])
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, size
}
func mallocgcSmallScanHeader(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ == nil || !typ.Pointers() {
throw("noscan allocated in scan-only path")
}
if heapBitsInSpan(size) {
throw("heap bits in span for header-only path")
}
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
size += mallocHeaderSize
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, false)
span := c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
header := (**_type)(x)
x = add(x, mallocHeaderSize)
c.scanAlloc += heapSetType(uintptr(x), size-mallocHeaderSize, typ, header, span)
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, size
}
func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
}
mp.mallocing = 1
c := getMCache(mp)
// For large allocations, keep track of zeroed state so that
// bulk zeroing can be happen later in a preemptible context.
span := c.allocLarge(size, typ == nil || !typ.Pointers())
span.freeindex = 1
span.allocCount = 1
span.largeType = nil // Tell the GC not to look at this yet.
size = span.elemsize
x := unsafe.Pointer(span.base())
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
// Check to see if we need to trigger the GC.
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
// Objects can be zeroed late in a context where preemption can occur.
// If the object contains pointers, its pointer data must be cleared
// or otherwise indicate that the GC shouldn't scan it.
// x will keep the memory alive.
if noscan := typ == nil || !typ.Pointers(); !noscan || (needzero && span.needzero != 0) {
// N.B. size == fullSize always in this case.
memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
// Finish storing the type information for this case.
if !noscan {
mp := acquirem()
getMCache(mp).scanAlloc += heapSetType(uintptr(x), size, typ, &span.largeType, span)
// Publish the type information with the zeroed memory.
publicationBarrier()
releasem(mp)
}
}
return x, size
}
func preMallocgcDebug(size uintptr, typ *_type) unsafe.Pointer {


@@ -1695,6 +1695,7 @@ func gcmarknewobject(span *mspan, obj uintptr) {
throw("gcmarknewobject called while doing checkmark")
}
if gcphase == _GCmarktermination {
// Check this here instead of on the hot path.
throw("mallocgc called with gcphase == _GCmarktermination")
} }


@@ -444,7 +444,7 @@ func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
}
// Only use the part of mp.profStack we need and ignore the extra space
// reserved for delayed inline expansion with frame pointer unwinding.
-nstk := callers(4, mp.profStack[:debug.profstackdepth])
nstk := callers(5, mp.profStack[:debug.profstackdepth])
index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
b := stkbucket(memProfile, size, mp.profStack[:nstk], true)