
runtime: refactor mallocgc into several independent codepaths

Right now mallocgc is a monster of a function. In real programs, we see
that a substantial amount of time in mallocgc is spent in mallocgc
itself. It's very branch-y, holds a lot of state, and handles quite a few
disparate cases, trying to merge them together.

This change breaks apart mallocgc into separate, leaner functions.
There's some duplication now, but there are a lot of branches that can
be pruned as a result.

There's definitely still more we can do here. heapSetType can be inlined
and broken down for each case, since its internals roughly map to each
case anyway (done in a follow-up CL). We can probably also do more with
the size class lookups, since we know more about the size of the object
in each case than before.
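
For orientation, the allocation entry point now reduces to a single size/shape dispatch. The sketch below is condensed from the mallocgc hunk later in this diff, with comments added; it is a summary, not verbatim source:

// Condensed view of the new dispatch in mallocgc.
if size <= maxSmallSize-mallocHeaderSize {
	if typ == nil || !typ.Pointers() {
		if size < maxTinySize {
			x, elemsize = mallocgcTiny(size, typ, needzero) // tiny, pointer-free
		} else {
			x, elemsize = mallocgcSmallNoscan(size, typ, needzero) // small, pointer-free
		}
	} else if heapBitsInSpan(size) {
		x, elemsize = mallocgcSmallScanNoHeader(size, typ, needzero) // small, pointer bits kept in the span
	} else {
		x, elemsize = mallocgcSmallScanHeader(size, typ, needzero) // small, with a malloc header
	}
} else {
	x, elemsize = mallocgcLarge(size, typ, needzero) // large objects get a dedicated span
}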

Below are the savings for the full stack up until now.

                    │ after-3.out │              after-4.out              │
                    │   sec/op    │     sec/op      vs base               │
Malloc8-4             13.32n ± 2%   12.17n ±  1%     -8.63% (p=0.002 n=6)
Malloc16-4            21.64n ± 3%   19.38n ± 10%    -10.47% (p=0.002 n=6)
MallocTypeInfo8-4     23.15n ± 2%   19.91n ±  2%    -14.00% (p=0.002 n=6)
MallocTypeInfo16-4    25.86n ± 4%   22.48n ±  5%    -13.11% (p=0.002 n=6)
MallocLargeStruct-4                 270.0n ±   ∞ ¹
geomean               20.38n        30.97n          -11.58%
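
(MallocLargeStruct has results only in the after-4 column, so the two per-column geomeans cover different benchmark sets; the -11.58% is the geomean of the matched pairs above.) The benchmark names are the runtime package's allocation microbenchmarks; a minimal sketch of that style of benchmark is below. The sink variable and struct layout are illustrative, not the actual test source:

package malloc_test

import "testing"

// Assigning to a package-level sink forces each allocation to escape to the heap.
var sink any

type typeInfo16 struct { // 16 bytes on 64-bit, contains a pointer, so it takes a scan path
	p *int
	x int
}

func BenchmarkMalloc8(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = new(int64) // 8-byte, pointer-free allocation
	}
}

func BenchmarkMallocTypeInfo16(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = new(typeInfo16) // 16-byte allocation whose type has pointer bits
	}
}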

Change-Id: I681029c0b442f9221c4429950626f06299a5cfe4
Reviewed-on: https://go-review.googlesource.com/c/go/+/614257
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Michael Anthony Knyszek authored 2024-09-18 21:17:03 +00:00; committed by Gopher Robot
parent 60ee99cf5d
commit 8730fcf885
3 changed files with 492 additions and 208 deletions


@@ -1005,23 +1005,16 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
}
// Short-circuit zero-sized allocation requests.
if size == 0 {
return unsafe.Pointer(&zerobase)
}
// It's possible for any malloc to trigger sweeping, which may in
// turn queue finalizers. Record this dynamic lock edge.
// N.B. Compiled away if lockrank experiment is not enabled.
lockRankMayQueueFinalizer()
userSize := size
if asanenabled {
// Refer to ASAN runtime library, the malloc() function allocates extra memory,
// the redzone, around the user requested memory region. And the redzones are marked
// as unaddressable. We perform the same operations in Go to detect the overflows or
// underflows.
size += computeRZlog(size)
}
// Pre-malloc debug hooks.
if debug.malloc {
if x := preMallocgcDebug(size, typ); x != nil {
@@ -1029,11 +1022,67 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
}
// For ASAN, we allocate extra memory around each allocation called the "redzone."
// These "redzones" are marked as unaddressable.
var asanRZ uintptr
if asanenabled {
asanRZ = computeRZlog(size)
size += asanRZ
}
// Assist the GC if needed.
if gcBlackenEnabled != 0 {
deductAssistCredit(size)
}
// Actually do the allocation.
var x unsafe.Pointer
var elemsize uintptr
if size <= maxSmallSize-mallocHeaderSize {
if typ == nil || !typ.Pointers() {
if size < maxTinySize {
x, elemsize = mallocgcTiny(size, typ, needzero)
} else {
x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
}
} else if heapBitsInSpan(size) {
x, elemsize = mallocgcSmallScanNoHeader(size, typ, needzero)
} else {
x, elemsize = mallocgcSmallScanHeader(size, typ, needzero)
}
} else {
x, elemsize = mallocgcLarge(size, typ, needzero)
}
// Notify sanitizers, if enabled.
if raceenabled {
racemalloc(x, size-asanRZ)
}
if msanenabled {
msanmalloc(x, size-asanRZ)
}
if asanenabled {
// Poison the space between the end of the requested size of x
// and the end of the slot. Unpoison the requested allocation.
asanpoison(unsafe.Add(x, size-asanRZ), asanRZ+(elemsize-size))
asanunpoison(x, size-asanRZ)
}
// Adjust our GC assist debt to account for internal fragmentation.
if gcBlackenEnabled != 0 && elemsize != 0 {
if assistG := getg().m.curg; assistG != nil {
assistG.gcAssistBytes -= int64(elemsize - size)
}
}
// Post-malloc debug hooks.
if debug.malloc {
postMallocgcDebug(x, elemsize, typ)
}
return x
}
func mallocgcTiny(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
@@ -1043,153 +1092,84 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ != nil && typ.Pointers() {
throw("expected noscan for tiny alloc")
}
}
mp.mallocing = 1
checkGCTrigger := false
dataSize := userSize
c := getMCache(mp)
if c == nil {
throw("mallocgc called without a P or outside bootstrapping")
}
var span *mspan
var header **_type
var x unsafe.Pointer
noscan := typ == nil || !typ.Pointers()
// In some cases block zeroing can profitably (for latency reduction purposes)
// be delayed till preemption is possible; delayedZeroing tracks that state.
delayedZeroing := false
// Determine if it's a 'small' object that goes into a size-classed span.
// Tiny allocator.
//
// Note: This comparison looks a little strange, but it exists to smooth out
// the crossover between the largest size class and large objects that have
// their own spans. The small window of object sizes between maxSmallSize-mallocHeaderSize
// and maxSmallSize will be considered large, even though they might fit in
// a size class. In practice this is completely fine, since the largest small
// size class has a single object in it already, precisely to make the transition
// to large objects smooth.
if size <= maxSmallSize-mallocHeaderSize {
if noscan && size < maxTinySize {
// Tiny allocator.
//
// Tiny allocator combines several tiny allocation requests
// into a single memory block. The resulting memory block
// is freed when all subobjects are unreachable. The subobjects
// must be noscan (don't have pointers), this ensures that
// the amount of potentially wasted memory is bounded.
//
// Size of the memory block used for combining (maxTinySize) is tunable.
// Current setting is 16 bytes, which relates to 2x worst case memory
// wastage (when all but one subobjects are unreachable).
// 8 bytes would result in no wastage at all, but provides less
// opportunities for combining.
// 32 bytes provides more opportunities for combining,
// but can lead to 4x worst case wastage.
// The best case winning is 8x regardless of block size.
//
// Objects obtained from tiny allocator must not be freed explicitly.
// So when an object will be freed explicitly, we ensure that
// its size >= maxTinySize.
//
// SetFinalizer has a special case for objects potentially coming
// from tiny allocator, it such case it allows to set finalizers
// for an inner byte of a memory block.
//
// The main targets of tiny allocator are small strings and
// standalone escaping variables. On a json benchmark
// the allocator reduces number of allocations by ~12% and
// reduces heap size by ~20%.
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
off = alignUp(off, 8)
} else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
// atomic access. See issue 37262.
// TODO(mknyszek): Remove this workaround if/when issue 36606
// is resolved.
off = alignUp(off, 8)
} else if size&3 == 0 {
off = alignUp(off, 4)
} else if size&1 == 0 {
off = alignUp(off, 2)
}
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
x = unsafe.Pointer(c.tiny + off)
c.tinyoffset = off + size
c.tinyAllocs++
mp.mallocing = 0
releasem(mp)
return x
}
// Allocate a new maxTinySize block.
span = c.alloc[tinySpanClass]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(tinySpanClass)
}
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
// Note: disabled when race detector is on, see comment near end of this function.
c.tiny = uintptr(x)
c.tinyoffset = size
}
size = maxTinySize
} else {
hasHeader := !noscan && !heapBitsInSpan(size)
if hasHeader {
size += mallocHeaderSize
}
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, noscan)
span = c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
if hasHeader {
header = (**_type)(x)
x = add(x, mallocHeaderSize)
size -= mallocHeaderSize
}
}
} else {
checkGCTrigger = true
// For large allocations, keep track of zeroed state so that
// bulk zeroing can be happen later in a preemptible context.
span = c.allocLarge(size, noscan)
span.freeindex = 1
span.allocCount = 1
size = span.elemsize
x = unsafe.Pointer(span.base())
if needzero && span.needzero != 0 {
delayedZeroing = true
}
if !noscan {
// Tell the GC not to look at this yet.
span.largeType = nil
header = &span.largeType
}
// Tiny allocator combines several tiny allocation requests
// into a single memory block. The resulting memory block
// is freed when all subobjects are unreachable. The subobjects
// must be noscan (don't have pointers), this ensures that
// the amount of potentially wasted memory is bounded.
//
// Size of the memory block used for combining (maxTinySize) is tunable.
// Current setting is 16 bytes, which relates to 2x worst case memory
// wastage (when all but one subobjects are unreachable).
// 8 bytes would result in no wastage at all, but provides less
// opportunities for combining.
// 32 bytes provides more opportunities for combining,
// but can lead to 4x worst case wastage.
// The best case winning is 8x regardless of block size.
//
// Objects obtained from tiny allocator must not be freed explicitly.
// So when an object will be freed explicitly, we ensure that
// its size >= maxTinySize.
//
// SetFinalizer has a special case for objects potentially coming
// from tiny allocator, it such case it allows to set finalizers
// for an inner byte of a memory block.
//
// The main targets of tiny allocator are small strings and
// standalone escaping variables. On a json benchmark
// the allocator reduces number of allocations by ~12% and
// reduces heap size by ~20%.
c := getMCache(mp)
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
off = alignUp(off, 8)
} else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
// atomic access. See issue 37262.
// TODO(mknyszek): Remove this workaround if/when issue 36606
// is resolved.
off = alignUp(off, 8)
} else if size&3 == 0 {
off = alignUp(off, 4)
} else if size&1 == 0 {
off = alignUp(off, 2)
}
if !noscan && !delayedZeroing {
c.scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
x := unsafe.Pointer(c.tiny + off)
c.tinyoffset = off + size
c.tinyAllocs++
mp.mallocing = 0
releasem(mp)
return x, 0
}
// Allocate a new maxTinySize block.
checkGCTrigger := false
span := c.alloc[tinySpanClass]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(tinySpanClass)
}
x := unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
// Note: disabled when race detector is on, see comment near end of this function.
c.tiny = uintptr(x)
c.tinyoffset = size
}
// Ensure that the stores above that initialize x to
@@ -1218,25 +1198,6 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
gcmarknewobject(span, uintptr(x))
}
if raceenabled {
racemalloc(x, size)
}
if msanenabled {
msanmalloc(x, size)
}
if asanenabled {
// We should only read/write the memory with the size asked by the user.
// The rest of the allocated memory should be poisoned, so that we can report
// errors when accessing poisoned memory.
// The allocated memory is larger than required userSize, it will also include
// redzone and some other padding bytes.
rzBeg := unsafe.Add(x, userSize)
asanpoison(rzBeg, size-userSize)
asanunpoison(x, userSize)
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
@@ -1246,52 +1207,20 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
fullSize := span.elemsize
c.nextSample -= int64(fullSize)
c.nextSample -= int64(span.elemsize)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, fullSize)
profilealloc(mp, x, span.elemsize)
}
mp.mallocing = 0
releasem(mp)
// Objects can be zeroed late in a context where preemption can occur.
// If the object contains pointers, its pointer data must be cleared
// or otherwise indicate that the GC shouldn't scan it.
// x will keep the memory alive.
if delayedZeroing {
// N.B. size == fullSize always in this case.
memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
// Finish storing the type information for this case.
if !noscan {
mp := acquirem()
getMCache(mp).scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
// Publish the type information with the zeroed memory.
publicationBarrier()
releasem(mp)
}
}
// Post-malloc debug hooks.
if debug.malloc {
postMallocgcDebug(x, fullSize, typ)
}
// Adjust our GC assist debt to account for internal fragmentation.
if gcBlackenEnabled != 0 {
if assistG := getg().m.curg; assistG != nil {
assistG.gcAssistBytes -= int64(fullSize - size)
}
}
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
if raceenabled && noscan && dataSize < maxTinySize {
if raceenabled {
// Pad tinysize allocations so they are aligned with the end
// of the tinyalloc region. This ensures that any arithmetic
// that goes off the top end of the object will be detectable
@@ -1304,10 +1233,364 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
// TODO: enable this padding for all allocations, not just
// tinyalloc ones. It's tricky because of pointer maps.
// Maybe just all noscan objects?
x = add(x, size-dataSize)
x = add(x, span.elemsize-size)
}
return x, span.elemsize
}
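
The tiny-allocator comments above describe a bump allocator over a 16-byte block with conservative alignment. A self-contained toy model of that idea, outside the runtime and omitting the 32-bit 12-byte special case, looks like this:

package main

import "fmt"

const maxTiny = 16 // plays the role of maxTinySize: the combining block size

// tinyArena is a toy stand-in for the per-P tiny state (c.tiny / c.tinyoffset).
type tinyArena struct {
	block  [maxTiny]byte
	offset uintptr
}

func alignUp(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

// alloc returns an offset into the block for a pointer-free object of the given
// size, or false if it does not fit and a fresh block would be needed.
func (t *tinyArena) alloc(size uintptr) (uintptr, bool) {
	off := t.offset
	// Conservative alignment, mirroring the rules in mallocgcTiny.
	switch {
	case size&7 == 0:
		off = alignUp(off, 8)
	case size&3 == 0:
		off = alignUp(off, 4)
	case size&1 == 0:
		off = alignUp(off, 2)
	}
	if off+size > maxTiny {
		return 0, false
	}
	t.offset = off + size
	return off, true
}

func main() {
	var t tinyArena
	for _, size := range []uintptr{3, 5, 8} {
		off, ok := t.alloc(size)
		fmt.Printf("size=%d -> offset=%d fit=%v\n", size, off, ok)
	}
}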
func mallocgcSmallNoscan(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ != nil && typ.Pointers() {
throw("expected noscan type for noscan alloc")
}
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, true)
span := c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
return x
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, size
}
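
The size-class selection above (size_to_class8/size_to_class128 plus class_to_size) is a table-driven round-up: divide the request by a granularity, index a table to get a class, then read that class's slot size. A toy version with made-up class sizes, not the generated runtime tables:

package main

import "fmt"

const granularity = 8 // illustrative; the real tables use smallSizeDiv/largeSizeDiv

var classToSize = []uintptr{0, 8, 16, 24, 32, 48, 64}

// sizeToClass is indexed by divRoundUp(size, granularity).
var sizeToClass = []uint8{0, 1, 2, 3, 4, 5, 5, 6, 6}

func divRoundUp(n, a uintptr) uintptr { return (n + a - 1) / a }

// roundedSize maps a requested size to the slot size that will actually be allocated.
func roundedSize(size uintptr) uintptr {
	return classToSize[sizeToClass[divRoundUp(size, granularity)]]
}

func main() {
	for _, size := range []uintptr{1, 8, 9, 33, 41, 64} {
		fmt.Printf("request %2d -> slot %2d\n", size, roundedSize(size))
	}
}

The rounded-up slot size is also what feeds the internal-fragmentation adjustment (elemsize - size) back in the mallocgc dispatcher.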
func mallocgcSmallScanNoHeader(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ == nil || !typ.Pointers() {
throw("noscan allocated in scan-only path")
}
if !heapBitsInSpan(size) {
throw("heap bits in not in span for non-header-only path")
}
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
sizeclass := size_to_class8[divRoundUp(size, smallSizeDiv)]
spc := makeSpanClass(sizeclass, false)
span := c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
c.scanAlloc += heapSetType(uintptr(x), size, typ, nil, span)
size = uintptr(class_to_size[sizeclass])
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, size
}
func mallocgcSmallScanHeader(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
if typ == nil || !typ.Pointers() {
throw("noscan allocated in scan-only path")
}
if heapBitsInSpan(size) {
throw("heap bits in span for header-only path")
}
}
mp.mallocing = 1
checkGCTrigger := false
c := getMCache(mp)
size += mallocHeaderSize
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, false)
span := c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, checkGCTrigger = c.nextFree(spc)
}
x := unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
header := (**_type)(x)
x = add(x, mallocHeaderSize)
c.scanAlloc += heapSetType(uintptr(x), size-mallocHeaderSize, typ, header, span)
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
if checkGCTrigger {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
return x, size
}
func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if doubleCheckMalloc {
if mp.mallocing != 0 {
throw("malloc deadlock")
}
if mp.gsignal == getg() {
throw("malloc during signal")
}
}
mp.mallocing = 1
c := getMCache(mp)
// For large allocations, keep track of zeroed state so that
// bulk zeroing can be happen later in a preemptible context.
span := c.allocLarge(size, typ == nil || !typ.Pointers())
span.freeindex = 1
span.allocCount = 1
span.largeType = nil // Tell the GC not to look at this yet.
size = span.elemsize
x := unsafe.Pointer(span.base())
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// Allocate black during GC.
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
if writeBarrier.enabled {
gcmarknewobject(span, uintptr(x))
}
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
//
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
c.nextSample -= int64(size)
if c.nextSample < 0 || MemProfileRate != c.memProfRate {
profilealloc(mp, x, size)
}
mp.mallocing = 0
releasem(mp)
// Check to see if we need to trigger the GC.
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
// Objects can be zeroed late in a context where preemption can occur.
// If the object contains pointers, its pointer data must be cleared
// or otherwise indicate that the GC shouldn't scan it.
// x will keep the memory alive.
if noscan := typ == nil || !typ.Pointers(); !noscan || (needzero && span.needzero != 0) {
// N.B. size == fullSize always in this case.
memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
// Finish storing the type information for this case.
if !noscan {
mp := acquirem()
getMCache(mp).scanAlloc += heapSetType(uintptr(x), size, typ, &span.largeType, span)
// Publish the type information with the zeroed memory.
publicationBarrier()
releasem(mp)
}
}
return x, size
}
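
mallocgcLarge defers zeroing (and, for pointer-full types, publishing the type) until after the span is set up, doing the clear in a preemptible context via memclrNoHeapPointersChunked. A toy illustration of the chunked-clearing idea, with an arbitrary chunk size:

package main

import "fmt"

// clearChunked zeroes a large buffer in bounded chunks so the work is broken
// into interruptible pieces rather than one long clear. In the runtime,
// memclrNoHeapPointersChunked plays this role for large allocations.
func clearChunked(buf []byte) {
	const chunk = 256 << 10 // 256 KiB per step (illustrative)
	for len(buf) > 0 {
		n := len(buf)
		if n > chunk {
			n = chunk
		}
		clear(buf[:n]) // chunk boundaries are where the runtime can check for preemption
		buf = buf[n:]
	}
}

func main() {
	buf := make([]byte, 1<<20)
	buf[0] = 0xff
	clearChunked(buf)
	fmt.Println(buf[0]) // 0
}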
func preMallocgcDebug(size uintptr, typ *_type) unsafe.Pointer {


@@ -1695,6 +1695,7 @@ func gcmarknewobject(span *mspan, obj uintptr) {
throw("gcmarknewobject called while doing checkmark")
}
if gcphase == _GCmarktermination {
// Check this here instead of on the hot path.
throw("mallocgc called with gcphase == _GCmarktermination")
}


@@ -444,7 +444,7 @@ func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
}
// Only use the part of mp.profStack we need and ignore the extra space
// reserved for delayed inline expansion with frame pointer unwinding.
nstk := callers(4, mp.profStack[:debug.profstackdepth])
nstk := callers(5, mp.profStack[:debug.profstackdepth])
index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
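
The last hunk bumps the skipped frame count in mProf_Malloc from 4 to 5, presumably because the refactor inserts one extra call frame (mallocgc now calls into mallocgcTiny/mallocgcSmall*/mallocgcLarge) on the path between the allocation site and the profiling code, and the recorded stack should still start at the allocation site. The exported runtime.Callers exposes similar skip semantics; a small standalone example (function names are made up):

package main

import (
	"fmt"
	"runtime"
)

// leaf prints its call stack starting 'skip' frames above runtime.Callers:
// skip=0 is Callers itself, skip=1 is leaf, skip=2 is leaf's caller, and so on.
func leaf(skip int) {
	pcs := make([]uintptr, 16)
	n := runtime.Callers(skip, pcs)
	frames := runtime.CallersFrames(pcs[:n])
	for {
		f, more := frames.Next()
		fmt.Println(f.Function)
		if !more {
			break
		}
	}
}

func middle() { leaf(2) } // starts the trace at middle, hiding Callers and leaf

func main() {
	middle()
	// With leaf(3) the trace would start at main instead; bumping the skip
	// constant is exactly the adjustment needed when an extra call frame is
	// inserted on the path to Callers.
}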