runtime: optimize 8-byte allocation pointer data writing
This change brings back a minor optimization lost in the Go 1.22 cycle
wherein the 8-byte pointer-ful span class spans would have the pointer
bitmap written ahead of time in bulk, because there's only one possible
pattern.

                      │   before    │               after                │
                      │   sec/op    │   sec/op     vs base               │
MallocTypeInfo8-4       25.13n ± 1%   23.59n ± 2%  -6.15% (p=0.002 n=6)

Change-Id: I135b84bb1d5b7e678b841b56430930bc73c0a038
Reviewed-on: https://go-review.googlesource.com/c/go/+/614256
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
parent 56fb8350c8
commit 721c04ae4e
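The optimization rests on one fact, which the hunks below encode with the check on goarch.PtrSize == 8 and s.spanclass.sizeclass() == 1: the 8-byte pointer-ful size class holds objects that are each exactly one pointer word, so the span's pointer bitmap has only one possible contents, all ones. The toy program below is a simplified model of that idea (its span type, names, and sizes are made up for illustration and are not the runtime's data structures): writing the bitmap once in bulk at span initialization produces exactly what per-allocation bit writes would.

package main

import "fmt"

const ptrBits = 64 // bits per bitmap word on a 64-bit system

// span is a toy model of a span's inline pointer bitmap: one bit per
// 8-byte word of the span. Simplified for illustration only.
type span struct {
	bitmap []uintptr
}

// bulkInit models the restored behavior: in the 8-byte pointer-ful size
// class every object is exactly one pointer word, so every bit is 1 and
// the whole bitmap can be written once, up front.
func (s *span) bulkInit() {
	for i := range s.bitmap {
		s.bitmap[i] = ^uintptr(0)
	}
}

// setBit models the per-allocation alternative: each allocation writes
// its own bit when its type information is processed.
func (s *span) setBit(obj int) {
	s.bitmap[obj/ptrBits] |= 1 << (uint(obj) % ptrBits)
}

func main() {
	const objects = 512 // pretend the span holds 512 eight-byte objects

	bulk := span{bitmap: make([]uintptr, objects/ptrBits)}
	bulk.bulkInit()

	perAlloc := span{bitmap: make([]uintptr, objects/ptrBits)}
	for obj := 0; obj < objects; obj++ {
		perAlloc.setBit(obj)
	}

	// There is only one possible pattern for this size class, so the
	// bulk write and the per-allocation writes end up identical.
	same := true
	for i := range bulk.bitmap {
		if bulk.bitmap[i] != perAlloc.bitmap[i] {
			same = false
		}
	}
	fmt.Println("bitmaps identical:", same) // true
}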
@@ -1088,7 +1088,7 @@ func (h *mheap) allocUserArenaChunk() *mspan {
 
 	// This must clear the entire heap bitmap so that it's safe
 	// to allocate noscan data without writing anything out.
-	s.initHeapBits(true)
+	s.initHeapBits()
 
 	// Clear the span preemptively. It's an arena chunk, so let's assume
 	// everything is going to be used.
@@ -535,12 +535,13 @@ func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr, typ *abi.Type) {
 }
 
 // initHeapBits initializes the heap bitmap for a span.
-//
-// TODO(mknyszek): This should set the heap bits for single pointer
-// allocations eagerly to avoid calling heapSetType at allocation time,
-// just to write one bit.
-func (s *mspan) initHeapBits(forceClear bool) {
-	if (!s.spanclass.noscan() && heapBitsInSpan(s.elemsize)) || s.isUserArenaChunk {
+func (s *mspan) initHeapBits() {
+	if goarch.PtrSize == 8 && !s.spanclass.noscan() && s.spanclass.sizeclass() == 1 {
+		b := s.heapBits()
+		for i := range b {
+			b[i] = ^uintptr(0)
+		}
+	} else if (!s.spanclass.noscan() && heapBitsInSpan(s.elemsize)) || s.isUserArenaChunk {
 		b := s.heapBits()
 		clear(b)
 	}
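Read as a design choice: the new first branch can pre-fill with ^uintptr(0) only because size class 1 objects are single pointer words, so every bitmap bit is a pointer bit. Every other class that keeps its bitmap in the span, and user arena chunks, still starts zeroed, since the per-object pattern depends on the type allocated later and is written by writeHeapBitsSmall. A minimal restatement of that split, on a plain slice rather than the runtime's mspan:

// initBitmap is a simplified sketch of the two branches above, operating on a
// plain []uintptr instead of an mspan. oneWordPointerClass stands in for the
// "64-bit && pointer-ful && size class 1" condition in the diff.
func initBitmap(bitmap []uintptr, oneWordPointerClass bool) {
	if oneWordPointerClass {
		// Only one possible pattern: every word in the span is a pointer.
		for i := range bitmap {
			bitmap[i] = ^uintptr(0)
		}
	} else {
		// The pattern depends on the allocated type; start from zero and
		// let the per-allocation path (writeHeapBitsSmall) fill it in.
		clear(bitmap)
	}
}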
@@ -639,16 +640,20 @@ func (span *mspan) heapBitsSmallForAddr(addr uintptr) uintptr {
 //
 //go:nosplit
 func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize uintptr) {
+	if goarch.PtrSize == 8 && dataSize == goarch.PtrSize {
+		// Already set by initHeapBits.
+		return
+	}
+
 	// The objects here are always really small, so a single load is sufficient.
 	src0 := readUintptr(typ.GCData)
 
 	// Create repetitions of the bitmap if we have a small slice backing store.
 	scanSize = typ.PtrBytes
 	src := src0
-	switch typ.Size_ {
-	case goarch.PtrSize:
+	if typ.Size_ == goarch.PtrSize {
 		src = (1 << (dataSize / goarch.PtrSize)) - 1
-	default:
+	} else {
 		for i := typ.Size_; i < dataSize; i += typ.Size_ {
 			src |= src0 << (i / goarch.PtrSize)
 			scanSize += typ.Size_
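The replication loop in the else branch is easiest to follow with a worked example; the element size and layout here are hypothetical, chosen only for illustration. A 16-byte element whose first word is a pointer has the one-word mask 0b01. For a 48-byte small-slice backing store holding three such elements, the loop ORs in a shifted copy of that mask for each additional element, producing 0b010101, one pointer bit for each element's first word.

package main

import "fmt"

func main() {
	const ptrSize = 8 // 64-bit

	// Hypothetical element: 16 bytes, pointer in the first word only,
	// so its per-element pointer mask (one word of GC data) is 0b01.
	var (
		elemSize = uintptr(16)
		src0     = uintptr(0b01)
		dataSize = uintptr(48) // three elements in a small slice backing store
	)

	// The same replication the diff performs in its else branch:
	// OR in a shifted copy of the element's mask per extra element.
	src := src0
	for i := elemSize; i < dataSize; i += elemSize {
		src |= src0 << (i / ptrSize)
	}

	fmt.Printf("%06b\n", src) // 010101: one pointer bit per 16-byte element
}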
@@ -252,7 +252,7 @@ func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
 	// visible to the background sweeper.
 	mheap_.central[spc].mcentral.fullSwept(mheap_.sweepgen).push(s)
 	s.limit = s.base() + size
-	s.initHeapBits(false)
+	s.initHeapBits()
 	return s
 }
 
@@ -260,6 +260,6 @@ func (c *mcentral) grow() *mspan {
 	// n := (npages << _PageShift) / size
 	n := s.divideByElemSize(npages << _PageShift)
 	s.limit = s.base() + size*n
-	s.initHeapBits(false)
+	s.initHeapBits()
 	return s
 }