runtime: make span map sparse
This splits the span map into separate chunks for every 64MB of the
heap. The span map chunks now live in the same indirect structure as
the bitmap.

Updates #10460.

This causes a slight improvement in compilebench and the x/benchmarks
garbage benchmark. I'm not sure why it improves performance.

name       old time/op  new time/op  delta
Template    185ms ± 1%   184ms ± 1%    ~      (p=0.315 n=9+10)
Unicode    86.9ms ± 1%  86.9ms ± 3%    ~      (p=0.356 n=9+10)
GoTypes     602ms ± 1%   599ms ± 0%  -0.59%   (p=0.002 n=9+10)
Compiler    2.89s ± 0%   2.87s ± 1%  -0.50%   (p=0.003 n=9+9)
SSA         7.25s ± 0%   7.29s ± 1%    ~      (p=0.400 n=9+10)
Flate       118ms ± 1%   118ms ± 2%    ~      (p=0.065 n=10+9)
GoParser    147ms ± 2%   147ms ± 1%    ~      (p=0.549 n=10+9)
Reflect     403ms ± 1%   401ms ± 1%  -0.47%   (p=0.035 n=9+10)
Tar         176ms ± 1%   175ms ± 1%  -0.59%   (p=0.013 n=10+9)
XML         211ms ± 1%   209ms ± 1%  -0.83%   (p=0.011 n=10+10)
(https://perf.golang.org/search?q=upload:20171231.1)

name                       old time/op  new time/op  delta
Garbage/benchmem-MB=64-12  2.24ms ± 1%  2.23ms ± 1%  -0.36%  (p=0.001 n=20+19)
(https://perf.golang.org/search?q=upload:20171231.2)

Change-Id: I2563f8704ab9812434947faf293c5327f9b0d07a
Reviewed-on: https://go-review.googlesource.com/85885
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
commit d6e8218581 (parent 0de5324d61)
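The shape of the change, in brief: instead of one contiguous spans array covering the whole reserved arena, each 64MB heap arena now carries its own fixed-size spans table, reached through the same sparse arenas index that already holds the heap bitmap. Below is a small standalone sketch of that two-level lookup; the constant values, the toy mspan type, and the arenaCount limit are illustrative stand-ins for this sketch, not the runtime's own definitions. The actual diff follows.

package main

import "fmt"

// Illustrative constants mirroring the shape of the layout: 64MB arenas
// and 8KB pages give 8192 span slots per arena. (Stand-in values for
// this sketch, not copied from runtime source.)
const (
	heapArenaBytes = 64 << 20
	pageSize       = 8 << 10
	pagesPerArena  = heapArenaBytes / pageSize
	arenaCount     = 1 << 10 // toy limit on addressable arenas
)

// mspan is a stand-in for the runtime's span descriptor.
type mspan struct{ base, npages uintptr }

// heapArena holds per-arena metadata; the real struct also holds the
// heap bitmap, which is why the span map can share its indirect index.
type heapArena struct {
	spans [pagesPerArena]*mspan
}

// arenas is the sparse first level: nil until an arena is actually used.
var arenas [arenaCount]*heapArena

// spanOf mimics the two-level lookup: arena index, then page-within-arena.
func spanOf(p uintptr) *mspan {
	ai := p / heapArenaBytes
	if ai >= uintptr(len(arenas)) {
		return nil
	}
	ha := arenas[ai]
	if ha == nil {
		return nil // arena never used: sparse, no backing table
	}
	return ha.spans[(p/pageSize)%pagesPerArena]
}

// setSpan records s for the page containing base, allocating the
// arena's metadata chunk on first use.
func setSpan(base uintptr, s *mspan) {
	ai := base / heapArenaBytes
	if arenas[ai] == nil {
		arenas[ai] = new(heapArena)
	}
	arenas[ai].spans[(base/pageSize)%pagesPerArena] = s
}

func main() {
	s := &mspan{base: 3 * heapArenaBytes, npages: 4}
	setSpan(s.base, s)
	fmt.Println(spanOf(s.base) == s)             // true
	fmt.Println(spanOf(7*heapArenaBytes) == nil) // true: untouched arena
}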
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -187,6 +187,8 @@ const (
 	// heapArenaBitmapBytes is the size of each heap arena's bitmap.
 	heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
 
+	pagesPerArena = heapArenaBytes / pageSize
+
 	// Max number of threads to run garbage collection.
 	// 2, 3, and 4 are all plausible maximums depending
 	// on the hardware details of the machine. The garbage
@@ -284,10 +286,6 @@ func mallocinit() {
 	var p, pSize uintptr
 	var reserved bool
 
-	// The spans array holds one *mspan per _PageSize of arena.
-	var spansSize uintptr = (_MaxMem + 1) / _PageSize * sys.PtrSize
-	spansSize = round(spansSize, _PageSize)
-
 	// Set up the allocation arena, a contiguous area of memory where
 	// allocated data will be found.
 	if sys.PtrSize == 8 {
@@ -318,7 +316,7 @@ func mallocinit() {
 		// translation buffers, the user address space is limited to 39 bits
 		// On darwin/arm64, the address space is even smaller.
 		arenaSize := round(_MaxMem, _PageSize)
-		pSize = spansSize + arenaSize + _PageSize
+		pSize = arenaSize + _PageSize
 		for i := 0; i <= 0x7f; i++ {
 			switch {
 			case GOARCH == "arm64" && GOOS == "darwin":
@@ -377,7 +375,7 @@ func mallocinit() {
 		// away from the running binary image and then round up
 		// to a MB boundary.
 		p = round(firstmoduledata.end+(1<<18), 1<<20)
-		pSize = spansSize + arenaSize + _PageSize
+		pSize = arenaSize + _PageSize
 		if p <= procBrk && procBrk < p+pSize {
 			// Move the start above the brk,
 			// leaving some room for future brk
@@ -400,8 +398,6 @@ func mallocinit() {
 	p1 := round(p, _PageSize)
 	pSize -= p1 - p
 
-	spansStart := p1
-	p1 += spansSize
 	if sys.PtrSize == 4 {
 		// Set arena_start such that we can accept memory
 		// reservations located anywhere in the 4GB virtual space.
@@ -415,7 +411,7 @@ func mallocinit() {
 	mheap_.arena_reserved = reserved
 
 	if mheap_.arena_start&(_PageSize-1) != 0 {
-		println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(_PageSize), "start", hex(mheap_.arena_start))
+		println("bad pagesize", hex(p), hex(p1), hex(_PageSize), "start", hex(mheap_.arena_start))
 		throw("misrounded allocation in mallocinit")
 	}
 
@@ -427,7 +423,7 @@ func mallocinit() {
 	}
 
 	// Initialize the rest of the allocator.
-	mheap_.init(spansStart, spansSize)
+	mheap_.init()
 	_g_ := getg()
 	_g_.m.mcache = allocmcache()
 }
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -50,23 +50,6 @@ type mheap struct {
 	// access (since that may free the backing store).
 	allspans []*mspan // all spans out there
 
-	// spans is a lookup table to map virtual address page IDs to *mspan.
-	// For allocated spans, their pages map to the span itself.
-	// For free spans, only the lowest and highest pages map to the span itself.
-	// Internal pages map to an arbitrary span.
-	// For pages that have never been allocated, spans entries are nil.
-	//
-	// Modifications are protected by mheap.lock. Reads can be
-	// performed without locking, but ONLY from indexes that are
-	// known to contain in-use or stack spans. This means there
-	// must not be a safe-point between establishing that an
-	// address is live and looking it up in the spans array.
-	//
-	// This is backed by a reserved region of the address space so
-	// it can grow without moving. The memory up to len(spans) is
-	// mapped. cap(spans) indicates the total reserved memory.
-	spans []*mspan
-
 	// sweepSpans contains two mspan stacks: one of swept in-use
 	// spans, and one of unswept in-use spans. These two trade
 	// roles on each GC cycle. Since the sweepgen increases by 2
@@ -78,7 +61,7 @@ type mheap struct {
 	// on the swept stack.
 	sweepSpans [2]gcSweepBuf
 
-	_ uint32 // align uint64 fields on 32-bit for atomics
+	//_ uint32 // align uint64 fields on 32-bit for atomics
 
 	// Proportional sweep
 	//
@@ -155,7 +138,7 @@ type mheap struct {
 	// to probe any index.
 	arenas *[memLimit / heapArenaBytes]*heapArena
 
-	//_ uint32 // ensure 64-bit alignment
+	//_ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
 	// the padding makes sure that the MCentrals are
@@ -193,7 +176,18 @@ type heapArena struct {
 	// heapBits type to access this.
 	bitmap [heapArenaBitmapBytes]byte
 
-	// TODO: Also store the spans map here.
+	// spans maps from virtual address page ID within this arena to *mspan.
+	// For allocated spans, their pages map to the span itself.
+	// For free spans, only the lowest and highest pages map to the span itself.
+	// Internal pages map to an arbitrary span.
+	// For pages that have never been allocated, spans entries are nil.
+	//
+	// Modifications are protected by mheap.lock. Reads can be
+	// performed without locking, but ONLY from indexes that are
+	// known to contain in-use or stack spans. This means there
+	// must not be a safe-point between establishing that an
+	// address is live and looking it up in the spans array.
+	spans [pagesPerArena]*mspan
 }
 
 // An MSpan is a run of pages.
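For scale (taking the 64MB arena size from the commit message and the runtime's 8KB page size as given): pagesPerArena = 64MB / 8KB = 8192, so each heapArena.spans table holds 8192 pointers, roughly 64KB per arena on 64-bit, and it exists only for arenas whose metadata has actually been allocated. A quick check of that arithmetic:

package main

import "fmt"

func main() {
	const (
		heapArenaBytes = 64 << 20 // 64MB per arena, per the commit message
		pageSize       = 8 << 10  // 8KB runtime page size
		ptrSize        = 8        // 64-bit pointer
	)
	const pagesPerArena = heapArenaBytes / pageSize
	fmt.Println(pagesPerArena)           // 8192 span slots per arena
	fmt.Println(pagesPerArena * ptrSize) // 65536 bytes of span pointers per arena
}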
@@ -453,10 +447,14 @@ func inHeapOrStack(b uintptr) bool {
 //
 //go:nosplit
 func spanOf(p uintptr) *mspan {
-	if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used {
+	if p < minLegalPointer || p/heapArenaBytes >= uintptr(len(mheap_.arenas)) {
 		return nil
 	}
-	return spanOfUnchecked(p)
+	ha := mheap_.arenas[p/heapArenaBytes]
+	if ha == nil {
+		return nil
+	}
+	return ha.spans[(p/pageSize)%pagesPerArena]
 }
 
 // spanOfUnchecked is equivalent to spanOf, but the caller must ensure
@@ -467,7 +465,7 @@ func spanOf(p uintptr) *mspan {
 //
 //go:nosplit
 func spanOfUnchecked(p uintptr) *mspan {
-	return mheap_.spans[(p-mheap_.arena_start)>>_PageShift]
+	return mheap_.arenas[p/heapArenaBytes].spans[(p/pageSize)%pagesPerArena]
 }
 
 // spanOfHeap is like spanOf, but returns nil if p does not point to a
@@ -487,7 +485,7 @@ func spanOfHeap(p uintptr) *mspan {
 }
 
 // Initialize the heap.
-func (h *mheap) init(spansStart, spansBytes uintptr) {
+func (h *mheap) init() {
 	h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
 	h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
 	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
@@ -514,11 +512,6 @@ func (h *mheap) init(spansStart, spansBytes uintptr) {
 		h.central[i].mcentral.init(spanClass(i))
 	}
 
-	sp := (*slice)(unsafe.Pointer(&h.spans))
-	sp.array = unsafe.Pointer(spansStart)
-	sp.len = 0
-	sp.cap = int(spansBytes / sys.PtrSize)
-
 	// Map metadata structures. But don't map race detector memory
 	// since we're not actually growing the arena here (and TSAN
 	// gets mad if you map 0 bytes).
@@ -552,9 +545,6 @@ func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
 		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
 	}
 
-	// Map spans array.
-	h.mapSpans(arena_used)
-
 	// Tell the race detector about the new heap memory.
 	if racemap && raceenabled {
 		racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used)
@@ -563,25 +553,6 @@ func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
 	h.arena_used = arena_used
 }
 
-// mapSpans makes sure that the spans are mapped
-// up to the new value of arena_used.
-//
-// Don't call this directly. Call mheap.setArenaUsed.
-func (h *mheap) mapSpans(arena_used uintptr) {
-	// Map spans array, PageSize at a time.
-	n := arena_used
-	n -= h.arena_start
-	n = n / _PageSize * sys.PtrSize
-	n = round(n, physPageSize)
-	need := n / unsafe.Sizeof(h.spans[0])
-	have := uintptr(len(h.spans))
-	if have >= need {
-		return
-	}
-	h.spans = h.spans[:need]
-	sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys)
-}
-
 // Sweeps spans in list until reclaims at least npages into heap.
 // Returns the actual number of pages reclaimed.
 func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
@@ -808,15 +779,20 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
 
 // setSpan modifies the span map so spanOf(base) is s.
 func (h *mheap) setSpan(base uintptr, s *mspan) {
-	h.spans[(base-h.arena_start)>>_PageShift] = s
+	h.arenas[base/heapArenaBytes].spans[(base/pageSize)%pagesPerArena] = s
 }
 
 // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
 // is s.
 func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
-	p := (base - h.arena_start) >> _PageShift
+	p := base / pageSize
+	ha := h.arenas[p/pagesPerArena]
 	for n := uintptr(0); n < npage; n++ {
-		h.spans[p+n] = s
+		i := (p + n) % pagesPerArena
+		if i == 0 {
+			ha = h.arenas[(p+n)/pagesPerArena]
+		}
+		ha.spans[i] = s
 	}
 }
 
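One subtlety in the new setSpans above: a span's pages may straddle a 64MB arena boundary, so the loop re-fetches the per-arena metadata chunk whenever the page index wraps back to 0. A toy sketch of just that iteration pattern (4 pages per "arena" is an illustrative value for the sketch, not the runtime's):

package main

import "fmt"

// Toy constant for the sketch only: 4 pages per "arena".
const pagesPerArena = 4

func main() {
	// Mark pages [2, 2+5) starting in arena 0; the range spills into arena 1.
	base, npages := uintptr(2), uintptr(5)
	arena := base / pagesPerArena
	for n := uintptr(0); n < npages; n++ {
		i := (base + n) % pagesPerArena
		if i == 0 {
			// Crossed into the next arena: switch metadata chunks,
			// mirroring ha = h.arenas[(p+n)/pagesPerArena] in the diff.
			arena = (base + n) / pagesPerArena
		}
		fmt.Printf("arena %d, slot %d\n", arena, i)
	}
}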