
runtime: make span map sparse

This splits the span map into separate chunks for every 64MB of the
heap. The span map chunks now live in the same indirect structure as
the bitmap.
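
For illustration, a minimal runnable sketch of the two-level lookup this change introduces. The toy constants, the fixed-size arenas array, and the stripped-down mspan below are stand-ins for the runtime's real structures; the authoritative code is in the diff that follows.

// Illustrative sketch only: a simplified model of the sparse span map.
// Sizes and types are stand-ins for the runtime's own.
package main

import "fmt"

const (
	heapArenaBytes = 64 << 20 // 64MB heap chunk ("arena") per span-map piece
	pageSize       = 8192     // runtime page size
	pagesPerArena  = heapArenaBytes / pageSize
)

type mspan struct{ base uintptr } // stand-in for the runtime's mspan

// Each arena owns a fixed-size span map covering only its own pages.
type heapArena struct {
	spans [pagesPerArena]*mspan
}

// arenas is sparse: an entry stays nil until that 64MB chunk of the
// address space is actually used by the heap.
var arenas [256]*heapArena // small fixed bound, just for the sketch

// spanOf mirrors the new two-level lookup: arena index first, then the
// page index within that arena.
func spanOf(p uintptr) *mspan {
	if p/heapArenaBytes >= uintptr(len(arenas)) {
		return nil
	}
	ha := arenas[p/heapArenaBytes]
	if ha == nil {
		return nil
	}
	return ha.spans[(p/pageSize)%pagesPerArena]
}

func main() {
	s := &mspan{base: 3 * heapArenaBytes}
	arenas[3] = &heapArena{}
	arenas[3].spans[0] = s
	fmt.Println(spanOf(3*heapArenaBytes) == s) // true
	fmt.Println(spanOf(5*heapArenaBytes))      // <nil>: that arena was never used
}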

Updates #10460.

This causes a slight improvement in compilebench and the x/benchmarks
garbage benchmark. I'm not sure why it improves performance.

name       old time/op     new time/op     delta
Template       185ms ± 1%      184ms ± 1%    ~            (p=0.315 n=9+10)
Unicode       86.9ms ± 1%     86.9ms ± 3%    ~            (p=0.356 n=9+10)
GoTypes        602ms ± 1%      599ms ± 0%  -0.59%         (p=0.002 n=9+10)
Compiler       2.89s ± 0%      2.87s ± 1%  -0.50%          (p=0.003 n=9+9)
SSA            7.25s ± 0%      7.29s ± 1%    ~            (p=0.400 n=9+10)
Flate          118ms ± 1%      118ms ± 2%    ~            (p=0.065 n=10+9)
GoParser       147ms ± 2%      147ms ± 1%    ~            (p=0.549 n=10+9)
Reflect        403ms ± 1%      401ms ± 1%  -0.47%         (p=0.035 n=9+10)
Tar            176ms ± 1%      175ms ± 1%  -0.59%         (p=0.013 n=10+9)
XML            211ms ± 1%      209ms ± 1%  -0.83%        (p=0.011 n=10+10)

(https://perf.golang.org/search?q=upload:20171231.1)

name                       old time/op  new time/op  delta
Garbage/benchmem-MB=64-12  2.24ms ± 1%  2.23ms ± 1%  -0.36%  (p=0.001 n=20+19)

(https://perf.golang.org/search?q=upload:20171231.2)

Change-Id: I2563f8704ab9812434947faf293c5327f9b0d07a
Reviewed-on: https://go-review.googlesource.com/85885
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Austin Clements 2017-12-13 16:09:02 -05:00
parent 0de5324d61
commit d6e8218581
2 changed files with 36 additions and 64 deletions

src/runtime/malloc.go

@@ -187,6 +187,8 @@ const (
// heapArenaBitmapBytes is the size of each heap arena's bitmap.
heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)
+pagesPerArena = heapArenaBytes / pageSize
// Max number of threads to run garbage collection.
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
@@ -284,10 +286,6 @@ func mallocinit() {
var p, pSize uintptr
var reserved bool
-// The spans array holds one *mspan per _PageSize of arena.
-var spansSize uintptr = (_MaxMem + 1) / _PageSize * sys.PtrSize
-spansSize = round(spansSize, _PageSize)
// Set up the allocation arena, a contiguous area of memory where
// allocated data will be found.
if sys.PtrSize == 8 {
@@ -318,7 +316,7 @@
// translation buffers, the user address space is limited to 39 bits
// On darwin/arm64, the address space is even smaller.
arenaSize := round(_MaxMem, _PageSize)
-pSize = spansSize + arenaSize + _PageSize
+pSize = arenaSize + _PageSize
for i := 0; i <= 0x7f; i++ {
switch {
case GOARCH == "arm64" && GOOS == "darwin":
@@ -377,7 +375,7 @@
// away from the running binary image and then round up
// to a MB boundary.
p = round(firstmoduledata.end+(1<<18), 1<<20)
-pSize = spansSize + arenaSize + _PageSize
+pSize = arenaSize + _PageSize
if p <= procBrk && procBrk < p+pSize {
// Move the start above the brk,
// leaving some room for future brk
@@ -400,8 +398,6 @@
p1 := round(p, _PageSize)
pSize -= p1 - p
-spansStart := p1
-p1 += spansSize
if sys.PtrSize == 4 {
// Set arena_start such that we can accept memory
// reservations located anywhere in the 4GB virtual space.
@@ -415,7 +411,7 @@
mheap_.arena_reserved = reserved
if mheap_.arena_start&(_PageSize-1) != 0 {
println("bad pagesize", hex(p), hex(p1), hex(spansSize), hex(_PageSize), "start", hex(mheap_.arena_start))
println("bad pagesize", hex(p), hex(p1), hex(_PageSize), "start", hex(mheap_.arena_start))
throw("misrounded allocation in mallocinit")
}
@@ -427,7 +423,7 @@
}
// Initialize the rest of the allocator.
-mheap_.init(spansStart, spansSize)
+mheap_.init()
_g_ := getg()
_g_.m.mcache = allocmcache()
}

src/runtime/mheap.go

@@ -50,23 +50,6 @@ type mheap struct {
// access (since that may free the backing store).
allspans []*mspan // all spans out there
-// spans is a lookup table to map virtual address page IDs to *mspan.
-// For allocated spans, their pages map to the span itself.
-// For free spans, only the lowest and highest pages map to the span itself.
-// Internal pages map to an arbitrary span.
-// For pages that have never been allocated, spans entries are nil.
-//
-// Modifications are protected by mheap.lock. Reads can be
-// performed without locking, but ONLY from indexes that are
-// known to contain in-use or stack spans. This means there
-// must not be a safe-point between establishing that an
-// address is live and looking it up in the spans array.
-//
-// This is backed by a reserved region of the address space so
-// it can grow without moving. The memory up to len(spans) is
-// mapped. cap(spans) indicates the total reserved memory.
-spans []*mspan
// sweepSpans contains two mspan stacks: one of swept in-use
// spans, and one of unswept in-use spans. These two trade
// roles on each GC cycle. Since the sweepgen increases by 2
@@ -78,7 +61,7 @@
// on the swept stack.
sweepSpans [2]gcSweepBuf
-_ uint32 // align uint64 fields on 32-bit for atomics
+//_ uint32 // align uint64 fields on 32-bit for atomics
// Proportional sweep
//
@@ -155,7 +138,7 @@
// to probe any index.
arenas *[memLimit / heapArenaBytes]*heapArena
-//_ uint32 // ensure 64-bit alignment
+//_ uint32 // ensure 64-bit alignment of central
// central free lists for small size classes.
// the padding makes sure that the MCentrals are
@@ -193,7 +176,18 @@ type heapArena struct {
// heapBits type to access this.
bitmap [heapArenaBitmapBytes]byte
-// TODO: Also store the spans map here.
+// spans maps from virtual address page ID within this arena to *mspan.
+// For allocated spans, their pages map to the span itself.
+// For free spans, only the lowest and highest pages map to the span itself.
+// Internal pages map to an arbitrary span.
+// For pages that have never been allocated, spans entries are nil.
+//
+// Modifications are protected by mheap.lock. Reads can be
+// performed without locking, but ONLY from indexes that are
+// known to contain in-use or stack spans. This means there
+// must not be a safe-point between establishing that an
+// address is live and looking it up in the spans array.
+spans [pagesPerArena]*mspan
}
// An MSpan is a run of pages.
@@ -453,10 +447,14 @@ func inHeapOrStack(b uintptr) bool {
//
//go:nosplit
func spanOf(p uintptr) *mspan {
-if p == 0 || p < mheap_.arena_start || p >= mheap_.arena_used {
+if p < minLegalPointer || p/heapArenaBytes >= uintptr(len(mheap_.arenas)) {
return nil
}
-return spanOfUnchecked(p)
+ha := mheap_.arenas[p/heapArenaBytes]
+if ha == nil {
+return nil
+}
+return ha.spans[(p/pageSize)%pagesPerArena]
}
// spanOfUnchecked is equivalent to spanOf, but the caller must ensure
@@ -467,7 +465,7 @@ func spanOf(p uintptr) *mspan {
//
//go:nosplit
func spanOfUnchecked(p uintptr) *mspan {
-return mheap_.spans[(p-mheap_.arena_start)>>_PageShift]
+return mheap_.arenas[p/heapArenaBytes].spans[(p/pageSize)%pagesPerArena]
}
// spanOfHeap is like spanOf, but returns nil if p does not point to a
@@ -487,7 +485,7 @@ func spanOfHeap(p uintptr) *mspan {
}
// Initialize the heap.
-func (h *mheap) init(spansStart, spansBytes uintptr) {
+func (h *mheap) init() {
h.treapalloc.init(unsafe.Sizeof(treapNode{}), nil, nil, &memstats.other_sys)
h.spanalloc.init(unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
@@ -514,11 +512,6 @@ func (h *mheap) init(spansStart, spansBytes uintptr) {
h.central[i].mcentral.init(spanClass(i))
}
-sp := (*slice)(unsafe.Pointer(&h.spans))
-sp.array = unsafe.Pointer(spansStart)
-sp.len = 0
-sp.cap = int(spansBytes / sys.PtrSize)
// Map metadata structures. But don't map race detector memory
// since we're not actually growing the arena here (and TSAN
// gets mad if you map 0 bytes).
@@ -552,9 +545,6 @@ func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
}
-// Map spans array.
-h.mapSpans(arena_used)
// Tell the race detector about the new heap memory.
if racemap && raceenabled {
racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used)
@@ -563,25 +553,6 @@ func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
h.arena_used = arena_used
}
-// mapSpans makes sure that the spans are mapped
-// up to the new value of arena_used.
-//
-// Don't call this directly. Call mheap.setArenaUsed.
-func (h *mheap) mapSpans(arena_used uintptr) {
-// Map spans array, PageSize at a time.
-n := arena_used
-n -= h.arena_start
-n = n / _PageSize * sys.PtrSize
-n = round(n, physPageSize)
-need := n / unsafe.Sizeof(h.spans[0])
-have := uintptr(len(h.spans))
-if have >= need {
-return
-}
-h.spans = h.spans[:need]
-sysMap(unsafe.Pointer(&h.spans[have]), (need-have)*unsafe.Sizeof(h.spans[0]), h.arena_reserved, &memstats.other_sys)
-}
// Sweeps spans in list until reclaims at least npages into heap.
// Returns the actual number of pages reclaimed.
func (h *mheap) reclaimList(list *mSpanList, npages uintptr) uintptr {
@@ -808,15 +779,20 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {
// setSpan modifies the span map so spanOf(base) is s.
func (h *mheap) setSpan(base uintptr, s *mspan) {
-h.spans[(base-h.arena_start)>>_PageShift] = s
+h.arenas[base/heapArenaBytes].spans[(base/pageSize)%pagesPerArena] = s
}
// setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
// is s.
func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
-p := (base - h.arena_start) >> _PageShift
+p := base / pageSize
+ha := h.arenas[p/pagesPerArena]
for n := uintptr(0); n < npage; n++ {
-h.spans[p+n] = s
+i := (p + n) % pagesPerArena
+if i == 0 {
+ha = h.arenas[(p+n)/pagesPerArena]
+}
+ha.spans[i] = s
}
}
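
For reference, the cross-arena walk in the new setSpans above, restated as a standalone sketch that reuses the toy mspan/heapArena/arenas declarations from the sketch near the top of this page. It is illustrative only; it assumes every arena the span touches has already been initialized, as the runtime guarantees for heap memory.

// Sketch (not the runtime's code) of updating the span map for a span
// whose pages may straddle a 64MB arena boundary. The arena pointer is
// refreshed whenever the in-arena page index wraps back to 0, so only
// one extra lookup is paid per 64MB crossed rather than per page.
func setSpans(base, npage uintptr, s *mspan) {
	p := base / pageSize          // global page index of the span's first page
	ha := arenas[p/pagesPerArena] // arena holding that first page
	for n := uintptr(0); n < npage; n++ {
		i := (p + n) % pagesPerArena // page index within the current arena
		if i == 0 {
			// Crossed into the next arena; fetch its span-map chunk.
			ha = arenas[(p+n)/pagesPerArena]
		}
		ha.spans[i] = s
	}
}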