runtime: support a two-level arena map
Currently, the heap arena map is a single, large array that covers
every possible arena frame in the entire address space. This is
practical up to about 48 bits of address space with 64 MB arenas.
However, there are two problems with this:

1. mips64, ppc64, and s390x support full 64-bit address spaces
   (though on Linux only s390x has kernel support for 64-bit address
   spaces). On these platforms, it would be good to support these
   larger address spaces.

2. On Windows, processes are charged for untouched memory, so for
   processes with small heaps, the mostly-untouched 32 MB arena map
   plus a 64 MB arena are significant overhead. Hence, it would be
   good to reduce both the arena map size and the arena size, but
   with a single-level arena, these are inversely proportional.

This CL adds support for a two-level arena map. Arena frame numbers
are now divided into arenaL1Bits of L1 index and arenaL2Bits of L2
index. At the moment, arenaL1Bits is always 0, so we effectively have
a single level map. We do a few things so that this has no cost
beyond the current single-level map:

1. We embed the L2 array directly in mheap, so if there's a single
   entry in the L2 array, the representation is identical to the
   current representation and there's no extra level of indirection.

2. Hot code that accesses the arena map is structured so that it
   optimizes to nearly the same machine code as it does currently.

3. We make some small tweaks to hot code paths and to the inliner
   itself to keep some important functions inlined despite their
   now-larger ASTs. In particular, this is necessary for
   heapBitsForAddr and heapBits.next.

Possibly as a result of some of the tweaks, this actually slightly
improves the performance of the x/benchmarks garbage benchmark:

name                       old time/op  new time/op  delta
Garbage/benchmem-MB=64-12  2.28ms ± 1%  2.26ms ± 1%  -1.07%  (p=0.000 n=17+19)

(https://perf.golang.org/search?q=upload:20180223.2)

For #23900.

Change-Id: If5164e0961754f97eb9eca58f837f36d759505ff
Reviewed-on: https://go-review.googlesource.com/96779
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
commit ec25210564
parent 2dbf15e88e
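To make the split described above concrete, here is a standalone sketch (not part of the CL; the constants mirror the 64-bit values described in the message, and arenaBaseOffset is ignored for simplicity) of how an address turns into an L1 index and an L2 index:

	package main

	import "fmt"

	// Constants mirroring the 64-bit configuration in the commit message:
	// 48-bit address space, 64 MB arenas, and no L1 bits.
	const (
		heapAddrBits      = 48
		logHeapArenaBytes = 26 // 64 MB arenas
		arenaL1Bits       = 0
		arenaL2Bits       = heapAddrBits - logHeapArenaBytes - arenaL1Bits
		arenaL1Shift      = arenaL2Bits
	)

	// arenaFrame is the linear arena frame number for addr.
	func arenaFrame(addr uint64) uint64 { return addr >> logHeapArenaBytes }

	// l1 and l2 split a frame number into the two map levels. With
	// arenaL1Bits == 0, l1 is always 0 and the map is effectively one level.
	func l1(frame uint64) uint64 { return frame >> arenaL1Shift }
	func l2(frame uint64) uint64 { return frame & (1<<arenaL2Bits - 1) }

	func main() {
		addr := uint64(0x00c0_0010_0000) // a made-up heap address
		f := arenaFrame(addr)
		fmt.Printf("frame=%d l1=%d l2=%d\n", f, l1(f), l2(f))
	}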
src/cmd/compile/internal/gc/inl.go
@@ -304,6 +304,21 @@ func (v *hairyVisitor) visit(n *Node) bool {
 		if t.Nname() == nil {
 			Fatalf("no function definition for [%p] %+v\n", t, t)
 		}
+		if isRuntimePkg(n.Left.Sym.Pkg) {
+			fn := n.Left.Sym.Name
+			if fn == "heapBits.nextArena" {
+				// Special case: explicitly allow
+				// mid-stack inlining of
+				// runtime.heapBits.next even though
+				// it calls slow-path
+				// runtime.heapBits.nextArena.
+				//
+				// TODO(austin): Once mid-stack
+				// inlining is the default, remove
+				// this special case.
+				break
+			}
+		}
 		if inlfn := asNode(t.FuncType().Nname).Func; inlfn.Inl.Len() != 0 {
 			v.budget -= inlfn.InlCost
 			break
src/runtime/heapdump.go
@@ -489,9 +489,15 @@ func dumpparams() {
 	}
 	dumpint(sys.PtrSize)
 	var arenaStart, arenaEnd uintptr
-	for i, ha := range mheap_.arenas {
-		if ha != nil {
-			base := arenaBase(uint(i))
+	for i1 := range mheap_.arenas {
+		if mheap_.arenas[i1] == nil {
+			continue
+		}
+		for i, ha := range mheap_.arenas[i1] {
+			if ha == nil {
+				continue
+			}
+			base := arenaBase(arenaIdx(i1)<<arenaL1Shift | arenaIdx(i))
 			if arenaStart == 0 || base < arenaStart {
 				arenaStart = base
 			}
src/runtime/malloc.go
@@ -92,8 +92,10 @@
 // Since arenas are aligned, the address space can be viewed as a
 // series of arena frames. The arena map (mheap_.arenas) maps from
 // arena frame number to *heapArena, or nil for parts of the address
-// space not backed by the Go heap. Since arenas are large, the arena
-// index is just a single-level mapping.
+// space not backed by the Go heap. The arena map is structured as a
+// two-level array consisting of a "L1" arena map and many "L2" arena
+// maps; however, since arenas are large, on many architectures, the
+// arena map consists of a single, large L2 map.
 //
 // The arena map covers the entire possible address space, allowing
 // the Go heap to use any part of the address space. The allocator
@@ -202,11 +204,6 @@ const (
 	// space because doing so is cheap.
 	// mips32 only has access to the low 2GB of virtual memory, so
 	// we further limit it to 31 bits.
-	//
-	// The size of the arena map is proportional to
-	// 1<<heapAddrBits, so it's important that this not be too
-	// large. 48 bits is about the threshold; above that we would
-	// need to go to a two level arena map.
 	heapAddrBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))

 	// maxAlloc is the maximum size of an allocation. On 64-bit,
@@ -219,13 +216,49 @@ const (
 	// heapArenaBytes is the size of a heap arena. The heap
 	// consists of mappings of size heapArenaBytes, aligned to
 	// heapArenaBytes. The initial heap mapping is one arena.
-	heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit)
+	//
+	// This is currently 64MB on 64-bit and 4MB on 32-bit.
+	heapArenaBytes = 1 << logHeapArenaBytes
+
+	// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
+	// prefer using heapArenaBytes where possible (we need the
+	// constant to compute some other constants).
+	logHeapArenaBytes = (6+20)*_64bit + (2+20)*(1-_64bit)

 	// heapArenaBitmapBytes is the size of each heap arena's bitmap.
 	heapArenaBitmapBytes = heapArenaBytes / (sys.PtrSize * 8 / 2)

 	pagesPerArena = heapArenaBytes / pageSize

+	// arenaL1Bits is the number of bits of the arena number
+	// covered by the first level arena map.
+	//
+	// This number should be small, since the first level arena
+	// map requires PtrSize*(1<<arenaL1Bits) of space in the
+	// binary's BSS. It can be zero, in which case the first level
+	// index is effectively unused. There is a performance benefit
+	// to this, since the generated code can be more efficient,
+	// but comes at the cost of having a large L2 mapping.
+	arenaL1Bits = 0
+
+	// arenaL2Bits is the number of bits of the arena number
+	// covered by the second level arena index.
+	//
+	// The size of each arena map allocation is proportional to
+	// 1<<arenaL2Bits, so it's important that this not be too
+	// large. 48 bits leads to 32MB arena index allocations, which
+	// is about the practical threshold.
+	arenaL2Bits = heapAddrBits - logHeapArenaBytes - arenaL1Bits
+
+	// arenaL1Shift is the number of bits to shift an arena frame
+	// number by to compute an index into the first level arena map.
+	arenaL1Shift = arenaL2Bits
+
+	// arenaBits is the total bits in a combined arena map index.
+	// This is split between the index into the L1 arena map and
+	// the L2 arena map.
+	arenaBits = arenaL1Bits + arenaL2Bits
+
 	// arenaBaseOffset is the pointer value that corresponds to
 	// index 0 in the heap arena map.
 	//
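As a sanity check on the sizes quoted in these comments, a small standalone sketch (the 8-byte pointer size is an assumption of the sketch, not something defined in this hunk):

	package main

	import "fmt"

	func main() {
		const (
			ptrSize           = 8 // assumed 64-bit pointers
			heapAddrBits      = 48
			logHeapArenaBytes = 6 + 20 // 64 MB arenas
			arenaL1Bits       = 0
			arenaL2Bits       = heapAddrBits - logHeapArenaBytes - arenaL1Bits
		)
		fmt.Println("arena frames:", 1<<arenaL2Bits)                   // 4194304
		fmt.Println("L2 map size (MB):", (1<<arenaL2Bits)*ptrSize>>20) // 32
		fmt.Println("L1 map size (B):", (1<<arenaL1Bits)*ptrSize)      // 8
	}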
@@ -323,12 +356,6 @@ func mallocinit() {
 		throw("bad system page size")
 	}

-	// Map the arena map. Most of this will never be written to,
-	mheap_.arenas = (*[(1 << heapAddrBits) / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, nil))
-	if mheap_.arenas == nil {
-		throw("failed to allocate arena map")
-	}
-
 	// Initialize the heap.
 	mheap_.init()
 	_g_ := getg()
@@ -398,7 +425,7 @@ func mallocinit() {
 		// 3. We try to stake out a reasonably large initial
 		// heap reservation.

-		const arenaMetaSize = unsafe.Sizeof(heapArena{}) * uintptr(len(*mheap_.arenas))
+		const arenaMetaSize = unsafe.Sizeof([1 << arenaBits]heapArena{})
 		meta := uintptr(sysReserve(nil, arenaMetaSize))
 		if meta != 0 {
 			mheap_.heapArenaAlloc.init(meta, arenaMetaSize)
@@ -476,7 +503,7 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
 		if p+n < p {
 			// We can't use this, so don't ask.
 			v = nil
-		} else if arenaIndex(p+n-1) >= uint(len(mheap_.arenas)) {
+		} else if arenaIndex(p+n-1) >= 1<<arenaBits {
 			// Outside addressable heap. Can't use.
 			v = nil
 		} else {
@@ -528,9 +555,9 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
 	p := uintptr(v)
 	if p+size < p {
 		bad = "region exceeds uintptr range"
-	} else if arenaIndex(p) >= uint(len(mheap_.arenas)) {
+	} else if arenaIndex(p) >= 1<<arenaBits {
 		bad = "base outside usable address space"
-	} else if arenaIndex(p+size-1) >= uint(len(mheap_.arenas)) {
+	} else if arenaIndex(p+size-1) >= 1<<arenaBits {
 		bad = "end outside usable address space"
 	}
 	if bad != "" {
@@ -551,7 +578,17 @@ func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
 mapped:
 	// Create arena metadata.
 	for ri := arenaIndex(uintptr(v)); ri <= arenaIndex(uintptr(v)+size-1); ri++ {
-		if h.arenas[ri] != nil {
+		l2 := h.arenas[ri.l1()]
+		if l2 == nil {
+			// Allocate an L2 arena map.
+			l2 = (*[1 << arenaL2Bits]*heapArena)(persistentalloc(unsafe.Sizeof(*l2), sys.PtrSize, nil))
+			if l2 == nil {
+				throw("out of memory allocating heap arena map")
+			}
+			atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri.l1()]), unsafe.Pointer(l2))
+		}
+
+		if l2[ri.l2()] != nil {
 			throw("arena already initialized")
 		}
 		var r *heapArena
@@ -567,7 +604,7 @@ mapped:
 		// new heap arena becomes visible before the heap lock
 		// is released (which shouldn't happen, but there's
 		// little downside to this).
-		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
+		atomic.StorepNoWB(unsafe.Pointer(&l2[ri.l2()]), unsafe.Pointer(r))
 	}

 	// Tell the race detector about the new heap memory.
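The allocation path above follows a common pattern: the L2 map is allocated lazily and then published with an atomic pointer store, so lock-free readers see either nil or a fully constructed table. A hedged, standalone illustration of the same idea using sync/atomic (the table type and sizes are invented for the sketch; the runtime itself performs the store under the heap lock with atomic.StorepNoWB):

	package main

	import (
		"fmt"
		"sync/atomic"
		"unsafe"
	)

	const l2Entries = 1 << 10 // hypothetical L2 size for the sketch

	type entry struct{ base uintptr }

	// level2 is one second-level table; level1 holds lazily allocated L2 tables.
	type level2 [l2Entries]*entry

	var level1 [1]unsafe.Pointer // a single slot, as when arenaL1Bits == 0

	// getOrCreateL2 returns the L2 table for slot i1, allocating and
	// atomically publishing it on first use. Concurrent readers that load
	// the pointer atomically see either nil or a fully constructed table.
	func getOrCreateL2(i1 int) *level2 {
		if p := atomic.LoadPointer(&level1[i1]); p != nil {
			return (*level2)(p)
		}
		l2 := new(level2)
		// The runtime publishes under the heap lock, so a plain atomic
		// store suffices there; CompareAndSwap keeps this lock-free
		// sketch correct if two goroutines race to install the table.
		if atomic.CompareAndSwapPointer(&level1[i1], nil, unsafe.Pointer(l2)) {
			return l2
		}
		return (*level2)(atomic.LoadPointer(&level1[i1]))
	}

	func main() {
		l2 := getOrCreateL2(0)
		l2[42] = &entry{base: 0x1000}
		fmt.Println(getOrCreateL2(0)[42].base) // 4096
	}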
src/runtime/mbitmap.go
@@ -332,21 +332,23 @@ func (m *markBits) advance() {
 //
 // nosplit because it is used during write barriers and must not be preempted.
 //go:nosplit
-func heapBitsForAddr(addr uintptr) heapBits {
+func heapBitsForAddr(addr uintptr) (h heapBits) {
 	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
-	off := addr / sys.PtrSize
 	arena := arenaIndex(addr)
-	ha := mheap_.arenas[arena]
+	ha := mheap_.arenas[arena.l1()][arena.l2()]
 	// The compiler uses a load for nil checking ha, but in this
 	// case we'll almost never hit that cache line again, so it
 	// makes more sense to do a value check.
 	if ha == nil {
-		// addr is not in the heap. Crash without inhibiting inlining.
-		_ = *ha
+		// addr is not in the heap. Return nil heapBits, which
+		// we expect to crash in the caller.
+		return
 	}
-	bitp := &ha.bitmap[(off/4)%heapArenaBitmapBytes]
-	last := &ha.bitmap[len(ha.bitmap)-1]
-	return heapBits{bitp, uint32(off & 3), uint32(arena), last}
+	h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
+	h.shift = uint32((addr / sys.PtrSize) & 3)
+	h.arena = uint32(arena)
+	h.last = &ha.bitmap[len(ha.bitmap)-1]
+	return
 }

 // findObject returns the base address for the heap object containing
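A worked example of the index arithmetic used in heapBitsForAddr above (standalone sketch; 8-byte words and 64 MB arenas assumed): each bitmap byte describes four heap words, so the byte index is addr/(PtrSize*4) modulo the per-arena bitmap size, and shift records which of those four words addr refers to:

	package main

	import "fmt"

	const (
		ptrSize              = 8
		heapArenaBytes       = 64 << 20
		heapArenaBitmapBytes = heapArenaBytes / (ptrSize * 8 / 2) // 2 bits per word
	)

	// bitmapPos mirrors the two expressions in heapBitsForAddr: the bitmap
	// byte within the arena that describes addr, and which of that byte's
	// four word slots addr falls in.
	func bitmapPos(addr uintptr) (byteIdx uintptr, shift uint32) {
		byteIdx = (addr / (ptrSize * 4)) % heapArenaBitmapBytes
		shift = uint32((addr / ptrSize) & 3)
		return
	}

	func main() {
		for _, addr := range []uintptr{0, 8, 16, 24, 32, 64 << 20} {
			b, s := bitmapPos(addr)
			fmt.Printf("addr %8d -> bitmap byte %7d, shift %d\n", addr, b, s)
		}
	}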
@@ -432,21 +434,39 @@ func (h heapBits) next() heapBits {
 		h.bitp, h.shift = add1(h.bitp), 0
 	} else {
 		// Move to the next arena.
-		h.arena++
-		a := mheap_.arenas[h.arena]
-		if a == nil {
-			// We just passed the end of the object, which
-			// was also the end of the heap. Poison h. It
-			// should never be dereferenced at this point.
-			h.bitp, h.last = nil, nil
-		} else {
-			h.bitp, h.shift = &a.bitmap[0], 0
-			h.last = &a.bitmap[len(a.bitmap)-1]
-		}
+		return h.nextArena()
 	}
 	return h
 }

+// nextArena advances h to the beginning of the next heap arena.
+//
+// This is a slow-path helper to next. gc's inliner knows that
+// heapBits.next can be inlined even though it calls this. This is
+// marked noinline so it doesn't get inlined into next and cause next
+// to be too big to inline.
+//
+//go:nosplit
+//go:noinline
+func (h heapBits) nextArena() heapBits {
+	h.arena++
+	ai := arenaIdx(h.arena)
+	l2 := mheap_.arenas[ai.l1()]
+	if l2 == nil {
+		// We just passed the end of the object, which
+		// was also the end of the heap. Poison h. It
+		// should never be dereferenced at this point.
+		return heapBits{}
+	}
+	ha := l2[ai.l2()]
+	if ha == nil {
+		return heapBits{}
+	}
+	h.bitp, h.shift = &ha.bitmap[0], 0
+	h.last = &ha.bitmap[len(ha.bitmap)-1]
+	return h
+}
+
 // forward returns the heapBits describing n pointer-sized words ahead of h in memory.
 // That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
 // h.forward(1) is equivalent to h.next(), just slower.
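The new nextArena helper is an instance of a general technique: keep the hot path small enough to inline by moving the rare case into a separate function marked //go:noinline (in the runtime's case the inl.go special case above is also needed, since the call would otherwise still blow next's inlining budget). A standalone illustration of the pattern with invented names:

	package main

	import "fmt"

	// counter's hot path (Inc) stays tiny so the compiler can inline it at
	// call sites; the rare overflow handling lives in incSlow, which is
	// kept out of Inc's inlining cost by //go:noinline.
	type counter struct {
		lo uint8
		hi uint64
	}

	func (c *counter) Inc() {
		if c.lo != 255 {
			c.lo++ // fast path: almost always taken, inlinable
			return
		}
		c.incSlow()
	}

	//go:noinline
	func (c *counter) incSlow() {
		// Slow path: runs once every 256 increments.
		c.lo = 0
		c.hi++
	}

	func (c *counter) Value() uint64 { return c.hi*256 + uint64(c.lo) }

	func main() {
		var c counter
		for i := 0; i < 1000; i++ {
			c.Inc()
		}
		fmt.Println(c.Value()) // 1000
	}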
@@ -465,12 +485,13 @@ func (h heapBits) forward(n uintptr) heapBits {
 		// We're in a new heap arena.
 		past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
 		h.arena += 1 + uint32(past/heapArenaBitmapBytes)
-		a := mheap_.arenas[h.arena]
-		if a == nil {
-			h.bitp, h.last = nil, nil
-		} else {
+		ai := arenaIdx(h.arena)
+		if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
+			a := l2[ai.l2()]
 			h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
 			h.last = &a.bitmap[len(a.bitmap)-1]
+		} else {
+			h.bitp, h.last = nil, nil
 		}
 	}
 	return h
 }
@@ -971,7 +992,7 @@ func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
 	// machine instructions.

 	outOfPlace := false
-	if arenaIndex(x+size-1) != uint(h.arena) {
+	if arenaIndex(x+size-1) != arenaIdx(h.arena) {
 		// This object spans heap arenas, so the bitmap may be
 		// discontiguous. Unroll it into the object instead
 		// and then copy it out.
@@ -1375,12 +1396,14 @@ Phase4:
 		// x+size may not point to the heap, so back up one
 		// word and then call next().
 		end := heapBitsForAddr(x + size - sys.PtrSize).next()
-		if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[end.arena].bitmap[0])) {
+		endAI := arenaIdx(end.arena)
+		if !outOfPlace && (end.bitp == nil || (end.shift == 0 && end.bitp == &mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0])) {
 			// The unrolling code above walks hbitp just
 			// past the bitmap without moving to the next
 			// arena. Synthesize this for end.bitp.
-			end.bitp = addb(&mheap_.arenas[end.arena-1].bitmap[0], heapArenaBitmapBytes)
 			end.arena--
+			endAI = arenaIdx(end.arena)
+			end.bitp = addb(&mheap_.arenas[endAI.l1()][endAI.l2()].bitmap[0], heapArenaBitmapBytes)
 			end.last = nil
 		}
 		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
src/runtime/mheap.go
@@ -96,9 +96,9 @@ type mheap struct {
 	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
 	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)

-	// arenas is the heap arena map.
-	// arenas[(va+arenaBaseOffset)/heapArenaBytes] points to the
-	// metadata for the heap arena containing va.
+	// arenas is the heap arena map. It points to the metadata for
+	// the heap for every arena frame of the entire usable virtual
+	// address space.
 	//
 	// Use arenaIndex to compute indexes into this array.
 	//
@@ -110,9 +110,13 @@ type mheap struct {
 	// transition from nil to non-nil at any time when the lock
 	// isn't held. (Entries never transitions back to nil.)
 	//
-	// This structure is fully mapped by mallocinit, so it's safe
-	// to probe any index.
-	arenas *[(1 << heapAddrBits) / heapArenaBytes]*heapArena
+	// In general, this is a two-level mapping consisting of an L1
+	// map and possibly many L2 maps. This saves space when there
+	// are a huge number of arena frames. However, on many
+	// platforms (even 64-bit), arenaL1Bits is 0, making this
+	// effectively a single-level map. In this case, arenas[0]
+	// will never be nil.
+	arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena

 	// heapArenaAlloc is pre-reserved space for allocating heapArena
 	// objects. This is only used on 32-bit, where we pre-reserve
@@ -410,24 +414,48 @@ func (sc spanClass) noscan() bool {
 	return sc&1 != 0
 }

-// arenaIndex returns the mheap_.arenas index of the arena containing
-// metadata for p. If p is outside the range of valid heap addresses,
-// it returns an index larger than len(mheap_.arenas).
+// arenaIndex returns the index into mheap_.arenas of the arena
+// containing metadata for p. This index combines of an index into the
+// L1 map and an index into the L2 map and should be used as
+// mheap_.arenas[ai.l1()][ai.l2()].
+//
+// If p is outside the range of valid heap addresses, either l1() or
+// l2() will be out of bounds.
 //
 // It is nosplit because it's called by spanOf and several other
 // nosplit functions.
 //
 //go:nosplit
-func arenaIndex(p uintptr) uint {
-	return uint((p + arenaBaseOffset) / heapArenaBytes)
+func arenaIndex(p uintptr) arenaIdx {
+	return arenaIdx((p + arenaBaseOffset) / heapArenaBytes)
 }

 // arenaBase returns the low address of the region covered by heap
 // arena i.
-func arenaBase(i uint) uintptr {
+func arenaBase(i arenaIdx) uintptr {
 	return uintptr(i)*heapArenaBytes - arenaBaseOffset
 }

+type arenaIdx uint
+
+func (i arenaIdx) l1() uint {
+	if arenaL1Bits == 0 {
+		// Let the compiler optimize this away if there's no
+		// L1 map.
+		return 0
+	} else {
+		return uint(i) >> arenaL1Shift
+	}
+}
+
+func (i arenaIdx) l2() uint {
+	if arenaL1Bits == 0 {
+		return uint(i)
+	} else {
+		return uint(i) & (1<<arenaL2Bits - 1)
+	}
+}
+
 // inheap reports whether b is a pointer into a (potentially dead) heap object.
 // It returns false for pointers into _MSpanManual spans.
 // Non-preemptible because it is used by write barriers.
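A quick standalone check (constants assumed: arenaL1Bits = 0 and arenaL2Bits = 22, matching the 64-bit defaults in this CL) that the l1/l2 split above and the recombination used in the heapdump hunk, arenaIdx(i1)<<arenaL1Shift | arenaIdx(i), are inverses:

	package main

	import "fmt"

	const (
		arenaL1Bits  = 0 // 0 on most platforms per this CL
		arenaL2Bits  = 22
		arenaL1Shift = arenaL2Bits
	)

	type arenaIdx uint

	func (i arenaIdx) l1() uint {
		if arenaL1Bits == 0 {
			return 0
		}
		return uint(i) >> arenaL1Shift
	}

	func (i arenaIdx) l2() uint {
		if arenaL1Bits == 0 {
			return uint(i)
		}
		return uint(i) & (1<<arenaL2Bits - 1)
	}

	func main() {
		for _, i := range []arenaIdx{0, 1, 12345, 1<<arenaL2Bits - 1} {
			back := arenaIdx(i.l1())<<arenaL1Shift | arenaIdx(i.l2())
			fmt.Println(i, i.l1(), i.l2(), back == i) // back == i is always true
		}
	}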
@@ -467,14 +495,28 @@ func inHeapOrStack(b uintptr) bool {
 //
 //go:nosplit
 func spanOf(p uintptr) *mspan {
-	if p < minLegalPointer {
-		return nil
-	}
+	// This function looks big, but we use a lot of constant
+	// folding around arenaL1Bits to get it under the inlining
+	// budget. Also, many of the checks here are safety checks
+	// that Go needs to do anyway, so the generated code is quite
+	// short.
 	ri := arenaIndex(p)
-	if ri >= uint(len(mheap_.arenas)) {
+	if arenaL1Bits == 0 {
+		// If there's no L1, then ri.l1() can't be out of bounds but ri.l2() can.
+		if ri.l2() >= uint(len(mheap_.arenas[0])) {
+			return nil
+		}
+	} else {
+		// If there's an L1, then ri.l1() can be out of bounds but ri.l2() can't.
+		if ri.l1() >= uint(len(mheap_.arenas)) {
+			return nil
+		}
+	}
+	l2 := mheap_.arenas[ri.l1()]
+	if arenaL1Bits != 0 && l2 == nil { // Should never happen if there's no L1.
 		return nil
 	}
-	ha := mheap_.arenas[ri]
+	ha := l2[ri.l2()]
 	if ha == nil {
 		return nil
 	}
@@ -488,7 +530,8 @@ func spanOf(p uintptr) *mspan {
 //
 //go:nosplit
 func spanOfUnchecked(p uintptr) *mspan {
-	return mheap_.arenas[arenaIndex(p)].spans[(p/pageSize)%pagesPerArena]
+	ai := arenaIndex(p)
+	return mheap_.arenas[ai.l1()][ai.l2()].spans[(p/pageSize)%pagesPerArena]
 }

 // spanOfHeap is like spanOf, but returns nil if p does not point to a
@@ -763,18 +806,21 @@ func (h *mheap) allocManual(npage uintptr, stat *uint64) *mspan {

 // setSpan modifies the span map so spanOf(base) is s.
 func (h *mheap) setSpan(base uintptr, s *mspan) {
-	h.arenas[arenaIndex(base)].spans[(base/pageSize)%pagesPerArena] = s
+	ai := arenaIndex(base)
+	h.arenas[ai.l1()][ai.l2()].spans[(base/pageSize)%pagesPerArena] = s
 }

 // setSpans modifies the span map so [spanOf(base), spanOf(base+npage*pageSize))
 // is s.
 func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 	p := base / pageSize
-	ha := h.arenas[arenaIndex(base)]
+	ai := arenaIndex(base)
+	ha := h.arenas[ai.l1()][ai.l2()]
 	for n := uintptr(0); n < npage; n++ {
 		i := (p + n) % pagesPerArena
 		if i == 0 {
-			ha = h.arenas[arenaIndex(base+n*pageSize)]
+			ai = arenaIndex(base + n*pageSize)
+			ha = h.arenas[ai.l1()][ai.l2()]
 		}
 		ha.spans[i] = s
 	}
 }