runtime: use only treaps for tracking spans
Currently, mheap tracks spans in both mSpanLists and mTreaps, but mSpanLists, while they tend to be smaller, complicate the implementation. Here we simplify the implementation by removing free and busy from mheap and renaming freelarge -> free and busylarge -> busy.

This change also slightly changes the reclamation policy. Previously, for allocations under 1MB we would attempt to find a small span of the right size. Now, we just try to find any number of spans totaling the right size. This may increase heap fragmentation, but that will be dealt with using virtual memory tricks in follow-up CLs.

For #14045.

Garbage-heavy benchmarks show very little change, except what appears to be a decrease in STW times and peak RSS.

name                      old STW-ns/GC  new STW-ns/GC  delta
Garbage/benchmem-MB=64-8  263k ±64%      217k ±24%      -17.66% (p=0.028 n=25+23)

name                      old STW-ns/op  new STW-ns/op  delta
Garbage/benchmem-MB=64-8  9.39k ±65%     7.80k ±24%     -16.88% (p=0.037 n=25+23)

name                      old peak-RSS-bytes  new peak-RSS-bytes  delta
Garbage/benchmem-MB=64-8  281M ± 0%           249M ± 4%           -11.40% (p=0.000 n=19+18)

https://perf.golang.org/search?q=upload:20181005.1

Go1 benchmarks perform roughly the same, the most notable regression being the JSON encode/decode benchmark, which worsens by ~2%.

name                      old time/op  new time/op  delta
BinaryTree17-8            3.02s ± 2%   2.99s ± 2%   -1.18% (p=0.000 n=25+24)
Fannkuch11-8              3.05s ± 1%   3.02s ± 2%   -1.20% (p=0.000 n=25+25)
FmtFprintfEmpty-8         43.6ns ± 5%  43.4ns ± 3%  ~      (p=0.528 n=25+25)
FmtFprintfString-8        74.9ns ± 3%  73.4ns ± 1%  -2.03% (p=0.001 n=25+24)
FmtFprintfInt-8           79.3ns ± 3%  77.9ns ± 1%  -1.73% (p=0.003 n=25+25)
FmtFprintfIntInt-8        119ns ± 6%   116ns ± 0%   -2.68% (p=0.000 n=25+18)
FmtFprintfPrefixedInt-8   134ns ± 4%   132ns ± 1%   -1.52% (p=0.004 n=25+25)
FmtFprintfFloat-8         240ns ± 1%   241ns ± 1%   ~      (p=0.403 n=24+23)
FmtManyArgs-8             543ns ± 1%   537ns ± 1%   -1.00% (p=0.000 n=25+25)
GobDecode-8               6.88ms ± 1%  6.92ms ± 4%  ~      (p=0.088 n=24+22)
GobEncode-8               5.92ms ± 1%  5.93ms ± 1%  ~      (p=0.898 n=25+24)
Gzip-8                    267ms ± 2%   266ms ± 2%   ~      (p=0.213 n=25+24)
Gunzip-8                  35.4ms ± 1%  35.6ms ± 1%  +0.70% (p=0.000 n=25+25)
HTTPClientServer-8        104µs ± 2%   104µs ± 2%   ~      (p=0.686 n=25+25)
JSONEncode-8              9.67ms ± 1%  9.80ms ± 4%  +1.32% (p=0.000 n=25+25)
JSONDecode-8              47.7ms ± 1%  48.8ms ± 5%  +2.33% (p=0.000 n=25+25)
Mandelbrot200-8           4.87ms ± 1%  4.91ms ± 1%  +0.79% (p=0.000 n=25+25)
GoParse-8                 3.59ms ± 4%  3.55ms ± 1%  ~      (p=0.199 n=25+24)
RegexpMatchEasy0_32-8     90.3ns ± 1%  89.9ns ± 1%  -0.47% (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8     204ns ± 1%   204ns ± 1%   ~      (p=0.914 n=25+24)
RegexpMatchEasy1_32-8     84.9ns ± 0%  84.6ns ± 1%  -0.36% (p=0.000 n=24+25)
RegexpMatchEasy1_1K-8     350ns ± 1%   348ns ± 3%   -0.59% (p=0.007 n=25+25)
RegexpMatchMedium_32-8    122ns ± 1%   121ns ± 0%   -1.08% (p=0.000 n=25+18)
RegexpMatchMedium_1K-8    36.1µs ± 1%  34.6µs ± 1%  -4.02% (p=0.000 n=25+25)
RegexpMatchHard_32-8      1.69µs ± 2%  1.65µs ± 1%  -2.38% (p=0.000 n=25+25)
RegexpMatchHard_1K-8      50.8µs ± 1%  49.4µs ± 1%  -2.69% (p=0.000 n=25+24)
Revcomp-8                 453ms ± 2%   449ms ± 3%   -0.74% (p=0.022 n=25+24)
Template-8                63.2ms ± 2%  63.4ms ± 1%  ~      (p=0.127 n=25+24)
TimeParse-8               313ns ± 1%   315ns ± 3%   ~      (p=0.924 n=24+25)
TimeFormat-8              294ns ± 1%   292ns ± 2%   -0.65% (p=0.004 n=23+24)
[Geo mean]                49.9µs       49.6µs       -0.65%

name                      old speed      new speed      delta
GobDecode-8               112MB/s ± 1%   110MB/s ± 4%   -1.00% (p=0.036 n=24+24)
GobEncode-8               130MB/s ± 1%   129MB/s ± 1%   ~      (p=0.894 n=25+24)
Gzip-8                    72.7MB/s ± 2%  73.0MB/s ± 2%  ~      (p=0.208 n=25+24)
Gunzip-8                  549MB/s ± 1%   545MB/s ± 1%   -0.70% (p=0.000 n=25+25)
JSONEncode-8              201MB/s ± 1%   198MB/s ± 3%   -1.29% (p=0.000 n=25+25)
JSONDecode-8              40.7MB/s ± 1%  39.8MB/s ± 5%  -2.23% (p=0.000 n=25+25)
GoParse-8                 16.2MB/s ± 4%  16.3MB/s ± 1%  ~      (p=0.211 n=25+24)
RegexpMatchEasy0_32-8     354MB/s ± 1%   356MB/s ± 1%   +0.47% (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8     5.00GB/s ± 0%  4.99GB/s ± 1%  ~      (p=0.588 n=24+24)
RegexpMatchEasy1_32-8     377MB/s ± 1%   378MB/s ± 1%   +0.39% (p=0.000 n=25+25)
RegexpMatchEasy1_1K-8     2.92GB/s ± 1%  2.94GB/s ± 3%  +0.65% (p=0.008 n=25+25)
RegexpMatchMedium_32-8    8.14MB/s ± 1%  8.22MB/s ± 1%  +0.98% (p=0.000 n=25+24)
RegexpMatchMedium_1K-8    28.4MB/s ± 1%  29.6MB/s ± 1%  +4.19% (p=0.000 n=25+25)
RegexpMatchHard_32-8      18.9MB/s ± 2%  19.4MB/s ± 1%  +2.43% (p=0.000 n=25+25)
RegexpMatchHard_1K-8      20.2MB/s ± 1%  20.7MB/s ± 1%  +2.76% (p=0.000 n=25+24)
Revcomp-8                 561MB/s ± 2%   566MB/s ± 3%   +0.75% (p=0.021 n=25+24)
Template-8                30.7MB/s ± 2%  30.6MB/s ± 1%  ~      (p=0.131 n=25+24)
[Geo mean]                120MB/s        121MB/s        +0.48%

https://perf.golang.org/search?q=upload:20181004.6

Change-Id: I97f9fee34577961a116a8ddd445c6272253f0f95
Reviewed-on: https://go-review.googlesource.com/c/139837
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
parent e508a5f072
commit 07e738ec32
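The diff below replaces mheap's fixed-size mSpanList arrays with a single treap keyed by span size. For readers unfamiliar with the structure, here is a minimal, standalone sketch of a best-fit treap over span sizes: insert adds a free span, remove returns the smallest span that satisfies an npage request. It is illustrative only and is not the runtime's mTreap; span, treap, insertNode, remove, and deleteKey are hypothetical names for this sketch.

// Minimal, standalone sketch of a best-fit treap keyed by span size in pages.
// Not the runtime's mTreap; every name here is a hypothetical stand-in.
package main

import (
	"fmt"
	"math/rand"
)

type span struct{ npages uintptr }

type node struct {
	s           *span
	prio        uint32 // random priority; min-heap order keeps expected depth O(log n)
	left, right *node
}

type treap struct{ root *node }

func rotateRight(n *node) *node { l := n.left; n.left, l.right = l.right, n; return l }
func rotateLeft(n *node) *node  { r := n.right; n.right, r.left = r.left, n; return r }

// insert adds a free span, keeping BST order on npages and heap order on prio.
func (t *treap) insert(s *span) { t.root = insertNode(t.root, s) }

func insertNode(n *node, s *span) *node {
	if n == nil {
		return &node{s: s, prio: rand.Uint32()}
	}
	if s.npages < n.s.npages {
		if n.left = insertNode(n.left, s); n.left.prio < n.prio {
			n = rotateRight(n)
		}
	} else {
		if n.right = insertNode(n.right, s); n.right.prio < n.prio {
			n = rotateLeft(n)
		}
	}
	return n
}

// remove deletes and returns the smallest span with at least npages pages
// (best fit), or nil if no such span exists.
func (t *treap) remove(npages uintptr) *span {
	best, found := uintptr(0), false
	for n := t.root; n != nil; {
		if n.s.npages >= npages {
			best, found = n.s.npages, true
			n = n.left // a smaller adequate span may still be to the left
		} else {
			n = n.right
		}
	}
	if !found {
		return nil
	}
	var s *span
	t.root = deleteKey(t.root, best, &s)
	return s
}

// deleteKey unlinks one node holding a span of exactly npages pages, rotating
// it down toward a leaf so the heap order on priorities is preserved.
func deleteKey(n *node, npages uintptr, out **span) *node {
	if n == nil {
		return nil
	}
	switch {
	case npages < n.s.npages:
		n.left = deleteKey(n.left, npages, out)
	case npages > n.s.npages:
		n.right = deleteKey(n.right, npages, out)
	case n.left == nil:
		*out = n.s
		return n.right
	case n.right == nil:
		*out = n.s
		return n.left
	case n.left.prio < n.right.prio:
		n = rotateRight(n)
		n.right = deleteKey(n.right, npages, out)
	default:
		n = rotateLeft(n)
		n.left = deleteKey(n.left, npages, out)
	}
	return n
}

func main() {
	var t treap
	for _, np := range []uintptr{8, 1, 32, 4, 16} {
		t.insert(&span{npages: np})
	}
	fmt.Println(t.remove(5).npages) // best fit for a 5-page request: prints 8
}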
@@ -136,8 +136,7 @@ const (
 	_TinySize      = 16
 	_TinySizeClass = int8(2)
 
-	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
-	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+	_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
 
 	// Per-P, per order stack segment cache size.
 	_StackCacheSize = 32 * 1024
@@ -30,13 +30,11 @@ const minPhysPageSize = 4096
 //go:notinheap
 type mheap struct {
 	lock      mutex
-	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
-	freelarge mTreap                   // free treap of length >= _MaxMHeapList
-	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
-	busylarge mSpanList                // busy lists of large spans length >= _MaxMHeapList
-	sweepgen  uint32                   // sweep generation, see comment in mspan
-	sweepdone uint32                   // all spans are swept
-	sweepers  uint32                   // number of active sweepone calls
+	free      mTreap    // free treap of spans
+	busy      mSpanList // busy list of spans
+	sweepgen  uint32    // sweep generation, see comment in mspan
+	sweepdone uint32    // all spans are swept
+	sweepers  uint32    // number of active sweepone calls
 
 	// allspans is a slice of all mspans ever created. Each mspan
 	// appears exactly once.
@@ -599,12 +597,7 @@ func (h *mheap) init() {
 	h.spanalloc.zero = false
 
 	// h->mapcache needs no init
-	for i := range h.free {
-		h.free[i].init()
-		h.busy[i].init()
-	}
-
-	h.busylarge.init()
+	h.busy.init()
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
@@ -647,30 +640,12 @@ retry:
 // Sweeps and reclaims at least npage pages into heap.
 // Called before allocating npage pages.
 func (h *mheap) reclaim(npage uintptr) {
-	// First try to sweep busy spans with large objects of size >= npage,
-	// this has good chances of reclaiming the necessary space.
-	for i := int(npage); i < len(h.busy); i++ {
-		if h.reclaimList(&h.busy[i], npage) != 0 {
-			return // Bingo!
-		}
-	}
-
-	// Then -- even larger objects.
-	if h.reclaimList(&h.busylarge, npage) != 0 {
+	if h.reclaimList(&h.busy, npage) != 0 {
 		return // Bingo!
 	}
 
-	// Now try smaller objects.
-	// One such object is not enough, so we need to reclaim several of them.
-	reclaimed := uintptr(0)
-	for i := 0; i < int(npage) && i < len(h.busy); i++ {
-		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
-		if reclaimed >= npage {
-			return
-		}
-	}
-
 	// Now sweep everything that is not yet swept.
+	var reclaimed uintptr
 	unlock(&h.lock)
 	for {
 		n := sweepone()
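The policy change described in the commit message, reclaiming any number of spans that together cover npage pages rather than hunting for one span of the right size, is what reclaimList now does over the single busy list. A hedged, self-contained sketch of that accumulate-until-enough pattern follows; span, swept, and reclaimFrom are illustrative names, not runtime APIs.

package main

import "fmt"

type span struct {
	npages uintptr
	swept  bool // stand-in for "sweeping this span would free it"
}

// reclaimFrom sweeps spans from a busy list until at least npage pages have
// been reclaimed, returning the number of pages actually reclaimed. Several
// small spans may add up to the request; no single span has to match it.
func reclaimFrom(busy []*span, npage uintptr) uintptr {
	var reclaimed uintptr
	for _, s := range busy {
		if !s.swept {
			continue // cannot reclaim an unswept span in this sketch
		}
		reclaimed += s.npages
		if reclaimed >= npage {
			break
		}
	}
	return reclaimed
}

func main() {
	busy := []*span{{npages: 2, swept: true}, {npages: 3, swept: false}, {npages: 4, swept: true}}
	fmt.Println(reclaimFrom(busy, 5)) // 2 + 4 = 6 pages reclaimed
}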
@@ -752,11 +727,7 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 			// Swept spans are at the end of lists.
-			if s.npages < uintptr(len(h.busy)) {
-				h.busy[s.npages].insertBack(s)
-			} else {
-				h.busylarge.insertBack(s)
-			}
+			h.busy.insertBack(s)
 		}
 	}
 	// heap_scan and heap_live were updated.
@@ -867,31 +838,20 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 // The returned span has been removed from the
 // free list, but its state is still mSpanFree.
 func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
-	var list *mSpanList
 	var s *mspan
-
-	// Try in fixed-size lists up to max.
-	for i := int(npage); i < len(h.free); i++ {
-		list = &h.free[i]
-		if !list.isEmpty() {
-			s = list.first
-			list.remove(s)
-			goto HaveSpan
-		}
-	}
-	// Best fit in list of large spans.
-	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
+	// Best fit in the treap of spans.
+	s = h.free.remove(npage)
 	if s == nil {
 		if !h.grow(npage) {
 			return nil
 		}
-		s = h.allocLarge(npage)
+		s = h.free.remove(npage)
 		if s == nil {
 			return nil
 		}
 	}
 
 HaveSpan:
 	// Mark span in use.
 	if s.state != mSpanFree {
 		throw("MHeap_AllocLocked - MSpan not free")
@@ -933,21 +893,6 @@ HaveSpan:
 	return s
 }
 
-// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
-// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of
-// the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced
-// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
-func (h *mheap) isLargeSpan(npages uintptr) bool {
-	return npages >= uintptr(len(h.free))
-}
-
-// allocLarge allocates a span of at least npage pages from the treap of large spans.
-// Returns nil if no such span currently exists.
-func (h *mheap) allocLarge(npage uintptr) *mspan {
-	// Search treap for smallest span with >= npage pages.
-	return h.freelarge.remove(npage)
-}
-
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
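Making the arithmetic behind the removed depth-bound comment explicit:

1 TB / 1 MB = 2^40 B / 2^20 B = 2^20 ≈ 1 million minimum-size large spans
depth of a perfectly balanced binary tree over 2^20 nodes: log2(2^20) = 20
observed treap depth < 2 × the balanced depth, giving the quoted bound of 2 × 20 = 40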
@@ -1023,7 +968,7 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) {
 	unlock(&h.lock)
 }
 
-// s must be on a busy list (h.busy or h.busylarge) or unlinked.
+// s must be on the busy list or unlinked.
 func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	switch s.state {
 	case mSpanManual:
@@ -1048,7 +993,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 	}
 	s.state = mSpanFree
 	if s.inList() {
-		h.busyList(s.npages).remove(s)
+		h.busy.remove(s)
 	}
 
 	// Stamp newly unused spans. The scavenger will use that
@@ -1069,12 +1014,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		h.setSpan(before.base(), s)
 		// The size is potentially changing so the treap needs to delete adjacent nodes and
 		// insert back as a combined node.
-		if h.isLargeSpan(before.npages) {
-			// We have a t, it is large so it has to be in the treap so we can remove it.
-			h.freelarge.removeSpan(before)
-		} else {
-			h.freeList(before.npages).remove(before)
-		}
+		h.free.removeSpan(before)
 		before.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(before))
 	}
@@ -1085,32 +1025,13 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		s.npreleased += after.npreleased
 		s.needzero |= after.needzero
 		h.setSpan(s.base()+s.npages*pageSize-1, s)
-		if h.isLargeSpan(after.npages) {
-			h.freelarge.removeSpan(after)
-		} else {
-			h.freeList(after.npages).remove(after)
-		}
+		h.free.removeSpan(after)
 		after.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(after))
 	}
 
-	// Insert s into appropriate list or treap.
-	if h.isLargeSpan(s.npages) {
-		h.freelarge.insert(s)
-	} else {
-		h.freeList(s.npages).insert(s)
-	}
-}
-
-func (h *mheap) freeList(npages uintptr) *mSpanList {
-	return &h.free[npages]
-}
-
-func (h *mheap) busyList(npages uintptr) *mSpanList {
-	if npages < uintptr(len(h.busy)) {
-		return &h.busy[npages]
-	}
-	return &h.busylarge
+	// Insert s into the free treap.
+	h.free.insert(s)
 }
 
 func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
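The coalescing logic in the two hunks above merges a freed span with already-free neighbors before reinserting it, which is why the adjacent treap nodes must be deleted and a combined node inserted. A hedged, self-contained sketch of that merge step follows; it uses plain maps keyed by boundary page numbers where the runtime uses the span map plus the treap, and span, free, byStart, and byEnd are hypothetical names.

package main

import "fmt"

type span struct {
	start, npages uintptr // page numbers, not byte addresses, to keep the sketch simple
}

// free coalesces s with any already-free neighbors and records the merged
// span. byStart and byEnd index free spans by their first page and their
// one-past-last page.
func free(s *span, byStart, byEnd map[uintptr]*span) {
	if before, ok := byEnd[s.start]; ok { // a free neighbor ends where s begins
		delete(byStart, before.start)
		delete(byEnd, s.start)
		s.start, s.npages = before.start, before.npages+s.npages
	}
	if after, ok := byStart[s.start+s.npages]; ok { // a free neighbor begins where s ends
		delete(byStart, after.start)
		delete(byEnd, after.start+after.npages)
		s.npages += after.npages
	}
	// The merged span's size changed, so the old entries leave the structure
	// and the combined span is inserted once, mirroring the treap update above.
	byStart[s.start] = s
	byEnd[s.start+s.npages] = s
}

func main() {
	byStart, byEnd := map[uintptr]*span{}, map[uintptr]*span{}
	free(&span{start: 0, npages: 4}, byStart, byEnd)
	free(&span{start: 8, npages: 2}, byStart, byEnd)
	free(&span{start: 4, npages: 4}, byStart, byEnd) // bridges the two: one 10-page span
	fmt.Println(len(byStart), byStart[0].npages)     // prints: 1 10
}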
@@ -1123,21 +1044,6 @@ func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
 	return 0
 }
 
-func scavengelist(list *mSpanList, now, limit uint64) uintptr {
-	if list.isEmpty() {
-		return 0
-	}
-
-	var sumreleased uintptr
-	for s := list.first; s != nil; s = s.next {
-		if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
-			continue
-		}
-		sumreleased += s.scavenge()
-	}
-	return sumreleased
-}
-
 func (h *mheap) scavenge(k int32, now, limit uint64) {
 	// Disallow malloc or panic while holding the heap lock. We do
 	// this here because this is an non-mallocgc entry-point to
@@ -1145,11 +1051,7 @@ func (h *mheap) scavenge(k int32, now, limit uint64) {
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
-	var sumreleased uintptr
-	for i := 0; i < len(h.free); i++ {
-		sumreleased += scavengelist(&h.free[i], now, limit)
-	}
-	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
+	sumreleased := scavengetreap(h.free.treap, now, limit)
 	unlock(&h.lock)
 	gp.m.mallocing--
 
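With the fixed-size free lists gone, the scavenger in the last hunk walks the single free treap instead of iterating per-size lists. A simplified, self-contained sketch of such a walk follows, assuming a hypothetical treapNode and span layout rather than the runtime's types; the runtime's per-node scavenging returns memory to the OS, while this sketch just flips a flag and counts pages.

package main

import "fmt"

type span struct {
	npages      uintptr
	unusedsince uint64 // timestamp at which the span became unused
	released    bool
}

type treapNode struct {
	s           *span
	left, right *treapNode
}

// scavengeTreap visits every node of the free treap and releases spans that
// have sat unused for longer than limit, returning the total pages released.
func scavengeTreap(t *treapNode, now, limit uint64) uintptr {
	if t == nil {
		return 0
	}
	var released uintptr
	if s := t.s; !s.released && now-s.unusedsince > limit {
		s.released = true
		released += s.npages
	}
	return released + scavengeTreap(t.left, now, limit) + scavengeTreap(t.right, now, limit)
}

func main() {
	old := &treapNode{s: &span{npages: 8, unusedsince: 100}}
	fresh := &treapNode{s: &span{npages: 4, unusedsince: 900}, left: old}
	fmt.Println(scavengeTreap(fresh, 1000, 500)) // only the 8-page span is old enough: prints 8
}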