mirror of
https://github.com/golang/go
synced 2024-11-23 17:50:06 -07:00
runtime: implement transfer cache
Currently we do the following dance after sweeping a span: 1. lock mcentral 2. remove the span from a list 3. unlock mcentral 4. unmark span 5. lock mheap 6. insert the span into heap 7. unlock mheap 8. lock mcentral 9. observe empty list 10. unlock mcentral 11. lock mheap 12. grab the span 13. unlock mheap 14. mark span 15. lock mcentral 16. insert the span into empty list 17. unlock mcentral This change short-circuits this sequence to nothing, that is, we just cache and use the span after sweeping. This gives us functionality similar (even better) to tcmalloc's transfer cache. benchmark old ns/op new ns/op delta BenchmarkMalloc8 22.2 19.5 -12.16% BenchmarkMalloc16 31.0 26.6 -14.19% LGTM=khr R=golang-codereviews, khr CC=golang-codereviews, rlh, rsc https://golang.org/cl/119550043
This commit is contained in:
parent
101c00a44f
commit
e0df11d57e
@ -423,7 +423,7 @@ struct MSpan
|
|||||||
|
|
||||||
void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
|
void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
|
||||||
void runtime·MSpan_EnsureSwept(MSpan *span);
|
void runtime·MSpan_EnsureSwept(MSpan *span);
|
||||||
bool runtime·MSpan_Sweep(MSpan *span);
|
bool runtime·MSpan_Sweep(MSpan *span, bool preserve);
|
||||||
|
|
||||||
// Every MSpan is in one doubly-linked list,
|
// Every MSpan is in one doubly-linked list,
|
||||||
// either one of the MHeap's free lists or one of the
|
// either one of the MHeap's free lists or one of the
|
||||||
@ -447,7 +447,7 @@ struct MCentral
|
|||||||
void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
|
void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
|
||||||
MSpan* runtime·MCentral_CacheSpan(MCentral *c);
|
MSpan* runtime·MCentral_CacheSpan(MCentral *c);
|
||||||
void runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
|
void runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
|
||||||
bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
|
bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve);
|
||||||
|
|
||||||
// Main malloc heap.
|
// Main malloc heap.
|
||||||
// The heap itself is the "free[]" and "large" arrays,
|
// The heap itself is the "free[]" and "large" arrays,
|
||||||
|
@ -9,16 +9,12 @@
|
|||||||
// The MCentral doesn't actually contain the list of free objects; the MSpan does.
|
// The MCentral doesn't actually contain the list of free objects; the MSpan does.
|
||||||
// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
|
// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
|
||||||
// and those that are completely allocated (c->empty).
|
// and those that are completely allocated (c->empty).
|
||||||
//
|
|
||||||
// TODO(rsc): tcmalloc uses a "transfer cache" to split the list
|
|
||||||
// into sections of class_to_transfercount[sizeclass] objects
|
|
||||||
// so that it is faster to move those lists between MCaches and MCentrals.
|
|
||||||
|
|
||||||
#include "runtime.h"
|
#include "runtime.h"
|
||||||
#include "arch_GOARCH.h"
|
#include "arch_GOARCH.h"
|
||||||
#include "malloc.h"
|
#include "malloc.h"
|
||||||
|
|
||||||
static bool MCentral_Grow(MCentral *c);
|
static MSpan* MCentral_Grow(MCentral *c);
|
||||||
|
|
||||||
// Initialize a single central free list.
|
// Initialize a single central free list.
|
||||||
void
|
void
|
||||||
@ -42,17 +38,20 @@ runtime·MCentral_CacheSpan(MCentral *c)
|
|||||||
retry:
|
retry:
|
||||||
for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
|
for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
|
||||||
if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
|
if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
|
||||||
|
runtime·MSpanList_Remove(s);
|
||||||
|
runtime·MSpanList_InsertBack(&c->empty, s);
|
||||||
runtime·unlock(&c->lock);
|
runtime·unlock(&c->lock);
|
||||||
runtime·MSpan_Sweep(s);
|
runtime·MSpan_Sweep(s, true);
|
||||||
runtime·lock(&c->lock);
|
goto havespan;
|
||||||
// the span could have been moved to heap, retry
|
|
||||||
goto retry;
|
|
||||||
}
|
}
|
||||||
if(s->sweepgen == sg-1) {
|
if(s->sweepgen == sg-1) {
|
||||||
// the span is being swept by background sweeper, skip
|
// the span is being swept by background sweeper, skip
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// we have a nonempty span that does not require sweeping, allocate from it
|
// we have a nonempty span that does not require sweeping, allocate from it
|
||||||
|
runtime·MSpanList_Remove(s);
|
||||||
|
runtime·MSpanList_InsertBack(&c->empty, s);
|
||||||
|
runtime·unlock(&c->lock);
|
||||||
goto havespan;
|
goto havespan;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -64,9 +63,12 @@ retry:
|
|||||||
// swept spans are at the end of the list
|
// swept spans are at the end of the list
|
||||||
runtime·MSpanList_InsertBack(&c->empty, s);
|
runtime·MSpanList_InsertBack(&c->empty, s);
|
||||||
runtime·unlock(&c->lock);
|
runtime·unlock(&c->lock);
|
||||||
runtime·MSpan_Sweep(s);
|
runtime·MSpan_Sweep(s, true);
|
||||||
|
if(s->freelist != nil)
|
||||||
|
goto havespan;
|
||||||
runtime·lock(&c->lock);
|
runtime·lock(&c->lock);
|
||||||
// the span could be moved to nonempty or heap, retry
|
// the span is still empty after sweep
|
||||||
|
// it is already in the empty list, so just retry
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
if(s->sweepgen == sg-1) {
|
if(s->sweepgen == sg-1) {
|
||||||
@ -77,25 +79,26 @@ retry:
|
|||||||
// all subsequent ones must also be either swept or in process of sweeping
|
// all subsequent ones must also be either swept or in process of sweeping
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
runtime·unlock(&c->lock);
|
||||||
|
|
||||||
// Replenish central list if empty.
|
// Replenish central list if empty.
|
||||||
if(!MCentral_Grow(c)) {
|
s = MCentral_Grow(c);
|
||||||
runtime·unlock(&c->lock);
|
if(s == nil)
|
||||||
return nil;
|
return nil;
|
||||||
}
|
runtime·lock(&c->lock);
|
||||||
goto retry;
|
runtime·MSpanList_InsertBack(&c->empty, s);
|
||||||
|
runtime·unlock(&c->lock);
|
||||||
|
|
||||||
havespan:
|
havespan:
|
||||||
|
// At this point s is a non-empty span, queued at the end of the empty list,
|
||||||
|
// c is unlocked.
|
||||||
cap = (s->npages << PageShift) / s->elemsize;
|
cap = (s->npages << PageShift) / s->elemsize;
|
||||||
n = cap - s->ref;
|
n = cap - s->ref;
|
||||||
if(n == 0)
|
if(n == 0)
|
||||||
runtime·throw("empty span");
|
runtime·throw("empty span");
|
||||||
if(s->freelist == nil)
|
if(s->freelist == nil)
|
||||||
runtime·throw("freelist empty");
|
runtime·throw("freelist empty");
|
||||||
runtime·MSpanList_Remove(s);
|
|
||||||
runtime·MSpanList_InsertBack(&c->empty, s);
|
|
||||||
s->incache = true;
|
s->incache = true;
|
||||||
runtime·unlock(&c->lock);
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,24 +128,39 @@ runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
|
|||||||
// Called during sweep.
|
// Called during sweep.
|
||||||
// Returns true if the span was returned to heap. Sets sweepgen to
|
// Returns true if the span was returned to heap. Sets sweepgen to
|
||||||
// the latest generation.
|
// the latest generation.
|
||||||
|
// If preserve=true, don't return the span to heap nor relink in MCentral lists;
|
||||||
|
// caller takes care of it.
|
||||||
bool
|
bool
|
||||||
runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end)
|
runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
|
||||||
{
|
{
|
||||||
|
bool wasempty;
|
||||||
|
|
||||||
if(s->incache)
|
if(s->incache)
|
||||||
runtime·throw("freespan into cached span");
|
runtime·throw("freespan into cached span");
|
||||||
|
|
||||||
|
// Add the objects back to s's free list.
|
||||||
|
wasempty = s->freelist == nil;
|
||||||
|
end->next = s->freelist;
|
||||||
|
s->freelist = start;
|
||||||
|
s->ref -= n;
|
||||||
|
|
||||||
|
if(preserve) {
|
||||||
|
// preserve is set only when called from MCentral_CacheSpan above,
|
||||||
|
// the span must be in the empty list.
|
||||||
|
if(s->next == nil)
|
||||||
|
runtime·throw("can't preserve unlinked span");
|
||||||
|
runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
runtime·lock(&c->lock);
|
runtime·lock(&c->lock);
|
||||||
|
|
||||||
// Move to nonempty if necessary.
|
// Move to nonempty if necessary.
|
||||||
if(s->freelist == nil) {
|
if(wasempty) {
|
||||||
runtime·MSpanList_Remove(s);
|
runtime·MSpanList_Remove(s);
|
||||||
runtime·MSpanList_Insert(&c->nonempty, s);
|
runtime·MSpanList_Insert(&c->nonempty, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the objects back to s's free list.
|
|
||||||
end->next = s->freelist;
|
|
||||||
s->freelist = start;
|
|
||||||
s->ref -= n;
|
|
||||||
|
|
||||||
// delay updating sweepgen until here. This is the signal that
|
// delay updating sweepgen until here. This is the signal that
|
||||||
// the span may be used in an MCache, so it must come after the
|
// the span may be used in an MCache, so it must come after the
|
||||||
// linked list operations above (actually, just after the
|
// linked list operations above (actually, just after the
|
||||||
@ -164,9 +182,8 @@ runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fetch a new span from the heap and
|
// Fetch a new span from the heap and carve into objects for the free list.
|
||||||
// carve into objects for the free list.
|
static MSpan*
|
||||||
static bool
|
|
||||||
MCentral_Grow(MCentral *c)
|
MCentral_Grow(MCentral *c)
|
||||||
{
|
{
|
||||||
uintptr size, npages, i, n;
|
uintptr size, npages, i, n;
|
||||||
@ -174,16 +191,12 @@ MCentral_Grow(MCentral *c)
|
|||||||
byte *p;
|
byte *p;
|
||||||
MSpan *s;
|
MSpan *s;
|
||||||
|
|
||||||
runtime·unlock(&c->lock);
|
|
||||||
npages = runtime·class_to_allocnpages[c->sizeclass];
|
npages = runtime·class_to_allocnpages[c->sizeclass];
|
||||||
size = runtime·class_to_size[c->sizeclass];
|
size = runtime·class_to_size[c->sizeclass];
|
||||||
n = (npages << PageShift) / size;
|
n = (npages << PageShift) / size;
|
||||||
s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
|
s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
|
||||||
if(s == nil) {
|
if(s == nil)
|
||||||
// TODO(rsc): Log out of memory
|
return nil;
|
||||||
runtime·lock(&c->lock);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Carve span into sequence of blocks.
|
// Carve span into sequence of blocks.
|
||||||
tailp = &s->freelist;
|
tailp = &s->freelist;
|
||||||
@ -197,8 +210,5 @@ MCentral_Grow(MCentral *c)
|
|||||||
}
|
}
|
||||||
*tailp = nil;
|
*tailp = nil;
|
||||||
runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
|
runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
|
||||||
|
return s;
|
||||||
runtime·lock(&c->lock);
|
|
||||||
runtime·MSpanList_Insert(&c->nonempty, s);
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
@ -884,7 +884,7 @@ runtime·MSpan_EnsureSwept(MSpan *s)
|
|||||||
if(runtime·atomicload(&s->sweepgen) == sg)
|
if(runtime·atomicload(&s->sweepgen) == sg)
|
||||||
return;
|
return;
|
||||||
if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
|
if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
|
||||||
runtime·MSpan_Sweep(s);
|
runtime·MSpan_Sweep(s, false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// unfortunate condition, and we don't have efficient means to wait
|
// unfortunate condition, and we don't have efficient means to wait
|
||||||
@ -895,8 +895,10 @@ runtime·MSpan_EnsureSwept(MSpan *s)
|
|||||||
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
|
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
|
||||||
// It clears the mark bits in preparation for the next GC round.
|
// It clears the mark bits in preparation for the next GC round.
|
||||||
// Returns true if the span was returned to heap.
|
// Returns true if the span was returned to heap.
|
||||||
|
// If preserve=true, don't return it to heap nor relink in MCentral lists;
|
||||||
|
// caller takes care of it.
|
||||||
bool
|
bool
|
||||||
runtime·MSpan_Sweep(MSpan *s)
|
runtime·MSpan_Sweep(MSpan *s, bool preserve)
|
||||||
{
|
{
|
||||||
int32 cl, n, npages, nfree;
|
int32 cl, n, npages, nfree;
|
||||||
uintptr size, off, *bitp, shift, xbits, bits;
|
uintptr size, off, *bitp, shift, xbits, bits;
|
||||||
@ -995,6 +997,8 @@ runtime·MSpan_Sweep(MSpan *s)
|
|||||||
*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
|
*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
|
||||||
if(cl == 0) {
|
if(cl == 0) {
|
||||||
// Free large span.
|
// Free large span.
|
||||||
|
if(preserve)
|
||||||
|
runtime·throw("can't preserve large span");
|
||||||
runtime·unmarkspan(p, s->npages<<PageShift);
|
runtime·unmarkspan(p, s->npages<<PageShift);
|
||||||
s->needzero = 1;
|
s->needzero = 1;
|
||||||
// important to set sweepgen before returning it to heap
|
// important to set sweepgen before returning it to heap
|
||||||
@ -1056,7 +1060,7 @@ runtime·MSpan_Sweep(MSpan *s)
|
|||||||
c->local_nsmallfree[cl] += nfree;
|
c->local_nsmallfree[cl] += nfree;
|
||||||
c->local_cachealloc -= nfree * size;
|
c->local_cachealloc -= nfree * size;
|
||||||
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
|
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
|
||||||
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end);
|
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
|
||||||
// MCentral_FreeSpan updates sweepgen
|
// MCentral_FreeSpan updates sweepgen
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
@ -1129,7 +1133,7 @@ runtime·sweepone(void)
|
|||||||
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
|
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
|
||||||
continue;
|
continue;
|
||||||
npages = s->npages;
|
npages = s->npages;
|
||||||
if(!runtime·MSpan_Sweep(s))
|
if(!runtime·MSpan_Sweep(s, false))
|
||||||
npages = 0;
|
npages = 0;
|
||||||
g->m->locks--;
|
g->m->locks--;
|
||||||
return npages;
|
return npages;
|
||||||
|
@ -107,7 +107,7 @@ retry:
|
|||||||
// swept spans are at the end of the list
|
// swept spans are at the end of the list
|
||||||
runtime·MSpanList_InsertBack(list, s);
|
runtime·MSpanList_InsertBack(list, s);
|
||||||
runtime·unlock(&h->lock);
|
runtime·unlock(&h->lock);
|
||||||
n += runtime·MSpan_Sweep(s);
|
n += runtime·MSpan_Sweep(s, false);
|
||||||
runtime·lock(&h->lock);
|
runtime·lock(&h->lock);
|
||||||
if(n >= npages)
|
if(n >= npages)
|
||||||
return n;
|
return n;
|
||||||
|
Loading…
Reference in New Issue
Block a user