mirror of
https://github.com/golang/go
synced 2024-11-26 03:07:57 -07:00
runtime: speedup malloc stats collection
Count only number of frees, everything else is derivable and does not need to be counted on every malloc. benchmark old ns/op new ns/op delta BenchmarkMalloc8 68 66 -3.07% BenchmarkMalloc16 75 70 -6.48% BenchmarkMallocTypeInfo8 102 97 -4.80% BenchmarkMallocTypeInfo16 108 105 -2.78% R=golang-dev, dave, rsc CC=golang-dev https://golang.org/cl/9776043
This commit is contained in:
parent
07ea243d50
commit
5d637b83a9
@ -48,7 +48,6 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
|
||||
size += sizeof(uintptr);
|
||||
|
||||
c = m->mcache;
|
||||
c->local_nmalloc++;
|
||||
if(size <= MaxSmallSize) {
|
||||
// Allocate from mcache free lists.
|
||||
// Inlined version of SizeToClass().
|
||||
@ -70,10 +69,6 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
|
||||
runtime·memclr((byte*)v, size);
|
||||
}
|
||||
c->local_cachealloc += size;
|
||||
c->local_objects++;
|
||||
c->local_alloc += size;
|
||||
c->local_total_alloc += size;
|
||||
c->local_by_size[sizeclass].nmalloc++;
|
||||
} else {
|
||||
// TODO(rsc): Report tracebacks for very large allocations.
|
||||
|
||||
@ -86,21 +81,12 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
|
||||
runtime·throw("out of memory");
|
||||
s->limit = (byte*)(s->start<<PageShift) + size;
|
||||
size = npages<<PageShift;
|
||||
c->local_alloc += size;
|
||||
c->local_total_alloc += size;
|
||||
v = (void*)(s->start << PageShift);
|
||||
|
||||
// setup for mark sweep
|
||||
runtime·markspan(v, 0, 0, true);
|
||||
}
|
||||
|
||||
if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
|
||||
// purge cache stats to prevent overflow
|
||||
runtime·lock(&runtime·mheap);
|
||||
runtime·purgecachedstats(c);
|
||||
runtime·unlock(&runtime·mheap);
|
||||
}
|
||||
|
||||
if(!(flag & FlagNoGC))
|
||||
runtime·markallocated(v, size, (flag&FlagNoPointers) != 0);
|
||||
|
||||
@ -183,6 +169,8 @@ runtime·free(void *v)
|
||||
runtime·markfreed(v, size);
|
||||
runtime·unmarkspan(v, 1<<PageShift);
|
||||
runtime·MHeap_Free(&runtime·mheap, s, 1);
|
||||
c->local_nlargefree++;
|
||||
c->local_largefree += size;
|
||||
} else {
|
||||
// Small object.
|
||||
size = runtime·class_to_size[sizeclass];
|
||||
@ -192,11 +180,9 @@ runtime·free(void *v)
|
||||
// it might coalesce v and other blocks into a bigger span
|
||||
// and change the bitmap further.
|
||||
runtime·markfreed(v, size);
|
||||
c->local_by_size[sizeclass].nfree++;
|
||||
c->local_nsmallfree[sizeclass]++;
|
||||
runtime·MCache_Free(c, v, sizeclass, size);
|
||||
}
|
||||
c->local_nfree++;
|
||||
c->local_alloc -= size;
|
||||
if(prof)
|
||||
runtime·MProf_Free(v, size);
|
||||
m->mallocing = 0;
|
||||
@ -286,28 +272,22 @@ runtime·freemcache(MCache *c)
|
||||
void
|
||||
runtime·purgecachedstats(MCache *c)
|
||||
{
|
||||
MHeap *h;
|
||||
int32 i;
|
||||
|
||||
// Protected by either heap or GC lock.
|
||||
h = &runtime·mheap;
|
||||
mstats.heap_alloc += c->local_cachealloc;
|
||||
c->local_cachealloc = 0;
|
||||
mstats.heap_objects += c->local_objects;
|
||||
c->local_objects = 0;
|
||||
mstats.nmalloc += c->local_nmalloc;
|
||||
c->local_nmalloc = 0;
|
||||
mstats.nfree += c->local_nfree;
|
||||
c->local_nfree = 0;
|
||||
mstats.nlookup += c->local_nlookup;
|
||||
c->local_nlookup = 0;
|
||||
mstats.alloc += c->local_alloc;
|
||||
c->local_alloc= 0;
|
||||
mstats.total_alloc += c->local_total_alloc;
|
||||
c->local_total_alloc= 0;
|
||||
for(i=0; i<nelem(c->local_by_size); i++) {
|
||||
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
|
||||
c->local_by_size[i].nmalloc = 0;
|
||||
mstats.by_size[i].nfree += c->local_by_size[i].nfree;
|
||||
c->local_by_size[i].nfree = 0;
|
||||
h->largefree += c->local_largefree;
|
||||
c->local_largefree = 0;
|
||||
h->nlargefree += c->local_nlargefree;
|
||||
c->local_nlargefree = 0;
|
||||
for(i=0; i<nelem(c->local_nsmallfree); i++) {
|
||||
h->nsmallfree[i] += c->local_nsmallfree[i];
|
||||
c->local_nsmallfree[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -286,22 +286,15 @@ struct MCache
|
||||
{
|
||||
// The following members are accessed on every malloc,
|
||||
// so they are grouped here for better caching.
|
||||
int32 next_sample; // trigger heap sample after allocating this many bytes
|
||||
int32 next_sample; // trigger heap sample after allocating this many bytes
|
||||
intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
|
||||
// The rest is not accessed on every malloc.
|
||||
MCacheList list[NumSizeClasses];
|
||||
intptr local_objects; // objects allocated (or freed) from cache since last lock of heap
|
||||
intptr local_alloc; // bytes allocated (or freed) since last lock of heap
|
||||
uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap
|
||||
uintptr local_nmalloc; // number of mallocs since last lock of heap
|
||||
uintptr local_nfree; // number of frees since last lock of heap
|
||||
uintptr local_nlookup; // number of pointer lookups since last lock of heap
|
||||
// Statistics about allocation size classes since last lock of heap
|
||||
struct {
|
||||
uintptr nmalloc;
|
||||
uintptr nfree;
|
||||
} local_by_size[NumSizeClasses];
|
||||
|
||||
// Local allocator stats, flushed during GC.
|
||||
uintptr local_nlookup; // number of pointer lookups
|
||||
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
|
||||
uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize)
|
||||
uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
|
||||
};
|
||||
|
||||
void runtime·MCache_Refill(MCache *c, int32 sizeclass);
|
||||
@ -431,6 +424,11 @@ struct MHeap
|
||||
|
||||
FixAlloc spanalloc; // allocator for Span*
|
||||
FixAlloc cachealloc; // allocator for MCache*
|
||||
|
||||
// Malloc stats.
|
||||
uint64 largefree; // bytes freed for large objects (>MaxSmallSize)
|
||||
uint64 nlargefree; // number of frees for large objects (>MaxSmallSize)
|
||||
uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
|
||||
};
|
||||
extern MHeap runtime·mheap;
|
||||
|
||||
|
@ -57,7 +57,6 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
|
||||
l->list = p;
|
||||
l->nlist++;
|
||||
c->local_cachealloc -= size;
|
||||
c->local_objects--;
|
||||
|
||||
// We transfer span at a time from MCentral to MCache,
|
||||
// if we have 2 times more than that, release a half back.
|
||||
|
@ -1711,8 +1711,8 @@ sweepspan(ParFor *desc, uint32 idx)
|
||||
runtime·unmarkspan(p, 1<<PageShift);
|
||||
*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
|
||||
runtime·MHeap_Free(&runtime·mheap, s, 1);
|
||||
c->local_alloc -= size;
|
||||
c->local_nfree++;
|
||||
c->local_nlargefree++;
|
||||
c->local_largefree += size;
|
||||
} else {
|
||||
// Free small object.
|
||||
switch(compression) {
|
||||
@ -1733,11 +1733,8 @@ sweepspan(ParFor *desc, uint32 idx)
|
||||
}
|
||||
|
||||
if(nfree) {
|
||||
c->local_by_size[cl].nfree += nfree;
|
||||
c->local_alloc -= size * nfree;
|
||||
c->local_nfree += nfree;
|
||||
c->local_nsmallfree[cl] += nfree;
|
||||
c->local_cachealloc -= nfree * size;
|
||||
c->local_objects -= nfree;
|
||||
runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
|
||||
}
|
||||
}
|
||||
@ -1855,13 +1852,28 @@ runtime·gchelper(void)
|
||||
static int32 gcpercent = GcpercentUnknown;
|
||||
|
||||
static void
|
||||
cachestats(GCStats *stats)
|
||||
cachestats(void)
|
||||
{
|
||||
MCache *c;
|
||||
P *p, **pp;
|
||||
|
||||
for(pp=runtime·allp; p=*pp; pp++) {
|
||||
c = p->mcache;
|
||||
if(c==nil)
|
||||
continue;
|
||||
runtime·purgecachedstats(c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
updatememstats(GCStats *stats)
|
||||
{
|
||||
M *mp;
|
||||
MSpan *s;
|
||||
MCache *c;
|
||||
P *p, **pp;
|
||||
int32 i;
|
||||
uint64 stacks_inuse;
|
||||
uint64 stacks_inuse, smallfree;
|
||||
uint64 *src, *dst;
|
||||
|
||||
if(stats)
|
||||
@ -1877,13 +1889,65 @@ cachestats(GCStats *stats)
|
||||
runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
|
||||
}
|
||||
}
|
||||
mstats.stacks_inuse = stacks_inuse;
|
||||
|
||||
// Calculate memory allocator stats.
|
||||
// During program execution we only count number of frees and amount of freed memory.
|
||||
// Current number of alive object in the heap and amount of alive heap memory
|
||||
// are calculated by scanning all spans.
|
||||
// Total number of mallocs is calculated as number of frees plus number of alive objects.
|
||||
// Similarly, total amount of allocated memory is calculated as amount of freed memory
|
||||
// plus amount of alive heap memory.
|
||||
mstats.alloc = 0;
|
||||
mstats.total_alloc = 0;
|
||||
mstats.nmalloc = 0;
|
||||
mstats.nfree = 0;
|
||||
for(i = 0; i < nelem(mstats.by_size); i++) {
|
||||
mstats.by_size[i].nmalloc = 0;
|
||||
mstats.by_size[i].nfree = 0;
|
||||
}
|
||||
|
||||
// Flush MCache's to MCentral.
|
||||
for(pp=runtime·allp; p=*pp; pp++) {
|
||||
c = p->mcache;
|
||||
if(c==nil)
|
||||
continue;
|
||||
runtime·purgecachedstats(c);
|
||||
runtime·MCache_ReleaseAll(c);
|
||||
}
|
||||
mstats.stacks_inuse = stacks_inuse;
|
||||
|
||||
// Aggregate local stats.
|
||||
cachestats();
|
||||
|
||||
// Scan all spans and count number of alive objects.
|
||||
for(i = 0; i < runtime·mheap.nspan; i++) {
|
||||
s = runtime·mheap.allspans[i];
|
||||
if(s->state != MSpanInUse)
|
||||
continue;
|
||||
if(s->sizeclass == 0) {
|
||||
mstats.nmalloc++;
|
||||
mstats.alloc += s->elemsize;
|
||||
} else {
|
||||
mstats.nmalloc += s->ref;
|
||||
mstats.by_size[s->sizeclass].nmalloc += s->ref;
|
||||
mstats.alloc += s->ref*s->elemsize;
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate by size class.
|
||||
smallfree = 0;
|
||||
mstats.nfree = runtime·mheap.nlargefree;
|
||||
for(i = 0; i < nelem(mstats.by_size); i++) {
|
||||
mstats.nfree += runtime·mheap.nsmallfree[i];
|
||||
mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
|
||||
mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
|
||||
smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
|
||||
}
|
||||
mstats.nmalloc += mstats.nfree;
|
||||
|
||||
// Calculate derived stats.
|
||||
mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
|
||||
mstats.heap_alloc = mstats.alloc;
|
||||
mstats.heap_objects = mstats.nmalloc - mstats.nfree;
|
||||
}
|
||||
|
||||
// Structure of arguments passed to function gc().
|
||||
@ -2029,7 +2093,7 @@ gc(struct gc_args *args)
|
||||
heap0 = 0;
|
||||
obj0 = 0;
|
||||
if(gctrace) {
|
||||
cachestats(nil);
|
||||
updatememstats(nil);
|
||||
heap0 = mstats.heap_alloc;
|
||||
obj0 = mstats.nmalloc - mstats.nfree;
|
||||
}
|
||||
@ -2079,18 +2143,10 @@ gc(struct gc_args *args)
|
||||
if(work.nproc > 1)
|
||||
runtime·notesleep(&work.alldone);
|
||||
|
||||
cachestats(&stats);
|
||||
|
||||
stats.nprocyield += work.sweepfor->nprocyield;
|
||||
stats.nosyield += work.sweepfor->nosyield;
|
||||
stats.nsleep += work.sweepfor->nsleep;
|
||||
|
||||
cachestats();
|
||||
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
|
||||
m->gcing = 0;
|
||||
|
||||
heap1 = mstats.heap_alloc;
|
||||
obj1 = mstats.nmalloc - mstats.nfree;
|
||||
|
||||
t4 = runtime·nanotime();
|
||||
mstats.last_gc = t4;
|
||||
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
|
||||
@ -2100,6 +2156,14 @@ gc(struct gc_args *args)
|
||||
runtime·printf("pause %D\n", t4-t0);
|
||||
|
||||
if(gctrace) {
|
||||
updatememstats(&stats);
|
||||
heap1 = mstats.heap_alloc;
|
||||
obj1 = mstats.nmalloc - mstats.nfree;
|
||||
|
||||
stats.nprocyield += work.sweepfor->nprocyield;
|
||||
stats.nosyield += work.sweepfor->nosyield;
|
||||
stats.nsleep += work.sweepfor->nsleep;
|
||||
|
||||
runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
|
||||
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
|
||||
mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
|
||||
@ -2144,7 +2208,7 @@ runtime·ReadMemStats(MStats *stats)
|
||||
runtime·semacquire(&runtime·worldsema);
|
||||
m->gcing = 1;
|
||||
runtime·stoptheworld();
|
||||
cachestats(nil);
|
||||
updatememstats(nil);
|
||||
*stats = mstats;
|
||||
m->gcing = 0;
|
||||
runtime·semrelease(&runtime·worldsema);
|
||||
|
Loading…
Reference in New Issue
Block a user