1
0
mirror of https://github.com/golang/go synced 2024-11-26 03:57:57 -07:00

runtime: speedup malloc stats collection

Count only number of frees, everything else is derivable
and does not need to be counted on every malloc.
benchmark                    old ns/op    new ns/op    delta
BenchmarkMalloc8                    68           66   -3.07%
BenchmarkMalloc16                   75           70   -6.48%
BenchmarkMallocTypeInfo8           102           97   -4.80%
BenchmarkMallocTypeInfo16          108          105   -2.78%

R=golang-dev, dave, rsc
CC=golang-dev
https://golang.org/cl/9776043
This commit is contained in:
Dmitriy Vyukov 2013-06-06 14:56:50 +04:00
parent 07ea243d50
commit 5d637b83a9
4 changed files with 108 additions and 67 deletions

View File

@ -48,7 +48,6 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
size += sizeof(uintptr);
c = m->mcache;
c->local_nmalloc++;
if(size <= MaxSmallSize) {
// Allocate from mcache free lists.
// Inlined version of SizeToClass().
@ -70,10 +69,6 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
runtime·memclr((byte*)v, size);
}
c->local_cachealloc += size;
c->local_objects++;
c->local_alloc += size;
c->local_total_alloc += size;
c->local_by_size[sizeclass].nmalloc++;
} else {
// TODO(rsc): Report tracebacks for very large allocations.
@ -86,21 +81,12 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
runtime·throw("out of memory");
s->limit = (byte*)(s->start<<PageShift) + size;
size = npages<<PageShift;
c->local_alloc += size;
c->local_total_alloc += size;
v = (void*)(s->start << PageShift);
// setup for mark sweep
runtime·markspan(v, 0, 0, true);
}
if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
// purge cache stats to prevent overflow
runtime·lock(&runtime·mheap);
runtime·purgecachedstats(c);
runtime·unlock(&runtime·mheap);
}
if(!(flag & FlagNoGC))
runtime·markallocated(v, size, (flag&FlagNoPointers) != 0);
@ -183,6 +169,8 @@ runtime·free(void *v)
runtime·markfreed(v, size);
runtime·unmarkspan(v, 1<<PageShift);
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_nlargefree++;
c->local_largefree += size;
} else {
// Small object.
size = runtime·class_to_size[sizeclass];
@ -192,11 +180,9 @@ runtime·free(void *v)
// it might coalesce v and other blocks into a bigger span
// and change the bitmap further.
runtime·markfreed(v, size);
c->local_by_size[sizeclass].nfree++;
c->local_nsmallfree[sizeclass]++;
runtime·MCache_Free(c, v, sizeclass, size);
}
c->local_nfree++;
c->local_alloc -= size;
if(prof)
runtime·MProf_Free(v, size);
m->mallocing = 0;
@ -286,28 +272,22 @@ runtime·freemcache(MCache *c)
void
runtime·purgecachedstats(MCache *c)
{
MHeap *h;
int32 i;
// Protected by either heap or GC lock.
h = &runtime·mheap;
mstats.heap_alloc += c->local_cachealloc;
c->local_cachealloc = 0;
mstats.heap_objects += c->local_objects;
c->local_objects = 0;
mstats.nmalloc += c->local_nmalloc;
c->local_nmalloc = 0;
mstats.nfree += c->local_nfree;
c->local_nfree = 0;
mstats.nlookup += c->local_nlookup;
c->local_nlookup = 0;
mstats.alloc += c->local_alloc;
c->local_alloc= 0;
mstats.total_alloc += c->local_total_alloc;
c->local_total_alloc= 0;
for(i=0; i<nelem(c->local_by_size); i++) {
mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
c->local_by_size[i].nmalloc = 0;
mstats.by_size[i].nfree += c->local_by_size[i].nfree;
c->local_by_size[i].nfree = 0;
h->largefree += c->local_largefree;
c->local_largefree = 0;
h->nlargefree += c->local_nlargefree;
c->local_nlargefree = 0;
for(i=0; i<nelem(c->local_nsmallfree); i++) {
h->nsmallfree[i] += c->local_nsmallfree[i];
c->local_nsmallfree[i] = 0;
}
}

View File

@ -286,22 +286,15 @@ struct MCache
{
// The following members are accessed on every malloc,
// so they are grouped here for better caching.
int32 next_sample; // trigger heap sample after allocating this many bytes
int32 next_sample; // trigger heap sample after allocating this many bytes
intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
// The rest is not accessed on every malloc.
MCacheList list[NumSizeClasses];
intptr local_objects; // objects allocated (or freed) from cache since last lock of heap
intptr local_alloc; // bytes allocated (or freed) since last lock of heap
uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap
uintptr local_nmalloc; // number of mallocs since last lock of heap
uintptr local_nfree; // number of frees since last lock of heap
uintptr local_nlookup; // number of pointer lookups since last lock of heap
// Statistics about allocation size classes since last lock of heap
struct {
uintptr nmalloc;
uintptr nfree;
} local_by_size[NumSizeClasses];
// Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize)
uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
};
void runtime·MCache_Refill(MCache *c, int32 sizeclass);
@ -431,6 +424,11 @@ struct MHeap
FixAlloc spanalloc; // allocator for Span*
FixAlloc cachealloc; // allocator for MCache*
// Malloc stats.
uint64 largefree; // bytes freed for large objects (>MaxSmallSize)
uint64 nlargefree; // number of frees for large objects (>MaxSmallSize)
uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
};
extern MHeap runtime·mheap;

View File

@ -57,7 +57,6 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
l->list = p;
l->nlist++;
c->local_cachealloc -= size;
c->local_objects--;
// We transfer span at a time from MCentral to MCache,
// if we have 2 times more than that, release a half back.

View File

@ -1711,8 +1711,8 @@ sweepspan(ParFor *desc, uint32 idx)
runtime·unmarkspan(p, 1<<PageShift);
*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_alloc -= size;
c->local_nfree++;
c->local_nlargefree++;
c->local_largefree += size;
} else {
// Free small object.
switch(compression) {
@ -1733,11 +1733,8 @@ sweepspan(ParFor *desc, uint32 idx)
}
if(nfree) {
c->local_by_size[cl].nfree += nfree;
c->local_alloc -= size * nfree;
c->local_nfree += nfree;
c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size;
c->local_objects -= nfree;
runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
}
}
@ -1855,13 +1852,28 @@ runtime·gchelper(void)
static int32 gcpercent = GcpercentUnknown;
static void
cachestats(GCStats *stats)
cachestats(void)
{
MCache *c;
P *p, **pp;
for(pp=runtime·allp; p=*pp; pp++) {
c = p->mcache;
if(c==nil)
continue;
runtime·purgecachedstats(c);
}
}
static void
updatememstats(GCStats *stats)
{
M *mp;
MSpan *s;
MCache *c;
P *p, **pp;
int32 i;
uint64 stacks_inuse;
uint64 stacks_inuse, smallfree;
uint64 *src, *dst;
if(stats)
@ -1877,13 +1889,65 @@ cachestats(GCStats *stats)
runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
}
}
mstats.stacks_inuse = stacks_inuse;
// Calculate memory allocator stats.
// During program execution we only count number of frees and amount of freed memory.
// Current number of alive object in the heap and amount of alive heap memory
// are calculated by scanning all spans.
// Total number of mallocs is calculated as number of frees plus number of alive objects.
// Similarly, total amount of allocated memory is calculated as amount of freed memory
// plus amount of alive heap memory.
mstats.alloc = 0;
mstats.total_alloc = 0;
mstats.nmalloc = 0;
mstats.nfree = 0;
for(i = 0; i < nelem(mstats.by_size); i++) {
mstats.by_size[i].nmalloc = 0;
mstats.by_size[i].nfree = 0;
}
// Flush MCache's to MCentral.
for(pp=runtime·allp; p=*pp; pp++) {
c = p->mcache;
if(c==nil)
continue;
runtime·purgecachedstats(c);
runtime·MCache_ReleaseAll(c);
}
mstats.stacks_inuse = stacks_inuse;
// Aggregate local stats.
cachestats();
// Scan all spans and count number of alive objects.
for(i = 0; i < runtime·mheap.nspan; i++) {
s = runtime·mheap.allspans[i];
if(s->state != MSpanInUse)
continue;
if(s->sizeclass == 0) {
mstats.nmalloc++;
mstats.alloc += s->elemsize;
} else {
mstats.nmalloc += s->ref;
mstats.by_size[s->sizeclass].nmalloc += s->ref;
mstats.alloc += s->ref*s->elemsize;
}
}
// Aggregate by size class.
smallfree = 0;
mstats.nfree = runtime·mheap.nlargefree;
for(i = 0; i < nelem(mstats.by_size); i++) {
mstats.nfree += runtime·mheap.nsmallfree[i];
mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
}
mstats.nmalloc += mstats.nfree;
// Calculate derived stats.
mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
mstats.heap_alloc = mstats.alloc;
mstats.heap_objects = mstats.nmalloc - mstats.nfree;
}
// Structure of arguments passed to function gc().
@ -2029,7 +2093,7 @@ gc(struct gc_args *args)
heap0 = 0;
obj0 = 0;
if(gctrace) {
cachestats(nil);
updatememstats(nil);
heap0 = mstats.heap_alloc;
obj0 = mstats.nmalloc - mstats.nfree;
}
@ -2079,18 +2143,10 @@ gc(struct gc_args *args)
if(work.nproc > 1)
runtime·notesleep(&work.alldone);
cachestats(&stats);
stats.nprocyield += work.sweepfor->nprocyield;
stats.nosyield += work.sweepfor->nosyield;
stats.nsleep += work.sweepfor->nsleep;
cachestats();
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
m->gcing = 0;
heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree;
t4 = runtime·nanotime();
mstats.last_gc = t4;
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
@ -2100,6 +2156,14 @@ gc(struct gc_args *args)
runtime·printf("pause %D\n", t4-t0);
if(gctrace) {
updatememstats(&stats);
heap1 = mstats.heap_alloc;
obj1 = mstats.nmalloc - mstats.nfree;
stats.nprocyield += work.sweepfor->nprocyield;
stats.nosyield += work.sweepfor->nosyield;
stats.nsleep += work.sweepfor->nsleep;
runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
@ -2144,7 +2208,7 @@ runtime·ReadMemStats(MStats *stats)
runtime·semacquire(&runtime·worldsema);
m->gcing = 1;
runtime·stoptheworld();
cachestats(nil);
updatememstats(nil);
*stats = mstats;
m->gcing = 0;
runtime·semrelease(&runtime·worldsema);