diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc index 1f035cb2393..7b5b5980e36 100644 --- a/src/pkg/runtime/malloc.goc +++ b/src/pkg/runtime/malloc.goc @@ -48,7 +48,6 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) size += sizeof(uintptr); c = m->mcache; - c->local_nmalloc++; if(size <= MaxSmallSize) { // Allocate from mcache free lists. // Inlined version of SizeToClass(). @@ -70,10 +69,6 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) runtime·memclr((byte*)v, size); } c->local_cachealloc += size; - c->local_objects++; - c->local_alloc += size; - c->local_total_alloc += size; - c->local_by_size[sizeclass].nmalloc++; } else { // TODO(rsc): Report tracebacks for very large allocations. @@ -86,21 +81,12 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) runtime·throw("out of memory"); s->limit = (byte*)(s->start<local_alloc += size; - c->local_total_alloc += size; v = (void*)(s->start << PageShift); // setup for mark sweep runtime·markspan(v, 0, 0, true); } - if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) { - // purge cache stats to prevent overflow - runtime·lock(&runtime·mheap); - runtime·purgecachedstats(c); - runtime·unlock(&runtime·mheap); - } - if(!(flag & FlagNoGC)) runtime·markallocated(v, size, (flag&FlagNoPointers) != 0); @@ -183,6 +169,8 @@ runtime·free(void *v) runtime·markfreed(v, size); runtime·unmarkspan(v, 1<local_nlargefree++; + c->local_largefree += size; } else { // Small object. size = runtime·class_to_size[sizeclass]; @@ -192,11 +180,9 @@ runtime·free(void *v) // it might coalesce v and other blocks into a bigger span // and change the bitmap further. runtime·markfreed(v, size); - c->local_by_size[sizeclass].nfree++; + c->local_nsmallfree[sizeclass]++; runtime·MCache_Free(c, v, sizeclass, size); } - c->local_nfree++; - c->local_alloc -= size; if(prof) runtime·MProf_Free(v, size); m->mallocing = 0; @@ -286,28 +272,22 @@ runtime·freemcache(MCache *c) void runtime·purgecachedstats(MCache *c) { + MHeap *h; int32 i; // Protected by either heap or GC lock. + h = &runtime·mheap; mstats.heap_alloc += c->local_cachealloc; c->local_cachealloc = 0; - mstats.heap_objects += c->local_objects; - c->local_objects = 0; - mstats.nmalloc += c->local_nmalloc; - c->local_nmalloc = 0; - mstats.nfree += c->local_nfree; - c->local_nfree = 0; mstats.nlookup += c->local_nlookup; c->local_nlookup = 0; - mstats.alloc += c->local_alloc; - c->local_alloc= 0; - mstats.total_alloc += c->local_total_alloc; - c->local_total_alloc= 0; - for(i=0; ilocal_by_size); i++) { - mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc; - c->local_by_size[i].nmalloc = 0; - mstats.by_size[i].nfree += c->local_by_size[i].nfree; - c->local_by_size[i].nfree = 0; + h->largefree += c->local_largefree; + c->local_largefree = 0; + h->nlargefree += c->local_nlargefree; + c->local_nlargefree = 0; + for(i=0; ilocal_nsmallfree); i++) { + h->nsmallfree[i] += c->local_nsmallfree[i]; + c->local_nsmallfree[i] = 0; } } diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index ba8036ab688..41604501f0b 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -286,22 +286,15 @@ struct MCache { // The following members are accessed on every malloc, // so they are grouped here for better caching. - int32 next_sample; // trigger heap sample after allocating this many bytes + int32 next_sample; // trigger heap sample after allocating this many bytes intptr local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap // The rest is not accessed on every malloc. MCacheList list[NumSizeClasses]; - intptr local_objects; // objects allocated (or freed) from cache since last lock of heap - intptr local_alloc; // bytes allocated (or freed) since last lock of heap - uintptr local_total_alloc; // bytes allocated (even if freed) since last lock of heap - uintptr local_nmalloc; // number of mallocs since last lock of heap - uintptr local_nfree; // number of frees since last lock of heap - uintptr local_nlookup; // number of pointer lookups since last lock of heap - // Statistics about allocation size classes since last lock of heap - struct { - uintptr nmalloc; - uintptr nfree; - } local_by_size[NumSizeClasses]; - + // Local allocator stats, flushed during GC. + uintptr local_nlookup; // number of pointer lookups + uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize) + uintptr local_nlargefree; // number of frees for large objects (>MaxSmallSize) + uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; void runtime·MCache_Refill(MCache *c, int32 sizeclass); @@ -431,6 +424,11 @@ struct MHeap FixAlloc spanalloc; // allocator for Span* FixAlloc cachealloc; // allocator for MCache* + + // Malloc stats. + uint64 largefree; // bytes freed for large objects (>MaxSmallSize) + uint64 nlargefree; // number of frees for large objects (>MaxSmallSize) + uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; extern MHeap runtime·mheap; diff --git a/src/pkg/runtime/mcache.c b/src/pkg/runtime/mcache.c index 1e11927df75..863030e7431 100644 --- a/src/pkg/runtime/mcache.c +++ b/src/pkg/runtime/mcache.c @@ -57,7 +57,6 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) l->list = p; l->nlist++; c->local_cachealloc -= size; - c->local_objects--; // We transfer span at a time from MCentral to MCache, // if we have 2 times more than that, release a half back. diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index 2d4aeb226c7..4a386d16b38 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -1711,8 +1711,8 @@ sweepspan(ParFor *desc, uint32 idx) runtime·unmarkspan(p, 1<local_alloc -= size; - c->local_nfree++; + c->local_nlargefree++; + c->local_largefree += size; } else { // Free small object. switch(compression) { @@ -1733,11 +1733,8 @@ sweepspan(ParFor *desc, uint32 idx) } if(nfree) { - c->local_by_size[cl].nfree += nfree; - c->local_alloc -= size * nfree; - c->local_nfree += nfree; + c->local_nsmallfree[cl] += nfree; c->local_cachealloc -= nfree * size; - c->local_objects -= nfree; runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end); } } @@ -1855,13 +1852,28 @@ runtime·gchelper(void) static int32 gcpercent = GcpercentUnknown; static void -cachestats(GCStats *stats) +cachestats(void) +{ + MCache *c; + P *p, **pp; + + for(pp=runtime·allp; p=*pp; pp++) { + c = p->mcache; + if(c==nil) + continue; + runtime·purgecachedstats(c); + } +} + +static void +updatememstats(GCStats *stats) { M *mp; + MSpan *s; MCache *c; P *p, **pp; int32 i; - uint64 stacks_inuse; + uint64 stacks_inuse, smallfree; uint64 *src, *dst; if(stats) @@ -1877,13 +1889,65 @@ cachestats(GCStats *stats) runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats)); } } + mstats.stacks_inuse = stacks_inuse; + + // Calculate memory allocator stats. + // During program execution we only count number of frees and amount of freed memory. + // Current number of alive object in the heap and amount of alive heap memory + // are calculated by scanning all spans. + // Total number of mallocs is calculated as number of frees plus number of alive objects. + // Similarly, total amount of allocated memory is calculated as amount of freed memory + // plus amount of alive heap memory. + mstats.alloc = 0; + mstats.total_alloc = 0; + mstats.nmalloc = 0; + mstats.nfree = 0; + for(i = 0; i < nelem(mstats.by_size); i++) { + mstats.by_size[i].nmalloc = 0; + mstats.by_size[i].nfree = 0; + } + + // Flush MCache's to MCentral. for(pp=runtime·allp; p=*pp; pp++) { c = p->mcache; if(c==nil) continue; - runtime·purgecachedstats(c); + runtime·MCache_ReleaseAll(c); } - mstats.stacks_inuse = stacks_inuse; + + // Aggregate local stats. + cachestats(); + + // Scan all spans and count number of alive objects. + for(i = 0; i < runtime·mheap.nspan; i++) { + s = runtime·mheap.allspans[i]; + if(s->state != MSpanInUse) + continue; + if(s->sizeclass == 0) { + mstats.nmalloc++; + mstats.alloc += s->elemsize; + } else { + mstats.nmalloc += s->ref; + mstats.by_size[s->sizeclass].nmalloc += s->ref; + mstats.alloc += s->ref*s->elemsize; + } + } + + // Aggregate by size class. + smallfree = 0; + mstats.nfree = runtime·mheap.nlargefree; + for(i = 0; i < nelem(mstats.by_size); i++) { + mstats.nfree += runtime·mheap.nsmallfree[i]; + mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i]; + mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i]; + smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i]; + } + mstats.nmalloc += mstats.nfree; + + // Calculate derived stats. + mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree; + mstats.heap_alloc = mstats.alloc; + mstats.heap_objects = mstats.nmalloc - mstats.nfree; } // Structure of arguments passed to function gc(). @@ -2029,7 +2093,7 @@ gc(struct gc_args *args) heap0 = 0; obj0 = 0; if(gctrace) { - cachestats(nil); + updatememstats(nil); heap0 = mstats.heap_alloc; obj0 = mstats.nmalloc - mstats.nfree; } @@ -2079,18 +2143,10 @@ gc(struct gc_args *args) if(work.nproc > 1) runtime·notesleep(&work.alldone); - cachestats(&stats); - - stats.nprocyield += work.sweepfor->nprocyield; - stats.nosyield += work.sweepfor->nosyield; - stats.nsleep += work.sweepfor->nsleep; - + cachestats(); mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; m->gcing = 0; - heap1 = mstats.heap_alloc; - obj1 = mstats.nmalloc - mstats.nfree; - t4 = runtime·nanotime(); mstats.last_gc = t4; mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0; @@ -2100,6 +2156,14 @@ gc(struct gc_args *args) runtime·printf("pause %D\n", t4-t0); if(gctrace) { + updatememstats(&stats); + heap1 = mstats.heap_alloc; + obj1 = mstats.nmalloc - mstats.nfree; + + stats.nprocyield += work.sweepfor->nprocyield; + stats.nosyield += work.sweepfor->nosyield; + stats.nsleep += work.sweepfor->nsleep; + runtime·printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects," " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n", mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000, @@ -2144,7 +2208,7 @@ runtime·ReadMemStats(MStats *stats) runtime·semacquire(&runtime·worldsema); m->gcing = 1; runtime·stoptheworld(); - cachestats(nil); + updatememstats(nil); *stats = mstats; m->gcing = 0; runtime·semrelease(&runtime·worldsema);