diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index d79c86d1244..5f03693f52a 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -205,6 +205,7 @@ struct MStats uint64 heap_sys; // bytes obtained from system uint64 heap_idle; // bytes in idle spans uint64 heap_inuse; // bytes in non-idle spans + uint64 heap_released; // bytes released to the OS uint64 heap_objects; // total number of allocated objects // Statistics about allocation of low-level fixed-size structures. @@ -220,6 +221,7 @@ struct MStats // Statistics about garbage collector. // Protected by stopping the world during GC. uint64 next_gc; // next GC (in heap_alloc time) + uint64 last_gc; // last GC (in absolute time) uint64 pause_total_ns; uint64 pause_ns[256]; uint32 numgc; @@ -304,14 +306,16 @@ struct MSpan { MSpan *next; // in a span linked list MSpan *prev; // in a span linked list - MSpan *allnext; // in the list of all spans + MSpan *allnext; // in the list of all spans PageID start; // starting page number uintptr npages; // number of pages in span MLink *freelist; // list of free objects uint32 ref; // number of allocated objects in this span uint32 sizeclass; // size class uint32 state; // MSpanInUse etc - byte *limit; // end of data in span + int64 unusedsince; // First time spotted by GC in MSpanFree state + uintptr npreleased; // number of pages released to the OS + byte *limit; // end of data in span }; void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages); @@ -381,6 +385,7 @@ MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v); void runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj); void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n); void runtime·MHeap_MapBits(MHeap *h); +void runtime·MHeap_Scavenger(void); void* runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed); int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s); diff --git a/src/pkg/runtime/mem.go b/src/pkg/runtime/mem.go index 3ad906ac276..76680086cd4 100644 --- a/src/pkg/runtime/mem.go +++ b/src/pkg/runtime/mem.go @@ -17,11 +17,12 @@ type MemStats struct { Frees uint64 // number of frees // Main allocation heap statistics. - HeapAlloc uint64 // bytes allocated and still in use - HeapSys uint64 // bytes obtained from system - HeapIdle uint64 // bytes in idle spans - HeapInuse uint64 // bytes in non-idle span - HeapObjects uint64 // total number of allocated objects + HeapAlloc uint64 // bytes allocated and still in use + HeapSys uint64 // bytes obtained from system + HeapIdle uint64 // bytes in idle spans + HeapInuse uint64 // bytes in non-idle span + HeapReleased uint64 // bytes released to the OS + HeapObjects uint64 // total number of allocated objects // Low-level fixed-size structure allocator statistics. // Inuse is bytes used now. @@ -35,7 +36,8 @@ type MemStats struct { BuckHashSys uint64 // profiling bucket hash table // Garbage collector statistics. - NextGC uint64 + NextGC uint64 // next run in HeapAlloc time (bytes) + LastGC uint64 // last run in absolute time (ns) PauseTotalNs uint64 PauseNs [256]uint64 // most recent GC pause times NumGC uint32 diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index 1b959286ac3..8efa7afc032 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -716,8 +716,10 @@ sweep(void) byte *p; MCache *c; byte *arena_start; + int64 now; arena_start = runtime·mheap.arena_start; + now = runtime·nanotime(); for(;;) { s = work.spans; @@ -726,6 +728,11 @@ sweep(void) if(!runtime·casp(&work.spans, s, s->allnext)) continue; + // Stamp newly unused spans. The scavenger will use that + // info to potentially give back some pages to the OS. + if(s->state == MSpanFree && s->unusedsince == 0) + s->unusedsince = now; + if(s->state != MSpanInUse) continue; @@ -963,6 +970,7 @@ runtime·gc(int32 force) obj1 = mstats.nmalloc - mstats.nfree; t3 = runtime·nanotime(); + mstats.last_gc = t3; mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0; mstats.pause_total_ns += t3 - t0; mstats.numgc++; diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c index d75c18d828a..a40a14512b6 100644 --- a/src/pkg/runtime/mheap.c +++ b/src/pkg/runtime/mheap.c @@ -103,6 +103,8 @@ HaveSpan: runtime·MSpanList_Remove(s); s->state = MSpanInUse; mstats.heap_idle -= s->npages<npreleased<npreleased = 0; if(s->npages > npage) { // Trim extra and put it back in the heap. @@ -280,6 +282,8 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) } mstats.heap_idle += s->npages<state = MSpanFree; + s->unusedsince = 0; + s->npreleased = 0; runtime·MSpanList_Remove(s); sp = (uintptr*)(s->start<start = t->start; s->npages += t->npages; + s->npreleased = t->npreleased; // absorb released pages p -= t->npages; h->map[p] = s; runtime·MSpanList_Remove(t); @@ -304,6 +309,7 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) tp = (uintptr*)(t->start<npages += t->npages; + s->npreleased += t->npreleased; h->map[p + s->npages - 1] = s; runtime·MSpanList_Remove(t); t->state = MSpanDead; @@ -317,8 +323,81 @@ MHeap_FreeLocked(MHeap *h, MSpan *s) runtime·MSpanList_Insert(&h->free[s->npages], s); else runtime·MSpanList_Insert(&h->large, s); +} - // TODO(rsc): IncrementalScavenge() to return memory to OS. +// Release (part of) unused memory to OS. +// Goroutine created in runtime·schedinit. +// Loop forever. +void +runtime·MHeap_Scavenger(void) +{ + MHeap *h; + MSpan *s, *list; + uint64 tick, now, forcegc, limit; + uint32 k, i; + uintptr released, sumreleased; + byte *env; + bool trace; + Note note; + + // If we go two minutes without a garbage collection, force one to run. + forcegc = 2*60*1e9; + // If a span goes unused for 5 minutes after a garbage collection, + // we hand it back to the operating system. + limit = 5*60*1e9; + // Make wake-up period small enough for the sampling to be correct. + tick = forcegc < limit ? forcegc/2 : limit/2; + + trace = false; + env = runtime·getenv("GOGCTRACE"); + if(env != nil) + trace = runtime·atoi(env) > 0; + + h = &runtime·mheap; + for(k=0;; k++) { + runtime·noteclear(¬e); + runtime·entersyscall(); + runtime·notetsleep(¬e, tick); + runtime·exitsyscall(); + + runtime·lock(h); + now = runtime·nanotime(); + if(now - mstats.last_gc > forcegc) { + runtime·unlock(h); + runtime·gc(1); + runtime·lock(h); + now = runtime·nanotime(); + if (trace) + runtime·printf("scvg%d: GC forced\n", k); + } + sumreleased = 0; + for(i=0; i < nelem(h->free)+1; i++) { + if(i < nelem(h->free)) + list = &h->free[i]; + else + list = &h->large; + if(runtime·MSpanList_IsEmpty(list)) + continue; + for(s=list->next; s != list; s=s->next) { + if(s->unusedsince != 0 && (now - s->unusedsince) > limit) { + released = (s->npages - s->npreleased) << PageShift; + mstats.heap_released += released; + sumreleased += released; + s->npreleased = s->npages; + runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift); + } + } + } + runtime·unlock(h); + + if(trace) { + if(sumreleased > 0) + runtime·printf("scvg%d: %p MB released\n", k, sumreleased>>20); + runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n", + k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20, + mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20); + } + } } // Initialize a new span with the given start and npages. @@ -333,6 +412,8 @@ runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages) span->ref = 0; span->sizeclass = 0; span->state = 0; + span->unusedsince = 0; + span->npreleased = 0; } // Initialize an empty doubly-linked list. diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 9a4d2059011..3dbf77a105d 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -164,6 +164,9 @@ setmcpumax(uint32 n) } } +// Keep trace of scavenger's goroutine for deadlock detection. +static G *scvg; + // The bootstrap sequence is: // // call osinit @@ -206,6 +209,8 @@ runtime·schedinit(void) mstats.enablegc = 1; m->nomemprof--; + + scvg = runtime·newproc1((byte*)runtime·MHeap_Scavenger, nil, 0, 0, runtime·schedinit); } extern void main·init(void); @@ -582,9 +587,12 @@ top: mput(m); } - v = runtime·atomicload(&runtime·sched.atomic); - if(runtime·sched.grunning == 0) - runtime·throw("all goroutines are asleep - deadlock!"); + // Look for deadlock situation: one single active g which happens to be scvg. + if(runtime·sched.grunning == 1 && runtime·sched.gwait == 0) { + if(scvg->status == Grunning || scvg->status == Gsyscall) + runtime·throw("all goroutines are asleep - deadlock!"); + } + m->nextg = nil; m->waitnextg = 1; runtime·noteclear(&m->havenextg); @@ -593,6 +601,7 @@ top: // Entersyscall might have decremented mcpu too, but if so // it will see the waitstop and take the slow path. // Exitsyscall never increments mcpu beyond mcpumax. + v = runtime·atomicload(&runtime·sched.atomic); if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) { // set waitstop = 0 (known to be 1) runtime·xadd(&runtime·sched.atomic, -1<