mirror of
https://github.com/golang/go
synced 2024-11-22 01:44:40 -07:00
runtime: faster allocator, garbage collector
GC is still single-threaded. Multiple threads will happen in another CL. Garbage collection pauses are typically about half as long as they were before this CL. R=brainman, iant, r CC=golang-dev https://golang.org/cl/3975046
This commit is contained in:
parent
6b93a92ac0
commit
1e063b32cd
@ -69,7 +69,8 @@ type MemStatsType struct {
|
||||
|
||||
// Per-size allocation statistics.
|
||||
// Not locked during update; approximate.
|
||||
BySize [67]struct {
|
||||
// 61 is NumSizeClasses in the C code.
|
||||
BySize [61]struct {
|
||||
Size uint32
|
||||
Mallocs uint64
|
||||
Frees uint64
|
||||
|
@ -702,7 +702,7 @@ unsafe·New(Eface typ, void *ret)
|
||||
t = (Type*)((Eface*)typ.data-1);
|
||||
|
||||
if(t->kind&KindNoPointers)
|
||||
ret = runtime·mallocgc(t->size, RefNoPointers, 1, 1);
|
||||
ret = runtime·mallocgc(t->size, FlagNoPointers, 1, 1);
|
||||
else
|
||||
ret = runtime·mal(t->size);
|
||||
FLUSH(&ret);
|
||||
@ -722,7 +722,7 @@ unsafe·NewArray(Eface typ, uint32 n, void *ret)
|
||||
|
||||
size = n*t->size;
|
||||
if(t->kind&KindNoPointers)
|
||||
ret = runtime·mallocgc(size, RefNoPointers, 1, 1);
|
||||
ret = runtime·mallocgc(size, FlagNoPointers, 1, 1);
|
||||
else
|
||||
ret = runtime·mal(size);
|
||||
FLUSH(&ret);
|
||||
|
@ -36,14 +36,13 @@ fastrand1(void)
|
||||
// Small objects are allocated from the per-thread cache's free lists.
|
||||
// Large objects (> 32 kB) are allocated straight from the heap.
|
||||
void*
|
||||
runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
|
||||
runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
|
||||
{
|
||||
int32 sizeclass, rate;
|
||||
MCache *c;
|
||||
uintptr npages;
|
||||
MSpan *s;
|
||||
void *v;
|
||||
uint32 *ref;
|
||||
|
||||
if(runtime·gcwaiting && g != m->g0 && m->locks == 0)
|
||||
runtime·gosched();
|
||||
@ -65,12 +64,6 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
|
||||
mstats.alloc += size;
|
||||
mstats.total_alloc += size;
|
||||
mstats.by_size[sizeclass].nmalloc++;
|
||||
|
||||
if(!runtime·mlookup(v, nil, nil, nil, &ref)) {
|
||||
runtime·printf("malloc %D; runtime·mlookup failed\n", (uint64)size);
|
||||
runtime·throw("malloc runtime·mlookup");
|
||||
}
|
||||
*ref = RefNone | refflag;
|
||||
} else {
|
||||
// TODO(rsc): Report tracebacks for very large allocations.
|
||||
|
||||
@ -87,13 +80,14 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
|
||||
v = (void*)(s->start << PageShift);
|
||||
|
||||
// setup for mark sweep
|
||||
s->gcref0 = RefNone | refflag;
|
||||
ref = &s->gcref0;
|
||||
runtime·markspan(v, 0, 0, true);
|
||||
}
|
||||
if(!(flag & FlagNoGC))
|
||||
runtime·markallocated(v, size, (flag&FlagNoPointers) != 0);
|
||||
|
||||
m->mallocing = 0;
|
||||
|
||||
if(!(refflag & RefNoProfiling) && (rate = runtime·MemProfileRate) > 0) {
|
||||
if(!(flag & FlagNoProfiling) && (rate = runtime·MemProfileRate) > 0) {
|
||||
if(size >= rate)
|
||||
goto profile;
|
||||
if(m->mcache->next_sample > size)
|
||||
@ -104,7 +98,7 @@ runtime·mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed)
|
||||
rate = 0x3fffffff;
|
||||
m->mcache->next_sample = fastrand1() % (2*rate);
|
||||
profile:
|
||||
*ref |= RefProfiled;
|
||||
runtime·setblockspecial(v);
|
||||
runtime·MProf_Malloc(v, size);
|
||||
}
|
||||
}
|
||||
@ -124,33 +118,35 @@ runtime·malloc(uintptr size)
|
||||
void
|
||||
runtime·free(void *v)
|
||||
{
|
||||
int32 sizeclass, size;
|
||||
int32 sizeclass;
|
||||
MSpan *s;
|
||||
MCache *c;
|
||||
uint32 prof, *ref;
|
||||
uint32 prof;
|
||||
uintptr size;
|
||||
|
||||
if(v == nil)
|
||||
return;
|
||||
|
||||
// If you change this also change mgc0.c:/^sweepspan,
|
||||
// which has a copy of the guts of free.
|
||||
|
||||
if(m->mallocing)
|
||||
runtime·throw("malloc/free - deadlock");
|
||||
m->mallocing = 1;
|
||||
|
||||
if(!runtime·mlookup(v, nil, nil, &s, &ref)) {
|
||||
if(!runtime·mlookup(v, nil, nil, &s)) {
|
||||
runtime·printf("free %p: not an allocated block\n", v);
|
||||
runtime·throw("free runtime·mlookup");
|
||||
}
|
||||
prof = *ref & RefProfiled;
|
||||
*ref = RefFree;
|
||||
prof = runtime·blockspecial(v);
|
||||
|
||||
// Find size class for v.
|
||||
sizeclass = s->sizeclass;
|
||||
if(sizeclass == 0) {
|
||||
// Large object.
|
||||
if(prof)
|
||||
runtime·MProf_Free(v, s->npages<<PageShift);
|
||||
mstats.alloc -= s->npages<<PageShift;
|
||||
runtime·memclr(v, s->npages<<PageShift);
|
||||
size = s->npages<<PageShift;
|
||||
*(uintptr*)(s->start<<PageShift) = 1; // mark as "needs to be zeroed"
|
||||
runtime·unmarkspan(v, 1<<PageShift);
|
||||
runtime·MHeap_Free(&runtime·mheap, s, 1);
|
||||
} else {
|
||||
// Small object.
|
||||
@ -158,19 +154,20 @@ runtime·free(void *v)
|
||||
size = runtime·class_to_size[sizeclass];
|
||||
if(size > sizeof(uintptr))
|
||||
((uintptr*)v)[1] = 1; // mark as "needs to be zeroed"
|
||||
if(prof)
|
||||
runtime·MProf_Free(v, size);
|
||||
mstats.alloc -= size;
|
||||
mstats.by_size[sizeclass].nfree++;
|
||||
runtime·MCache_Free(c, v, sizeclass, size);
|
||||
}
|
||||
runtime·markfreed(v, size);
|
||||
mstats.alloc -= size;
|
||||
if(prof)
|
||||
runtime·MProf_Free(v, size);
|
||||
m->mallocing = 0;
|
||||
}
|
||||
|
||||
int32
|
||||
runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
|
||||
runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
|
||||
{
|
||||
uintptr n, nobj, i;
|
||||
uintptr n, i;
|
||||
byte *p;
|
||||
MSpan *s;
|
||||
|
||||
@ -179,12 +176,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
|
||||
if(sp)
|
||||
*sp = s;
|
||||
if(s == nil) {
|
||||
runtime·checkfreed(v, 1);
|
||||
if(base)
|
||||
*base = nil;
|
||||
if(size)
|
||||
*size = 0;
|
||||
if(ref)
|
||||
*ref = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -195,14 +191,11 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
|
||||
*base = p;
|
||||
if(size)
|
||||
*size = s->npages<<PageShift;
|
||||
if(ref)
|
||||
*ref = &s->gcref0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if((byte*)v >= (byte*)s->gcref) {
|
||||
// pointers into the gc ref counts
|
||||
// do not count as pointers.
|
||||
if((byte*)v >= (byte*)s->limit) {
|
||||
// pointers past the last block do not count as pointers.
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -213,21 +206,6 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp, uint32 **ref)
|
||||
if(size)
|
||||
*size = n;
|
||||
|
||||
// good for error checking, but expensive
|
||||
if(0) {
|
||||
nobj = (s->npages << PageShift) / (n + RefcountOverhead);
|
||||
if((byte*)s->gcref < p || (byte*)(s->gcref+nobj) > p+(s->npages<<PageShift)) {
|
||||
runtime·printf("odd span state=%d span=%p base=%p sizeclass=%d n=%D size=%D npages=%D\n",
|
||||
s->state, s, p, s->sizeclass, (uint64)nobj, (uint64)n, (uint64)s->npages);
|
||||
runtime·printf("s->base sizeclass %d v=%p base=%p gcref=%p blocksize=%D nobj=%D size=%D end=%p end=%p\n",
|
||||
s->sizeclass, v, p, s->gcref, (uint64)s->npages<<PageShift,
|
||||
(uint64)nobj, (uint64)n, s->gcref + nobj, p+(s->npages<<PageShift));
|
||||
runtime·throw("bad gcref");
|
||||
}
|
||||
}
|
||||
if(ref)
|
||||
*ref = &s->gcref[i];
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -246,14 +224,20 @@ runtime·allocmcache(void)
|
||||
|
||||
int32 runtime·sizeof_C_MStats = sizeof(MStats);
|
||||
|
||||
#define MaxArena32 (2U<<30)
|
||||
|
||||
void
|
||||
runtime·mallocinit(void)
|
||||
{
|
||||
byte *p;
|
||||
uintptr arena_size;
|
||||
uintptr arena_size, bitmap_size;
|
||||
extern byte end[];
|
||||
|
||||
runtime·InitSizes();
|
||||
|
||||
// Set up the allocation arena, a contiguous area of memory where
|
||||
// allocated data will be found. The arena begins with a bitmap large
|
||||
// enough to hold 4 bits per allocated word.
|
||||
if(sizeof(void*) == 8) {
|
||||
// On a 64-bit machine, allocate from a single contiguous reservation.
|
||||
// 16 GB should be big enough for now.
|
||||
@ -273,19 +257,44 @@ runtime·mallocinit(void)
|
||||
// odds of the conservative garbage collector not collecting memory
|
||||
// because some non-pointer block of memory had a bit pattern
|
||||
// that matched a memory address.
|
||||
//
|
||||
// Actually we reserve 17 GB (because the bitmap ends up being 1 GB)
|
||||
// but it hardly matters: fc is not valid UTF-8 either, and we have to
|
||||
// allocate 15 GB before we get that far.
|
||||
arena_size = 16LL<<30;
|
||||
p = runtime·SysReserve((void*)(0x00f8ULL<<32), arena_size);
|
||||
bitmap_size = arena_size / (sizeof(void*)*8/4);
|
||||
p = runtime·SysReserve((void*)(0x00f8ULL<<32), bitmap_size + arena_size);
|
||||
if(p == nil)
|
||||
runtime·throw("runtime: cannot reserve arena virtual address space");
|
||||
runtime·mheap.arena_start = p;
|
||||
runtime·mheap.arena_used = p;
|
||||
runtime·mheap.arena_end = p + arena_size;
|
||||
} else {
|
||||
// On a 32-bit machine, we'll take what we can get for each allocation
|
||||
// and maintain arena_start and arena_end as min, max we've seen.
|
||||
runtime·mheap.arena_start = (byte*)0xffffffff;
|
||||
runtime·mheap.arena_end = 0;
|
||||
// On a 32-bit machine, we can't typically get away
|
||||
// with a giant virtual address space reservation.
|
||||
// Instead we map the memory information bitmap
|
||||
// immediately after the data segment, large enough
|
||||
// to handle another 2GB of mappings (256 MB),
|
||||
// along with a reservation for another 512 MB of memory.
|
||||
// When that gets used up, we'll start asking the kernel
|
||||
// for any memory anywhere and hope it's in the 2GB
|
||||
// following the bitmap (presumably the executable begins
|
||||
// near the bottom of memory, so we'll have to use up
|
||||
// most of memory before the kernel resorts to giving out
|
||||
// memory before the beginning of the text segment).
|
||||
//
|
||||
// Alternatively we could reserve 512 MB bitmap, enough
|
||||
// for 4GB of mappings, and then accept any memory the
|
||||
// kernel threw at us, but normally that's a waste of 512 MB
|
||||
// of address space, which is probably too much in a 32-bit world.
|
||||
bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
|
||||
arena_size = 512<<20;
|
||||
|
||||
p = (void*)(((uintptr)end + 64*1024 - 1) & ~(64*1024-1));
|
||||
if(runtime·SysReserve(p, bitmap_size + arena_size) != p)
|
||||
runtime·throw("runtime: cannot reserve memory bitmap virtual address space");
|
||||
}
|
||||
runtime·mheap.bitmap = p;
|
||||
runtime·mheap.arena_start = p + bitmap_size;
|
||||
runtime·mheap.arena_used = runtime·mheap.arena_start;
|
||||
runtime·mheap.arena_end = runtime·mheap.arena_start + arena_size;
|
||||
|
||||
// Initialize the rest of the allocator.
|
||||
runtime·MHeap_Init(&runtime·mheap, runtime·SysAlloc);
|
||||
@ -300,25 +309,40 @@ runtime·MHeap_SysAlloc(MHeap *h, uintptr n)
|
||||
{
|
||||
byte *p;
|
||||
|
||||
if(sizeof(void*) == 8) {
|
||||
if(n <= h->arena_end - h->arena_used) {
|
||||
// Keep taking from our reservation.
|
||||
if(h->arena_end - h->arena_used < n)
|
||||
return nil;
|
||||
p = h->arena_used;
|
||||
runtime·SysMap(p, n);
|
||||
h->arena_used += n;
|
||||
runtime·MHeap_MapBits(h);
|
||||
return p;
|
||||
} else {
|
||||
// Take what we can get from the OS.
|
||||
}
|
||||
|
||||
// On 64-bit, our reservation is all we have.
|
||||
if(sizeof(void*) == 8)
|
||||
return nil;
|
||||
|
||||
// On 32-bit, once the reservation is gone we can
|
||||
// try to get memory at a location chosen by the OS
|
||||
// and hope that it is in the range we allocated bitmap for.
|
||||
p = runtime·SysAlloc(n);
|
||||
if(p == nil)
|
||||
return nil;
|
||||
if(p+n > h->arena_used)
|
||||
h->arena_used = p+n;
|
||||
if(p > h->arena_end)
|
||||
h->arena_end = p;
|
||||
return p;
|
||||
|
||||
if(p < h->arena_start || p+n - h->arena_start >= MaxArena32) {
|
||||
runtime·printf("runtime: memory allocated by OS not in usable range");
|
||||
runtime·SysFree(p, n);
|
||||
return nil;
|
||||
}
|
||||
|
||||
if(p+n > h->arena_used) {
|
||||
h->arena_used = p+n;
|
||||
if(h->arena_used > h->arena_end)
|
||||
h->arena_end = h->arena_used;
|
||||
runtime·MHeap_MapBits(h);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
// Runtime stubs.
|
||||
@ -353,7 +377,6 @@ void*
|
||||
runtime·stackalloc(uint32 n)
|
||||
{
|
||||
void *v;
|
||||
uint32 *ref;
|
||||
|
||||
if(m->mallocing || m->gcing || n == FixedStack) {
|
||||
runtime·lock(&stacks);
|
||||
@ -369,11 +392,7 @@ runtime·stackalloc(uint32 n)
|
||||
runtime·unlock(&stacks);
|
||||
return v;
|
||||
}
|
||||
v = runtime·mallocgc(n, RefNoProfiling, 0, 0);
|
||||
if(!runtime·mlookup(v, nil, nil, nil, &ref))
|
||||
runtime·throw("stackalloc runtime·mlookup");
|
||||
*ref = RefStack;
|
||||
return v;
|
||||
return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0);
|
||||
}
|
||||
|
||||
void
|
||||
@ -399,7 +418,7 @@ func Free(p *byte) {
|
||||
}
|
||||
|
||||
func Lookup(p *byte) (base *byte, size uintptr) {
|
||||
runtime·mlookup(p, &base, &size, nil, nil);
|
||||
runtime·mlookup(p, &base, &size, nil);
|
||||
}
|
||||
|
||||
func GC() {
|
||||
@ -422,7 +441,7 @@ func SetFinalizer(obj Eface, finalizer Eface) {
|
||||
runtime·printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.type->string);
|
||||
goto throw;
|
||||
}
|
||||
if(!runtime·mlookup(obj.data, &base, &size, nil, nil) || obj.data != base) {
|
||||
if(!runtime·mlookup(obj.data, &base, &size, nil) || obj.data != base) {
|
||||
runtime·printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
|
||||
goto throw;
|
||||
}
|
||||
|
@ -97,8 +97,14 @@ typedef uintptr PageID; // address >> PageShift
|
||||
|
||||
enum
|
||||
{
|
||||
// Computed constant. The definition of MaxSmallSize and the
|
||||
// algorithm in msize.c produce some number of different allocation
|
||||
// size classes. NumSizeClasses is that number. It's needed here
|
||||
// because there are static arrays of this length; when msize runs its
|
||||
// size choosing algorithm it double-checks that NumSizeClasses agrees.
|
||||
NumSizeClasses = 61,
|
||||
|
||||
// Tunable constants.
|
||||
NumSizeClasses = 67, // Number of size classes (must match msize.c)
|
||||
MaxSmallSize = 32<<10,
|
||||
|
||||
FixAllocChunk = 128<<10, // Chunk size for FixAlloc
|
||||
@ -290,10 +296,7 @@ struct MSpan
|
||||
uint32 ref; // number of allocated objects in this span
|
||||
uint32 sizeclass; // size class
|
||||
uint32 state; // MSpanInUse etc
|
||||
union {
|
||||
uint32 *gcref; // sizeclass > 0
|
||||
uint32 gcref0; // sizeclass == 0
|
||||
};
|
||||
byte *limit; // end of data in span
|
||||
};
|
||||
|
||||
void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
|
||||
@ -336,6 +339,7 @@ struct MHeap
|
||||
|
||||
// range of addresses we might see in the heap
|
||||
byte *bitmap;
|
||||
uintptr bitmap_mapped;
|
||||
byte *arena_start;
|
||||
byte *arena_used;
|
||||
byte *arena_end;
|
||||
@ -359,26 +363,29 @@ MSpan* runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct
|
||||
void runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
|
||||
MSpan* runtime·MHeap_Lookup(MHeap *h, void *v);
|
||||
MSpan* runtime·MHeap_LookupMaybe(MHeap *h, void *v);
|
||||
void runtime·MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj);
|
||||
void runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
|
||||
void* runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
|
||||
void runtime·MHeap_MapBits(MHeap *h);
|
||||
|
||||
void* runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
|
||||
int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref);
|
||||
int32 runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
|
||||
void runtime·gc(int32 force);
|
||||
void runtime·markallocated(void *v, uintptr n, bool noptr);
|
||||
void runtime·checkallocated(void *v, uintptr n);
|
||||
void runtime·markfreed(void *v, uintptr n);
|
||||
void runtime·checkfreed(void *v, uintptr n);
|
||||
int32 runtime·checking;
|
||||
void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
|
||||
void runtime·unmarkspan(void *v, uintptr size);
|
||||
bool runtime·blockspecial(void*);
|
||||
void runtime·setblockspecial(void*);
|
||||
|
||||
enum
|
||||
{
|
||||
RefcountOverhead = 4, // one uint32 per object
|
||||
|
||||
RefFree = 0, // must be zero
|
||||
RefStack, // stack segment - don't free and don't scan for pointers
|
||||
RefNone, // no references
|
||||
RefSome, // some references
|
||||
RefNoPointers = 0x80000000U, // flag - no pointers here
|
||||
RefHasFinalizer = 0x40000000U, // flag - has finalizer
|
||||
RefProfiled = 0x20000000U, // flag - is in profiling table
|
||||
RefNoProfiling = 0x10000000U, // flag - must not profile
|
||||
RefFlags = 0xFFFF0000U,
|
||||
// flags to malloc
|
||||
FlagNoPointers = 1<<0, // no pointers here
|
||||
FlagNoProfiling = 1<<1, // must not profile
|
||||
FlagNoGC = 1<<2, // must not free or scan for pointers
|
||||
};
|
||||
|
||||
void runtime·MProf_Malloc(void*, uintptr);
|
||||
|
@ -113,8 +113,7 @@ static void
|
||||
MCentral_Free(MCentral *c, void *v)
|
||||
{
|
||||
MSpan *s;
|
||||
PageID page;
|
||||
MLink *p, *next;
|
||||
MLink *p;
|
||||
int32 size;
|
||||
|
||||
// Find span for v.
|
||||
@ -138,16 +137,8 @@ MCentral_Free(MCentral *c, void *v)
|
||||
if(--s->ref == 0) {
|
||||
size = runtime·class_to_size[c->sizeclass];
|
||||
runtime·MSpanList_Remove(s);
|
||||
// The second word of each freed block indicates
|
||||
// whether it needs to be zeroed. The first word
|
||||
// is the link pointer and must always be cleared.
|
||||
for(p=s->freelist; p; p=next) {
|
||||
next = p->next;
|
||||
if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0)
|
||||
runtime·memclr((byte*)p, size);
|
||||
else
|
||||
p->next = nil;
|
||||
}
|
||||
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
|
||||
*(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
|
||||
s->freelist = nil;
|
||||
c->nfree -= (s->npages << PageShift) / size;
|
||||
runtime·unlock(c);
|
||||
@ -157,7 +148,7 @@ MCentral_Free(MCentral *c, void *v)
|
||||
}
|
||||
|
||||
void
|
||||
runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
|
||||
runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
|
||||
{
|
||||
int32 size;
|
||||
int32 npages;
|
||||
@ -166,7 +157,7 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32
|
||||
size = runtime·class_to_size[sizeclass];
|
||||
*npagesp = npages;
|
||||
*sizep = size;
|
||||
*nobj = (npages << PageShift) / (size + RefcountOverhead);
|
||||
*nobj = (npages << PageShift) / size;
|
||||
}
|
||||
|
||||
// Fetch a new span from the heap and
|
||||
@ -174,7 +165,8 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32
|
||||
static bool
|
||||
MCentral_Grow(MCentral *c)
|
||||
{
|
||||
int32 i, n, npages, size;
|
||||
int32 i, n, npages;
|
||||
uintptr size;
|
||||
MLink **tailp, *v;
|
||||
byte *p;
|
||||
MSpan *s;
|
||||
@ -191,7 +183,7 @@ MCentral_Grow(MCentral *c)
|
||||
// Carve span into sequence of blocks.
|
||||
tailp = &s->freelist;
|
||||
p = (byte*)(s->start << PageShift);
|
||||
s->gcref = (uint32*)(p + size*n);
|
||||
s->limit = p + size*n;
|
||||
for(i=0; i<n; i++) {
|
||||
v = (MLink*)p;
|
||||
*tailp = v;
|
||||
@ -199,6 +191,7 @@ MCentral_Grow(MCentral *c)
|
||||
p += size;
|
||||
}
|
||||
*tailp = nil;
|
||||
runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
|
||||
|
||||
runtime·lock(c);
|
||||
c->nfree += n;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "runtime.h"
|
||||
#include "malloc.h"
|
||||
|
||||
// TODO(rsc): Why not just use mheap.Lock?
|
||||
static Lock finlock;
|
||||
|
||||
// Finalizer hash table. Direct hash, linear scan, at most 3/4 full.
|
||||
@ -101,24 +102,21 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)
|
||||
}
|
||||
|
||||
runtime·lock(&finlock);
|
||||
if(!runtime·mlookup(p, &base, nil, nil, &ref) || p != base) {
|
||||
if(!runtime·mlookup(p, &base, nil, nil) || p != base) {
|
||||
runtime·unlock(&finlock);
|
||||
runtime·throw("addfinalizer on invalid pointer");
|
||||
}
|
||||
if(f == nil) {
|
||||
if(*ref & RefHasFinalizer) {
|
||||
lookfintab(&fintab, p, 1);
|
||||
*ref &= ~RefHasFinalizer;
|
||||
}
|
||||
runtime·unlock(&finlock);
|
||||
return;
|
||||
}
|
||||
|
||||
if(*ref & RefHasFinalizer) {
|
||||
if(lookfintab(&fintab, p, 0)) {
|
||||
runtime·unlock(&finlock);
|
||||
runtime·throw("double finalizer");
|
||||
}
|
||||
*ref |= RefHasFinalizer;
|
||||
runtime·setblockspecial(p);
|
||||
|
||||
if(fintab.nkey >= fintab.max/2+fintab.max/4) {
|
||||
// keep table at most 3/4 full:
|
||||
@ -134,7 +132,7 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)
|
||||
newtab.max *= 3;
|
||||
}
|
||||
|
||||
newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1);
|
||||
newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1);
|
||||
newtab.val = runtime·mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
|
||||
|
||||
for(i=0; i<fintab.max; i++) {
|
||||
|
@ -2,28 +2,66 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Garbage collector -- step 0.
|
||||
//
|
||||
// Stop the world, mark and sweep garbage collector.
|
||||
// NOT INTENDED FOR PRODUCTION USE.
|
||||
//
|
||||
// A mark and sweep collector provides a way to exercise
|
||||
// and test the memory allocator and the stack walking machinery
|
||||
// without also needing to get reference counting
|
||||
// exactly right.
|
||||
// Garbage collector.
|
||||
|
||||
#include "runtime.h"
|
||||
#include "malloc.h"
|
||||
|
||||
enum {
|
||||
Debug = 0
|
||||
Debug = 0,
|
||||
UseCas = 1,
|
||||
PtrSize = sizeof(void*),
|
||||
|
||||
// Four bits per word (see #defines below).
|
||||
wordsPerBitmapWord = sizeof(void*)*8/4,
|
||||
bitShift = sizeof(void*)*8/4,
|
||||
};
|
||||
|
||||
typedef struct BlockList BlockList;
|
||||
struct BlockList
|
||||
// Bits in per-word bitmap.
|
||||
// #defines because enum might not be able to hold the values.
|
||||
//
|
||||
// Each word in the bitmap describes wordsPerBitmapWord words
|
||||
// of heap memory. There are 4 bitmap bits dedicated to each heap word,
|
||||
// so on a 64-bit system there is one bitmap word per 16 heap words.
|
||||
// The bits in the word are packed together by type first, then by
|
||||
// heap location, so each 64-bit bitmap word consists of, from top to bottom,
|
||||
// the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
|
||||
// then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
|
||||
// This layout makes it easier to iterate over the bits of a given type.
|
||||
//
|
||||
// The bitmap starts at mheap.arena_start and extends *backward* from
|
||||
// there. On a 64-bit system the off'th word in the arena is tracked by
|
||||
// the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
|
||||
// the only difference is that the divisor is 8.)
|
||||
//
|
||||
// To pull out the bits corresponding to a given pointer p, we use:
|
||||
//
|
||||
// off = p - (uintptr*)mheap.arena_start; // word offset
|
||||
// b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
// shift = off % wordsPerBitmapWord
|
||||
// bits = *b >> shift;
|
||||
// /* then test bits & bitAllocated, bits & bitMarked, etc. */
|
||||
//
|
||||
#define bitAllocated ((uintptr)1<<(bitShift*0))
|
||||
#define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
|
||||
#define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
|
||||
#define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
|
||||
#define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
|
||||
|
||||
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
|
||||
|
||||
static uint64 nlookup;
|
||||
static uint64 nsizelookup;
|
||||
static uint64 naddrlookup;
|
||||
static uint64 nhandoff;
|
||||
static int32 gctrace;
|
||||
|
||||
typedef struct Workbuf Workbuf;
|
||||
struct Workbuf
|
||||
{
|
||||
byte *obj;
|
||||
uintptr size;
|
||||
Workbuf *next;
|
||||
uintptr nw;
|
||||
byte *w[2048-2];
|
||||
};
|
||||
|
||||
extern byte data[];
|
||||
@ -33,72 +71,258 @@ extern byte end[];
|
||||
static G *fing;
|
||||
static Finalizer *finq;
|
||||
static int32 fingwait;
|
||||
static BlockList *bl, *ebl;
|
||||
static uint32 nfullwait;
|
||||
|
||||
static void runfinq(void);
|
||||
static bool bitlookup(void*, uintptr**, uintptr*, int32*);
|
||||
static Workbuf* getempty(Workbuf*);
|
||||
static Workbuf* getfull(Workbuf*);
|
||||
|
||||
enum {
|
||||
PtrSize = sizeof(void*)
|
||||
};
|
||||
|
||||
// scanblock scans a block of n bytes starting at pointer b for references
|
||||
// to other objects, scanning any it finds recursively until there are no
|
||||
// unscanned objects left. Instead of using an explicit recursion, it keeps
|
||||
// a work list in the Workbuf* structures and loops in the main function
|
||||
// body. Keeping an explicit work list is easier on the stack allocator and
|
||||
// more efficient.
|
||||
static void
|
||||
scanblock(byte *b, int64 n)
|
||||
{
|
||||
int32 off;
|
||||
void *obj;
|
||||
uintptr size;
|
||||
uint32 *refp, ref;
|
||||
byte *obj, *arena_start, *p;
|
||||
void **vp;
|
||||
int64 i;
|
||||
BlockList *w;
|
||||
uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
|
||||
MSpan *s;
|
||||
PageID k;
|
||||
void **bw, **w, **ew;
|
||||
Workbuf *wbuf;
|
||||
|
||||
w = bl;
|
||||
w->obj = b;
|
||||
w->size = n;
|
||||
w++;
|
||||
// Memory arena parameters.
|
||||
arena_start = runtime·mheap.arena_start;
|
||||
|
||||
while(w > bl) {
|
||||
w--;
|
||||
b = w->obj;
|
||||
n = w->size;
|
||||
wbuf = nil; // current work buffer
|
||||
ew = nil; // end of work buffer
|
||||
bw = nil; // beginning of work buffer
|
||||
w = nil; // current pointer into work buffer
|
||||
|
||||
if(Debug > 1)
|
||||
runtime·printf("scanblock %p %D\n", b, n);
|
||||
off = (uint32)(uintptr)b & (PtrSize-1);
|
||||
if(off) {
|
||||
// Align b to a word boundary.
|
||||
off = (uintptr)b & (PtrSize-1);
|
||||
if(off != 0) {
|
||||
b += PtrSize - off;
|
||||
n -= PtrSize - off;
|
||||
}
|
||||
|
||||
for(;;) {
|
||||
// Each iteration scans the block b of length n, queueing pointers in
|
||||
// the work buffer.
|
||||
if(Debug > 1)
|
||||
runtime·printf("scanblock %p %D\n", b, n);
|
||||
|
||||
vp = (void**)b;
|
||||
n /= PtrSize;
|
||||
for(i=0; i<n; i++) {
|
||||
obj = vp[i];
|
||||
if(obj == nil)
|
||||
obj = (byte*)vp[i];
|
||||
|
||||
// Words outside the arena cannot be pointers.
|
||||
if((byte*)obj < arena_start || (byte*)obj >= runtime·mheap.arena_used)
|
||||
continue;
|
||||
if(runtime·mheap.arena_start <= (byte*)obj && (byte*)obj < runtime·mheap.arena_end) {
|
||||
if(runtime·mlookup(obj, &obj, &size, nil, &refp)) {
|
||||
ref = *refp;
|
||||
switch(ref & ~RefFlags) {
|
||||
case RefNone:
|
||||
if(Debug > 1)
|
||||
runtime·printf("found at %p: ", &vp[i]);
|
||||
*refp = RefSome | (ref & RefFlags);
|
||||
if(!(ref & RefNoPointers)) {
|
||||
if(w >= ebl)
|
||||
runtime·throw("scanblock: garbage collection stack overflow");
|
||||
w->obj = obj;
|
||||
w->size = size;
|
||||
w++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// obj may be a pointer to a live object.
|
||||
// Try to find the beginning of the object.
|
||||
|
||||
// Round down to word boundary.
|
||||
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
|
||||
|
||||
// Find bits for this word.
|
||||
off = (uintptr*)obj - (uintptr*)arena_start;
|
||||
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
xbits = *bitp;
|
||||
bits = xbits >> shift;
|
||||
|
||||
// Pointing at the beginning of a block?
|
||||
if((bits & (bitAllocated|bitBlockBoundary)) != 0)
|
||||
goto found;
|
||||
|
||||
// Pointing just past the beginning?
|
||||
// Scan backward a little to find a block boundary.
|
||||
for(j=shift; j-->0; ) {
|
||||
if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
|
||||
obj = (byte*)obj - (shift-j)*PtrSize;
|
||||
shift = j;
|
||||
bits = xbits>>shift;
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise consult span table to find beginning.
|
||||
// (Manually inlined copy of MHeap_LookupMaybe.)
|
||||
nlookup++;
|
||||
naddrlookup++;
|
||||
k = (uintptr)obj>>PageShift;
|
||||
x = k;
|
||||
if(sizeof(void*) == 8)
|
||||
x -= (uintptr)arena_start>>PageShift;
|
||||
s = runtime·mheap.map[x];
|
||||
if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
|
||||
continue;
|
||||
p = (byte*)((uintptr)s->start<<PageShift);
|
||||
if(s->sizeclass == 0) {
|
||||
obj = p;
|
||||
} else {
|
||||
if((byte*)obj >= (byte*)s->limit)
|
||||
continue;
|
||||
size = runtime·class_to_size[s->sizeclass];
|
||||
int32 i = ((byte*)obj - p)/size;
|
||||
obj = p+i*size;
|
||||
}
|
||||
|
||||
// Now that we know the object header, reload bits.
|
||||
off = (uintptr*)obj - (uintptr*)arena_start;
|
||||
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
xbits = *bitp;
|
||||
bits = xbits >> shift;
|
||||
|
||||
found:
|
||||
// Now we have bits, bitp, and shift correct for
|
||||
// obj pointing at the base of the object.
|
||||
// If not allocated or already marked, done.
|
||||
if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
|
||||
continue;
|
||||
*bitp |= bitMarked<<shift;
|
||||
|
||||
// If object has no pointers, don't need to scan further.
|
||||
if((bits & bitNoPointers) != 0)
|
||||
continue;
|
||||
|
||||
// If buffer is full, get a new one.
|
||||
if(w >= ew) {
|
||||
wbuf = getempty(wbuf);
|
||||
bw = wbuf->w;
|
||||
w = bw;
|
||||
ew = bw + nelem(wbuf->w);
|
||||
}
|
||||
*w++ = obj;
|
||||
}
|
||||
|
||||
// Done scanning [b, b+n). Prepare for the next iteration of
|
||||
// the loop by setting b and n to the parameters for the next block.
|
||||
|
||||
// Fetch b from the work buffers.
|
||||
if(w <= bw) {
|
||||
// Emptied our buffer: refill.
|
||||
wbuf = getfull(wbuf);
|
||||
if(wbuf == nil)
|
||||
break;
|
||||
bw = wbuf->w;
|
||||
ew = wbuf->w + nelem(wbuf->w);
|
||||
w = bw+wbuf->nw;
|
||||
}
|
||||
b = *--w;
|
||||
|
||||
// Figure out n = size of b. Start by loading bits for b.
|
||||
off = (uintptr*)b - (uintptr*)arena_start;
|
||||
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
xbits = *bitp;
|
||||
bits = xbits >> shift;
|
||||
|
||||
// Might be small; look for nearby block boundary.
|
||||
// A block boundary is marked by either bitBlockBoundary
|
||||
// or bitAllocated being set (see notes near their definition).
|
||||
enum {
|
||||
boundary = bitBlockBoundary|bitAllocated
|
||||
};
|
||||
// Look for a block boundary both after and before b
|
||||
// in the same bitmap word.
|
||||
//
|
||||
// A block boundary j words after b is indicated by
|
||||
// bits>>j & boundary
|
||||
// assuming shift+j < bitShift. (If shift+j >= bitShift then
|
||||
// we'll be bleeding other bit types like bitMarked into our test.)
|
||||
// Instead of inserting the conditional shift+j < bitShift into the loop,
|
||||
// we can let j range from 1 to bitShift as long as we first
|
||||
// apply a mask to keep only the bits corresponding
|
||||
// to shift+j < bitShift aka j < bitShift-shift.
|
||||
bits &= (boundary<<(bitShift-shift)) - boundary;
|
||||
|
||||
// A block boundary j words before b is indicated by
|
||||
// xbits>>(shift-j) & boundary
|
||||
// (assuming shift >= j). There is no cleverness here
|
||||
// avoid the test, because when j gets too large the shift
|
||||
// turns negative, which is undefined in C.
|
||||
|
||||
for(j=1; j<bitShift; j++) {
|
||||
if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
|
||||
n = j*PtrSize;
|
||||
goto scan;
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to asking span about size class.
|
||||
// (Manually inlined copy of MHeap_Lookup.)
|
||||
nlookup++;
|
||||
nsizelookup++;
|
||||
x = (uintptr)b>>PageShift;
|
||||
if(sizeof(void*) == 8)
|
||||
x -= (uintptr)arena_start>>PageShift;
|
||||
s = runtime·mheap.map[x];
|
||||
if(s->sizeclass == 0)
|
||||
n = s->npages<<PageShift;
|
||||
else
|
||||
n = runtime·class_to_size[s->sizeclass];
|
||||
scan:;
|
||||
}
|
||||
}
|
||||
|
||||
static struct {
|
||||
Workbuf *full;
|
||||
Workbuf *empty;
|
||||
byte *chunk;
|
||||
uintptr nchunk;
|
||||
} work;
|
||||
|
||||
// Get an empty work buffer off the work.empty list,
|
||||
// allocating new buffers as needed.
|
||||
static Workbuf*
|
||||
getempty(Workbuf *b)
|
||||
{
|
||||
if(b != nil) {
|
||||
b->nw = nelem(b->w);
|
||||
b->next = work.full;
|
||||
work.full = b;
|
||||
}
|
||||
b = work.empty;
|
||||
if(b != nil) {
|
||||
work.empty = b->next;
|
||||
return b;
|
||||
}
|
||||
|
||||
if(work.nchunk < sizeof *b) {
|
||||
work.nchunk = 1<<20;
|
||||
work.chunk = runtime·SysAlloc(work.nchunk);
|
||||
}
|
||||
b = (Workbuf*)work.chunk;
|
||||
work.chunk += sizeof *b;
|
||||
work.nchunk -= sizeof *b;
|
||||
return b;
|
||||
}
|
||||
|
||||
// Get a full work buffer off the work.full list, or return nil.
|
||||
static Workbuf*
|
||||
getfull(Workbuf *b)
|
||||
{
|
||||
if(b != nil) {
|
||||
b->nw = 0;
|
||||
b->next = work.empty;
|
||||
work.empty = b;
|
||||
}
|
||||
b = work.full;
|
||||
if(b != nil)
|
||||
work.full = b->next;
|
||||
return b;
|
||||
}
|
||||
|
||||
// Scanstack calls scanblock on each of gp's stack segments.
|
||||
static void
|
||||
scanstack(G *gp)
|
||||
{
|
||||
@ -119,46 +343,26 @@ scanstack(G *gp)
|
||||
}
|
||||
}
|
||||
|
||||
// Markfin calls scanblock on the blocks that have finalizers:
|
||||
// the things pointed at cannot be freed until the finalizers have run.
|
||||
static void
|
||||
markfin(void *v)
|
||||
{
|
||||
uintptr size;
|
||||
uint32 *refp;
|
||||
|
||||
size = 0;
|
||||
refp = nil;
|
||||
if(!runtime·mlookup(v, &v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
|
||||
if(!runtime·mlookup(v, &v, &size, nil) || !runtime·blockspecial(v))
|
||||
runtime·throw("mark - finalizer inconsistency");
|
||||
|
||||
// do not mark the finalizer block itself. just mark the things it points at.
|
||||
scanblock(v, size);
|
||||
}
|
||||
|
||||
// Mark
|
||||
static void
|
||||
mark(void)
|
||||
{
|
||||
G *gp;
|
||||
uintptr blsize, nobj;
|
||||
|
||||
// Figure out how big an object stack we need.
|
||||
// Get a new one if we need more than we have
|
||||
// or we need significantly less than we have.
|
||||
nobj = mstats.heap_objects;
|
||||
if(nobj > ebl - bl || nobj < (ebl-bl)/4) {
|
||||
if(bl != nil)
|
||||
runtime·SysFree(bl, (byte*)ebl - (byte*)bl);
|
||||
|
||||
// While we're allocated a new object stack,
|
||||
// add 20% headroom and also round up to
|
||||
// the nearest page boundary, since mmap
|
||||
// will anyway.
|
||||
nobj = nobj * 12/10;
|
||||
blsize = nobj * sizeof *bl;
|
||||
blsize = (blsize + 4095) & ~4095;
|
||||
nobj = blsize / sizeof *bl;
|
||||
bl = runtime·SysAlloc(blsize);
|
||||
ebl = bl + nobj;
|
||||
}
|
||||
|
||||
// mark data+bss.
|
||||
// skip runtime·mheap itself, which has no interesting pointers
|
||||
@ -192,95 +396,83 @@ mark(void)
|
||||
runtime·walkfintab(markfin);
|
||||
}
|
||||
|
||||
// free RefNone, free & queue finalizers for RefNone|RefHasFinalizer, reset RefSome
|
||||
static void
|
||||
sweepspan(MSpan *s)
|
||||
{
|
||||
int32 n, npages, size;
|
||||
byte *p;
|
||||
uint32 ref, *gcrefp, *gcrefep;
|
||||
MCache *c;
|
||||
Finalizer *f;
|
||||
|
||||
p = (byte*)(s->start << PageShift);
|
||||
if(s->sizeclass == 0) {
|
||||
// Large block.
|
||||
ref = s->gcref0;
|
||||
switch(ref & ~(RefFlags^RefHasFinalizer)) {
|
||||
case RefNone:
|
||||
// Free large object.
|
||||
mstats.alloc -= s->npages<<PageShift;
|
||||
mstats.nfree++;
|
||||
runtime·memclr(p, s->npages<<PageShift);
|
||||
if(ref & RefProfiled)
|
||||
runtime·MProf_Free(p, s->npages<<PageShift);
|
||||
s->gcref0 = RefFree;
|
||||
runtime·MHeap_Free(&runtime·mheap, s, 1);
|
||||
break;
|
||||
case RefNone|RefHasFinalizer:
|
||||
f = runtime·getfinalizer(p, 1);
|
||||
if(f == nil)
|
||||
runtime·throw("finalizer inconsistency");
|
||||
f->arg = p;
|
||||
f->next = finq;
|
||||
finq = f;
|
||||
ref &= ~RefHasFinalizer;
|
||||
// fall through
|
||||
case RefSome:
|
||||
case RefSome|RefHasFinalizer:
|
||||
s->gcref0 = RefNone | (ref&RefFlags);
|
||||
break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Chunk full of small blocks.
|
||||
runtime·MGetSizeClassInfo(s->sizeclass, &size, &npages, &n);
|
||||
gcrefp = s->gcref;
|
||||
gcrefep = s->gcref + n;
|
||||
for(; gcrefp < gcrefep; gcrefp++, p += size) {
|
||||
ref = *gcrefp;
|
||||
if(ref < RefNone) // RefFree or RefStack
|
||||
continue;
|
||||
switch(ref & ~(RefFlags^RefHasFinalizer)) {
|
||||
case RefNone:
|
||||
// Free small object.
|
||||
if(ref & RefProfiled)
|
||||
runtime·MProf_Free(p, size);
|
||||
*gcrefp = RefFree;
|
||||
c = m->mcache;
|
||||
if(size > sizeof(uintptr))
|
||||
((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
|
||||
mstats.alloc -= size;
|
||||
mstats.nfree++;
|
||||
mstats.by_size[s->sizeclass].nfree++;
|
||||
runtime·MCache_Free(c, p, s->sizeclass, size);
|
||||
break;
|
||||
case RefNone|RefHasFinalizer:
|
||||
f = runtime·getfinalizer(p, 1);
|
||||
if(f == nil)
|
||||
runtime·throw("finalizer inconsistency");
|
||||
f->arg = p;
|
||||
f->next = finq;
|
||||
finq = f;
|
||||
ref &= ~RefHasFinalizer;
|
||||
// fall through
|
||||
case RefSome:
|
||||
case RefSome|RefHasFinalizer:
|
||||
*gcrefp = RefNone | (ref&RefFlags);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sweep frees or calls finalizers for blocks not marked in the mark phase.
|
||||
// It clears the mark bits in preparation for the next GC round.
|
||||
static void
|
||||
sweep(void)
|
||||
{
|
||||
MSpan *s;
|
||||
int32 cl, n, npages;
|
||||
uintptr size;
|
||||
byte *p;
|
||||
MCache *c;
|
||||
Finalizer *f;
|
||||
|
||||
for(s = runtime·mheap.allspans; s != nil; s = s->allnext)
|
||||
if(s->state == MSpanInUse)
|
||||
sweepspan(s);
|
||||
for(s = runtime·mheap.allspans; s != nil; s = s->allnext) {
|
||||
if(s->state != MSpanInUse)
|
||||
continue;
|
||||
|
||||
p = (byte*)(s->start << PageShift);
|
||||
cl = s->sizeclass;
|
||||
if(cl == 0) {
|
||||
size = s->npages<<PageShift;
|
||||
n = 1;
|
||||
} else {
|
||||
// Chunk full of small blocks.
|
||||
size = runtime·class_to_size[cl];
|
||||
npages = runtime·class_to_allocnpages[cl];
|
||||
n = (npages << PageShift) / size;
|
||||
}
|
||||
|
||||
// sweep through n objects of given size starting at p.
|
||||
for(; n > 0; n--, p += size) {
|
||||
uintptr off, *bitp, shift, bits;
|
||||
|
||||
off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;
|
||||
bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
bits = *bitp>>shift;
|
||||
|
||||
if((bits & bitAllocated) == 0)
|
||||
continue;
|
||||
|
||||
if((bits & bitMarked) != 0) {
|
||||
*bitp &= ~(bitMarked<<shift);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((bits & bitSpecial) != 0) {
|
||||
// Special means it has a finalizer or is being profiled.
|
||||
f = runtime·getfinalizer(p, 1);
|
||||
if(f != nil) {
|
||||
f->arg = p;
|
||||
f->next = finq;
|
||||
finq = f;
|
||||
continue;
|
||||
}
|
||||
runtime·MProf_Free(p, size);
|
||||
}
|
||||
|
||||
// Mark freed; restore block boundary bit.
|
||||
*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
|
||||
|
||||
if(s->sizeclass == 0) {
|
||||
// Free large span.
|
||||
runtime·unmarkspan(p, 1<<PageShift);
|
||||
*(uintptr*)p = 1; // needs zeroing
|
||||
runtime·MHeap_Free(&runtime·mheap, s, 1);
|
||||
} else {
|
||||
// Free small object.
|
||||
c = m->mcache;
|
||||
if(size > sizeof(uintptr))
|
||||
((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
|
||||
mstats.by_size[s->sizeclass].nfree++;
|
||||
runtime·MCache_Free(c, p, s->sizeclass, size);
|
||||
}
|
||||
mstats.alloc -= size;
|
||||
mstats.nfree++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Semaphore, not Lock, so that the goroutine
|
||||
@ -326,7 +518,8 @@ cachestats(void)
|
||||
void
|
||||
runtime·gc(int32 force)
|
||||
{
|
||||
int64 t0, t1;
|
||||
int64 t0, t1, t2, t3;
|
||||
uint64 heap0, heap1, obj0, obj1;
|
||||
byte *p;
|
||||
Finalizer *fp;
|
||||
|
||||
@ -349,23 +542,41 @@ runtime·gc(int32 force)
|
||||
gcpercent = -1;
|
||||
else
|
||||
gcpercent = runtime·atoi(p);
|
||||
|
||||
p = runtime·getenv("GOGCTRACE");
|
||||
if(p != nil)
|
||||
gctrace = runtime·atoi(p);
|
||||
}
|
||||
if(gcpercent < 0)
|
||||
return;
|
||||
|
||||
runtime·semacquire(&gcsema);
|
||||
if(!force && mstats.heap_alloc < mstats.next_gc) {
|
||||
runtime·semrelease(&gcsema);
|
||||
return;
|
||||
}
|
||||
|
||||
t0 = runtime·nanotime();
|
||||
nlookup = 0;
|
||||
nsizelookup = 0;
|
||||
naddrlookup = 0;
|
||||
|
||||
m->gcing = 1;
|
||||
runtime·stoptheworld();
|
||||
if(runtime·mheap.Lock.key != 0)
|
||||
runtime·throw("runtime·mheap locked during gc");
|
||||
if(force || mstats.heap_alloc >= mstats.next_gc) {
|
||||
|
||||
cachestats();
|
||||
heap0 = mstats.heap_alloc;
|
||||
obj0 = mstats.nmalloc - mstats.nfree;
|
||||
|
||||
mark();
|
||||
t1 = runtime·nanotime();
|
||||
sweep();
|
||||
t2 = runtime·nanotime();
|
||||
stealcache();
|
||||
|
||||
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
|
||||
}
|
||||
m->gcing = 0;
|
||||
|
||||
m->locks++; // disable gc during the mallocs in newproc
|
||||
@ -381,18 +592,34 @@ runtime·gc(int32 force)
|
||||
}
|
||||
m->locks--;
|
||||
|
||||
t1 = runtime·nanotime();
|
||||
cachestats();
|
||||
heap1 = mstats.heap_alloc;
|
||||
obj1 = mstats.nmalloc - mstats.nfree;
|
||||
|
||||
t3 = runtime·nanotime();
|
||||
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t3 - t0;
|
||||
mstats.pause_total_ns += t3 - t0;
|
||||
mstats.numgc++;
|
||||
mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t1 - t0;
|
||||
mstats.pause_total_ns += t1 - t0;
|
||||
if(mstats.debuggc)
|
||||
runtime·printf("pause %D\n", t1-t0);
|
||||
runtime·printf("pause %D\n", t3-t0);
|
||||
|
||||
if(gctrace) {
|
||||
runtime·printf("gc%d: %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr)\n",
|
||||
mstats.numgc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
|
||||
heap0>>20, heap1>>20, obj0, obj1,
|
||||
mstats.nmalloc, mstats.nfree,
|
||||
nlookup, nsizelookup, naddrlookup);
|
||||
}
|
||||
|
||||
runtime·semrelease(&gcsema);
|
||||
runtime·starttheworld();
|
||||
|
||||
// give the queued finalizers, if any, a chance to run
|
||||
if(fp != nil)
|
||||
runtime·gosched();
|
||||
|
||||
if(gctrace > 1 && !force)
|
||||
runtime·gc(1);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -430,3 +657,157 @@ runfinq(void)
|
||||
runtime·gc(1); // trigger another gc to clean up the finalized objects, if possible
|
||||
}
|
||||
}
|
||||
|
||||
// mark the block at v of size n as allocated.
|
||||
// If noptr is true, mark it as having no pointers.
|
||||
void
|
||||
runtime·markallocated(void *v, uintptr n, bool noptr)
|
||||
{
|
||||
uintptr *b, bits, off, shift;
|
||||
|
||||
if(0)
|
||||
runtime·printf("markallocated %p+%p\n", v, n);
|
||||
|
||||
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
|
||||
runtime·throw("markallocated: bad pointer");
|
||||
|
||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
|
||||
bits = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
|
||||
if(noptr)
|
||||
bits |= bitNoPointers<<shift;
|
||||
*b = bits;
|
||||
}
|
||||
|
||||
// mark the block at v of size n as freed.
|
||||
void
|
||||
runtime·markfreed(void *v, uintptr n)
|
||||
{
|
||||
uintptr *b, off, shift;
|
||||
|
||||
if(0)
|
||||
runtime·printf("markallocated %p+%p\n", v, n);
|
||||
|
||||
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
|
||||
runtime·throw("markallocated: bad pointer");
|
||||
|
||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
|
||||
*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
|
||||
}
|
||||
|
||||
// check that the block at v of size n is marked freed.
|
||||
void
|
||||
runtime·checkfreed(void *v, uintptr n)
|
||||
{
|
||||
uintptr *b, bits, off, shift;
|
||||
|
||||
if(!runtime·checking)
|
||||
return;
|
||||
|
||||
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
|
||||
return; // not allocated, so okay
|
||||
|
||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
|
||||
bits = *b>>shift;
|
||||
if((bits & bitAllocated) != 0) {
|
||||
runtime·printf("checkfreed %p+%p: off=%p have=%p\n",
|
||||
v, n, off, bits & bitMask);
|
||||
runtime·throw("checkfreed: not freed");
|
||||
}
|
||||
}
|
||||
|
||||
// mark the span of memory at v as having n blocks of the given size.
|
||||
// if leftover is true, there is left over space at the end of the span.
|
||||
void
|
||||
runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
|
||||
{
|
||||
uintptr *b, off, shift;
|
||||
byte *p;
|
||||
|
||||
if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
|
||||
runtime·throw("markspan: bad pointer");
|
||||
|
||||
p = v;
|
||||
if(leftover) // mark a boundary just past end of last block too
|
||||
n++;
|
||||
for(; n-- > 0; p += size) {
|
||||
off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start; // word offset
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
|
||||
}
|
||||
}
|
||||
|
||||
// unmark the span of memory at v of length n bytes.
|
||||
void
|
||||
runtime·unmarkspan(void *v, uintptr n)
|
||||
{
|
||||
uintptr *p, *b, off;
|
||||
|
||||
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
|
||||
runtime·throw("markspan: bad pointer");
|
||||
|
||||
p = v;
|
||||
off = p - (uintptr*)runtime·mheap.arena_start; // word offset
|
||||
if(off % wordsPerBitmapWord != 0)
|
||||
runtime·throw("markspan: unaligned pointer");
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
n /= PtrSize;
|
||||
if(n%wordsPerBitmapWord != 0)
|
||||
runtime·throw("unmarkspan: unaligned length");
|
||||
n /= wordsPerBitmapWord;
|
||||
while(n-- > 0)
|
||||
*b-- = 0;
|
||||
}
|
||||
|
||||
bool
|
||||
runtime·blockspecial(void *v)
|
||||
{
|
||||
uintptr *b, off, shift;
|
||||
|
||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
|
||||
return (*b & (bitSpecial<<shift)) != 0;
|
||||
}
|
||||
|
||||
void
|
||||
runtime·setblockspecial(void *v)
|
||||
{
|
||||
uintptr *b, off, shift;
|
||||
|
||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
|
||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||
shift = off % wordsPerBitmapWord;
|
||||
|
||||
*b |= bitSpecial<<shift;
|
||||
}
|
||||
|
||||
void
|
||||
runtime·MHeap_MapBits(MHeap *h)
|
||||
{
|
||||
// Caller has added extra mappings to the arena.
|
||||
// Add extra mappings of bitmap words as needed.
|
||||
// We allocate extra bitmap pieces in chunks of bitmapChunk.
|
||||
enum {
|
||||
bitmapChunk = 8192
|
||||
};
|
||||
uintptr n;
|
||||
|
||||
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
|
||||
n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
|
||||
if(h->bitmap_mapped >= n)
|
||||
return;
|
||||
|
||||
runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped);
|
||||
h->bitmap_mapped = n;
|
||||
}
|
||||
|
@ -180,7 +180,9 @@ MHeap_Grow(MHeap *h, uintptr npage)
|
||||
// Allocate a multiple of 64kB (16 pages).
|
||||
npage = (npage+15)&~15;
|
||||
ask = npage<<PageShift;
|
||||
if(ask < HeapAllocChunk)
|
||||
if(ask > h->arena_end - h->arena_used)
|
||||
return false;
|
||||
if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used)
|
||||
ask = HeapAllocChunk;
|
||||
|
||||
v = runtime·MHeap_SysAlloc(h, ask);
|
||||
@ -194,11 +196,6 @@ MHeap_Grow(MHeap *h, uintptr npage)
|
||||
}
|
||||
mstats.heap_sys += ask;
|
||||
|
||||
if((byte*)v < h->arena_start || h->arena_start == nil)
|
||||
h->arena_start = v;
|
||||
if((byte*)v+ask > h->arena_end)
|
||||
h->arena_end = (byte*)v+ask;
|
||||
|
||||
// Create a fake "in use" span and free it, so that the
|
||||
// right coalescing happens.
|
||||
s = runtime·FixAlloc_Alloc(&h->spanalloc);
|
||||
@ -370,10 +367,14 @@ runtime·MSpanList_IsEmpty(MSpan *list)
|
||||
void
|
||||
runtime·MSpanList_Insert(MSpan *list, MSpan *span)
|
||||
{
|
||||
if(span->next != nil || span->prev != nil)
|
||||
if(span->next != nil || span->prev != nil) {
|
||||
runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
|
||||
runtime·throw("MSpanList_Insert");
|
||||
}
|
||||
span->next = list->next;
|
||||
span->prev = list;
|
||||
span->next->prev = span;
|
||||
span->prev->next = span;
|
||||
}
|
||||
|
||||
|
||||
|
@ -65,7 +65,7 @@ stkbucket(uintptr *stk, int32 nstk)
|
||||
runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
|
||||
return b;
|
||||
|
||||
b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
|
||||
b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
|
||||
bucketmem += sizeof *b + nstk*sizeof stk[0];
|
||||
runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
|
||||
b->hash = h;
|
||||
@ -132,7 +132,7 @@ setaddrbucket(uintptr addr, Bucket *b)
|
||||
if(ah->addr == (addr>>20))
|
||||
goto found;
|
||||
|
||||
ah = runtime·mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
|
||||
ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
|
||||
addrmem += sizeof *ah;
|
||||
ah->next = addrhash[h];
|
||||
ah->addr = addr>>20;
|
||||
@ -140,7 +140,7 @@ setaddrbucket(uintptr addr, Bucket *b)
|
||||
|
||||
found:
|
||||
if((e = addrfree) == nil) {
|
||||
e = runtime·mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
|
||||
e = runtime·mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
|
||||
addrmem += 64*sizeof *e;
|
||||
for(i=0; i+1<64; i++)
|
||||
e[i].next = &e[i+1];
|
||||
|
@ -57,7 +57,7 @@ runtime·SizeToClass(int32 size)
|
||||
void
|
||||
runtime·InitSizes(void)
|
||||
{
|
||||
int32 align, sizeclass, size, osize, nextsize, n;
|
||||
int32 align, sizeclass, size, nextsize, n;
|
||||
uint32 i;
|
||||
uintptr allocsize, npages;
|
||||
|
||||
@ -81,8 +81,7 @@ runtime·InitSizes(void)
|
||||
// the leftover is less than 1/8 of the total,
|
||||
// so wasted space is at most 12.5%.
|
||||
allocsize = PageSize;
|
||||
osize = size + RefcountOverhead;
|
||||
while(allocsize%osize > (allocsize/8))
|
||||
while(allocsize%size > allocsize/8)
|
||||
allocsize += PageSize;
|
||||
npages = allocsize >> PageShift;
|
||||
|
||||
@ -93,7 +92,7 @@ runtime·InitSizes(void)
|
||||
// different sizes.
|
||||
if(sizeclass > 1
|
||||
&& npages == runtime·class_to_allocnpages[sizeclass-1]
|
||||
&& allocsize/osize == allocsize/(runtime·class_to_size[sizeclass-1]+RefcountOverhead)) {
|
||||
&& allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
|
||||
runtime·class_to_size[sizeclass-1] = size;
|
||||
continue;
|
||||
}
|
||||
|
@ -41,7 +41,7 @@ makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret)
|
||||
ret->cap = cap;
|
||||
|
||||
if((t->elem->kind&KindNoPointers))
|
||||
ret->array = runtime·mallocgc(size, RefNoPointers, 1, 1);
|
||||
ret->array = runtime·mallocgc(size, FlagNoPointers, 1, 1);
|
||||
else
|
||||
ret->array = runtime·mal(size);
|
||||
}
|
||||
|
@ -225,7 +225,7 @@ func slicebytetostring(b Slice) (s String) {
|
||||
}
|
||||
|
||||
func stringtoslicebyte(s String) (b Slice) {
|
||||
b.array = runtime·mallocgc(s.len, RefNoPointers, 1, 1);
|
||||
b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1);
|
||||
b.len = s.len;
|
||||
b.cap = s.len;
|
||||
runtime·mcpy(b.array, s.str, s.len);
|
||||
@ -268,7 +268,7 @@ func stringtosliceint(s String) (b Slice) {
|
||||
n++;
|
||||
}
|
||||
|
||||
b.array = runtime·mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1);
|
||||
b.array = runtime·mallocgc(n*sizeof(r[0]), FlagNoPointers, 1, 1);
|
||||
b.len = n;
|
||||
b.cap = n;
|
||||
p = s.str;
|
||||
|
@ -48,7 +48,7 @@ runtime·SysFree(void *v, uintptr n)
|
||||
void*
|
||||
runtime·SysReserve(void *v, uintptr n)
|
||||
{
|
||||
return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, 0);
|
||||
return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
|
||||
}
|
||||
|
||||
void
|
||||
|
Loading…
Reference in New Issue
Block a user