// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock, alloclock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

static byte *pool;	// memory allocation pool
static uintptr poolfree;	// number of bytes left in the pool
enum {
	Chunk = 32*PageSize,	// initial size of the pool
};

// Memory allocation local to this file.
// There is no way to return the allocated memory back to the OS.
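// Requests of Chunk/2 bytes or more bypass the pool and go straight to
// SysAlloc; smaller requests are bump-allocated from the pool, abandoning
// any unused tail of the old chunk when a fresh one must be grabbed.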
static void*
allocate(uintptr size)
{
	void *v;

	if(size == 0)
		return nil;

	if(size >= Chunk/2)
		return runtime·SysAlloc(size);

	runtime·lock(&alloclock);
	if(size > poolfree) {
		pool = runtime·SysAlloc(Chunk);
		if(pool == nil)
			runtime·throw("runtime: cannot allocate memory");
		poolfree = Chunk;
	}
	v = pool;
	pool += size;
	poolfree -= size;
	runtime·unlock(&alloclock);
	return v;
}

enum { MProf, BProf };	// profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
typedef struct Bucket Bucket;
struct Bucket
{
	Bucket	*next;	// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	union
	{
		struct	// typ == MProf
		{
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;
			uintptr	recent_allocs;	// since last gc
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;
		};
		struct	// typ == BProf
		{
			int64	count;
			int64	cycles;
		};
	};
	uintptr	hash;
	uintptr	nstk;
	uintptr	stk[1];
};
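// Each Bucket is over-allocated so that stk[1] actually holds the full
// nstk-entry stack; see the allocate call in stkbucket.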
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;
static Bucket *mbuckets;	// memory profile buckets
static Bucket *bbuckets;	// blocking profile buckets
static uintptr bucketmem;

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 i;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

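	// The mixing steps below are essentially Jenkins's one-at-a-time
	// hash, applied to whole stack words instead of bytes.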
	// Hash stack.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i];
		h += h<<10;
		h ^= h>>6;
	}
	h += h<<3;
	h ^= h>>11;

	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next)
		if(b->typ == typ && b->hash == h && b->nstk == nstk &&
		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return b;

	if(!alloc)
		return nil;

	b = allocate(sizeof *b + nstk*sizeof stk[0]);
	if(b == nil)
		runtime·throw("runtime: cannot allocate memory");
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->recent_allocs;
		b->frees += b->recent_frees;
		b->alloc_bytes += b->recent_alloc_bytes;
		b->free_bytes += b->recent_free_bytes;
		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}

// Map from pointer to Bucket* that allocated it.
// Three levels:
//	Linked-list hash table for top N-AddrHashShift bits.
//	Array index for next AddrDenseBits bits.
//	Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
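// Worked example, using the constants below (AddrHashShift=20,
// AddrDenseBits=8): for addr = 0x1234abcd,
//	addr>>20 = 0x123 selects an AddrHash chain (after multiplicative hashing);
//	(addr>>12) & 0xff = 0x4a indexes that AddrHash's dense array;
//	the low 20 bits, 0x4abcd, are matched (complemented) in an AddrEntry.
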
typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;

enum {
	AddrHashBits = 12,	// good for 4GB of used address space
	AddrHashShift = 20,	// each AddrHash knows about 1MB of address space
	AddrDenseBits = 8,	// good for a profiling rate of 4096 bytes
};

struct AddrHash
{
	AddrHash *next;	// next in top-level hash table linked list
	uintptr addr;	// addr>>20
	AddrEntry *dense[1<<AddrDenseBits];
};

struct AddrEntry
{
	AddrEntry *next;	// next in bottom-level linked list
	uint32 addr;
	Bucket *b;
};

static AddrHash **addrhash;	// points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree;
static uintptr addrmem;

// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth. The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
	HashMultiplier = 2654435769U
};
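
// (HashMultiplier is 0x9e3779b9, the familiar 32-bit golden-ratio
// constant used for Fibonacci hashing.)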

// Set the bucket associated with addr to b.
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 i;
	uint32 h;
	AddrHash *ah;
	AddrEntry *e;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;

	ah = allocate(sizeof *ah);
	addrmem += sizeof *ah;
	ah->next = addrhash[h];
	ah->addr = addr>>AddrHashShift;
	addrhash[h] = ah;

found:
	if((e = addrfree) == nil) {
		e = allocate(64*sizeof *e);
		addrmem += 64*sizeof *e;
		for(i=0; i+1<64; i++)
			e[i].next = &e[i+1];
		e[63].next = nil;
	}
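	// e->addr stores the complement of the low AddrHashShift bits
	// (a complemented 20-bit value is never zero); getaddrbucket
	// matches using the same encoding.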
	addrfree = e->next;
	e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	e->b = b;
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	e->next = ah->dense[h];
	ah->dense[h] = e;
}

// Get the bucket associated with addr and clear the association.
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 h;
	AddrHash *ah;
	AddrEntry *e, **l;
	Bucket *b;

	h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[h]; ah; ah=ah->next)
		if(ah->addr == (addr>>AddrHashShift))
			goto found;
	return nil;

found:
	h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);	// entry in dense is top 8 bits of low 20.
	for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
		if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
			*l = e->next;
			b = e->b;
			e->next = addrfree;
			addrfree = e;
			return b;
		}
	}
	return nil;
}

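// m->nomemprof appears to serve as a reentrancy guard: if profiling
// itself triggers an allocation or free, the nested call bails out
// immediately.
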
// Called by malloc to record a profiled block.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 nstk;
	uintptr stk[32];
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	nstk = runtime·callers(1, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(MProf, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, b);
	runtime·unlock(&proflock);
	m->nomemprof--;
}

// Called when freeing a profiled block.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *b;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	runtime·lock(&proflock);
	b = getaddrbucket((uintptr)p);
	if(b != nil) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);
	m->nomemprof--;
}

int64 runtime·blockprofilerate;	// in CPU ticks

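// The rate argument is in nanoseconds of blocking per sample;
// multiplying by tickspersecond()/1e9 converts it into the CPU-tick
// threshold that runtime·blockevent compares against.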
void
runtime·SetBlockProfileRate(intgo rate)
{
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, rate * runtime·tickspersecond() / (1000*1000*1000));
}

void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	uintptr stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
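	// Sampling: an event blocked for at least rate ticks is always
	// recorded; a shorter one is kept with probability roughly
	// cycles/rate.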
	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime·fastrand1()%rate > cycles))
		return;

	nstk = runtime·callers(skip, stk, 32);
	runtime·lock(&proflock);
	b = stkbucket(BProf, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime·unlock(&proflock);
}

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	int32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i];
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

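// The usual Go-side idiom is to size the record slice with a
// preliminary call, roughly:
//	n, _ := runtime.MemProfile(nil, true)
//	p := make([]runtime.MemProfileRecord, n+50)	// slack for new buckets
//	n, ok := runtime.MemProfile(p, true)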
func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime·lock(&proflock);
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (Record*)p.array;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime·unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime·lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (BRecord*)p.array;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			for(i=0; i<b->nstk && i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i];
			for(; i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime·unlock(&proflock);
}

// Must match StackRecord in debug.go.
typedef struct TRecord TRecord;
struct TRecord {
	uintptr stk[32];
};

func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *r;
	M *first, *mp;

	first = runtime·atomicloadp(&runtime·allm);
	n = 0;
	for(mp=first; mp; mp=mp->alllink)
		n++;
	ok = false;
	if(n <= p.len) {
		ok = true;
		r = (TRecord*)p.array;
		for(mp=first; mp; mp=mp->alllink) {
			runtime·memmove(r->stk, mp->createstack, sizeof r->stk);
			r++;
		}
	}
}

func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len == 0)
		n = 0;
	else {
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	}

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

static void
saveg(byte *pc, byte *sp, G *gp, TRecord *r)
{
	int32 n;

	n = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk));
	if(n < nelem(r->stk))
		r->stk[n] = 0;
}

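// runtime·gcount is consulted twice: once optimistically, and again
// after the world is stopped, since goroutines may have been created
// in between.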
func GoroutineProfile(b Slice) (n int, ok bool) {
	byte *pc, *sp;
	TRecord *r;
	G *gp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	ok = false;
	n = runtime·gcount();
	if(n <= b.len) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();

		n = runtime·gcount();
		if(n <= b.len) {
			ok = true;
			r = (TRecord*)b.array;
			saveg(pc, sp, g, r++);
			for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
				if(gp == g || gp->status == Gdead)
					continue;
				saveg(gp->sched.pc, (byte*)gp->sched.sp, gp, r++);
			}
		}

		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}

void
runtime·mprofinit(void)
{
	addrhash = allocate((1<<AddrHashBits)*sizeof *addrhash);
}