1
0
mirror of https://github.com/golang/go synced 2024-10-04 22:21:22 -06:00
go/src/pkg/runtime/mprof.goc
Russ Cox e0deb2ef7f undo CL 7301062 / 9742f722b558
broke arm garbage collector

traceback_arm fails with a missing pc. It needs CL 7494043.
But that only makes the build break later, this time with
"invalid freelist". Roll back until it can be fixed correctly.

««« original CL description
runtime: restrict stack root scan to locals and arguments

R=rsc
CC=golang-dev
https://golang.org/cl/7301062
»»»

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/7493044
2013-03-05 15:36:40 -05:00

557 lines
12 KiB
Plaintext

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.
package runtime
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
#include "defs_GOOS_GOARCH.h"
#include "type.h"
// NOTE(rsc): Everything here could use cas if contention became an issue.
// proflock serializes access to the profile buckets and the address map;
// alloclock serializes access to pool and poolfree inside allocate.
static Lock proflock, alloclock;
// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.
static byte *pool; // next unused byte of the current memory allocation pool
static uintptr poolfree; // number of bytes left in the pool
enum {
Chunk = 32*PageSize, // size of each pool obtained by allocate; requests of Chunk/2 or more bypass the pool
};
// Memory allocation local to this file.
// Returns nil only when size is 0.  Any failure to obtain memory is
// fatal (runtime·throw), so a caller that asks for a non-zero size
// may use the result without checking it.
// Requests of Chunk/2 bytes or more go straight to runtime·SysAlloc;
// smaller ones are carved sequentially out of a shared Chunk-sized
// pool under alloclock.  When the pool cannot satisfy a request it is
// replaced by a fresh Chunk and the unused tail of the old pool is
// abandoned.
// There is no way to return the allocated memory back to the OS.
static void*
allocate(uintptr size)
{
	void *v;

	if(size == 0)
		return nil;

	if(size >= Chunk/2) {
		// Large request: it gets its own allocation, with the same
		// failure policy as the pool path below.
		v = runtime·SysAlloc(size);
		if(v == nil)
			runtime·throw("runtime: cannot allocate memory");
		return v;
	}

	runtime·lock(&alloclock);
	if(size > poolfree) {
		pool = runtime·SysAlloc(Chunk);
		if(pool == nil)
			runtime·throw("runtime: cannot allocate memory");
		poolfree = Chunk;
	}
	v = pool;
	pool += size;
	poolfree -= size;
	runtime·unlock(&alloclock);
	return v;
}
enum { MProf, BProf }; // profile types
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
// stkbucket allocates each Bucket with extra room for nstk stack
// entries after the fixed part, so stk holds nstk valid entries.
typedef struct Bucket Bucket;
struct Bucket
{
Bucket *next; // next in hash list
Bucket *allnext; // next in list of all mbuckets/bbuckets
int32 typ; // MProf or BProf; says which union member below is in use
union
{
struct // typ == MProf
{
// Totals as of the last MProf_GC.
uintptr allocs;
uintptr frees;
uintptr alloc_bytes;
uintptr free_bytes;
// Activity since then; MProf_GC adds these to the totals and zeroes them.
uintptr recent_allocs; // since last gc
uintptr recent_frees;
uintptr recent_alloc_bytes;
uintptr recent_free_bytes;
};
struct // typ == BProf
{
int64 count; // number of blocking events recorded by runtime·blockevent
int64 cycles; // sum of the cycles of those events
};
};
uintptr hash; // hash of stk[0:nstk] as computed by stkbucket
uintptr nstk; // number of valid entries in stk
uintptr stk[1]; // call stack; really nstk entries long
};
enum {
BuckHashSize = 179999, // number of chains in buckhash
};
static Bucket **buckhash; // bucket hash table; created on first use by stkbucket
static Bucket *mbuckets; // memory profile buckets
static Bucket *bbuckets; // blocking profile buckets
static uintptr bucketmem; // total bytes requested from allocate for buckets
// Look up the bucket of profile type typ (MProf or BProf) whose call
// stack equals stk[0:nstk].  If there is none, return nil when alloc
// is false; otherwise create one, link it into the hash table and
// into the mbuckets or bbuckets list, and return it.
// The hash table itself is created on the first call.
static Bucket*
stkbucket(int32 typ, uintptr *stk, int32 nstk, bool alloc)
{
	int32 k, slot;
	uintptr hash;
	Bucket *bkt, **head;

	if(buckhash == nil) {
		buckhash = runtime·SysAlloc(BuckHashSize*sizeof buckhash[0]);
		if(buckhash == nil)
			runtime·throw("runtime: cannot allocate memory");
		mstats.buckhash_sys += BuckHashSize*sizeof buckhash[0];
	}

	// Mix every pc of the stack into the hash, then finish it off.
	hash = 0;
	for(k=0; k<nstk; k++) {
		hash += stk[k];
		hash += hash<<10;
		hash ^= hash>>6;
	}
	hash += hash<<3;
	hash ^= hash>>11;

	// Search the chain for an existing bucket with the same
	// type and the same stack.
	slot = hash%BuckHashSize;
	for(bkt = buckhash[slot]; bkt != nil; bkt = bkt->next) {
		if(bkt->typ != typ || bkt->hash != hash || bkt->nstk != nstk)
			continue;
		if(runtime·mcmp((byte*)bkt->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
			return bkt;
	}

	if(!alloc)
		return nil;

	// Not found: build a new bucket with room for the stack.
	bkt = allocate(sizeof *bkt + nstk*sizeof stk[0]);
	if(bkt == nil)
		runtime·throw("runtime: cannot allocate memory");
	bucketmem += sizeof *bkt + nstk*sizeof stk[0];
	runtime·memmove(bkt->stk, stk, nstk*sizeof stk[0]);
	bkt->typ = typ;
	bkt->hash = hash;
	bkt->nstk = nstk;

	// Push it on the front of its hash chain ...
	bkt->next = buckhash[slot];
	buckhash[slot] = bkt;

	// ... and on the front of the list for its profile type.
	if(typ == MProf)
		head = &mbuckets;
	else
		head = &bbuckets;
	bkt->allnext = *head;
	*head = bkt;
	return bkt;
}
static void
MProf_GC(void)
{
Bucket *b;
for(b=mbuckets; b; b=b->allnext) {
b->allocs += b->recent_allocs;
b->frees += b->recent_frees;
b->alloc_bytes += b->recent_alloc_bytes;
b->free_bytes += b->recent_free_bytes;
b->recent_allocs = 0;
b->recent_frees = 0;
b->recent_alloc_bytes = 0;
b->recent_free_bytes = 0;
}
}
// Note that a garbage collection has completed: under proflock,
// promote all the 'recent' memory profile statistics to real ones.
void
runtime·MProf_GC(void)
{
	runtime·lock(&proflock);
	MProf_GC();
	runtime·unlock(&proflock);
}
// Map from pointer to Bucket* that allocated it.
// Three levels:
// Linked-list hash table for top N-AddrHashShift bits.
// Array index for next AddrDenseBits bits.
// Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map,
// because of the typical clustering of the pointer keys.
// (N is presumably the width of a pointer in bits.)
typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry;
enum {
AddrHashBits = 12, // good for 4GB of used address space
AddrHashShift = 20, // each AddrHash knows about 1MB of address space
AddrDenseBits = 8, // good for a profiling rate of 4096 bytes
};
// Second level: all recorded addresses that share one value of addr>>AddrHashShift.
struct AddrHash
{
AddrHash *next; // next in top-level hash table linked list
uintptr addr; // addr>>20
AddrEntry *dense[1<<AddrDenseBits]; // chains indexed by the top AddrDenseBits bits of the low AddrHashShift bits
};
// Third level: one recorded address and the bucket that allocated it.
struct AddrEntry
{
AddrEntry *next; // next in bottom-level linked list
uint32 addr; // bitwise complement of the low AddrHashShift bits of the address (see setaddrbucket)
Bucket *b; // bucket charged with the allocation
};
static AddrHash **addrhash; // points to (AddrHash*)[1<<AddrHashBits]
static AddrEntry *addrfree; // free list of AddrEntry, refilled 64 at a time by setaddrbucket
static uintptr addrmem; // total bytes requested from allocate for AddrHash and AddrEntry
// Multiplicative hash function:
// HashMultiplier is the bottom 32 bits of int((sqrt(5)-1)/2 * (1<<32)).
// This is a good multiplier as suggested in CLR, Knuth. The hash
// value is taken to be the top AddrHashBits bits of the bottom 32 bits
// of the multiplied value.
enum {
HashMultiplier = 2654435769U
};
// Associate the block at addr with bucket b in the address map.
// Creates the AddrHash for addr's region if it does not exist yet
// and takes an AddrEntry from the free list, refilling the list
// with a batch of 64 entries when it is empty.
// Called with proflock held (see runtime·MProf_Malloc).
static void
setaddrbucket(uintptr addr, Bucket *b)
{
	int32 k;
	uint32 slot, d;
	uintptr region;
	AddrHash *ah;
	AddrEntry *ent;

	// Top level: find the AddrHash covering this region.
	region = addr>>AddrHashShift;
	slot = (uint32)(region*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[slot]; ah != nil; ah=ah->next)
		if(ah->addr == region)
			break;
	if(ah == nil) {
		ah = allocate(sizeof *ah);
		addrmem += sizeof *ah;
		ah->next = addrhash[slot];
		ah->addr = region;
		addrhash[slot] = ah;
	}

	// Get an entry, carving out a new batch if the free list is empty.
	ent = addrfree;
	if(ent == nil) {
		ent = allocate(64*sizeof *ent);
		addrmem += 64*sizeof *ent;
		for(k=0; k<63; k++)
			ent[k].next = &ent[k+1];
		ent[63].next = nil;
	}
	addrfree = ent->next;

	// The entry keeps the complement of the low AddrHashShift bits.
	ent->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
	ent->b = b;

	// Entry in dense is top 8 bits of low 20.
	d = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);
	ent->next = ah->dense[d];
	ah->dense[d] = ent;
}
// Return the bucket recorded for addr and remove the association,
// putting its AddrEntry back on the free list.
// Returns nil if addr has no recorded bucket.
// Called with proflock held (see runtime·MProf_Free).
static Bucket*
getaddrbucket(uintptr addr)
{
	uint32 slot, d, key;
	uintptr region;
	AddrHash *ah;
	AddrEntry *ent, **link;
	Bucket *b;

	// Top level: find the AddrHash covering this region, if any.
	region = addr>>AddrHashShift;
	slot = (uint32)(region*HashMultiplier) >> (32-AddrHashBits);
	for(ah=addrhash[slot]; ah != nil; ah=ah->next)
		if(ah->addr == region)
			break;
	if(ah == nil)
		return nil;

	// Entries store the complement of the low AddrHashShift bits.
	key = (uint32)~(addr & ((1<<AddrHashShift)-1));

	// Entry in dense is top 8 bits of low 20.
	d = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1);
	link = &ah->dense[d];
	while((ent = *link) != nil) {
		if(ent->addr == key) {
			// Unlink the entry and recycle it.
			*link = ent->next;
			b = ent->b;
			ent->next = addrfree;
			addrfree = ent;
			return b;
		}
		link = &ent->next;
	}
	return nil;
}
// Called by malloc to record a profiled block of size bytes at p.
// Charges the allocation to the bucket for the current call stack
// (at most 32 frames) and remembers which bucket owns p.
// Does nothing while m->nomemprof is positive; the counter is raised
// for the duration of the call.
void
runtime·MProf_Malloc(void *p, uintptr size)
{
	int32 depth;
	uintptr pcs[32];
	Bucket *bkt;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;
	depth = runtime·callers(1, pcs, nelem(pcs));

	runtime·lock(&proflock);
	bkt = stkbucket(MProf, pcs, depth, true);
	bkt->recent_allocs++;
	bkt->recent_alloc_bytes += size;
	setaddrbucket((uintptr)p, bkt);
	runtime·unlock(&proflock);

	m->nomemprof--;
}
// Called when freeing a profiled block of size bytes at p.
// If p has a recorded bucket, the free is charged to it and the
// association is dropped; otherwise nothing is recorded.
// Does nothing while m->nomemprof is positive; the counter is raised
// for the duration of the call.
void
runtime·MProf_Free(void *p, uintptr size)
{
	Bucket *bkt;

	if(m->nomemprof > 0)
		return;

	m->nomemprof++;

	runtime·lock(&proflock);
	bkt = getaddrbucket((uintptr)p);
	if(bkt != nil) {
		bkt->recent_frees++;
		bkt->recent_free_bytes += size;
	}
	runtime·unlock(&proflock);

	m->nomemprof--;
}
int64 runtime·blockprofilerate; // in CPU ticks

// Set the sampling rate of the blocking profile.
// rate is converted from nanoseconds to CPU ticks and published
// atomically in runtime·blockprofilerate, which runtime·blockevent reads.
// rate <= 0 stores 0, which turns blocking profiling off.
// Any positive rate stores at least 1 tick, so a small rate such as 1
// keeps profiling enabled even when a tick is longer than a nanosecond.
void
runtime·SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;	// disable profiling
	else {
		// Convert ns to ticks in floating point: the integer
		// product rate*tickspersecond can overflow int64.
		r = (float64)rate*runtime·tickspersecond()/(1000*1000*1000);
		// Do not let a positive rate round down to "off".
		if(r == 0)
			r = 1;
	}
	runtime·atomicstore64((uint64*)&runtime·blockprofilerate, r);
}
// Record a blocking event that lasted the given number of cycles.
// Nothing is recorded when cycles is not positive or when the
// profile rate is not positive.  Events at least as long as the rate
// are always recorded; shorter ones are sampled at random.
// skip is forwarded to runtime·callers when capturing the stack
// (at most 32 frames).
void
runtime·blockevent(int64 cycles, int32 skip)
{
	int32 depth;
	int64 rate;
	uintptr pcs[32];
	Bucket *bkt;

	if(cycles <= 0)
		return;

	rate = runtime·atomicload64((uint64*)&runtime·blockprofilerate);
	if(rate <= 0)
		return;
	// Short event: keep it only if the random draw is small enough.
	if(rate > cycles && runtime·fastrand1()%rate > cycles)
		return;

	depth = runtime·callers(skip, pcs, nelem(pcs));

	runtime·lock(&proflock);
	bkt = stkbucket(BProf, pcs, depth, true);
	bkt->count++;
	bkt->cycles += cycles;
	runtime·unlock(&proflock);
}
// Go interface to profile data. (Declared in debug.go)
// Must match MemProfileRecord in debug.go.
// One Record is filled in per memory profile bucket by record.
typedef struct Record Record;
struct Record {
int64 alloc_bytes, free_bytes; // copied from the bucket's alloc_bytes and free_bytes
int64 alloc_objects, free_objects; // copied from the bucket's allocs and frees
uintptr stk[32]; // call stack; unused trailing entries are 0
};
// Fill in r from memory profile bucket b: the four totals and the
// call stack.  The stack is truncated to the capacity of r->stk and
// any unused trailing entries are set to 0.
static void
record(Record *r, Bucket *b)
{
	int32 k;

	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;

	for(k=0; k<nelem(r->stk); k++) {
		if(k < b->nstk)
			r->stk[k] = b->stk[k];
		else
			r->stk[k] = 0;
	}
}
// Copy the memory profile into p, viewed as an array of Record.
// n is the number of buckets that qualify: all of them when
// include_inuse_zero is set, otherwise only those whose alloc_bytes
// and free_bytes differ.  ok reports whether p was long enough;
// records are written only when it was.
func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *bkt;
	Record *out;
	bool nodata;

	runtime·lock(&proflock);

	n = 0;
	nodata = true;
	for(bkt=mbuckets; bkt != nil; bkt=bkt->allnext) {
		if(bkt->allocs != 0 || bkt->frees != 0)
			nodata = false;
		if(include_inuse_zero || bkt->alloc_bytes != bkt->free_bytes)
			n++;
	}

	if(nodata) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		MProf_GC();
		n = 0;
		for(bkt=mbuckets; bkt != nil; bkt=bkt->allnext) {
			if(include_inuse_zero || bkt->alloc_bytes != bkt->free_bytes)
				n++;
		}
	}

	ok = false;
	if(n <= p.len) {
		ok = true;
		out = (Record*)p.array;
		for(bkt=mbuckets; bkt != nil; bkt=bkt->allnext) {
			if(include_inuse_zero || bkt->alloc_bytes != bkt->free_bytes) {
				record(out, bkt);
				out++;
			}
		}
	}

	runtime·unlock(&proflock);
}
// Must match BlockProfileRecord in debug.go.
// One BRecord is filled in per blocking profile bucket by BlockProfile.
typedef struct BRecord BRecord;
struct BRecord {
int64 count; // copied from the bucket's count
int64 cycles; // copied from the bucket's cycles
uintptr stk[32]; // call stack; unused trailing entries are 0
};
// Copy the blocking profile into p, viewed as an array of BRecord.
// n is the number of blocking profile buckets.  ok reports whether
// p was long enough; records are written only when it was.
// Each stack is truncated to the capacity of the record and padded
// with zeros.
func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *bkt;
	BRecord *out;
	int32 k;

	runtime·lock(&proflock);

	n = 0;
	for(bkt=bbuckets; bkt != nil; bkt=bkt->allnext)
		n++;

	ok = false;
	if(n <= p.len) {
		ok = true;
		out = (BRecord*)p.array;
		for(bkt=bbuckets; bkt != nil; bkt=bkt->allnext) {
			out->count = bkt->count;
			out->cycles = bkt->cycles;
			for(k=0; k<nelem(out->stk); k++) {
				if(k < bkt->nstk)
					out->stk[k] = bkt->stk[k];
				else
					out->stk[k] = 0;
			}
			out++;
		}
	}

	runtime·unlock(&proflock);
}
// Must match StackRecord in debug.go.
// Used for the records written by ThreadCreateProfile and GoroutineProfile.
typedef struct TRecord TRecord;
struct TRecord {
uintptr stk[32]; // call stack
};
// Copy the creation stack of every M on the allm list into p, viewed
// as an array of TRecord.  n is the number of Ms found.  ok reports
// whether p was long enough; records are written only when it was.
// The list head is loaded once, atomically, and the same snapshot is
// used for both counting and copying.
func ThreadCreateProfile(p Slice) (n int, ok bool) {
	TRecord *out;
	M *head, *mp;

	head = runtime·atomicloadp(&runtime·allm);

	n = 0;
	for(mp=head; mp != nil; mp=mp->alllink)
		n++;

	ok = false;
	if(n <= p.len) {
		ok = true;
		out = (TRecord*)p.array;
		for(mp=head; mp != nil; mp=mp->alllink, out++)
			runtime·memmove(out->stk, mp->createstack, sizeof out->stk);
	}
}
// Write a formatted stack trace of the calling goroutine into b and
// set n to the number of bytes written.  When all is true the world
// is stopped first and the traces of the other goroutines follow.
// An empty b yields n = 0 without tracing anything.
func Stack(b Slice, all bool) (n int) {
	byte *pc, *sp;

	sp = runtime·getcallersp(&b);
	pc = runtime·getcallerpc(&b);

	if(all) {
		runtime·semacquire(&runtime·worldsema);
		m->gcing = 1;
		runtime·stoptheworld();
	}

	if(b.len != 0) {
		// Point the write buffer of g at b while the trace is
		// produced, then detach it again.
		g->writebuf = (byte*)b.array;
		g->writenbuf = b.len;
		runtime·goroutineheader(g);
		runtime·traceback(pc, sp, 0, g);
		if(all)
			runtime·tracebackothers(g);
		n = b.len - g->writenbuf;
		g->writebuf = nil;
		g->writenbuf = 0;
	} else
		n = 0;

	if(all) {
		m->gcing = 0;
		runtime·semrelease(&runtime·worldsema);
		runtime·starttheworld();
	}
}
// Store the call stack of goroutine gp, starting at pc/sp, in r->stk.
// If the trace does not fill r->stk, a 0 entry is written right after
// the last frame.
static void
saveg(byte *pc, byte *sp, G *gp, TRecord *r)
{
	int32 depth;

	depth = runtime·gentraceback(pc, sp, 0, gp, 0, r->stk, nelem(r->stk));
	if(depth >= nelem(r->stk))
		return;
	r->stk[depth] = 0;
}
// Copy the call stack of every goroutine into b, viewed as an array
// of TRecord.  n is the goroutine count from runtime·gcount.  ok
// reports whether the stacks were written, which requires n <= b.len
// both before and after the world is stopped.
func GoroutineProfile(b Slice) (n int, ok bool) {
byte *pc, *sp;
TRecord *r;
G *gp;
sp = runtime·getcallersp(&b);
pc = runtime·getcallerpc(&b);
ok = false;
n = runtime·gcount();
// First check without stopping the world; if b is too short, only n is returned.
if(n <= b.len) {
runtime·semacquire(&runtime·worldsema);
m->gcing = 1;
runtime·stoptheworld();
// Count again now that the world is stopped
// (presumably because the count may have changed in between).
n = runtime·gcount();
if(n <= b.len) {
ok = true;
r = (TRecord*)b.array;
// The calling goroutine comes first, traced from the pc/sp of the caller.
saveg(pc, sp, g, r++);
// All other goroutines that are not dead are traced from their saved sched pc/sp.
for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
if(gp == g || gp->status == Gdead)
continue;
saveg(gp->sched.pc, (byte*)gp->sched.sp, gp, r++);
}
}
m->gcing = 0;
runtime·semrelease(&runtime·worldsema);
runtime·starttheworld();
}
}
// Initialize the profiler: allocate the top-level table of the
// address-to-bucket map, one chain head per AddrHashBits-bit hash value.
void
runtime·mprofinit(void)
{
	addrhash = allocate((1<<AddrHashBits)*sizeof addrhash[0]);
}