1
0
mirror of https://github.com/golang/go synced 2024-10-02 18:18:33 -06:00

runtime: lower memory overhead of heap profiling.

The previous code sized each dense array as if there were one entry
every 128 bytes. Moving to a 4096-byte interval reduces the overhead
per megabyte of address space to 2kB from 64kB (on 64-bit systems).
Lookups will be slower for very small values of MemProfileRate, because
more entries then share each linked list.

test/bench/garbage/tree2 -heapsize 800000000 (default memprofilerate)
Before: mprof 65993056 bytes (1664 bucketmem + 65991392 addrmem)
After:  mprof  1989984 bytes (1680 bucketmem +  1988304 addrmem)

R=golang-dev, rsc
CC=golang-dev, remy
https://golang.org/cl/6257069
This commit is contained in:
Rémy Oudompheng 2012-05-31 23:30:55 +02:00
parent 29e32d73ef
commit baf91c313f

View File

@ -107,20 +107,26 @@ runtime·MProf_GC(void)
// Map from pointer to Bucket* that allocated it. // Map from pointer to Bucket* that allocated it.
// Three levels: // Three levels:
// Linked-list hash table for top N-20 bits. // Linked-list hash table for top N-AddrHashShift bits.
// Array index for next 13 bits. // Array index for next AddrDenseBits bits.
// Linked list for next 7 bits. // Linked list for next AddrHashShift-AddrDenseBits bits.
// This is more efficient than using a general map, // This is more efficient than using a general map,
// because of the typical clustering of the pointer keys. // because of the typical clustering of the pointer keys.
typedef struct AddrHash AddrHash; typedef struct AddrHash AddrHash;
typedef struct AddrEntry AddrEntry; typedef struct AddrEntry AddrEntry;
// Sizing constants for the three-level pointer-to-Bucket map.
enum {
AddrHashBits = 12, // log2 of the number of addrhash slots; good for 4GB of used address space
AddrHashShift = 20, // low address bits handled inside one AddrHash: each AddrHash knows about 1MB of address space
AddrDenseBits = 8, // log2 of the dense array length: one slot per 4096 bytes, good for a profiling rate of 4096 bytes
};
struct AddrHash struct AddrHash
{ {
AddrHash *next; // next in top-level hash table linked list AddrHash *next; // next in top-level hash table linked list
uintptr addr; // addr>>20 uintptr addr; // addr>>20
AddrEntry *dense[1<<13]; AddrEntry *dense[1<<AddrDenseBits];
}; };
struct AddrEntry struct AddrEntry
@ -130,9 +136,6 @@ struct AddrEntry
Bucket *b; Bucket *b;
}; };
enum {
AddrHashBits = 12 // 1MB per entry, so good for 4GB of used address space
};
static AddrHash *addrhash[1<<AddrHashBits]; static AddrHash *addrhash[1<<AddrHashBits];
static AddrEntry *addrfree; static AddrEntry *addrfree;
static uintptr addrmem; static uintptr addrmem;
@ -155,15 +158,15 @@ setaddrbucket(uintptr addr, Bucket *b)
AddrHash *ah; AddrHash *ah;
AddrEntry *e; AddrEntry *e;
h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
for(ah=addrhash[h]; ah; ah=ah->next) for(ah=addrhash[h]; ah; ah=ah->next)
if(ah->addr == (addr>>20)) if(ah->addr == (addr>>AddrHashShift))
goto found; goto found;
ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1); ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
addrmem += sizeof *ah; addrmem += sizeof *ah;
ah->next = addrhash[h]; ah->next = addrhash[h];
ah->addr = addr>>20; ah->addr = addr>>AddrHashShift;
addrhash[h] = ah; addrhash[h] = ah;
found: found:
@ -175,9 +178,9 @@ found:
e[63].next = nil; e[63].next = nil;
} }
addrfree = e->next; addrfree = e->next;
e->addr = (uint32)~(addr & ((1<<20)-1)); e->addr = (uint32)~(addr & ((1<<AddrHashShift)-1));
e->b = b; e->b = b;
h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20. h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
e->next = ah->dense[h]; e->next = ah->dense[h];
ah->dense[h] = e; ah->dense[h] = e;
} }
@ -191,16 +194,16 @@ getaddrbucket(uintptr addr)
AddrEntry *e, **l; AddrEntry *e, **l;
Bucket *b; Bucket *b;
h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); h = (uint32)((addr>>AddrHashShift)*HashMultiplier) >> (32-AddrHashBits);
for(ah=addrhash[h]; ah; ah=ah->next) for(ah=addrhash[h]; ah; ah=ah->next)
if(ah->addr == (addr>>20)) if(ah->addr == (addr>>AddrHashShift))
goto found; goto found;
return nil; return nil;
found: found:
h = (addr>>7)&(nelem(ah->dense)-1); // entry in dense is top 13 bits of low 20. h = (addr>>(AddrHashShift-AddrDenseBits))&(nelem(ah->dense)-1); // entry in dense is top 8 bits of low 20.
for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) { for(l=&ah->dense[h]; (e=*l) != nil; l=&e->next) {
if(e->addr == (uint32)~(addr & ((1<<20)-1))) { if(e->addr == (uint32)~(addr & ((1<<AddrHashShift)-1))) {
*l = e->next; *l = e->next;
b = e->b; b = e->b;
e->next = addrfree; e->next = addrfree;