mirror of
https://github.com/golang/go
synced 2024-11-18 18:54:42 -07:00
runtime: faster GC scan
The change contains 3 spot optimizations to the scan loop: 1. Don't use byte vars; use uintptrs instead. This seems to alleviate some codegen issue, and alone accounts for half of the speedup. 2. Remove the bitmap cache. Currently we cache only 1 byte, so caching is not particularly effective anyway. Removal of the cache simplifies the code and positively affects regalloc. 3. Replace the BitsMultiWord switch with an if and do debug checks only in Debug mode. I've benchmarked the changes separately and ensured that each of them provides a speedup on top of the previous one. This change as a whole fixes the unintentional regressions of the scan loop that were introduced during the development cycle. Fixes #8625. Fixes #8565. On go.benchmarks/garbage benchmark: GOMAXPROCS=1 time: -3.13% cputime: -3.22% gc-pause-one: -15.71% gc-pause-total: -15.71% GOMAXPROCS=32 time: -1.96% cputime: -4.43% gc-pause-one: -6.22% gc-pause-total: -6.22% LGTM=khr, rsc R=golang-codereviews, khr CC=golang-codereviews, rlh, rsc https://golang.org/cl/153990043
This commit is contained in:
parent
94bdf13497
commit
b8fdaaf028
@ -179,9 +179,8 @@ have_cgo_allocate(void)
|
|||||||
static void
|
static void
|
||||||
scanblock(byte *b, uintptr n, byte *ptrmask)
|
scanblock(byte *b, uintptr n, byte *ptrmask)
|
||||||
{
|
{
|
||||||
byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp, bits, xbits, shift, cached;
|
byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
|
||||||
uintptr i, j, nobj, size, idx, x, off, scanbufpos;
|
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
|
||||||
intptr ncached;
|
|
||||||
Workbuf *wbuf;
|
Workbuf *wbuf;
|
||||||
Iface *iface;
|
Iface *iface;
|
||||||
Eface *eface;
|
Eface *eface;
|
||||||
@ -203,8 +202,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
|
|||||||
scanbuf[i] = nil;
|
scanbuf[i] = nil;
|
||||||
|
|
||||||
ptrbitp = nil;
|
ptrbitp = nil;
|
||||||
cached = 0;
|
|
||||||
ncached = 0;
|
|
||||||
|
|
||||||
// ptrmask can have 2 possible values:
|
// ptrmask can have 2 possible values:
|
||||||
// 1. nil - obtain pointer mask from GC bitmap.
|
// 1. nil - obtain pointer mask from GC bitmap.
|
||||||
@ -259,10 +256,6 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
|
|||||||
if(ptrmask == nil) {
|
if(ptrmask == nil) {
|
||||||
off = (uintptr*)b - (uintptr*)arena_start;
|
off = (uintptr*)b - (uintptr*)arena_start;
|
||||||
ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
|
ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
|
||||||
shift = (off % wordsPerBitmapByte) * gcBits;
|
|
||||||
cached = *ptrbitp >> shift;
|
|
||||||
cached &= ~bitBoundary;
|
|
||||||
ncached = (8 - shift)/gcBits;
|
|
||||||
}
|
}
|
||||||
for(i = 0; i < n; i += PtrSize) {
|
for(i = 0; i < n; i += PtrSize) {
|
||||||
obj = nil;
|
obj = nil;
|
||||||
@ -273,15 +266,12 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
|
|||||||
runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
|
runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
|
||||||
break;
|
break;
|
||||||
// Consult GC bitmap.
|
// Consult GC bitmap.
|
||||||
if(ncached <= 0) {
|
bits = *ptrbitp;
|
||||||
// Refill cache.
|
if((((uintptr)b+i)%(PtrSize*wordsPerBitmapByte)) != 0) {
|
||||||
cached = *--ptrbitp;
|
ptrbitp--;
|
||||||
ncached = 2;
|
bits >>= gcBits;
|
||||||
}
|
}
|
||||||
bits = cached;
|
if((bits&bitBoundary) != 0 && i != 0)
|
||||||
cached >>= gcBits;
|
|
||||||
ncached--;
|
|
||||||
if((bits&bitBoundary) != 0)
|
|
||||||
break; // reached beginning of the next object
|
break; // reached beginning of the next object
|
||||||
bits = (bits>>2)&BitsMask;
|
bits = (bits>>2)&BitsMask;
|
||||||
if(bits == BitsDead)
|
if(bits == BitsDead)
|
||||||
@ -289,7 +279,7 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
|
|||||||
} else // dense mask (stack or data)
|
} else // dense mask (stack or data)
|
||||||
bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
|
bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
|
||||||
|
|
||||||
if(bits == BitsScalar || bits == BitsDead)
|
if(bits <= BitsScalar) // BitsScalar || BitsDead
|
||||||
continue;
|
continue;
|
||||||
if(bits == BitsPointer) {
|
if(bits == BitsPointer) {
|
||||||
obj = *(byte**)(b+i);
|
obj = *(byte**)(b+i);
|
||||||
@ -298,43 +288,39 @@ scanblock(byte *b, uintptr n, byte *ptrmask)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// With those three out of the way, must be multi-word.
|
// With those three out of the way, must be multi-word.
|
||||||
if(bits != BitsMultiWord)
|
if(Debug && bits != BitsMultiWord)
|
||||||
runtime·throw("unexpected garbage collection bits");
|
runtime·throw("unexpected garbage collection bits");
|
||||||
// Find the next pair of bits.
|
// Find the next pair of bits.
|
||||||
if(ptrmask == nil) {
|
if(ptrmask == nil) {
|
||||||
if(ncached <= 0) {
|
bits = *ptrbitp;
|
||||||
// Refill cache.
|
if((((uintptr)b+i)%(PtrSize*wordsPerBitmapByte)) == 0) {
|
||||||
cached = *--ptrbitp;
|
ptrbitp--;
|
||||||
ncached = 2;
|
bits >>= gcBits;
|
||||||
}
|
}
|
||||||
bits = (cached>>2)&BitsMask;
|
bits = (bits>>2)&BitsMask;
|
||||||
} else
|
} else
|
||||||
bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
|
bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
|
||||||
|
|
||||||
switch(bits) {
|
if(Debug && bits != BitsIface && bits != BitsEface)
|
||||||
default:
|
|
||||||
runtime·throw("unexpected garbage collection bits");
|
runtime·throw("unexpected garbage collection bits");
|
||||||
case BitsIface:
|
|
||||||
|
if(bits == BitsIface) {
|
||||||
iface = (Iface*)(b+i);
|
iface = (Iface*)(b+i);
|
||||||
if(iface->tab != nil) {
|
if(iface->tab != nil) {
|
||||||
typ = iface->tab->type;
|
typ = iface->tab->type;
|
||||||
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
|
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
|
||||||
obj = iface->data;
|
obj = iface->data;
|
||||||
}
|
}
|
||||||
break;
|
} else {
|
||||||
case BitsEface:
|
|
||||||
eface = (Eface*)(b+i);
|
eface = (Eface*)(b+i);
|
||||||
typ = eface->type;
|
typ = eface->type;
|
||||||
if(typ != nil) {
|
if(typ != nil) {
|
||||||
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
|
if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
|
||||||
obj = eface->data;
|
obj = eface->data;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
i += PtrSize;
|
i += PtrSize;
|
||||||
cached >>= gcBits;
|
|
||||||
ncached--;
|
|
||||||
|
|
||||||
obj0 = obj;
|
obj0 = obj;
|
||||||
markobj:
|
markobj:
|
||||||
|
Loading…
Reference in New Issue
Block a user