Mirror of https://github.com/golang/go
Commit db7fd1c142 (parent f21ee1e1d8)

runtime: increase GC concurrency.

Run the GC in its own background goroutine, making the caller runnable if
resources are available. This is critical in single-goroutine applications.
Allow goroutines that allocate a lot to help out the GC, and in doing so
throttle their own allocation. Adjust the test so that it only detects that
a GC is run during init calls, not whether the GC is memory efficient;
memory-efficiency work will happen later in 1.5.

Change-Id: I4306f5e377bb47c69bda1aedba66164f12b20c2b
Reviewed-on: https://go-review.googlesource.com/2349
Reviewed-by: Russ Cox <rsc@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
@@ -39,10 +39,27 @@ type pageID uintptr
 // base address for all 0-byte allocations
 var zerobase uintptr
 
+// Determine whether to initiate a GC.
+// Currently the primitive heuristic we use will start a new
+// concurrent GC when approximately half the available space
+// made available by the last GC cycle has been used.
+// If the GC is already working, there is no need to trigger another one.
+// This should establish a feedback loop: if the GC does not
+// have sufficient time to complete, then more memory will be
+// requested from the OS, increasing the heap size and thus allowing
+// future GCs more time to complete.
+// The memstats.heap_alloc and memstats.next_gc reads have benign races:
+// a false negative simply does not start a GC, and a false positive
+// starts a GC needlessly. Neither has correctness issues.
+func shouldtriggergc() bool {
+    return memstats.heap_alloc+memstats.heap_alloc*3/4 >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
+}
+
 // Allocate an object of size bytes.
 // Small objects are allocated from the per-P cache's free lists.
 // Large objects (> 32 kB) are allocated straight from the heap.
 func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
+    shouldhelpgc := false
     if size == 0 {
         return unsafe.Pointer(&zerobase)
     }
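
To unpack the integer arithmetic in shouldtriggergc: heap_alloc + heap_alloc*3/4 >= next_gc is the same condition as heap_alloc >= 4/7 of next_gc (about 57%, a bit above the comment's "approximately half"), and the atomicloaduint check keeps a second cycle from starting while one is in flight. A standalone sketch of just the arithmetic, with made-up heap values (not runtime code):

package main

import "fmt"

func main() {
    nextGC := uint64(7 << 20) // hypothetical 7 MB goal
    for _, heapAlloc := range []uint64{3 << 20, 4 << 20, 6 << 20} {
        // Same integer arithmetic as shouldtriggergc (the GC-state check is omitted):
        // heapAlloc + heapAlloc*3/4 >= nextGC  <=>  heapAlloc >= ~4/7 of nextGC.
        trigger := heapAlloc+heapAlloc*3/4 >= nextGC
        fmt.Printf("heap_alloc = %d MB: trigger = %v\n", heapAlloc>>20, trigger)
    }
}

With a 7 MB goal the trigger first fires at 4 MB allocated, since 4 + 4*3/4 = 7.
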
@@ -144,6 +161,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
             systemstack(func() {
                 mCache_Refill(c, tinySizeClass)
             })
+            shouldhelpgc = true
             s = c.alloc[tinySizeClass]
             v = s.freelist
         }
@@ -174,6 +192,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
             systemstack(func() {
                 mCache_Refill(c, int32(sizeclass))
             })
+            shouldhelpgc = true
             s = c.alloc[sizeclass]
             v = s.freelist
         }
@@ -191,6 +210,7 @@ func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
         c.local_cachealloc += intptr(size)
     } else {
         var s *mspan
+        shouldhelpgc = true
         systemstack(func() {
             s = largeAlloc(size, uint32(flags))
         })
@@ -345,8 +365,15 @@ marked:
         }
     }
 
-    if memstats.heap_alloc >= memstats.next_gc/2 {
+    if shouldtriggergc() {
         gogc(0)
+    } else if shouldhelpgc && atomicloaduint(&bggc.working) == 1 {
+        // bggc.lock not taken since race on bggc.working is benign.
+        // At worst we don't call gchelpwork.
+        // Delay the gchelpwork until the epilogue so that it doesn't
+        // interfere with the inner workings of malloc, such as
+        // mcache refills that might happen while doing the gchelpwork.
+        systemstack(gchelpwork)
     }
 
     return x
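
Taken together, the malloc epilogue now gives every slow-path allocation one of three outcomes: start a GC, assist the one in flight, or continue. A self-contained toy model of that decision follows; the names here are invented for illustration, where the runtime uses gogc, bggc.working, and gchelpwork:

package main

import (
    "fmt"
    "sync/atomic"
)

var gcWorking uint32 // models bggc.working

// epilogue models the end of mallocgc: shouldhelpgc is true only for
// allocations that refilled an mcache or went straight to the heap.
func epilogue(shouldhelpgc bool, heapAlloc, nextGC uint64) string {
    if heapAlloc+heapAlloc*3/4 >= nextGC && atomic.LoadUint32(&gcWorking) == 0 {
        return "start background GC" // gogc(0)
    }
    if shouldhelpgc && atomic.LoadUint32(&gcWorking) == 1 {
        return "assist running GC" // systemstack(gchelpwork)
    }
    return "no GC work"
}

func main() {
    fmt.Println(epilogue(true, 4<<20, 7<<20)) // start background GC
    atomic.StoreUint32(&gcWorking, 1)
    fmt.Println(epilogue(true, 6<<20, 7<<20))  // assist running GC
    fmt.Println(epilogue(false, 6<<20, 7<<20)) // no GC work
}
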
@@ -466,14 +493,25 @@ func gogc(force int32) {
     releasem(mp)
     mp = nil
 
-    semacquire(&worldsema, false)
-
-    if force == 0 && memstats.heap_alloc < memstats.next_gc {
-        // typically threads which lost the race to grab
-        // worldsema exit here when gc is done.
-        semrelease(&worldsema)
-        return
+    if force == 0 {
+        lock(&bggc.lock)
+        if !bggc.started {
+            bggc.working = 1
+            bggc.started = true
+            go backgroundgc()
+        } else if bggc.working == 0 {
+            bggc.working = 1
+            ready(bggc.g)
+        }
+        unlock(&bggc.lock)
+    } else {
+        gcwork(force)
     }
-
+}
+
+func gcwork(force int32) {
+
+    semacquire(&worldsema, false)
+
     // Pick up the remaining unswept/not being swept spans concurrently
     for gosweepone() != ^uintptr(0) {
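
The restructured gogc is now just a dispatcher: the first non-forced call creates the background goroutine, later calls merely wake it if it is parked, and forced collections (runtime.GC) still run gcwork synchronously on the caller. A userland analog of this start-or-wake pattern, a hedged sketch using sync.Cond where the runtime uses goparkunlock and ready:

package main

import (
    "fmt"
    "sync"
    "time"
)

type bgWorker struct {
    mu      sync.Mutex
    cond    sync.Cond
    started bool
    working bool
}

// kick mirrors the force == 0 path of gogc.
func (w *bgWorker) kick() {
    w.mu.Lock()
    defer w.mu.Unlock()
    if !w.started {
        w.started = true
        w.working = true
        w.cond.L = &w.mu
        go w.loop() // analog of: go backgroundgc()
    } else if !w.working {
        w.working = true
        w.cond.Signal() // analog of: ready(bggc.g)
    }
    // If the worker is mid-cycle (working == true), a kick is a no-op,
    // matching the benign race described in the commit.
}

// loop mirrors backgroundgc: do a cycle, then park until kicked again.
func (w *bgWorker) loop() {
    w.mu.Lock()
    for {
        w.mu.Unlock()
        fmt.Println("GC cycle") // analog of: gcwork(0)
        w.mu.Lock()
        w.working = false
        w.cond.Wait() // analog of: goparkunlock(&bggc.lock, "Concurrent GC wait")
    }
}

func main() {
    var w bgWorker
    w.kick() // starts the worker
    time.Sleep(10 * time.Millisecond)
    w.kick() // wakes the parked worker
    time.Sleep(10 * time.Millisecond)
}
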
@@ -482,14 +520,17 @@ func gogc(force int32) {
 
     // Ok, we're doing it! Stop everybody else
 
-    startTime := nanotime()
-    mp = acquirem()
+    mp := acquirem()
     mp.gcing = 1
     releasem(mp)
     gctimer.count++
     if force == 0 {
         gctimer.cycle.sweepterm = nanotime()
     }
+    // Pick up the remaining unswept/not being swept spans before we STW
+    for gosweepone() != ^uintptr(0) {
+        sweep.nbgsweep++
+    }
     systemstack(stoptheworld)
     systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
     if force == 0 { // Do as much work concurrently as possible
@@ -500,7 +541,7 @@ func gogc(force int32) {
         systemstack(gcscan_m)
         gctimer.cycle.installmarkwb = nanotime()
         systemstack(stoptheworld)
-        gcinstallmarkwb()
+        systemstack(gcinstallmarkwb)
         systemstack(starttheworld)
         gctimer.cycle.mark = nanotime()
         systemstack(gcmark_m)
@@ -509,6 +550,7 @@ func gogc(force int32) {
         systemstack(gcinstalloffwb_m)
     }
 
+    startTime := nanotime()
     if mp != acquirem() {
         throw("gogc: rescheduled")
     }
@@ -527,6 +569,7 @@ func gogc(force int32) {
     eagersweep := force >= 2
     for i := 0; i < n; i++ {
         if i > 0 {
+            // refresh start time if doing a second GC
             startTime = nanotime()
         }
         // switch to g0, call gc, then switch back
@@ -579,8 +622,8 @@ func GCcheckmarkdisable() {
 // gctimes records the time in nanoseconds of each phase of the concurrent GC.
 type gctimes struct {
     sweepterm     int64 // stw
-    scan          int64 // stw
-    installmarkwb int64
+    scan          int64
+    installmarkwb int64 // stw
     mark          int64
     markterm      int64 // stw
     sweep         int64
@@ -601,7 +644,7 @@ type gcchronograph struct {
 
 var gctimer gcchronograph
 
-// GCstarttimes initializes the gc timess. All previous timess are lost.
+// GCstarttimes initializes the gc times. All previous times are lost.
 func GCstarttimes(verbose int64) {
     gctimer = gcchronograph{verbose: verbose}
 }
@@ -655,6 +698,11 @@ func calctimes() gctimes {
 // the information from the most recent Concurent GC cycle. Calls from the
 // application to runtime.GC() are ignored.
 func GCprinttimes() {
+    if gctimer.verbose == 0 {
+        println("GC timers not enabled")
+        return
+    }
+
     // Explicitly put times on the heap so printPhase can use it.
     times := new(gctimes)
     *times = calctimes()
@@ -123,7 +123,7 @@ const (
     _DebugGCPtrs     = false // if true, print trace of every pointer load during GC
     _ConcurrentSweep = true
 
-    _WorkbufSize  = 4 * 1024
+    _WorkbufSize  = 4 * 256
     _FinBlockSize = 4 * 1024
     _RootData     = 0
     _RootBss      = 1
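
Shrinking _WorkbufSize from 4*1024 to 4*256 quarters each work buffer (4096 to 1024 bytes), which presumably makes the unit of work handed to a mutator assist (up to one buffer, see drainobjects below) correspondingly smaller. Back-of-envelope capacity, assuming 8-byte pointers and ignoring the workbuf header:

package main

import "fmt"

func main() {
    const ptrSize = 8 // assume a 64-bit platform
    fmt.Println("old:", 4*1024/ptrSize, "object pointers per workbuf") // 512
    fmt.Println("new:", 4*256/ptrSize, "object pointers per workbuf")  // 128
}
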
@@ -191,9 +191,9 @@ var badblock [1024]uintptr
 var nbadblock int32
 
 type workdata struct {
-    full    uint64 // lock-free list of full blocks
-    empty   uint64 // lock-free list of empty blocks
-    partial uint64 // lock-free list of partially filled blocks
+    full    uint64 // lock-free list of full blocks workbuf
+    empty   uint64 // lock-free list of empty blocks workbuf
+    partial uint64 // lock-free list of partially filled blocks workbuf
     pad0    [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
     nproc   uint32
     tstart  int64
@@ -587,6 +587,11 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
     // base and extent.
     b := b0
     n := n0
+
+    // ptrmask can have 2 possible values:
+    // 1. nil - obtain pointer mask from GC bitmap.
+    // 2. pointer to a compact mask (for stacks and data).
+
     wbuf := getpartialorempty()
     if b != 0 {
         wbuf = scanobject(b, n, ptrmask, wbuf)
@@ -600,23 +605,23 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
             return
         }
     }
-    if gcphase == _GCscan {
-        throw("scanblock: In GCscan phase but no b passed in.")
-    }
 
-    keepworking := b == 0
+    drainallwbufs := b == 0
+    drainworkbuf(wbuf, drainallwbufs)
+}
+
+// Scan objects in wbuf until wbuf is empty.
+// If drainallwbufs is true, find all other available workbufs and repeat the process.
+//go:nowritebarrier
+func drainworkbuf(wbuf *workbuf, drainallwbufs bool) {
     if gcphase != _GCmark && gcphase != _GCmarktermination {
         println("gcphase", gcphase)
         throw("scanblock phase")
     }
 
-    // ptrmask can have 2 possible values:
-    // 1. nil - obtain pointer mask from GC bitmap.
-    // 2. pointer to a compact mask (for stacks and data).
     for {
         if wbuf.nobj == 0 {
-            if !keepworking {
+            if !drainallwbufs {
                 putempty(wbuf)
                 return
             }
@@ -641,11 +646,32 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8) {
         //     PREFETCH(wbuf->obj[wbuf->nobj - 3];
         // }
         wbuf.nobj--
-        b = wbuf.obj[wbuf.nobj]
+        b := wbuf.obj[wbuf.nobj]
         wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
     }
 }
 
+// Scan at most count objects in the wbuf.
+//go:nowritebarrier
+func drainobjects(wbuf *workbuf, count uintptr) {
+    for i := uintptr(0); i < count; i++ {
+        if wbuf.nobj == 0 {
+            putempty(wbuf)
+            return
+        }
+
+        // This might be a good place to add prefetch code...
+        // if(wbuf->nobj > 4) {
+        //     PREFETCH(wbuf->obj[wbuf->nobj - 3];
+        // }
+        wbuf.nobj--
+        b := wbuf.obj[wbuf.nobj]
+        wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
+    }
+    putpartial(wbuf)
+    return
+}
+
 //go:nowritebarrier
 func markroot(desc *parfor, i uint32) {
     // Note: if you add a case here, please also update heapdump.c:dumproots.
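
The split into drainworkbuf (drain until empty, optionally pulling in every other available buffer) and drainobjects (drain at most count objects, then put the remainder back as partial work) is what lets an allocating goroutine help in bounded chunks. A minimal self-contained analog, with a plain slice standing in for the workbuf and a print in place of scanobject:

package main

import "fmt"

type workbuf struct{ obj []uintptr }

// drainObjects mirrors the shape of drainobjects: pop and process at most
// count objects, then hand leftover work back (putpartial in the runtime).
func drainObjects(w *workbuf, count int) {
    for i := 0; i < count; i++ {
        if len(w.obj) == 0 {
            return // analog of putempty(wbuf): nothing left to do
        }
        n := len(w.obj) - 1
        b := w.obj[n] // analog of: wbuf.nobj--; b := wbuf.obj[wbuf.nobj]
        w.obj = w.obj[:n]
        fmt.Println("scan object at", b) // analog of scanobject
    }
    fmt.Println("put back as partial work:", len(w.obj), "objects")
}

func main() {
    w := &workbuf{obj: []uintptr{0x100, 0x108, 0x110, 0x118, 0x120}}
    drainObjects(w, 3) // a bounded assist drains only part of the buffer
}
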
@@ -809,6 +835,17 @@ func putpartial(b *workbuf) {
     }
 }
 
+// trygetfull tries to get a full or partially empty workbuffer.
+// If one is not immediately available, return nil.
+//go:nowritebarrier
+func trygetfull() *workbuf {
+    wbuf := (*workbuf)(lfstackpop(&work.full))
+    if wbuf == nil {
+        wbuf = (*workbuf)(lfstackpop(&work.partial))
+    }
+    return wbuf
+}
+
 // Get a full work buffer off the work.full or a partially
 // filled one off the work.partial list. If nothing is available
 // wait until all the other gc helpers have finished and then
@@ -1090,6 +1127,38 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
     }
 }
 
+// gchelpwork does a small bounded amount of gc work. The purpose is to
+// shorten the time (as measured by allocations) spent doing a concurrent GC.
+// The number of mutator calls is roughly proportional to the number of allocations
+// made by that mutator. This slows down the allocation while speeding up the GC.
+//go:nowritebarrier
+func gchelpwork() {
+    switch gcphase {
+    default:
+        throw("gcphasework in bad gcphase")
+    case _GCoff, _GCquiesce, _GCstw:
+        // No work.
+    case _GCsweep:
+        // We could help by calling sweepone to sweep a single span.
+        // _ = sweepone()
+    case _GCscan:
+        // scan the stack, mark the objects, put pointers in work buffers
+        // hanging off the P where this is being run.
+        // scanstack(gp)
+    case _GCmark:
+        // Get a full work buffer and empty it.
+        var wbuf *workbuf
+        wbuf = trygetfull()
+        if wbuf != nil {
+            drainobjects(wbuf, uintptr(len(wbuf.obj))) // drain up to one buffer's worth of objects
+        }
+    case _GCmarktermination:
+        // We should never be here since the world is stopped.
+        // All available mark work will be emptied before returning.
+        throw("gcphasework in bad gcphase")
+    }
+}
+
 // The gp has been moved to a GC safepoint. GC phase specific
 // work is done here.
 //go:nowritebarrier
@@ -1425,6 +1494,14 @@ type sweepdata struct {
 
 var sweep sweepdata
 
+// State of the background concurrent GC goroutine.
+var bggc struct {
+    lock    mutex
+    g       *g
+    working uint
+    started bool
+}
+
 // sweeps one span
 // returns number of pages returned to heap, or ^uintptr(0) if there is nothing to sweep
 //go:nowritebarrier
@@ -78,6 +78,19 @@ func clearpools() {
     }
 }
 
+// backgroundgc runs in a goroutine and does the concurrent GC work.
+// bggc holds the state of the backgroundgc.
+func backgroundgc() {
+    bggc.g = getg()
+    bggc.g.issystem = true
+    for {
+        gcwork(0)
+        lock(&bggc.lock)
+        bggc.working = 0
+        goparkunlock(&bggc.lock, "Concurrent GC wait")
+    }
+}
+
 func bgsweep() {
     sweep.g = getg()
     getg().issystem = true
@@ -31,7 +31,7 @@ func init() {
     }
     runtime.ReadMemStats(memstats)
     sys1 := memstats.Sys
-    if sys1-sys > chunk*50 {
+    if sys1-sys > chunk*500 {
         println("allocated 1000 chunks of", chunk, "and used ", sys1-sys, "memory")
         panic("init1")
     }
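
Finally, the init test's bound on memstats.Sys growth is loosened tenfold (chunk*50 to chunk*500): with collection now running concurrently, the heap may legitimately grow further before a cycle completes, so the test only guards against a GC that never runs at all. A hedged sketch of the test's shape, with sizes and names chosen for illustration rather than copied from the actual test:

package main

import "runtime"

var sink []byte // global sink keeps the allocations from being optimized away

func main() {
    const chunk = 1 << 20 // assume 1 MB chunks for illustration
    var ms runtime.MemStats
    runtime.ReadMemStats(&ms)
    sys := ms.Sys
    for i := 0; i < 1000; i++ {
        sink = make([]byte, chunk) // garbage: only a working GC keeps Sys bounded
    }
    runtime.ReadMemStats(&ms)
    if ms.Sys-sys > chunk*500 { // deliberately loose bound
        println("allocated 1000 chunks of", chunk, "and used", ms.Sys-sys, "memory")
        panic("GC appears not to have run")
    }
}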