// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: marking and scanning

package runtime

import "unsafe"

// Scan all of the stacks, greying (or graying if in America) the referents
// but not blackening them since the mark write barrier isn't installed.
//go:nowritebarrier
func gcscan_m() {
	_g_ := getg()

	// Grab the g that called us and potentially allow rescheduling.
	// This allows it to be scanned like other goroutines.
	mastergp := _g_.m.curg
	casgstatus(mastergp, _Grunning, _Gwaiting)
	mastergp.waitreason = "garbage collection scan"

	// Span sweeping has been done by finishsweep_m.
	// Long term we will want to make this goroutine runnable
	// by placing it onto a scanenqueue state and then calling
	// runtime·restartg(mastergp) to make it Grunnable.
	// At the bottom we will want to return this p back to the scheduler.

	// Prepare flag indicating that the scan has not been completed.
	local_allglen := gcResetGState()

	work.nwait = 0
	work.ndone = 0
	work.nproc = 1 // For now do not do this in parallel.
	// ackgcphase is not needed since we are not scanning running goroutines.
	parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
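	// The parfor runs markroot once for each of the _RootCount fixed
	// root jobs and once per goroutine counted by gcResetGState, so
	// each markroot call handles exactly one root.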
	parfordo(work.markfor)

	lock(&allglock)
	// Check that gc work is done.
	for i := 0; i < local_allglen; i++ {
		gp := allgs[i]
		if !gp.gcworkdone {
			throw("scan missed a g")
		}
	}
	unlock(&allglock)

	casgstatus(mastergp, _Gwaiting, _Grunning)
	// Let the g that called us continue to run.
}

// ptrmask for an allocation containing a single pointer.
var oneptr = [...]uint8{typePointer}

//go:nowritebarrier
func markroot(desc *parfor, i uint32) {
	var gcw gcWork

	// Note: if you add a case here, please also update heapdump.go:dumproots.
	switch i {
	case _RootData:
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			scanblock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, &gcw)
		}

	case _RootBss:
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			scanblock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, &gcw)
		}

	case _RootFinalizers:
		for fb := allfin; fb != nil; fb = fb.alllink {
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
		}

	case _RootSpans:
		// mark MSpan.specials
		sg := mheap_.sweepgen
		for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
			s := work.spans[spanidx]
			if s.state != mSpanInUse {
				continue
			}
			if !useCheckmark && s.sweepgen != sg {
				// sweepgen was updated (+2) during non-checkmark GC pass
				print("sweep ", s.sweepgen, " ", sg, "\n")
				throw("gc: unswept span")
			}
			for sp := s.specials; sp != nil; sp = sp.next {
				if sp.kind != _KindSpecialFinalizer {
					continue
				}
				// don't mark finalized object, but scan it so we
				// retain everything it points to.
				spf := (*specialfinalizer)(unsafe.Pointer(sp))
				// A finalizer can be set for an inner byte of an object, find object beginning.
				p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
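				// Dividing the special's offset by elemsize and multiplying
				// back rounds it down to the start of the object the
				// finalizer is attached to.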
				if gcphase != _GCscan {
					scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
				}
				scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], &gcw)
			}
		}

	case _RootFlushCaches:
		if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
			flushallmcaches()
		}

	default:
		// the rest is scanning goroutine stacks
		if uintptr(i-_RootCount) >= allglen {
			throw("markroot: bad index")
		}
		gp := allgs[i-_RootCount]

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// Shrink the stack if not much of it is being used, but only
		// during mark termination, not during the scan phase.
		if gcphase == _GCmarktermination {
			// Shrink during STW GCmarktermination phase thus avoiding
			// complications introduced by shrinking during
			// non-STW phases.
			shrinkstack(gp)
		}
		if readgstatus(gp) == _Gdead {
			gp.gcworkdone = true
		} else {
			gp.gcworkdone = false
		}
		restart := stopg(gp)

		// goroutine will scan its own stack when it stops running.
		// Wait until it has.
		for readgstatus(gp) == _Grunning && !gp.gcworkdone {
		}

		// scanstack(gp) is done as part of gcphasework.
		// But to make sure we finished we need to make sure that
		// the stack traps have all responded, so drop into
		// this loop until they respond.
		for !gp.gcworkdone {
			status = readgstatus(gp)
			if status == _Gdead {
				gp.gcworkdone = true // scan is a noop
				break
			}
			if status == _Gwaiting || status == _Grunnable {
				restart = stopg(gp)
			}
		}
		if restart {
			restartg(gp)
		}
	}

	// Roots aren't part of the heap, so don't count them toward
	// marked heap bytes.
	gcw.bytesMarked = 0
	gcw.scanWork = 0
	gcw.dispose()
}

// gcAssistAlloc records an allocation of size bytes and, if
// allowAssist is true, may assist GC scanning in proportion to the
// allocations performed by this mutator since the last assist.
//
// It should only be called during gcphase == _GCmark.
//go:nowritebarrier
func gcAssistAlloc(size uintptr, allowAssist bool) {
	// Find the G responsible for this assist.
	gp := getg()
	if gp.m.curg != nil {
		gp = gp.m.curg
	}

	// Record allocation.
	gp.gcalloc += size

	if !allowAssist {
		return
	}

	// Compute the amount of assist scan work we need to do.
	scanWork := int64(gcController.assistRatio*float64(gp.gcalloc)) - gp.gcscanwork
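	// For example, with an assistRatio of 2, a goroutine that has
	// allocated 1000 bytes but performed only 1500 units of scan work
	// since the last assist still owes 500 units at this point.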
	// scanWork can be negative if the last assist scanned a large
	// object and we're still ahead of our assist goal.
	if scanWork <= 0 {
		return
	}

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomicloadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
		} else {
			stolen = scanWork
		}
		xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen
		gp.gcscanwork += stolen

		if scanWork == 0 {
			return
		}
	}

	// Perform assist work
	systemstack(func() {
		// Track time spent in this assist. Since we're on the
		// system stack, this is non-preemptible, so we can
		// just measure start and end time.
		startTime := nanotime()

		xadd(&work.nwait, -1)

		// drain own current wbuf first in the hopes that it
		// will be more cache friendly.
		var gcw gcWork
		gcw.initFromCache()
		startScanWork := gcw.scanWork
		gcDrainN(&gcw, scanWork)
		// Record that we did this much scan work.
		gp.gcscanwork += gcw.scanWork - startScanWork
		// TODO(austin): This is the vast majority of our
		// disposes. Instead of constantly disposing, keep a
		// per-P gcWork cache (probably combined with the
		// write barrier wbuf cache).
		gcw.dispose()

		// If this is the last worker and we ran out of work,
		// signal a completion point.
		if xadd(&work.nwait, +1) == work.nproc && work.full == 0 && work.partial == 0 {
			// This has reached a background completion
			// point. Is it the first this cycle?
			if cas(&work.bgMarkDone, 0, 1) {
				notewakeup(&work.bgMarkNote)
			}
		}

		duration := nanotime() - startTime
		_p_ := gp.m.p.ptr()
		_p_.gcAssistTime += duration
		if _p_.gcAssistTime > gcAssistTimeSlack {
			xaddint64(&gcController.assistTime, _p_.gcAssistTime)
			_p_.gcAssistTime = 0
		}
	})
}

// The gp has been moved to a GC safepoint. GC phase specific
// work is done here.
//go:nowritebarrier
func gcphasework(gp *g) {
	switch gcphase {
	default:
		throw("gcphasework in bad gcphase")
	case _GCoff, _GCquiesce, _GCstw, _GCsweep:
		// No work.
	case _GCscan:
		// scan the stack, mark the objects, put pointers in work buffers
		// hanging off the P where this is being run.
		// Indicate that the scan is valid until the goroutine runs again.
		scanstack(gp)
	case _GCmark:
		// No work.
	case _GCmarktermination:
		scanstack(gp)
		// All available mark work will be emptied before returning.
	}
	gp.gcworkdone = true
}

//go:nowritebarrier
func scanstack(gp *g) {
	if gp.gcscanvalid {
		return
	}

	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		return
	case _Grunning:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("scanstack: goroutine not stopped")
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	if gp == getg() {
		throw("can't scan our own stack")
	}
	mp := gp.m
	if mp != nil && mp.helpgc != 0 {
		throw("can't scan gchelper stack")
	}

	var gcw gcWork
	gcw.initFromCache()
	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
		// Pick up gcw as free variable so gentraceback and friends can
		// keep the same signature.
		scanframeworker(frame, unused, &gcw)
		return true
	}
	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
	tracebackdefers(gp, scanframe, nil)
	// Stacks aren't part of the heap, so don't count them toward
	// marked heap bytes.
	gcw.bytesMarked = 0
	gcw.scanWork = 0
	gcw.disposeToCache()
	gp.gcscanvalid = true
}

// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWork) {

	f := frame.fn
	targetpc := frame.continpc
	if targetpc == 0 {
		// Frame is dead.
		return
	}
	if _DebugGC > 1 {
		print("scanframe ", funcname(f), "\n")
	}
	if targetpc != f.entry {
		targetpc--
	}
	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
	if pcdata == -1 {
		// We do not have a valid pcdata value but there might be a
		// stackmap for this function. It is likely that we are looking
		// at the function prologue, assume so and hope for the best.
		pcdata = 0
	}

	// Scan local variables if stack frame has been allocated.
	size := frame.varp - frame.sp
	var minsize uintptr
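	// The smallest frame that can hold locals depends on the
	// architecture: thechar is '6' for amd64, '8' for 386, and '7' for
	// arm64. Frames no larger than minsize have no locals to scan.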
	switch thechar {
	case '6', '8':
		minsize = 0
	case '7':
		minsize = spAlign
	default:
		minsize = ptrSize
	}
	if size > minsize {
		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
		if stkmap == nil || stkmap.n <= 0 {
			print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
			throw("missing stackmap")
		}

		// Locals bitmap information, scan just the pointers in locals.
		if pcdata < 0 || pcdata >= stkmap.n {
			// don't know where we are
			print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
			throw("scanframe: bad symbol table")
		}
		bv := stackmapdata(stkmap, pcdata)
		size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
		scanblock(frame.varp-size, size, bv.bytedata, gcw)
	}

	// Scan arguments.
	if frame.arglen > 0 {
		var bv bitvector
		if frame.argmap != nil {
			bv = *frame.argmap
		} else {
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			if stkmap == nil || stkmap.n <= 0 {
				print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
				throw("missing stackmap")
			}
			if pcdata < 0 || pcdata >= stkmap.n {
				// don't know where we are
				print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
				throw("scanframe: bad symbol table")
			}
			bv = stackmapdata(stkmap, pcdata)
		}
		scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, gcw)
	}
}

// TODO(austin): Can we consolidate the gcDrain* functions?

// gcDrain scans objects in work buffers, blackening grey
// objects until all work buffers have been drained.
// If flushScanCredit != -1, gcDrain flushes accumulated scan work
// credit to gcController.bgScanCredit whenever gcw's local scan work
// credit exceeds flushScanCredit.
//go:nowritebarrier
func gcDrain(gcw *gcWork, flushScanCredit int64) {
	if gcphase != _GCmark && gcphase != _GCmarktermination {
		throw("scanblock phase incorrect")
	}

	var lastScanFlush, nextScanFlush int64
	if flushScanCredit != -1 {
		lastScanFlush = gcw.scanWork
		nextScanFlush = lastScanFlush + flushScanCredit
	} else {
		nextScanFlush = int64(^uint64(0) >> 1)
	}
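	// When flushScanCredit is -1, nextScanFlush is left at MaxInt64 so
	// the flush check in the loop below never fires.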

	for {
		// If another proc wants a pointer, give it some.
		if work.nwait > 0 && work.full == 0 {
			gcw.balance()
		}

		b := gcw.get()
		if b == 0 {
			// work barrier reached
			break
		}
		// If the current wbuf is filled by the scan a new wbuf might be
		// returned that could possibly hold only a single object. This
		// could result in each iteration draining only a single object
		// out of the wbuf passed in + a single object placed
		// into an empty wbuf in scanobject so there could be
		// a performance hit as we keep fetching fresh wbufs.
		scanobject(b, 0, nil, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= nextScanFlush {
			credit := gcw.scanWork - lastScanFlush
			xaddint64(&gcController.bgScanCredit, credit)
			lastScanFlush = gcw.scanWork
			nextScanFlush = lastScanFlush + flushScanCredit
		}
	}
	if flushScanCredit != -1 {
		credit := gcw.scanWork - lastScanFlush
		xaddint64(&gcController.bgScanCredit, credit)
	}
	checknocurrentwbuf()
}

// gcDrainUntilPreempt blackens grey objects until g.preempt is set.
// This is best-effort, so it will return as soon as it is unable to
// get work, even though there may be more work in the system.
//go:nowritebarrier
func gcDrainUntilPreempt(gcw *gcWork, flushScanCredit int64) {
	if gcphase != _GCmark {
		println("gcphase =", gcphase)
		throw("gcDrainUntilPreempt phase incorrect")
	}

	var lastScanFlush, nextScanFlush int64
	if flushScanCredit != -1 {
		lastScanFlush = gcw.scanWork
		nextScanFlush = lastScanFlush + flushScanCredit
	} else {
		nextScanFlush = int64(^uint64(0) >> 1)
	}

	gp := getg()
	for !gp.preempt {
		// If the work queue is empty, balance. During
		// concurrent mark we don't really know if anyone else
		// can make use of this work, but even if we're the
		// only worker, the total cost of this per cycle is
		// only O(_WorkbufSize) pointer copies.
		if work.full == 0 && work.partial == 0 {
			gcw.balance()
		}

		b := gcw.tryGet()
		if b == 0 {
			// No more work
			break
		}
		scanobject(b, 0, nil, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= nextScanFlush {
			credit := gcw.scanWork - lastScanFlush
			xaddint64(&gcController.bgScanCredit, credit)
			lastScanFlush = gcw.scanWork
			nextScanFlush = lastScanFlush + flushScanCredit
		}
	}
	if flushScanCredit != -1 {
		credit := gcw.scanWork - lastScanFlush
		xaddint64(&gcController.bgScanCredit, credit)
	}
}

// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work. This is best-effort, so it may perform
// less work if it fails to get a work buffer. Otherwise, it will
// perform at least scanWork units of work, but may perform more because
// scanning is always done in whole object increments.
//go:nowritebarrier
func gcDrainN(gcw *gcWork, scanWork int64) {
	checknocurrentwbuf()
	targetScanWork := gcw.scanWork + scanWork
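	// gcw.scanWork is cumulative for this gcWork, so the loop below runs
	// until roughly scanWork additional units have been performed.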
	for gcw.scanWork < targetScanWork {
		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		// }
		b := gcw.tryGet()
		if b == 0 {
			return
		}
		scanobject(b, 0, nil, gcw)
	}
}

// scanblock scans b as scanobject would.
// If the gcphase is GCscan, scanblock performs additional checks.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	// ptrmask can have 2 possible values:
	// 1. nil - obtain pointer mask from GC bitmap.
	// 2. pointer to a compact mask (for stacks and data).
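	// A compact mask packs one typeBitsWidth-bit entry per pointer-sized
	// word of b; scanobject decodes it when ptrmask is non-nil.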

	scanobject(b, n, ptrmask, gcw)
	if gcphase == _GCscan {
		if inheap(b) && ptrmask == nil {
			// b is in heap, we are in GCscan so there should be a ptrmask.
			throw("scanblock: In GCscan phase and inheap is true.")
		}
	}
}

// Scan the object b of size n bytes, adding pointers to wbuf.
// If ptrmask != nil, it specifies where pointers are in b.
// If ptrmask == nil, the GC bitmap should be consulted.
// In this case, n may be an overestimate of the size; the GC bitmap
// must also be used to make sure the scan stops at the end of b.
//go:nowritebarrier
func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWork) {
	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used
	scanWork := int64(0)

	// Find bits of the beginning of the object.
	var hbits heapBits

	if ptrmask == nil {
		var s *mspan
		b, hbits, s = heapBitsForObject(b)
		if b == 0 {
			return
		}
		n = s.elemsize
		if n == 0 {
			throw("scanobject n == 0")
		}
	}
	for i := uintptr(0); i < n; i += ptrSize {
		// Find bits for this word.
		var bits uintptr
		if ptrmask != nil {
			// dense mask (stack or data)
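			// Each mask byte holds four typeBitsWidth-bit entries, so the
			// entry for word i/ptrSize lives in byte (i/ptrSize)/4 at bit
			// offset ((i/ptrSize)%4)*typeBitsWidth.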
			bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
		} else {
			if i != 0 {
				// Avoid needless hbits.next() on last iteration.
				hbits = hbits.next()
			}
			bits = uintptr(hbits.typeBits())
			if bits == typeDead {
				break // no more pointers in this object
			}
		}

		if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
			continue
		}

		if bits&typePointer != typePointer {
			print("gc useCheckmark=", useCheckmark, " b=", hex(b), " ptrmask=", ptrmask, "\n")
			throw("unexpected garbage collection bits")
		}

		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// Track the scan work performed as a way to estimate
		// GC time. We use the number of pointers scanned
		// because pointer scanning dominates the cost of
		// scanning.
		//
		// TODO(austin): Consider counting only pointers into
		// the heap, since nil and non-heap pointers are
		// probably cheap to scan.
		scanWork++

		// At this point we have extracted the next potential pointer.
		// Check if it points into heap.
		if obj == 0 || obj < arena_start || obj >= arena_used {
			continue
		}

		if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
			checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
		}

		// Mark the object.
		if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
			greyobject(obj, b, i, hbits, span, gcw)
		}
	}
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += scanWork
}

// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
//go:nowritebarrier
func shade(b uintptr) {
	if obj, hbits, span := heapBitsForObject(b); obj != 0 {
		// TODO: this would be a great place to put a check to see
		// if we are harvesting and if we are then we should
		// figure out why there is a call to shade when the
		// harvester thinks we are in a STW.
		// if atomicload(&harvestingwbufs) == uint32(1) {
		//	// Throw here to discover write barriers
		//	// being executed during a STW.
		//	throw("shade during harvest")
		// }

		var gcw gcWork
		greyobject(obj, 0, 0, hbits, span, &gcw)
		// This is part of the write barrier so put the wbuf back.
		if gcphase == _GCmarktermination {
			gcw.dispose()
		} else {
			// If we added any pointers to the gcw, then
			// currentwbuf must be nil because 1)
			// greyobject got its wbuf from currentwbuf
			// and 2) shade runs on the systemstack, so
			// we're still on the same M. If either of
			// these becomes no longer true, we need to
			// rethink this.
			gcw.disposeToCache()
		}
	}
}

// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into workbuf.
// Return possibly new workbuf to use.
// base and off are for debugging only and could be removed.
//go:nowritebarrier
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
	if obj&(ptrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}

	if useCheckmark {
		if !hbits.isMarked() {
			printlock()
			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")

			// Dump the source (base) object
			gcDumpObject("base", base, off)

			// Dump the object
			gcDumpObject("obj", obj, ^uintptr(0))

			throw("checkmark found unmarked object")
		}
		if hbits.isCheckmarked() {
			return
		}
		hbits.setCheckmarked()
		if !hbits.isCheckmarked() {
			throw("setCheckmarked and isCheckmarked disagree")
		}
	} else {
		// If marked we have nothing to do.
		if hbits.isMarked() {
			return
		}

		hbits.setMarked()

		// If this is a noscan object, fast-track it to black
		// instead of greying it.
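		// (typeDead in the first heap bitmap entry means the object
		// contains no pointers, so there is nothing to scan.)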
		if hbits.typeBits() == typeDead {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH

	gcw.put(obj)
}

// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	k := obj >> _PageShift
	x := k
	x -= mheap_.arena_start >> _PageShift
	s := h_spans[x]
	print(label, "=", hex(obj), " k=", hex(k))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
	for i := uintptr(0); i < s.elemsize; i += ptrSize {
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)))))
		if i == off {
			print(" <==")
		}
		print("\n")
	}
}

// When in GCmarktermination phase we allocate black.
//go:nowritebarrier
func gcmarknewobject_m(obj, size uintptr) {
	if gcphase != _GCmarktermination {
		throw("marking new object while not in mark termination phase")
	}
	if useCheckmark { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}

	heapBitsForAddr(obj).setMarked()
	xadd64(&work.bytesMarked, int64(size))
}

// Checkmarking

// To help debug the concurrent GC we remark with the world
// stopped ensuring that any object encountered has its normal
// mark bit set. To do this we use an orthogonal bit
// pattern to indicate the object is marked. The following pattern
// uses the upper two bits in the object's boundary nibble.
// 01: scalar not marked
// 10: pointer not marked
// 11: pointer marked
// 00: scalar marked
// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
// The higher bit is 1 for pointers and 0 for scalars, whether the object
// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that
// there are no more pointers in the object. This information is held
// in the second nibble.
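// For example, a slot whose boundary nibble reads 10 (pointer, not
// marked) becomes 11 (pointer, marked) when it is checkmarked; xoring
// with 01 restores the unmarked encoding.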

// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false

//go:nowritebarrier
func initCheckmarks() {
	useCheckmark = true
	for _, s := range work.spans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
		}
	}
}

func clearCheckmarks() {
	useCheckmark = false
	for _, s := range work.spans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
		}
	}
}