1
0
mirror of https://github.com/golang/go synced 2024-11-19 22:24:49 -07:00
go/src/runtime/mgcmark.go

817 lines
24 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector: marking and scanning
package runtime
import "unsafe"
// Scan all of the stacks, greying (or graying if in America) the referents
// but not blackening them since the mark write barrier isn't installed.
//go:nowritebarrier
func gcscan_m() {
_g_ := getg()
// Grab the g that called us and potentially allow rescheduling.
// This allows it to be scanned like other goroutines.
mastergp := _g_.m.curg
casgstatus(mastergp, _Grunning, _Gwaiting)
mastergp.waitreason = "garbage collection scan"
// Span sweeping has been done by finishsweep_m.
// Long term we will want to make this goroutine runnable
// by placing it onto a scanenqueue state and then calling
// runtime·restartg(mastergp) to make it Grunnable.
// At the bottom we will want to return this p back to the scheduler.
// Prepare flag indicating that the scan has not been completed.
local_allglen := gcResetGState()
work.nwait = 0
work.ndone = 0
work.nproc = 1 // For now do not do this in parallel.
// ackgcphase is not needed since we are not scanning running goroutines.
parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
parfordo(work.markfor)
lock(&allglock)
// Check that gc work is done.
for i := 0; i < local_allglen; i++ {
gp := allgs[i]
if !gp.gcworkdone {
throw("scan missed a g")
}
}
unlock(&allglock)
casgstatus(mastergp, _Gwaiting, _Grunning)
// Let the g that called us continue to run.
}
// ptrmask for an allocation containing a single pointer.
var oneptr = [...]uint8{typePointer}
//go:nowritebarrier
func markroot(desc *parfor, i uint32) {
var gcw gcWork
// Note: if you add a case here, please also update heapdump.go:dumproots.
switch i {
case _RootData:
for datap := &firstmoduledata; datap != nil; datap = datap.next {
scanblock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, &gcw)
}
case _RootBss:
for datap := &firstmoduledata; datap != nil; datap = datap.next {
scanblock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, &gcw)
}
case _RootFinalizers:
for fb := allfin; fb != nil; fb = fb.alllink {
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
}
case _RootSpans:
// mark MSpan.specials
sg := mheap_.sweepgen
for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
s := work.spans[spanidx]
if s.state != mSpanInUse {
continue
}
if !useCheckmark && s.sweepgen != sg {
// sweepgen was updated (+2) during non-checkmark GC pass
print("sweep ", s.sweepgen, " ", sg, "\n")
throw("gc: unswept span")
}
for sp := s.specials; sp != nil; sp = sp.next {
if sp.kind != _KindSpecialFinalizer {
continue
}
// don't mark finalized object, but scan it so we
// retain everything it points to.
spf := (*specialfinalizer)(unsafe.Pointer(sp))
// A finalizer can be set for an inner byte of an object, find object beginning.
p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
if gcphase != _GCscan {
scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
}
scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], &gcw)
}
}
case _RootFlushCaches:
if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
flushallmcaches()
}
default:
// the rest is scanning goroutine stacks
if uintptr(i-_RootCount) >= allglen {
throw("markroot: bad index")
}
gp := allgs[i-_RootCount]
// remember when we've first observed the G blocked
// needed only to output in traceback
status := readgstatus(gp) // We are not in a scan state
if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
gp.waitsince = work.tstart
}
// Shrink a stack if not much of it is being used but not in the scan phase.
if gcphase == _GCmarktermination {
// Shrink during STW GCmarktermination phase thus avoiding
// complications introduced by shrinking during
// non-STW phases.
shrinkstack(gp)
}
if readgstatus(gp) == _Gdead {
gp.gcworkdone = true
} else {
gp.gcworkdone = false
}
restart := stopg(gp)
// goroutine will scan its own stack when it stops running.
// Wait until it has.
for readgstatus(gp) == _Grunning && !gp.gcworkdone {
}
// scanstack(gp) is done as part of gcphasework
// But to make sure we finished we need to make sure that
// the stack traps have all responded so drop into
// this while loop until they respond.
for !gp.gcworkdone {
status = readgstatus(gp)
if status == _Gdead {
gp.gcworkdone = true // scan is a noop
break
}
if status == _Gwaiting || status == _Grunnable {
restart = stopg(gp)
}
}
if restart {
restartg(gp)
}
}
// Root aren't part of the heap, so don't count them toward
// marked heap bytes.
gcw.bytesMarked = 0
gcw.scanWork = 0
gcw.dispose()
}
// gcAssistAlloc records and allocation of size bytes and, if
// allowAssist is true, may assist GC scanning in proportion to the
// allocations performed by this mutator since the last assist.
//
// It should only be called during gcphase == _GCmark.
//go:nowritebarrier
func gcAssistAlloc(size uintptr, allowAssist bool) {
// Find the G responsible for this assist.
gp := getg()
if gp.m.curg != nil {
gp = gp.m.curg
}
// Record allocation.
gp.gcalloc += size
if !allowAssist {
return
}
// Compute the amount of assist scan work we need to do.
scanWork := int64(gcController.assistRatio*float64(gp.gcalloc)) - gp.gcscanwork
// scanWork can be negative if the last assist scanned a large
// object and we're still ahead of our assist goal.
if scanWork <= 0 {
return
}
// Steal as much credit as we can from the background GC's
// scan credit. This is racy and may drop the background
// credit below 0 if two mutators steal at the same time. This
// will just cause steals to fail until credit is accumulated
// again, so in the long run it doesn't really matter, but we
// do have to handle the negative credit case.
bgScanCredit := atomicloadint64(&gcController.bgScanCredit)
stolen := int64(0)
if bgScanCredit > 0 {
if bgScanCredit < scanWork {
stolen = bgScanCredit
} else {
stolen = scanWork
}
xaddint64(&gcController.bgScanCredit, -scanWork)
scanWork -= stolen
gp.gcscanwork += stolen
if scanWork == 0 {
return
}
}
// Perform assist work
systemstack(func() {
// Track time spent in this assist. Since we're on the
// system stack, this is non-preemptible, so we can
// just measure start and end time.
startTime := nanotime()
runtime: multi-threaded, utilization-scheduled background mark Currently, the concurrent mark phase is performed by the main GC goroutine. Prior to the previous commit enabling preemption, this caused marking to always consume 1/GOMAXPROCS of the available CPU time. If GOMAXPROCS=1, this meant background GC would consume 100% of the CPU (effectively a STW). If GOMAXPROCS>4, background GC would use less than the goal of 25%. If GOMAXPROCS=4, background GC would use the goal 25%, but if the mutator wasn't using the remaining 75%, background marking wouldn't take advantage of the idle time. Enabling preemption in the previous commit made GC miss CPU targets in completely different ways, but set us up to bring everything back in line. This change replaces the fixed GC goroutine with per-P background mark goroutines. Once started, these goroutines don't go in the standard run queues; instead, they are scheduled specially such that the time spent in mutator assists and the background mark goroutines totals 25% of the CPU time available to the program. Furthermore, this lets background marking take advantage of idle Ps, which significantly boosts GC performance for applications that under-utilize the CPU. This requires also changing how time is reported for gctrace, so this change splits the concurrent mark CPU time into assist/background/idle scanning. This also requires increasing the size of the StackRecord slice used in a GoroutineProfile test. Change-Id: I0936ff907d2cee6cb687a208f2df47e8988e3157 Reviewed-on: https://go-review.googlesource.com/8850 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-23 19:07:33 -06:00
xadd(&work.nwait, -1)
// drain own current wbuf first in the hopes that it
// will be more cache friendly.
var gcw gcWork
gcw.initFromCache()
startScanWork := gcw.scanWork
gcDrainN(&gcw, scanWork)
// Record that we did this much scan work.
gp.gcscanwork += gcw.scanWork - startScanWork
// TODO(austin): This is the vast majority of our
// disposes. Instead of constantly disposing, keep a
// per-P gcWork cache (probably combined with the
// write barrier wbuf cache).
gcw.dispose()
runtime: multi-threaded, utilization-scheduled background mark Currently, the concurrent mark phase is performed by the main GC goroutine. Prior to the previous commit enabling preemption, this caused marking to always consume 1/GOMAXPROCS of the available CPU time. If GOMAXPROCS=1, this meant background GC would consume 100% of the CPU (effectively a STW). If GOMAXPROCS>4, background GC would use less than the goal of 25%. If GOMAXPROCS=4, background GC would use the goal 25%, but if the mutator wasn't using the remaining 75%, background marking wouldn't take advantage of the idle time. Enabling preemption in the previous commit made GC miss CPU targets in completely different ways, but set us up to bring everything back in line. This change replaces the fixed GC goroutine with per-P background mark goroutines. Once started, these goroutines don't go in the standard run queues; instead, they are scheduled specially such that the time spent in mutator assists and the background mark goroutines totals 25% of the CPU time available to the program. Furthermore, this lets background marking take advantage of idle Ps, which significantly boosts GC performance for applications that under-utilize the CPU. This requires also changing how time is reported for gctrace, so this change splits the concurrent mark CPU time into assist/background/idle scanning. This also requires increasing the size of the StackRecord slice used in a GoroutineProfile test. Change-Id: I0936ff907d2cee6cb687a208f2df47e8988e3157 Reviewed-on: https://go-review.googlesource.com/8850 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-23 19:07:33 -06:00
// If this is the last worker and we ran out of work,
// signal a completion point.
if xadd(&work.nwait, +1) == work.nproc && work.full == 0 && work.partial == 0 {
// This has reached a background completion
// point. Is it the first this cycle?
if cas(&work.bgMarkDone, 0, 1) {
notewakeup(&work.bgMarkNote)
}
}
duration := nanotime() - startTime
_p_ := gp.m.p.ptr()
_p_.gcAssistTime += duration
if _p_.gcAssistTime > gcAssistTimeSlack {
xaddint64(&gcController.assistTime, _p_.gcAssistTime)
_p_.gcAssistTime = 0
}
})
}
// The gp has been moved to a GC safepoint. GC phase specific
// work is done here.
//go:nowritebarrier
func gcphasework(gp *g) {
switch gcphase {
default:
throw("gcphasework in bad gcphase")
case _GCoff, _GCquiesce, _GCstw, _GCsweep:
// No work.
case _GCscan:
// scan the stack, mark the objects, put pointers in work buffers
// hanging off the P where this is being run.
// Indicate that the scan is valid until the goroutine runs again
scanstack(gp)
case _GCmark:
// No work.
case _GCmarktermination:
scanstack(gp)
// All available mark work will be emptied before returning.
}
gp.gcworkdone = true
}
//go:nowritebarrier
func scanstack(gp *g) {
if gp.gcscanvalid {
return
}
if readgstatus(gp)&_Gscan == 0 {
print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
throw("scanstack - bad status")
}
switch readgstatus(gp) &^ _Gscan {
default:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("mark - bad status")
case _Gdead:
return
case _Grunning:
print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
throw("scanstack: goroutine not stopped")
case _Grunnable, _Gsyscall, _Gwaiting:
// ok
}
if gp == getg() {
throw("can't scan our own stack")
}
mp := gp.m
if mp != nil && mp.helpgc != 0 {
throw("can't scan gchelper stack")
}
var gcw gcWork
gcw.initFromCache()
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
// Pick up gcw as free variable so gentraceback and friends can
// keep the same signature.
scanframeworker(frame, unused, &gcw)
return true
}
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
tracebackdefers(gp, scanframe, nil)
// Stacks aren't part of the heap, so don't count them toward
// marked heap bytes.
gcw.bytesMarked = 0
gcw.scanWork = 0
gcw.disposeToCache()
gp.gcscanvalid = true
}
// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWork) {
f := frame.fn
targetpc := frame.continpc
if targetpc == 0 {
// Frame is dead.
return
}
if _DebugGC > 1 {
print("scanframe ", funcname(f), "\n")
}
if targetpc != f.entry {
targetpc--
}
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
if pcdata == -1 {
// We do not have a valid pcdata value but there might be a
// stackmap for this function. It is likely that we are looking
// at the function prologue, assume so and hope for the best.
pcdata = 0
}
// Scan local variables if stack frame has been allocated.
size := frame.varp - frame.sp
var minsize uintptr
switch thechar {
case '6', '8':
minsize = 0
case '7':
minsize = spAlign
default:
minsize = ptrSize
}
if size > minsize {
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
throw("missing stackmap")
}
// Locals bitmap information, scan just the pointers in locals.
if pcdata < 0 || pcdata >= stkmap.n {
// don't know where we are
print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
throw("scanframe: bad symbol table")
}
bv := stackmapdata(stkmap, pcdata)
size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
scanblock(frame.varp-size, size, bv.bytedata, gcw)
}
// Scan arguments.
if frame.arglen > 0 {
var bv bitvector
if frame.argmap != nil {
bv = *frame.argmap
} else {
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
throw("missing stackmap")
}
if pcdata < 0 || pcdata >= stkmap.n {
// don't know where we are
print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
throw("scanframe: bad symbol table")
}
bv = stackmapdata(stkmap, pcdata)
}
scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, gcw)
}
}
runtime: multi-threaded, utilization-scheduled background mark Currently, the concurrent mark phase is performed by the main GC goroutine. Prior to the previous commit enabling preemption, this caused marking to always consume 1/GOMAXPROCS of the available CPU time. If GOMAXPROCS=1, this meant background GC would consume 100% of the CPU (effectively a STW). If GOMAXPROCS>4, background GC would use less than the goal of 25%. If GOMAXPROCS=4, background GC would use the goal 25%, but if the mutator wasn't using the remaining 75%, background marking wouldn't take advantage of the idle time. Enabling preemption in the previous commit made GC miss CPU targets in completely different ways, but set us up to bring everything back in line. This change replaces the fixed GC goroutine with per-P background mark goroutines. Once started, these goroutines don't go in the standard run queues; instead, they are scheduled specially such that the time spent in mutator assists and the background mark goroutines totals 25% of the CPU time available to the program. Furthermore, this lets background marking take advantage of idle Ps, which significantly boosts GC performance for applications that under-utilize the CPU. This requires also changing how time is reported for gctrace, so this change splits the concurrent mark CPU time into assist/background/idle scanning. This also requires increasing the size of the StackRecord slice used in a GoroutineProfile test. Change-Id: I0936ff907d2cee6cb687a208f2df47e8988e3157 Reviewed-on: https://go-review.googlesource.com/8850 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-23 19:07:33 -06:00
// TODO(austin): Can we consolidate the gcDrain* functions?
// gcDrain scans objects in work buffers, blackening grey
// objects until all work buffers have been drained.
// If flushScanCredit != -1, gcDrain flushes accumulated scan work
// credit to gcController.bgScanCredit whenever gcw's local scan work
// credit exceeds flushScanCredit.
//go:nowritebarrier
func gcDrain(gcw *gcWork, flushScanCredit int64) {
if gcphase != _GCmark && gcphase != _GCmarktermination {
throw("scanblock phase incorrect")
}
var lastScanFlush, nextScanFlush int64
if flushScanCredit != -1 {
lastScanFlush = gcw.scanWork
nextScanFlush = lastScanFlush + flushScanCredit
} else {
nextScanFlush = int64(^uint64(0) >> 1)
}
for {
// If another proc wants a pointer, give it some.
if work.nwait > 0 && work.full == 0 {
gcw.balance()
}
b := gcw.get()
if b == 0 {
// work barrier reached
break
}
// If the current wbuf is filled by the scan a new wbuf might be
// returned that could possibly hold only a single object. This
// could result in each iteration draining only a single object
// out of the wbuf passed in + a single object placed
// into an empty wbuf in scanobject so there could be
// a performance hit as we keep fetching fresh wbufs.
scanobject(b, 0, nil, gcw)
// Flush background scan work credit to the global
// account if we've accumulated enough locally so
// mutator assists can draw on it.
if gcw.scanWork >= nextScanFlush {
credit := gcw.scanWork - lastScanFlush
xaddint64(&gcController.bgScanCredit, credit)
lastScanFlush = gcw.scanWork
nextScanFlush = lastScanFlush + flushScanCredit
}
}
if flushScanCredit != -1 {
credit := gcw.scanWork - lastScanFlush
xaddint64(&gcController.bgScanCredit, credit)
}
checknocurrentwbuf()
}
runtime: multi-threaded, utilization-scheduled background mark Currently, the concurrent mark phase is performed by the main GC goroutine. Prior to the previous commit enabling preemption, this caused marking to always consume 1/GOMAXPROCS of the available CPU time. If GOMAXPROCS=1, this meant background GC would consume 100% of the CPU (effectively a STW). If GOMAXPROCS>4, background GC would use less than the goal of 25%. If GOMAXPROCS=4, background GC would use the goal 25%, but if the mutator wasn't using the remaining 75%, background marking wouldn't take advantage of the idle time. Enabling preemption in the previous commit made GC miss CPU targets in completely different ways, but set us up to bring everything back in line. This change replaces the fixed GC goroutine with per-P background mark goroutines. Once started, these goroutines don't go in the standard run queues; instead, they are scheduled specially such that the time spent in mutator assists and the background mark goroutines totals 25% of the CPU time available to the program. Furthermore, this lets background marking take advantage of idle Ps, which significantly boosts GC performance for applications that under-utilize the CPU. This requires also changing how time is reported for gctrace, so this change splits the concurrent mark CPU time into assist/background/idle scanning. This also requires increasing the size of the StackRecord slice used in a GoroutineProfile test. Change-Id: I0936ff907d2cee6cb687a208f2df47e8988e3157 Reviewed-on: https://go-review.googlesource.com/8850 Reviewed-by: Rick Hudson <rlh@golang.org>
2015-03-23 19:07:33 -06:00
// gcDrainUntilPreempt blackens grey objects until g.preempt is set.
// This is best-effort, so it will return as soon as it is unable to
// get work, even though there may be more work in the system.
//go:nowritebarrier
func gcDrainUntilPreempt(gcw *gcWork, flushScanCredit int64) {
if gcphase != _GCmark {
println("gcphase =", gcphase)
throw("gcDrainUntilPreempt phase incorrect")
}
var lastScanFlush, nextScanFlush int64
if flushScanCredit != -1 {
lastScanFlush = gcw.scanWork
nextScanFlush = lastScanFlush + flushScanCredit
} else {
nextScanFlush = int64(^uint64(0) >> 1)
}
gp := getg()
for !gp.preempt {
// If the work queue is empty, balance. During
// concurrent mark we don't really know if anyone else
// can make use of this work, but even if we're the
// only worker, the total cost of this per cycle is
// only O(_WorkbufSize) pointer copies.
if work.full == 0 && work.partial == 0 {
gcw.balance()
}
b := gcw.tryGet()
if b == 0 {
// No more work
break
}
scanobject(b, 0, nil, gcw)
// Flush background scan work credit to the global
// account if we've accumulated enough locally so
// mutator assists can draw on it.
if gcw.scanWork >= nextScanFlush {
credit := gcw.scanWork - lastScanFlush
xaddint64(&gcController.bgScanCredit, credit)
lastScanFlush = gcw.scanWork
nextScanFlush = lastScanFlush + flushScanCredit
}
}
if flushScanCredit != -1 {
credit := gcw.scanWork - lastScanFlush
xaddint64(&gcController.bgScanCredit, credit)
}
}
// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work. This is best-effort, so it may perform
// less work if it fails to get a work buffer. Otherwise, it will
// perform at least n units of work, but may perform more because
// scanning is always done in whole object increments.
//go:nowritebarrier
func gcDrainN(gcw *gcWork, scanWork int64) {
checknocurrentwbuf()
targetScanWork := gcw.scanWork + scanWork
for gcw.scanWork < targetScanWork {
// This might be a good place to add prefetch code...
// if(wbuf.nobj > 4) {
// PREFETCH(wbuf->obj[wbuf.nobj - 3];
// }
b := gcw.tryGet()
if b == 0 {
return
}
scanobject(b, 0, nil, gcw)
}
}
// scanblock scans b as scanobject would.
// If the gcphase is GCscan, scanblock performs additional checks.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
// Use local copies of original parameters, so that a stack trace
// due to one of the throws below shows the original block
// base and extent.
b := b0
n := n0
// ptrmask can have 2 possible values:
// 1. nil - obtain pointer mask from GC bitmap.
// 2. pointer to a compact mask (for stacks and data).
scanobject(b, n, ptrmask, gcw)
if gcphase == _GCscan {
if inheap(b) && ptrmask == nil {
// b is in heap, we are in GCscan so there should be a ptrmask.
throw("scanblock: In GCscan phase and inheap is true.")
}
}
}
// Scan the object b of size n bytes, adding pointers to wbuf.
// If ptrmask != nil, it specifies where pointers are in b.
// If ptrmask == nil, the GC bitmap should be consulted.
// In this case, n may be an overestimate of the size; the GC bitmap
// must also be used to make sure the scan stops at the end of b.
//go:nowritebarrier
func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWork) {
arena_start := mheap_.arena_start
arena_used := mheap_.arena_used
scanWork := int64(0)
// Find bits of the beginning of the object.
var hbits heapBits
if ptrmask == nil {
var s *mspan
b, hbits, s = heapBitsForObject(b)
if b == 0 {
return
}
n = s.elemsize
if n == 0 {
throw("scanobject n == 0")
}
}
for i := uintptr(0); i < n; i += ptrSize {
// Find bits for this word.
var bits uintptr
if ptrmask != nil {
// dense mask (stack or data)
bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
} else {
if i != 0 {
// Avoid needless hbits.next() on last iteration.
hbits = hbits.next()
}
bits = uintptr(hbits.typeBits())
if bits == typeDead {
break // no more pointers in this object
}
}
if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
continue
}
if bits&typePointer != typePointer {
print("gc useCheckmark=", useCheckmark, " b=", hex(b), " ptrmask=", ptrmask, "\n")
throw("unexpected garbage collection bits")
}
obj := *(*uintptr)(unsafe.Pointer(b + i))
// Track the scan work performed as a way to estimate
// GC time. We use the number of pointers scanned
// because pointer scanning dominates the cost of
// scanning.
//
// TODO(austin): Consider counting only pointers into
// the heap, since nil and non-heap pointers are
// probably cheap to scan.
scanWork++
// At this point we have extracted the next potential pointer.
// Check if it points into heap.
if obj == 0 || obj < arena_start || obj >= arena_used {
continue
}
if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
}
// Mark the object.
if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
greyobject(obj, b, i, hbits, span, gcw)
}
}
gcw.bytesMarked += uint64(n)
gcw.scanWork += scanWork
}
// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
//go:nowritebarrier
func shade(b uintptr) {
if obj, hbits, span := heapBitsForObject(b); obj != 0 {
// TODO: this would be a great place to put a check to see
// if we are harvesting and if we are then we should
// figure out why there is a call to shade when the
// harvester thinks we are in a STW.
// if atomicload(&harvestingwbufs) == uint32(1) {
// // Throw here to discover write barriers
// // being executed during a STW.
// throw("shade during harvest")
// }
var gcw gcWork
greyobject(obj, 0, 0, hbits, span, &gcw)
// This is part of the write barrier so put the wbuf back.
if gcphase == _GCmarktermination {
gcw.dispose()
} else {
// If we added any pointers to the gcw, then
// currentwbuf must be nil because 1)
// greyobject got its wbuf from currentwbuf
// and 2) shade runs on the systemstack, so
// we're still on the same M. If either of
// these becomes no longer true, we need to
// rethink this.
gcw.disposeToCache()
}
}
}
// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into workbuf.
// Return possibly new workbuf to use.
// base and off are for debugging only and could be removed.
//go:nowritebarrier
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
// obj should be start of allocation, and so must be at least pointer-aligned.
if obj&(ptrSize-1) != 0 {
throw("greyobject: obj not pointer-aligned")
}
if useCheckmark {
if !hbits.isMarked() {
printlock()
print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
// Dump the source (base) object
gcDumpObject("base", base, off)
// Dump the object
gcDumpObject("obj", obj, ^uintptr(0))
throw("checkmark found unmarked object")
}
if !hbits.isCheckmarked() {
return
}
hbits.setCheckmarked()
if !hbits.isCheckmarked() {
throw("setCheckmarked and isCheckmarked disagree")
}
} else {
// If marked we have nothing to do.
if hbits.isMarked() {
return
}
hbits.setMarked()
// If this is a noscan object, fast-track it to black
// instead of greying it.
if hbits.typeBits() == typeDead {
gcw.bytesMarked += uint64(span.elemsize)
return
}
}
// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
// seems like a nice optimization that can be added back in.
// There needs to be time between the PREFETCH and the use.
// Previously we put the obj in an 8 element buffer that is drained at a rate
// to give the PREFETCH time to do its work.
// Use of PREFETCHNTA might be more appropriate than PREFETCH
gcw.put(obj)
}
// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
k := obj >> _PageShift
x := k
x -= mheap_.arena_start >> _PageShift
s := h_spans[x]
print(label, "=", hex(obj), " k=", hex(k))
if s == nil {
print(" s=nil\n")
return
}
print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
for i := uintptr(0); i < s.elemsize; i += ptrSize {
print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)))))
if i == off {
print(" <==")
}
print("\n")
}
}
// When in GCmarkterminate phase we allocate black.
//go:nowritebarrier
func gcmarknewobject_m(obj, size uintptr) {
if gcphase != _GCmarktermination {
throw("marking new object while not in mark termination phase")
}
if useCheckmark { // The world should be stopped so this should not happen.
throw("gcmarknewobject called while doing checkmark")
}
heapBitsForAddr(obj).setMarked()
xadd64(&work.bytesMarked, int64(size))
}
// Checkmarking
// To help debug the concurrent GC we remark with the world
// stopped ensuring that any object encountered has their normal
// mark bit set. To do this we use an orthogonal bit
// pattern to indicate the object is marked. The following pattern
// uses the upper two bits in the object's bounday nibble.
// 01: scalar not marked
// 10: pointer not marked
// 11: pointer marked
// 00: scalar marked
// Xoring with 01 will flip the pattern from marked to unmarked and vica versa.
// The higher bit is 1 for pointers and 0 for scalars, whether the object
// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that the
// there are no more pointers in the object. This information is held
// in the second nibble.
// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false
//go:nowritebarrier
func initCheckmarks() {
useCheckmark = true
for _, s := range work.spans {
if s.state == _MSpanInUse {
heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
}
}
}
func clearCheckmarks() {
useCheckmark = false
for _, s := range work.spans {
if s.state == _MSpanInUse {
heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
}
}
}