go/src/runtime/mgcmark.go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: marking and scanning

package runtime

import "unsafe"

// Scan all of the stacks, greying (or graying if in America) the referents
// but not blackening them since the mark write barrier isn't installed.
//go:nowritebarrier
func gcscan_m() {
	_g_ := getg()

	// Grab the g that called us and potentially allow rescheduling.
	// This allows it to be scanned like other goroutines.
	mastergp := _g_.m.curg
	casgstatus(mastergp, _Grunning, _Gwaiting)
	mastergp.waitreason = "garbage collection scan"

	// Span sweeping has been done by finishsweep_m.
	// Long term we will want to make this goroutine runnable
	// by placing it onto a scanenqueue state and then calling
	// runtime·restartg(mastergp) to make it Grunnable.
	// At the bottom we will want to return this p back to the scheduler.

	// Prepare flag indicating that the scan has not been completed.
	local_allglen := gcResetGState()

	work.nwait = 0
	work.ndone = 0
	work.nproc = 1 // For now do not do this in parallel.
	//	ackgcphase is not needed since we are not scanning running goroutines.
	parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), false, markroot)
	parfordo(work.markfor)

	lock(&allglock)
	// Check that gc work is done.
	for i := 0; i < local_allglen; i++ {
		gp := allgs[i]
		if !gp.gcworkdone {
			throw("scan missed a g")
		}
	}
	unlock(&allglock)

	casgstatus(mastergp, _Gwaiting, _Grunning)
	// Let the g that called us continue to run.
}

// ptrmask for an allocation containing a single pointer.
var oneptr = [...]uint8{typePointer}

//go:nowritebarrier
func markroot(desc *parfor, i uint32) {
	var gcw gcWork

	// Note: if you add a case here, please also update heapdump.go:dumproots.
	switch i {
	case _RootData:
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			scanblock(datap.data, datap.edata-datap.data, datap.gcdatamask.bytedata, &gcw)
		}

	case _RootBss:
		for datap := &firstmoduledata; datap != nil; datap = datap.next {
			scanblock(datap.bss, datap.ebss-datap.bss, datap.gcbssmask.bytedata, &gcw)
		}

	case _RootFinalizers:
		for fb := allfin; fb != nil; fb = fb.alllink {
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], &gcw)
		}

	case _RootSpans:
		// mark MSpan.specials
		sg := mheap_.sweepgen
		for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
			s := work.spans[spanidx]
			if s.state != mSpanInUse {
				continue
			}
			if !useCheckmark && s.sweepgen != sg {
				// sweepgen was updated (+2) during non-checkmark GC pass
				print("sweep ", s.sweepgen, " ", sg, "\n")
				throw("gc: unswept span")
			}
			for sp := s.specials; sp != nil; sp = sp.next {
				if sp.kind != _KindSpecialFinalizer {
					continue
				}
				// don't mark finalized object, but scan it so we
				// retain everything it points to.
				spf := (*specialfinalizer)(unsafe.Pointer(sp))
				// A finalizer can be set for an inner byte of an object, find object beginning.
				p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
				if gcphase != _GCscan {
					scanblock(p, s.elemsize, nil, &gcw) // scanned during mark phase
				}
				scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0], &gcw)
			}
		}

	case _RootFlushCaches:
		if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
			flushallmcaches()
		}

	default:
		// the rest is scanning goroutine stacks
		if uintptr(i-_RootCount) >= allglen {
			throw("markroot: bad index")
		}
		gp := allgs[i-_RootCount]

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// Shrink a stack if not much of it is being used but not in the scan phase.
		if gcphase == _GCmarktermination {
			// Shrink during STW GCmarktermination phase thus avoiding
			// complications introduced by shrinking during
			// non-STW phases.
			shrinkstack(gp)
		}
		if readgstatus(gp) == _Gdead {
			gp.gcworkdone = true
		} else {
			gp.gcworkdone = false
		}
		restart := stopg(gp)

		// goroutine will scan its own stack when it stops running.
		// Wait until it has.
		for readgstatus(gp) == _Grunning && !gp.gcworkdone {
		}

		// scanstack(gp) is done as part of gcphasework
		// But to make sure we finished we need to make sure that
		// the stack traps have all responded so drop into
		// this while loop until they respond.
		for !gp.gcworkdone {
			status = readgstatus(gp)
			if status == _Gdead {
				gp.gcworkdone = true // scan is a noop
				break
			}
			if status == _Gwaiting || status == _Grunnable {
				restart = stopg(gp)
			}
		}
		if restart {
			restartg(gp)
		}
	}

	// Root aren't part of the heap, so don't count them toward
	// marked heap bytes.
	gcw.bytesMarked = 0
	gcw.scanWork = 0
	gcw.dispose()
}

// gcAssistAlloc records and allocation of size bytes and, if
// allowAssist is true, may assist GC scanning in proportion to the
// allocations performed by this mutator since the last assist.
//
// It should only be called during gcphase == _GCmark.
//go:nowritebarrier
func gcAssistAlloc(size uintptr, allowAssist bool) {
	// Find the G responsible for this assist.
	gp := getg()
	if gp.m.curg != nil {
		gp = gp.m.curg
	}

	// Record allocation.
	gp.gcalloc += size

	if !allowAssist {
		return
	}

	// Compute the amount of assist scan work we need to do.
	scanWork := int64(gcController.assistRatio*float64(gp.gcalloc)) - gp.gcscanwork
	// scanWork can be negative if the last assist scanned a large
	// object and we're still ahead of our assist goal.
	if scanWork <= 0 {
		return
	}

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomicloadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
		} else {
			stolen = scanWork
		}
		xaddint64(&gcController.bgScanCredit, -scanWork)

		scanWork -= stolen
		gp.gcscanwork += stolen

		if scanWork == 0 {
			return
		}
	}

	// Perform assist work
	systemstack(func() {
		// Track time spent in this assist. Since we're on the
		// system stack, this is non-preemptible, so we can
		// just measure start and end time.
		startTime := nanotime()

		xadd(&work.nwait, -1)

		// drain own current wbuf first in the hopes that it
		// will be more cache friendly.
		var gcw gcWork
		gcw.initFromCache()
		startScanWork := gcw.scanWork
		gcDrainN(&gcw, scanWork)
		// Record that we did this much scan work.
		gp.gcscanwork += gcw.scanWork - startScanWork
		// TODO(austin): This is the vast majority of our
		// disposes. Instead of constantly disposing, keep a
		// per-P gcWork cache (probably combined with the
		// write barrier wbuf cache).
		gcw.dispose()

		// If this is the last worker and we ran out of work,
		// signal a completion point.
		if xadd(&work.nwait, +1) == work.nproc && work.full == 0 && work.partial == 0 {
			// This has reached a background completion
			// point. Is it the first this cycle?
			if cas(&work.bgMarkDone, 0, 1) {
				notewakeup(&work.bgMarkNote)
			}
		}

		duration := nanotime() - startTime
		_p_ := gp.m.p.ptr()
		_p_.gcAssistTime += duration
		if _p_.gcAssistTime > gcAssistTimeSlack {
			xaddint64(&gcController.assistTime, _p_.gcAssistTime)
			_p_.gcAssistTime = 0
		}
	})
}

// The gp has been moved to a GC safepoint. GC phase specific
// work is done here.
//go:nowritebarrier
func gcphasework(gp *g) {
	switch gcphase {
	default:
		throw("gcphasework in bad gcphase")
	case _GCoff, _GCquiesce, _GCstw, _GCsweep:
		// No work.
	case _GCscan:
		// scan the stack, mark the objects, put pointers in work buffers
		// hanging off the P where this is being run.
		// Indicate that the scan is valid until the goroutine runs again
		scanstack(gp)
	case _GCmark:
		// No work.
	case _GCmarktermination:
		scanstack(gp)
		// All available mark work will be emptied before returning.
	}
	gp.gcworkdone = true
}

//go:nowritebarrier
func scanstack(gp *g) {
	if gp.gcscanvalid {
		return
	}

	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		return
	case _Grunning:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("scanstack: goroutine not stopped")
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	if gp == getg() {
		throw("can't scan our own stack")
	}
	mp := gp.m
	if mp != nil && mp.helpgc != 0 {
		throw("can't scan gchelper stack")
	}

	var gcw gcWork
	gcw.initFromCache()
	scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
		// Pick up gcw as free variable so gentraceback and friends can
		// keep the same signature.
		scanframeworker(frame, unused, &gcw)
		return true
	}
	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
	tracebackdefers(gp, scanframe, nil)
	// Stacks aren't part of the heap, so don't count them toward
	// marked heap bytes.
	gcw.bytesMarked = 0
	gcw.scanWork = 0
	gcw.disposeToCache()
	gp.gcscanvalid = true
}

// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, unused unsafe.Pointer, gcw *gcWork) {

	f := frame.fn
	targetpc := frame.continpc
	if targetpc == 0 {
		// Frame is dead.
		return
	}
	if _DebugGC > 1 {
		print("scanframe ", funcname(f), "\n")
	}
	if targetpc != f.entry {
		targetpc--
	}
	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
	if pcdata == -1 {
		// We do not have a valid pcdata value but there might be a
		// stackmap for this function.  It is likely that we are looking
		// at the function prologue, assume so and hope for the best.
		pcdata = 0
	}

	// Scan local variables if stack frame has been allocated.
	size := frame.varp - frame.sp
	var minsize uintptr
	switch thechar {
	case '6', '8':
		minsize = 0
	case '7':
		minsize = spAlign
	default:
		minsize = ptrSize
	}
	if size > minsize {
		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
		if stkmap == nil || stkmap.n <= 0 {
			print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
			throw("missing stackmap")
		}

		// Locals bitmap information, scan just the pointers in locals.
		if pcdata < 0 || pcdata >= stkmap.n {
			// don't know where we are
			print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
			throw("scanframe: bad symbol table")
		}
		bv := stackmapdata(stkmap, pcdata)
		size = (uintptr(bv.n) / typeBitsWidth) * ptrSize
		scanblock(frame.varp-size, size, bv.bytedata, gcw)
	}

	// Scan arguments.
	if frame.arglen > 0 {
		var bv bitvector
		if frame.argmap != nil {
			bv = *frame.argmap
		} else {
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			if stkmap == nil || stkmap.n <= 0 {
				print("runtime: frame ", funcname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
				throw("missing stackmap")
			}
			if pcdata < 0 || pcdata >= stkmap.n {
				// don't know where we are
				print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
				throw("scanframe: bad symbol table")
			}
			bv = stackmapdata(stkmap, pcdata)
		}
		scanblock(frame.argp, uintptr(bv.n)/typeBitsWidth*ptrSize, bv.bytedata, gcw)
	}
}

// TODO(austin): Can we consolidate the gcDrain* functions?

// gcDrain scans objects in work buffers, blackening grey
// objects until all work buffers have been drained.
// If flushScanCredit != -1, gcDrain flushes accumulated scan work
// credit to gcController.bgScanCredit whenever gcw's local scan work
// credit exceeds flushScanCredit.
//go:nowritebarrier
func gcDrain(gcw *gcWork, flushScanCredit int64) {
	if gcphase != _GCmark && gcphase != _GCmarktermination {
		throw("scanblock phase incorrect")
	}

	var lastScanFlush, nextScanFlush int64
	if flushScanCredit != -1 {
		lastScanFlush = gcw.scanWork
		nextScanFlush = lastScanFlush + flushScanCredit
	} else {
		nextScanFlush = int64(^uint64(0) >> 1)
	}

	for {
		// If another proc wants a pointer, give it some.
		if work.nwait > 0 && work.full == 0 {
			gcw.balance()
		}

		b := gcw.get()
		if b == 0 {
			// work barrier reached
			break
		}
		// If the current wbuf is filled by the scan a new wbuf might be
		// returned that could possibly hold only a single object. This
		// could result in each iteration draining only a single object
		// out of the wbuf passed in + a single object placed
		// into an empty wbuf in scanobject so there could be
		// a performance hit as we keep fetching fresh wbufs.
		scanobject(b, 0, nil, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= nextScanFlush {
			credit := gcw.scanWork - lastScanFlush
			xaddint64(&gcController.bgScanCredit, credit)
			lastScanFlush = gcw.scanWork
			nextScanFlush = lastScanFlush + flushScanCredit
		}
	}
	if flushScanCredit != -1 {
		credit := gcw.scanWork - lastScanFlush
		xaddint64(&gcController.bgScanCredit, credit)
	}
	checknocurrentwbuf()
}

// gcDrainUntilPreempt blackens grey objects until g.preempt is set.
// This is best-effort, so it will return as soon as it is unable to
// get work, even though there may be more work in the system.
//go:nowritebarrier
func gcDrainUntilPreempt(gcw *gcWork, flushScanCredit int64) {
	if gcphase != _GCmark {
		println("gcphase =", gcphase)
		throw("gcDrainUntilPreempt phase incorrect")
	}

	var lastScanFlush, nextScanFlush int64
	if flushScanCredit != -1 {
		lastScanFlush = gcw.scanWork
		nextScanFlush = lastScanFlush + flushScanCredit
	} else {
		nextScanFlush = int64(^uint64(0) >> 1)
	}

	gp := getg()
	for !gp.preempt {
		// If the work queue is empty, balance. During
		// concurrent mark we don't really know if anyone else
		// can make use of this work, but even if we're the
		// only worker, the total cost of this per cycle is
		// only O(_WorkbufSize) pointer copies.
		if work.full == 0 && work.partial == 0 {
			gcw.balance()
		}

		b := gcw.tryGet()
		if b == 0 {
			// No more work
			break
		}
		scanobject(b, 0, nil, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= nextScanFlush {
			credit := gcw.scanWork - lastScanFlush
			xaddint64(&gcController.bgScanCredit, credit)
			lastScanFlush = gcw.scanWork
			nextScanFlush = lastScanFlush + flushScanCredit
		}
	}
	if flushScanCredit != -1 {
		credit := gcw.scanWork - lastScanFlush
		xaddint64(&gcController.bgScanCredit, credit)
	}
}

// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work. This is best-effort, so it may perform
// less work if it fails to get a work buffer. Otherwise, it will
// perform at least n units of work, but may perform more because
// scanning is always done in whole object increments.
//go:nowritebarrier
func gcDrainN(gcw *gcWork, scanWork int64) {
	checknocurrentwbuf()
	targetScanWork := gcw.scanWork + scanWork
	for gcw.scanWork < targetScanWork {
		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		//  }
		b := gcw.tryGet()
		if b == 0 {
			return
		}
		scanobject(b, 0, nil, gcw)
	}
}

// scanblock scans b as scanobject would.
// If the gcphase is GCscan, scanblock performs additional checks.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	// ptrmask can have 2 possible values:
	// 1. nil - obtain pointer mask from GC bitmap.
	// 2. pointer to a compact mask (for stacks and data).

	scanobject(b, n, ptrmask, gcw)
	if gcphase == _GCscan {
		if inheap(b) && ptrmask == nil {
			// b is in heap, we are in GCscan so there should be a ptrmask.
			throw("scanblock: In GCscan phase and inheap is true.")
		}
	}
}

// Scan the object b of size n bytes, adding pointers to wbuf.
// If ptrmask != nil, it specifies where pointers are in b.
// If ptrmask == nil, the GC bitmap should be consulted.
// In this case, n may be an overestimate of the size; the GC bitmap
// must also be used to make sure the scan stops at the end of b.
//go:nowritebarrier
func scanobject(b, n uintptr, ptrmask *uint8, gcw *gcWork) {
	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used
	scanWork := int64(0)

	// Find bits of the beginning of the object.
	var hbits heapBits

	if ptrmask == nil {
		var s *mspan
		b, hbits, s = heapBitsForObject(b)
		if b == 0 {
			return
		}
		n = s.elemsize
		if n == 0 {
			throw("scanobject n == 0")
		}
	}
	for i := uintptr(0); i < n; i += ptrSize {
		// Find bits for this word.
		var bits uintptr
		if ptrmask != nil {
			// dense mask (stack or data)
			bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * typeBitsWidth)) & typeMask
		} else {
			if i != 0 {
				// Avoid needless hbits.next() on last iteration.
				hbits = hbits.next()
			}
			bits = uintptr(hbits.typeBits())
			if bits == typeDead {
				break // no more pointers in this object
			}
		}

		if bits <= typeScalar { // typeScalar, typeDead, typeScalarMarked
			continue
		}

		if bits&typePointer != typePointer {
			print("gc useCheckmark=", useCheckmark, " b=", hex(b), " ptrmask=", ptrmask, "\n")
			throw("unexpected garbage collection bits")
		}

		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// Track the scan work performed as a way to estimate
		// GC time. We use the number of pointers scanned
		// because pointer scanning dominates the cost of
		// scanning.
		//
		// TODO(austin): Consider counting only pointers into
		// the heap, since nil and non-heap pointers are
		// probably cheap to scan.
		scanWork++

		// At this point we have extracted the next potential pointer.
		// Check if it points into heap.
		if obj == 0 || obj < arena_start || obj >= arena_used {
			continue
		}

		if mheap_.shadow_enabled && debug.wbshadow >= 2 && debug.gccheckmark > 0 && useCheckmark {
			checkwbshadow((*uintptr)(unsafe.Pointer(b + i)))
		}

		// Mark the object.
		if obj, hbits, span := heapBitsForObject(obj); obj != 0 {
			greyobject(obj, b, i, hbits, span, gcw)
		}
	}
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += scanWork
}

// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
//go:nowritebarrier
func shade(b uintptr) {
	if obj, hbits, span := heapBitsForObject(b); obj != 0 {
		// TODO: this would be a great place to put a check to see
		// if we are harvesting and if we are then we should
		// figure out why there is a call to shade when the
		// harvester thinks we are in a STW.
		// if atomicload(&harvestingwbufs) == uint32(1) {
		//	// Throw here to discover write barriers
		//	// being executed during a STW.
		//	throw("shade during harvest")
		// }

		var gcw gcWork
		greyobject(obj, 0, 0, hbits, span, &gcw)
		// This is part of the write barrier so put the wbuf back.
		if gcphase == _GCmarktermination {
			gcw.dispose()
		} else {
			// If we added any pointers to the gcw, then
			// currentwbuf must be nil because 1)
			// greyobject got its wbuf from currentwbuf
			// and 2) shade runs on the systemstack, so
			// we're still on the same M.  If either of
			// these becomes no longer true, we need to
			// rethink this.
			gcw.disposeToCache()
		}
	}
}

// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into workbuf.
// Return possibly new workbuf to use.
// base and off are for debugging only and could be removed.
//go:nowritebarrier
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
	if obj&(ptrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}

	if useCheckmark {
		if !hbits.isMarked() {
			printlock()
			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")

			// Dump the source (base) object
			gcDumpObject("base", base, off)

			// Dump the object
			gcDumpObject("obj", obj, ^uintptr(0))

			throw("checkmark found unmarked object")
		}
		if !hbits.isCheckmarked() {
			return
		}
		hbits.setCheckmarked()
		if !hbits.isCheckmarked() {
			throw("setCheckmarked and isCheckmarked disagree")
		}
	} else {
		// If marked we have nothing to do.
		if hbits.isMarked() {
			return
		}

		hbits.setMarked()

		// If this is a noscan object, fast-track it to black
		// instead of greying it.
		if hbits.typeBits() == typeDead {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH

	gcw.put(obj)
}

// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	k := obj >> _PageShift
	x := k
	x -= mheap_.arena_start >> _PageShift
	s := h_spans[x]
	print(label, "=", hex(obj), " k=", hex(k))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.start*_PageSize=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
	for i := uintptr(0); i < s.elemsize; i += ptrSize {
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)))))
		if i == off {
			print(" <==")
		}
		print("\n")
	}
}

// When in GCmarkterminate phase we allocate black.
//go:nowritebarrier
func gcmarknewobject_m(obj, size uintptr) {
	if gcphase != _GCmarktermination {
		throw("marking new object while not in mark termination phase")
	}
	if useCheckmark { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}

	heapBitsForAddr(obj).setMarked()
	xadd64(&work.bytesMarked, int64(size))
}

// Checkmarking

// To help debug the concurrent GC we remark with the world
// stopped ensuring that any object encountered has their normal
// mark bit set. To do this we use an orthogonal bit
// pattern to indicate the object is marked. The following pattern
// uses the upper two bits in the object's bounday nibble.
// 01: scalar  not marked
// 10: pointer not marked
// 11: pointer     marked
// 00: scalar      marked
// Xoring with 01 will flip the pattern from marked to unmarked and vica versa.
// The higher bit is 1 for pointers and 0 for scalars, whether the object
// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that the
// there are no more pointers in the object. This information is held
// in the second nibble.

// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false

//go:nowritebarrier
func initCheckmarks() {
	useCheckmark = true
	for _, s := range work.spans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
		}
	}
}

func clearCheckmarks() {
	useCheckmark = false
	for _, s := range work.spans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
		}
	}
}