go/src/runtime/mbitmap.go


// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Garbage collector: type and heap bitmaps.
//
// Type bitmaps
//
// The global variables (in the data and bss sections) and types that aren't too large
// record information about the layout of their memory words using a type bitmap.
// The bitmap holds two bits for each pointer-sized word. The two-bit values are:
//
// 00 - typeDead: not a pointer, and no pointers in the rest of the object
// 01 - typeScalar: not a pointer
// 10 - typePointer: a pointer that GC should trace
// 11 - unused
//
// typeDead only appears in type bitmaps in Go type descriptors
// and in type bitmaps embedded in the heap bitmap (see below).
// It is not used in the type bitmap for the global variables.
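//
// For example (hand-worked from the encoding above), a three-word value of
// type struct{ p *int; n int; q *byte } has type bitmap 10 01 10
// (typePointer typeScalar typePointer), while struct{ n, m int } can be
// encoded as 01 00: typeScalar, then typeDead since no pointers follow.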
//
// Heap bitmap
//
// The allocated heap comes from a subset of the memory in the range [start, used),
// where start == mheap_.arena_start and used == mheap_.arena_used.
// The heap bitmap comprises 4 bits for each pointer-sized word in that range,
// stored in bytes indexed backward in memory from start.
// That is, the byte at address start-1 holds the 4-bit entries for the two words
// start and start+ptrSize; the byte at start-2 holds the entries for start+2*ptrSize
// and start+3*ptrSize; and so on.
// In the byte holding the entries for addresses p and p+ptrSize, the low 4 bits
// describe p and the high 4 bits describe p+ptrSize.
//
// The 4 bits for each word are:
// 0001 - not used
// 0010 - bitMarked: this object has been marked by GC
// tt00 - word type bits, as in a type bitmap.
//
// The code makes use of the fact that the zero value for a heap bitmap nibble
// has no marked bit set and type bits == typeDead.
// These properties must be preserved when modifying the encoding.
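//
// As a hand-worked example of the layout above: the word at address
// start+5*ptrSize has word index 5, so its entry lives in the byte at
// start - 5/2 - 1 = start-3, and since the index is odd it occupies the
// high 4 bits of that byte (see heapBitsForAddr below).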
//
// Checkmarks
//
// In a concurrent garbage collector, one worries about failing to mark
// a live object due to mutations without write barriers or bugs in the
// collector implementation. As a sanity check, the GC has a 'checkmark'
// mode that retraverses the object graph with the world stopped, to make
// sure that everything that should be marked is marked.
// In checkmark mode, in the heap bitmap, the type bits for the first word
// of an object are redefined:
//
// 00 - typeScalarCheckmarked // typeScalar, checkmarked
// 01 - typeScalar // typeScalar, not checkmarked
// 10 - typePointer // typePointer, not checkmarked
// 11 - typePointerCheckmarked // typePointer, checkmarked
//
// That is, typeDead is redefined to be typeScalar + a checkmark, and the
// previously unused 11 pattern is redefined to be typePointer + a checkmark.
// To prepare for this mode, we must move any typeDead in the first word of
// a multiword object to the second word.
package runtime
import "unsafe"
const (
typeDead = 0
typeScalarCheckmarked = 0
typeScalar = 1
typePointer = 2
typePointerCheckmarked = 3
typeBitsWidth = 2 // # of type bits per pointer-sized word
typeMask = 1<<typeBitsWidth - 1
typeBitmapScale = ptrSize * (8 / typeBitsWidth) // number of data bytes per type bitmap byte
heapBitsWidth = 4
heapBitmapScale = ptrSize * (8 / heapBitsWidth) // number of data bytes per heap bitmap byte
bitMarked = 2
typeShift = 2
)
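// decodeHeapNibble is an illustrative sketch, not part of the runtime: it
// splits one 4-bit heap bitmap entry into its marked bit and type bits
// using the constants above.
func decodeHeapNibble(nib uint8) (marked bool, typ uint8) {
	marked = nib&bitMarked != 0         // the 0010 bit
	typ = (nib >> typeShift) & typeMask // the tt00 bits
	return
}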
// Information from the compiler about the layout of stack frames.
type bitvector struct {
n int32 // # of bits
bytedata *uint8
}
// addb returns the byte pointer p+n.
//go:nowritebarrier
func addb(p *byte, n uintptr) *byte {
return (*byte)(add(unsafe.Pointer(p), n))
}
// subtractb returns the byte pointer p-n.
//go:nowritebarrier
func subtractb(p *byte, n uintptr) *byte {
return (*byte)(add(unsafe.Pointer(p), -n))
}
// mHeap_MapBits is called each time arena_used is extended.
// It maps any additional bitmap memory needed for the new arena memory.
//
//go:nowritebarrier
func mHeap_MapBits(h *mheap) {
// Caller has added extra mappings to the arena.
// Add extra mappings of bitmap words as needed.
// We allocate extra bitmap pieces in chunks of bitmapChunk.
const bitmapChunk = 8192
n := (mheap_.arena_used - mheap_.arena_start) / heapBitmapScale
n = round(n, bitmapChunk)
n = round(n, _PhysPageSize)
if h.bitmap_mapped >= n {
return
}
sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
h.bitmap_mapped = n
}
// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
bitp *uint8
shift uint32
}
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must have already checked that addr is in the range [mheap_.arena_start, mheap_.arena_used).
func heapBitsForAddr(addr uintptr) heapBits {
off := (addr - mheap_.arena_start) / ptrSize
return heapBits{(*uint8)(unsafe.Pointer(mheap_.arena_start - off/2 - 1)), uint32(4 * (off & 1))}
}
// heapBitsForSpan returns the heapBits for the span base address base.
func heapBitsForSpan(base uintptr) (hbits heapBits) {
if base < mheap_.arena_start || base >= mheap_.arena_end {
throw("heapBitsForSpan: base out of range")
}
hbits = heapBitsForAddr(base)
if hbits.shift != 0 {
throw("heapBitsForSpan: unaligned start")
}
return hbits
}
// heapBitsForObject returns the base address for the heap object
// containing the address p, along with the heapBits for base.
// If p does not point into a heap object, it returns base == 0;
// otherwise it returns the base address of the object.
func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits, s *mspan) {
arenaStart := mheap_.arena_start
if p < arenaStart || p >= mheap_.arena_used {
return
}
off := p - arenaStart
idx := off >> _PageShift
// p points into the heap, but possibly to the middle of an object.
// Consult the span table to find the block beginning.
k := p >> _PageShift
s = h_spans[idx]
if s == nil || pageID(k) < s.start || p >= s.limit || s.state != mSpanInUse {
if s == nil || s.state == _MSpanStack {
// If s is nil, the virtual address has never been part of the heap.
// This pointer may be to some mmap'd region, so we allow it.
// Pointers into stacks are also ok, the runtime manages these explicitly.
return
}
// The following ensures that we are rigorous about what data
// structures hold valid pointers.
// TODO(rsc): Check if this still happens.
if false {
// Still happens sometimes. We don't know why.
printlock()
print("runtime:objectstart Span weird: p=", hex(p), " k=", hex(k))
if s == nil {
print(" s=nil\n")
} else {
print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n")
}
printunlock()
throw("objectstart: bad pointer in unexpected span")
}
}
// If this span holds objects of a power-of-2 size, just mask off the low
// bits of the interior pointer to find the base. Otherwise, use the size
// to compute the base.
if s.baseMask != 0 {
// optimize for power of 2 sized objects.
base = s.base()
base = base + (p-base)&s.baseMask
// base = p & s.baseMask is faster for small spans,
// but doesn't work for large spans.
// Overall, it's faster to use the more general computation above.
} else {
base = s.base()
if p-base >= s.elemsize {
// n := (p - base) / s.elemsize, using division by multiplication
n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
base += n * s.elemsize
}
}
// Now that we know the actual base, compute heapBits to return to caller.
hbits = heapBitsForAddr(base)
return
}
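// baseForPow2 is an illustrative sketch of the baseMask fast path above,
// assuming baseMask == ^(elemsize-1) for a power-of-2 elemsize: clearing
// the low bits of the offset maps any interior pointer to its object's base.
func baseForPow2(p, spanBase, elemsize uintptr) uintptr {
	mask := elemsize - 1 // e.g. elemsize 64 -> low 6 bits
	return spanBase + (p-spanBase)&^mask
}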
// prefetch prefetches the bitmap byte containing h's bits.
func (h heapBits) prefetch() {
prefetchnta(uintptr(unsafe.Pointer((h.bitp))))
}
// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
func (h heapBits) next() heapBits {
if h.shift == 0 {
return heapBits{h.bitp, 4}
}
return heapBits{subtractb(h.bitp, 1), 0}
}
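// walkObjectBits is an illustrative sketch, not part of the runtime: it
// uses next to visit the heap bits for each word of an object of the
// given size, stopping early when typeDead says no pointers remain.
func walkObjectBits(addr, size uintptr, visit func(uintptr)) {
	h := heapBitsForAddr(addr)
	for i := uintptr(0); i < size; i += ptrSize {
		switch h.typeBits() {
		case typeDead:
			return // no pointers in the rest of the object
		case typePointer:
			visit(addr + i) // this word holds a pointer the GC should trace
		}
		h = h.next()
	}
}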
// isMarked reports whether the heap bits have the marked bit set.
func (h heapBits) isMarked() bool {
return *h.bitp&(bitMarked<<h.shift) != 0
}
// setMarked sets the marked bit in the heap bits, atomically.
func (h heapBits) setMarked() {
// Each byte of the GC bitmap holds info for two words.
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomicor8(h.bitp, bitMarked<<h.shift)
}
// setMarkedNonAtomic sets the marked bit in the heap bits, non-atomically.
func (h heapBits) setMarkedNonAtomic() {
*h.bitp |= bitMarked << h.shift
}
// typeBits returns the heap bits' type bits.
func (h heapBits) typeBits() uint8 {
return (*h.bitp >> (h.shift + typeShift)) & typeMask
}
// isCheckmarked reports whether the heap bits have the checkmarked bit set.
func (h heapBits) isCheckmarked() bool {
typ := h.typeBits()
return typ == typeScalarCheckmarked || typ == typePointerCheckmarked
}
// setCheckmarked sets the checkmarked bit.
func (h heapBits) setCheckmarked() {
typ := h.typeBits()
if typ == typeScalar {
// Clear low type bit to turn 01 into 00.
atomicand8(h.bitp, ^((1 << typeShift) << h.shift))
} else if typ == typePointer {
// Set low type bit to turn 10 into 11.
atomicor8(h.bitp, (1<<typeShift)<<h.shift)
}
}
// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.
// initSpan initializes the heap bitmap for a span.
func (h heapBits) initSpan(size, n, total uintptr) {
if total%heapBitmapScale != 0 {
throw("initSpan: unaligned length")
}
nbyte := total / heapBitmapScale
memclr(unsafe.Pointer(subtractb(h.bitp, nbyte-1)), nbyte)
}
// initCheckmarkSpan initializes a span for being checkmarked.
// This would be a no-op except that we need to rewrite any
// typeDead bits in the first word of the object into typeScalar
// followed by a typeDead in the second word of the object.
func (h heapBits) initCheckmarkSpan(size, n, total uintptr) {
if size == ptrSize {
// Only possible on a 64-bit system, since the minimum object size is 8 bytes.
// Must update both top and bottom nibble of each byte.
// There is no second word in these objects, so all we have
// to do is rewrite typeDead to typeScalar by adding the 1<<typeShift bit.
bitp := h.bitp
for i := uintptr(0); i < n; i += 2 {
x := int(*bitp)
if (x>>typeShift)&typeMask == typeDead {
x += (typeScalar - typeDead) << typeShift
}
if (x>>(4+typeShift))&typeMask == typeDead {
x += (typeScalar - typeDead) << (4 + typeShift)
}
*bitp = uint8(x)
bitp = subtractb(bitp, 1)
}
return
}
// Update bottom nibble for first word of each object.
// If the bottom nibble says typeDead, change to typeScalar
// and clear top nibble to mark as typeDead.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := *bitp
if (x>>typeShift)&typeMask == typeDead {
x += (typeScalar - typeDead) << typeShift
x &= 0x0f // clear top nibble to typeDead
}
*bitp = x
bitp = subtractb(bitp, step)
}
}
// clearCheckmarkSpan removes all the checkmarks from a span.
// If it finds a multiword object starting with typeScalar typeDead,
// it rewrites the heap bits to the simpler typeDead typeDead.
func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
if size == ptrSize {
// Only possible on a 64-bit system, since the minimum object size is 8 bytes.
// Must update both top and bottom nibble of each byte.
// typeScalarCheckmarked can be left as typeDead,
// but we want to change typeScalar back to typeDead.
bitp := h.bitp
for i := uintptr(0); i < n; i += 2 {
x := int(*bitp)
switch typ := (x >> typeShift) & typeMask; typ {
case typeScalar:
x += (typeDead - typeScalar) << typeShift
case typePointerCheckmarked:
x += (typePointer - typePointerCheckmarked) << typeShift
}
switch typ := (x >> (4 + typeShift)) & typeMask; typ {
case typeScalar:
x += (typeDead - typeScalar) << (4 + typeShift)
case typePointerCheckmarked:
x += (typePointer - typePointerCheckmarked) << (4 + typeShift)
}
*bitp = uint8(x)
bitp = subtractb(bitp, 1)
}
return
}
// Update bottom nibble for first word of each object.
// If the bottom nibble says typeScalarCheckmarked and the top is not typeDead,
// change to typeScalar. Otherwise leave, since typeScalarCheckmarked == typeDead.
// If the bottom nibble says typePointerCheckmarked, change to typePointer.
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := int(*bitp)
switch typ := (x >> typeShift) & typeMask; {
case typ == typeScalarCheckmarked && (x>>(4+typeShift))&typeMask != typeDead:
x += (typeScalar - typeScalarCheckmarked) << typeShift
case typ == typePointerCheckmarked:
x += (typePointer - typePointerCheckmarked) << typeShift
}
*bitp = uint8(x)
bitp = subtractb(bitp, step)
}
}
// heapBitsSweepSpan coordinates the sweeping of a span by reading
// and updating the corresponding heap bitmap entries.
// For each free object in the span, heapBitsSweepSpan sets the type
// bits for the first two words (or one for single-word objects) to typeDead
// and then calls f(p), where p is the object's base address.
// f is expected to add the object to a free list.
func heapBitsSweepSpan(base, size, n uintptr, f func(uintptr)) {
h := heapBitsForSpan(base)
if size == ptrSize {
// Only possible on a 64-bit system, since the minimum object size is 8 bytes.
// Must read and update both top and bottom nibble of each byte.
bitp := h.bitp
for i := uintptr(0); i < n; i += 2 {
x := int(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x &^= typeMask << typeShift
f(base + i*ptrSize)
}
if x&(bitMarked<<4) != 0 {
x &^= bitMarked << 4
} else {
x &^= typeMask << (4 + typeShift)
f(base + (i+1)*ptrSize)
}
*bitp = uint8(x)
bitp = subtractb(bitp, 1)
}
return
}
bitp := h.bitp
step := size / heapBitmapScale
for i := uintptr(0); i < n; i++ {
x := int(*bitp)
if x&bitMarked != 0 {
x &^= bitMarked
} else {
x = 0
f(base + i*size)
}
*bitp = uint8(x)
bitp = subtractb(bitp, step)
}
}
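// sweepCollect is an illustrative sketch of how a caller might use
// heapBitsSweepSpan (the real sweeper links objects onto span free lists,
// not a slice): it records the base address of every free object handed to f.
func sweepCollect(base, size, n uintptr) []uintptr {
	var free []uintptr
	heapBitsSweepSpan(base, size, n, func(p uintptr) {
		free = append(free, p)
	})
	return free
}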
// TODO(rsc): Clean up the next two functions.
// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
// From here to the end of the function we store the type info
// for the new object in the GC bitmap.
h := heapBitsForAddr(x)
var ti, te uintptr
var ptrmask *uint8
if size == ptrSize {
// The object is a single word and contains pointers, so the word itself must be a pointer.
// The bitmap byte is shared with the one-word object
// next to it, and concurrent GC might be marking that
// object, so we must use an atomic update.
atomicor8(h.bitp, typePointer<<(typeShift+h.shift))
return
}
if typ.kind&kindGCProg != 0 {
nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
masksize := nptr
if masksize%2 != 0 {
masksize *= 2 // repeated
}
const typeBitsPerByte = 8 / typeBitsWidth
masksize = masksize * typeBitsPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
// write barriers have not been updated to deal with this case yet.
throw("maxGCMask too small for now")
// If the mask is too large, unroll the program directly
// into the GC bitmap. It's 7 times slower than copying
// from the pre-unrolled mask, but saves 1/16 of type size
// memory for the mask.
systemstack(func() {
unrollgcproginplace_m(unsafe.Pointer(x), typ, size, dataSize)
})
return
}
ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
// Check whether the program is already unrolled
// by checking if the unroll flag byte is set
maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
systemstack(func() {
unrollgcprog_m(typ)
})
}
ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
} else {
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
}
if size == 2*ptrSize {
// h.shift is 0 for all sizes > ptrSize.
*h.bitp = *ptrmask
return
}
te = uintptr(typ.size) / ptrSize
// If the type occupies an odd number of words, its mask is repeated
// (see typeBitmapInHeapBitmapFormat below), so te is already the mask
// length in bytes. Otherwise each byte covers two words, so halve te.
if te%2 == 0 {
te /= 2
}
// Copy pointer bitmask into the bitmap.
// TODO(rlh): add comment addressing the following concerns:
// If size > 2*ptrSize, is x guaranteed to be at least 2*ptrSize-aligned?
// And if the type occupies an odd number of words, why do we only go through half
// of ptrmask, and why don't we have to shift everything by 4 on odd iterations?
for i := uintptr(0); i < dataSize; i += 2 * ptrSize {
v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
ti++
if ti == te {
ti = 0
}
if i+ptrSize == dataSize {
v &^= typeMask << (4 + typeShift)
}
*h.bitp = v
h.bitp = subtractb(h.bitp, 1)
}
if dataSize%(2*ptrSize) == 0 && dataSize < size {
// Mark the word after last object's word as typeDead.
*h.bitp = 0
}
}
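// A hand-worked note on the copy loop above: for a 3-word type such as
// struct{ p *byte; a, b uintptr }, the unrolled mask is stored doubled
// (see typeBitmapInHeapBitmapFormat below), covering 6 words in 3 bytes.
// Each mask byte therefore already packs the two 4-bit entries that one
// bitmap byte needs, which is why the loop never shifts by 4.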
// typeBitmapInHeapBitmapFormat returns a bitmap holding
// the type bits for the type typ, but expanded into heap bitmap format
// to make it easier to copy them into the heap bitmap.
// TODO(rsc): Change clients to use the type bitmap format instead,
// which can be stored more densely (especially if we drop to 1 bit per pointer).
//
// To make it easier to replicate the bits when filling out the heap
// bitmap for an array of typ, if typ holds an odd number of words
// (meaning the heap bitmap would stop halfway through a byte),
// typeBitmapInHeapBitmapFormat returns the bitmap for two instances
// of typ in a row.
// TODO(rsc): Remove doubling.
func typeBitmapInHeapBitmapFormat(typ *_type) []uint8 {
var ptrmask *uint8
nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
if typ.kind&kindGCProg != 0 {
masksize := nptr
if masksize%2 != 0 {
masksize *= 2 // repeated
}
const typeBitsPerByte = 8 / typeBitsWidth
masksize = masksize * typeBitsPerByte / 8 // 4 bits per word
masksize++ // unroll flag in the beginning
if masksize > maxGCMask && typ.gc[1] != 0 {
// write barriers have not been updated to deal with this case yet.
throw("maxGCMask too small for now")
}
ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
// Check whether the program is already unrolled
// by checking if the unroll flag byte is set
maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
systemstack(func() {
unrollgcprog_m(typ)
})
}
ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
} else {
ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
}
return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
}
// GC type info programs
//
// TODO(rsc): Clean up and enable.
const (
// GC type info programs.
// The programs allow storing the type info required for GC in a compact form.
// Most importantly, arrays take O(1) space instead of O(n).
// The program grammar is:
//
// Program = {Block} "insEnd"
// Block = Data | Array
// Data = "insData" DataSize DataBlock
// DataSize = int // size of the DataBlock in bit pairs, 1 byte
// DataBlock = binary // dense GC mask (2 bits per word), ceil(DataSize/4) bytes
// Array = "insArray" ArrayLen Block "insArrayEnd"
// ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch)
//
// Each instruction (insData, insArray, etc) is 1 byte.
// For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
// the program looks like:
//
// insData 3 (typePointer typeScalar typeScalar)
// insArray 20 insData 2 (typeScalar typePointer) insArrayEnd insEnd
//
// Total size of the program is 17 bytes (13 bytes on 32-bits).
// The corresponding GC mask would take 43 bytes (it would be repeated
// because the type has an odd number of words).
insData = 1 + iota
insArray
insArrayEnd
insEnd
// 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
maxGCMask = 65536 // TODO(rsc): change back to 64
)
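// exampleGCProg is an illustrative, hand-encoded instance of the program
// grammar above for struct { x []byte; y [20]struct{ z int; w *byte } }
// on a 64-bit system; it is not emitted by the compiler. Data blocks pack
// 2-bit type values from the low bits of each byte upward, and ArrayLen
// is stored least-significant byte first (see unrollgcprog1 below).
var exampleGCProg = [17]byte{
	insData, 3, typePointer | typeScalar<<2 | typeScalar<<4,
	insArray, 20, 0, 0, 0, 0, 0, 0, 0, // ArrayLen = 20 as 8 bytes
	insData, 2, typeScalar | typePointer<<2,
	insArrayEnd,
	insEnd,
}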
// Recursively unrolls GC program in prog.
// mask is where to store the result.
// If inplace is true, store the result not in mask but in the heap bitmap for mask.
// ppos is a pointer to position in mask, in bits.
// sparse says to generate a 4-bit-per-word mask for the heap (otherwise a 2-bit-per-word mask for data/bss).
//go:nowritebarrier
func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte {
pos := *ppos
mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
for {
switch *prog {
default:
throw("unrollgcprog: unknown instruction")
case insData:
prog = addb(prog, 1)
siz := int(*prog)
prog = addb(prog, 1)
p := (*[1 << 30]byte)(unsafe.Pointer(prog))
for i := 0; i < siz; i++ {
const typeBitsPerByte = 8 / typeBitsWidth
v := p[i/typeBitsPerByte]
v >>= (uint(i) % typeBitsPerByte) * typeBitsWidth
v &= typeMask
if inplace {
// Store directly into GC bitmap.
h := heapBitsForAddr(uintptr(unsafe.Pointer(&mask[pos])))
if h.shift == 0 {
*h.bitp = v << typeShift
} else {
*h.bitp |= v << (4 + typeShift)
}
pos += ptrSize
} else if sparse {
// 4-bits per word, type bits in high bits
v <<= (pos % 8) + typeShift
mask[pos/8] |= v
pos += heapBitsWidth
} else {
// 2-bits per word
v <<= pos % 8
mask[pos/8] |= v
pos += typeBitsWidth
}
}
prog = addb(prog, round(uintptr(siz)*typeBitsWidth, 8)/8)
case insArray:
prog = (*byte)(add(unsafe.Pointer(prog), 1))
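// Read the ptrSize-byte array length, stored least-significant byte first.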
siz := uintptr(0)
for i := uintptr(0); i < ptrSize; i++ {
siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1)))
}
prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
var prog1 *byte
for i := uintptr(0); i < siz; i++ {
prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse)
}
if *prog1 != insArrayEnd {
throw("unrollgcprog: array does not end with insArrayEnd")
}
prog = (*byte)(add(unsafe.Pointer(prog1), 1))
case insArrayEnd, insEnd:
*ppos = pos
return prog
}
}
}
// Unrolls GC program prog for data/bss, returns dense GC mask.
func unrollglobgcprog(prog *byte, size uintptr) bitvector {
masksize := round(round(size, ptrSize)/ptrSize*typeBitsWidth, 8) / 8
mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
mask[masksize] = 0xa1
pos := uintptr(0)
prog = unrollgcprog1(&mask[0], prog, &pos, false, false)
if pos != size/ptrSize*typeBitsWidth {
print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize*typeBitsWidth, "\n")
throw("unrollglobgcprog: bad program size")
}
if *prog != insEnd {
throw("unrollglobgcprog: program does not end with insEnd")
}
if mask[masksize] != 0xa1 {
throw("unrollglobgcprog: overflow")
}
return bitvector{int32(masksize * 8), &mask[0]}
}
func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
// TODO(rsc): Explain why these non-atomic updates are okay.
pos := uintptr(0)
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
for pos != size0 {
unrollgcprog1((*byte)(v), prog, &pos, true, true)
}
// Mark the word after the last one as typeDead.
if size0 < size {
h := heapBitsForAddr(uintptr(v) + size0)
*h.bitp &^= typeMask << typeShift
}
}
var unroll mutex
// Unrolls GC program in typ.gc[1] into typ.gc[0]
//go:nowritebarrier
func unrollgcprog_m(typ *_type) {
lock(&unroll)
mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0])))
if *mask == 0 {
pos := uintptr(8) // skip the unroll flag
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
prog = unrollgcprog1(mask, prog, &pos, false, true)
if *prog != insEnd {
throw("unrollgcprog: program does not end with insEnd")
}
if typ.size/ptrSize%2 != 0 {
// repeat the program
prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
unrollgcprog1(mask, prog, &pos, false, true)
}
// atomic way to say mask[0] = 1
atomicor8(mask, 1)
}
unlock(&unroll)
}
// Testing.
func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
target := (*stkframe)(ctxt)
if frame.sp <= target.sp && target.sp < frame.varp {
*target = *frame
return false
}
return true
}
// Returns GC type info for object p for testing.
func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
*mask = nil
*len = 0
const typeBitsPerByte = 8 / typeBitsWidth
// data
for datap := &firstmoduledata; datap != nil; datap = datap.next {
if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - datap.data) / ptrSize
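// Extract the 2-bit type entry for word off from the packed data mask.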
bits := (*(*byte)(add(unsafe.Pointer(datap.gcdatamask.bytedata), off/typeBitsPerByte)) >> ((off % typeBitsPerByte) * typeBitsWidth)) & typeMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
return
}
// bss
if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - datap.bss) / ptrSize
bits := (*(*byte)(add(unsafe.Pointer(datap.gcbssmask.bytedata), off/typeBitsPerByte)) >> ((off % typeBitsPerByte) * typeBitsWidth)) & typeMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
return
}
}
// heap
var n uintptr
var base uintptr
if mlookup(uintptr(p), &base, &n, nil) != 0 {
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
bits := heapBitsForAddr(base + i).typeBits()
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
return
}
// stack
var frame stkframe
frame.sp = uintptr(p)
_g_ := getg()
gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
if frame.fn != nil {
f := frame.fn
targetpc := frame.continpc
if targetpc == 0 {
return
}
if targetpc != f.entry {
targetpc--
}
pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
if pcdata == -1 {
return
}
stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
if stkmap == nil || stkmap.n <= 0 {
return
}
bv := stackmapdata(stkmap, pcdata)
size := uintptr(bv.n) / typeBitsWidth * ptrSize
n := (*ptrtype)(unsafe.Pointer(t)).elem.size
*len = n / ptrSize
*mask = &make([]byte, *len)[0]
for i := uintptr(0); i < n; i += ptrSize {
off := (uintptr(p) + i - frame.varp + size) / ptrSize
bits := ((*(*byte)(add(unsafe.Pointer(bv.bytedata), off*typeBitsWidth/8))) >> ((off * typeBitsWidth) % 8)) & typeMask
*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
}
}
}