mirror of
https://github.com/golang/go
synced 2024-11-17 13:04:54 -07:00
runtime: use OnesCount64 to count allocated objects in a span
This change modifies the implementation of (*mspan).countAlloc by using OnesCount64 (which on many systems is intrinsified). It does so by using an unsafe pointer cast, but in this case we don't care about endianness because we're just counting bits set. This change means we no longer need the popcnt table which was redundant in the runtime anyway. We can also simplify the logic here significantly by observing that mark bits allocations are always 8-byte aligned, so we don't need to handle any edge-cases due to the fact that OnesCount64 operates on 64 bits at a time: all irrelevant bits will be zero. Overall, this implementation is significantly faster than the old one on amd64, and should be similarly faster (or better!) on other systems which support the intrinsic. On systems which do not, it should be roughly the same performance because OnesCount64 is implemented using a table in the general case. Results on linux/amd64: name old time/op new time/op delta MSpanCountAlloc/bits=64-4 16.8ns ± 0% 12.7ns ± 0% -24.40% (p=0.000 n=5+4) MSpanCountAlloc/bits=128-4 23.5ns ± 0% 12.8ns ± 0% -45.70% (p=0.000 n=4+5) MSpanCountAlloc/bits=256-4 43.5ns ± 0% 12.8ns ± 0% -70.67% (p=0.000 n=4+5) MSpanCountAlloc/bits=512-4 59.5ns ± 0% 15.4ns ± 0% -74.12% (p=0.008 n=5+5) MSpanCountAlloc/bits=1024-4 116ns ± 1% 23ns ± 0% -79.84% (p=0.000 n=5+4) Change-Id: Id4c994be22224653af5333683a69b0937130ed04 Reviewed-on: https://go-review.googlesource.com/c/go/+/216558 Run-TryBot: Michael Knyszek <mknyszek@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
7af6a31b48
commit
67c2dcbc59
@ -865,58 +865,22 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
|
||||
}
|
||||
}
|
||||
|
||||
// oneBitCount is indexed by byte and produces the
|
||||
// number of 1 bits in that byte. For example 128 has 1 bit set
|
||||
// and oneBitCount[128] will holds 1.
|
||||
var oneBitCount = [256]uint8{
|
||||
0, 1, 1, 2, 1, 2, 2, 3,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6,
|
||||
4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7,
|
||||
5, 6, 6, 7, 6, 7, 7, 8}
|
||||
|
||||
// countAlloc returns the number of objects allocated in span s by
|
||||
// scanning the allocation bitmap.
|
||||
// TODO:(rlh) Use popcount intrinsic.
|
||||
func (s *mspan) countAlloc() int {
|
||||
count := 0
|
||||
maxIndex := s.nelems / 8
|
||||
for i := uintptr(0); i < maxIndex; i++ {
|
||||
mrkBits := *s.gcmarkBits.bytep(i)
|
||||
count += int(oneBitCount[mrkBits])
|
||||
}
|
||||
if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
|
||||
mrkBits := *s.gcmarkBits.bytep(maxIndex)
|
||||
mask := uint8((1 << bitsInLastByte) - 1)
|
||||
bits := mrkBits & mask
|
||||
count += int(oneBitCount[bits])
|
||||
bytes := divRoundUp(s.nelems, 8)
|
||||
// Iterate over each 8-byte chunk and count allocations
|
||||
// with an intrinsic. Note that newMarkBits guarantees that
|
||||
// gcmarkBits will be 8-byte aligned, so we don't have to
|
||||
// worry about edge cases, irrelevant bits will simply be zero.
|
||||
for i := uintptr(0); i < bytes; i += 8 {
|
||||
// Extract 64 bits from the byte pointer and get a OnesCount.
|
||||
// Note that the unsafe cast here doesn't preserve endianness,
|
||||
// but that's OK. We only care about how many bits are 1, not
|
||||
// about the order we discover them in.
|
||||
mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
|
||||
count += sys.OnesCount64(mrkBits)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user