1
0
mirror of https://github.com/golang/go synced 2024-11-17 13:04:54 -07:00

runtime: use OnesCount64 to count allocated objects in a span

This change modifies the implementation of (*mspan).countAlloc by
using OnesCount64 (which on many systems is intrinsified). It does so by
using an unsafe pointer cast, but in this case we don't care about
endianness because we're just counting bits set.

This change means we no longer need the popcnt table which was redundant
in the runtime anyway. We can also simplify the logic here significantly
by observing that mark bits allocations are always 8-byte aligned, so we
don't need to handle any edge-cases due to the fact that OnesCount64
operates on 64 bits at a time: all irrelevant bits will be zero.

Overall, this implementation is significantly faster than the old one on
amd64, and should be similarly faster (or better!) on other systems
which support the intrinsic. On systems which do not, it should be
roughly the same performance because OnesCount64 is implemented using a
table in the general case.

Results on linux/amd64:

name                         old time/op  new time/op  delta
MSpanCountAlloc/bits=64-4    16.8ns ± 0%  12.7ns ± 0%  -24.40%  (p=0.000 n=5+4)
MSpanCountAlloc/bits=128-4   23.5ns ± 0%  12.8ns ± 0%  -45.70%  (p=0.000 n=4+5)
MSpanCountAlloc/bits=256-4   43.5ns ± 0%  12.8ns ± 0%  -70.67%  (p=0.000 n=4+5)
MSpanCountAlloc/bits=512-4   59.5ns ± 0%  15.4ns ± 0%  -74.12%  (p=0.008 n=5+5)
MSpanCountAlloc/bits=1024-4   116ns ± 1%    23ns ± 0%  -79.84%  (p=0.000 n=5+4)

Change-Id: Id4c994be22224653af5333683a69b0937130ed04
Reviewed-on: https://go-review.googlesource.com/c/go/+/216558
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Michael Anthony Knyszek 2020-01-27 20:41:38 +00:00 committed by Michael Knyszek
parent 7af6a31b48
commit 67c2dcbc59

View File

@ -865,58 +865,22 @@ func (h heapBits) clearCheckmarkSpan(size, n, total uintptr) {
}
}
// oneBitCount is indexed by byte and produces the
// number of 1 bits in that byte. For example 128 has 1 bit set
// and oneBitCount[128] will holds 1.
var oneBitCount = [256]uint8{
0, 1, 1, 2, 1, 2, 2, 3,
1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7,
5, 6, 6, 7, 6, 7, 7, 8}
// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
// TODO:(rlh) Use popcount intrinsic.
func (s *mspan) countAlloc() int {
count := 0
maxIndex := s.nelems / 8
for i := uintptr(0); i < maxIndex; i++ {
mrkBits := *s.gcmarkBits.bytep(i)
count += int(oneBitCount[mrkBits])
}
if bitsInLastByte := s.nelems % 8; bitsInLastByte != 0 {
mrkBits := *s.gcmarkBits.bytep(maxIndex)
mask := uint8((1 << bitsInLastByte) - 1)
bits := mrkBits & mask
count += int(oneBitCount[bits])
bytes := divRoundUp(s.nelems, 8)
// Iterate over each 8-byte chunk and count allocations
// with an intrinsic. Note that newMarkBits guarantees that
// gcmarkBits will be 8-byte aligned, so we don't have to
// worry about edge cases, irrelevant bits will simply be zero.
for i := uintptr(0); i < bytes; i += 8 {
// Extract 64 bits from the byte pointer and get a OnesCount.
// Note that the unsafe cast here doesn't preserve endianness,
// but that's OK. We only care about how many bits are 1, not
// about the order we discover them in.
mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
count += sys.OnesCount64(mrkBits)
}
return count
}