mirror of
https://github.com/golang/go
synced 2024-11-25 03:47:57 -07:00
[math/bits] Use Wilkes-Wheeler-Gill algorithm for OnesCount64.
This implementation is based on the C function from the "Faster Population Counts Using AVX2 Instructions" paper, Figure 3, available at https://arxiv.org/pdf/1611.07612.pdf. More details and benchmark results are available in #46188. Closes #46188
This commit is contained in:
parent
ce92a2023c
commit
b51e555469
@ -132,33 +132,14 @@ func OnesCount32(x uint32) int {
|
|||||||
|
|
||||||
// OnesCount64 returns the number of one bits ("population count") in x.
|
// OnesCount64 returns the number of one bits ("population count") in x.
|
||||||
func OnesCount64(x uint64) int {
|
func OnesCount64(x uint64) int {
|
||||||
// Implementation: Parallel summing of adjacent bits.
|
// Implementation: Wilkes-Wheeler-Gill algorithm.
|
||||||
// See "Hacker's Delight", Chap. 5: Counting Bits.
|
// See "Faster Population Counts Using AVX2 Instructions", FIGURE 3.
|
||||||
// The following pattern shows the general approach:
|
// Full paper is available at https://arxiv.org/pdf/1611.07612.pdf
|
||||||
//
|
x -= (x >> 1) & m0
|
||||||
// x = x>>1&(m0&m) + x&(m0&m)
|
x = ((x >> 2) & m1) + (x & m1)
|
||||||
// x = x>>2&(m1&m) + x&(m1&m)
|
x = (x + (x >> 4)) & m2
|
||||||
// x = x>>4&(m2&m) + x&(m2&m)
|
x *= 0x0101010101010101
|
||||||
// x = x>>8&(m3&m) + x&(m3&m)
|
return int(x >> 56)
|
||||||
// x = x>>16&(m4&m) + x&(m4&m)
|
|
||||||
// x = x>>32&(m5&m) + x&(m5&m)
|
|
||||||
// return int(x)
|
|
||||||
//
|
|
||||||
// Masking (& operations) can be left away when there's no
|
|
||||||
// danger that a field's sum will carry over into the next
|
|
||||||
// field: Since the result cannot be > 64, 8 bits is enough
|
|
||||||
// and we can ignore the masks for the shifts by 8 and up.
|
|
||||||
// Per "Hacker's Delight", the first line can be simplified
|
|
||||||
// more, but it saves at best one instruction, so we leave
|
|
||||||
// it alone for clarity.
|
|
||||||
const m = 1<<64 - 1
|
|
||||||
x = x>>1&(m0&m) + x&(m0&m)
|
|
||||||
x = x>>2&(m1&m) + x&(m1&m)
|
|
||||||
x = (x>>4 + x) & (m2 & m)
|
|
||||||
x += x >> 8
|
|
||||||
x += x >> 16
|
|
||||||
x += x >> 32
|
|
||||||
return int(x) & (1<<7 - 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- RotateLeft ---
|
// --- RotateLeft ---
|
||||||
|
Loading…
Reference in New Issue
Block a user