mirror of
https://github.com/golang/go
synced 2024-11-24 08:40:14 -07:00
math/bits: faster OnesCount using table lookups for sizes 8,16,32
For uint64, the existing algorithm is faster.

benchmark                  old ns/op     new ns/op     delta
BenchmarkOnesCount8-8      1.95          0.97          -50.26%
BenchmarkOnesCount16-8     2.54          1.39          -45.28%
BenchmarkOnesCount32-8     2.61          1.96          -24.90%

Measured on 2.3 GHz Intel Core i7 running macOS 10.12.3.

Change-Id: I6cc42882fef3d24694720464039161e339a9ae99
Reviewed-on: https://go-review.googlesource.com/37580
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
064e44f218
commit
d7a659b11b
@ -63,32 +63,17 @@ func OnesCount(x uint) int {
|
|||||||
|
|
||||||
// OnesCount8 returns the number of one bits ("population count") in x.
|
// OnesCount8 returns the number of one bits ("population count") in x.
|
||||||
func OnesCount8(x uint8) int {
|
func OnesCount8(x uint8) int {
|
||||||
const m = 1<<8 - 1
|
return int(pop8tab[x])
|
||||||
x = x>>1&(m0&m) + x&(m0&m)
|
|
||||||
x = x>>2&(m1&m) + x&(m1&m)
|
|
||||||
x += x >> 4
|
|
||||||
return int(x) & (1<<4 - 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// OnesCount16 returns the number of one bits ("population count") in x.
|
// OnesCount16 returns the number of one bits ("population count") in x.
|
||||||
func OnesCount16(x uint16) int {
|
func OnesCount16(x uint16) int {
|
||||||
const m = 1<<16 - 1
|
return int(pop8tab[x>>8] + pop8tab[x&0xff])
|
||||||
x = x>>1&(m0&m) + x&(m0&m)
|
|
||||||
x = x>>2&(m1&m) + x&(m1&m)
|
|
||||||
x = (x>>4 + x) & (m2 & m)
|
|
||||||
x += x >> 8
|
|
||||||
return int(x) & (1<<5 - 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// OnesCount32 returns the number of one bits ("population count") in x.
|
// OnesCount32 returns the number of one bits ("population count") in x.
|
||||||
func OnesCount32(x uint32) int {
|
func OnesCount32(x uint32) int {
|
||||||
const m = 1<<32 - 1
|
return int(pop8tab[x>>24] + pop8tab[x>>16&0xff] + pop8tab[x>>8&0xff] + pop8tab[x&0xff])
|
||||||
x = x>>1&(m0&m) + x&(m0&m)
|
|
||||||
x = x>>2&(m1&m) + x&(m1&m)
|
|
||||||
x = (x>>4 + x) & (m2 & m)
|
|
||||||
x += x >> 8
|
|
||||||
x += x >> 16
|
|
||||||
return int(x) & (1<<6 - 1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// OnesCount64 returns the number of one bits ("population count") in x.
|
// OnesCount64 returns the number of one bits ("population count") in x.
|
||||||
|
@ -6,6 +6,25 @@
|
|||||||
|
|
||||||
package bits
|
package bits
|
||||||
|
|
||||||
|
// pop8tab[i] is the number of one bits ("population count") in the byte value i.
var pop8tab = [256]uint8{
|
||||||

0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04,
|
||||||

0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
|
||||||

0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
|
||||||

0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
|
||||||

0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
|
||||||

0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
|
||||||

0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
|
||||||

0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
|
||||||

0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
|
||||||

0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
|
||||||

0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
|
||||||

0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
|
||||||

0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06,
|
||||||

0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
|
||||||

0x03, 0x04, 0x04, 0x05, 0x04, 0x05, 0x05, 0x06, 0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07,
|
||||||

0x04, 0x05, 0x05, 0x06, 0x05, 0x06, 0x06, 0x07, 0x05, 0x06, 0x06, 0x07, 0x06, 0x07, 0x07, 0x08,
|
||||||

}
|
||||||
|
|
||||||
var rev8tab = [256]uint8{
|
var rev8tab = [256]uint8{
|
||||||
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
|
0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
|
||||||
0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
|
0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
|
||||||
|
@ -30,6 +30,7 @@ package bits
|
|||||||
func main() {
|
func main() {
|
||||||
buf := bytes.NewBuffer(header)
|
buf := bytes.NewBuffer(header)
|
||||||
|
|
||||||
|
gen(buf, "pop8tab", pop8)
|
||||||
gen(buf, "rev8tab", rev8)
|
gen(buf, "rev8tab", rev8)
|
||||||
// add more tables as needed
|
// add more tables as needed
|
||||||
|
|
||||||
@ -57,6 +58,14 @@ func gen(w io.Writer, name string, f func(uint8) uint8) {
|
|||||||
fmt.Fprint(w, "\n}\n\n")
|
fmt.Fprint(w, "\n}\n\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pop8 returns the number of one bits in x.
// It is the entry function passed to gen to produce the pop8tab table.
func pop8(x uint8) (n uint8) {
|
||||||

for x != 0 {
|
||||||

x &= x - 1 // clear the lowest set bit, so the loop runs once per one bit
|
||||||

n++
|
||||||

}
|
||||||

return // naked return of the named result n
|
||||||

}
|
||||||
|
|
||||||
func rev8(x uint8) (r uint8) {
|
func rev8(x uint8) (r uint8) {
|
||||||
for i := 8; i > 0; i-- {
|
for i := 8; i > 0; i-- {
|
||||||
r = r<<1 | x&1
|
r = r<<1 | x&1
|
||||||
|
Loading…
Reference in New Issue
Block a user