From a12edb8db6f3fa93a1ccd96a0f84b647d08429ef Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Fri, 17 Feb 2017 14:20:11 -0800 Subject: [PATCH] math/bits: faster OnesCount, added respective benchmarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also: Changed Reverse/ReverseBytes implementations to use the same (smaller) masks as OnesCount. BenchmarkOnesCount-8 37.0 6.26 -83.08% BenchmarkOnesCount8-8 7.24 1.99 -72.51% BenchmarkOnesCount16-8 11.3 2.47 -78.14% BenchmarkOnesCount32-8 18.4 3.02 -83.59% BenchmarkOnesCount64-8 40.0 3.78 -90.55% BenchmarkReverse-8 6.69 6.22 -7.03% BenchmarkReverse8-8 1.64 1.64 +0.00% BenchmarkReverse16-8 2.26 2.18 -3.54% BenchmarkReverse32-8 2.88 2.87 -0.35% BenchmarkReverse64-8 5.64 4.34 -23.05% BenchmarkReverseBytes-8 2.48 2.17 -12.50% BenchmarkReverseBytes16-8 0.63 0.95 +50.79% BenchmarkReverseBytes32-8 1.13 1.24 +9.73% BenchmarkReverseBytes64-8 2.50 2.16 -13.60% OnesCount-8 37.0ns ± 0% 6.3ns ± 0% ~ (p=1.000 n=1+1) OnesCount8-8 7.24ns ± 0% 1.99ns ± 0% ~ (p=1.000 n=1+1) OnesCount16-8 11.3ns ± 0% 2.5ns ± 0% ~ (p=1.000 n=1+1) OnesCount32-8 18.4ns ± 0% 3.0ns ± 0% ~ (p=1.000 n=1+1) OnesCount64-8 40.0ns ± 0% 3.8ns ± 0% ~ (p=1.000 n=1+1) Reverse-8 6.69ns ± 0% 6.22ns ± 0% ~ (p=1.000 n=1+1) Reverse8-8 1.64ns ± 0% 1.64ns ± 0% ~ (all samples are equal) Reverse16-8 2.26ns ± 0% 2.18ns ± 0% ~ (p=1.000 n=1+1) Reverse32-8 2.88ns ± 0% 2.87ns ± 0% ~ (p=1.000 n=1+1) Reverse64-8 5.64ns ± 0% 4.34ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes-8 2.48ns ± 0% 2.17ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes16-8 0.63ns ± 0% 0.95ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes32-8 1.13ns ± 0% 1.24ns ± 0% ~ (p=1.000 n=1+1) ReverseBytes64-8 2.50ns ± 0% 2.16ns ± 0% ~ (p=1.000 n=1+1) Change-Id: I591b0ffc83fc3a42828256b6e5030f32c64f9497 Reviewed-on: https://go-review.googlesource.com/37218 Reviewed-by: Matthew Dempsky --- src/math/bits/bits.go | 88 ++++++++++++++++++++++++++------------ src/math/bits/bits_impl.go | 8 ---- src/math/bits/bits_test.go | 73 +++++++++++++++++++++++++------ 3 files changed, 119 insertions(+), 50 deletions(-) diff --git a/src/math/bits/bits.go b/src/math/bits/bits.go index eef59581cd..7a1ffdf304 100644 --- a/src/math/bits/bits.go +++ b/src/math/bits/bits.go @@ -45,20 +45,58 @@ func TrailingZeros64(x uint64) int { return ntz64(x) } // --- OnesCount --- +const m0 = 0x5555555555555555 // 01010101 ... +const m1 = 0x3333333333333333 // 00110011 ... +const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ... +const m3 = 0x00ff00ff00ff00ff // etc. +const m4 = 0x0000ffff0000ffff +const m5 = 0x00000000ffffffff + // OnesCount returns the number of one bits ("population count") in x. -func OnesCount(x uint) int { return pop(uint64(x)) } +func OnesCount(x uint) int { + if UintSize == 32 { + return OnesCount32(uint32(x)) + } + return OnesCount64(uint64(x)) +} // OnesCount8 returns the number of one bits ("population count") in x. -func OnesCount8(x uint8) int { return pop(uint64(x)) } +func OnesCount8(x uint8) int { + const m = 1<<8 - 1 + x = x>>1&(m0&m) + x&(m0&m) + x = x>>2&(m1&m) + x&(m1&m) + return int(x>>4 + x&(m2&m)) +} // OnesCount16 returns the number of one bits ("population count") in x. -func OnesCount16(x uint16) int { return pop(uint64(x)) } +func OnesCount16(x uint16) int { + const m = 1<<16 - 1 + x = x>>1&(m0&m) + x&(m0&m) + x = x>>2&(m1&m) + x&(m1&m) + x = x>>4&(m2&m) + x&(m2&m) + return int(x>>8 + x&(m3&m)) +} // OnesCount32 returns the number of one bits ("population count") in x. -func OnesCount32(x uint32) int { return pop(uint64(x)) } +func OnesCount32(x uint32) int { + const m = 1<<32 - 1 + x = x>>1&(m0&m) + x&(m0&m) + x = x>>2&(m1&m) + x&(m1&m) + x = x>>4&(m2&m) + x&(m2&m) + x = x>>8&(m3&m) + x&(m3&m) + return int(x>>16 + x&(m4&m)) +} // OnesCount64 returns the number of one bits ("population count") in x. -func OnesCount64(x uint64) int { return pop(uint64(x)) } +func OnesCount64(x uint64) int { + const m = 1<<64 - 1 + x = x>>1&(m0&m) + x&(m0&m) + x = x>>2&(m1&m) + x&(m1&m) + x = x>>4&(m2&m) + x&(m2&m) + x = x>>8&(m3&m) + x&(m3&m) + x = x>>16&(m4&m) + x&(m4&m) + return int(x>>32 + x&(m5&m)) +} // --- RotateLeft --- @@ -96,12 +134,6 @@ func RotateRight64(x uint64, k int) uint64 { return uint64(rot(uint64(x), 64, 64 // --- Reverse --- -const m0 = 0xaaaaaaaaaaaaaaaa // 10101010 ... -const m1 = 0xcccccccccccccccc // 11001100 ... -const m2 = 0xf0f0f0f0f0f0f0f0 // 11110000 ... -const m3 = 0xff00ff00ff00ff00 // etc. -const m4 = 0xffff0000ffff0000 - // Reverse returns the value of x with its bits in reversed order. func Reverse(x uint) uint { if UintSize == 32 { @@ -113,38 +145,38 @@ func Reverse(x uint) uint { // Reverse8 returns the value of x with its bits in reversed order. func Reverse8(x uint8) uint8 { const m = 1<<8 - 1 - x = x&(m0&m)>>1 | x<<1&(m0&m) - x = x&(m1&m)>>2 | x<<2&(m1&m) + x = x>>1&(m0&m) | x&(m0&m)<<1 + x = x>>2&(m1&m) | x&(m1&m)<<2 return x>>4 | x<<4 } // Reverse16 returns the value of x with its bits in reversed order. func Reverse16(x uint16) uint16 { const m = 1<<16 - 1 - x = x&(m0&m)>>1 | x<<1&(m0&m) - x = x&(m1&m)>>2 | x<<2&(m1&m) - x = x&(m2&m)>>4 | x<<4&(m2&m) + x = x>>1&(m0&m) | x&(m0&m)<<1 + x = x>>2&(m1&m) | x&(m1&m)<<2 + x = x>>4&(m2&m) | x&(m2&m)<<4 return x>>8 | x<<8 } // Reverse32 returns the value of x with its bits in reversed order. func Reverse32(x uint32) uint32 { const m = 1<<32 - 1 - x = x&(m0&m)>>1 | x<<1&(m0&m) - x = x&(m1&m)>>2 | x<<2&(m1&m) - x = x&(m2&m)>>4 | x<<4&(m2&m) - x = x&(m3&m)>>8 | x<<8&(m3&m) + x = x>>1&(m0&m) | x&(m0&m)<<1 + x = x>>2&(m1&m) | x&(m1&m)<<2 + x = x>>4&(m2&m) | x&(m2&m)<<4 + x = x>>8&(m3&m) | x&(m3&m)<<8 return x>>16 | x<<16 } // Reverse64 returns the value of x with its bits in reversed order. func Reverse64(x uint64) uint64 { const m = 1<<64 - 1 - x = x&(m0&m)>>1 | x<<1&(m0&m) - x = x&(m1&m)>>2 | x<<2&(m1&m) - x = x&(m2&m)>>4 | x<<4&(m2&m) - x = x&(m3&m)>>8 | x<<8&(m3&m) - x = x&(m4&m)>>16 | x<<16&(m4&m) + x = x>>1&(m0&m) | x&(m0&m)<<1 + x = x>>2&(m1&m) | x&(m1&m)<<2 + x = x>>4&(m2&m) | x&(m2&m)<<4 + x = x>>8&(m3&m) | x&(m3&m)<<8 + x = x>>16&(m4&m) | x&(m4&m)<<16 return x>>32 | x<<32 } @@ -166,15 +198,15 @@ func ReverseBytes16(x uint16) uint16 { // ReverseBytes32 returns the value of x with its bytes in reversed order. func ReverseBytes32(x uint32) uint32 { const m = 1<<32 - 1 - x = x&(m3&m)>>8 | x<<8&(m3&m) + x = x>>8&(m3&m) | x&(m3&m)<<8 return x>>16 | x<<16 } // ReverseBytes64 returns the value of x with its bytes in reversed order. func ReverseBytes64(x uint64) uint64 { const m = 1<<64 - 1 - x = x&(m3&m)>>8 | x<<8&(m3&m) - x = x&(m4&m)>>16 | x<<16&(m4&m) + x = x>>8&(m3&m) | x&(m3&m)<<8 + x = x>>16&(m4&m) | x&(m4&m)<<16 return x>>32 | x<<32 } diff --git a/src/math/bits/bits_impl.go b/src/math/bits/bits_impl.go index 3a425e3b83..c7834106c8 100644 --- a/src/math/bits/bits_impl.go +++ b/src/math/bits/bits_impl.go @@ -74,14 +74,6 @@ func ntz64(x uint64) int { return int(deBruijn64tab[(x&-x)*deBruijn64>>(64-6)]) } -func pop(x uint64) (n int) { - for x != 0 { - n++ - x &= x - 1 - } - return -} - func pos(k int) uint { if k < 0 { panic("negative rotation count") diff --git a/src/math/bits/bits_test.go b/src/math/bits/bits_test.go index 9a8ae926d3..b268b0a004 100644 --- a/src/math/bits/bits_test.go +++ b/src/math/bits/bits_test.go @@ -189,6 +189,56 @@ func TestOnesCount(t *testing.T) { } } +// Exported (global) variable to store function results +// during benchmarking to ensure side-effect free calls +// are not optimized away. +var Unused int + +// Exported (global) variable serving as input for some +// of the benchmarks to ensure side-effect free calls +// are not optimized away. +var Input uint64 = deBruijn64 + +func BenchmarkOnesCount(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount(uint(Input)) + } + Unused = s +} + +func BenchmarkOnesCount8(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount8(uint8(Input)) + } + Unused = s +} + +func BenchmarkOnesCount16(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount16(uint16(Input)) + } + Unused = s +} + +func BenchmarkOnesCount32(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount32(uint32(Input)) + } + Unused = s +} + +func BenchmarkOnesCount64(b *testing.B) { + var s int + for i := 0; i < b.N; i++ { + s += OnesCount64(uint64(Input)) + } + Unused = s +} + func TestRotateLeft(t *testing.T) { var m uint64 = deBruijn64 @@ -367,17 +417,12 @@ func testReverse(t *testing.T, x64, want64 uint64) { } } -// Exported (global) variable to store function results -// during benchmarking, to ensure side-effect free calls -// are not optimized away. -var Unused uint64 - func BenchmarkReverse(b *testing.B) { var s uint for i := 0; i < b.N; i++ { s += Reverse(uint(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverse8(b *testing.B) { @@ -385,7 +430,7 @@ func BenchmarkReverse8(b *testing.B) { for i := 0; i < b.N; i++ { s += Reverse8(uint8(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverse16(b *testing.B) { @@ -393,7 +438,7 @@ func BenchmarkReverse16(b *testing.B) { for i := 0; i < b.N; i++ { s += Reverse16(uint16(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverse32(b *testing.B) { @@ -401,7 +446,7 @@ func BenchmarkReverse32(b *testing.B) { for i := 0; i < b.N; i++ { s += Reverse32(uint32(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverse64(b *testing.B) { @@ -409,7 +454,7 @@ func BenchmarkReverse64(b *testing.B) { for i := 0; i < b.N; i++ { s += Reverse64(uint64(i)) } - Unused = s + Unused = int(s) } func TestReverseBytes(t *testing.T) { @@ -473,7 +518,7 @@ func BenchmarkReverseBytes(b *testing.B) { for i := 0; i < b.N; i++ { s += ReverseBytes(uint(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverseBytes16(b *testing.B) { @@ -481,7 +526,7 @@ func BenchmarkReverseBytes16(b *testing.B) { for i := 0; i < b.N; i++ { s += ReverseBytes16(uint16(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverseBytes32(b *testing.B) { @@ -489,7 +534,7 @@ func BenchmarkReverseBytes32(b *testing.B) { for i := 0; i < b.N; i++ { s += ReverseBytes32(uint32(i)) } - Unused = uint64(s) + Unused = int(s) } func BenchmarkReverseBytes64(b *testing.B) { @@ -497,7 +542,7 @@ func BenchmarkReverseBytes64(b *testing.B) { for i := 0; i < b.N; i++ { s += ReverseBytes64(uint64(i)) } - Unused = s + Unused = int(s) } func TestLen(t *testing.T) {