1
0
mirror of https://github.com/golang/go synced 2024-11-11 23:20:24 -07:00

runtime/internal/sys: better fallback algorithms for intrinsics

Use deBruijn sequences to count low-order zeros.
Reorg bswap to not use &^, it takes another instruction on x86.

Change-Id: I4a5ed9fd16ee6a279d88c067e8a2ba11de821156
Reviewed-on: https://go-review.googlesource.com/22084
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Keith Randall 2016-04-14 10:28:35 -07:00
parent 12e3b184f0
commit 98b6febcef
2 changed files with 126 additions and 63 deletions

View File

@ -4,88 +4,97 @@
package sys
// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
const deBruijn64 = 0x0218a392cd3d5dbf
var deBruijnIdx64 = [64]byte{
0, 1, 2, 7, 3, 13, 8, 19,
4, 25, 14, 28, 9, 34, 20, 40,
5, 17, 26, 38, 15, 46, 29, 48,
10, 31, 35, 54, 21, 50, 41, 57,
63, 6, 12, 18, 24, 27, 33, 39,
16, 37, 45, 47, 30, 53, 49, 56,
62, 11, 23, 32, 36, 44, 52, 55,
61, 22, 43, 51, 60, 42, 59, 58,
}
const deBruijn32 = 0x04653adf
var deBruijnIdx32 = [32]byte{
0, 1, 2, 6, 3, 11, 7, 16,
4, 14, 12, 21, 8, 23, 17, 26,
31, 5, 10, 15, 13, 20, 22, 25,
30, 9, 19, 24, 29, 18, 28, 27,
}
const deBruijn16 = 0x09af
var deBruijnIdx16 = [16]byte{
0, 1, 2, 5, 3, 9, 6, 11,
15, 4, 8, 10, 14, 7, 13, 12,
}
const deBruijn8 = 0x17
var deBruijnIdx8 = [8]byte{
0, 1, 2, 4, 7, 3, 6, 5,
}
// Ctz64 counts trailing (low-order) zeroes,
// and if all are zero, then 64.
func Ctz64(x uint64) uint64 {
if x&0xffffffff == 0 {
return 32 + uint64(Ctz32(uint32(x>>32)))
}
return uint64(Ctz32(uint32(x)))
x &= -x // isolate low-order bit
y := x * deBruijn64 >> 58 // extract part of deBruijn sequence
y = uint64(deBruijnIdx64[y]) // convert to bit index
z := (x - 1) >> 57 & 64 // adjustment if zero
return y + z
}
// Ctz32 counts trailing (low-order) zeroes,
// and if all are zero, then 32.
func Ctz32(x uint32) uint32 {
if x&0xffff == 0 {
return 16 + uint32(Ctz16(uint16(x>>16)))
}
return uint32(Ctz16(uint16(x)))
x &= -x // isolate low-order bit
y := x * deBruijn32 >> 27 // extract part of deBruijn sequence
y = uint32(deBruijnIdx32[y]) // convert to bit index
z := (x - 1) >> 26 & 32 // adjustment if zero
return y + z
}
// Ctz16 counts trailing (low-order) zeroes,
// and if all are zero, then 16.
func Ctz16(x uint16) uint16 {
if x&0xff == 0 {
return 8 + uint16(Ctz8(uint8(x>>8)))
}
return uint16(Ctz8(uint8(x)))
x &= -x // isolate low-order bit
y := x * deBruijn16 >> 12 // extract part of deBruijn sequence
y = uint16(deBruijnIdx16[y]) // convert to bit index
z := (x - 1) >> 11 & 16 // adjustment if zero
return y + z
}
// Ctz8 counts trailing (low-order) zeroes,
// and if all are zero, then 8.
func Ctz8(x uint8) uint8 {
return ctzVals[x]
x &= -x // isolate low-order bit
y := x * deBruijn8 >> 5 // extract part of deBruijn sequence
y = uint8(deBruijnIdx8[y]) // convert to bit index
z := (x - 1) >> 4 & 8 // adjustment if zero
return y + z
}
var ctzVals = [256]uint8{
8, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0,
3, 0, 1, 0, 2, 0, 1, 0}
// Bswap64 returns its input with byte order reversed
// 0x0102030405060708 -> 0x0807060504030201
func Bswap64(x uint64) uint64 {
c8 := uint64(0xff00ff00ff00ff00)
a := (x & c8) >> 8
b := (x &^ c8) << 8
c8 := uint64(0x00ff00ff00ff00ff)
a := x >> 8 & c8
b := (x & c8) << 8
x = a | b
c16 := uint64(0xffff0000ffff0000)
a = (x & c16) >> 16
b = (x &^ c16) << 16
c16 := uint64(0x0000ffff0000ffff)
a = x >> 16 & c16
b = (x & c16) << 16
x = a | b
c32 := uint64(0xffffffff00000000)
a = (x & c32) >> 32
b = (x &^ c32) << 32
c32 := uint64(0x00000000ffffffff)
a = x >> 32 & c32
b = (x & c32) << 32
x = a | b
return x
}
@ -93,13 +102,13 @@ func Bswap64(x uint64) uint64 {
// Bswap32 returns its input with byte order reversed
// 0x01020304 -> 0x04030201
func Bswap32(x uint32) uint32 {
c8 := uint32(0xff00ff00)
a := (x & c8) >> 8
b := (x &^ c8) << 8
c8 := uint32(0x00ff00ff)
a := x >> 8 & c8
b := (x & c8) << 8
x = a | b
c16 := uint32(0xffff0000)
a = (x & c16) >> 16
b = (x &^ c16) << 16
c16 := uint32(0x0000ffff)
a = x >> 16 & c16
b = (x & c16) << 16
x = a | b
return x
}

View File

@ -0,0 +1,54 @@
package sys_test
import (
"runtime/internal/sys"
"testing"
)
func TestCtz64(t *testing.T) {
for i := uint(0); i <= 64; i++ {
x := uint64(5) << i
if got := sys.Ctz64(x); got != uint64(i) {
t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
}
}
}
func TestCtz32(t *testing.T) {
for i := uint(0); i <= 32; i++ {
x := uint32(5) << i
if got := sys.Ctz32(x); got != uint32(i) {
t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
}
}
}
func TestCtz16(t *testing.T) {
for i := uint(0); i <= 16; i++ {
x := uint16(5) << i
if got := sys.Ctz16(x); got != uint16(i) {
t.Errorf("Ctz16(%d)=%d, want %d", x, got, i)
}
}
}
func TestCtz8(t *testing.T) {
for i := uint(0); i <= 8; i++ {
x := uint8(5) << i
if got := sys.Ctz8(x); got != uint8(i) {
t.Errorf("Ctz8(%d)=%d, want %d", x, got, i)
}
}
}
func TestBswap64(t *testing.T) {
x := uint64(0x1122334455667788)
y := sys.Bswap64(x)
if y != 0x8877665544332211 {
t.Errorf("Bswap(%x)=%x, want 0x8877665544332211", x, y)
}
}
func TestBswap32(t *testing.T) {
x := uint32(0x11223344)
y := sys.Bswap32(x)
if y != 0x44332211 {
t.Errorf("Bswap(%x)=%x, want 0x44332211", x, y)
}
}