mirror of
https://github.com/golang/go
synced 2024-11-11 23:20:24 -07:00
runtime/internal/sys: better fallback algorithms for intrinsics
Use deBruijn sequences to count low-order zeros. Reorg bswap to not use &^, it takes another instruction on x86. Change-Id: I4a5ed9fd16ee6a279d88c067e8a2ba11de821156 Reviewed-on: https://go-review.googlesource.com/22084 Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
12e3b184f0
commit
98b6febcef
@ -4,88 +4,97 @@
|
||||
|
||||
package sys
|
||||
|
||||
// Using techniques from http://supertech.csail.mit.edu/papers/debruijn.pdf
|
||||
|
||||
const deBruijn64 = 0x0218a392cd3d5dbf
|
||||
|
||||
var deBruijnIdx64 = [64]byte{
|
||||
0, 1, 2, 7, 3, 13, 8, 19,
|
||||
4, 25, 14, 28, 9, 34, 20, 40,
|
||||
5, 17, 26, 38, 15, 46, 29, 48,
|
||||
10, 31, 35, 54, 21, 50, 41, 57,
|
||||
63, 6, 12, 18, 24, 27, 33, 39,
|
||||
16, 37, 45, 47, 30, 53, 49, 56,
|
||||
62, 11, 23, 32, 36, 44, 52, 55,
|
||||
61, 22, 43, 51, 60, 42, 59, 58,
|
||||
}
|
||||
|
||||
const deBruijn32 = 0x04653adf
|
||||
|
||||
var deBruijnIdx32 = [32]byte{
|
||||
0, 1, 2, 6, 3, 11, 7, 16,
|
||||
4, 14, 12, 21, 8, 23, 17, 26,
|
||||
31, 5, 10, 15, 13, 20, 22, 25,
|
||||
30, 9, 19, 24, 29, 18, 28, 27,
|
||||
}
|
||||
|
||||
const deBruijn16 = 0x09af
|
||||
|
||||
var deBruijnIdx16 = [16]byte{
|
||||
0, 1, 2, 5, 3, 9, 6, 11,
|
||||
15, 4, 8, 10, 14, 7, 13, 12,
|
||||
}
|
||||
|
||||
const deBruijn8 = 0x17
|
||||
|
||||
var deBruijnIdx8 = [8]byte{
|
||||
0, 1, 2, 4, 7, 3, 6, 5,
|
||||
}
|
||||
|
||||
// Ctz64 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 64.
|
||||
func Ctz64(x uint64) uint64 {
|
||||
if x&0xffffffff == 0 {
|
||||
return 32 + uint64(Ctz32(uint32(x>>32)))
|
||||
}
|
||||
return uint64(Ctz32(uint32(x)))
|
||||
|
||||
x &= -x // isolate low-order bit
|
||||
y := x * deBruijn64 >> 58 // extract part of deBruijn sequence
|
||||
y = uint64(deBruijnIdx64[y]) // convert to bit index
|
||||
z := (x - 1) >> 57 & 64 // adjustment if zero
|
||||
return y + z
|
||||
}
|
||||
|
||||
// Ctz32 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 32.
|
||||
func Ctz32(x uint32) uint32 {
|
||||
if x&0xffff == 0 {
|
||||
return 16 + uint32(Ctz16(uint16(x>>16)))
|
||||
}
|
||||
return uint32(Ctz16(uint16(x)))
|
||||
x &= -x // isolate low-order bit
|
||||
y := x * deBruijn32 >> 27 // extract part of deBruijn sequence
|
||||
y = uint32(deBruijnIdx32[y]) // convert to bit index
|
||||
z := (x - 1) >> 26 & 32 // adjustment if zero
|
||||
return y + z
|
||||
}
|
||||
|
||||
// Ctz16 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 16.
|
||||
func Ctz16(x uint16) uint16 {
|
||||
if x&0xff == 0 {
|
||||
return 8 + uint16(Ctz8(uint8(x>>8)))
|
||||
}
|
||||
return uint16(Ctz8(uint8(x)))
|
||||
x &= -x // isolate low-order bit
|
||||
y := x * deBruijn16 >> 12 // extract part of deBruijn sequence
|
||||
y = uint16(deBruijnIdx16[y]) // convert to bit index
|
||||
z := (x - 1) >> 11 & 16 // adjustment if zero
|
||||
return y + z
|
||||
}
|
||||
|
||||
// Ctz8 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 8.
|
||||
func Ctz8(x uint8) uint8 {
|
||||
return ctzVals[x]
|
||||
x &= -x // isolate low-order bit
|
||||
y := x * deBruijn8 >> 5 // extract part of deBruijn sequence
|
||||
y = uint8(deBruijnIdx8[y]) // convert to bit index
|
||||
z := (x - 1) >> 4 & 8 // adjustment if zero
|
||||
return y + z
|
||||
}
|
||||
|
||||
var ctzVals = [256]uint8{
|
||||
8, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
6, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
7, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
6, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
5, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0,
|
||||
4, 0, 1, 0, 2, 0, 1, 0,
|
||||
3, 0, 1, 0, 2, 0, 1, 0}
|
||||
|
||||
// Bswap64 returns its input with byte order reversed
|
||||
// 0x0102030405060708 -> 0x0807060504030201
|
||||
func Bswap64(x uint64) uint64 {
|
||||
c8 := uint64(0xff00ff00ff00ff00)
|
||||
a := (x & c8) >> 8
|
||||
b := (x &^ c8) << 8
|
||||
c8 := uint64(0x00ff00ff00ff00ff)
|
||||
a := x >> 8 & c8
|
||||
b := (x & c8) << 8
|
||||
x = a | b
|
||||
c16 := uint64(0xffff0000ffff0000)
|
||||
a = (x & c16) >> 16
|
||||
b = (x &^ c16) << 16
|
||||
c16 := uint64(0x0000ffff0000ffff)
|
||||
a = x >> 16 & c16
|
||||
b = (x & c16) << 16
|
||||
x = a | b
|
||||
c32 := uint64(0xffffffff00000000)
|
||||
a = (x & c32) >> 32
|
||||
b = (x &^ c32) << 32
|
||||
c32 := uint64(0x00000000ffffffff)
|
||||
a = x >> 32 & c32
|
||||
b = (x & c32) << 32
|
||||
x = a | b
|
||||
return x
|
||||
}
|
||||
@ -93,13 +102,13 @@ func Bswap64(x uint64) uint64 {
|
||||
// Bswap32 returns its input with byte order reversed
|
||||
// 0x01020304 -> 0x04030201
|
||||
func Bswap32(x uint32) uint32 {
|
||||
c8 := uint32(0xff00ff00)
|
||||
a := (x & c8) >> 8
|
||||
b := (x &^ c8) << 8
|
||||
c8 := uint32(0x00ff00ff)
|
||||
a := x >> 8 & c8
|
||||
b := (x & c8) << 8
|
||||
x = a | b
|
||||
c16 := uint32(0xffff0000)
|
||||
a = (x & c16) >> 16
|
||||
b = (x &^ c16) << 16
|
||||
c16 := uint32(0x0000ffff)
|
||||
a = x >> 16 & c16
|
||||
b = (x & c16) << 16
|
||||
x = a | b
|
||||
return x
|
||||
}
|
||||
|
54
src/runtime/internal/sys/intrinsics_test.go
Normal file
54
src/runtime/internal/sys/intrinsics_test.go
Normal file
@ -0,0 +1,54 @@
|
||||
package sys_test
|
||||
|
||||
import (
|
||||
"runtime/internal/sys"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCtz64(t *testing.T) {
|
||||
for i := uint(0); i <= 64; i++ {
|
||||
x := uint64(5) << i
|
||||
if got := sys.Ctz64(x); got != uint64(i) {
|
||||
t.Errorf("Ctz64(%d)=%d, want %d", x, got, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
func TestCtz32(t *testing.T) {
|
||||
for i := uint(0); i <= 32; i++ {
|
||||
x := uint32(5) << i
|
||||
if got := sys.Ctz32(x); got != uint32(i) {
|
||||
t.Errorf("Ctz32(%d)=%d, want %d", x, got, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
func TestCtz16(t *testing.T) {
|
||||
for i := uint(0); i <= 16; i++ {
|
||||
x := uint16(5) << i
|
||||
if got := sys.Ctz16(x); got != uint16(i) {
|
||||
t.Errorf("Ctz16(%d)=%d, want %d", x, got, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
func TestCtz8(t *testing.T) {
|
||||
for i := uint(0); i <= 8; i++ {
|
||||
x := uint8(5) << i
|
||||
if got := sys.Ctz8(x); got != uint8(i) {
|
||||
t.Errorf("Ctz8(%d)=%d, want %d", x, got, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBswap64(t *testing.T) {
|
||||
x := uint64(0x1122334455667788)
|
||||
y := sys.Bswap64(x)
|
||||
if y != 0x8877665544332211 {
|
||||
t.Errorf("Bswap(%x)=%x, want 0x8877665544332211", x, y)
|
||||
}
|
||||
}
|
||||
func TestBswap32(t *testing.T) {
|
||||
x := uint32(0x11223344)
|
||||
y := sys.Bswap32(x)
|
||||
if y != 0x44332211 {
|
||||
t.Errorf("Bswap(%x)=%x, want 0x44332211", x, y)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user