mirror of
https://github.com/golang/go
synced 2024-11-06 00:26:11 -07:00
d9a50a6531
On amd64, Ctz must include special handling of zeros.
But the prove pass has enough information to detect whether the input
is non-zero, allowing a more efficient lowering.

Introduce new CtzNonZero ops to capture and use this information.

Benchmark code:

func BenchmarkVisitBits(b *testing.B) {
	b.Run("8", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			x := uint8(0xff)
			for x != 0 {
				sink = bits.TrailingZeros8(x)
				x &= x - 1
			}
		}
	})

	// and similarly so for 16, 32, 64
}

name            old time/op  new time/op  delta
VisitBits/8-8   7.27ns ± 4%  5.58ns ± 4%  -23.35%  (p=0.000 n=28+26)
VisitBits/16-8  14.7ns ± 7%  10.5ns ± 4%  -28.43%  (p=0.000 n=30+28)
VisitBits/32-8  27.6ns ± 8%  19.3ns ± 3%  -30.14%  (p=0.000 n=30+26)
VisitBits/64-8  44.0ns ±11%  38.0ns ± 5%  -13.48%  (p=0.000 n=30+30)

Fixes #25077

Change-Id: Ie6e5bd86baf39ee8a4ca7cadcf56d934e047f957
Reviewed-on: https://go-review.googlesource.com/109358
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
270 lines
5.0 KiB
Go
270 lines
5.0 KiB
Go
// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "math/bits"
|
|
|
|
// ----------------------- //
//    bits.LeadingZeros    //
// ----------------------- //
|
|
|
|
// LeadingZeros is the codegen test case for bits.LeadingZeros on a uint.
//
// The directive comments in the body are consumed by the asmcheck harness
// and apply to the return statement that follows them. Each one names an
// opcode the listed architecture is expected to emit for the call, so they
// must stay directly above that statement.
func LeadingZeros(n uint) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.LeadingZeros(n)
}
|
|
|
|
// LeadingZeros64 is the codegen test case for bits.LeadingZeros64.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func LeadingZeros64(n uint64) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.LeadingZeros64(n)
}
|
|
|
|
// LeadingZeros32 is the codegen test case for bits.LeadingZeros32.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. On amd64 the check requires both BSRQ and LEAQ and, through
// the pattern prefixed with a minus sign, forbids CMOVQEQ.
func LeadingZeros32(n uint32) int {
	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.LeadingZeros32(n)
}
|
|
|
|
// LeadingZeros16 is the codegen test case for bits.LeadingZeros16.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. On amd64 the check requires BSRL and LEAL and forbids
// CMOVQEQ (the pattern prefixed with a minus sign).
func LeadingZeros16(n uint16) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.LeadingZeros16(n)
}
|
|
|
|
// LeadingZeros8 is the codegen test case for bits.LeadingZeros8.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. On amd64 the check requires BSRL and LEAL and forbids
// CMOVQEQ (the pattern prefixed with a minus sign).
func LeadingZeros8(n uint8) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.LeadingZeros8(n)
}
|
|
|
|
// --------------- //
//    bits.Len*    //
// --------------- //
|
|
|
|
// Len is the codegen test case for bits.Len on a uint.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func Len(n uint) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.Len(n)
}
|
|
|
|
// Len64 is the codegen test case for bits.Len64.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func Len64(n uint64) int {
	// amd64:"BSRQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.Len64(n)
}
|
|
|
|
// Len32 is the codegen test case for bits.Len32.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. On amd64 the check requires both BSRQ and LEAQ and forbids
// CMOVQEQ (the pattern prefixed with a minus sign).
func Len32(n uint32) int {
	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.Len32(n)
}
|
|
|
|
// Len16 is the codegen test case for bits.Len16.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. On amd64 the check requires BSRL and LEAL and forbids
// CMOVQEQ (the pattern prefixed with a minus sign).
func Len16(n uint16) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.Len16(n)
}
|
|
|
|
// Len8 is the codegen test case for bits.Len8.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. On amd64 the check requires BSRL and LEAL and forbids
// CMOVQEQ (the pattern prefixed with a minus sign).
func Len8(n uint8) int {
	// amd64:"BSRL","LEAL",-"CMOVQEQ"
	// s390x:"FLOGR"
	// arm:"CLZ" arm64:"CLZ"
	// mips:"CLZ"
	return bits.Len8(n)
}
|
|
|
|
// -------------------- //
//    bits.OnesCount    //
// -------------------- //
|
|
|
|
// OnesCount is the codegen test case for bits.OnesCount on a uint.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires POPCNTQ plus a reference matching
// support_popcnt; the arm64 check requires VCNT and VUADDLV.
func OnesCount(n uint) int {
	// amd64:"POPCNTQ",".*support_popcnt"
	// arm64:"VCNT","VUADDLV"
	return bits.OnesCount(n)
}
|
|
|
|
// OnesCount64 is the codegen test case for bits.OnesCount64.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires POPCNTQ plus a reference matching
// support_popcnt; the arm64 check requires VCNT and VUADDLV.
func OnesCount64(n uint64) int {
	// amd64:"POPCNTQ",".*support_popcnt"
	// arm64:"VCNT","VUADDLV"
	return bits.OnesCount64(n)
}
|
|
|
|
// OnesCount32 is the codegen test case for bits.OnesCount32.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires POPCNTL plus a reference matching
// support_popcnt; the arm64 check requires VCNT and VUADDLV.
func OnesCount32(n uint32) int {
	// amd64:"POPCNTL",".*support_popcnt"
	// arm64:"VCNT","VUADDLV"
	return bits.OnesCount32(n)
}
|
|
|
|
// OnesCount16 is the codegen test case for bits.OnesCount16.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires POPCNTL plus a reference matching
// support_popcnt; the arm64 check requires VCNT and VUADDLV.
func OnesCount16(n uint16) int {
	// amd64:"POPCNTL",".*support_popcnt"
	// arm64:"VCNT","VUADDLV"
	return bits.OnesCount16(n)
}
|
|
|
|
// ----------------------- //
//    bits.ReverseBytes    //
// ----------------------- //
|
|
|
|
// ReverseBytes is the codegen test case for bits.ReverseBytes on a uint.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func ReverseBytes(n uint) uint {
	// amd64:"BSWAPQ"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes(n)
}
|
|
|
|
// ReverseBytes64 is the codegen test case for bits.ReverseBytes64.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func ReverseBytes64(n uint64) uint64 {
	// amd64:"BSWAPQ"
	// s390x:"MOVDBR"
	// arm64:"REV"
	return bits.ReverseBytes64(n)
}
|
|
|
|
// ReverseBytes32 is the codegen test case for bits.ReverseBytes32.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func ReverseBytes32(n uint32) uint32 {
	// amd64:"BSWAPL"
	// s390x:"MOVWBR"
	// arm64:"REVW"
	return bits.ReverseBytes32(n)
}
|
|
|
|
// ReverseBytes16 is the codegen test case for bits.ReverseBytes16.
//
// The asmcheck directive in the body applies to the return statement that
// follows it; only amd64 is checked, and it is expected to emit ROLW.
func ReverseBytes16(n uint16) uint16 {
	// amd64:"ROLW"
	return bits.ReverseBytes16(n)
}
|
|
|
|
// --------------------- //
//    bits.RotateLeft    //
// --------------------- //
|
|
|
|
// RotateLeft64 is the codegen test case for bits.RotateLeft64 with the
// constant rotation count 37.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func RotateLeft64(n uint64) uint64 {
	// amd64:"ROLQ"
	// arm64:"ROR"
	// ppc64:"ROTL"
	return bits.RotateLeft64(n, 37)
}
|
|
|
|
// RotateLeft32 is the codegen test case for bits.RotateLeft32 with the
// constant rotation count 9.
//
// The asmcheck directives in the body apply to the return statement that
// follows them and list, per architecture, the opcode expected for the call.
func RotateLeft32(n uint32) uint32 {
	// amd64:"ROLL" 386:"ROLL"
	// arm64:"RORW"
	// ppc64:"ROTLW"
	return bits.RotateLeft32(n, 9)
}
|
|
|
|
// RotateLeft16 is the codegen test case for bits.RotateLeft16 with the
// constant rotation count 5.
//
// The asmcheck directive in the body applies to the return statement that
// follows it; amd64 and 386 are both expected to emit ROLW.
func RotateLeft16(n uint16) uint16 {
	// amd64:"ROLW" 386:"ROLW"
	return bits.RotateLeft16(n, 5)
}
|
|
|
|
// RotateLeft8 is the codegen test case for bits.RotateLeft8 with the
// constant rotation count 5.
//
// The asmcheck directive in the body applies to the return statement that
// follows it; amd64 and 386 are both expected to emit ROLB.
func RotateLeft8(n uint8) uint8 {
	// amd64:"ROLB" 386:"ROLB"
	return bits.RotateLeft8(n, 5)
}
|
|
|
|
// ------------------------ //
//    bits.TrailingZeros    //
// ------------------------ //
|
|
|
|
// TrailingZeros is the codegen test case for bits.TrailingZeros on a uint
// whose value is not known to be non-zero.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires BSFQ together with a MOVL of the
// constant 64 and a CMOVQEQ, which appears to be the zero-input handling
// that the IterateBits cases expect to be optimized away.
func TrailingZeros(n uint) int {
	// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// s390x:"FLOGR"
	return bits.TrailingZeros(n)
}
|
|
|
|
// TrailingZeros64 is the codegen test case for bits.TrailingZeros64 on a
// value that is not known to be non-zero.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires BSFQ together with a MOVL of the
// constant 64 and a CMOVQEQ.
func TrailingZeros64(n uint64) int {
	// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
	// s390x:"FLOGR"
	return bits.TrailingZeros64(n)
}
|
|
|
|
// TrailingZeros32 is the codegen test case for bits.TrailingZeros32 on a
// value that is not known to be non-zero.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires a BTSQ with the constant 32 as well
// as BSFQ; the s390x check requires FLOGR and MOVWZ.
func TrailingZeros32(n uint32) int {
	// amd64:"BTSQ\\t\\$32","BSFQ"
	// s390x:"FLOGR","MOVWZ"
	return bits.TrailingZeros32(n)
}
|
|
|
|
// TrailingZeros16 is the codegen test case for bits.TrailingZeros16 on a
// value that is not known to be non-zero.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires BSFL and a BTSL with the constant
// 16; the s390x check requires FLOGR and an OR with the constant 65536.
func TrailingZeros16(n uint16) int {
	// amd64:"BSFL","BTSL\\t\\$16"
	// s390x:"FLOGR","OR\t\\$65536"
	return bits.TrailingZeros16(n)
}
|
|
|
|
// TrailingZeros8 is the codegen test case for bits.TrailingZeros8 on a
// value that is not known to be non-zero.
//
// The asmcheck directives in the body apply to the return statement that
// follows them. The amd64 check requires BSFL and a BTSL with the constant
// 8; the s390x check requires FLOGR and an OR with the constant 256.
func TrailingZeros8(n uint8) int {
	// amd64:"BSFL","BTSL\\t\\$8"
	// s390x:"FLOGR","OR\t\\$256"
	return bits.TrailingZeros8(n)
}
|
|
|
|
// IterateBitsNN checks special handling of TrailingZerosNN when the input is known to be non-zero.
|
|
|
|
// IterateBits returns the sum of the bit indexes of every set bit in n. Each
// iteration adds the index of the lowest set bit (bits.TrailingZeros) and
// then clears that bit with n &= n - 1.
//
// The loop condition guarantees n is non-zero at the TrailingZeros call, so
// the amd64 asmcheck directive requires BSFQ and forbids the conditional
// move used for zero inputs. The forbidden opcode is spelled CMOVQEQ, the
// same mnemonic the TrailingZeros case requires; the earlier spelling
// without the Q could never match, which left the negative check vacuous.
// The directive applies to the statement directly below it.
func IterateBits(n uint) int {
	i := 0
	for n != 0 {
		// amd64:"BSFQ",-"CMOVQEQ"
		i += bits.TrailingZeros(n)
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits64 returns the sum of the bit indexes of every set bit in n.
// Each iteration adds the index of the lowest set bit
// (bits.TrailingZeros64) and then clears that bit with n &= n - 1.
//
// The loop condition guarantees n is non-zero at the TrailingZeros64 call,
// so the amd64 asmcheck directive requires BSFQ and forbids the conditional
// move used for zero inputs. The forbidden opcode is spelled CMOVQEQ, the
// same mnemonic the TrailingZeros64 case requires; the earlier spelling
// without the Q could never match, which left the negative check vacuous.
// The directive applies to the statement directly below it.
func IterateBits64(n uint64) int {
	i := 0
	for n != 0 {
		// amd64:"BSFQ",-"CMOVQEQ"
		i += bits.TrailingZeros64(n)
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits32 returns the sum of the bit indexes of every set bit in n.
// Each iteration adds the index of the lowest set bit
// (bits.TrailingZeros32) and then clears that bit with n &= n - 1.
//
// The loop condition guarantees n is non-zero at the TrailingZeros32 call,
// so the amd64 asmcheck directive requires BSFL and forbids the BTSQ that
// the plain TrailingZeros32 case requires. The directive applies to the
// statement directly below it.
func IterateBits32(n uint32) int {
	i := 0
	for n != 0 {
		// amd64:"BSFL",-"BTSQ"
		i += bits.TrailingZeros32(n)
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits16 returns the sum of the bit indexes of every set bit in n.
// Each iteration adds the index of the lowest set bit
// (bits.TrailingZeros16) and then clears that bit with n &= n - 1.
//
// The loop condition guarantees n is non-zero at the TrailingZeros16 call,
// so the amd64 asmcheck directive requires BSFL and forbids the BTSL that
// the plain TrailingZeros16 case requires. The directive applies to the
// statement directly below it.
func IterateBits16(n uint16) int {
	i := 0
	for n != 0 {
		// amd64:"BSFL",-"BTSL"
		i += bits.TrailingZeros16(n)
		n &= n - 1
	}
	return i
}
|
|
|
|
// IterateBits8 returns the sum of the bit indexes of every set bit in n.
// Each iteration adds the index of the lowest set bit (bits.TrailingZeros8)
// and then clears that bit with n &= n - 1.
//
// The loop condition guarantees n is non-zero at the TrailingZeros8 call, so
// the amd64 asmcheck directive requires BSFL and forbids the BTSL that the
// plain TrailingZeros8 case requires. The directive applies to the statement
// directly below it.
func IterateBits8(n uint8) int {
	i := 0
	for n != 0 {
		// amd64:"BSFL",-"BTSL"
		i += bits.TrailingZeros8(n)
		n &= n - 1
	}
	return i
}
|