mirror of
https://github.com/golang/go
synced 2024-11-12 05:30:21 -07:00
cmd/compile: eliminate unnecessary type conversions in TrailingZeros(16|8) for arm64
This CL eliminates unnecessary type conversion operations: OpZeroExt16to64 and OpZeroExt8to64. If the input argument is a nonzero value, then the ORconst operation can also be eliminated. Benchmarks: name old time/op new time/op delta TrailingZeros-8 2.75ns ± 0% 2.75ns ± 0% ~ (all equal) TrailingZeros8-8 3.49ns ± 1% 2.93ns ± 0% -16.00% (p=0.000 n=10+10) TrailingZeros16-8 3.49ns ± 1% 2.93ns ± 0% -16.05% (p=0.000 n=9+10) TrailingZeros32-8 2.67ns ± 1% 2.68ns ± 1% ~ (p=0.468 n=10+10) TrailingZeros64-8 2.67ns ± 1% 2.65ns ± 0% -0.62% (p=0.022 n=10+9) code: func f16(x uint) { z = bits.TrailingZeros16(uint16(x)) } Before: "".f16 STEXT size=48 args=0x8 locals=0x0 leaf 0x0000 00000 (test.go:7) TEXT "".f16(SB), LEAF|NOFRAME|ABIInternal, $0-8 0x0000 00000 (test.go:7) FUNCDATA ZR, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (test.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (test.go:7) FUNCDATA $3, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (test.go:7) PCDATA $2, ZR 0x0000 00000 (test.go:7) PCDATA ZR, ZR 0x0000 00000 (test.go:7) MOVD "".x(FP), R0 0x0004 00004 (test.go:7) MOVHU R0, R0 0x0008 00008 (test.go:7) ORR $65536, R0, R0 0x000c 00012 (test.go:7) RBIT R0, R0 0x0010 00016 (test.go:7) CLZ R0, R0 0x0014 00020 (test.go:7) MOVD R0, "".z(SB) 0x0020 00032 (test.go:7) RET (R30) This line of code is unnecessary: 0x0004 00004 (test.go:7) MOVHU R0, R0 After: "".f16 STEXT size=32 args=0x8 locals=0x0 leaf 0x0000 00000 (test.go:7) TEXT "".f16(SB), LEAF|NOFRAME|ABIInternal, $0-8 0x0000 00000 (test.go:7) FUNCDATA ZR, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (test.go:7) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (test.go:7) FUNCDATA $3, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0000 00000 (test.go:7) PCDATA $2, ZR 0x0000 00000 (test.go:7) PCDATA ZR, ZR 0x0000 00000 (test.go:7) MOVD "".x(FP), R0 0x0004 00004 (test.go:7) ORR $65536, R0, R0 0x0008 00008 (test.go:7) RBITW R0, R0 
0x000c 00012 (test.go:7) CLZW R0, R0 0x0010 00016 (test.go:7) MOVD R0, "".z(SB) 0x001c 00028 (test.go:7) RET (R30) The situation of TrailingZeros8 is similar to TrailingZeros16. Change-Id: I473bdca06be8460a0be87abbae6fe640017e4c9d Reviewed-on: https://go-review.googlesource.com/c/go/+/156999 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
fee84cc905
commit
4e2b0dda8c
@ -3290,7 +3290,7 @@ func init() {
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpCtz16, types.Types[TINT], args[0])
|
||||
},
|
||||
sys.AMD64)
|
||||
sys.AMD64, sys.ARM64)
|
||||
addF("math/bits", "TrailingZeros16",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
x := s.newValue1(ssa.OpZeroExt16to64, types.Types[TUINT64], args[0])
|
||||
@ -3298,7 +3298,7 @@ func init() {
|
||||
y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c)
|
||||
return s.newValue1(ssa.OpCtz64, types.Types[TINT], y)
|
||||
},
|
||||
sys.ARM64, sys.S390X, sys.PPC64)
|
||||
sys.S390X, sys.PPC64)
|
||||
addF("math/bits", "TrailingZeros8",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
x := s.newValue1(ssa.OpZeroExt8to32, types.Types[TUINT32], args[0])
|
||||
@ -3311,7 +3311,7 @@ func init() {
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpCtz8, types.Types[TINT], args[0])
|
||||
},
|
||||
sys.AMD64)
|
||||
sys.AMD64, sys.ARM64)
|
||||
addF("math/bits", "TrailingZeros8",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
x := s.newValue1(ssa.OpZeroExt8to64, types.Types[TUINT64], args[0])
|
||||
@ -3319,7 +3319,7 @@ func init() {
|
||||
y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c)
|
||||
return s.newValue1(ssa.OpCtz64, types.Types[TINT], y)
|
||||
},
|
||||
sys.ARM64, sys.S390X)
|
||||
sys.S390X)
|
||||
alias("math/bits", "ReverseBytes64", "runtime/internal/sys", "Bswap64", all...)
|
||||
alias("math/bits", "ReverseBytes32", "runtime/internal/sys", "Bswap32", all...)
|
||||
// ReverseBytes inlines correctly, no need to intrinsify it.
|
||||
|
@ -97,9 +97,13 @@
|
||||
|
||||
(Ctz64NonZero x) -> (Ctz64 x)
|
||||
(Ctz32NonZero x) -> (Ctz32 x)
|
||||
(Ctz16NonZero x) -> (Ctz32 x)
|
||||
(Ctz8NonZero x) -> (Ctz32 x)
|
||||
|
||||
(Ctz64 <t> x) -> (CLZ (RBIT <t> x))
|
||||
(Ctz32 <t> x) -> (CLZW (RBITW <t> x))
|
||||
(Ctz16 <t> x) -> (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
|
||||
(Ctz8 <t> x) -> (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
|
||||
|
||||
(PopCount64 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
|
||||
(PopCount32 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
|
||||
|
@ -473,6 +473,10 @@ func rewriteValueARM64(v *Value) bool {
|
||||
return rewriteValueARM64_OpConstBool_0(v)
|
||||
case OpConstNil:
|
||||
return rewriteValueARM64_OpConstNil_0(v)
|
||||
case OpCtz16:
|
||||
return rewriteValueARM64_OpCtz16_0(v)
|
||||
case OpCtz16NonZero:
|
||||
return rewriteValueARM64_OpCtz16NonZero_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValueARM64_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
@ -481,6 +485,10 @@ func rewriteValueARM64(v *Value) bool {
|
||||
return rewriteValueARM64_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValueARM64_OpCtz64NonZero_0(v)
|
||||
case OpCtz8:
|
||||
return rewriteValueARM64_OpCtz8_0(v)
|
||||
case OpCtz8NonZero:
|
||||
return rewriteValueARM64_OpCtz8NonZero_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueARM64_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto32U:
|
||||
@ -33182,6 +33190,39 @@ func rewriteValueARM64_OpConstNil_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// rewriteValueARM64_OpCtz16_0 rewrites a generic Ctz16 value in place into
// the ARM64 form CLZW(RBITW(ORconst [0x10000] x)). The outer CLZW keeps the
// original result type of v; the two inner values are typed UInt32. The
// rewrite has no condition, so the function always reports true.
func rewriteValueARM64_OpCtz16_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz16 <t> x)
|
||||
// cond:
|
||||
// result: (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
|
||||
// The loop body returns unconditionally, so it executes exactly once.
for {
|
||||
// Capture the type and operand before reset clears v.
t := v.Type
|
||||
x := v.Args[0]
|
||||
v.reset(OpARM64CLZW)
|
||||
v.Type = t
|
||||
v0 := b.NewValue0(v.Pos, OpARM64RBITW, typ.UInt32)
|
||||
v1 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32)
|
||||
// 0x10000 is bit 16, the first bit above a 16-bit operand.
// NOTE(review): presumably this caps the count at 16 for a zero input and
// makes any bits above bit 16 irrelevant — confirm against the Ctz16 op spec.
v1.AuxInt = 0x10000
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
// rewriteValueARM64_OpCtz16NonZero_0 rewrites a Ctz16NonZero value in place
// into the generic Ctz32 op applied to the same argument; no ORconst guard is
// emitted. The rewrite has no condition, so the function always reports true.
// NOTE(review): presumably valid because a nonzero 16-bit operand has its
// lowest set bit below bit 16 — confirm against the Ctz16NonZero op spec.
func rewriteValueARM64_OpCtz16NonZero_0(v *Value) bool {
|
||||
// match: (Ctz16NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
// The loop body returns unconditionally, so it executes exactly once.
for {
|
||||
// Capture the operand before reset clears v.
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM64_OpCtz32_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
@ -33236,6 +33277,39 @@ func rewriteValueARM64_OpCtz64NonZero_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// rewriteValueARM64_OpCtz8_0 rewrites a generic Ctz8 value in place into the
// ARM64 form CLZW(RBITW(ORconst [0x100] x)). The outer CLZW keeps the original
// result type of v; the two inner values are typed UInt32. The rewrite has no
// condition, so the function always reports true.
func rewriteValueARM64_OpCtz8_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz8 <t> x)
|
||||
// cond:
|
||||
// result: (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
|
||||
// The loop body returns unconditionally, so it executes exactly once.
for {
|
||||
// Capture the type and operand before reset clears v.
t := v.Type
|
||||
x := v.Args[0]
|
||||
v.reset(OpARM64CLZW)
|
||||
v.Type = t
|
||||
v0 := b.NewValue0(v.Pos, OpARM64RBITW, typ.UInt32)
|
||||
v1 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32)
|
||||
// 0x100 is bit 8, the first bit above an 8-bit operand.
// NOTE(review): presumably this caps the count at 8 for a zero input and
// makes any bits above bit 8 irrelevant — confirm against the Ctz8 op spec.
v1.AuxInt = 0x100
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
// rewriteValueARM64_OpCtz8NonZero_0 rewrites a Ctz8NonZero value in place into
// the generic Ctz32 op applied to the same argument; no ORconst guard is
// emitted. The rewrite has no condition, so the function always reports true.
// NOTE(review): presumably valid because a nonzero 8-bit operand has its
// lowest set bit below bit 8 — confirm against the Ctz8NonZero op spec.
func rewriteValueARM64_OpCtz8NonZero_0(v *Value) bool {
|
||||
// match: (Ctz8NonZero x)
|
||||
// cond:
|
||||
// result: (Ctz32 x)
|
||||
// The loop body returns unconditionally, so it executes exactly once.
for {
|
||||
// Capture the operand before reset clears v.
x := v.Args[0]
|
||||
v.reset(OpCtz32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueARM64_OpCvt32Fto32_0(v *Value) bool {
|
||||
// match: (Cvt32Fto32 x)
|
||||
// cond:
|
||||
|
@ -242,6 +242,7 @@ func RotateLeftVariable32(n uint32, m int) uint32 {
|
||||
|
||||
func TrailingZeros(n uint) int {
|
||||
// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
|
||||
// arm64:"RBIT","CLZ"
|
||||
// s390x:"FLOGR"
|
||||
// ppc64:"ANDN","POPCNTD"
|
||||
// ppc64le:"ANDN","POPCNTD"
|
||||
@ -250,6 +251,7 @@ func TrailingZeros(n uint) int {
|
||||
|
||||
func TrailingZeros64(n uint64) int {
|
||||
// amd64:"BSFQ","MOVL\t\\$64","CMOVQEQ"
|
||||
// arm64:"RBIT","CLZ"
|
||||
// s390x:"FLOGR"
|
||||
// ppc64:"ANDN","POPCNTD"
|
||||
// ppc64le:"ANDN","POPCNTD"
|
||||
@ -258,6 +260,7 @@ func TrailingZeros64(n uint64) int {
|
||||
|
||||
func TrailingZeros32(n uint32) int {
|
||||
// amd64:"BTSQ\\t\\$32","BSFQ"
|
||||
// arm64:"RBITW","CLZW"
|
||||
// s390x:"FLOGR","MOVWZ"
|
||||
// ppc64:"ANDN","POPCNTW"
|
||||
// ppc64le:"ANDN","POPCNTW"
|
||||
@ -266,6 +269,7 @@ func TrailingZeros32(n uint32) int {
|
||||
|
||||
func TrailingZeros16(n uint16) int {
|
||||
// amd64:"BSFL","BTSL\\t\\$16"
|
||||
// arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t"
|
||||
// s390x:"FLOGR","OR\t\\$65536"
|
||||
// ppc64:"POPCNTD","OR\\t\\$65536"
|
||||
// ppc64le:"POPCNTD","OR\\t\\$65536"
|
||||
@ -274,6 +278,7 @@ func TrailingZeros16(n uint16) int {
|
||||
|
||||
// TrailingZeros8 returns bits.TrailingZeros8(n). The arch-prefixed comments in
// the body list assembly patterns per architecture and must be kept verbatim.
// NOTE(review): patterns prefixed with '-' presumably name instructions that
// must not be emitted — confirm against the codegen test harness.
func TrailingZeros8(n uint8) int {
|
||||
// amd64:"BSFL","BTSL\\t\\$8"
|
||||
// arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t"
|
||||
// s390x:"FLOGR","OR\t\\$256"
|
||||
return bits.TrailingZeros8(n)
|
||||
}
|
||||
@ -314,6 +319,7 @@ func IterateBits16(n uint16) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFL",-"BTSL"
|
||||
// arm64:"RBITW","CLZW",-"ORR"
|
||||
i += bits.TrailingZeros16(n)
|
||||
n &= n - 1
|
||||
}
|
||||
@ -324,6 +330,7 @@ func IterateBits8(n uint8) int {
|
||||
i := 0
|
||||
for n != 0 {
|
||||
// amd64:"BSFL",-"BTSL"
|
||||
// arm64:"RBITW","CLZW",-"ORR"
|
||||
i += bits.TrailingZeros8(n)
|
||||
n &= n - 1
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user