mirror of
https://github.com/golang/go
synced 2024-11-26 16:16:57 -07:00
cmd/compile: optimize RotateLeft8/16 on arm64
This CL optimizes RotateLeft8/16 on arm64. For 16 bits, we form a 32 bits register by duplicating two 16 bits registers, then use RORW instruction to do the rotate shift. For 8 bits, we just use LSR and LSL instead of RORW because the code is simpler. Benchmark Old ThisCL delta RotateLeft8-46 2.16 ns/op 1.73 ns/op -19.70% RotateLeft16-46 2.16 ns/op 1.54 ns/op -28.53% Change-Id: I09cde4383d12e31876a57f8cdfd3bb4f324fadb0 Reviewed-on: https://go-review.googlesource.com/c/go/+/420976 Reviewed-by: Keith Randall <khr@google.com> Auto-Submit: Keith Randall <khr@golang.org> Reviewed-by: Keith Randall <khr@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Heschi Kreinick <heschi@google.com> Run-TryBot: Keith Randall <khr@golang.org>
This commit is contained in:
parent
f45c2d7e47
commit
121344ac33
@ -64,8 +64,11 @@
|
|||||||
(Sqrt32 ...) => (FSQRTS ...)
|
(Sqrt32 ...) => (FSQRTS ...)
|
||||||
|
|
||||||
// lowering rotates
|
// lowering rotates
|
||||||
|
// we do rotate detection in generic rules, if the following rules need to be changed, chcek generic rules first.
|
||||||
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
|
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
|
||||||
|
(RotateLeft8 <t> x y) => (OR <t> (SLL <t> x (ANDconst <typ.Int64> [7] y)) (SRL <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEG <typ.Int64> y))))
|
||||||
(RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
|
(RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
|
||||||
|
(RotateLeft16 <t> x y) => (RORW <t> (ORshiftLL <typ.UInt32> (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG <typ.Int64> y))
|
||||||
(RotateLeft32 x y) => (RORW x (NEG <y.Type> y))
|
(RotateLeft32 x y) => (RORW x (NEG <y.Type> y))
|
||||||
(RotateLeft64 x y) => (ROR x (NEG <y.Type> y))
|
(RotateLeft64 x y) => (ROR x (NEG <y.Type> y))
|
||||||
|
|
||||||
|
@ -1986,9 +1986,9 @@ func canRotate(c *Config, bits int64) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
switch c.arch {
|
switch c.arch {
|
||||||
case "386", "amd64":
|
case "386", "amd64", "arm64":
|
||||||
return true
|
return true
|
||||||
case "arm", "arm64", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
|
case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
|
||||||
return bits >= 32
|
return bits >= 32
|
||||||
default:
|
default:
|
||||||
return false
|
return false
|
||||||
|
@ -25610,7 +25610,24 @@ func rewriteValueARM64_OpRotateLeft16(v *Value) bool {
|
|||||||
v.AddArg2(v0, v2)
|
v.AddArg2(v0, v2)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
// match: (RotateLeft16 <t> x y)
|
||||||
|
// result: (RORW <t> (ORshiftLL <typ.UInt32> (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG <typ.Int64> y))
|
||||||
|
for {
|
||||||
|
t := v.Type
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
v.reset(OpARM64RORW)
|
||||||
|
v.Type = t
|
||||||
|
v0 := b.NewValue0(v.Pos, OpARM64ORshiftLL, typ.UInt32)
|
||||||
|
v0.AuxInt = int64ToAuxInt(16)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
||||||
|
v1.AddArg(x)
|
||||||
|
v0.AddArg2(v1, v1)
|
||||||
|
v2 := b.NewValue0(v.Pos, OpARM64NEG, typ.Int64)
|
||||||
|
v2.AddArg(y)
|
||||||
|
v.AddArg2(v0, v2)
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueARM64_OpRotateLeft32(v *Value) bool {
|
func rewriteValueARM64_OpRotateLeft32(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
@ -25670,7 +25687,31 @@ func rewriteValueARM64_OpRotateLeft8(v *Value) bool {
|
|||||||
v.AddArg2(v0, v2)
|
v.AddArg2(v0, v2)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
// match: (RotateLeft8 <t> x y)
|
||||||
|
// result: (OR <t> (SLL <t> x (ANDconst <typ.Int64> [7] y)) (SRL <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEG <typ.Int64> y))))
|
||||||
|
for {
|
||||||
|
t := v.Type
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
v.reset(OpARM64OR)
|
||||||
|
v.Type = t
|
||||||
|
v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpARM64ANDconst, typ.Int64)
|
||||||
|
v1.AuxInt = int64ToAuxInt(7)
|
||||||
|
v1.AddArg(y)
|
||||||
|
v0.AddArg2(x, v1)
|
||||||
|
v2 := b.NewValue0(v.Pos, OpARM64SRL, t)
|
||||||
|
v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
|
||||||
|
v3.AddArg(x)
|
||||||
|
v4 := b.NewValue0(v.Pos, OpARM64ANDconst, typ.Int64)
|
||||||
|
v4.AuxInt = int64ToAuxInt(7)
|
||||||
|
v5 := b.NewValue0(v.Pos, OpARM64NEG, typ.Int64)
|
||||||
|
v5.AddArg(y)
|
||||||
|
v4.AddArg(v5)
|
||||||
|
v2.AddArg2(v3, v4)
|
||||||
|
v.AddArg2(v0, v2)
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
func rewriteValueARM64_OpRsh16Ux16(v *Value) bool {
|
func rewriteValueARM64_OpRsh16Ux16(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
|
@ -258,14 +258,16 @@ func RotateLeft32(n uint32) uint32 {
|
|||||||
return bits.RotateLeft32(n, 9)
|
return bits.RotateLeft32(n, 9)
|
||||||
}
|
}
|
||||||
|
|
||||||
func RotateLeft16(n uint16) uint16 {
|
func RotateLeft16(n uint16, s int) uint16 {
|
||||||
// amd64:"ROLW" 386:"ROLW"
|
// amd64:"ROLW" 386:"ROLW"
|
||||||
return bits.RotateLeft16(n, 5)
|
// arm64:"RORW",-"CSEL"
|
||||||
|
return bits.RotateLeft16(n, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func RotateLeft8(n uint8) uint8 {
|
func RotateLeft8(n uint8, s int) uint8 {
|
||||||
// amd64:"ROLB" 386:"ROLB"
|
// amd64:"ROLB" 386:"ROLB"
|
||||||
return bits.RotateLeft8(n, 5)
|
// arm64:"LSL","LSR",-"CSEL"
|
||||||
|
return bits.RotateLeft8(n, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func RotateLeftVariable(n uint, m int) uint {
|
func RotateLeftVariable(n uint, m int) uint {
|
||||||
|
Loading…
Reference in New Issue
Block a user