From 121344ac338ef21d87eee4f64a60d0ae8a7f6fe3 Mon Sep 17 00:00:00 2001
From: ruinan
Date: Wed, 13 Jul 2022 09:00:57 +0000
Subject: [PATCH] cmd/compile: optimize RotateLeft8/16 on arm64

This CL optimizes RotateLeft8/16 on arm64.

For 16 bits, we form a 32-bit value by duplicating the 16-bit value into
both halves of a register, then use the RORW instruction to do the
rotate shift.

For 8 bits, we just use LSR and LSL instead of RORW because the code is
simpler.

Benchmark        Old         ThisCL      delta
RotateLeft8-46   2.16 ns/op  1.73 ns/op  -19.70%
RotateLeft16-46  2.16 ns/op  1.54 ns/op  -28.53%

Change-Id: I09cde4383d12e31876a57f8cdfd3bb4f324fadb0
Reviewed-on: https://go-review.googlesource.com/c/go/+/420976
Reviewed-by: Keith Randall
Auto-Submit: Keith Randall
Reviewed-by: Keith Randall
TryBot-Result: Gopher Robot
Reviewed-by: Heschi Kreinick
Run-TryBot: Keith Randall
---
 src/cmd/compile/internal/ssa/gen/ARM64.rules |  3 ++
 src/cmd/compile/internal/ssa/rewrite.go      |  4 +-
 src/cmd/compile/internal/ssa/rewriteARM64.go | 45 +++++++++++++++++++-
 test/codegen/mathbits.go                     | 10 +++--
 4 files changed, 54 insertions(+), 8 deletions(-)
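For reference, a rough Go model of what the new RotateLeft16 rule computes
(the name rotl16ViaRORW and the test constant are illustrative only, not part
of this CL): duplicating the 16-bit value into both halves of a 32-bit word
makes the word periodic in 16 bits, so rotating the word by any amount keeps
that period and the rotated 16-bit result lands in the low half. RORW takes
the count modulo 32, and 16 divides 32, which is why the rule needs no extra
masking of the shift count.

// A sketch only: models the semantics of the new RotateLeft16 lowering.
package main

import (
    "fmt"
    "math/bits"
)

// rotl16ViaRORW models (RORW (ORshiftLL (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG y)).
func rotl16ViaRORW(x uint16, s int) uint16 {
    // ORshiftLL ... [16]: duplicate x into both halves of a 32-bit word.
    d := uint32(x)<<16 | uint32(x)
    // RORW (NEG y) is a right rotate by -s, i.e. a left rotate by s; the low
    // half of the rotated word is the rotated 16-bit value.
    return uint16(bits.RotateLeft32(d, s))
}

func main() {
    for s := -17; s <= 17; s++ {
        if got, want := rotl16ViaRORW(0xBEEF, s), bits.RotateLeft16(0xBEEF, s); got != want {
            fmt.Printf("s=%d: got %#04x, want %#04x\n", s, got, want)
        }
    }
    fmt.Println("rotl16ViaRORW agrees with bits.RotateLeft16")
}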
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index a91f17a2d34..0d319609aae 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -64,8 +64,11 @@
 (Sqrt32 ...) => (FSQRTS ...)

 // lowering rotates
+// we do rotate detection in generic rules, if the following rules need to be changed, check generic rules first.
 (RotateLeft8 x (MOVDconst [c])) => (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7])))
+(RotateLeft8 x y) => (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y))))
 (RotateLeft16 x (MOVDconst [c])) => (Or16 (Lsh16x64 x (MOVDconst [c&15])) (Rsh16Ux64 x (MOVDconst [-c&15])))
+(RotateLeft16 x y) => (RORW (ORshiftLL (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG y))
 (RotateLeft32 x y) => (RORW x (NEG y))
 (RotateLeft64 x y) => (ROR x (NEG y))
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 44b5173968b..c95d8734564 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1986,9 +1986,9 @@ func canRotate(c *Config, bits int64) bool {
         return false
     }
     switch c.arch {
-    case "386", "amd64":
+    case "386", "amd64", "arm64":
         return true
-    case "arm", "arm64", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
+    case "arm", "s390x", "ppc64", "ppc64le", "wasm", "loong64":
         return bits >= 32
     default:
         return false
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index ecb8a6b779f..097d1772ab3 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -25610,7 +25610,24 @@ func rewriteValueARM64_OpRotateLeft16(v *Value) bool {
         v.AddArg2(v0, v2)
         return true
     }
-    return false
+    // match: (RotateLeft16 x y)
+    // result: (RORW (ORshiftLL (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG y))
+    for {
+        t := v.Type
+        x := v_0
+        y := v_1
+        v.reset(OpARM64RORW)
+        v.Type = t
+        v0 := b.NewValue0(v.Pos, OpARM64ORshiftLL, typ.UInt32)
+        v0.AuxInt = int64ToAuxInt(16)
+        v1 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
+        v1.AddArg(x)
+        v0.AddArg2(v1, v1)
+        v2 := b.NewValue0(v.Pos, OpARM64NEG, typ.Int64)
+        v2.AddArg(y)
+        v.AddArg2(v0, v2)
+        return true
+    }
 }
 func rewriteValueARM64_OpRotateLeft32(v *Value) bool {
     v_1 := v.Args[1]
@@ -25670,7 +25687,31 @@ func rewriteValueARM64_OpRotateLeft8(v *Value) bool {
         v.AddArg2(v0, v2)
         return true
     }
-    return false
+    // match: (RotateLeft8 x y)
+    // result: (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y))))
+    for {
+        t := v.Type
+        x := v_0
+        y := v_1
+        v.reset(OpARM64OR)
+        v.Type = t
+        v0 := b.NewValue0(v.Pos, OpARM64SLL, t)
+        v1 := b.NewValue0(v.Pos, OpARM64ANDconst, typ.Int64)
+        v1.AuxInt = int64ToAuxInt(7)
+        v1.AddArg(y)
+        v0.AddArg2(x, v1)
+        v2 := b.NewValue0(v.Pos, OpARM64SRL, t)
+        v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+        v3.AddArg(x)
+        v4 := b.NewValue0(v.Pos, OpARM64ANDconst, typ.Int64)
+        v4.AuxInt = int64ToAuxInt(7)
+        v5 := b.NewValue0(v.Pos, OpARM64NEG, typ.Int64)
+        v5.AddArg(y)
+        v4.AddArg(v5)
+        v2.AddArg2(v3, v4)
+        v.AddArg2(v0, v2)
+        return true
+    }
 }
 func rewriteValueARM64_OpRsh16Ux16(v *Value) bool {
     v_1 := v.Args[1]
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 9c643647ee9..0620766f5ae 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -258,14 +258,16 @@ func RotateLeft32(n uint32) uint32 {
     return bits.RotateLeft32(n, 9)
 }

-func RotateLeft16(n uint16) uint16 {
+func RotateLeft16(n uint16, s int) uint16 {
     // amd64:"ROLW" 386:"ROLW"
-    return bits.RotateLeft16(n, 5)
+    // arm64:"RORW",-"CSEL"
+    return bits.RotateLeft16(n, s)
 }

-func RotateLeft8(n uint8) uint8 {
+func RotateLeft8(n uint8, s int) uint8 {
     // amd64:"ROLB" 386:"ROLB"
-    return bits.RotateLeft8(n, 5)
+    // arm64:"LSL","LSR",-"CSEL"
+    return bits.RotateLeft8(n, s)
 }

 func RotateLeftVariable(n uint, m int) uint {
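For reference, the new RotateLeft8 rule can be modeled the same way; another
illustrative sketch (rotl8ViaShifts is a made-up name, not part of this CL).
The rule ORs a left shift by s&7 with a logical right shift of the
zero-extended value by (-s)&7, the classic two-shift rotate, which is why the
codegen test above checks for LSL and LSR rather than RORW:

// A sketch only: models the semantics of the new RotateLeft8 lowering.
package main

import (
    "fmt"
    "math/bits"
)

// rotl8ViaShifts models (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y)))).
func rotl8ViaShifts(x uint8, s int) uint8 {
    l := x << (uint(s) & 7)                 // SLL x (ANDconst [7] y)
    r := uint8(uint64(x) >> (uint(-s) & 7)) // SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y))
    return l | r                            // OR; when s&7 == 0 both shifts are 0 and this is just x
}

func main() {
    for s := -9; s <= 9; s++ {
        for x := 0; x < 256; x++ {
            if got, want := rotl8ViaShifts(uint8(x), s), bits.RotateLeft8(uint8(x), s); got != want {
                fmt.Printf("x=%#02x s=%d: got %#02x, want %#02x\n", x, s, got, want)
            }
        }
    }
    fmt.Println("rotl8ViaShifts agrees with bits.RotateLeft8")
}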