1
0
mirror of https://github.com/golang/go synced 2024-11-26 08:27:56 -07:00

cmd/compile: optimize shift ops on arm64 when the shift value is v&63

For the following code case:

  var x uint64
  x >> (shift & 63)

We can directly genereta `x >> shift` on arm64, since the hardware will
only use the bottom 6 bits of the shift amount.

Benchmark               old time/op  new time/op    delta
ShiftArithmeticRight-8  0.40ns       0.31ns        -21.7%

Change-Id: Id58c8a5b2f6dd5c30c3876f4a36e11b4d81e2dc9
Reviewed-on: https://go-review.googlesource.com/c/go/+/425294
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
This commit is contained in:
ruinan 2022-08-08 04:17:19 +00:00 committed by Gopher Robot
parent 5befb24bb5
commit 54c7bc9cff
3 changed files with 49 additions and 1 deletions

View File

@ -1235,9 +1235,12 @@
(EON x (MOVDconst [c])) => (XORconst [^c] x) (EON x (MOVDconst [c])) => (XORconst [^c] x)
(ORN x (MOVDconst [c])) => (ORconst [^c] x) (ORN x (MOVDconst [c])) => (ORconst [^c] x)
(SLL x (MOVDconst [c])) => (SLLconst x [c&63]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=64) (SLL x (MOVDconst [c])) => (SLLconst x [c&63])
(SRL x (MOVDconst [c])) => (SRLconst x [c&63]) (SRL x (MOVDconst [c])) => (SRLconst x [c&63])
(SRA x (MOVDconst [c])) => (SRAconst x [c&63]) (SRA x (MOVDconst [c])) => (SRAconst x [c&63])
(SLL x (ANDconst [63] y)) => (SLL x y)
(SRL x (ANDconst [63] y)) => (SRL x y)
(SRA x (ANDconst [63] y)) => (SRA x y)
(CMP x (MOVDconst [c])) => (CMPconst [c] x) (CMP x (MOVDconst [c])) => (CMPconst [c] x)
(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x)) (CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))

View File

@ -20484,6 +20484,18 @@ func rewriteValueARM64_OpARM64SLL(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SLL x (ANDconst [63] y))
// result: (SLL x y)
for {
x := v_0
if v_1.Op != OpARM64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 {
break
}
y := v_1.Args[0]
v.reset(OpARM64SLL)
v.AddArg2(x, y)
return true
}
return false return false
} }
func rewriteValueARM64_OpARM64SLLconst(v *Value) bool { func rewriteValueARM64_OpARM64SLLconst(v *Value) bool {
@ -20649,6 +20661,18 @@ func rewriteValueARM64_OpARM64SRA(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SRA x (ANDconst [63] y))
// result: (SRA x y)
for {
x := v_0
if v_1.Op != OpARM64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 {
break
}
y := v_1.Args[0]
v.reset(OpARM64SRA)
v.AddArg2(x, y)
return true
}
return false return false
} }
func rewriteValueARM64_OpARM64SRAconst(v *Value) bool { func rewriteValueARM64_OpARM64SRAconst(v *Value) bool {
@ -20806,6 +20830,18 @@ func rewriteValueARM64_OpARM64SRL(v *Value) bool {
v.AddArg(x) v.AddArg(x)
return true return true
} }
// match: (SRL x (ANDconst [63] y))
// result: (SRL x y)
for {
x := v_0
if v_1.Op != OpARM64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 {
break
}
y := v_1.Args[0]
v.reset(OpARM64SRL)
v.AddArg2(x, y)
return true
}
return false return false
} }
func rewriteValueARM64_OpARM64SRLconst(v *Value) bool { func rewriteValueARM64_OpARM64SRLconst(v *Value) bool {

View File

@ -82,6 +82,7 @@ func lshMask64x64(v int64, s uint64) int64 {
// ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64le:"ANDCC",-"ORN",-"ISEL"
// riscv64:"SLL",-"AND\t",-"SLTIU" // riscv64:"SLL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"LSL",-"AND"
return v << (s & 63) return v << (s & 63)
} }
@ -90,6 +91,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 {
// ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64le:"ANDCC",-"ORN",-"ISEL"
// riscv64:"SRL",-"AND\t",-"SLTIU" // riscv64:"SRL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"LSR",-"AND"
return v >> (s & 63) return v >> (s & 63)
} }
@ -98,6 +100,7 @@ func rshMask64x64(v int64, s uint64) int64 {
// ppc64le:"ANDCC",-ORN",-"ISEL" // ppc64le:"ANDCC",-ORN",-"ISEL"
// riscv64:"SRA",-"OR",-"SLTIU" // riscv64:"SRA",-"OR",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"ASR",-"AND"
return v >> (s & 63) return v >> (s & 63)
} }
@ -106,6 +109,7 @@ func lshMask32x64(v int32, s uint64) int32 {
// ppc64le:"ISEL",-"ORN" // ppc64le:"ISEL",-"ORN"
// riscv64:"SLL","AND","SLTIU" // riscv64:"SLL","AND","SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"LSL",-"AND"
return v << (s & 63) return v << (s & 63)
} }
@ -114,6 +118,7 @@ func rshMask32Ux64(v uint32, s uint64) uint32 {
// ppc64le:"ISEL",-"ORN" // ppc64le:"ISEL",-"ORN"
// riscv64:"SRL","AND","SLTIU" // riscv64:"SRL","AND","SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"LSR",-"AND"
return v >> (s & 63) return v >> (s & 63)
} }
@ -122,6 +127,7 @@ func rshMask32x64(v int32, s uint64) int32 {
// ppc64le:"ISEL",-"ORN" // ppc64le:"ISEL",-"ORN"
// riscv64:"SRA","OR","SLTIU" // riscv64:"SRA","OR","SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"ASR",-"AND"
return v >> (s & 63) return v >> (s & 63)
} }
@ -130,6 +136,7 @@ func lshMask64x32(v int64, s uint32) int64 {
// ppc64le:"ANDCC",-"ORN" // ppc64le:"ANDCC",-"ORN"
// riscv64:"SLL",-"AND\t",-"SLTIU" // riscv64:"SLL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"LSL",-"AND"
return v << (s & 63) return v << (s & 63)
} }
@ -138,6 +145,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 {
// ppc64le:"ANDCC",-"ORN" // ppc64le:"ANDCC",-"ORN"
// riscv64:"SRL",-"AND\t",-"SLTIU" // riscv64:"SRL",-"AND\t",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"LSR",-"AND"
return v >> (s & 63) return v >> (s & 63)
} }
@ -146,6 +154,7 @@ func rshMask64x32(v int64, s uint32) int64 {
// ppc64le:"ANDCC",-"ORN",-"ISEL" // ppc64le:"ANDCC",-"ORN",-"ISEL"
// riscv64:"SRA",-"OR",-"SLTIU" // riscv64:"SRA",-"OR",-"SLTIU"
// s390x:-"RISBGZ",-"AND",-"LOCGR" // s390x:-"RISBGZ",-"AND",-"LOCGR"
// arm64:"ASR",-"AND"
return v >> (s & 63) return v >> (s & 63)
} }