mirror of
https://github.com/golang/go
synced 2024-11-19 14:24:47 -07:00
cmd/compile: improve absorb shifts optimization for arm64
Current absorb shifts optimization can generate dead Value nodes which increase use count of other live nodes. It will impact other optimizations (such as combined loads) which are enabled based on specific use count. This patch fixes the issue by decreasing the use count of nodes referenced by dead Value nodes generated by absorb shifts optimization. Performance impacts on go1 benchmarks (data collected on A57@2GHzx8): name old time/op new time/op delta BinaryTree17-8 6.28s ± 2% 6.24s ± 1% ~ (p=0.065 n=10+9) Fannkuch11-8 6.32s ± 0% 6.33s ± 0% +0.17% (p=0.000 n=10+10) FmtFprintfEmpty-8 98.9ns ± 0% 99.2ns ± 0% +0.34% (p=0.000 n=9+7) FmtFprintfString-8 183ns ± 1% 182ns ± 1% -1.01% (p=0.005 n=9+10) FmtFprintfInt-8 199ns ± 1% 202ns ± 1% +1.41% (p=0.000 n=10+9) FmtFprintfIntInt-8 272ns ± 1% 276ns ± 3% +1.36% (p=0.015 n=10+10) FmtFprintfPrefixedInt-8 367ns ± 1% 369ns ± 1% +0.68% (p=0.042 n=10+10) FmtFprintfFloat-8 491ns ± 1% 493ns ± 1% ~ (p=0.064 n=10+10) FmtManyArgs-8 1.31µs ± 1% 1.32µs ± 1% +0.39% (p=0.042 n=8+9) GobDecode-8 17.0ms ± 2% 16.2ms ± 2% -4.74% (p=0.000 n=10+10) GobEncode-8 13.7ms ± 2% 13.4ms ± 1% -2.40% (p=0.000 n=10+9) Gzip-8 844ms ± 0% 737ms ± 0% -12.70% (p=0.000 n=10+10) Gunzip-8 84.4ms ± 1% 83.9ms ± 0% -0.55% (p=0.000 n=10+8) HTTPClientServer-8 122µs ± 1% 124µs ± 1% +1.75% (p=0.000 n=10+9) JSONEncode-8 34.9ms ± 1% 32.4ms ± 0% -7.11% (p=0.000 n=10+9) JSONDecode-8 150ms ± 0% 146ms ± 1% -2.84% (p=0.000 n=7+10) Mandelbrot200-8 10.0ms ± 0% 10.0ms ± 0% ~ (p=0.529 n=10+10) GoParse-8 8.18ms ± 1% 8.03ms ± 0% -1.93% (p=0.000 n=10+10) RegexpMatchEasy0_32-8 209ns ± 0% 209ns ± 0% ~ (p=0.248 n=10+9) RegexpMatchEasy0_1K-8 789ns ± 1% 790ns ± 0% ~ (p=0.361 n=10+10) RegexpMatchEasy1_32-8 202ns ± 0% 202ns ± 1% ~ (p=0.137 n=8+10) RegexpMatchEasy1_1K-8 1.12µs ± 2% 1.12µs ± 1% ~ (p=0.810 n=10+10) RegexpMatchMedium_32-8 298ns ± 0% 298ns ± 0% ~ (p=0.443 n=10+9) RegexpMatchMedium_1K-8 83.0µs ± 5% 78.6µs ± 0% -5.37% (p=0.000 n=10+10) RegexpMatchHard_32-8 4.32µs ± 0% 4.26µs ± 0% 
-1.47% (p=0.000 n=10+10) RegexpMatchHard_1K-8 132µs ± 4% 126µs ± 0% -4.41% (p=0.000 n=10+9) Revcomp-8 1.11s ± 0% 1.11s ± 0% +0.14% (p=0.017 n=10+9) Template-8 155ms ± 1% 155ms ± 1% ~ (p=0.796 n=10+10) TimeParse-8 774ns ± 1% 785ns ± 1% +1.41% (p=0.001 n=10+10) TimeFormat-8 788ns ± 1% 806ns ± 1% +2.24% (p=0.000 n=10+9) name old speed new speed delta GobDecode-8 45.2MB/s ± 2% 47.5MB/s ± 2% +4.96% (p=0.000 n=10+10) GobEncode-8 56.0MB/s ± 2% 57.4MB/s ± 1% +2.44% (p=0.000 n=10+9) Gzip-8 23.0MB/s ± 0% 26.3MB/s ± 0% +14.55% (p=0.000 n=10+10) Gunzip-8 230MB/s ± 1% 231MB/s ± 0% +0.55% (p=0.000 n=10+8) JSONEncode-8 55.6MB/s ± 1% 59.9MB/s ± 0% +7.65% (p=0.000 n=10+9) JSONDecode-8 12.9MB/s ± 0% 13.3MB/s ± 1% +2.94% (p=0.000 n=7+10) GoParse-8 7.08MB/s ± 1% 7.22MB/s ± 0% +1.95% (p=0.000 n=10+10) RegexpMatchEasy0_32-8 153MB/s ± 0% 153MB/s ± 0% -0.16% (p=0.023 n=10+10) RegexpMatchEasy0_1K-8 1.30GB/s ± 1% 1.30GB/s ± 0% ~ (p=0.393 n=10+10) RegexpMatchEasy1_32-8 158MB/s ± 0% 158MB/s ± 0% ~ (p=0.684 n=10+10) RegexpMatchEasy1_1K-8 915MB/s ± 2% 918MB/s ± 1% ~ (p=0.796 n=10+10) RegexpMatchMedium_32-8 3.35MB/s ± 0% 3.35MB/s ± 0% ~ (p=1.000 n=10+9) RegexpMatchMedium_1K-8 12.3MB/s ± 5% 13.0MB/s ± 0% +5.56% (p=0.000 n=10+10) RegexpMatchHard_32-8 7.40MB/s ± 0% 7.51MB/s ± 0% +1.50% (p=0.000 n=10+10) RegexpMatchHard_1K-8 7.75MB/s ± 4% 8.10MB/s ± 0% +4.52% (p=0.000 n=10+8) Revcomp-8 229MB/s ± 0% 228MB/s ± 0% -0.14% (p=0.017 n=10+9) Template-8 12.5MB/s ± 1% 12.5MB/s ± 1% ~ (p=0.780 n=10+10) Change-Id: I103389f168eac79f6af44e8fef93acc2a7a4ac96 Reviewed-on: https://go-review.googlesource.com/88415 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
b3cb740be5
commit
51932c326f
@ -248,7 +248,7 @@ var allAsmTests = []*asmTests{
|
||||
{
|
||||
arch: "arm64",
|
||||
os: "linux",
|
||||
imports: []string{"math/bits"},
|
||||
imports: []string{"encoding/binary", "math/bits"},
|
||||
tests: linuxARM64Tests,
|
||||
},
|
||||
{
|
||||
@ -2751,6 +2751,80 @@ var linuxARM64Tests = []*asmTest{
|
||||
`,
|
||||
pos: []string{"TBZ"},
|
||||
},
|
||||
// Load-combining tests.
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) uint64 {
|
||||
return binary.LittleEndian.Uint64(b)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tMOVD\t\\(R[0-9]+\\)"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte, i int) uint64 {
|
||||
return binary.LittleEndian.Uint64(b[i:])
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tMOVD\t\\(R[0-9]+\\)"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) uint32 {
|
||||
return binary.LittleEndian.Uint32(b)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tMOVWU\t\\(R[0-9]+\\)"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte, i int) uint32 {
|
||||
return binary.LittleEndian.Uint32(b[i:])
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tMOVWU\t\\(R[0-9]+\\)"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) uint64 {
|
||||
return binary.BigEndian.Uint64(b)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tREV\t"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte, i int) uint64 {
|
||||
return binary.BigEndian.Uint64(b[i:])
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tREV\t"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) uint32 {
|
||||
return binary.BigEndian.Uint32(b)
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tREVW\t"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte, i int) uint32 {
|
||||
return binary.BigEndian.Uint32(b[i:])
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tREVW\t"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(s []byte) uint16 {
|
||||
return uint16(s[0]) | uint16(s[1]) << 8
|
||||
}
|
||||
`,
|
||||
pos: []string{"\tMOVHU\t\\(R[0-9]+\\)"},
|
||||
neg: []string{"ORR\tR[0-9]+<<8\t"},
|
||||
},
|
||||
}
|
||||
|
||||
var linuxMIPSTests = []*asmTest{
|
||||
|
@ -1078,30 +1078,30 @@
|
||||
(CSELULT0 _ (FlagGT_UGT)) -> (MOVDconst [0])
|
||||
|
||||
// absorb shifts into ops
|
||||
(ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
|
||||
(ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
|
||||
(ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
|
||||
(SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
|
||||
(SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
|
||||
(SUB x (SRAconst [c] y)) -> (SUBshiftRA x y [c])
|
||||
(AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
|
||||
(AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
|
||||
(AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
|
||||
(OR x (SLLconst [c] y)) -> (ORshiftLL x y [c]) // useful for combined load
|
||||
(OR x (SRLconst [c] y)) -> (ORshiftRL x y [c])
|
||||
(OR x (SRAconst [c] y)) -> (ORshiftRA x y [c])
|
||||
(XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
|
||||
(XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
|
||||
(XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
|
||||
(BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
|
||||
(BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
|
||||
(BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
|
||||
(CMP x (SLLconst [c] y)) -> (CMPshiftLL x y [c])
|
||||
(CMP (SLLconst [c] y) x) -> (InvertFlags (CMPshiftLL x y [c]))
|
||||
(CMP x (SRLconst [c] y)) -> (CMPshiftRL x y [c])
|
||||
(CMP (SRLconst [c] y) x) -> (InvertFlags (CMPshiftRL x y [c]))
|
||||
(CMP x (SRAconst [c] y)) -> (CMPshiftRA x y [c])
|
||||
(CMP (SRAconst [c] y) x) -> (InvertFlags (CMPshiftRA x y [c]))
|
||||
(ADD x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ADDshiftLL x0 y [c])
|
||||
(ADD x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ADDshiftRL x0 y [c])
|
||||
(ADD x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ADDshiftRA x0 y [c])
|
||||
(SUB x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (SUBshiftLL x0 y [c])
|
||||
(SUB x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (SUBshiftRL x0 y [c])
|
||||
(SUB x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (SUBshiftRA x0 y [c])
|
||||
(AND x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ANDshiftLL x0 y [c])
|
||||
(AND x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ANDshiftRL x0 y [c])
|
||||
(AND x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ANDshiftRA x0 y [c])
|
||||
(OR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ORshiftLL x0 y [c]) // useful for combined load
|
||||
(OR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ORshiftRL x0 y [c])
|
||||
(OR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ORshiftRA x0 y [c])
|
||||
(XOR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (XORshiftLL x0 y [c])
|
||||
(XOR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (XORshiftRL x0 y [c])
|
||||
(XOR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (XORshiftRA x0 y [c])
|
||||
(BIC x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (BICshiftLL x0 y [c])
|
||||
(BIC x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (BICshiftRL x0 y [c])
|
||||
(BIC x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (BICshiftRA x0 y [c])
|
||||
(CMP x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (CMPshiftLL x0 y [c])
|
||||
(CMP x0:(SLLconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftLL x1 y [c]))
|
||||
(CMP x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (CMPshiftRL x0 y [c])
|
||||
(CMP x0:(SRLconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftRL x1 y [c]))
|
||||
(CMP x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (CMPshiftRA x0 y [c])
|
||||
(CMP x0:(SRAconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftRA x1 y [c]))
|
||||
|
||||
// prefer *const ops to *shift ops
|
||||
(ADDshiftLL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SLLconst <x.Type> x [d]))
|
||||
|
@ -505,6 +505,17 @@ func clobber(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// clobberIfDead resets v to OpInvalid when its use count is exactly 1, and always returns true.
|
||||
// Rewrite rules call it in their match condition: when the value being rewritten is v's only
|
||||
// user, v is dead after the rewrite, and resetting it decrements the use counts of v's args.
|
||||
func clobberIfDead(v *Value) bool {
|
||||
if v.Uses == 1 {
|
||||
v.reset(OpInvalid)
|
||||
}
|
||||
// Note: leave v.Block intact. The Block field is still used after clobberIfDead returns.
|
||||
return true
|
||||
}
|
||||
|
||||
// noteRule is an easy way to track if a rule is matched when writing
|
||||
// new ones. Make the rule of interest also conditional on
|
||||
// noteRule("note to self: rule of interest matched")
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user