mirror of
https://github.com/golang/go
synced 2024-11-17 06:54:48 -07:00
cmd/compile/internal/ssa: combine zero stores into larger stores on arm64
This reduces the go tool binary on arm64 by 12k. go1 results on Amberwing: name old time/op new time/op delta RegexpMatchEasy0_32 249ns ± 0% 249ns ± 0% ~ (p=0.087 n=10+10) RegexpMatchEasy0_1K 584ns ± 0% 584ns ± 0% ~ (all equal) RegexpMatchEasy1_32 246ns ± 0% 246ns ± 0% ~ (p=1.000 n=10+10) RegexpMatchEasy1_1K 806ns ± 0% 806ns ± 0% ~ (p=0.706 n=10+9) RegexpMatchMedium_32 314ns ± 0% 314ns ± 0% ~ (all equal) RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% ~ (p=0.245 n=10+8) RegexpMatchHard_32 2.75µs ± 1% 2.75µs ± 1% ~ (p=0.690 n=10+10) RegexpMatchHard_1K 78.9µs ± 0% 78.9µs ± 1% ~ (p=0.295 n=9+9) FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal) FmtFprintfString 112ns ± 0% 112ns ± 0% ~ (all equal) FmtFprintfInt 117ns ± 0% 116ns ± 0% -0.85% (p=0.000 n=10+10) FmtFprintfIntInt 181ns ± 0% 181ns ± 0% ~ (all equal) FmtFprintfPrefixedInt 222ns ± 0% 224ns ± 0% +0.90% (p=0.000 n=9+10) FmtFprintfFloat 318ns ± 1% 322ns ± 0% ~ (p=0.059 n=10+8) FmtManyArgs 736ns ± 1% 735ns ± 0% ~ (p=0.206 n=9+9) Gzip 437ms ± 0% 436ms ± 0% -0.25% (p=0.000 n=10+10) HTTPClientServer 89.8µs ± 1% 90.2µs ± 2% ~ (p=0.393 n=10+10) JSONEncode 20.1ms ± 1% 20.2ms ± 1% ~ (p=0.065 n=9+10) JSONDecode 94.2ms ± 1% 93.9ms ± 1% -0.42% (p=0.043 n=10+10) GobDecode 12.7ms ± 1% 12.8ms ± 2% +0.94% (p=0.019 n=10+10) GobEncode 12.1ms ± 0% 12.1ms ± 0% ~ (p=0.052 n=10+10) Mandelbrot200 5.06ms ± 0% 5.05ms ± 0% -0.04% (p=0.000 n=9+10) TimeParse 450ns ± 3% 446ns ± 0% ~ (p=0.238 n=10+9) TimeFormat 485ns ± 1% 483ns ± 1% ~ (p=0.073 n=10+10) Template 90.4ms ± 0% 90.7ms ± 0% +0.29% (p=0.000 n=8+10) GoParse 6.01ms ± 0% 6.03ms ± 0% +0.35% (p=0.000 n=10+10) BinaryTree17 11.7s ± 0% 11.7s ± 0% ~ (p=0.481 n=10+10) Revcomp 669ms ± 0% 669ms ± 0% ~ (p=0.315 n=10+10) Fannkuch11 3.40s ± 0% 3.37s ± 0% -0.92% (p=0.000 n=10+10) [Geo mean] 67.9µs 67.9µs +0.02% name old speed new speed delta RegexpMatchEasy0_32 128MB/s ± 0% 128MB/s ± 0% -0.08% (p=0.003 n=8+10) RegexpMatchEasy0_1K 1.75GB/s ± 0% 1.75GB/s ± 0% ~ (p=0.642 n=8+10) RegexpMatchEasy1_32 130MB/s ± 0% 130MB/s ± 0% ~ (p=0.690 n=10+9) RegexpMatchEasy1_1K 1.27GB/s ± 0% 1.27GB/s ± 0% ~ (p=0.661 n=10+9) RegexpMatchMedium_32 3.18MB/s ± 0% 3.18MB/s ± 0% ~ (all equal) RegexpMatchMedium_1K 19.7MB/s ± 0% 19.6MB/s ± 0% ~ (p=0.190 n=10+9) RegexpMatchHard_32 11.6MB/s ± 0% 11.6MB/s ± 1% ~ (p=0.669 n=10+10) RegexpMatchHard_1K 13.0MB/s ± 0% 13.0MB/s ± 0% ~ (p=0.718 n=9+9) Gzip 44.4MB/s ± 0% 44.5MB/s ± 0% +0.24% (p=0.000 n=10+10) JSONEncode 96.5MB/s ± 1% 96.1MB/s ± 1% ~ (p=0.065 n=9+10) JSONDecode 20.6MB/s ± 1% 20.7MB/s ± 1% +0.42% (p=0.041 n=10+10) GobDecode 60.6MB/s ± 1% 60.0MB/s ± 2% -0.92% (p=0.016 n=10+10) GobEncode 63.4MB/s ± 0% 63.6MB/s ± 0% ~ (p=0.055 n=10+10) Template 21.5MB/s ± 0% 21.4MB/s ± 0% -0.30% (p=0.000 n=9+10) GoParse 9.64MB/s ± 0% 9.61MB/s ± 0% -0.36% (p=0.000 n=10+10) Revcomp 380MB/s ± 0% 380MB/s ± 0% ~ (p=0.323 n=10+10) [Geo mean] 56.0MB/s 55.9MB/s -0.07% Change-Id: Ia732fa57fbcf4767d72382516d9f16705d177736 Reviewed-on: https://go-review.googlesource.com/96435 Run-TryBot: Cherry Zhang <cherryyz@google.com> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
3a9e4440fd
commit
ecd9e8a2fe
@ -2971,6 +2971,232 @@ var linuxARM64Tests = []*asmTest{
|
||||
`,
|
||||
pos: []string{"\tCSEL\t"},
|
||||
},
|
||||
// Check that zero stores are combine into larger stores
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) {
|
||||
_ = b[1] // early bounds check to guarantee safety of writes below
|
||||
b[0] = 0
|
||||
b[1] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVH\tZR"},
|
||||
neg: []string{"MOVB"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) {
|
||||
_ = b[1] // early bounds check to guarantee safety of writes below
|
||||
b[1] = 0
|
||||
b[0] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVH\tZR"},
|
||||
neg: []string{"MOVB"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) {
|
||||
_ = b[3] // early bounds check to guarantee safety of writes below
|
||||
b[0] = 0
|
||||
b[1] = 0
|
||||
b[2] = 0
|
||||
b[3] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVW\tZR"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) {
|
||||
_ = b[3] // early bounds check to guarantee safety of writes below
|
||||
b[2] = 0
|
||||
b[3] = 0
|
||||
b[1] = 0
|
||||
b[0] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVW\tZR"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(h []uint16) {
|
||||
_ = h[1] // early bounds check to guarantee safety of writes below
|
||||
h[0] = 0
|
||||
h[1] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVW\tZR"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(h []uint16) {
|
||||
_ = h[1] // early bounds check to guarantee safety of writes below
|
||||
h[1] = 0
|
||||
h[0] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVW\tZR"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) {
|
||||
_ = b[7] // early bounds check to guarantee safety of writes below
|
||||
b[0] = 0
|
||||
b[1] = 0
|
||||
b[2] = 0
|
||||
b[3] = 0
|
||||
b[4] = 0
|
||||
b[5] = 0
|
||||
b[6] = 0
|
||||
b[7] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVD\tZR"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(h []uint16) {
|
||||
_ = h[3] // early bounds check to guarantee safety of writes below
|
||||
h[0] = 0
|
||||
h[1] = 0
|
||||
h[2] = 0
|
||||
h[3] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVD\tZR"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(h []uint16) {
|
||||
_ = h[3] // early bounds check to guarantee safety of writes below
|
||||
h[2] = 0
|
||||
h[3] = 0
|
||||
h[1] = 0
|
||||
h[0] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVD\tZR"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(w []uint32) {
|
||||
_ = w[1] // early bounds check to guarantee safety of writes below
|
||||
w[0] = 0
|
||||
w[1] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVD\tZR"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(w []uint32) {
|
||||
_ = w[1] // early bounds check to guarantee safety of writes below
|
||||
w[1] = 0
|
||||
w[0] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVD\tZR"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(b []byte) {
|
||||
_ = b[15] // early bounds check to guarantee safety of writes below
|
||||
b[0] = 0
|
||||
b[1] = 0
|
||||
b[2] = 0
|
||||
b[3] = 0
|
||||
b[4] = 0
|
||||
b[5] = 0
|
||||
b[6] = 0
|
||||
b[7] = 0
|
||||
b[8] = 0
|
||||
b[9] = 0
|
||||
b[10] = 0
|
||||
b[11] = 0
|
||||
b[12] = 0
|
||||
b[13] = 0
|
||||
b[15] = 0
|
||||
b[14] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(h []uint16) {
|
||||
_ = h[7] // early bounds check to guarantee safety of writes below
|
||||
h[0] = 0
|
||||
h[1] = 0
|
||||
h[2] = 0
|
||||
h[3] = 0
|
||||
h[4] = 0
|
||||
h[5] = 0
|
||||
h[6] = 0
|
||||
h[7] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(w []uint32) {
|
||||
_ = w[3] // early bounds check to guarantee safety of writes below
|
||||
w[0] = 0
|
||||
w[1] = 0
|
||||
w[2] = 0
|
||||
w[3] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(w []uint32) {
|
||||
_ = w[3] // early bounds check to guarantee safety of writes below
|
||||
w[1] = 0
|
||||
w[0] = 0
|
||||
w[3] = 0
|
||||
w[2] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(d []uint64) {
|
||||
_ = d[1] // early bounds check to guarantee safety of writes below
|
||||
d[0] = 0
|
||||
d[1] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(d []uint64) {
|
||||
_ = d[1] // early bounds check to guarantee safety of writes below
|
||||
d[1] = 0
|
||||
d[0] = 0
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
}
|
||||
|
||||
var linuxMIPSTests = []*asmTest{
|
||||
|
@ -1439,6 +1439,36 @@
|
||||
&& clobber(o4) && clobber(o5) && clobber(s0)
|
||||
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
|
||||
|
||||
// Combine zero stores into larger (unaligned) stores.
|
||||
(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
|
||||
&& x.Uses == 1
|
||||
&& areAdjacentOffsets(i,j,1)
|
||||
&& is32Bit(min(i,j))
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
-> (MOVHstorezero [min(i,j)] {s} ptr0 mem)
|
||||
(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
|
||||
&& x.Uses == 1
|
||||
&& areAdjacentOffsets(i,j,2)
|
||||
&& is32Bit(min(i,j))
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
-> (MOVWstorezero [min(i,j)] {s} ptr0 mem)
|
||||
(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
|
||||
&& x.Uses == 1
|
||||
&& areAdjacentOffsets(i,j,4)
|
||||
&& is32Bit(min(i,j))
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
-> (MOVDstorezero [min(i,j)] {s} ptr0 mem)
|
||||
(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
|
||||
&& x.Uses == 1
|
||||
&& areAdjacentOffsets(i,j,8)
|
||||
&& is32Bit(min(i,j))
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
-> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
|
||||
|
||||
// FP simplification
|
||||
(FNEGS (FMULS x y)) -> (FNMULS x y)
|
||||
(FNEGD (FMULD x y)) -> (FNMULD x y)
|
||||
|
@ -769,6 +769,10 @@ func overlap(offset1, size1, offset2, size2 int64) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func areAdjacentOffsets(off1, off2, size int64) bool {
|
||||
return off1+size == off2 || off1 == off2+size
|
||||
}
|
||||
|
||||
// check if value zeroes out upper 32-bit of 64-bit register.
|
||||
// depth limits recursion depth. In AMD64.rules 3 is used as limit,
|
||||
// because it catches same amount of cases as 4.
|
||||
|
@ -5941,6 +5941,35 @@ func rewriteValueARM64_OpARM64MOVBstorezero_0(v *Value) bool {
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
|
||||
// cond: x.Uses == 1 && areAdjacentOffsets(i,j,1) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
|
||||
// result: (MOVHstorezero [min(i,j)] {s} ptr0 mem)
|
||||
for {
|
||||
i := v.AuxInt
|
||||
s := v.Aux
|
||||
_ = v.Args[1]
|
||||
ptr0 := v.Args[0]
|
||||
x := v.Args[1]
|
||||
if x.Op != OpARM64MOVBstorezero {
|
||||
break
|
||||
}
|
||||
j := x.AuxInt
|
||||
if x.Aux != s {
|
||||
break
|
||||
}
|
||||
_ = x.Args[1]
|
||||
ptr1 := x.Args[0]
|
||||
mem := x.Args[1]
|
||||
if !(x.Uses == 1 && areAdjacentOffsets(i, j, 1) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARM64MOVHstorezero)
|
||||
v.AuxInt = min(i, j)
|
||||
v.Aux = s
|
||||
v.AddArg(ptr0)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64MOVDload_0(v *Value) bool {
|
||||
@ -6205,6 +6234,35 @@ func rewriteValueARM64_OpARM64MOVDstorezero_0(v *Value) bool {
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
|
||||
// cond: x.Uses == 1 && areAdjacentOffsets(i,j,8) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
|
||||
// result: (MOVQstorezero [min(i,j)] {s} ptr0 mem)
|
||||
for {
|
||||
i := v.AuxInt
|
||||
s := v.Aux
|
||||
_ = v.Args[1]
|
||||
ptr0 := v.Args[0]
|
||||
x := v.Args[1]
|
||||
if x.Op != OpARM64MOVDstorezero {
|
||||
break
|
||||
}
|
||||
j := x.AuxInt
|
||||
if x.Aux != s {
|
||||
break
|
||||
}
|
||||
_ = x.Args[1]
|
||||
ptr1 := x.Args[0]
|
||||
mem := x.Args[1]
|
||||
if !(x.Uses == 1 && areAdjacentOffsets(i, j, 8) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARM64MOVQstorezero)
|
||||
v.AuxInt = min(i, j)
|
||||
v.Aux = s
|
||||
v.AddArg(ptr0)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64MOVHUload_0(v *Value) bool {
|
||||
@ -6747,6 +6805,35 @@ func rewriteValueARM64_OpARM64MOVHstorezero_0(v *Value) bool {
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
|
||||
// cond: x.Uses == 1 && areAdjacentOffsets(i,j,2) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
|
||||
// result: (MOVWstorezero [min(i,j)] {s} ptr0 mem)
|
||||
for {
|
||||
i := v.AuxInt
|
||||
s := v.Aux
|
||||
_ = v.Args[1]
|
||||
ptr0 := v.Args[0]
|
||||
x := v.Args[1]
|
||||
if x.Op != OpARM64MOVHstorezero {
|
||||
break
|
||||
}
|
||||
j := x.AuxInt
|
||||
if x.Aux != s {
|
||||
break
|
||||
}
|
||||
_ = x.Args[1]
|
||||
ptr1 := x.Args[0]
|
||||
mem := x.Args[1]
|
||||
if !(x.Uses == 1 && areAdjacentOffsets(i, j, 2) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARM64MOVWstorezero)
|
||||
v.AuxInt = min(i, j)
|
||||
v.Aux = s
|
||||
v.AddArg(ptr0)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64MOVQstorezero_0(v *Value) bool {
|
||||
@ -7379,6 +7466,35 @@ func rewriteValueARM64_OpARM64MOVWstorezero_0(v *Value) bool {
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
// match: (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
|
||||
// cond: x.Uses == 1 && areAdjacentOffsets(i,j,4) && is32Bit(min(i,j)) && isSamePtr(ptr0, ptr1) && clobber(x)
|
||||
// result: (MOVDstorezero [min(i,j)] {s} ptr0 mem)
|
||||
for {
|
||||
i := v.AuxInt
|
||||
s := v.Aux
|
||||
_ = v.Args[1]
|
||||
ptr0 := v.Args[0]
|
||||
x := v.Args[1]
|
||||
if x.Op != OpARM64MOVWstorezero {
|
||||
break
|
||||
}
|
||||
j := x.AuxInt
|
||||
if x.Aux != s {
|
||||
break
|
||||
}
|
||||
_ = x.Args[1]
|
||||
ptr1 := x.Args[0]
|
||||
mem := x.Args[1]
|
||||
if !(x.Uses == 1 && areAdjacentOffsets(i, j, 4) && is32Bit(min(i, j)) && isSamePtr(ptr0, ptr1) && clobber(x)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpARM64MOVDstorezero)
|
||||
v.AuxInt = min(i, j)
|
||||
v.Aux = s
|
||||
v.AddArg(ptr0)
|
||||
v.AddArg(mem)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueARM64_OpARM64MUL_0(v *Value) bool {
|
||||
|
Loading…
Reference in New Issue
Block a user