mirror of
https://github.com/golang/go
synced 2024-11-17 02:14:42 -07:00
cmd/compile: improve fractional word zeroing
This change improves fractional word zeroing by using overlapping MOVDs for the fractions. Performance of go1 benchmarks on Amberwing was all noise: name old time/op new time/op delta RegexpMatchEasy0_32 247ns ± 0% 246ns ± 0% -0.40% (p=0.008 n=5+5) RegexpMatchEasy0_1K 581ns ± 0% 579ns ± 0% -0.34% (p=0.000 n=5+4) RegexpMatchEasy1_32 244ns ± 0% 242ns ± 0% ~ (p=0.079 n=4+5) RegexpMatchEasy1_1K 804ns ± 0% 805ns ± 0% ~ (p=0.238 n=5+4) RegexpMatchMedium_32 313ns ± 0% 311ns ± 0% -0.64% (p=0.008 n=5+5) RegexpMatchMedium_1K 52.2µs ± 0% 51.9µs ± 0% -0.52% (p=0.016 n=5+4) RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 0% ~ (p=0.603 n=5+5) RegexpMatchHard_1K 78.8µs ± 0% 78.9µs ± 0% +0.05% (p=0.008 n=5+5) FmtFprintfEmpty 58.6ns ± 0% 58.6ns ± 0% ~ (p=0.159 n=5+5) FmtFprintfString 118ns ± 0% 119ns ± 0% +0.85% (p=0.008 n=5+5) FmtFprintfInt 119ns ± 0% 123ns ± 0% +3.36% (p=0.016 n=5+4) FmtFprintfIntInt 192ns ± 0% 200ns ± 0% +4.17% (p=0.008 n=5+5) FmtFprintfPrefixedInt 224ns ± 0% 209ns ± 0% -6.70% (p=0.008 n=5+5) FmtFprintfFloat 335ns ± 0% 335ns ± 0% ~ (all equal) FmtManyArgs 775ns ± 0% 811ns ± 1% +4.67% (p=0.016 n=4+5) Gzip 437ms ± 0% 438ms ± 0% +0.19% (p=0.008 n=5+5) HTTPClientServer 88.7µs ± 1% 90.3µs ± 1% +1.75% (p=0.016 n=5+5) JSONEncode 20.1ms ± 1% 20.1ms ± 0% ~ (p=1.000 n=5+5) JSONDecode 94.7ms ± 1% 94.8ms ± 1% ~ (p=0.548 n=5+5) GobDecode 12.8ms ± 1% 12.8ms ± 1% ~ (p=0.548 n=5+5) GobEncode 12.1ms ± 0% 12.1ms ± 0% ~ (p=0.151 n=5+5) Mandelbrot200 5.37ms ± 0% 5.37ms ± 0% -0.03% (p=0.008 n=5+5) TimeParse 450ns ± 0% 451ns ± 1% ~ (p=0.635 n=4+5) TimeFormat 485ns ± 0% 484ns ± 0% ~ (p=0.508 n=5+5) Template 90.4ms ± 0% 90.2ms ± 0% -0.24% (p=0.016 n=5+5) GoParse 5.98ms ± 0% 5.98ms ± 0% ~ (p=1.000 n=5+5) BinaryTree17 11.8s ± 0% 11.8s ± 0% ~ (p=0.841 n=5+5) Revcomp 669ms ± 0% 669ms ± 0% ~ (p=0.310 n=5+5) Fannkuch11 3.28s ± 0% 3.34s ± 0% +1.64% (p=0.008 n=5+5) name old speed new speed delta RegexpMatchEasy0_32 129MB/s ± 0% 130MB/s ± 0% +0.30% (p=0.016 n=4+5) RegexpMatchEasy0_1K 1.76GB/s ± 0% 
1.77GB/s ± 0% +0.27% (p=0.016 n=5+4) RegexpMatchEasy1_32 131MB/s ± 0% 132MB/s ± 0% +0.71% (p=0.016 n=4+5) RegexpMatchEasy1_1K 1.27GB/s ± 0% 1.27GB/s ± 0% -0.17% (p=0.016 n=5+4) RegexpMatchMedium_32 3.19MB/s ± 0% 3.21MB/s ± 0% +0.63% (p=0.008 n=5+5) RegexpMatchMedium_1K 19.6MB/s ± 0% 19.7MB/s ± 0% +0.52% (p=0.016 n=5+4) RegexpMatchHard_32 11.7MB/s ± 0% 11.7MB/s ± 0% ~ (p=0.643 n=5+5) RegexpMatchHard_1K 13.0MB/s ± 0% 13.0MB/s ± 0% ~ (p=0.079 n=4+5) Gzip 44.4MB/s ± 0% 44.3MB/s ± 0% -0.19% (p=0.008 n=5+5) JSONEncode 96.3MB/s ± 1% 96.4MB/s ± 0% ~ (p=1.000 n=5+5) JSONDecode 20.5MB/s ± 1% 20.5MB/s ± 1% ~ (p=0.460 n=5+5) GobDecode 60.1MB/s ± 1% 59.9MB/s ± 1% ~ (p=0.548 n=5+5) GobEncode 63.5MB/s ± 0% 63.7MB/s ± 0% ~ (p=0.135 n=5+5) Template 21.5MB/s ± 0% 21.5MB/s ± 0% +0.24% (p=0.016 n=5+5) GoParse 9.68MB/s ± 0% 9.69MB/s ± 0% ~ (p=0.786 n=5+5) Revcomp 380MB/s ± 0% 380MB/s ± 0% ~ (p=0.310 n=5+5) Change-Id: I596eee6421cdbad1a0189cdb9fe0628bba534eaf Reviewed-on: https://go-review.googlesource.com/96775 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
413d8a833d
commit
094258408d
@ -3245,6 +3245,24 @@ var linuxARM64Tests = []*asmTest{
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(a *[39]byte) {
|
||||
*a = [39]byte{}
|
||||
}
|
||||
`,
|
||||
pos: []string{"MOVD"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
{
|
||||
fn: `
|
||||
func $(a *[30]byte) {
|
||||
*a = [30]byte{}
|
||||
}
|
||||
`,
|
||||
pos: []string{"STP"},
|
||||
neg: []string{"MOVB", "MOVH", "MOVW"},
|
||||
},
|
||||
}
|
||||
|
||||
var linuxMIPSTests = []*asmTest{
|
||||
|
@ -399,10 +399,14 @@
|
||||
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
|
||||
|
||||
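// strip off fractional word zeroing: zero the 16-byte-multiple prefix, then cover the remaining tail with a single 8- or 16-byte zero that ends exactly at offset s (it may overlap the prefix)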
|
||||
(Zero [s] ptr mem) && s%16 != 0 && s > 16 ->
|
||||
(Zero [s-s%16]
|
||||
(OffPtr <ptr.Type> ptr [s%16])
|
||||
(Zero [s%16] ptr mem))
|
||||
(Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 ->
|
||||
(Zero [8]
|
||||
(OffPtr <ptr.Type> ptr [s-8])
|
||||
(Zero [s-s%16] ptr mem))
|
||||
(Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 ->
|
||||
(Zero [16]
|
||||
(OffPtr <ptr.Type> ptr [s-16])
|
||||
(Zero [s-s%16] ptr mem))
|
||||
|
||||
// medium zeroing uses a duff device
|
||||
// 4, 16, and 64 are magic constants, see runtime/mkduff.go
|
||||
|
@ -18551,24 +18551,48 @@ func rewriteValueARM64_OpZero_20(v *Value) bool {
|
||||
config := b.Func.Config
|
||||
_ = config
|
||||
// match: (Zero [s] ptr mem)
|
||||
// cond: s%16 != 0 && s > 16
|
||||
// result: (Zero [s-s%16] (OffPtr <ptr.Type> ptr [s%16]) (Zero [s%16] ptr mem))
|
||||
// cond: s%16 != 0 && s%16 <= 8 && s > 16
|
||||
// result: (Zero [8] (OffPtr <ptr.Type> ptr [s-8]) (Zero [s-s%16] ptr mem))
|
||||
for {
|
||||
s := v.AuxInt
|
||||
_ = v.Args[1]
|
||||
ptr := v.Args[0]
|
||||
mem := v.Args[1]
|
||||
if !(s%16 != 0 && s > 16) {
|
||||
if !(s%16 != 0 && s%16 <= 8 && s > 16) {
|
||||
break
|
||||
}
|
||||
v.reset(OpZero)
|
||||
v.AuxInt = s - s%16
|
||||
v.AuxInt = 8
|
||||
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
|
||||
v0.AuxInt = s % 16
|
||||
v0.AuxInt = s - 8
|
||||
v0.AddArg(ptr)
|
||||
v.AddArg(v0)
|
||||
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
|
||||
v1.AuxInt = s % 16
|
||||
v1.AuxInt = s - s%16
|
||||
v1.AddArg(ptr)
|
||||
v1.AddArg(mem)
|
||||
v.AddArg(v1)
|
||||
return true
|
||||
}
|
||||
// match: (Zero [s] ptr mem)
|
||||
// cond: s%16 != 0 && s%16 > 8 && s > 16
|
||||
// result: (Zero [16] (OffPtr <ptr.Type> ptr [s-16]) (Zero [s-s%16] ptr mem))
|
||||
for {
|
||||
s := v.AuxInt
|
||||
_ = v.Args[1]
|
||||
ptr := v.Args[0]
|
||||
mem := v.Args[1]
|
||||
if !(s%16 != 0 && s%16 > 8 && s > 16) {
|
||||
break
|
||||
}
|
||||
v.reset(OpZero)
|
||||
v.AuxInt = 16
|
||||
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
|
||||
v0.AuxInt = s - 16
|
||||
v0.AddArg(ptr)
|
||||
v.AddArg(v0)
|
||||
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
|
||||
v1.AuxInt = s - s%16
|
||||
v1.AddArg(ptr)
|
||||
v1.AddArg(mem)
|
||||
v.AddArg(v1)
|
||||
|
Loading…
Reference in New Issue
Block a user