mirror of
https://github.com/golang/go
synced 2024-11-05 16:16:11 -07:00
cmd/compile/internal/ssa: optimize away double NEG on amd64
When lowering some ops on amd64 we generate additional NEGQ. This may result in code like this: NEGQ R12 NEGQ R12 Optimize it away. Gain is not significant, about ~0.5% gain in geomean in compress/flate and 200 bytes codesize reduction in go tool. Full results below: name old time/op new time/op delta Encode/Digits/Huffman/1e4-6 65.8µs ± 0% 65.7µs ± 0% -0.21% (p=0.010 n=10+9) Encode/Digits/Huffman/1e5-6 633µs ± 0% 632µs ± 0% ~ (p=0.370 n=8+9) Encode/Digits/Huffman/1e6-6 6.30ms ± 1% 6.29ms ± 1% ~ (p=0.796 n=10+10) Encode/Digits/Speed/1e4-6 281µs ± 0% 280µs ± 1% -0.34% (p=0.043 n=8+10) Encode/Digits/Speed/1e5-6 2.66ms ± 0% 2.66ms ± 0% -0.09% (p=0.043 n=10+10) Encode/Digits/Speed/1e6-6 26.3ms ± 0% 26.3ms ± 0% ~ (p=0.190 n=10+10) Encode/Digits/Default/1e4-6 554µs ± 0% 557µs ± 0% +0.46% (p=0.001 n=9+10) Encode/Digits/Default/1e5-6 8.63ms ± 1% 8.62ms ± 1% ~ (p=0.912 n=10+10) Encode/Digits/Default/1e6-6 92.7ms ± 1% 92.2ms ± 1% ~ (p=0.052 n=10+10) Encode/Digits/Compression/1e4-6 558µs ± 1% 557µs ± 1% ~ (p=0.481 n=10+10) Encode/Digits/Compression/1e5-6 8.58ms ± 0% 8.61ms ± 1% ~ (p=0.315 n=8+10) Encode/Digits/Compression/1e6-6 92.3ms ± 1% 92.4ms ± 1% ~ (p=0.971 n=10+10) Encode/Twain/Huffman/1e4-6 89.5µs ± 0% 89.0µs ± 1% -0.48% (p=0.001 n=9+9) Encode/Twain/Huffman/1e5-6 727µs ± 1% 728µs ± 0% ~ (p=0.604 n=10+9) Encode/Twain/Huffman/1e6-6 7.21ms ± 0% 7.19ms ± 1% ~ (p=0.696 n=8+10) Encode/Twain/Speed/1e4-6 320µs ± 1% 321µs ± 1% ~ (p=0.353 n=10+10) Encode/Twain/Speed/1e5-6 2.63ms ± 0% 2.62ms ± 1% -0.33% (p=0.016 n=8+10) Encode/Twain/Speed/1e6-6 25.8ms ± 0% 25.8ms ± 0% ~ (p=0.360 n=10+8) Encode/Twain/Default/1e4-6 677µs ± 1% 671µs ± 1% -0.88% (p=0.000 n=10+10) Encode/Twain/Default/1e5-6 10.5ms ± 1% 10.3ms ± 0% -2.06% (p=0.000 n=10+10) Encode/Twain/Default/1e6-6 113ms ± 1% 111ms ± 1% -1.96% (p=0.000 n=10+9) Encode/Twain/Compression/1e4-6 688µs ± 0% 679µs ± 1% -1.30% (p=0.000 n=7+10) Encode/Twain/Compression/1e5-6 11.6ms ± 1% 11.3ms ± 1% -2.10% (p=0.000 n=10+10) Encode/Twain/Compression/1e6-6 126ms ± 1% 124ms ± 0% -1.57% (p=0.000 n=10+10) [Geo mean] 3.45ms 3.44ms -0.46% name old speed new speed delta Encode/Digits/Huffman/1e4-6 152MB/s ± 0% 152MB/s ± 0% +0.21% (p=0.009 n=10+9) Encode/Digits/Huffman/1e5-6 158MB/s ± 0% 158MB/s ± 0% ~ (p=0.336 n=8+9) Encode/Digits/Huffman/1e6-6 159MB/s ± 1% 159MB/s ± 1% ~ (p=0.781 n=10+10) Encode/Digits/Speed/1e4-6 35.6MB/s ± 0% 35.7MB/s ± 1% +0.34% (p=0.020 n=8+10) Encode/Digits/Speed/1e5-6 37.6MB/s ± 0% 37.7MB/s ± 0% +0.09% (p=0.049 n=10+10) Encode/Digits/Speed/1e6-6 38.0MB/s ± 0% 38.0MB/s ± 0% ~ (p=0.146 n=10+10) Encode/Digits/Default/1e4-6 18.0MB/s ± 0% 18.0MB/s ± 0% -0.45% (p=0.002 n=9+10) Encode/Digits/Default/1e5-6 11.6MB/s ± 1% 11.6MB/s ± 1% ~ (p=0.644 n=10+10) Encode/Digits/Default/1e6-6 10.8MB/s ± 1% 10.8MB/s ± 1% +0.51% (p=0.044 n=10+10) Encode/Digits/Compression/1e4-6 17.9MB/s ± 1% 17.9MB/s ± 1% ~ (p=0.468 n=10+10) Encode/Digits/Compression/1e5-6 11.7MB/s ± 0% 11.6MB/s ± 1% ~ (p=0.322 n=8+10) Encode/Digits/Compression/1e6-6 10.8MB/s ± 1% 10.8MB/s ± 1% ~ (p=0.983 n=10+10) Encode/Twain/Huffman/1e4-6 112MB/s ± 0% 112MB/s ± 1% +0.42% (p=0.002 n=8+9) Encode/Twain/Huffman/1e5-6 138MB/s ± 1% 137MB/s ± 0% ~ (p=0.616 n=10+9) Encode/Twain/Huffman/1e6-6 139MB/s ± 0% 139MB/s ± 1% ~ (p=0.652 n=8+10) Encode/Twain/Speed/1e4-6 31.3MB/s ± 1% 31.2MB/s ± 1% ~ (p=0.342 n=10+10) Encode/Twain/Speed/1e5-6 38.0MB/s ± 0% 38.1MB/s ± 1% +0.33% (p=0.011 n=8+10) Encode/Twain/Speed/1e6-6 38.8MB/s ± 0% 38.7MB/s ± 0% ~ (p=0.325 n=10+8) Encode/Twain/Default/1e4-6 14.8MB/s ± 1% 14.9MB/s ± 1% +0.88% (p=0.000 n=10+10) Encode/Twain/Default/1e5-6 9.48MB/s ± 1% 9.68MB/s ± 0% +2.11% (p=0.000 n=10+10) Encode/Twain/Default/1e6-6 8.86MB/s ± 1% 9.03MB/s ± 1% +1.97% (p=0.000 n=10+9) Encode/Twain/Compression/1e4-6 14.5MB/s ± 0% 14.7MB/s ± 1% +1.31% (p=0.000 n=7+10) Encode/Twain/Compression/1e5-6 8.63MB/s ± 1% 8.82MB/s ± 1% +2.17% (p=0.000 n=10+10) Encode/Twain/Compression/1e6-6 7.92MB/s ± 1% 8.05MB/s ± 1% +1.59% (p=0.000 n=10+10) [Geo mean] 29.0MB/s 29.1MB/s +0.47% // symSizeComp `which go` go_old: section differences: global text (code) = 203 bytes (0.005131%) read-only data = 1 bytes (0.000057%) Total difference 204 bytes (0.003297%) Change-Id: Ie2cdfa1216472d78694fff44d215b3b8e71cf7bf Reviewed-on: https://go-review.googlesource.com/102277 Run-TryBot: Ilya Tocar <ilya.tocar@intel.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
ea1f483240
commit
24cd112086
@ -1441,6 +1441,10 @@
|
|||||||
// If we cared, we might do:
|
// If we cared, we might do:
|
||||||
// (ANDLconst <t> [c] x) && t.Size()==1 && int8(x)==0 -> (MOVLconst [0])
|
// (ANDLconst <t> [c] x) && t.Size()==1 && int8(x)==0 -> (MOVLconst [0])
|
||||||
|
|
||||||
|
// Remove redundant ops
|
||||||
|
// Not in generic rules, because they may appear after lowering e. g. Slicemask
|
||||||
|
(NEG(Q|L) (NEG(Q|L) x)) -> x
|
||||||
|
|
||||||
// Convert constant subtracts to constant adds
|
// Convert constant subtracts to constant adds
|
||||||
(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
|
(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
|
||||||
(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)
|
(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)
|
||||||
|
@ -19847,6 +19847,20 @@ func rewriteValueAMD64_OpAMD64MULSSmem_0(v *Value) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpAMD64NEGL_0(v *Value) bool {
|
func rewriteValueAMD64_OpAMD64NEGL_0(v *Value) bool {
|
||||||
|
// match: (NEGL (NEGL x))
|
||||||
|
// cond:
|
||||||
|
// result: x
|
||||||
|
for {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
if v_0.Op != OpAMD64NEGL {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
x := v_0.Args[0]
|
||||||
|
v.reset(OpCopy)
|
||||||
|
v.Type = x.Type
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (NEGL (MOVLconst [c]))
|
// match: (NEGL (MOVLconst [c]))
|
||||||
// cond:
|
// cond:
|
||||||
// result: (MOVLconst [int64(int32(-c))])
|
// result: (MOVLconst [int64(int32(-c))])
|
||||||
@ -19863,6 +19877,20 @@ func rewriteValueAMD64_OpAMD64NEGL_0(v *Value) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
func rewriteValueAMD64_OpAMD64NEGQ_0(v *Value) bool {
|
func rewriteValueAMD64_OpAMD64NEGQ_0(v *Value) bool {
|
||||||
|
// match: (NEGQ (NEGQ x))
|
||||||
|
// cond:
|
||||||
|
// result: x
|
||||||
|
for {
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
if v_0.Op != OpAMD64NEGQ {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
x := v_0.Args[0]
|
||||||
|
v.reset(OpCopy)
|
||||||
|
v.Type = x.Type
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (NEGQ (MOVQconst [c]))
|
// match: (NEGQ (MOVQconst [c]))
|
||||||
// cond:
|
// cond:
|
||||||
// result: (MOVQconst [-c])
|
// result: (MOVQconst [-c])
|
||||||
|
Loading…
Reference in New Issue
Block a user