mirror of
https://github.com/golang/go
synced 2024-11-26 20:11:26 -07:00
cmd/compile: optimize A->B->C Moves that include VarDefs
We have an existing optimization that recognizes memory moves of the form A -> B -> C and converts them into A -> C, in the hopes that the store to B will be end up being dead and thus eliminated. However, when A, B, and C are large types, the front end sometimes emits VarDef ops for the moves. This change adds an optimization to match that pattern. This required changing an old compiler test. The test assumed that a temporary was required to deal with a large return value. With this optimization in place, that temporary ended up being eliminated. Triggers 649 times during 'go build -a std cmd'. Cuts 16k off cmd/go. name old object-bytes new object-bytes delta Template 507kB ± 0% 507kB ± 0% -0.15% (p=0.008 n=5+5) Unicode 225kB ± 0% 225kB ± 0% ~ (all equal) GoTypes 1.85MB ± 0% 1.85MB ± 0% ~ (all equal) Flate 328kB ± 0% 328kB ± 0% ~ (all equal) GoParser 402kB ± 0% 402kB ± 0% -0.00% (p=0.008 n=5+5) Reflect 1.41MB ± 0% 1.41MB ± 0% -0.20% (p=0.008 n=5+5) Tar 458kB ± 0% 458kB ± 0% ~ (all equal) XML 601kB ± 0% 599kB ± 0% -0.21% (p=0.008 n=5+5) Change-Id: I9b5f25c8663a0b772ad1ee51fa61f74b74d26dd3 Reviewed-on: https://go-review.googlesource.com/c/143479 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Munday <mike.munday@ibm.com>
This commit is contained in:
parent
f9fff4554c
commit
8607b2e825
@ -1815,6 +1815,15 @@
|
||||
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
|
||||
-> (Move {t1} [s] dst src midmem)
|
||||
|
||||
// Same, but for large types that require VarDefs.
|
||||
(Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
|
||||
&& t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
|
||||
&& isSamePtr(tmp1, tmp2)
|
||||
&& isStackPtr(src)
|
||||
&& disjoint(src, s, tmp2, s)
|
||||
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
|
||||
-> (Move {t1} [s] dst src midmem)
|
||||
|
||||
// Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
|
||||
// However, this rule is needed to prevent the previous rule from looping forever in such cases.
|
||||
(Move dst src mem) && isSamePtr(dst, src) -> mem
|
||||
|
@ -17388,6 +17388,41 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
|
||||
v.AddArg(midmem)
|
||||
return true
|
||||
}
|
||||
// match: (Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
|
||||
// cond: t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
|
||||
// result: (Move {t1} [s] dst src midmem)
|
||||
for {
|
||||
s := v.AuxInt
|
||||
t1 := v.Aux
|
||||
_ = v.Args[2]
|
||||
dst := v.Args[0]
|
||||
tmp1 := v.Args[1]
|
||||
midmem := v.Args[2]
|
||||
if midmem.Op != OpVarDef {
|
||||
break
|
||||
}
|
||||
midmem_0 := midmem.Args[0]
|
||||
if midmem_0.Op != OpMove {
|
||||
break
|
||||
}
|
||||
if midmem_0.AuxInt != s {
|
||||
break
|
||||
}
|
||||
t2 := midmem_0.Aux
|
||||
_ = midmem_0.Args[2]
|
||||
tmp2 := midmem_0.Args[0]
|
||||
src := midmem_0.Args[1]
|
||||
if !(t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
|
||||
break
|
||||
}
|
||||
v.reset(OpMove)
|
||||
v.AuxInt = s
|
||||
v.Aux = t1
|
||||
v.AddArg(dst)
|
||||
v.AddArg(src)
|
||||
v.AddArg(midmem)
|
||||
return true
|
||||
}
|
||||
// match: (Move dst src mem)
|
||||
// cond: isSamePtr(dst, src)
|
||||
// result: mem
|
||||
|
@ -6,15 +6,14 @@
|
||||
|
||||
// We have a limit of 1GB for stack frames.
|
||||
// Make sure we include the callee args section.
|
||||
// (The dispatch wrapper which implements (*S).f
|
||||
// copies the return value from f to a stack temp, then
|
||||
// from that stack temp to the return value of (*S).f.
|
||||
// It uses ~800MB for each section.)
|
||||
|
||||
package main
|
||||
|
||||
type S struct {
|
||||
i interface {
|
||||
f() [800e6]byte
|
||||
}
|
||||
func f() { // ERROR "stack frame too large"
|
||||
var x [800e6]byte
|
||||
g(x)
|
||||
return
|
||||
}
|
||||
|
||||
//go:noinline
|
||||
func g([800e6]byte) {}
|
||||
|
Loading…
Reference in New Issue
Block a user