1
0
mirror of https://github.com/golang/go synced 2024-11-26 20:11:26 -07:00

cmd/compile: optimize A->B->C Moves that include VarDefs

We have an existing optimization that recognizes
memory moves of the form A -> B -> C and converts
them into A -> C, in the hopes that the store to
B will be end up being dead and thus eliminated.

However, when A, B, and C are large types,
the front end sometimes emits VarDef ops for the moves.
This change adds an optimization to match that pattern.

This required changing an old compiler test.
The test assumed that a temporary was required
to deal with a large return value.
With this optimization in place, that temporary
ended up being eliminated.

Triggers 649 times during 'go build -a std cmd'.

Cuts 16k off cmd/go.

name        old object-bytes  new object-bytes  delta
Template          507kB ± 0%        507kB ± 0%  -0.15%  (p=0.008 n=5+5)
Unicode           225kB ± 0%        225kB ± 0%    ~     (all equal)
GoTypes          1.85MB ± 0%       1.85MB ± 0%    ~     (all equal)
Flate             328kB ± 0%        328kB ± 0%    ~     (all equal)
GoParser          402kB ± 0%        402kB ± 0%  -0.00%  (p=0.008 n=5+5)
Reflect          1.41MB ± 0%       1.41MB ± 0%  -0.20%  (p=0.008 n=5+5)
Tar               458kB ± 0%        458kB ± 0%    ~     (all equal)
XML               601kB ± 0%        599kB ± 0%  -0.21%  (p=0.008 n=5+5)

Change-Id: I9b5f25c8663a0b772ad1ee51fa61f74b74d26dd3
Reviewed-on: https://go-review.googlesource.com/c/143479
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Munday <mike.munday@ibm.com>
This commit is contained in:
Josh Bleecher Snyder 2018-10-20 19:59:27 -07:00
parent f9fff4554c
commit 8607b2e825
3 changed files with 51 additions and 8 deletions

View File

@ -1815,6 +1815,15 @@
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
-> (Move {t1} [s] dst src midmem)
// Same, but for large types that require VarDefs.
(Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
&& t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
&& isSamePtr(tmp1, tmp2)
&& isStackPtr(src)
&& disjoint(src, s, tmp2, s)
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
-> (Move {t1} [s] dst src midmem)
// Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
// However, this rule is needed to prevent the previous rule from looping forever in such cases.
(Move dst src mem) && isSamePtr(dst, src) -> mem

View File

@ -17388,6 +17388,41 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
v.AddArg(midmem)
return true
}
// match: (Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
// cond: t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
// result: (Move {t1} [s] dst src midmem)
for {
s := v.AuxInt
t1 := v.Aux
_ = v.Args[2]
dst := v.Args[0]
tmp1 := v.Args[1]
midmem := v.Args[2]
if midmem.Op != OpVarDef {
break
}
midmem_0 := midmem.Args[0]
if midmem_0.Op != OpMove {
break
}
if midmem_0.AuxInt != s {
break
}
t2 := midmem_0.Aux
_ = midmem_0.Args[2]
tmp2 := midmem_0.Args[0]
src := midmem_0.Args[1]
if !(t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
break
}
v.reset(OpMove)
v.AuxInt = s
v.Aux = t1
v.AddArg(dst)
v.AddArg(src)
v.AddArg(midmem)
return true
}
// match: (Move dst src mem)
// cond: isSamePtr(dst, src)
// result: mem

View File

@ -6,15 +6,14 @@
// We have a limit of 1GB for stack frames.
// Make sure we include the callee args section.
// (The dispatch wrapper which implements (*S).f
// copies the return value from f to a stack temp, then
// from that stack temp to the return value of (*S).f.
// It uses ~800MB for each section.)
package main
type S struct {
i interface {
f() [800e6]byte
}
func f() { // ERROR "stack frame too large"
var x [800e6]byte
g(x)
return
}
//go:noinline
func g([800e6]byte) {}