From 00437ebe73944854d58ddb6710a185677317ee6e Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Mon, 20 Jul 2015 18:50:17 -0700 Subject: [PATCH] [dev.ssa] cmd/compile: don't combine phi vars from different blocks in CSE Here is a concrete case in which this goes wrong. func f_ssa() int { var n int Next: for j := 0; j < 3; j++ { for i := 0; i < 10; i++ { if i == 6 { continue Next } n = i } n += j + j + j + j + j + j + j + j + j + j // j * 10 } return n } What follows is the function printout before and after CSE. Note blocks b8 and b10 in the before case. b8 is the inner loop's condition: i < 10. b10 is the inner loop's increment: i++. v82 is i. On entry to b8, it is either 0 (v19) the first time, or the result of incrementing v82, by way of v29. The CSE pass considered v82 and v49 to be common subexpressions, and eliminated v82 in favor of v49. In the after case, v82 is now dead and will shortly be eliminated. As a result, v29 is also dead, and we have lost the increment. The loop runs forever. BEFORE CSE f_ssa b1: v1 = Arg v2 = SP v4 = Addr <*int> {~r0} v2 v13 = Zero [8] v4 v1 v14 = Const v15 = Const v17 = Const [3] v19 = Const v21 = Const [10] v24 = Const [6] v28 = Const [1] v43 = Const [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi v15 v44 v68 = Phi v14 v67 v81 = Phi v13 v81 v18 = Less v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi v66 v41 v44 = Add v49 v43 Plain -> b4 b7: <- b4 v47 = Store v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi v68 v82 v82 = Phi v19 v29 v22 = Less v82 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq v82 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add v82 v28 Plain -> b8 b11: <- b8 v32 = Add v49 v49 v33 = Add v32 v49 v34 = Add v33 v49 v35 = Add v34 v49 v36 = Add v35 v49 v37 = Add v36 v49 v38 = Add v37 v49 v39 = Add v38 v49 v40 = Add v39 v49 v41 = Add v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 AFTER CSE f_ssa b1: v1 = Arg v2 = SP v4 = Addr <*int> {~r0} v2 v13 = Zero [8] v4 v1 v14 = Const v15 = Const v17 = Const [3] v19 = Const v21 = Const [10] v24 = Const [6] v28 = Const [1] v43 = Const [1] Plain -> b3 b2: <- b7 Exit v47 b3: <- b1 Plain -> b4 b4: <- b3 b6 v49 = Phi v19 v44 v68 = Phi v19 v67 v81 = Phi v13 v81 v18 = Less v49 v17 If v18 -> b5 b7 b5: <- b4 Plain -> b8 b6: <- b12 b11 v67 = Phi v66 v41 v44 = Add v49 v43 Plain -> b4 b7: <- b4 v47 = Store v4 v68 v81 Plain -> b2 b8: <- b5 b10 v66 = Phi v68 v49 v82 = Phi v19 v29 v22 = Less v49 v21 If v22 -> b9 b11 b9: <- b8 v25 = Eq v49 v24 If v25 -> b12 b13 b10: <- b13 v29 = Add v49 v43 Plain -> b8 b11: <- b8 v32 = Add v49 v49 v33 = Add v32 v49 v34 = Add v33 v49 v35 = Add v34 v49 v36 = Add v35 v49 v37 = Add v36 v49 v38 = Add v37 v49 v39 = Add v38 v49 v40 = Add v39 v49 v41 = Add v66 v40 Plain -> b6 b12: <- b9 Plain -> b6 b13: <- b9 Plain -> b10 Change-Id: I16fc4ec527ec63f24f7d0d79d1a4a59bf37269de Reviewed-on: https://go-review.googlesource.com/12444 Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/cse.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go index a64e993e2a..9212aaf314 100644 --- a/src/cmd/compile/internal/ssa/cse.go +++ b/src/cmd/compile/internal/ssa/cse.go @@ -17,6 +17,7 @@ func cse(f *Func) { // v.aux == w.aux // v.auxint == w.auxint // len(v.args) == len(w.args) + // v.block == w.block if v.op == OpPhi // equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1 // The algorithm searches for a partition of f's values into @@ -24,18 +25,23 @@ func cse(f *Func) { // It starts with a coarse partition and iteratively refines it // until it reaches a fixed point. - // Make initial partition based on opcode/type-name/aux/auxint/nargs + // Make initial partition based on opcode/type-name/aux/auxint/nargs/phi-block type key struct { op Op typ string aux interface{} auxint int64 nargs int + block ID // block id for phi vars, -1 otherwise } m := map[key]eqclass{} for _, b := range f.Blocks { for _, v := range b.Values { - k := key{v.Op, v.Type.String(), v.Aux, v.AuxInt, len(v.Args)} + bid := ID(-1) + if v.Op == OpPhi { + bid = b.ID + } + k := key{v.Op, v.Type.String(), v.Aux, v.AuxInt, len(v.Args), bid} m[k] = append(m[k], v) } } @@ -45,6 +51,9 @@ func cse(f *Func) { for _, v := range m { partition = append(partition, v) } + // TODO: Sort partition here for perfect reproducibility? + // Sort by what? Partition size? + // (Could that improve efficiency by discovering splits earlier?) // map from value id back to eqclass id valueEqClass := make([]int, f.NumValues())