From 94f02451148755b31cc4dd455c9e215d5f898898 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Wed, 10 Feb 2016 19:39:32 -0600 Subject: [PATCH] [dev.ssa] cmd/compile: add a zero arg cse pass Add an initial cse pass that only operates on zero argument values. This removes the need for a special case in cse for removing OpSB and speeds up arithConst_ssa.go compilation by 9% while slowing "test -c net/http" by 1.5%. Change-Id: Id1500482485426f66c6c2eba75eeaf4f19c8a889 Reviewed-on: https://go-review.googlesource.com/19454 Run-TryBot: Todd Neal TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/compile.go | 3 +- src/cmd/compile/internal/ssa/cse.go | 36 ++------- src/cmd/compile/internal/ssa/cse_test.go | 50 ++++++++++++- src/cmd/compile/internal/ssa/zcse.go | 95 ++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 33 deletions(-) create mode 100644 src/cmd/compile/internal/ssa/zcse.go diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 69f751187d4..dfead98c658 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -102,8 +102,9 @@ var passes = [...]pass{ {"decompose user", decomposeUser, true}, {"decompose builtin", decomposeBuiltIn, true}, {"opt", opt, true}, // TODO: split required rules and optimizing rules + {"zero arg cse", zcse, true}, // required to merge OpSB values {"opt deadcode", deadcode, false}, // remove any blocks orphaned during opt - {"generic cse", cse, true}, + {"generic cse", cse, false}, {"nilcheckelim", nilcheckelim, false}, {"generic deadcode", deadcode, false}, {"fuse", fuse, false}, diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go index 36ab6a3680f..545e173928a 100644 --- a/src/cmd/compile/internal/ssa/cse.go +++ b/src/cmd/compile/internal/ssa/cse.go @@ -13,34 +13,6 @@ import ( // Values are just relinked, nothing is deleted. A subsequent deadcode // pass is required to actually remove duplicate expressions. func cse(f *Func) { - if !f.Config.optimize { - // Don't do CSE in this case. But we need to do - // just a little bit, to combine multiple OpSB ops. - // Regalloc gets very confused otherwise. - var sb *Value - outer: - for _, b := range f.Blocks { - for _, v := range b.Values { - if v.Op == OpSB { - sb = v - break outer - } - } - } - if sb == nil { - return - } - for _, b := range f.Blocks { - for _, v := range b.Values { - for i, a := range v.Args { - if a.Op == OpSB { - v.Args[i] = sb - } - } - } - } - return - } // Two values are equivalent if they satisfy the following definition: // equivalent(v, w): // v.op == w.op @@ -77,6 +49,14 @@ func cse(f *Func) { } } for i, e := range partition { + if Debug > 1 && len(e) > 500 { + fmt.Printf("CSE.large partition (%d): ", len(e)) + for j := 0; j < 3; j++ { + fmt.Printf("%s ", e[j].LongString()) + } + fmt.Println() + } + for _, v := range e { valueEqClass[v.ID] = ID(i) } diff --git a/src/cmd/compile/internal/ssa/cse_test.go b/src/cmd/compile/internal/ssa/cse_test.go index fb9fada120a..905939fc321 100644 --- a/src/cmd/compile/internal/ssa/cse_test.go +++ b/src/cmd/compile/internal/ssa/cse_test.go @@ -6,12 +6,16 @@ package ssa import "testing" +type tstAux struct { + s string +} + // This tests for a bug found when partitioning, but not sorting by the Aux value. func TestCSEAuxPartitionBug(t *testing.T) { c := testConfig(t) - arg1Aux := "arg1-aux" - arg2Aux := "arg2-aux" - arg3Aux := "arg3-aux" + arg1Aux := &tstAux{"arg1-aux"} + arg2Aux := &tstAux{"arg2-aux"} + arg3Aux := &tstAux{"arg3-aux"} // construct lots of values with args that have aux values and place // them in an order that triggers the bug @@ -77,5 +81,43 @@ func TestCSEAuxPartitionBug(t *testing.T) { if s1Cnt != 0 || s2Cnt != 0 { t.Errorf("%d values missed during cse", s1Cnt+s2Cnt) } - +} + +// TestZCSE tests the zero arg cse. +func TestZCSE(t *testing.T) { + c := testConfig(t) + + fun := Fun(c, "entry", + Bloc("entry", + Valu("start", OpInitMem, TypeMem, 0, nil), + Valu("sp", OpSP, TypeBytePtr, 0, nil), + Valu("sb1", OpSB, TypeBytePtr, 0, nil), + Valu("sb2", OpSB, TypeBytePtr, 0, nil), + Valu("addr1", OpAddr, TypeInt64Ptr, 0, nil, "sb1"), + Valu("addr2", OpAddr, TypeInt64Ptr, 0, nil, "sb2"), + Valu("a1ld", OpLoad, TypeInt64, 0, nil, "addr1", "start"), + Valu("a2ld", OpLoad, TypeInt64, 0, nil, "addr2", "start"), + Valu("c1", OpConst64, TypeInt64, 1, nil), + Valu("r1", OpAdd64, TypeInt64, 0, nil, "a1ld", "c1"), + Valu("c2", OpConst64, TypeInt64, 1, nil), + Valu("r2", OpAdd64, TypeInt64, 0, nil, "a2ld", "c2"), + Valu("r3", OpAdd64, TypeInt64, 0, nil, "r1", "r2"), + Valu("raddr", OpAddr, TypeInt64Ptr, 0, nil, "sp"), + Valu("raddrdef", OpVarDef, TypeMem, 0, nil, "start"), + Valu("rstore", OpStore, TypeMem, 8, nil, "raddr", "r3", "raddrdef"), + Goto("exit")), + Bloc("exit", + Exit("rstore"))) + + CheckFunc(fun.f) + zcse(fun.f) + deadcode(fun.f) + CheckFunc(fun.f) + + if fun.values["c1"].Op != OpInvalid && fun.values["c2"].Op != OpInvalid { + t.Errorf("zsce should have removed c1 or c2") + } + if fun.values["sb1"].Op != OpInvalid && fun.values["sb2"].Op != OpInvalid { + t.Errorf("zsce should have removed sb1 or sb2") + } } diff --git a/src/cmd/compile/internal/ssa/zcse.go b/src/cmd/compile/internal/ssa/zcse.go new file mode 100644 index 00000000000..3206e19974e --- /dev/null +++ b/src/cmd/compile/internal/ssa/zcse.go @@ -0,0 +1,95 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +// zcse does an initial pass of common-subexpression elimination on the +// function for values with zero arguments to allow the more expensive cse +// to begin with a reduced number of values. Values are just relinked, +// nothing is deleted. A subsequent deadcode pass is required to actually +// remove duplicate expressions. +func zcse(f *Func) { + vals := make(map[vkey]*Value) + + for _, b := range f.Blocks { + for i := 0; i < len(b.Values); { + v := b.Values[i] + next := true + switch v.Op { + case OpSB, OpConst64, OpConst32, OpConst16, OpConst8, OpConst64F, + OpConst32F, OpConstBool, OpConstNil, OpConstSlice, OpConstInterface: + key := vkey{v.Op, keyFor(v), typeStr(v)} + if vals[key] == nil { + vals[key] = v + if b != f.Entry { + // Move v to the entry block so it will dominate every block + // where we might use it. This prevents the need for any dominator + // calculations in this pass. + v.Block = f.Entry + f.Entry.Values = append(f.Entry.Values, v) + last := len(b.Values) - 1 + b.Values[i] = b.Values[last] + b.Values[last] = nil + b.Values = b.Values[:last] + + // process b.Values[i] again + next = false + } + } + } + if next { + i++ + } + } + } + + for _, b := range f.Blocks { + for _, v := range b.Values { + for i, a := range v.Args { + // TODO: encode arglen in the opcode table, then do this switch with a table lookup? + switch a.Op { + case OpSB, OpConst64, OpConst32, OpConst16, OpConst8, OpConst64F, + OpConst32F, OpConstBool, OpConstNil, OpConstSlice, OpConstInterface: + key := vkey{a.Op, keyFor(a), typeStr(a)} + if rv, ok := vals[key]; ok { + v.Args[i] = rv + } + } + } + } + } +} + +// vkey is a type used to uniquely identify a zero arg value. +type vkey struct { + op Op + a int64 // aux + t string // type +} + +// typeStr returns a string version of the type of v. +func typeStr(v *Value) string { + if v.Type == nil { + return "" + } + return v.Type.String() +} + +// keyFor returns the AuxInt portion of a key structure uniquely identifying a +// zero arg value for the supported ops. +func keyFor(v *Value) int64 { + switch v.Op { + case OpConst64, OpConst64F, OpConst32F: + return v.AuxInt + case OpConst32: + return int64(int32(v.AuxInt)) + case OpConst16: + return int64(int16(v.AuxInt)) + case OpConst8, OpConstBool: + return int64(int8(v.AuxInt)) + default: + // Also matches OpSB, OpConstNil, OpConstSlice, OpConstInterface: + return 0 + } +}