From 295307ae78f8dd463a2ab8d85a1592ca76619d36 Mon Sep 17 00:00:00 2001 From: philhofer Date: Mon, 13 Mar 2017 15:03:17 -0700 Subject: [PATCH] cmd/compile: de-virtualize interface calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this change, code like h := sha1.New() h.Write(buf) sum := h.Sum() gets compiled into static calls rather than interface calls, because the compiler is able to prove that 'h' is really a *sha1.digest. The InterCall re-write rule hits a few dozen times during make.bash, and hundreds of times during all.bash. The most common pattern identified by the compiler is a constructor like func New() Interface { return &impl{...} } where the constructor gets inlined into the caller, and the result is used immediately. Examples include {sha1,md5,crc32,crc64,...}.New, base64.NewEncoder, base64.NewDecoder, errors.New, net.Pipe, and so on. Some existing benchmarks that change on darwin/amd64: Crc64/ISO4KB-8 2.67µs ± 1% 2.66µs ± 0% -0.36% (p=0.015 n=10+10) Crc64/ISO1KB-8 694ns ± 0% 690ns ± 1% -0.59% (p=0.001 n=10+10) Adler32KB-8 473ns ± 1% 471ns ± 0% -0.39% (p=0.010 n=10+9) On architectures like amd64, the reduction in code size appears to contribute more to benchmark improvements than just removing the indirect call, since that branch gets predicted accurately when called in a loop. Updates #19361 Change-Id: I57d4dc21ef40a05ec0fbd55a9bb0eb74cdc67a3d Reviewed-on: https://go-review.googlesource.com/38139 Run-TryBot: Philip Hofer TryBot-Result: Gobot Gobot Reviewed-by: David Chase --- src/cmd/compile/internal/gc/main.go | 4 + src/cmd/compile/internal/gc/reflect.go | 82 ++++++++++++++++++- src/cmd/compile/internal/gc/ssa.go | 4 + src/cmd/compile/internal/gc/subr.go | 10 ++- src/cmd/compile/internal/ssa/config.go | 6 ++ src/cmd/compile/internal/ssa/export_test.go | 31 +++---- .../compile/internal/ssa/gen/generic.rules | 7 ++ src/cmd/compile/internal/ssa/rewrite.go | 20 +++++ .../compile/internal/ssa/rewritegeneric.go | 48 +++++++++++ test/devirt.go | 39 +++++++++ 10 files changed, 234 insertions(+), 17 deletions(-) create mode 100644 test/devirt.go diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 49227ecaf66..b5478ebb7ce 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -483,7 +483,11 @@ func Main() { } } + // Just before compilation, compile itabs found on + // the right side of OCONVIFACE so that methods + // can be de-virtualized during compilation. Curfn = nil + peekitabs() // Phase 8: Compile top level functions. // Don't use range--walk can add functions to xtop. diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go index 57302b50afd..803bad6257c 100644 --- a/src/cmd/compile/internal/gc/reflect.go +++ b/src/cmd/compile/internal/gc/reflect.go @@ -16,6 +16,15 @@ import ( type itabEntry struct { t, itype *Type sym *Sym + + // symbol of the itab itself; + // filled in lazily after typecheck + lsym *obj.LSym + + // symbols of each method in + // the itab, sorted by byte offset; + // filled in at the same time as lsym + entries []*obj.LSym } type ptabEntry struct { @@ -415,7 +424,6 @@ func imethods(t *Type) []*Sig { // Generate the method body, so that compiled // code can refer to it. isym := methodsym(method, t, 0) - if !isym.Siggen() { isym.SetSiggen(true) genwrapper(t, f, isym, 0) @@ -1379,6 +1387,78 @@ ok: return s } +// for each itabEntry, gather the methods on +// the concrete type that implement the interface +func peekitabs() { + for i := range itabs { + tab := &itabs[i] + methods := genfun(tab.t, tab.itype) + if len(methods) == 0 { + continue + } + tab.lsym = Linksym(tab.sym) + tab.entries = methods + } +} + +// for the given concrete type and interface +// type, return the (sorted) set of methods +// on the concrete type that implement the interface +func genfun(t, it *Type) []*obj.LSym { + if t == nil || it == nil { + return nil + } + sigs := imethods(it) + methods := methods(t) + out := make([]*obj.LSym, 0, len(sigs)) + if len(sigs) == 0 { + return nil + } + + // both sigs and methods are sorted by name, + // so we can find the intersect in a single pass + for _, m := range methods { + if m.name == sigs[0].name { + out = append(out, Linksym(m.isym)) + sigs = sigs[1:] + if len(sigs) == 0 { + break + } + } + } + + return out +} + +// itabsym uses the information gathered in +// peekitabs to de-virtualize interface methods. +// Since this is called by the SSA backend, it shouldn't +// generate additional Nodes, Syms, etc. +func itabsym(it *obj.LSym, offset int64) *obj.LSym { + var syms []*obj.LSym + if it == nil { + return nil + } + + for i := range itabs { + e := &itabs[i] + if e.lsym == it { + syms = e.entries + break + } + } + if syms == nil { + return nil + } + + // keep this arithmetic in sync with *itab layout + methodnum := int((offset - 3*int64(Widthptr) - 8) / int64(Widthptr)) + if methodnum >= len(syms) { + return nil + } + return syms[methodnum] +} + func dumptypestructs() { // copy types from externdcl list to signatlist for _, n := range externdcl { diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 796972ac06b..450be95e066 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -4967,6 +4967,10 @@ func (e *ssaExport) SplitArray(name ssa.LocalSlot) ssa.LocalSlot { return ssa.LocalSlot{N: n, Type: et, Off: name.Off} } +func (e *ssaExport) DerefItab(it *obj.LSym, offset int64) *obj.LSym { + return itabsym(it, offset) +} + // namedAuto returns a new AUTO variable with the given name and type. // These are exposed to the debugger. func (e *ssaExport) namedAuto(name string, typ ssa.Type) ssa.GCNode { diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go index f8a7f26420a..6001b83e294 100644 --- a/src/cmd/compile/internal/gc/subr.go +++ b/src/cmd/compile/internal/gc/subr.go @@ -1679,7 +1679,6 @@ func structargs(tl *Type, mustname bool) []*Node { // rcvr - U // method - M func (t T)(), a TFIELD type struct // newnam - the eventual mangled name of this function - func genwrapper(rcvr *Type, method *Field, newnam *Sym, iface int) { if false && Debug['r'] != 0 { fmt.Printf("genwrapper rcvrtype=%v method=%v newnam=%v\n", rcvr, method, newnam) @@ -1715,6 +1714,7 @@ func genwrapper(rcvr *Type, method *Field, newnam *Sym, iface int) { fn.Func.Nname = newname(newnam) fn.Func.Nname.Name.Defn = fn fn.Func.Nname.Name.Param.Ntype = t + fn.Func.Nname.Sym.SetExported(true) // prevent export; see closure.go declare(fn.Func.Nname, PFUNC) funchdr(fn) @@ -1918,6 +1918,14 @@ func implements(t, iface *Type, m, samename **Field, ptr *int) bool { } } + // We're going to emit an OCONVIFACE. + // Call itabname so that (t, iface) + // gets added to itabs early, which allows + // us to de-virtualize calls through this + // type/interface pair later. See peekitabs in reflect.go + if isdirectiface(t0) && !iface.IsEmptyInterface() { + itabname(t0, iface) + } return true } diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index ea03ed7f10b..52692847b96 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -121,6 +121,12 @@ type Frontend interface { SplitArray(LocalSlot) LocalSlot // array must be length 1 SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo) + // DerefItab dereferences an itab function + // entry, given the symbol of the itab and + // the byte offset of the function pointer. + // It may return nil. + DerefItab(sym *obj.LSym, offset int64) *obj.LSym + // Line returns a string describing the given position. Line(src.XPos) string diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go index 74bb08d5c29..b687076a285 100644 --- a/src/cmd/compile/internal/ssa/export_test.go +++ b/src/cmd/compile/internal/ssa/export_test.go @@ -97,21 +97,22 @@ func (d DummyFrontend) Warnl(_ src.XPos, msg string, args ...interface{}) { d.t func (d DummyFrontend) Debug_checknil() bool { return false } func (d DummyFrontend) Debug_wb() bool { return false } -func (d DummyFrontend) TypeBool() Type { return TypeBool } -func (d DummyFrontend) TypeInt8() Type { return TypeInt8 } -func (d DummyFrontend) TypeInt16() Type { return TypeInt16 } -func (d DummyFrontend) TypeInt32() Type { return TypeInt32 } -func (d DummyFrontend) TypeInt64() Type { return TypeInt64 } -func (d DummyFrontend) TypeUInt8() Type { return TypeUInt8 } -func (d DummyFrontend) TypeUInt16() Type { return TypeUInt16 } -func (d DummyFrontend) TypeUInt32() Type { return TypeUInt32 } -func (d DummyFrontend) TypeUInt64() Type { return TypeUInt64 } -func (d DummyFrontend) TypeFloat32() Type { return TypeFloat32 } -func (d DummyFrontend) TypeFloat64() Type { return TypeFloat64 } -func (d DummyFrontend) TypeInt() Type { return TypeInt64 } -func (d DummyFrontend) TypeUintptr() Type { return TypeUInt64 } -func (d DummyFrontend) TypeString() Type { panic("unimplemented") } -func (d DummyFrontend) TypeBytePtr() Type { return TypeBytePtr } +func (d DummyFrontend) TypeBool() Type { return TypeBool } +func (d DummyFrontend) TypeInt8() Type { return TypeInt8 } +func (d DummyFrontend) TypeInt16() Type { return TypeInt16 } +func (d DummyFrontend) TypeInt32() Type { return TypeInt32 } +func (d DummyFrontend) TypeInt64() Type { return TypeInt64 } +func (d DummyFrontend) TypeUInt8() Type { return TypeUInt8 } +func (d DummyFrontend) TypeUInt16() Type { return TypeUInt16 } +func (d DummyFrontend) TypeUInt32() Type { return TypeUInt32 } +func (d DummyFrontend) TypeUInt64() Type { return TypeUInt64 } +func (d DummyFrontend) TypeFloat32() Type { return TypeFloat32 } +func (d DummyFrontend) TypeFloat64() Type { return TypeFloat64 } +func (d DummyFrontend) TypeInt() Type { return TypeInt64 } +func (d DummyFrontend) TypeUintptr() Type { return TypeUInt64 } +func (d DummyFrontend) TypeString() Type { panic("unimplemented") } +func (d DummyFrontend) TypeBytePtr() Type { return TypeBytePtr } +func (d DummyFrontend) DerefItab(sym *obj.LSym, off int64) *obj.LSym { return nil } func (d DummyFrontend) CanSSA(t Type) bool { // There are no un-SSAable types in dummy land. diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules index 53f0490c4c3..73341a52d7e 100644 --- a/src/cmd/compile/internal/ssa/gen/generic.rules +++ b/src/cmd/compile/internal/ssa/gen/generic.rules @@ -1431,3 +1431,10 @@ && c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value && warnRule(config.Debug_checknil() && v.Pos.Line() > 1, v, "removed nil check") -> (Invalid) + +// De-virtualize interface calls into static calls. +// Note that (ITab (IMake)) doesn't get +// rewritten until after the first opt pass, +// so this rule should trigger reliably. +(InterCall [argsize] (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem) && devirt(v, itab, off) != nil -> + (StaticCall [argsize] {devirt(v, itab, off)} mem) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9c9c6b5ecc7..26c6518eeb4 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -5,6 +5,7 @@ package ssa import ( + "cmd/internal/obj" "crypto/sha1" "fmt" "math" @@ -384,6 +385,25 @@ func uaddOvf(a, b int64) bool { return uint64(a)+uint64(b) < uint64(a) } +// de-virtualize an InterCall +// 'sym' is the symbol for the itab +func devirt(v *Value, sym interface{}, offset int64) *obj.LSym { + f := v.Block.Func + ext, ok := sym.(*ExternSymbol) + if !ok { + return nil + } + lsym := f.Config.Frontend().DerefItab(ext.Sym, offset) + if f.pass.debug > 0 { + if lsym != nil { + f.Config.Warnl(v.Pos, "de-virtualizing call") + } else { + f.Config.Warnl(v.Pos, "couldn't de-virtualize call") + } + } + return lsym +} + // isSamePtr reports whether p1 and p2 point to the same address. func isSamePtr(p1, p2 *Value) bool { if p1 == p2 { diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 10a4a4383c4..b998266f2b5 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -124,6 +124,8 @@ func rewriteValuegeneric(v *Value, config *Config) bool { return rewriteValuegeneric_OpGreater8U(v, config) case OpIMake: return rewriteValuegeneric_OpIMake(v, config) + case OpInterCall: + return rewriteValuegeneric_OpInterCall(v, config) case OpIsInBounds: return rewriteValuegeneric_OpIsInBounds(v, config) case OpIsNonNil: @@ -5736,6 +5738,52 @@ func rewriteValuegeneric_OpIMake(v *Value, config *Config) bool { } return false } +func rewriteValuegeneric_OpInterCall(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (InterCall [argsize] (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem) + // cond: devirt(v, itab, off) != nil + // result: (StaticCall [argsize] {devirt(v, itab, off)} mem) + for { + argsize := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpLoad { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpOffPtr { + break + } + off := v_0_0.AuxInt + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpITab { + break + } + v_0_0_0_0 := v_0_0_0.Args[0] + if v_0_0_0_0.Op != OpIMake { + break + } + v_0_0_0_0_0 := v_0_0_0_0.Args[0] + if v_0_0_0_0_0.Op != OpAddr { + break + } + itab := v_0_0_0_0_0.Aux + v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0] + if v_0_0_0_0_0_0.Op != OpSB { + break + } + mem := v.Args[1] + if !(devirt(v, itab, off) != nil) { + break + } + v.reset(OpStaticCall) + v.AuxInt = argsize + v.Aux = devirt(v, itab, off) + v.AddArg(mem) + return true + } + return false +} func rewriteValuegeneric_OpIsInBounds(v *Value, config *Config) bool { b := v.Block _ = b diff --git a/test/devirt.go b/test/devirt.go new file mode 100644 index 00000000000..23577098e49 --- /dev/null +++ b/test/devirt.go @@ -0,0 +1,39 @@ +// errorcheck -0 -d=ssa/opt/debug=3 + +package main + +// Trivial interface call devirtualization test. + +type real struct { + value int +} + +func (r *real) Value() int { return r.value } + +type Valuer interface { + Value() int +} + +type indirectiface struct { + a, b, c int +} + +func (i indirectiface) Value() int { + return i.a + i.b + i.c +} + +func main() { + var r Valuer + rptr := &real{value: 3} + r = rptr + + if r.Value() != 3 { // ERROR "de-virtualizing call$" + panic("not 3") + } + + // Can't do types that aren't "direct" interfaces (yet). + r = indirectiface{3, 4, 5} + if r.Value() != 12 { + panic("not 12") + } +}