From af18bce87cc7ee1ffc68f91abefa241ab209539e Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Sun, 20 Sep 2020 23:29:20 -0400 Subject: [PATCH] cmd/link: consider interface conversions only in reachable code The linker prunes methods that are not directly reachable if the receiver type is never converted to interface. Currently, this "never" is too strong: it is invalidated even if the interface conversion is in an unreachable function. This CL improves it by only considering interface conversions in reachable code. To do that, we introduce a marker relocation R_USEIFACE, which marks the target symbol as UsedInIface if the source symbol is reached. binary size before after cmd/compile 18897528 18887400 cmd/go 13607372 13470652 Change-Id: I66c6b69eeff9ae02d84d2e6f2bc7f1b29dd53910 Reviewed-on: https://go-review.googlesource.com/c/go/+/256797 Trust: Cherry Zhang Reviewed-by: Jeremy Faller Reviewed-by: Than McIntosh --- src/cmd/compile/internal/gc/pgen.go | 8 ++- src/cmd/compile/internal/gc/sinit.go | 2 +- src/cmd/compile/internal/gc/walk.go | 15 +++-- src/cmd/internal/obj/s390x/asmz.go | 6 +- src/cmd/internal/obj/wasm/wasmobj.go | 2 + src/cmd/internal/obj/x86/asm6.go | 7 +- src/cmd/internal/objabi/reloctype.go | 5 ++ src/cmd/internal/objabi/reloctype_string.go | 66 ++++++++++++++++++- src/cmd/link/internal/ld/data.go | 6 ++ src/cmd/link/internal/ld/deadcode.go | 14 ++++ .../ld/testdata/deadcode/ifacemethod.go | 9 ++- src/cmd/link/internal/loader/loader.go | 1 + src/cmd/link/internal/wasm/asm.go | 3 + 13 files changed, 129 insertions(+), 15 deletions(-) diff --git a/src/cmd/compile/internal/gc/pgen.go b/src/cmd/compile/internal/gc/pgen.go index 74262595b05..52b1ed351d6 100644 --- a/src/cmd/compile/internal/gc/pgen.go +++ b/src/cmd/compile/internal/gc/pgen.go @@ -231,6 +231,11 @@ func compile(fn *Node) { return } + // Set up the function's LSym early to avoid data races with the assemblers. + // Do this before walk, as walk needs the LSym to set attributes/relocations + // (e.g. in markTypeUsedInInterface). + fn.Func.initLSym(true) + walk(fn) if nerrors != 0 { return @@ -250,9 +255,6 @@ func compile(fn *Node) { return } - // Set up the function's LSym early to avoid data races with the assemblers. - fn.Func.initLSym(true) - // Make sure type syms are declared for all types that might // be types of stack objects. We need to do this here // because symbols must be allocated before the parallel diff --git a/src/cmd/compile/internal/gc/sinit.go b/src/cmd/compile/internal/gc/sinit.go index 71ed5584612..af19a96bbc4 100644 --- a/src/cmd/compile/internal/gc/sinit.go +++ b/src/cmd/compile/internal/gc/sinit.go @@ -278,7 +278,7 @@ func (s *InitSchedule) staticassign(l *Node, r *Node) bool { return Isconst(val, CTNIL) } - markTypeUsedInInterface(val.Type) + markTypeUsedInInterface(val.Type, l.Sym.Linksym()) var itab *Node if l.Type.IsEmptyInterface() { diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go index 933f16d9a01..d238cc2f45f 100644 --- a/src/cmd/compile/internal/gc/walk.go +++ b/src/cmd/compile/internal/gc/walk.go @@ -805,8 +805,8 @@ opswitch: fromType := n.Left.Type toType := n.Type - if !fromType.IsInterface() { - markTypeUsedInInterface(fromType) + if !fromType.IsInterface() && !Curfn.Func.Nname.isBlank() { // skip unnamed functions (func _()) + markTypeUsedInInterface(fromType, Curfn.Func.lsym) } // typeword generates the type word of the interface value. @@ -1621,8 +1621,13 @@ opswitch: // markTypeUsedInInterface marks that type t is converted to an interface. // This information is used in the linker in dead method elimination. -func markTypeUsedInInterface(t *types.Type) { - typenamesym(t).Linksym().Set(obj.AttrUsedInIface, true) +func markTypeUsedInInterface(t *types.Type, from *obj.LSym) { + tsym := typenamesym(t).Linksym() + // Emit a marker relocation. The linker will know the type is converted + // to an interface if "from" is reachable. + r := obj.Addrel(from) + r.Sym = tsym + r.Type = objabi.R_USEIFACE } // rtconvfn returns the parameter and result types that will be used by a @@ -3687,6 +3692,8 @@ func usemethod(n *Node) { // Also need to check for reflect package itself (see Issue #38515). if s := res0.Type.Sym; s != nil && s.Name == "Method" && isReflectPkg(s.Pkg) { Curfn.Func.SetReflectMethod(true) + // The LSym is initialized at this point. We need to set the attribute on the LSym. + Curfn.Func.lsym.Set(obj.AttrReflectMethod, true) } } diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go index 68f01f1c5d8..cb3a2c31965 100644 --- a/src/cmd/internal/obj/s390x/asmz.go +++ b/src/cmd/internal/obj/s390x/asmz.go @@ -461,6 +461,7 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { buffer := make([]byte, 0) changed := true loop := 0 + nrelocs0 := len(c.cursym.R) for changed { if loop > 100 { c.ctxt.Diag("stuck in spanz loop") @@ -468,7 +469,10 @@ func spanz(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } changed = false buffer = buffer[:0] - c.cursym.R = make([]obj.Reloc, 0) + for i := range c.cursym.R[nrelocs0:] { + c.cursym.R[nrelocs0+i] = obj.Reloc{} + } + c.cursym.R = c.cursym.R[:nrelocs0] // preserve marker relocations generated by the compiler for p := c.cursym.Func.Text; p != nil; p = p.Link { pc := int64(len(buffer)) if pc != p.Pc { diff --git a/src/cmd/internal/obj/wasm/wasmobj.go b/src/cmd/internal/obj/wasm/wasmobj.go index 70e8e51e655..a9e093a8add 100644 --- a/src/cmd/internal/obj/wasm/wasmobj.go +++ b/src/cmd/internal/obj/wasm/wasmobj.go @@ -1007,6 +1007,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { panic("bad name for Call") } r := obj.Addrel(s) + r.Siz = 1 // actually variable sized r.Off = int32(w.Len()) r.Type = objabi.R_CALL if p.Mark&WasmImport != 0 { @@ -1033,6 +1034,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { case AI32Const, AI64Const: if p.From.Name == obj.NAME_EXTERN { r := obj.Addrel(s) + r.Siz = 1 // actually variable sized r.Off = int32(w.Len()) r.Type = objabi.R_ADDR r.Sym = p.From.Sym diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index fb99c620ad6..4940c79eaab 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -2100,14 +2100,15 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { var c int32 errors := ctxt.Errors var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies) + nrelocs0 := len(s.R) for { // This loop continues while there are reasons to re-assemble // whole block, like the presence of long forward jumps. reAssemble := false - for i := range s.R { - s.R[i] = obj.Reloc{} + for i := range s.R[nrelocs0:] { + s.R[nrelocs0+i] = obj.Reloc{} } - s.R = s.R[:0] + s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler s.P = s.P[:0] c = 0 var pPrev *obj.Prog diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go index f029a3c396d..1e328d659f6 100644 --- a/src/cmd/internal/objabi/reloctype.go +++ b/src/cmd/internal/objabi/reloctype.go @@ -89,6 +89,11 @@ const ( // should be linked into the final binary, even if there are no other // direct references. (This is used for types reachable by reflection.) R_USETYPE + // R_USEIFACE marks a type is converted to an interface in the function this + // relocation is applied to. The target is a type descriptor. + // This is a marker relocation (0-sized), for the linker's reachabililty + // analysis. + R_USEIFACE // R_METHODOFF resolves to a 32-bit offset from the beginning of the section // holding the data being relocated to the referenced symbol. // It is a variant of R_ADDROFF used when linking from the uncommonType of a diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go index 83dfe71e071..caf24eea58d 100644 --- a/src/cmd/internal/objabi/reloctype_string.go +++ b/src/cmd/internal/objabi/reloctype_string.go @@ -4,9 +4,71 @@ package objabi import "strconv" -const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_WEAKADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CALLRISCVR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[R_ADDR-1] + _ = x[R_ADDRPOWER-2] + _ = x[R_ADDRARM64-3] + _ = x[R_ADDRMIPS-4] + _ = x[R_ADDROFF-5] + _ = x[R_WEAKADDROFF-6] + _ = x[R_SIZE-7] + _ = x[R_CALL-8] + _ = x[R_CALLARM-9] + _ = x[R_CALLARM64-10] + _ = x[R_CALLIND-11] + _ = x[R_CALLPOWER-12] + _ = x[R_CALLMIPS-13] + _ = x[R_CALLRISCV-14] + _ = x[R_CONST-15] + _ = x[R_PCREL-16] + _ = x[R_TLS_LE-17] + _ = x[R_TLS_IE-18] + _ = x[R_GOTOFF-19] + _ = x[R_PLT0-20] + _ = x[R_PLT1-21] + _ = x[R_PLT2-22] + _ = x[R_USEFIELD-23] + _ = x[R_USETYPE-24] + _ = x[R_USEIFACE-25] + _ = x[R_METHODOFF-26] + _ = x[R_POWER_TOC-27] + _ = x[R_GOTPCREL-28] + _ = x[R_JMPMIPS-29] + _ = x[R_DWARFSECREF-30] + _ = x[R_DWARFFILEREF-31] + _ = x[R_ARM64_TLS_LE-32] + _ = x[R_ARM64_TLS_IE-33] + _ = x[R_ARM64_GOTPCREL-34] + _ = x[R_ARM64_GOT-35] + _ = x[R_ARM64_PCREL-36] + _ = x[R_ARM64_LDST8-37] + _ = x[R_ARM64_LDST32-38] + _ = x[R_ARM64_LDST64-39] + _ = x[R_ARM64_LDST128-40] + _ = x[R_POWER_TLS_LE-41] + _ = x[R_POWER_TLS_IE-42] + _ = x[R_POWER_TLS-43] + _ = x[R_ADDRPOWER_DS-44] + _ = x[R_ADDRPOWER_GOT-45] + _ = x[R_ADDRPOWER_PCREL-46] + _ = x[R_ADDRPOWER_TOCREL-47] + _ = x[R_ADDRPOWER_TOCREL_DS-48] + _ = x[R_RISCV_PCREL_ITYPE-49] + _ = x[R_RISCV_PCREL_STYPE-50] + _ = x[R_PCRELDBL-51] + _ = x[R_ADDRMIPSU-52] + _ = x[R_ADDRMIPSTLS-53] + _ = x[R_ADDRCUOFF-54] + _ = x[R_WASMIMPORT-55] + _ = x[R_XCOFFREF-56] +} -var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 60, 66, 72, 81, 92, 101, 112, 122, 133, 140, 147, 155, 163, 171, 177, 183, 189, 199, 208, 219, 230, 240, 249, 262, 276, 290, 304, 320, 331, 344, 357, 371, 385, 400, 414, 428, 439, 453, 468, 485, 503, 524, 543, 562, 572, 583, 596, 607, 619, 629} +const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_WEAKADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CALLRISCVR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_LDST8R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" + +var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 60, 66, 72, 81, 92, 101, 112, 122, 133, 140, 147, 155, 163, 171, 177, 183, 189, 199, 208, 218, 229, 240, 250, 259, 272, 286, 300, 314, 330, 341, 354, 367, 381, 395, 410, 424, 438, 449, 463, 478, 495, 513, 534, 553, 572, 582, 593, 606, 617, 629, 639} func (i RelocType) String() string { i -= 1 diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index a730125cf2c..0a3418bfc9b 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -698,6 +698,9 @@ func windynrelocsym(ctxt *Link, rel *loader.SymbolBuilder, s loader.Sym) { relocs := ctxt.loader.Relocs(s) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) + if r.IsMarker() { + continue // skip marker relocations + } targ := r.Sym() if targ == 0 { continue @@ -775,6 +778,9 @@ func dynrelocsym(ctxt *Link, s loader.Sym) { relocs := ldr.Relocs(s) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) + if r.IsMarker() { + continue // skip marker relocations + } if ctxt.BuildMode == BuildModePIE && ctxt.LinkMode == LinkInternal { // It's expected that some relocations will be done // later by relocsym (R_TLS_LE, R_ADDROFF), so diff --git a/src/cmd/link/internal/ld/deadcode.go b/src/cmd/link/internal/ld/deadcode.go index 7f14aa3d278..816a23b9a71 100644 --- a/src/cmd/link/internal/ld/deadcode.go +++ b/src/cmd/link/internal/ld/deadcode.go @@ -153,6 +153,20 @@ func (d *deadcodePass) flood() { // do nothing for now as we still load all type symbols. continue } + if t == objabi.R_USEIFACE { + // R_USEIFACE is a marker relocation that tells the linker the type is + // converted to an interface, i.e. should have UsedInIface set. See the + // comment below for why we need to unset the Reachable bit and re-mark it. + rs := r.Sym() + if !d.ldr.AttrUsedInIface(rs) { + d.ldr.SetAttrUsedInIface(rs, true) + if d.ldr.AttrReachable(rs) { + d.ldr.SetAttrReachable(rs, false) + d.mark(rs, symIdx) + } + } + continue + } rs := r.Sym() if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) { // If a type is converted to an interface, it is possible to obtain an diff --git a/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go b/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go index b62f18c342e..32a24cf6f00 100644 --- a/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go +++ b/src/cmd/link/internal/ld/testdata/deadcode/ifacemethod.go @@ -18,6 +18,13 @@ var p *T var e interface{} func main() { - p = new(T) // used T, but never converted to interface + p = new(T) // used T, but never converted to interface in any reachable code e.(I).M() // used I and I.M } + +func Unused() { // convert T to interface, but this function is not reachable + var i I = T(0) + i.M() +} + +var Unused2 interface{} = T(1) // convert T to interface, in an unreachable global initializer diff --git a/src/cmd/link/internal/loader/loader.go b/src/cmd/link/internal/loader/loader.go index 43a0352e0bb..ea99233f67c 100644 --- a/src/cmd/link/internal/loader/loader.go +++ b/src/cmd/link/internal/loader/loader.go @@ -63,6 +63,7 @@ type Reloc struct { func (rel Reloc) Type() objabi.RelocType { return objabi.RelocType(rel.Reloc.Type()) + rel.typ } func (rel Reloc) Sym() Sym { return rel.l.resolve(rel.r, rel.Reloc.Sym()) } func (rel Reloc) SetSym(s Sym) { rel.Reloc.SetSym(goobj.SymRef{PkgIdx: 0, SymIdx: uint32(s)}) } +func (rel Reloc) IsMarker() bool { return rel.Siz() == 0 } func (rel Reloc) SetType(t objabi.RelocType) { if t != objabi.RelocType(uint8(t)) { diff --git a/src/cmd/link/internal/wasm/asm.go b/src/cmd/link/internal/wasm/asm.go index 3bd56a6e3ad..31851fbb567 100644 --- a/src/cmd/link/internal/wasm/asm.go +++ b/src/cmd/link/internal/wasm/asm.go @@ -167,6 +167,9 @@ func asmb2(ctxt *ld.Link, ldr *loader.Loader) { off := int32(0) for ri := 0; ri < relocs.Count(); ri++ { r := relocs.At(ri) + if r.Siz() == 0 { + continue // skip marker relocations + } wfn.Write(P[off:r.Off()]) off = r.Off() rs := ldr.ResolveABIAlias(r.Sym())