diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 9bd3655e52..407b143809 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -91,11 +91,11 @@ func buildssa(fn *Node) (ssafn *ssa.Func, usessa bool) { n := d.N switch n.Class { case PPARAM, PPARAMOUT: - aux := &ssa.ArgSymbol{Typ: n.Type, Offset: n.Xoffset, Sym: n.Sym} + aux := &ssa.ArgSymbol{Typ: n.Type, Node: n} s.decladdrs[n] = s.entryNewValue1A(ssa.OpAddr, Ptrto(n.Type), aux, s.sp) case PAUTO: - aux := &ssa.AutoSymbol{Typ: n.Type, Offset: -1, Sym: n.Sym} // offset TBD by SSA pass - s.decladdrs[n] = s.entryNewValue1A(ssa.OpAddr, Ptrto(n.Type), aux, s.sp) + // processed at each use, to prevent Addr coming + // before the decl. default: str := "" if n.Class&PHEAP != 0 { @@ -105,7 +105,7 @@ func buildssa(fn *Node) (ssafn *ssa.Func, usessa bool) { } } // nodfp is a special argument which is the function's FP. - aux := &ssa.ArgSymbol{Typ: Types[TUINTPTR], Offset: 0, Sym: nodfp.Sym} + aux := &ssa.ArgSymbol{Typ: Types[TUINTPTR], Node: nodfp} s.decladdrs[nodfp] = s.entryNewValue1A(ssa.OpAddr, Types[TUINTPTR], aux, s.sp) // Convert the AST-based IR to the SSA-based IR @@ -200,7 +200,7 @@ type state struct { // all defined variables at the end of each block. Indexed by block ID. defvars []map[*Node]*ssa.Value - // addresses of PPARAM, PPARAMOUT, and PAUTO variables. + // addresses of PPARAM and PPARAMOUT variables. decladdrs map[*Node]*ssa.Value // starting values. Memory, frame pointer, and stack pointer @@ -721,8 +721,11 @@ func (s *state) stmt(n *Node) { s.startBlock(bEnd) case OVARKILL: - // TODO(khr): ??? anything to do here? Only for addrtaken variables? - // Maybe just link it in the store chain? + // Insert a varkill op to record that a variable is no longer live. + // We only care about liveness info at call sites, so putting the + // varkill in the store chain is enough to keep it correctly ordered + // with respect to call ops. + s.vars[&memvar] = s.newValue1A(ssa.OpVarKill, ssa.TypeMem, n.Left, s.mem()) default: s.Unimplementedf("unhandled stmt %s", opnames[n.Op]) } @@ -1175,9 +1178,9 @@ func (s *state) expr(n *Node) *ssa.Value { return s.entryNewValue0A(ssa.OpConstString, n.Type, n.Val().U) case CTBOOL: if n.Val().U.(bool) { - return s.entryNewValue0I(ssa.OpConstBool, n.Type, 1) // 1 = true + return s.entryNewValue0I(ssa.OpConstBool, Types[TBOOL], 1) // 1 = true } else { - return s.entryNewValue0I(ssa.OpConstBool, n.Type, 0) // 0 = false + return s.entryNewValue0I(ssa.OpConstBool, Types[TBOOL], 0) // 0 = false } case CTNIL: t := n.Type @@ -1798,6 +1801,9 @@ func (s *state) assign(op uint8, left *Node, right *Node) { if !canSSA(left) { // if we can't ssa this memory, treat it as just zeroing out the backing memory addr := s.addr(left) + if left.Op == ONAME { + s.vars[&memvar] = s.newValue1A(ssa.OpVarDef, ssa.TypeMem, left, s.mem()) + } s.vars[&memvar] = s.newValue2I(ssa.OpZero, ssa.TypeMem, t.Size(), addr, s.mem()) return } @@ -1812,6 +1818,9 @@ func (s *state) assign(op uint8, left *Node, right *Node) { } // not ssa-able. Treat as a store. addr := s.addr(left) + if left.Op == ONAME { + s.vars[&memvar] = s.newValue1A(ssa.OpVarDef, ssa.TypeMem, left, s.mem()) + } s.vars[&memvar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, t.Size(), addr, val, s.mem()) } @@ -1857,7 +1866,7 @@ func (s *state) zeroVal(t *Type) *ssa.Value { case t.IsPtr(): return s.entryNewValue0(ssa.OpConstNil, t) case t.IsBoolean(): - return s.entryNewValue0I(ssa.OpConstBool, t, 0) // 0 = false + return s.entryNewValue0I(ssa.OpConstBool, Types[TBOOL], 0) // 0 = false case t.IsInterface(): return s.entryNewValue0(ssa.OpConstInterface, t) case t.IsSlice(): @@ -1894,7 +1903,7 @@ func (s *state) addr(n *Node) *ssa.Value { v = s.entryNewValue1I(ssa.OpOffPtr, v.Type, n.Xoffset, v) } return v - case PPARAM, PPARAMOUT, PAUTO: + case PPARAM, PPARAMOUT: // parameter/result slot or local variable v := s.decladdrs[n] if v == nil { @@ -1904,6 +1913,17 @@ func (s *state) addr(n *Node) *ssa.Value { s.Fatalf("addr of undeclared ONAME %v. declared: %v", n, s.decladdrs) } return v + case PAUTO: + // We need to regenerate the address of autos + // at every use. This prevents LEA instructions + // from occurring before the corresponding VarDef + // op and confusing the liveness analysis into thinking + // the variable is live at function entry. + // TODO: I'm not sure if this really works or we're just + // getting lucky. We might need a real dependency edge + // between vardef and addr ops. + aux := &ssa.AutoSymbol{Typ: n.Type, Node: n} + return s.newValue1A(ssa.OpAddr, Ptrto(n.Type), aux, s.sp) case PAUTO | PHEAP, PPARAMREF: return s.expr(n.Name.Heapaddr) default: @@ -2477,23 +2497,12 @@ type branch struct { // genssa appends entries to ptxt for each instruction in f. // gcargs and gclocals are filled in with pointer maps for the frame. func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) { - // TODO: line numbers - - if f.FrameSize > 1<<31 { - Yyerror("stack frame too large (>2GB)") - return - } - e := f.Config.Frontend().(*ssaExport) // We're about to emit a bunch of Progs. // Since the only way to get here is to explicitly request it, // just fail on unimplemented instead of trying to unwind our mess. e.mustImplement = true - ptxt.To.Type = obj.TYPE_TEXTSIZE - ptxt.To.Val = int32(Rnd(Curfn.Type.Argwid, int64(Widthptr))) // arg size - ptxt.To.Offset = f.FrameSize - 8 // TODO: arch-dependent - // Remember where each block starts. bstart := make([]*obj.Prog, f.NumBlocks()) @@ -2592,18 +2601,22 @@ func genssa(f *ssa.Func, ptxt *obj.Prog, gcargs, gclocals *Sym) { } } - // TODO: liveness - // TODO: gcargs - // TODO: gclocals + // Allocate stack frame + allocauto(ptxt) - // TODO: dump frame if -f + // Generate gc bitmaps. + liveness(Curfn, ptxt, gcargs, gclocals) + gcsymdup(gcargs) + gcsymdup(gclocals) - // Emit garbage collection symbols. TODO: put something in them - //liveness(Curfn, ptxt, gcargs, gclocals) - duint32(gcargs, 0, 0) - ggloblsym(gcargs, 4, obj.RODATA|obj.DUPOK) - duint32(gclocals, 0, 0) - ggloblsym(gclocals, 4, obj.RODATA|obj.DUPOK) + // Add frame prologue. Zero ambiguously live variables. + Thearch.Defframe(ptxt) + if Debug['f'] != 0 { + frame(0) + } + + // Remove leftover instrumentation from the instruction stream. + removevardef(ptxt) f.Config.HTML.Close() } @@ -3056,9 +3069,11 @@ func genValue(v *ssa.Value) { return } p := Prog(movSizeByType(v.Type)) + n := autoVar(v.Args[0]) p.From.Type = obj.TYPE_MEM - p.From.Reg = x86.REG_SP - p.From.Offset = localOffset(v.Args[0]) + p.From.Name = obj.NAME_AUTO + p.From.Node = n + p.From.Sym = Linksym(n.Sym) p.To.Type = obj.TYPE_REG p.To.Reg = regnum(v) @@ -3070,9 +3085,11 @@ func genValue(v *ssa.Value) { p := Prog(movSizeByType(v.Type)) p.From.Type = obj.TYPE_REG p.From.Reg = regnum(v.Args[0]) + n := autoVar(v) p.To.Type = obj.TYPE_MEM - p.To.Reg = x86.REG_SP - p.To.Offset = localOffset(v) + p.To.Name = obj.NAME_AUTO + p.To.Node = n + p.To.Sym = Linksym(n.Sym) case ssa.OpPhi: // just check to make sure regalloc and stackalloc did it right if v.Type.IsMemory() { @@ -3106,19 +3123,19 @@ func genValue(v *ssa.Value) { q.From.Reg = x86.REG_AX q.To.Type = obj.TYPE_MEM q.To.Reg = r - // TODO: need AUNDEF here? + Prog(obj.AUNDEF) // tell plive.go that we never reach here case ssa.OpAMD64LoweredPanicIndexCheck: p := Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = Linksym(Panicindex.Sym) - // TODO: need AUNDEF here? + Prog(obj.AUNDEF) case ssa.OpAMD64LoweredPanicSliceCheck: p := Prog(obj.ACALL) p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = Linksym(panicslice.Sym) - // TODO: need AUNDEF here? + Prog(obj.AUNDEF) case ssa.OpAMD64LoweredGetG: r := regnum(v) // See the comments in cmd/internal/obj/x86/obj6.go @@ -3151,10 +3168,16 @@ func genValue(v *ssa.Value) { p.To.Type = obj.TYPE_MEM p.To.Name = obj.NAME_EXTERN p.To.Sym = Linksym(v.Aux.(*Sym)) + if Maxarg < v.AuxInt { + Maxarg = v.AuxInt + } case ssa.OpAMD64CALLclosure: p := Prog(obj.ACALL) p.To.Type = obj.TYPE_REG p.To.Reg = regnum(v.Args[0]) + if Maxarg < v.AuxInt { + Maxarg = v.AuxInt + } case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB, ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB: x := regnum(v.Args[0]) @@ -3215,6 +3238,10 @@ func genValue(v *ssa.Value) { case ssa.OpAMD64REPMOVSB: Prog(x86.AREP) Prog(x86.AMOVSB) + case ssa.OpVarDef: + Gvardef(v.Aux.(*Node)) + case ssa.OpVarKill: + gvarkill(v.Aux.(*Node)) default: v.Unimplementedf("genValue not implemented: %s", v.LongString()) } @@ -3414,12 +3441,16 @@ func addAux(a *obj.Addr, v *ssa.Value) { a.Name = obj.NAME_EXTERN a.Sym = Linksym(sym.Sym.(*Sym)) case *ssa.ArgSymbol: - a.Offset += v.Block.Func.FrameSize + sym.Offset + n := sym.Node.(*Node) + a.Name = obj.NAME_PARAM + a.Node = n + a.Sym = Linksym(n.Orig.Sym) + a.Offset += n.Xoffset // TODO: why do I have to add this here? I don't for auto variables. case *ssa.AutoSymbol: - if sym.Offset == -1 { - v.Fatalf("auto symbol %s offset not calculated", sym.Sym) - } - a.Offset += sym.Offset + n := sym.Node.(*Node) + a.Name = obj.NAME_AUTO + a.Node = n + a.Sym = Linksym(n.Sym) default: v.Fatalf("aux in %s not implemented %#v", v, v.Aux) } @@ -3571,18 +3602,9 @@ func regnum(v *ssa.Value) int16 { return ssaRegToReg[reg.(*ssa.Register).Num] } -// localOffset returns the offset below the frame pointer where -// a stack-allocated local has been allocated. Panics if v -// is not assigned to a local slot. -// TODO: Make this panic again once it stops happening routinely. -func localOffset(v *ssa.Value) int64 { - reg := v.Block.Func.RegAlloc[v.ID] - slot, ok := reg.(*ssa.LocalSlot) - if !ok { - v.Unimplementedf("localOffset of non-LocalSlot value: %s\n%s\n", v.LongString(), v.Block.Func) - return 0 - } - return slot.Idx +// autoVar returns a *Node representing the auto variable assigned to v. +func autoVar(v *ssa.Value) *Node { + return v.Block.Func.RegAlloc[v.ID].(*ssa.LocalSlot).N.(*Node) } // ssaExport exports a bunch of compiler services for the ssa backend. @@ -3616,6 +3638,12 @@ func (*ssaExport) StringData(s string) interface{} { return &ssa.ExternSymbol{Typ: idealstring, Sym: data} } +func (e *ssaExport) Auto(t ssa.Type) fmt.Stringer { + n := temp(t.(*Type)) // Note: adds new auto to Curfn.Func.Dcl list + e.mustImplement = true // This modifies the input to SSA, so we want to make sure we succeed from here! + return n +} + // Log logs a message from the compiler. func (e *ssaExport) Logf(msg string, args ...interface{}) { // If e was marked as unimplemented, anything could happen. Ignore. diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 865066870d..8ae74d0b2f 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -4,6 +4,8 @@ package ssa +import "fmt" + type Config struct { arch string // "amd64", etc. IntSize int64 // 4 or 8 @@ -52,6 +54,10 @@ type Frontend interface { // StringData returns a symbol pointing to the given string's contents. StringData(string) interface{} // returns *gc.Sym + + // Auto returns a Node for an auto variable of the given type. + // The SSA compiler uses this function to allocate space for spills. + Auto(Type) fmt.Stringer // returns *gc.Node } // NewConfig returns a new configuration object for the given architecture. diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go index 5b56aa5184..7c314c2630 100644 --- a/src/cmd/compile/internal/ssa/export_test.go +++ b/src/cmd/compile/internal/ssa/export_test.go @@ -4,7 +4,10 @@ package ssa -import "testing" +import ( + "fmt" + "testing" +) var CheckFunc = checkFunc var PrintFunc = printFunc @@ -24,6 +27,9 @@ type DummyFrontend struct { func (DummyFrontend) StringData(s string) interface{} { return nil } +func (DummyFrontend) Auto(t Type) fmt.Stringer { + return nil +} func (d DummyFrontend) Logf(msg string, args ...interface{}) { d.t.Logf(msg, args...) } func (d DummyFrontend) Fatalf(msg string, args ...interface{}) { d.t.Fatalf(msg, args...) } diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index 747a5c7f03..b6956a459f 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -25,8 +25,6 @@ type Func struct { // when register allocation is done, maps value ids to locations RegAlloc []Location - // when stackalloc is done, the size of the stack frame - FrameSize int64 } // NumBlocks returns an integer larger than the id of any Block in the Func. diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go index 8cd8165028..81fe20547e 100644 --- a/src/cmd/compile/internal/ssa/gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/gen/genericOps.go @@ -278,8 +278,8 @@ var genericOps = []opData{ // Function calls. Arguments to the call have already been written to the stack. // Return values appear on the stack. The method receiver, if any, is treated // as a phantom first argument. - {name: "ClosureCall"}, // arg0=code pointer, arg1=context ptr, arg2=memory. Returns memory. - {name: "StaticCall"}, // call function aux.(*gc.Sym), arg0=memory. Returns memory. + {name: "ClosureCall"}, // arg0=code pointer, arg1=context ptr, arg2=memory. auxint=arg size. Returns memory. + {name: "StaticCall"}, // call function aux.(*gc.Sym), arg0=memory. auxint=arg size. Returns memory. // Conversions: signed extensions, zero (unsigned) extensions, truncations {name: "SignExt8to16", typ: "Int16"}, @@ -359,6 +359,9 @@ var genericOps = []opData{ // Used during ssa construction. Like Copy, but the arg has not been specified yet. {name: "FwdRef"}, + + {name: "VarDef"}, // aux is a *gc.Node of a variable that is about to be initialized. arg0=mem, returns mem + {name: "VarKill"}, // aux is a *gc.Node of a variable that is known to be dead. arg0=mem, returns mem } // kind control successors diff --git a/src/cmd/compile/internal/ssa/location.go b/src/cmd/compile/internal/ssa/location.go index 1b6f6d66c1..9f445e5b5a 100644 --- a/src/cmd/compile/internal/ssa/location.go +++ b/src/cmd/compile/internal/ssa/location.go @@ -26,9 +26,9 @@ func (r *Register) Name() string { // A LocalSlot is a location in the stack frame. type LocalSlot struct { - Idx int64 // offset in locals area (distance up from SP) + N fmt.Stringer // a *gc.Node for an auto variable } func (s *LocalSlot) Name() string { - return fmt.Sprintf("%d(SP)", s.Idx) + return s.N.String() } diff --git a/src/cmd/compile/internal/ssa/lower.go b/src/cmd/compile/internal/ssa/lower.go index 3dac264fac..9c28bd10a5 100644 --- a/src/cmd/compile/internal/ssa/lower.go +++ b/src/cmd/compile/internal/ssa/lower.go @@ -21,7 +21,7 @@ func checkLower(f *Func) { continue // lowered } switch v.Op { - case OpSP, OpSB, OpArg, OpCopy, OpPhi: + case OpSP, OpSB, OpArg, OpCopy, OpPhi, OpVarDef, OpVarKill: continue // ok not to lower } s := "not lowered: " + v.Op.String() + " " + v.Type.SimpleString() diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index a61c31ad5a..087a0e75b8 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -524,6 +524,8 @@ const ( OpStoreReg OpLoadReg OpFwdRef + OpVarDef + OpVarKill ) var opcodeTable = [...]opInfo{ @@ -4109,6 +4111,14 @@ var opcodeTable = [...]opInfo{ name: "FwdRef", generic: true, }, + { + name: "VarDef", + generic: true, + }, + { + name: "VarKill", + generic: true, + }, } func (o Op) Asm() int { return opcodeTable[o].asm } diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go index cf5f872e0f..e551a6375c 100644 --- a/src/cmd/compile/internal/ssa/schedule.go +++ b/src/cmd/compile/internal/ssa/schedule.go @@ -74,7 +74,8 @@ func schedule(f *Func) { score[v.ID] = 0 case v.Type.IsMemory(): // Schedule stores as early as possible. This tends to - // reduce register pressure. + // reduce register pressure. It also helps make sure + // VARDEF ops are scheduled before the corresponding LEA. score[v.ID] = 1 case v.Type.IsFlags(): // Schedule flag register generation as late as possible. diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go index 626fb8f369..d60f8d1df2 100644 --- a/src/cmd/compile/internal/ssa/stackalloc.go +++ b/src/cmd/compile/internal/ssa/stackalloc.go @@ -18,22 +18,6 @@ func setloc(home []Location, v *Value, loc Location) []Location { func stackalloc(f *Func) { home := f.RegAlloc - // Start with space for callee arguments/returns. - var n int64 - for _, b := range f.Blocks { - if b.Kind != BlockCall { - continue - } - v := b.Control - if n < v.AuxInt { - n = v.AuxInt - } - } - f.Logf("stackalloc: 0-%d for callee arguments/returns\n", n) - - // TODO: group variables by ptr/nonptr, size, etc. Emit ptr vars last - // so stackmap is smaller. - // Assign stack locations to phis first, because we // must also assign the same locations to the phi stores // introduced during regalloc. @@ -49,10 +33,9 @@ func stackalloc(f *Func) { continue // register-based phi } // stack-based phi - n = align(n, v.Type.Alignment()) - f.Logf("stackalloc: %d-%d for %v\n", n, n+v.Type.Size(), v) + n := f.Config.fe.Auto(v.Type) + f.Logf("stackalloc: %s: for %v <%v>\n", n, v, v.Type) loc := &LocalSlot{n} - n += v.Type.Size() home = setloc(home, v, loc) for _, w := range v.Args { if w.Op != OpStoreReg { @@ -79,34 +62,15 @@ func stackalloc(f *Func) { if len(v.Args) == 1 && (v.Args[0].Op == OpSP || v.Args[0].Op == OpSB) { continue } - n = align(n, v.Type.Alignment()) - f.Logf("stackalloc: %d-%d for %v\n", n, n+v.Type.Size(), v) + + n := f.Config.fe.Auto(v.Type) + f.Logf("stackalloc: %s for %v\n", n, v) loc := &LocalSlot{n} - n += v.Type.Size() home = setloc(home, v, loc) } } - // Finally, allocate space for all autos that we used - for _, b := range f.Blocks { - for _, v := range b.Values { - s, ok := v.Aux.(*AutoSymbol) - if !ok || s.Offset >= 0 { - continue - } - t := s.Typ - n = align(n, t.Alignment()) - f.Logf("stackalloc: %d-%d for auto %v\n", n, n+t.Size(), v) - s.Offset = n - n += t.Size() - } - } - - n = align(n, f.Config.PtrSize) - f.Logf("stackalloc: %d-%d for return address\n", n, n+f.Config.PtrSize) - n += f.Config.PtrSize // space for return address. TODO: arch-dependent f.RegAlloc = home - f.FrameSize = n // TODO: share stack slots among noninterfering (& gc type compatible) values } diff --git a/src/cmd/compile/internal/ssa/value.go b/src/cmd/compile/internal/ssa/value.go index d213b72df3..a5915da025 100644 --- a/src/cmd/compile/internal/ssa/value.go +++ b/src/cmd/compile/internal/ssa/value.go @@ -142,17 +142,15 @@ type ExternSymbol struct { // ArgSymbol is an aux value that encodes an argument or result // variable's constant offset from FP (FP = SP + framesize). type ArgSymbol struct { - Typ Type // Go type - Offset int64 // Distance above frame pointer - Sym fmt.Stringer // A *gc.Sym referring to the argument/result variable. + Typ Type // Go type + Node fmt.Stringer // A *gc.Node referring to the argument/result variable. } // AutoSymbol is an aux value that encodes a local variable's // constant offset from SP. type AutoSymbol struct { - Typ Type // Go type - Offset int64 // Distance above stack pointer. Set by stackalloc in SSA. - Sym fmt.Stringer // A *gc.Sym referring to a local (auto) variable. + Typ Type // Go type + Node fmt.Stringer // A *gc.Node referring to a local (auto) variable. } func (s *ExternSymbol) String() string { @@ -160,9 +158,9 @@ func (s *ExternSymbol) String() string { } func (s *ArgSymbol) String() string { - return s.Sym.String() + return s.Node.String() } func (s *AutoSymbol) String() string { - return s.Sym.String() + return s.Node.String() }