mirror of https://github.com/golang/go, synced 2024-11-15 00:20:30 -07:00
cmd/compile, cmd/internal: fine-grained fiddling with loop alignment
This appears to be useful only on amd64; it was specifically benchmarked on Apple Silicon and did not produce any benefit there.

This CL adds the assembly instruction `PCALIGNMAX align,amount`, which aligns to `align` if that can be achieved with `amount` or fewer bytes of padding (an amount of 0 means never pad, but still align the enclosing function). Specifically, padding is emitted when low-order-address-bits + amount is greater than or equal to align. Thus, `PCALIGNMAX 64,63` is the same as `PCALIGN 64`, and `PCALIGNMAX 64,0` will never emit any alignment padding but will still cause the function itself to be aligned to (at least) 64 bytes.

Change-Id: Id51a056f1672f8095e8f755e01f72836c9686aa3
Reviewed-on: https://go-review.googlesource.com/c/go/+/577935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
commit 18d0e6a14f (parent 31c8150082)
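Aside: before the diff itself, the padding rule stated in the message fits in a few lines of arithmetic. The sketch below is illustrative only; the function name pcalignmaxPadding is made up for this note, and the authoritative implementation is AlignmentPaddingLength in cmd/internal/obj, added later in this commit.

package main

import "fmt"

// pcalignmaxPadding mirrors the PCALIGNMAX rule from the commit message:
// pad to `align` only if that costs `amount` or fewer bytes of padding,
// i.e. when low-order-address-bits + amount >= align.
func pcalignmaxPadding(pc, align, amount int64) int64 {
	lob := pc & (align - 1) // low-order bits of the current offset
	if lob == 0 {
		return 0 // already aligned
	}
	if amount >= align-lob {
		return align - lob // cheap enough: emit the padding
	}
	return 0 // too expensive: emit nothing
}

func main() {
	// PCALIGNMAX 64,63 behaves like PCALIGN 64: it always pads.
	fmt.Println(pcalignmaxPadding(0x1021, 64, 63)) // 31
	// PCALIGNMAX 64,31 pads only when 31 or fewer bytes are needed.
	fmt.Println(pcalignmaxPadding(0x1021, 64, 31)) // 31 (offset 33 mod 64, 31 bytes needed)
	fmt.Println(pcalignmaxPadding(0x1011, 64, 31)) // 0  (47 bytes needed, too many)
	// PCALIGNMAX 64,0 never pads (but still aligns the enclosing function).
	fmt.Println(pcalignmaxPadding(0x1021, 64, 0)) // 0
}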
@@ -16,6 +16,7 @@ var Debug DebugFlags
 // The -d option takes a comma-separated list of settings.
 // Each setting is name=value; for ints, name is short for name=1.
 type DebugFlags struct {
+	AlignHot int `help:"enable hot block alignment (currently requires -pgo)" concurrent:"ok"`
 	Append   int `help:"print information about append compilation"`
 	Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"`
 	Closure  int `help:"print information about closure compilation"`
@@ -178,6 +178,7 @@ func ParseFlags() {
 	Debug.ConcurrentOk = true
 	Debug.MaxShapeLen = 500
+	Debug.AlignHot = 1
 	Debug.InlFuncsWithClosures = 1
 	Debug.InlStaticInit = 1
 	Debug.PGOInline = 1
@@ -14,6 +14,7 @@ import (
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/liveness"
 	"cmd/compile/internal/objw"
+	"cmd/compile/internal/pgoir"
 	"cmd/compile/internal/ssagen"
 	"cmd/compile/internal/staticinit"
 	"cmd/compile/internal/types"
@@ -112,7 +113,7 @@ func prepareFunc(fn *ir.Func) {
 // compileFunctions compiles all functions in compilequeue.
 // It fans out nBackendWorkers to do the work
 // and waits for them to complete.
-func compileFunctions() {
+func compileFunctions(profile *pgoir.Profile) {
 	if race.Enabled {
 		// Randomize compilation order to try to shake out races.
 		tmp := make([]*ir.Func, len(compilequeue))
@@ -179,7 +180,7 @@ func compileFunctions() {
 	for _, fn := range fns {
 		fn := fn
 		queue(func(worker int) {
-			ssagen.Compile(fn, worker)
+			ssagen.Compile(fn, worker, profile)
 			compile(fn.Closures)
 			wg.Done()
 		})
@@ -303,7 +303,7 @@ func Main(archInit func(*ssagen.ArchInfo)) {
 		// as late as possible to maximize how much work we can batch and
 		// process concurrently.
 		if len(compilequeue) != 0 {
-			compileFunctions()
+			compileFunctions(profile)
 			continue
 		}
 
@@ -61,6 +61,9 @@ var (
 	// TODO(prattmic): Make this non-global.
 	candHotCalleeMap = make(map[*pgoir.IRNode]struct{})
 
+	// Set of functions that contain hot call sites.
+	hasHotCall = make(map[*ir.Func]struct{})
+
 	// List of all hot call sites. CallSiteInfo.Callee is always nil.
 	// TODO(prattmic): Make this non-global.
 	candHotEdgeMap = make(map[pgoir.CallSiteInfo]struct{})
@@ -78,6 +81,22 @@
 	inlineHotMaxBudget int32 = 2000
 )
 
+func IsPgoHotFunc(fn *ir.Func, profile *pgoir.Profile) bool {
+	if profile == nil {
+		return false
+	}
+	if n, ok := profile.WeightedCG.IRNodes[ir.LinkFuncName(fn)]; ok {
+		_, ok := candHotCalleeMap[n]
+		return ok
+	}
+	return false
+}
+
+func HasPgoHotInline(fn *ir.Func) bool {
+	_, has := hasHotCall[fn]
+	return has
+}
+
 // PGOInlinePrologue records the hot callsites from ir-graph.
 func PGOInlinePrologue(p *pgoir.Profile) {
 	if base.Debug.PGOInlineCDFThreshold != "" {
@@ -228,14 +247,10 @@ func GarbageCollectUnreferencedHiddenClosures() {
 func inlineBudget(fn *ir.Func, profile *pgoir.Profile, relaxed bool, verbose bool) int32 {
 	// Update the budget for profile-guided inlining.
 	budget := int32(inlineMaxBudget)
-	if profile != nil {
-		if n, ok := profile.WeightedCG.IRNodes[ir.LinkFuncName(fn)]; ok {
-			if _, ok := candHotCalleeMap[n]; ok {
-				budget = inlineHotMaxBudget
-				if verbose {
-					fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
-				}
-			}
+	if IsPgoHotFunc(fn, profile) {
+		budget = inlineHotMaxBudget
+		if verbose {
+			fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
 		}
 	}
 	if relaxed {
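Aside: the observable effect of this refactor on the inliner's budget is simple to state. In the hedged sketch below, 2000 comes from inlineHotMaxBudget in the var block above; the value 80 for the default is an assumption about inlineMaxBudget, which is defined elsewhere in inl.go and not shown in this diff, and the relaxed-mode adjustment is omitted.

package main

import "fmt"

// effectiveBudget sketches inlineBudget's PGO adjustment after this CL:
// hot functions get the larger budget, everything else keeps the default.
func effectiveBudget(hot bool) int32 {
	const (
		inlineMaxBudget    = 80   // assumed default, defined elsewhere in inl.go
		inlineHotMaxBudget = 2000 // from the var block in this diff
	)
	if hot { // after this CL: IsPgoHotFunc(fn, profile)
		return inlineHotMaxBudget
	}
	return inlineMaxBudget
}

func main() {
	fmt.Println(effectiveBudget(true))  // 2000
	fmt.Println(effectiveBudget(false)) // 80
}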
@@ -580,7 +595,7 @@ opSwitch:
 			// Check whether we'd actually inline this call. Set
 			// log == false since we aren't actually doing inlining
 			// yet.
-			if ok, _ := canInlineCallExpr(v.curFunc, n, callee, v.isBigFunc, false); ok {
+			if ok, _, _ := canInlineCallExpr(v.curFunc, n, callee, v.isBigFunc, false); ok {
 				// mkinlcall would inline this call [1], so use
 				// the cost of the inline body as the cost of
 				// the call, as that is what will actually
@@ -873,10 +888,11 @@ var InlineCall = func(callerfn *ir.Func, call *ir.CallExpr, fn *ir.Func, inlInde
 // inlineCostOK returns true if call n from caller to callee is cheap enough to
 // inline. bigCaller indicates that caller is a big function.
 //
-// In addition to the "cost OK" boolean, it also returns the "max
-// cost" limit used to make the decision (which may differ depending
-// on func size), and the score assigned to this specific callsite.
-func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool, int32, int32) {
+// In addition to the "cost OK" boolean, it also returns
+// - the "max cost" limit used to make the decision (which may differ depending on func size)
+// - the score assigned to this specific callsite
+// - whether the inlined function is "hot" according to PGO.
+func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool, int32, int32, bool) {
 	maxCost := int32(inlineMaxBudget)
 	if bigCaller {
 		// We use this to restrict inlining into very big functions.
@@ -892,19 +908,21 @@ func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool
 		}
 	}
 
+	lineOffset := pgoir.NodeLineOffset(n, caller)
+	csi := pgoir.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
+	_, hot := candHotEdgeMap[csi]
+
 	if metric <= maxCost {
 		// Simple case. Function is already cheap enough.
-		return true, 0, metric
+		return true, 0, metric, hot
 	}
 
 	// We'll also allow inlining of hot functions below inlineHotMaxBudget,
 	// but only in small functions.
 
-	lineOffset := pgoir.NodeLineOffset(n, caller)
-	csi := pgoir.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
-	if _, ok := candHotEdgeMap[csi]; !ok {
+	if !hot {
 		// Cold
-		return false, maxCost, metric
+		return false, maxCost, metric, false
 	}
 
 	// Hot
@@ -913,49 +931,50 @@ func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool
 		if base.Debug.PGODebug > 0 {
 			fmt.Printf("hot-big check disallows inlining for call %s (cost %d) at %v in big function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
 		}
-		return false, maxCost, metric
+		return false, maxCost, metric, false
 	}
 
 	if metric > inlineHotMaxBudget {
-		return false, inlineHotMaxBudget, metric
+		return false, inlineHotMaxBudget, metric, false
 	}
 
 	if !base.PGOHash.MatchPosWithInfo(n.Pos(), "inline", nil) {
 		// De-selected by PGO Hash.
-		return false, maxCost, metric
+		return false, maxCost, metric, false
 	}
 
 	if base.Debug.PGODebug > 0 {
 		fmt.Printf("hot-budget check allows inlining for call %s (cost %d) at %v in function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
 	}
 
-	return true, 0, metric
+	return true, 0, metric, hot
 }
 
 // canInlineCallExpr returns true if the call n from caller to callee
-// can be inlined, plus the score computed for the call expr in
-// question. bigCaller indicates that caller is a big function. log
+// can be inlined, plus the score computed for the call expr in question,
+// and whether the callee is hot according to PGO.
+// bigCaller indicates that caller is a big function. log
 // indicates that the 'cannot inline' reason should be logged.
 //
 // Preconditions: CanInline(callee) has already been called.
-func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCaller bool, log bool) (bool, int32) {
+func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCaller bool, log bool) (bool, int32, bool) {
 	if callee.Inl == nil {
 		// callee is never inlinable.
 		if log && logopt.Enabled() {
 			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
 				fmt.Sprintf("%s cannot be inlined", ir.PkgFuncName(callee)))
 		}
-		return false, 0
+		return false, 0, false
 	}
 
-	ok, maxCost, callSiteScore := inlineCostOK(n, callerfn, callee, bigCaller)
+	ok, maxCost, callSiteScore, hot := inlineCostOK(n, callerfn, callee, bigCaller)
 	if !ok {
 		// callee cost too high for this call site.
 		if log && logopt.Enabled() {
 			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
 				fmt.Sprintf("cost %d of %s exceeds max caller cost %d", callee.Inl.Cost, ir.PkgFuncName(callee), maxCost))
 		}
-		return false, 0
+		return false, 0, false
 	}
 
 	if callee == callerfn {
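Aside: with the extra return value, inlineCostOK's case analysis can be summarized as below. This is a hedged paraphrase with a simplified argument list, not the real signature; the real function also returns the max-cost limit and the callsite score.

package main

import "fmt"

// decision paraphrases inlineCostOK's cases after this CL.
func decision(metric, maxCost, hotMaxBudget int32, hot, bigCaller, hashOK bool) bool {
	switch {
	case metric <= maxCost:
		return true // cheap enough regardless of PGO
	case !hot:
		return false // cold call site: no extra budget
	case bigCaller:
		return false // hot, but inlining into big functions stays restricted
	case metric > hotMaxBudget:
		return false // hot, but beyond the hot budget (2000 above)
	case !hashOK:
		return false // de-selected by PGO hash (a debugging aid)
	default:
		return true // hot and affordable
	}
}

func main() {
	fmt.Println(decision(150, 80, 2000, true, false, true))  // true: hot call over the normal budget
	fmt.Println(decision(150, 80, 2000, false, false, true)) // false: same cost, but cold
}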
@@ -963,7 +982,7 @@ func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCa
 		if log && logopt.Enabled() {
 			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", fmt.Sprintf("recursive call to %s", ir.FuncName(callerfn)))
 		}
-		return false, 0
+		return false, 0, false
 	}
 
 	if base.Flag.Cfg.Instrumenting && types.IsNoInstrumentPkg(callee.Sym().Pkg) {
@@ -977,7 +996,7 @@ func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCa
 			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
 				fmt.Sprintf("call to runtime function %s in instrumented build", ir.PkgFuncName(callee)))
 		}
-		return false, 0
+		return false, 0, false
 	}
 
 	if base.Flag.Race && types.IsNoRacePkg(callee.Sym().Pkg) {
@@ -985,7 +1004,7 @@ func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCa
 			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
 				fmt.Sprintf(`call to into "no-race" package function %s in race build`, ir.PkgFuncName(callee)))
 		}
-		return false, 0
+		return false, 0, false
 	}
 
 	// Check if we've already inlined this function at this particular
@@ -1008,11 +1027,11 @@ func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCa
 					fmt.Sprintf("repeated recursive cycle to %s", ir.PkgFuncName(callee)))
 				}
 			}
-			return false, 0
+			return false, 0, false
 		}
 	}
 
-	return true, callSiteScore
+	return true, callSiteScore, hot
 }
 
 // mkinlcall returns an OINLCALL node that can replace OCALLFUNC n, or
@@ -1023,10 +1042,13 @@ func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCa
 //
 //	n.Left = mkinlcall(n.Left, fn, isddd)
 func mkinlcall(callerfn *ir.Func, n *ir.CallExpr, fn *ir.Func, bigCaller bool) *ir.InlinedCallExpr {
-	ok, score := canInlineCallExpr(callerfn, n, fn, bigCaller, true)
+	ok, score, hot := canInlineCallExpr(callerfn, n, fn, bigCaller, true)
 	if !ok {
 		return nil
 	}
+	if hot {
+		hasHotCall[callerfn] = struct{}{}
+	}
 	typecheck.AssertFixedCall(n)
 
 	parent := base.Ctxt.PosTable.Pos(n.Pos()).Base().InliningIndex()
@@ -31,6 +31,9 @@ type Block struct {
 	// After flagalloc, records whether flags are live at the end of the block.
 	FlagsLiveAtEnd bool
 
+	// A block that would be good to align (according to the optimizer's guesses)
+	Hotness Hotness
+
 	// Subsequent blocks, if any. The number and order depend on the block kind.
 	Succs []Edge
 
@@ -112,7 +115,7 @@ func (e Edge) String() string {
 }
 
 // BlockKind is the kind of SSA block.
-type BlockKind int16
+type BlockKind uint8
 
 // short form print
 func (b *Block) String() string {
@@ -426,3 +429,17 @@
 	BranchUnknown = BranchPrediction(0)
 	BranchLikely  = BranchPrediction(+1)
 )
+
+type Hotness int8 // Could use negative numbers for specifically non-hot blocks, but don't, yet.
+const (
+	// These values are arranged in what seems to be order of increasing alignment importance.
+	// Currently only a few are relevant. Implicitly, they are all in a loop.
+	HotNotFlowIn Hotness = 1 << iota // This block is only reached by branches
+	HotInitial                       // In the block order, the first one for a given loop. Not necessarily topological header.
+	HotPgo                           // By PGO-based heuristics, this block occurs in a hot loop
+
+	HotNot                 = 0
+	HotInitialNotFlowIn    = HotInitial | HotNotFlowIn          // typically first block of a rotated loop, loop is entered with a branch (not to this block). No PGO
+	HotPgoInitial          = HotPgo | HotInitial                // special case; single block loop, initial block is header block has a flow-in entry, but PGO says it is hot
+	HotPgoInitialNotFLowIn = HotPgo | HotInitial | HotNotFlowIn // PGO says it is hot, and the loop is rotated so flow enters loop with a branch
+)
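Aside: since Hotness is a bit set, the test genssa performs later in this commit is plain flag algebra. A hedged, abridged sketch (the constants are copied from above, but this code lives outside the ssa package and omits some combinations):

package main

import "fmt"

type Hotness int8

const (
	HotNotFlowIn Hotness = 1 << iota // reached only by branches
	HotInitial                       // first block of a loop in the block order
	HotPgo                           // in a hot loop, per PGO

	HotPgoInitial = HotPgo | HotInitial // the combination genssa keys on
)

func main() {
	// A rotated, PGO-hot loop header carries all three bits.
	h := HotPgo | HotInitial | HotNotFlowIn
	fmt.Println(h&HotPgoInitial == HotPgoInitial) // true: gets APCALIGNMAX
	// A rotated loop with no profile data does not qualify.
	h = HotInitial | HotNotFlowIn
	fmt.Println(h&HotPgoInitial == HotPgoInitial) // false: left unaligned
}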
@@ -45,6 +45,7 @@ type Func struct {
 	laidout     bool  // Blocks are ordered
 	NoSplit     bool  // true if function is marked as nosplit. Used by schedule check pass.
 	dumpFileSeq uint8 // the sequence numbers of dump file. (%s_%02d__%s.dump", funcname, dumpFileSeq, phaseName)
+	IsPgoHot    bool
 
 	// when register allocation is done, maps value ids to locations
 	RegAlloc []Location
@@ -56,9 +56,20 @@ func loopRotate(f *Func) {
 			}
 			p = e.b
 		}
-		if p == nil || p == b {
+		if p == nil {
 			continue
 		}
+		p.Hotness |= HotInitial
+		if f.IsPgoHot {
+			p.Hotness |= HotPgo
+		}
+		// blocks will be arranged so that p is ordered first, if it isn't already.
+		if p == b { // p is header, already first (and also, only block in the loop)
+			continue
+		}
+		p.Hotness |= HotNotFlowIn
+
 		// the loop header b follows p
 		after[p.ID] = []*Block{b}
 		for {
 			nextIdx := idToIdx[b.ID] + 1
@@ -12,9 +12,11 @@ import (
 	"sync"
 
 	"cmd/compile/internal/base"
+	"cmd/compile/internal/inline"
 	"cmd/compile/internal/ir"
 	"cmd/compile/internal/liveness"
 	"cmd/compile/internal/objw"
+	"cmd/compile/internal/pgoir"
 	"cmd/compile/internal/ssa"
 	"cmd/compile/internal/types"
 	"cmd/internal/obj"
@@ -296,8 +298,8 @@ const maxStackSize = 1 << 30
 // uses it to generate a plist,
 // and flushes that plist to machine code.
 // worker indicates which of the backend workers is doing the processing.
-func Compile(fn *ir.Func, worker int) {
-	f := buildssa(fn, worker)
+func Compile(fn *ir.Func, worker int, profile *pgoir.Profile) {
+	f := buildssa(fn, worker, inline.IsPgoHotFunc(fn, profile) || inline.HasPgoHotInline(fn))
 	// Note: check arg size to fix issue 25507.
 	if f.Frontend().(*ssafn).stksize >= maxStackSize || f.OwnAux.ArgWidth() >= maxStackSize {
 		largeStackFramesMu.Lock()
@@ -291,7 +291,7 @@ func (s *state) emitOpenDeferInfo() {
 
 // buildssa builds an SSA function for fn.
 // worker indicates which of the backend workers is doing the processing.
-func buildssa(fn *ir.Func, worker int) *ssa.Func {
+func buildssa(fn *ir.Func, worker int, isPgoHot bool) *ssa.Func {
 	name := ir.FuncName(fn)
 
 	abiSelf := abiForFunc(fn, ssaConfig.ABI0, ssaConfig.ABI1)
@@ -373,6 +373,7 @@ func buildssa(fn *ir.Func, worker int) *ssa.Func {
 	// Allocate starting block
 	s.f.Entry = s.f.NewBlock(ssa.BlockPlain)
 	s.f.Entry.Pos = fn.Pos()
+	s.f.IsPgoHot = isPgoHot
 
 	if printssa {
 		ssaDF := ssaDumpFile
@@ -7302,12 +7303,47 @@ func genssa(f *ssa.Func, pp *objw.Progs) {
 
 	var argLiveIdx int = -1 // argument liveness info index
 
+	// These control cache line alignment; if the required portion of
+	// a cache line is not available, then pad to obtain cache line
+	// alignment. Not implemented on all architectures, may not be
+	// useful on all architectures.
+	var hotAlign, hotRequire int64
+
+	if base.Debug.AlignHot > 0 {
+		switch base.Ctxt.Arch.Name {
+		// enable this on a case-by-case basis, with benchmarking.
+		// currently shown:
+		//  good for amd64
+		//  not helpful for Apple Silicon
+		//
+		case "amd64", "386":
+			// Align to 64 if 31 or fewer bytes remain in a cache line
+			// benchmarks a little better than always aligning, and also
+			// adds slightly less to the (PGO-compiled) binary size.
+			hotAlign = 64
+			hotRequire = 31
+		}
+	}
+
 	// Emit basic blocks
 	for i, b := range f.Blocks {
-		s.bstart[b.ID] = s.pp.Next
 
 		s.lineRunStart = nil
 		s.SetPos(s.pp.Pos.WithNotStmt()) // It needs a non-empty Pos, but cannot be a statement boundary (yet).
 
+		if hotAlign > 0 && b.Hotness&ssa.HotPgoInitial == ssa.HotPgoInitial {
+			// So far this has only been shown profitable for PGO-hot loop headers.
+			// The Hotness values allows distinctions betwen initial blocks that are "hot" or not, and "flow-in" or not.
+			// Currently only the initial blocks of loops are tagged in this way;
+			// there are no blocks tagged "pgo-hot" that are not also tagged "initial".
+			// TODO more heuristics, more architectures.
+			p := s.pp.Prog(obj.APCALIGNMAX)
+			p.From.SetConst(hotAlign)
+			p.To.SetConst(hotRequire)
+		}
+
+		s.bstart[b.ID] = s.pp.Next
+
 		if idx, ok := argLiveBlockMap[b.ID]; ok && idx != argLiveIdx {
 			argLiveIdx = idx
 			p := s.pp.Prog(obj.APCDATA)
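Aside: reading the 64/31 pair concretely (an illustration of the comment above, not a new claim). A hot loop header whose offset is 33 mod 64 or higher has at most 31 bytes left in its cache line, so APCALIGNMAX pushes it to the next line at a cost of 1 to 31 NOP bytes; a header at offset 32 mod 64 would need 32 padding bytes, which exceeds hotRequire, so nothing is emitted and the header stays put. Padding per hot header is therefore bounded by half a cache line, which is presumably what keeps the binary-size cost modest.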
@@ -7466,7 +7502,8 @@ func genssa(f *ssa.Func, pp *objw.Progs) {
 	// going to emit anyway, and use those instructions instead of the
 	// inline marks.
 	for p := s.pp.Text; p != nil; p = p.Link {
-		if p.As == obj.ANOP || p.As == obj.AFUNCDATA || p.As == obj.APCDATA || p.As == obj.ATEXT || p.As == obj.APCALIGN || Arch.LinkArch.Family == sys.Wasm {
+		if p.As == obj.ANOP || p.As == obj.AFUNCDATA || p.As == obj.APCDATA || p.As == obj.ATEXT ||
+			p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX || Arch.LinkArch.Family == sys.Wasm {
 			// Don't use 0-sized instructions as inline marks, because we need
 			// to identify inline mark instructions by pc offset.
 			// (Some of these instructions are sometimes zero-sized, sometimes not.
@@ -889,9 +889,10 @@ var optab = []Optab{
 	{obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
 	{obj.ANOP, C_ZREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
 	{obj.ANOP, C_VREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
-	{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0}, // same as AB/ABL
-	{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0}, // same as AB/ABL
-	{obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},  // align code
+	{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0},   // same as AB/ABL
+	{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0},   // same as AB/ABL
+	{obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},    // align code
+	{obj.APCALIGNMAX, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0, 0}, // align code, conditional
 }
 
 // Valid pstate field values, and value to use in instruction.
@@ -1109,13 +1110,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 			m = o.size(c.ctxt, p)
 			if m == 0 {
 				switch p.As {
-				case obj.APCALIGN:
-					alignedValue := p.From.Offset
-					m = pcAlignPadLength(ctxt, pc, alignedValue)
-					// Update the current text symbol alignment value.
-					if int32(alignedValue) > cursym.Func().Align {
-						cursym.Func().Align = int32(alignedValue)
-					}
+				case obj.APCALIGN, obj.APCALIGNMAX:
+					m = obj.AlignmentPadding(int32(pc), p, ctxt, cursym)
 					break
 				case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
 					continue
@@ -1181,9 +1177,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 
 		if m == 0 {
 			switch p.As {
-			case obj.APCALIGN:
-				alignedValue := p.From.Offset
-				m = pcAlignPadLength(ctxt, pc, alignedValue)
+			case obj.APCALIGN, obj.APCALIGNMAX:
+				m = obj.AlignmentPaddingLength(int32(pc), p, ctxt)
 				break
 			case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
 				continue
@@ -1214,9 +1209,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 		if sz > 4*len(out) {
 			log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p)
 		}
-		if p.As == obj.APCALIGN {
-			alignedValue := p.From.Offset
-			v := pcAlignPadLength(c.ctxt, p.Pc, alignedValue)
+		if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
+			v := obj.AlignmentPaddingLength(int32(p.Pc), p, c.ctxt)
 			for i = 0; i < int(v/4); i++ {
 				// emit ANOOP instruction by the padding size
 				c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_NOOP)
@@ -3316,6 +3310,7 @@ func buildop(ctxt *obj.Link) {
 			obj.AUNDEF,
 			obj.AFUNCDATA,
 			obj.APCALIGN,
+			obj.APCALIGNMAX,
 			obj.APCDATA,
 			obj.ADUFFZERO,
 			obj.ADUFFCOPY:
@@ -416,6 +416,7 @@ const (
 	AJMP
 	ANOP
 	APCALIGN
+	APCALIGNMAX // currently x86, amd64 and arm64
 	APCDATA
 	ARET
 	AGETCALLERPC
@@ -6,6 +6,7 @@ package obj
 
 import (
 	"bytes"
+	"cmd/internal/objabi"
 	"fmt"
 	"internal/abi"
 	"internal/buildcfg"
@@ -642,6 +643,7 @@ var Anames = []string{
 	"JMP",
 	"NOP",
 	"PCALIGN",
+	"PCALIGNMAX",
 	"PCDATA",
 	"RET",
 	"GETCALLERPC",
@@ -667,3 +669,62 @@ func abiDecorate(a *Addr, abiDetail bool) string {
 	}
 	return fmt.Sprintf("<%s>", a.Sym.ABI())
 }
+
+// AlignmentPadding bytes to add to align code as requested.
+// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+//
+// pc_: current offset in function, in bytes
+// p: a PCALIGN or PCALIGNMAX prog
+// ctxt: the context, for current function
+// cursym: current function being assembled
+// returns number of bytes of padding needed,
+// updates minimum alignment for the function.
+func AlignmentPadding(pc int32, p *Prog, ctxt *Link, cursym *LSym) int {
+	v := AlignmentPaddingLength(pc, p, ctxt)
+	requireAlignment(p.From.Offset, ctxt, cursym)
+	return v
+}
+
+// AlignmentPaddingLength is the number of bytes to add to align code as requested.
+// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+// This only computes the length and does not update the (missing parameter)
+// current function's own required alignment.
+//
+// pc: current offset in function, in bytes
+// p: a PCALIGN or PCALIGNMAX prog
+// ctxt: the context, for current function
+// returns number of bytes of padding needed,
+func AlignmentPaddingLength(pc int32, p *Prog, ctxt *Link) int {
+	a := p.From.Offset
+	if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
+		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
+		return 0
+	}
+	pc64 := int64(pc)
+	lob := pc64 & (a - 1) // Low Order Bits -- if not zero, then not aligned
+	if p.As == APCALIGN {
+		if lob != 0 {
+			return int(a - lob)
+		}
+		return 0
+	}
+	// emit as many as s bytes of padding to obtain alignment
+	s := p.To.Offset
+	if s < 0 || s >= a {
+		ctxt.Diag("PCALIGNMAX 'amount' %d must be non-negative and smaller than the aligment %d\n", s, a)
+		return 0
+	}
+	if s >= a-lob {
+		return int(a - lob)
+	}
+	return 0
+}
+
+// requireAlignment ensures that the function is aligned enough to support
+// the required code alignment
+func requireAlignment(a int64, ctxt *Link, cursym *LSym) {
+	// TODO remove explicit knowledge about AIX.
+	if ctxt.Headtype != objabi.Haix && cursym.Func().Align < int32(a) {
+		cursym.Func().Align = int32(a)
+	}
+}
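Aside: the split between AlignmentPadding and AlignmentPaddingLength is what makes PCALIGNMAX align,0 meaningful. The length computation is pure, while the side effect on the function's minimum alignment happens in requireAlignment. A hedged sketch of just that side effect (funcInfo stands in for the real symbol metadata, and the AIX special case is dropped):

package main

import "fmt"

// funcInfo stands in for the function symbol's metadata.
type funcInfo struct{ Align int32 }

// requireAlignment mirrors obj.requireAlignment above, minus the AIX
// special case: it only raises the function's minimum alignment.
func requireAlignment(a int64, f *funcInfo) {
	if f.Align < int32(a) {
		f.Align = int32(a)
	}
}

func main() {
	// Even a PCALIGNMAX 64,0 that never emits padding goes through this
	// path (via AlignmentPadding), so the function ends up 64-byte aligned.
	f := funcInfo{Align: 32}
	requireAlignment(64, &f)
	fmt.Println(f.Align) // 64
}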
@@ -2036,29 +2036,21 @@ type nopPad struct {
 	n int32 // Size of the pad
 }
 
-// Padding bytes to add to align code as requested.
-// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+// requireAlignment ensures that the function alignment is at
+// least as high as a, which should be a power of two
+// and between 8 and 2048, inclusive.
 //
-// pc: current offset in function, in bytes
-// a: requested alignment, in bytes
-// cursym: current function being assembled
-// returns number of bytes of padding needed
-func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
+// the boolean result indicates whether the alignment meets those constraints
+func requireAlignment(a int64, ctxt *obj.Link, cursym *obj.LSym) bool {
 	if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
 		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
-		return 0
+		return false
 	}
 
 	// By default function alignment is 32 bytes for amd64
 	if cursym.Func().Align < int32(a) {
 		cursym.Func().Align = int32(a)
 	}
-
-	if pc&(a-1) != 0 {
-		return int(a - (pc & (a - 1)))
-	}
-
-	return 0
+	return true
 }
 
 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
@@ -2144,17 +2136,17 @@ func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
 			c0 := c
 			c = pjc.padJump(ctxt, s, p, c)
 
-			if p.As == obj.APCALIGN {
-				aln := p.From.Offset
-				v := addpad(int64(c), aln, ctxt, s)
+			if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
+				v := obj.AlignmentPadding(c, p, ctxt, s)
 				if v > 0 {
 					s.Grow(int64(c) + int64(v))
 					fillnop(s.P[c:], int(v))
 				}
-
 				p.Pc = int64(c)
 				c += int32(v)
 				pPrev = p
 				continue
+
 			}
 
 			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {