1
0
mirror of https://github.com/golang/go synced 2024-11-23 18:50:05 -07:00

cmd/internal/obj/arm64: optimize function prologue/epilogue with STP/LDP

In function prologue and epilogue, we save and restore FP and LR
registers, and adjust RSP. The current instruction sequence is as
follow.

For frame size <= 240B,
  prologue:
    MOVD.W	R30, -offset(RSP)
    MOVD	R29, -8(RSP)
  epilogue:
    MOVD	-8(RSP), R29
    MOVD.P	offset(RSP), R30

For frame size > 240B,
  prologue:
    SUB		$offset, RSP, R27
    MOVD	R30, (R27)
    MOVD	R27, RSP
    MOVD	R29, -8(RSP)
  epilogue:
    MOVD	-8(RSP), R29
    MOVD	(RSP), R30
    ADD		$offset, RSP

Each sequence uses two load or store instructions, actually we can load
or store two registers with one LDP or STP instruction. This CL changes
the sequences as follow.

For frame size <= 496B,
  prologue:
    STP		(R29, R30), -(offset+8)(RSP)
    SUB		$offset, RSP, RSP
  epilogue:
    LDP		-8(RSP), (R29, R30)
    ADD		$offset, RSP, RSP

For frame size > 496B,
  prologue:
    SUB		$offset, RSP, R20
    STP		(R29, R30), -8(R20)
    MOVD	R20, RSP
  epilogue:
    LDP		-8(RSP), (R29, R30)
    ADD		$offset, RSP, RSP

Change-Id: Ia58af85fc81cce9b7c393dc38df43bffb203baad
Reviewed-on: https://go-review.googlesource.com/c/go/+/379075
Reviewed-by: Cherry Mui <cherryyz@google.com>
Trust: Eric Fang <eric.fang@arm.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
This commit is contained in:
eric fang 2022-01-18 02:52:08 +00:00 committed by Eric Fang
parent 1045faa38c
commit c6d9b38dd8

View File

@ -622,92 +622,124 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
var prologueEnd *obj.Prog
aoffset := c.autosize
if aoffset > 0xF0 {
aoffset = 0xF0
if aoffset > 0x1f0 {
// LDP offset variant range is -512 to 504, SP should be 16-byte aligned,
// so the maximum aoffset value is 496.
aoffset = 0x1f0
}
// Frame is non-empty. Make sure to save link register, even if
// it is a leaf function, so that traceback works.
q = p
if c.autosize > aoffset {
// Frame size is too large for a MOVD.W instruction.
// Store link register before decrementing SP, so if a signal comes
// during the execution of the function prologue, the traceback
// code will not see a half-updated stack frame.
// This sequence is not async preemptible, as if we open a frame
// at the current SP, it will clobber the saved LR.
q = c.ctxt.StartUnsafePoint(q, c.newprog)
q = obj.Appendp(q, c.newprog)
q.Pos = p.Pos
q.As = ASUB
q.From.Type = obj.TYPE_CONST
q.From.Offset = int64(c.autosize)
q.Reg = REGSP
q.To.Type = obj.TYPE_REG
q.To.Reg = REGTMP
prologueEnd = q
q = obj.Appendp(q, c.newprog)
q.Pos = p.Pos
q.As = AMOVD
q.From.Type = obj.TYPE_REG
q.From.Reg = REGLINK
q.To.Type = obj.TYPE_MEM
q.To.Reg = REGTMP
// Frame size is too large for a STP instruction. Store the frame pointer
// register and link register before decrementing SP, so if a signal comes
// during the execution of the function prologue, the traceback code will
// not see a half-updated stack frame.
// SUB $autosize, RSP, R20
q1 = obj.Appendp(q, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = int64(c.autosize)
q1.Reg = REGSP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REG_R20
prologueEnd = q1
// STP (R29, R30), -8(R20)
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASTP
q1.From.Type = obj.TYPE_REGREG
q1.From.Reg = REGFP
q1.From.Offset = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REG_R20
q1.To.Offset = -8
// This is not async preemptible, as if we open a frame
// at the current SP, it will clobber the saved LR.
q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
// MOVD R20, RSP
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGTMP
q1.From.Reg = REG_R20
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGSP
q1.Spadj = c.autosize
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
if buildcfg.GOOS == "ios" {
// iOS does not support SA_ONSTACK. We will run the signal handler
// on the G stack. If we write below SP, it may be clobbered by
// the signal handler. So we save LR after decrementing SP.
// the signal handler. So we save FP and LR after decrementing SP.
// STP (R29, R30), -8(RSP)
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGLINK
q1.As = ASTP
q1.From.Type = obj.TYPE_REGREG
q1.From.Reg = REGFP
q1.From.Offset = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
q1.To.Offset = -8
}
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
} else {
// small frame, update SP and save LR in a single MOVD.W instruction
// small frame, save FP and LR with one STP instruction, then update SP.
// Store first, so if a signal comes during the execution of the function
// prologue, the traceback code will not see a half-updated stack frame.
// STP (R29, R30), -aoffset-8(RSP)
q1 = obj.Appendp(q, c.newprog)
q1.As = AMOVD
q1.As = ASTP
q1.Pos = p.Pos
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGLINK
q1.From.Type = obj.TYPE_REGREG
q1.From.Reg = REGFP
q1.From.Offset = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.Scond = C_XPRE
q1.To.Offset = int64(-aoffset)
q1.To.Offset = int64(-aoffset - 8)
q1.To.Reg = REGSP
prologueEnd = q1
q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
// This instruction is not async preemptible, see the above comment.
// SUB $aoffset, RSP, RSP
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
q1.From.Type = obj.TYPE_CONST
q1.From.Offset = int64(aoffset)
q1.Reg = REGSP
q1.To.Type = obj.TYPE_REG
q1.To.Reg = REGSP
q1.Spadj = aoffset
prologueEnd = q1
q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
if buildcfg.GOOS == "ios" {
// See the above comment.
// STP (R29, R30), -8(RSP)
q1 = obj.Appendp(q1, c.newprog)
q1.As = ASTP
q1.Pos = p.Pos
q1.From.Type = obj.TYPE_REGREG
q1.From.Reg = REGFP
q1.From.Offset = REGLINK
q1.To.Type = obj.TYPE_MEM
q1.To.Offset = int64(-8)
q1.To.Reg = REGSP
}
}
prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)
// Frame pointer.
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = AMOVD
q1.From.Type = obj.TYPE_REG
q1.From.Reg = REGFP
q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
q1.To.Offset = -8
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
q1.As = ASUB
@ -850,48 +882,28 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
p.To.Reg = REGFP
}
} else {
/* want write-back pre-indexed SP+autosize -> SP, loading REGLINK*/
// Frame pointer.
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Reg = REGSP
p.From.Offset = -8
p.To.Type = obj.TYPE_REG
p.To.Reg = REGFP
p = obj.Appendp(p, c.newprog)
aoffset := c.autosize
// LDP -8(RSP), (R29, R30)
p.As = ALDP
p.From.Type = obj.TYPE_MEM
p.From.Offset = -8
p.From.Reg = REGSP
p.To.Type = obj.TYPE_REGREG
p.To.Reg = REGFP
p.To.Offset = REGLINK
if aoffset <= 0xF0 {
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.Scond = C_XPOST
p.From.Offset = int64(aoffset)
p.From.Reg = REGSP
p.To.Type = obj.TYPE_REG
p.To.Reg = REGLINK
p.Spadj = -aoffset
} else {
p.As = AMOVD
p.From.Type = obj.TYPE_MEM
p.From.Offset = 0
p.From.Reg = REGSP
p.To.Type = obj.TYPE_REG
p.To.Reg = REGLINK
q = newprog()
q.As = AADD
q.From.Type = obj.TYPE_CONST
q.From.Offset = int64(aoffset)
q.To.Type = obj.TYPE_REG
q.To.Reg = REGSP
q.Link = p.Link
q.Spadj = int32(-q.From.Offset)
q.Pos = p.Pos
p.Link = q
p = q
}
// ADD $aoffset, RSP, RSP
q = newprog()
q.As = AADD
q.From.Type = obj.TYPE_CONST
q.From.Offset = int64(aoffset)
q.To.Type = obj.TYPE_REG
q.To.Reg = REGSP
q.Spadj = -aoffset
q.Pos = p.Pos
q.Link = p.Link
p.Link = q
p = q
}
// If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC',