mirror of
https://github.com/golang/go
synced 2024-11-23 06:50:05 -07:00
cmd/internal/obj/arm64: improve classification of loads and stores
Currently, pool literals are added when they are not needed, namely in the case where the offset is a 24 bit unsigned scaled immediate. By improving the classification of loads and stores, we can avoid generating unused pool literals. However, more importantly this provides a basis for further improvement of the load and store code generation. Updates #59615 Change-Id: Ia3bad1709314565a05894a76c434cca2fa4533c4 Reviewed-on: https://go-review.googlesource.com/c/go/+/512538 Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: David Chase <drchase@google.com> Run-TryBot: Joel Sing <joel@sing.id.au> TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
parent
a37da52d75
commit
3313b39bae
@ -414,7 +414,8 @@ const (
|
||||
C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO
|
||||
C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO
|
||||
C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO
|
||||
C_LAUTO // any other 32-bit constant
|
||||
C_LAUTOPOOL // any other constant up to 64 bits (needs pool literal)
|
||||
C_LAUTO // any other constant up to 64 bits
|
||||
|
||||
C_SEXT1 // 0 to 4095, direct
|
||||
C_SEXT2 // 0 to 8190
|
||||
@ -454,6 +455,7 @@ const (
|
||||
C_UOREG32K_16
|
||||
C_UOREG32K
|
||||
C_UOREG64K
|
||||
C_LOREGPOOL
|
||||
C_LOREG
|
||||
|
||||
C_ADDR // TODO(aram): explain difference from C_VCONADDR
|
||||
|
@ -75,6 +75,7 @@ var cnames7 = []string{
|
||||
"UAUTO32K_8",
|
||||
"UAUTO32K",
|
||||
"UAUTO64K",
|
||||
"LAUTOPOOL",
|
||||
"LAUTO",
|
||||
"SEXT1",
|
||||
"SEXT2",
|
||||
@ -113,6 +114,7 @@ var cnames7 = []string{
|
||||
"UOREG32K_16",
|
||||
"UOREG32K",
|
||||
"UOREG64K",
|
||||
"LOREGPOOL",
|
||||
"LOREG",
|
||||
"ADDR",
|
||||
"GOTADDR",
|
||||
|
@ -591,38 +591,66 @@ var optab = []Optab{
|
||||
{AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0, 0},
|
||||
|
||||
/* long displacement store */
|
||||
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
|
||||
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
|
||||
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
|
||||
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
|
||||
|
||||
/* long displacement load */
|
||||
{AMOVB, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVH, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVW, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVD, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVB, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AMOVB, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AMOVB, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVH, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AMOVH, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AMOVH, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVW, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AMOVW, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AMOVW, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AMOVD, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AMOVD, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AMOVD, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
|
||||
{AFMOVS, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVS, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AFMOVS, C_LAUTOPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AFMOVS, C_LOREGPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AFMOVD, C_LAUTOPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AFMOVD, C_LOREGPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
{AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, 0, 0},
|
||||
{AFMOVQ, C_LAUTOPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
|
||||
{AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, 0, 0},
|
||||
{AFMOVQ, C_LOREGPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
|
||||
|
||||
/* pre/post-indexed load (unscaled, signed 9-bit offset) */
|
||||
{AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 22, 4, 0, 0, C_XPOST},
|
||||
@ -1476,6 +1504,14 @@ func isNEGop(op obj.As) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func isMOVop(op obj.As) bool {
|
||||
switch op {
|
||||
case AMOVB, AMOVBU, AMOVH, AMOVHU, AMOVW, AMOVWU, AMOVD, AFMOVS, AFMOVD, AFMOVQ:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isRegShiftOrExt(a *obj.Addr) bool {
|
||||
return (a.Index-obj.RBaseARM64)®_EXT != 0 || (a.Index-obj.RBaseARM64)®_LSL != 0
|
||||
}
|
||||
@ -1912,6 +1948,63 @@ func (c *ctxt7) con64class(a *obj.Addr) int {
|
||||
}
|
||||
}
|
||||
|
||||
// loadStoreClass reclassifies a load or store operation based on its offset.
|
||||
func (c *ctxt7) loadStoreClass(p *obj.Prog, lsc int, v int64) int {
|
||||
// Avoid reclassification of pre/post-indexed loads and stores.
|
||||
if p.Scond == C_XPRE || p.Scond == C_XPOST {
|
||||
return lsc
|
||||
}
|
||||
if cmp(C_NSAUTO, lsc) || cmp(C_NSOREG, lsc) {
|
||||
return lsc
|
||||
}
|
||||
|
||||
needsPool := true
|
||||
switch p.As {
|
||||
case AMOVB, AMOVBU:
|
||||
if cmp(C_UAUTO4K, lsc) || cmp(C_UOREG4K, lsc) {
|
||||
return lsc
|
||||
}
|
||||
if v >= 0 && v <= 0xffffff {
|
||||
needsPool = false
|
||||
}
|
||||
case AMOVH, AMOVHU:
|
||||
if cmp(C_UAUTO8K, lsc) || cmp(C_UOREG8K, lsc) {
|
||||
return lsc
|
||||
}
|
||||
if v >= 0 && v <= 0xfffffe && v&1 == 0 {
|
||||
needsPool = false
|
||||
}
|
||||
case AMOVW, AMOVWU, AFMOVS:
|
||||
if cmp(C_UAUTO16K, lsc) || cmp(C_UOREG16K, lsc) {
|
||||
return lsc
|
||||
}
|
||||
if v >= 0 && v <= 0xfffffc && v&3 == 0 {
|
||||
needsPool = false
|
||||
}
|
||||
case AMOVD, AFMOVD:
|
||||
if cmp(C_UAUTO32K, lsc) || cmp(C_UOREG32K, lsc) {
|
||||
return lsc
|
||||
}
|
||||
if v >= 0 && v <= 0xfffff8 && v&7 == 0 {
|
||||
needsPool = false
|
||||
}
|
||||
case AFMOVQ:
|
||||
if cmp(C_UAUTO64K, lsc) || cmp(C_UOREG64K, lsc) {
|
||||
return lsc
|
||||
}
|
||||
if v >= 0 && v <= 0xfffff0 && v&15 == 0 {
|
||||
needsPool = false
|
||||
}
|
||||
}
|
||||
if needsPool && cmp(C_LAUTO, lsc) {
|
||||
return C_LAUTOPOOL
|
||||
}
|
||||
if needsPool && cmp(C_LOREG, lsc) {
|
||||
return C_LOREGPOOL
|
||||
}
|
||||
return lsc
|
||||
}
|
||||
|
||||
func (c *ctxt7) aclass(a *obj.Addr) int {
|
||||
switch a.Type {
|
||||
case obj.TYPE_NONE:
|
||||
@ -2135,6 +2228,12 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
|
||||
a1 = c.con64class(&p.From)
|
||||
}
|
||||
}
|
||||
if p.From.Type == obj.TYPE_MEM {
|
||||
if isMOVop(p.As) && (cmp(C_LAUTO, a1) || cmp(C_LOREG, a1)) {
|
||||
// More specific classification of large offset loads and stores.
|
||||
a1 = c.loadStoreClass(p, a1, c.instoffset)
|
||||
}
|
||||
}
|
||||
p.From.Class = int8(a1)
|
||||
}
|
||||
|
||||
@ -2155,6 +2254,12 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
|
||||
a4 := int(p.To.Class)
|
||||
if a4 == 0 {
|
||||
a4 = c.aclass(&p.To)
|
||||
if p.To.Type == obj.TYPE_MEM {
|
||||
if isMOVop(p.As) && (cmp(C_LAUTO, a4) || cmp(C_LOREG, a4)) {
|
||||
// More specific classification of large offset loads and stores.
|
||||
a4 = c.loadStoreClass(p, a4, c.instoffset)
|
||||
}
|
||||
}
|
||||
p.To.Class = int8(a4)
|
||||
}
|
||||
|
||||
@ -3931,6 +4036,9 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
if err != nil {
|
||||
goto storeusepool
|
||||
}
|
||||
if p.Pool != nil {
|
||||
c.ctxt.Diag("%v: unused constant in pool (%v)\n", p, v)
|
||||
}
|
||||
o1 = c.oaddi(p, AADD, hi, REGTMP, r)
|
||||
o2 = c.olsr12u(p, c.opstr(p, p.As), lo, REGTMP, p.From.Reg)
|
||||
break
|
||||
@ -3939,6 +4047,9 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
if r == REGTMP || p.From.Reg == REGTMP {
|
||||
c.ctxt.Diag("REGTMP used in large offset store: %v", p)
|
||||
}
|
||||
if p.Pool == nil {
|
||||
c.ctxt.Diag("%v: constant is not in pool", p)
|
||||
}
|
||||
o1 = c.omovlit(AMOVD, p, &p.To, REGTMP)
|
||||
o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), int(r), REGTMP)
|
||||
|
||||
@ -3964,11 +4075,17 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
if err != nil {
|
||||
goto loadusepool
|
||||
}
|
||||
if p.Pool != nil {
|
||||
c.ctxt.Diag("%v: unused constant in pool (%v)\n", p, v)
|
||||
}
|
||||
o1 = c.oaddi(p, AADD, hi, REGTMP, r)
|
||||
o2 = c.olsr12u(p, c.opldr(p, p.As), lo, REGTMP, p.To.Reg)
|
||||
break
|
||||
|
||||
loadusepool:
|
||||
if p.Pool == nil {
|
||||
c.ctxt.Diag("%v: constant is not in pool", p)
|
||||
}
|
||||
if r == REGTMP || p.From.Reg == REGTMP {
|
||||
c.ctxt.Diag("REGTMP used in large offset load: %v", p)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user