mirror of
https://github.com/golang/go
synced 2024-11-19 04:04:47 -07:00
cmd/internal/obj/arm64: load large constants into vector registers from rodata
Load large constants into vector registers from rodata, instead of placing them in the literal pool. This treats VMOVQ/VMOVD/VMOVS the same as FMOVD/FMOVS and makes use of the existing mechanism for storing values in rodata. Two additional instructions are required for a load, however these instructions are used infrequently and already have a high latency. Updates #59615 Change-Id: I54226730267689963d73321e548733ae2d66740e Reviewed-on: https://go-review.googlesource.com/c/go/+/515617 Reviewed-by: Eric Fang <eric.fang@arm.com> Reviewed-by: Carlos Amedee <carlos@golang.org> Run-TryBot: Joel Sing <joel@sing.id.au> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
a5ab4a9471
commit
0a17b2c174
@ -282,7 +282,6 @@ func MOVCONST(d int64, s int, rt int) uint32 {
|
|||||||
const (
|
const (
|
||||||
// Optab.flag
|
// Optab.flag
|
||||||
LFROM = 1 << iota // p.From uses constant pool
|
LFROM = 1 << iota // p.From uses constant pool
|
||||||
LFROM128 // p.From3<<64+p.From forms a 128-bit constant in literal pool
|
|
||||||
LTO // p.To uses constant pool
|
LTO // p.To uses constant pool
|
||||||
NOTUSETMP // p expands to multiple instructions, but does NOT use REGTMP
|
NOTUSETMP // p expands to multiple instructions, but does NOT use REGTMP
|
||||||
BRANCH14BITS // branch instruction encodes 14 bits
|
BRANCH14BITS // branch instruction encodes 14 bits
|
||||||
@ -423,10 +422,10 @@ var optab = []Optab{
|
|||||||
/* load long effective stack address (load int32 offset and add) */
|
/* load long effective stack address (load int32 offset and add) */
|
||||||
{AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, C_NONE, 34, 8, REGSP, LFROM, 0},
|
{AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, C_NONE, 34, 8, REGSP, LFROM, 0},
|
||||||
|
|
||||||
// Move a large constant to a vector register.
|
// Load a large constant into a vector register.
|
||||||
{AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, C_NONE, 101, 4, 0, LFROM128, 0},
|
{AVMOVS, C_ADDR, C_NONE, C_NONE, C_VREG, C_NONE, 65, 12, 0, 0, 0},
|
||||||
{AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, C_NONE, 101, 4, 0, LFROM, 0},
|
{AVMOVD, C_ADDR, C_NONE, C_NONE, C_VREG, C_NONE, 65, 12, 0, 0, 0},
|
||||||
{AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, C_NONE, 101, 4, 0, LFROM, 0},
|
{AVMOVQ, C_ADDR, C_NONE, C_NONE, C_VREG, C_NONE, 65, 12, 0, 0, 0},
|
||||||
|
|
||||||
/* jump operations */
|
/* jump operations */
|
||||||
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0},
|
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0},
|
||||||
@ -1117,9 +1116,6 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
if o.flag&LFROM != 0 {
|
if o.flag&LFROM != 0 {
|
||||||
c.addpool(p, &p.From)
|
c.addpool(p, &p.From)
|
||||||
}
|
}
|
||||||
if o.flag&LFROM128 != 0 {
|
|
||||||
c.addpool128(p, &p.From, p.GetFrom3())
|
|
||||||
}
|
|
||||||
if o.flag<O != 0 {
|
if o.flag<O != 0 {
|
||||||
c.addpool(p, &p.To)
|
c.addpool(p, &p.To)
|
||||||
}
|
}
|
||||||
@ -1321,34 +1317,6 @@ func (c *ctxt7) flushpool(p *obj.Prog) {
|
|||||||
c.pool.start = 0
|
c.pool.start = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// addpool128 adds a 128-bit constant to literal pool by two consecutive DWORD
|
|
||||||
// instructions, the 128-bit constant is formed by ah.Offset<<64+al.Offset.
|
|
||||||
func (c *ctxt7) addpool128(p *obj.Prog, al, ah *obj.Addr) {
|
|
||||||
q := c.newprog()
|
|
||||||
q.As = ADWORD
|
|
||||||
q.To.Type = obj.TYPE_CONST
|
|
||||||
q.To.Offset = al.Offset // q.Pc is lower than t.Pc, so al.Offset is stored in q.
|
|
||||||
|
|
||||||
t := c.newprog()
|
|
||||||
t.As = ADWORD
|
|
||||||
t.To.Type = obj.TYPE_CONST
|
|
||||||
t.To.Offset = ah.Offset
|
|
||||||
|
|
||||||
q.Link = t
|
|
||||||
|
|
||||||
if c.blitrl == nil {
|
|
||||||
c.blitrl = q
|
|
||||||
c.pool.start = uint32(p.Pc)
|
|
||||||
} else {
|
|
||||||
c.elitrl.Link = q
|
|
||||||
}
|
|
||||||
|
|
||||||
c.elitrl = t
|
|
||||||
c.pool.size = roundUp(c.pool.size, 16)
|
|
||||||
c.pool.size += 16
|
|
||||||
p.Pool = q
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* MOVD foo(SB), R is actually
|
* MOVD foo(SB), R is actually
|
||||||
* MOVD addr, REGTMP
|
* MOVD addr, REGTMP
|
||||||
@ -1365,8 +1333,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) {
|
|||||||
sz := 4
|
sz := 4
|
||||||
|
|
||||||
if a.Type == obj.TYPE_CONST {
|
if a.Type == obj.TYPE_CONST {
|
||||||
if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD {
|
if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) {
|
||||||
// out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit.
|
// out of range -0x80000000 ~ 0xffffffff, must store 64-bit.
|
||||||
t.As = ADWORD
|
t.As = ADWORD
|
||||||
sz = 8
|
sz = 8
|
||||||
} // else store 32-bit
|
} // else store 32-bit
|
||||||
@ -5660,9 +5628,6 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||||||
o1 = q<<30 | 0xe<<24 | len<<13 | op<<12
|
o1 = q<<30 | 0xe<<24 | len<<13 | op<<12
|
||||||
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
|
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
|
||||||
|
|
||||||
case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool.
|
|
||||||
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
|
|
||||||
|
|
||||||
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
|
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
|
||||||
o1 = c.opirr(p, p.As)
|
o1 = c.opirr(p, p.As)
|
||||||
rf := p.Reg
|
rf := p.Reg
|
||||||
@ -7187,13 +7152,13 @@ func (c *ctxt7) opldr(p *obj.Prog, a obj.As) uint32 {
|
|||||||
case AMOVBU:
|
case AMOVBU:
|
||||||
return LDSTR(0, 0, 1)
|
return LDSTR(0, 0, 1)
|
||||||
|
|
||||||
case AFMOVS:
|
case AFMOVS, AVMOVS:
|
||||||
return LDSTR(2, 1, 1)
|
return LDSTR(2, 1, 1)
|
||||||
|
|
||||||
case AFMOVD:
|
case AFMOVD, AVMOVD:
|
||||||
return LDSTR(3, 1, 1)
|
return LDSTR(3, 1, 1)
|
||||||
|
|
||||||
case AFMOVQ:
|
case AFMOVQ, AVMOVQ:
|
||||||
return LDSTR(0, 1, 3)
|
return LDSTR(0, 1, 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -329,8 +329,33 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rewrite float constants to values stored in memory.
|
// Rewrite float and vector constants to values stored in memory.
|
||||||
switch p.As {
|
switch p.As {
|
||||||
|
case AVMOVS:
|
||||||
|
if p.From.Type == obj.TYPE_CONST {
|
||||||
|
p.From.Type = obj.TYPE_MEM
|
||||||
|
p.From.Sym = c.ctxt.Int32Sym(p.From.Offset)
|
||||||
|
p.From.Name = obj.NAME_EXTERN
|
||||||
|
p.From.Offset = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
case AVMOVD:
|
||||||
|
if p.From.Type == obj.TYPE_CONST {
|
||||||
|
p.From.Type = obj.TYPE_MEM
|
||||||
|
p.From.Sym = c.ctxt.Int64Sym(p.From.Offset)
|
||||||
|
p.From.Name = obj.NAME_EXTERN
|
||||||
|
p.From.Offset = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
case AVMOVQ:
|
||||||
|
if p.From.Type == obj.TYPE_CONST {
|
||||||
|
p.From.Type = obj.TYPE_MEM
|
||||||
|
p.From.Sym = c.ctxt.Int128Sym(p.GetFrom3().Offset, p.From.Offset)
|
||||||
|
p.From.Name = obj.NAME_EXTERN
|
||||||
|
p.From.Offset = 0
|
||||||
|
p.RestArgs = nil
|
||||||
|
}
|
||||||
|
|
||||||
case AFMOVS:
|
case AFMOVS:
|
||||||
if p.From.Type == obj.TYPE_FCONST {
|
if p.From.Type == obj.TYPE_FCONST {
|
||||||
f64 := p.From.Val.(float64)
|
f64 := p.From.Val.(float64)
|
||||||
@ -365,8 +390,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
|||||||
p.From.Name = obj.NAME_EXTERN
|
p.From.Name = obj.NAME_EXTERN
|
||||||
p.From.Offset = 0
|
p.From.Offset = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.ctxt.Flag_dynlink {
|
if c.ctxt.Flag_dynlink {
|
||||||
|
@ -36,6 +36,7 @@ import (
|
|||||||
"cmd/internal/notsha256"
|
"cmd/internal/notsha256"
|
||||||
"cmd/internal/objabi"
|
"cmd/internal/objabi"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"internal/buildcfg"
|
"internal/buildcfg"
|
||||||
"log"
|
"log"
|
||||||
@ -162,6 +163,18 @@ func (ctxt *Link) Float64Sym(f float64) *LSym {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ctxt *Link) Int32Sym(i int64) *LSym {
|
||||||
|
name := fmt.Sprintf("$i32.%08x", uint64(i))
|
||||||
|
return ctxt.LookupInit(name, func(s *LSym) {
|
||||||
|
s.Size = 4
|
||||||
|
s.WriteInt(ctxt, 0, 4, i)
|
||||||
|
s.Type = objabi.SRODATA
|
||||||
|
s.Set(AttrLocal, true)
|
||||||
|
s.Set(AttrContentAddressable, true)
|
||||||
|
ctxt.constSyms = append(ctxt.constSyms, s)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (ctxt *Link) Int64Sym(i int64) *LSym {
|
func (ctxt *Link) Int64Sym(i int64) *LSym {
|
||||||
name := fmt.Sprintf("$i64.%016x", uint64(i))
|
name := fmt.Sprintf("$i64.%016x", uint64(i))
|
||||||
return ctxt.LookupInit(name, func(s *LSym) {
|
return ctxt.LookupInit(name, func(s *LSym) {
|
||||||
@ -174,6 +187,24 @@ func (ctxt *Link) Int64Sym(i int64) *LSym {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ctxt *Link) Int128Sym(hi, lo int64) *LSym {
|
||||||
|
name := fmt.Sprintf("$i128.%016x%016x", uint64(hi), uint64(lo))
|
||||||
|
return ctxt.LookupInit(name, func(s *LSym) {
|
||||||
|
s.Size = 16
|
||||||
|
if ctxt.Arch.ByteOrder == binary.LittleEndian {
|
||||||
|
s.WriteInt(ctxt, 0, 8, lo)
|
||||||
|
s.WriteInt(ctxt, 8, 8, hi)
|
||||||
|
} else {
|
||||||
|
s.WriteInt(ctxt, 0, 8, hi)
|
||||||
|
s.WriteInt(ctxt, 8, 8, lo)
|
||||||
|
}
|
||||||
|
s.Type = objabi.SRODATA
|
||||||
|
s.Set(AttrLocal, true)
|
||||||
|
s.Set(AttrContentAddressable, true)
|
||||||
|
ctxt.constSyms = append(ctxt.constSyms, s)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// GCLocalsSym generates a content-addressable sym containing data.
|
// GCLocalsSym generates a content-addressable sym containing data.
|
||||||
func (ctxt *Link) GCLocalsSym(data []byte) *LSym {
|
func (ctxt *Link) GCLocalsSym(data []byte) *LSym {
|
||||||
sum := notsha256.Sum256(data)
|
sum := notsha256.Sum256(data)
|
||||||
|
Loading…
Reference in New Issue
Block a user