mirror of
https://github.com/golang/go
synced 2024-11-17 16:34:43 -07:00
cmd/asm: fix the issue of moving 128-bit integers to vector registers on arm64
The CL 249758 added `FMOVQ $vcon, Vd` instruction and assembler used 128-bit simd literal-loading to load `$vcon` from pool into 128-bit vector register `Vd`. Because Go does not have 128-bit integers for now, the assembler will report an error of `immediate out of range` when assembleing `FMOVQ $0x123456789abcdef0123456789abcdef, V0` instruction. This patch lets 128-bit integers take two 64-bit operands, for the high and low parts separately and adds `VMOVQ $hi, $lo, Vd` instruction to move `$hi<<64+$lo' into 128-bit register `Vd`. In addition, this patch renames `FMOVQ/FMOVD/FMOVS` ops to 'VMOVQ/VMOVD/VMOVS' and uses them to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively Update the go doc. Fixes #40725 Change-Id: Ia3c83bb6463f104d2bee960905053a97299e0a3a Reviewed-on: https://go-review.googlesource.com/c/go/+/255900 Trust: fannie zhang <Fannie.Zhang@arm.com> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
ea106cc07a
commit
fa04d488bd
@ -82,6 +82,17 @@ func IsARM64STLXR(op obj.As) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsARM64TBL reports whether the op (as defined by an arm64.A*
|
||||||
|
// constant) is one of the TBL-like instructions and one of its
|
||||||
|
// inputs does not fit into prog.Reg, so require special handling.
|
||||||
|
func IsARM64TBL(op obj.As) bool {
|
||||||
|
switch op {
|
||||||
|
case arm64.AVTBL, arm64.AVMOVQ:
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// ARM64Suffix handles the special suffix for the ARM64.
|
// ARM64Suffix handles the special suffix for the ARM64.
|
||||||
// It returns a boolean to indicate success; failure means
|
// It returns a boolean to indicate success; failure means
|
||||||
// cond was unrecognized.
|
// cond was unrecognized.
|
||||||
@ -125,13 +136,6 @@ func arm64RegisterNumber(name string, n int16) (int16, bool) {
|
|||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsARM64TBL reports whether the op (as defined by an arm64.A*
|
|
||||||
// constant) is one of the table lookup instructions that require special
|
|
||||||
// handling.
|
|
||||||
func IsARM64TBL(op obj.As) bool {
|
|
||||||
return op == arm64.AVTBL
|
|
||||||
}
|
|
||||||
|
|
||||||
// ARM64RegisterExtension parses an ARM64 register with extension or arrangement.
|
// ARM64RegisterExtension parses an ARM64 register with extension or arrangement.
|
||||||
func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error {
|
func ARM64RegisterExtension(a *obj.Addr, ext string, reg, num int16, isAmount, isIndex bool) error {
|
||||||
Rnum := (reg & 31) + int16(num<<5)
|
Rnum := (reg & 31) + int16(num<<5)
|
||||||
|
@ -622,8 +622,9 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
|
|||||||
prog.SetFrom3(a[1])
|
prog.SetFrom3(a[1])
|
||||||
prog.To = a[2]
|
prog.To = a[2]
|
||||||
case sys.ARM64:
|
case sys.ARM64:
|
||||||
|
switch {
|
||||||
|
case arch.IsARM64STLXR(op):
|
||||||
// ARM64 instructions with one input and two outputs.
|
// ARM64 instructions with one input and two outputs.
|
||||||
if arch.IsARM64STLXR(op) {
|
|
||||||
prog.From = a[0]
|
prog.From = a[0]
|
||||||
prog.To = a[1]
|
prog.To = a[1]
|
||||||
if a[2].Type != obj.TYPE_REG {
|
if a[2].Type != obj.TYPE_REG {
|
||||||
@ -631,20 +632,16 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
prog.RegTo2 = a[2].Reg
|
prog.RegTo2 = a[2].Reg
|
||||||
break
|
case arch.IsARM64TBL(op):
|
||||||
}
|
// one of its inputs does not fit into prog.Reg.
|
||||||
if arch.IsARM64TBL(op) {
|
|
||||||
prog.From = a[0]
|
prog.From = a[0]
|
||||||
if a[1].Type != obj.TYPE_REGLIST {
|
|
||||||
p.errorf("%s: expected list; found %s", op, obj.Dconv(prog, &a[1]))
|
|
||||||
}
|
|
||||||
prog.SetFrom3(a[1])
|
prog.SetFrom3(a[1])
|
||||||
prog.To = a[2]
|
prog.To = a[2]
|
||||||
break
|
default:
|
||||||
}
|
|
||||||
prog.From = a[0]
|
prog.From = a[0]
|
||||||
prog.Reg = p.getRegister(prog, op, &a[1])
|
prog.Reg = p.getRegister(prog, op, &a[1])
|
||||||
prog.To = a[2]
|
prog.To = a[2]
|
||||||
|
}
|
||||||
case sys.I386:
|
case sys.I386:
|
||||||
prog.From = a[0]
|
prog.From = a[0]
|
||||||
prog.SetFrom3(a[1])
|
prog.SetFrom3(a[1])
|
||||||
|
6
src/cmd/asm/internal/asm/testdata/arm64.s
vendored
6
src/cmd/asm/internal/asm/testdata/arm64.s
vendored
@ -218,8 +218,10 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||||||
FMOVD $(28.0), F4 // 0490671e
|
FMOVD $(28.0), F4 // 0490671e
|
||||||
|
|
||||||
// move a large constant to a Vd.
|
// move a large constant to a Vd.
|
||||||
FMOVD $0x8040201008040201, V20 // FMOVD $-9205322385119247871, V20
|
VMOVS $0x80402010, V11 // VMOVS $2151686160, V11
|
||||||
FMOVQ $0x8040201008040202, V29 // FMOVQ $-9205322385119247870, V29
|
VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20
|
||||||
|
VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10
|
||||||
|
VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20
|
||||||
|
|
||||||
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
|
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
|
||||||
FMOVS (R2)(R6<<2), F4 // 447866bc
|
FMOVS (R2)(R6<<2), F4 // 447866bc
|
||||||
|
@ -875,7 +875,9 @@ const (
|
|||||||
AFLDPS
|
AFLDPS
|
||||||
AFMOVD
|
AFMOVD
|
||||||
AFMOVS
|
AFMOVS
|
||||||
AFMOVQ
|
AVMOVQ
|
||||||
|
AVMOVD
|
||||||
|
AVMOVS
|
||||||
AFMULD
|
AFMULD
|
||||||
AFMULS
|
AFMULS
|
||||||
AFNEGD
|
AFNEGD
|
||||||
|
@ -381,7 +381,9 @@ var Anames = []string{
|
|||||||
"FLDPS",
|
"FLDPS",
|
||||||
"FMOVD",
|
"FMOVD",
|
||||||
"FMOVS",
|
"FMOVS",
|
||||||
"FMOVQ",
|
"VMOVQ",
|
||||||
|
"VMOVD",
|
||||||
|
"VMOVS",
|
||||||
"FMULD",
|
"FMULD",
|
||||||
"FMULS",
|
"FMULS",
|
||||||
"FNEGD",
|
"FNEGD",
|
||||||
|
@ -260,8 +260,9 @@ func MOVCONST(d int64, s int, rt int) uint32 {
|
|||||||
const (
|
const (
|
||||||
// Optab.flag
|
// Optab.flag
|
||||||
LFROM = 1 << 0 // p.From uses constant pool
|
LFROM = 1 << 0 // p.From uses constant pool
|
||||||
LTO = 1 << 1 // p.To uses constant pool
|
LFROM3 = 1 << 1 // p.From3 uses constant pool
|
||||||
NOTUSETMP = 1 << 2 // p expands to multiple instructions, but does NOT use REGTMP
|
LTO = 1 << 2 // p.To uses constant pool
|
||||||
|
NOTUSETMP = 1 << 3 // p expands to multiple instructions, but does NOT use REGTMP
|
||||||
)
|
)
|
||||||
|
|
||||||
var optab = []Optab{
|
var optab = []Optab{
|
||||||
@ -397,10 +398,10 @@ var optab = []Optab{
|
|||||||
/* load long effective stack address (load int32 offset and add) */
|
/* load long effective stack address (load int32 offset and add) */
|
||||||
{AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0},
|
{AMOVD, C_LACON, C_NONE, C_NONE, C_RSP, 34, 8, REGSP, LFROM, 0},
|
||||||
|
|
||||||
// Move a large constant to a Vn.
|
// Move a large constant to a vector register.
|
||||||
{AFMOVQ, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
|
{AVMOVQ, C_VCON, C_NONE, C_VCON, C_VREG, 101, 4, 0, LFROM | LFROM3, 0},
|
||||||
{AFMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
|
{AVMOVD, C_VCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
|
||||||
{AFMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
|
{AVMOVS, C_LCON, C_NONE, C_NONE, C_VREG, 101, 4, 0, LFROM, 0},
|
||||||
|
|
||||||
/* jump operations */
|
/* jump operations */
|
||||||
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
|
{AB, C_NONE, C_NONE, C_NONE, C_SBRA, 5, 4, 0, 0, 0},
|
||||||
@ -950,13 +951,14 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
|||||||
c.ctxt.Diag("zero-width instruction\n%v", p)
|
c.ctxt.Diag("zero-width instruction\n%v", p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch o.flag & (LFROM | LTO) {
|
if o.flag&LFROM != 0 {
|
||||||
case LFROM:
|
|
||||||
c.addpool(p, &p.From)
|
c.addpool(p, &p.From)
|
||||||
|
}
|
||||||
case LTO:
|
if o.flag&LFROM3 != 0 {
|
||||||
|
c.addpool(p, p.GetFrom3())
|
||||||
|
}
|
||||||
|
if o.flag<O != 0 {
|
||||||
c.addpool(p, &p.To)
|
c.addpool(p, &p.To)
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */
|
if p.As == AB || p.As == obj.ARET || p.As == AERET { /* TODO: other unconditional operations */
|
||||||
@ -1174,8 +1176,8 @@ func (c *ctxt7) addpool(p *obj.Prog, a *obj.Addr) {
|
|||||||
sz := 4
|
sz := 4
|
||||||
|
|
||||||
if a.Type == obj.TYPE_CONST {
|
if a.Type == obj.TYPE_CONST {
|
||||||
if lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit)) {
|
if (lit != int64(int32(lit)) && uint64(lit) != uint64(uint32(lit))) || p.As == AVMOVQ || p.As == AVMOVD {
|
||||||
// out of range -0x80000000 ~ 0xffffffff, must store 64-bit
|
// out of range -0x80000000 ~ 0xffffffff or VMOVQ or VMOVD operand, must store 64-bit.
|
||||||
t.As = ADWORD
|
t.As = ADWORD
|
||||||
sz = 8
|
sz = 8
|
||||||
} // else store 32-bit
|
} // else store 32-bit
|
||||||
@ -2675,7 +2677,7 @@ func buildop(ctxt *obj.Link) {
|
|||||||
case AFCSELD:
|
case AFCSELD:
|
||||||
oprangeset(AFCSELS, t)
|
oprangeset(AFCSELS, t)
|
||||||
|
|
||||||
case AFMOVS, AFMOVD, AFMOVQ:
|
case AFMOVS, AFMOVD, AVMOVQ, AVMOVD, AVMOVS:
|
||||||
break
|
break
|
||||||
|
|
||||||
case AFCVTZSD:
|
case AFCVTZSD:
|
||||||
@ -5142,7 +5144,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||||||
o1 = q<<30 | 0xe<<24 | len<<13
|
o1 = q<<30 | 0xe<<24 | len<<13
|
||||||
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
|
o1 |= (uint32(rf&31) << 16) | uint32(offset&31)<<5 | uint32(rt&31)
|
||||||
|
|
||||||
case 101: // FOMVQ/FMOVD $vcon, Vd -> load from constant pool.
|
case 101: // VMOVQ $vcon1, $vcon2, Vd or VMOVD|VMOVS $vcon, Vd -> FMOVQ/FMOVD/FMOVS pool(PC), Vd: load from constant pool.
|
||||||
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
|
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
|
||||||
|
|
||||||
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
|
case 102: /* vushll, vushll2, vuxtl, vuxtl2 */
|
||||||
@ -6672,15 +6674,15 @@ func (c *ctxt7) omovlit(as obj.As, p *obj.Prog, a *obj.Addr, dr int) uint32 {
|
|||||||
} else {
|
} else {
|
||||||
fp, w := 0, 0
|
fp, w := 0, 0
|
||||||
switch as {
|
switch as {
|
||||||
case AFMOVS:
|
case AFMOVS, AVMOVS:
|
||||||
fp = 1
|
fp = 1
|
||||||
w = 0 /* 32-bit SIMD/FP */
|
w = 0 /* 32-bit SIMD/FP */
|
||||||
|
|
||||||
case AFMOVD:
|
case AFMOVD, AVMOVD:
|
||||||
fp = 1
|
fp = 1
|
||||||
w = 1 /* 64-bit SIMD/FP */
|
w = 1 /* 64-bit SIMD/FP */
|
||||||
|
|
||||||
case AFMOVQ:
|
case AVMOVQ:
|
||||||
fp = 1
|
fp = 1
|
||||||
w = 2 /* 128-bit SIMD/FP */
|
w = 2 /* 128-bit SIMD/FP */
|
||||||
|
|
||||||
|
@ -86,6 +86,16 @@ In the following example, PCALIGN at the entry of the function Add will align it
|
|||||||
MOVD $1, R1
|
MOVD $1, R1
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
7. Move large constants to vector registers.
|
||||||
|
|
||||||
|
Go asm uses VMOVQ/VMOVD/VMOVS to move 128-bit, 64-bit and 32-bit constants into vector registers, respectively.
|
||||||
|
And for a 128-bit interger, it take two 64-bit operands, for the high and low parts separately.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
VMOVS $0x11223344, V0
|
||||||
|
VMOVD $0x1122334455667788, V1
|
||||||
|
VMOVQ $0x1122334455667788, $8877665544332211, V2 // V2=0x11223344556677888877665544332211
|
||||||
|
|
||||||
Special Cases.
|
Special Cases.
|
||||||
|
|
||||||
(1) umov is written as VMOV.
|
(1) umov is written as VMOV.
|
||||||
|
Loading…
Reference in New Issue
Block a user