1
0
mirror of https://github.com/golang/go synced 2024-11-15 01:30:31 -07:00

cmd/asm: add support for loong64 FMA instructions

Add support for assembling the FMA instructions present in the LoongArch
base ISA v1.00. This requires adding a new instruction format and making
use of a third source operand, which is put in RestArgs[0].

The single-precision instructions have the `.s` prefix in their official
mnemonics, and similar Go asm instructions all have `S` prefix for the
other architectures having FMA support, but in this change they instead
have `F` prefix in Go asm because loong64 currently follows the mips
backends in the naming convention. This could be changed later because
FMA is fully expressible in pure Go, making it unlikely to have to hand-
write such assembly in the wild.

Example mapping between actual encoding and Go asm syntax:

fmadd.s fd, fj, fk, fa -> FMADDF fa, fk, fj, fd
(prog.From = fa, prog.Reg = fk, prog.RestArgs[0] = fj and prog.To = fd)

fmadd.s fd, fd, fk, fa -> FMADDF fa, fk, fd
(prog.From = fa, prog.Reg = fk and prog.To = fd)

This patch is a copy of CL 477716.
Co-authored-by: WANG Xuerui <git@xen0n.name>

Change-Id: I9b4e4c601d6c5a854ee238f085849666e4faf090
Reviewed-on: https://go-review.googlesource.com/c/go/+/623877
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
This commit is contained in:
Xiaolin Zhao 2024-10-31 16:57:23 +08:00 committed by abner chenc
parent 4a0d5d601e
commit 3ae5ff2a27
4 changed files with 82 additions and 0 deletions

View File

@ -303,6 +303,23 @@ lable2:
AMMINDBWU R14, (R13), R12 // ac397138
AMMINDBVU R14, (R13), R12 // acb97138
FMADDF F2, F14, F9, F16 // 30391108
FMADDD F11, F20, F23, F12 // ecd22508
FMSUBF F3, F11, F31, F22 // f6af5108
FMSUBD F13, F30, F9, F15 // 2ff96608
FNMADDF F27, F11, F5, F21 // b5ac9d08
FNMADDD F29, F14, F27, F6 // 66bbae08
FNMSUBF F17, F8, F12, F8 // 88a1d808
FNMSUBD F29, F21, F3, F17 // 71d4ee08
FMADDF F2, F14, F9 // 29391108
FMADDD F11, F20, F23 // f7d22508
FMSUBF F3, F11, F31 // ffaf5108
FMSUBD F13, F30, F9 // 29f96608
FNMADDF F27, F11, F5 // a5ac9d08
FNMADDD F29, F14, F27 // 7bbbae08
FNMSUBF F17, F8, F12 // 8ca1d808
FNMSUBD F29, F21, F3 // 63d4ee08
FMINF F4, F5, F6 // a6900a01
FMINF F4, F5 // a5900a01
FMIND F4, F5, F6 // a6100b01

View File

@ -493,6 +493,16 @@ const (
ARDTIMED
ACPUCFG
// 3.2.1.2
AFMADDF
AFMADDD
AFMSUBF
AFMSUBD
AFNMADDF
AFNMADDD
AFNMSUBF
AFNMSUBD
// 3.2.1.3
AFMINF
AFMIND

View File

@ -211,6 +211,14 @@ var Anames = []string{
"RDTIMEHW",
"RDTIMED",
"CPUCFG",
"FMADDF",
"FMADDD",
"FMSUBF",
"FMSUBD",
"FNMADDF",
"FNMADDD",
"FNMSUBF",
"FNMSUBD",
"FMINF",
"FMIND",
"FMAXF",

View File

@ -92,6 +92,9 @@ var optab = []Optab{
{AMOVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
{AMOVD, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
{AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 39, 4, 0, 0},
{AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 39, 4, 0, 0},
{AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
{AMOVWU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
{AMOVV, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
@ -1101,6 +1104,15 @@ func buildop(ctxt *obj.Link) {
opset(AFSCALEBF, r0)
opset(AFSCALEBD, r0)
case AFMADDF:
opset(AFMADDD, r0)
opset(AFMSUBF, r0)
opset(AFMSUBD, r0)
opset(AFNMADDF, r0)
opset(AFNMADDD, r0)
opset(AFNMSUBF, r0)
opset(AFNMSUBD, r0)
case AAND:
opset(AOR, r0)
opset(AXOR, r0)
@ -1257,6 +1269,10 @@ func buildop(ctxt *obj.Link) {
}
}
func OP_RRRR(op uint32, r1 uint32, r2 uint32, r3 uint32, r4 uint32) uint32 {
return op | (r1&0x1F)<<15 | (r2&0x1F)<<10 | (r3&0x1F)<<5 | (r4 & 0x1F)
}
// r1 -> rk
// r2 -> rj
// r3 -> rd
@ -1656,6 +1672,13 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
case 39: // fmadd r1, r2, [r3], r4
r := int(p.To.Reg)
if len(p.RestArgs) > 0 {
r = int(p.GetFrom3().Reg)
}
o1 = OP_RRRR(c.oprrrr(p.As), uint32(p.From.Reg), uint32(p.Reg), uint32(r), uint32(p.To.Reg))
case 40: // word
o1 = uint32(c.regoff(&p.From))
@ -1870,6 +1893,30 @@ func (c *ctxt0) regoff(a *obj.Addr) int32 {
return int32(c.vregoff(a))
}
func (c *ctxt0) oprrrr(a obj.As) uint32 {
switch a {
case AFMADDF:
return 0x81 << 20 // fmadd.s
case AFMADDD:
return 0x82 << 20 // fmadd.d
case AFMSUBF:
return 0x85 << 20 // fmsub.s
case AFMSUBD:
return 0x86 << 20 // fmsub.d
case AFNMADDF:
return 0x89 << 20 // fnmadd.f
case AFNMADDD:
return 0x8a << 20 // fnmadd.d
case AFNMSUBF:
return 0x8d << 20 // fnmsub.s
case AFNMSUBD:
return 0x8e << 20 // fnmsub.d
}
c.ctxt.Diag("bad rrrr opcode %v", a)
return 0
}
func (c *ctxt0) oprrr(a obj.As) uint32 {
switch a {
case AADD: