mirror of
https://github.com/golang/go
synced 2024-11-14 17:30:29 -07:00
cmd/internal/obj/loong64: add {V,XV}LD/{V,XV}LDX/{V,XV}ST/{V,XV}STX instructions support
This CL adding primitive asm support of Loong64 LSX [1] and LASX [2], by introducing new sets of register V0-V31 (C_VREG), X0-X31 (C_XREG) and 8 new instructions. On Loong64, VLD,XVLD,VST,XVST implement vector memory access operations using immediate values offset. VLDX, XVLDX, VSTX, XVSTX implement vector memory access operations using register offset. Go asm syntax: VMOVQ n(RJ), RV (128bit vector load) XVMOVQ n(RJ), RX (256bit vector load) VMOVQ RV, n(RJ) (128bit vector store) XVMOVQ RX, n(RJ) (256bit vector store) VMOVQ (RJ)(RK), RV (128bit vector load) XVMOVQ (RJ)(RK), RX (256bit vector load) VMOVQ RV, (RJ)(RK) (128bit vector store) XVMOVQ RX, (RJ)(RK) (256bit vector store) Equivalent platform assembler syntax: vld vd, rj, si12 xvld xd, rj, si12 vst vd, rj, si12 xvst xd, rj, si12 vldx vd, rj, rk xvldx xd, rj, rk vstx vd, rj, rk xvstx xd, rj, rk [1]: LSX: Loongson SIMD Extension, 128bit [2]: LASX: Loongson Advanced SIMD Extension, 256bit Change-Id: Ibaf5ddfd29b77670c3c44cc32bead36b2c8b8003 Reviewed-on: https://go-review.googlesource.com/c/go/+/616075 Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn> Reviewed-by: Meidan Li <limeidan@loongson.cn> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
ac345fb7e7
commit
751a817ccc
@ -520,15 +520,27 @@ func archLoong64(linkArch *obj.LinkArch) *Arch {
|
||||
for i := loong64.REG_R0; i <= loong64.REG_R31; i++ {
|
||||
register[obj.Rconv(i)] = int16(i)
|
||||
}
|
||||
|
||||
for i := loong64.REG_F0; i <= loong64.REG_F31; i++ {
|
||||
register[obj.Rconv(i)] = int16(i)
|
||||
}
|
||||
|
||||
for i := loong64.REG_FCSR0; i <= loong64.REG_FCSR31; i++ {
|
||||
register[obj.Rconv(i)] = int16(i)
|
||||
}
|
||||
|
||||
for i := loong64.REG_FCC0; i <= loong64.REG_FCC31; i++ {
|
||||
register[obj.Rconv(i)] = int16(i)
|
||||
}
|
||||
|
||||
for i := loong64.REG_V0; i <= loong64.REG_V31; i++ {
|
||||
register[obj.Rconv(i)] = int16(i)
|
||||
}
|
||||
|
||||
for i := loong64.REG_X0; i <= loong64.REG_X31; i++ {
|
||||
register[obj.Rconv(i)] = int16(i)
|
||||
}
|
||||
|
||||
// Pseudo-registers.
|
||||
register["SB"] = RSB
|
||||
register["FP"] = RFP
|
||||
@ -541,6 +553,8 @@ func archLoong64(linkArch *obj.LinkArch) *Arch {
|
||||
"FCSR": true,
|
||||
"FCC": true,
|
||||
"R": true,
|
||||
"V": true,
|
||||
"X": true,
|
||||
}
|
||||
|
||||
instructions := make(map[string]obj.As)
|
||||
|
@ -66,6 +66,14 @@ func loong64RegisterNumber(name string, n int16) (int16, bool) {
|
||||
if 0 <= n && n <= 31 {
|
||||
return loong64.REG_R0 + n, true
|
||||
}
|
||||
case "V":
|
||||
if 0 <= n && n <= 31 {
|
||||
return loong64.REG_V0 + n, true
|
||||
}
|
||||
case "X":
|
||||
if 0 <= n && n <= 31 {
|
||||
return loong64.REG_X0 + n, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
32
src/cmd/asm/internal/asm/testdata/loong64enc1.s
vendored
32
src/cmd/asm/internal/asm/testdata/loong64enc1.s
vendored
@ -401,3 +401,35 @@ lable2:
|
||||
FSCALEBD F4, F5, F6 // a6101101
|
||||
FLOGBF F4, F5 // 85241401
|
||||
FLOGBD F4, F5 // 85281401
|
||||
|
||||
// VSTX/VLDX/XVSTX/XVLDX instructions
|
||||
VMOVQ V2, (R5)(R5) // a2144438
|
||||
VMOVQ (R4)(R5), V2 // 82144038
|
||||
XVMOVQ X2, (R4)(R5) // 82144c38
|
||||
XVMOVQ (R4)(R5), X2 // 82144838
|
||||
|
||||
// VST/VLD/XVST/XVLD instructions
|
||||
VMOVQ V2, (R4) // 8200402c
|
||||
VMOVQ V2, 3(R4) // 820c402c
|
||||
VMOVQ V2, 2040(R4) // 82e05f2c
|
||||
VMOVQ V2, -2040(R4) // 8220602c
|
||||
VMOVQ V2, y+16(FP) // 0260402c
|
||||
VMOVQ V2, x+2030(FP) // 02d85f2c
|
||||
VMOVQ (R4), V2 // 8200002c
|
||||
VMOVQ 3(R4), V2 // 820c002c
|
||||
VMOVQ 2044(R4), V2 // 82f01f2c
|
||||
VMOVQ -2044(R4), V2 // 8210202c
|
||||
VMOVQ y+16(FP), V2 // 0260002c
|
||||
VMOVQ x+2030(FP), V2 // 02d81f2c
|
||||
XVMOVQ X2, (R4) // 8200c02c
|
||||
XVMOVQ X3, 3(R4) // 830cc02c
|
||||
XVMOVQ X4, 2040(R4) // 84e0df2c
|
||||
XVMOVQ X5, -2040(R4) // 8520e02c
|
||||
XVMOVQ X6, y+16(FP) // 0660c02c
|
||||
XVMOVQ X7, x+2030(FP) // 07d8df2c
|
||||
XVMOVQ (R4), X2 // 8200802c
|
||||
XVMOVQ 3(R4), X3 // 830c802c
|
||||
XVMOVQ 2044(R4), X4 // 84f09f2c
|
||||
XVMOVQ -2044(R4), X5 // 8510a02c
|
||||
XVMOVQ y+16(FP), X6 // 0660802c
|
||||
XVMOVQ x+2030(FP), X7 // 07d89f2c
|
||||
|
@ -15,6 +15,8 @@ const (
|
||||
NSYM = 50
|
||||
NREG = 32 // number of general registers
|
||||
NFREG = 32 // number of floating point registers
|
||||
NVREG = 32 // number of LSX registers
|
||||
NXREG = 32 // number of LASX registers
|
||||
)
|
||||
|
||||
const (
|
||||
@ -150,7 +152,75 @@ const (
|
||||
REG_FCC30
|
||||
REG_FCC31
|
||||
|
||||
REG_LAST = REG_FCC31 // the last defined register
|
||||
// LSX: 128-bit vector register
|
||||
REG_V0
|
||||
REG_V1
|
||||
REG_V2
|
||||
REG_V3
|
||||
REG_V4
|
||||
REG_V5
|
||||
REG_V6
|
||||
REG_V7
|
||||
REG_V8
|
||||
REG_V9
|
||||
REG_V10
|
||||
REG_V11
|
||||
REG_V12
|
||||
REG_V13
|
||||
REG_V14
|
||||
REG_V15
|
||||
REG_V16
|
||||
REG_V17
|
||||
REG_V18
|
||||
REG_V19
|
||||
REG_V20
|
||||
REG_V21
|
||||
REG_V22
|
||||
REG_V23
|
||||
REG_V24
|
||||
REG_V25
|
||||
REG_V26
|
||||
REG_V27
|
||||
REG_V28
|
||||
REG_V29
|
||||
REG_V30
|
||||
REG_V31
|
||||
|
||||
// LASX: 256-bit vector register
|
||||
REG_X0
|
||||
REG_X1
|
||||
REG_X2
|
||||
REG_X3
|
||||
REG_X4
|
||||
REG_X5
|
||||
REG_X6
|
||||
REG_X7
|
||||
REG_X8
|
||||
REG_X9
|
||||
REG_X10
|
||||
REG_X11
|
||||
REG_X12
|
||||
REG_X13
|
||||
REG_X14
|
||||
REG_X15
|
||||
REG_X16
|
||||
REG_X17
|
||||
REG_X18
|
||||
REG_X19
|
||||
REG_X20
|
||||
REG_X21
|
||||
REG_X22
|
||||
REG_X23
|
||||
REG_X24
|
||||
REG_X25
|
||||
REG_X26
|
||||
REG_X27
|
||||
REG_X28
|
||||
REG_X29
|
||||
REG_X30
|
||||
REG_X31
|
||||
|
||||
REG_LAST = REG_X31 // the last defined register
|
||||
|
||||
REG_SPECIAL = REG_FCSR0
|
||||
|
||||
@ -179,6 +249,9 @@ func init() {
|
||||
f(REG_R0, REG_R31, 0)
|
||||
f(REG_F0, REG_F31, 32)
|
||||
|
||||
// The lower bits of V and X registers are alias to F registers
|
||||
f(REG_V0, REG_V31, 32)
|
||||
f(REG_X0, REG_X31, 32)
|
||||
}
|
||||
|
||||
const (
|
||||
@ -199,6 +272,8 @@ const (
|
||||
C_FREG
|
||||
C_FCSRREG
|
||||
C_FCCREG
|
||||
C_VREG
|
||||
C_XREG
|
||||
C_ZCON
|
||||
C_SCON // 12 bit signed
|
||||
C_UCON // 32 bit signed, low 12 bits 0
|
||||
@ -549,6 +624,10 @@ const (
|
||||
AFTINTRNEVF
|
||||
AFTINTRNEVD
|
||||
|
||||
// LSX and LASX memory access instructions
|
||||
AVMOVQ
|
||||
AXVMOVQ
|
||||
|
||||
ALAST
|
||||
|
||||
// aliases
|
||||
@ -574,4 +653,10 @@ func init() {
|
||||
if REG_FCC0%32 != 0 {
|
||||
panic("REG_FCC0 is not a multiple of 32")
|
||||
}
|
||||
if REG_V0%32 != 0 {
|
||||
panic("REG_V0 is not a multiple of 32")
|
||||
}
|
||||
if REG_X0%32 != 0 {
|
||||
panic("REG_X0 is not a multiple of 32")
|
||||
}
|
||||
}
|
||||
|
@ -255,5 +255,7 @@ var Anames = []string{
|
||||
"FTINTRNEWD",
|
||||
"FTINTRNEVF",
|
||||
"FTINTRNEVD",
|
||||
"VMOVQ",
|
||||
"XVMOVQ",
|
||||
"LAST",
|
||||
}
|
||||
|
@ -105,6 +105,10 @@ var optab = []Optab{
|
||||
{AMOVV, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
{AMOVB, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
{AMOVBU, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
{AVMOVQ, C_VREG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
{AVMOVQ, C_VREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGZERO, 0},
|
||||
{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGZERO, 0},
|
||||
{ASC, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
{ASCV, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
|
||||
|
||||
@ -118,6 +122,10 @@ var optab = []Optab{
|
||||
{AMOVV, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{AMOVB, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{AMOVBU, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{AVMOVQ, C_SOREG, C_NONE, C_NONE, C_VREG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{AXVMOVQ, C_SOREG, C_NONE, C_NONE, C_XREG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{AVMOVQ, C_SAUTO, C_NONE, C_NONE, C_VREG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{AXVMOVQ, C_SAUTO, C_NONE, C_NONE, C_XREG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{ALL, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
|
||||
{ALLV, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
|
||||
|
||||
@ -306,6 +314,8 @@ var optab = []Optab{
|
||||
{AMOVV, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
|
||||
{AMOVF, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
|
||||
{AMOVD, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
|
||||
{AVMOVQ, C_VREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
|
||||
{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
|
||||
|
||||
/* load with extended register offset */
|
||||
{AMOVB, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
|
||||
@ -315,6 +325,8 @@ var optab = []Optab{
|
||||
{AMOVV, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
|
||||
{AMOVF, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0},
|
||||
{AMOVD, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0},
|
||||
{AVMOVQ, C_ROFF, C_NONE, C_NONE, C_VREG, C_NONE, 21, 4, 0, 0},
|
||||
{AXVMOVQ, C_ROFF, C_NONE, C_NONE, C_XREG, C_NONE, 21, 4, 0, 0},
|
||||
|
||||
{obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
|
||||
{obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0},
|
||||
@ -812,6 +824,10 @@ func (c *ctxt0) rclass(r int16) int {
|
||||
return C_FCCREG
|
||||
case REG_FCSR0 <= r && r <= REG_FCSR3:
|
||||
return C_FCSRREG
|
||||
case REG_V0 <= r && r <= REG_V31:
|
||||
return C_VREG
|
||||
case REG_X0 <= r && r <= REG_X31:
|
||||
return C_XREG
|
||||
}
|
||||
|
||||
return C_GOK
|
||||
@ -1199,6 +1215,8 @@ func buildop(ctxt *obj.Link) {
|
||||
AJAL,
|
||||
AJMP,
|
||||
AMOVWU,
|
||||
AVMOVQ,
|
||||
AXVMOVQ,
|
||||
ALL,
|
||||
ALLV,
|
||||
ASC,
|
||||
@ -2099,6 +2117,14 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
|
||||
return 0x07070 << 15 // fstx.s
|
||||
case AMOVD:
|
||||
return 0x07078 << 15 // fstx.d
|
||||
case -AVMOVQ:
|
||||
return 0x07080 << 15 // vldx
|
||||
case -AXVMOVQ:
|
||||
return 0x07090 << 15 // xvldx
|
||||
case AVMOVQ:
|
||||
return 0x07088 << 15 // vstx
|
||||
case AXVMOVQ:
|
||||
return 0x07098 << 15 // xvstx
|
||||
}
|
||||
|
||||
if a < 0 {
|
||||
@ -2386,7 +2412,14 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
|
||||
return 0x0ac << 22
|
||||
case -AMOVD:
|
||||
return 0x0ae << 22
|
||||
|
||||
case -AVMOVQ:
|
||||
return 0x0b0 << 22 // vld
|
||||
case -AXVMOVQ:
|
||||
return 0x0b2 << 22 // xvld
|
||||
case AVMOVQ:
|
||||
return 0x0b1 << 22 // vst
|
||||
case AXVMOVQ:
|
||||
return 0x0b3 << 22 // xvst
|
||||
case ASLLV:
|
||||
return 0x0041 << 16
|
||||
case ASRLV:
|
||||
|
@ -11,6 +11,8 @@ var cnames0 = []string{
|
||||
"FREG",
|
||||
"FCSRREG",
|
||||
"FCCREG",
|
||||
"VREG",
|
||||
"XREG",
|
||||
"ZCON",
|
||||
"SCON",
|
||||
"UCON",
|
||||
|
@ -8,7 +8,7 @@ GNU LoongArch64 syntax, but we can still follow the general rules to map between
|
||||
|
||||
# Instructions mnemonics mapping rules
|
||||
|
||||
1. Bit widths represented by various instruction suffixes
|
||||
1. Bit widths represented by various instruction suffixes and prefixes
|
||||
V (vlong) = 64 bit
|
||||
WU (word) = 32 bit unsigned
|
||||
W (word) = 32 bit
|
||||
@ -19,6 +19,18 @@ BU = 8 bit unsigned
|
||||
F (float) = 32 bit float
|
||||
D (double) = 64 bit float
|
||||
|
||||
V (LSX) = 128 bit
|
||||
XV (LASX) = 256 bit
|
||||
|
||||
Examples:
|
||||
|
||||
MOVB (R2), R3 // Load 8 bit memory data into R3 register
|
||||
MOVH (R2), R3 // Load 16 bit memory data into R3 register
|
||||
MOVW (R2), R3 // Load 32 bit memory data into R3 register
|
||||
MOVV (R2), R3 // Load 64 bit memory data into R3 register
|
||||
VMOVQ (R2), V1 // Load 128 bit memory data into V1 register
|
||||
XVMOVQ (R2), X1 // Load 256 bit memory data into X1 register
|
||||
|
||||
2. Align directive
|
||||
Go asm supports the PCALIGN directive, which indicates that the next instruction should
|
||||
be aligned to a specified boundary by padding with NOOP instruction. The alignment value
|
||||
@ -50,6 +62,10 @@ start:
|
||||
|
||||
2. All floating-point register names are written as Fn.
|
||||
|
||||
3. All LSX register names are written as Vn.
|
||||
|
||||
4. All LASX register names are written as Xn.
|
||||
|
||||
# Argument mapping rules
|
||||
|
||||
1. The operands appear in left-to-right assignment order.
|
||||
|
@ -22,18 +22,31 @@ func rconv(r int) string {
|
||||
// Special case.
|
||||
return "g"
|
||||
}
|
||||
|
||||
if REG_R0 <= r && r <= REG_R31 {
|
||||
return fmt.Sprintf("R%d", r-REG_R0)
|
||||
}
|
||||
|
||||
if REG_F0 <= r && r <= REG_F31 {
|
||||
return fmt.Sprintf("F%d", r-REG_F0)
|
||||
}
|
||||
|
||||
if REG_FCSR0 <= r && r <= REG_FCSR31 {
|
||||
return fmt.Sprintf("FCSR%d", r-REG_FCSR0)
|
||||
}
|
||||
|
||||
if REG_FCC0 <= r && r <= REG_FCC31 {
|
||||
return fmt.Sprintf("FCC%d", r-REG_FCC0)
|
||||
}
|
||||
|
||||
if REG_V0 <= r && r <= REG_V31 {
|
||||
return fmt.Sprintf("V%d", r-REG_V0)
|
||||
}
|
||||
|
||||
if REG_X0 <= r && r <= REG_X31 {
|
||||
return fmt.Sprintf("X%d", r-REG_X0)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("Rgok(%d)", r-obj.RBaseLOONG64)
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user