From 504212bbd704a3235da58ecda5f05962c5f30acb Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Sat, 1 Apr 2023 03:43:20 +0800 Subject: [PATCH] cmd/internal/obj/loong64: add atomic memory access instructions support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AM* atomic access instruction performs a sequence of “read-modify-write” operations on a memory cell atomically. Specifically, it retrieves the old value at the specified address in memory and writes it to the general register rd, performs some simple operations on the old value in memory and the value in the general register rk, and then write the result of the operation back to the memory address pointed to by general register rj. Go asm syntax: AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB]{W/V} RK, (RJ), RD AM{MAX/MIN}[DB]{WU/VU} RK, (RJ), RD Equivalent platform assembler syntax: am{swap/add/and/or/xor/max/min}[_db].{w/d} rd, rk, rj am{max/min}[_db].{wu/du} rd, rk, rj Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html Change-Id: I99ea4553ae731675180d63691c19ef334e7e7817 Reviewed-on: https://go-review.googlesource.com/c/go/+/481577 Reviewed-by: Meidan Li Reviewed-by: sophie zhao Reviewed-by: Mauri de Souza Meneguzzo Reviewed-by: Dmitri Shuralyov Reviewed-by: Qiqi Huang Reviewed-by: WANG Xuerui LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/cmd/asm/internal/arch/loong64.go | 4 + src/cmd/asm/internal/asm/asm.go | 14 +++- .../asm/internal/asm/testdata/loong64enc1.s | 50 ++++++++++++ src/cmd/internal/obj/loong64/a.out.go | 50 ++++++++++++ src/cmd/internal/obj/loong64/anames.go | 48 +++++++++++ src/cmd/internal/obj/loong64/asm.go | 79 ++++++++++++++++++- 6 files changed, 241 insertions(+), 4 deletions(-) diff --git a/src/cmd/asm/internal/arch/loong64.go b/src/cmd/asm/internal/arch/loong64.go index 2958ee1a86..bf34a94f07 100644 --- a/src/cmd/asm/internal/arch/loong64.go +++ b/src/cmd/asm/internal/arch/loong64.go @@ -55,6 +55,10 @@ func IsLoong64RDTIME(op obj.As) bool { return false } +func IsLoong64AMO(op obj.As) bool { + return loong64.IsAtomicInst(op) +} + func loong64RegisterNumber(name string, n int16) (int16, bool) { switch name { case "F": diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 949b688bbd..b2eaa0a28d 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -669,9 +669,17 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.Reg = p.getRegister(prog, op, &a[1]) prog.To = a[2] case sys.Loong64: - prog.From = a[0] - prog.Reg = p.getRegister(prog, op, &a[1]) - prog.To = a[2] + switch { + // Loong64 atomic instructions with one input and two outputs. + case arch.IsLoong64AMO(op): + prog.From = a[0] + prog.To = a[1] + prog.RegTo2 = a[2].Reg + default: + prog.From = a[0] + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.To = a[2] + } case sys.ARM: // Special cases. if arch.IsARMSTREX(op) { diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 5191b66cce..41156febc6 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -231,3 +231,53 @@ lable2: MOVV FCC0, R4 // 04dc1401 MOVV R4, FCC0 // 80d81401 + + // Loong64 atomic memory access instructions + AMSWAPB R14, (R13), R12 // ac395c38 + AMSWAPH R14, (R13), R12 // acb95c38 + AMSWAPW R14, (R13), R12 // ac396038 + AMSWAPV R14, (R13), R12 // acb96038 + AMCASB R14, (R13), R12 // ac395838 + AMCASH R14, (R13), R12 // acb95838 + AMCASW R14, (R13), R12 // ac395938 + AMCASV R14, (R13), R12 // acb95938 + AMADDW R14, (R13), R12 // ac396138 + AMADDV R14, (R13), R12 // acb96138 + AMANDW R14, (R13), R12 // ac396238 + AMANDV R14, (R13), R12 // acb96238 + AMORW R14, (R13), R12 // ac396338 + AMORV R14, (R13), R12 // acb96338 + AMXORW R14, (R13), R12 // ac396438 + AMXORV R14, (R13), R12 // acb96438 + AMMAXW R14, (R13), R12 // ac396538 + AMMAXV R14, (R13), R12 // acb96538 + AMMINW R14, (R13), R12 // ac396638 + AMMINV R14, (R13), R12 // acb96638 + AMMAXWU R14, (R13), R12 // ac396738 + AMMAXVU R14, (R13), R12 // acb96738 + AMMINWU R14, (R13), R12 // ac396838 + AMMINVU R14, (R13), R12 // acb96838 + AMSWAPDBB R14, (R13), R12 // ac395e38 + AMSWAPDBH R14, (R13), R12 // acb95e38 + AMSWAPDBW R14, (R13), R12 // ac396938 + AMSWAPDBV R14, (R13), R12 // acb96938 + AMCASDBB R14, (R13), R12 // ac395a38 + AMCASDBH R14, (R13), R12 // acb95a38 + AMCASDBW R14, (R13), R12 // ac395b38 + AMCASDBV R14, (R13), R12 // acb95b38 + AMADDDBW R14, (R13), R12 // ac396a38 + AMADDDBV R14, (R13), R12 // acb96a38 + AMANDDBW R14, (R13), R12 // ac396b38 + AMANDDBV R14, (R13), R12 // acb96b38 + AMORDBW R14, (R13), R12 // ac396c38 + AMORDBV R14, (R13), R12 // acb96c38 + AMXORDBW R14, (R13), R12 // ac396d38 + AMXORDBV R14, (R13), R12 // acb96d38 + AMMAXDBW R14, (R13), R12 // ac396e38 + AMMAXDBV R14, (R13), R12 // acb96e38 + AMMINDBW R14, (R13), R12 // ac396f38 + AMMINDBV R14, (R13), R12 // acb96f38 + AMMAXDBWU R14, (R13), R12 // ac397038 + AMMAXDBVU R14, (R13), R12 // acb97038 + AMMINDBWU R14, (R13), R12 // ac397138 + AMMINDBVU R14, (R13), R12 // acb97138 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index d944fcfcb8..2dd2a085a8 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -394,6 +394,56 @@ const ( AMOVVF AMOVVD + // 2.2.7. Atomic Memory Access Instructions + AAMSWAPB + AAMSWAPH + AAMSWAPW + AAMSWAPV + AAMCASB + AAMCASH + AAMCASW + AAMCASV + AAMADDW + AAMADDV + AAMANDW + AAMANDV + AAMORW + AAMORV + AAMXORW + AAMXORV + AAMMAXW + AAMMAXV + AAMMINW + AAMMINV + AAMMAXWU + AAMMAXVU + AAMMINWU + AAMMINVU + AAMSWAPDBB + AAMSWAPDBH + AAMSWAPDBW + AAMSWAPDBV + AAMCASDBB + AAMCASDBH + AAMCASDBW + AAMCASDBV + AAMADDDBW + AAMADDDBV + AAMANDDBW + AAMANDDBV + AAMORDBW + AAMORDBV + AAMXORDBW + AAMXORDBV + AAMMAXDBW + AAMMAXDBV + AAMMINDBW + AAMMINDBV + AAMMAXDBWU + AAMMAXDBVU + AAMMINDBWU + AAMMINDBVU + // 2.2.10. Other Miscellaneous Instructions ARDTIMELW ARDTIMEHW diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index f61756e7a8..f21e8c9630 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -131,6 +131,54 @@ var Anames = []string{ "MOVDV", "MOVVF", "MOVVD", + "AMSWAPB", + "AMSWAPH", + "AMSWAPW", + "AMSWAPV", + "AMCASB", + "AMCASH", + "AMCASW", + "AMCASV", + "AMADDW", + "AMADDV", + "AMANDW", + "AMANDV", + "AMORW", + "AMORV", + "AMXORW", + "AMXORV", + "AMMAXW", + "AMMAXV", + "AMMINW", + "AMMINV", + "AMMAXWU", + "AMMAXVU", + "AMMINWU", + "AMMINVU", + "AMSWAPDBB", + "AMSWAPDBH", + "AMSWAPDBW", + "AMSWAPDBV", + "AMCASDBB", + "AMCASDBH", + "AMCASDBW", + "AMCASDBV", + "AMADDDBW", + "AMADDDBV", + "AMANDDBW", + "AMANDDBV", + "AMORDBW", + "AMORDBV", + "AMXORDBW", + "AMXORDBV", + "AMMAXDBW", + "AMMAXDBV", + "AMMINDBW", + "AMMINDBV", + "AMMAXDBWU", + "AMMAXDBVU", + "AMMINDBWU", + "AMMINDBVU", "RDTIMELW", "RDTIMEHW", "RDTIMED", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 5b823c09af..4eaf8bf5ed 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -356,7 +356,7 @@ var optab = []Optab{ {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, {ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, - + {AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0}, {ANOOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}, {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, @@ -374,6 +374,63 @@ var optab = []Optab{ {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0}, } +var atomicInst = map[obj.As]uint32{ + AAMSWAPB: 0x070B8 << 15, // amswap.b + AAMSWAPH: 0x070B9 << 15, // amswap.h + AAMSWAPW: 0x070C0 << 15, // amswap.w + AAMSWAPV: 0x070C1 << 15, // amswap.d + AAMCASB: 0x070B0 << 15, // amcas.b + AAMCASH: 0x070B1 << 15, // amcas.h + AAMCASW: 0x070B2 << 15, // amcas.w + AAMCASV: 0x070B3 << 15, // amcas.d + AAMADDW: 0x070C2 << 15, // amadd.w + AAMADDV: 0x070C3 << 15, // amadd.d + AAMANDW: 0x070C4 << 15, // amand.w + AAMANDV: 0x070C5 << 15, // amand.d + AAMORW: 0x070C6 << 15, // amor.w + AAMORV: 0x070C7 << 15, // amor.d + AAMXORW: 0x070C8 << 15, // amxor.w + AAMXORV: 0x070C9 << 15, // amxor.d + AAMMAXW: 0x070CA << 15, // ammax.w + AAMMAXV: 0x070CB << 15, // ammax.d + AAMMINW: 0x070CC << 15, // ammin.w + AAMMINV: 0x070CD << 15, // ammin.d + AAMMAXWU: 0x070CE << 15, // ammax.wu + AAMMAXVU: 0x070CF << 15, // ammax.du + AAMMINWU: 0x070D0 << 15, // ammin.wu + AAMMINVU: 0x070D1 << 15, // ammin.du + AAMSWAPDBB: 0x070BC << 15, // amswap_db.b + AAMSWAPDBH: 0x070BD << 15, // amswap_db.h + AAMSWAPDBW: 0x070D2 << 15, // amswap_db.w + AAMSWAPDBV: 0x070D3 << 15, // amswap_db.d + AAMCASDBB: 0x070B4 << 15, // amcas_db.b + AAMCASDBH: 0x070B5 << 15, // amcas_db.h + AAMCASDBW: 0x070B6 << 15, // amcas_db.w + AAMCASDBV: 0x070B7 << 15, // amcas_db.d + AAMADDDBW: 0x070D4 << 15, // amadd_db.w + AAMADDDBV: 0x070D5 << 15, // amadd_db.d + AAMANDDBW: 0x070D6 << 15, // amand_db.w + AAMANDDBV: 0x070D7 << 15, // amand_db.d + AAMORDBW: 0x070D8 << 15, // amor_db.w + AAMORDBV: 0x070D9 << 15, // amor_db.d + AAMXORDBW: 0x070DA << 15, // amxor_db.w + AAMXORDBV: 0x070DB << 15, // amxor_db.d + AAMMAXDBW: 0x070DC << 15, // ammax_db.w + AAMMAXDBV: 0x070DD << 15, // ammax_db.d + AAMMINDBW: 0x070DE << 15, // ammin_db.w + AAMMINDBV: 0x070DF << 15, // ammin_db.d + AAMMAXDBWU: 0x070E0 << 15, // ammax_db.wu + AAMMAXDBVU: 0x070E1 << 15, // ammax_db.du + AAMMINDBWU: 0x070E2 << 15, // ammin_db.wu + AAMMINDBVU: 0x070E3 << 15, // ammin_db.du +} + +func IsAtomicInst(as obj.As) bool { + _, ok := atomicInst[as] + + return ok +} + // pcAlignPadLength returns the number of bytes required to align pc to alignedValue, // reporting an error if alignedValue is not a power of two or is out of range. func pcAlignPadLength(ctxt *obj.Link, pc int64, alignedValue int64) int { @@ -1182,6 +1239,14 @@ func buildop(ctxt *obj.Link) { case ANOOP: opset(obj.AUNDEF, r0) + + case AAMSWAPW: + for i := range atomicInst { + if i == AAMSWAPW { + continue + } + opset(i, r0) + } } } } @@ -1817,6 +1882,18 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { rel2.Sym = p.From.Sym rel2.Type = objabi.R_LOONG64_GOT_LO rel2.Add = 0x0 + + case 66: // am* From, To, RegTo2 ==> am* RegTo2, From, To + rk := p.From.Reg + rj := p.To.Reg + rd := p.RegTo2 + + // See section 2.2.7.1 of https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + // for the register usage constraints. + if rd == rj || rd == rk { + c.ctxt.Diag("illegal register combination: %v\n", p) + } + o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd)) } out[0] = o1