diff --git a/src/cmd/compile/internal/loong64/galign.go b/src/cmd/compile/internal/loong64/galign.go
new file mode 100644
index 0000000000..99ab7bdfb5
--- /dev/null
+++ b/src/cmd/compile/internal/loong64/galign.go
@@ -0,0 +1,23 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+	"cmd/compile/internal/ssa"
+	"cmd/compile/internal/ssagen"
+	"cmd/internal/obj/loong64"
+)
+
+func Init(arch *ssagen.ArchInfo) {
+	arch.LinkArch = &loong64.Linkloong64
+	arch.REGSP = loong64.REGSP
+	arch.MAXWIDTH = 1 << 50
+	arch.ZeroRange = zerorange
+	arch.Ginsnop = ginsnop
+
+	arch.SSAMarkMoves = func(s *ssagen.State, b *ssa.Block) {}
+	arch.SSAGenValue = ssaGenValue
+	arch.SSAGenBlock = ssaGenBlock
+}
diff --git a/src/cmd/compile/internal/loong64/ggen.go b/src/cmd/compile/internal/loong64/ggen.go
new file mode 100644
index 0000000000..c6fd1a65a1
--- /dev/null
+++ b/src/cmd/compile/internal/loong64/ggen.go
@@ -0,0 +1,59 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/objw"
+	"cmd/compile/internal/types"
+	"cmd/internal/obj"
+	"cmd/internal/obj/loong64"
+)
+
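+// zerorange zeroes a cnt-byte range of the stack frame starting at offset
+// off. Small ranges are cleared with unrolled MOVV stores, medium ranges
+// call into runtime.duffzero, and large ranges use an explicit store loop.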
+func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
+	if cnt == 0 {
+		return p
+	}
+	if cnt < int64(4*types.PtrSize) {
+		for i := int64(0); i < cnt; i += int64(types.PtrSize) {
+			p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, 8+off+i)
+		}
+	} else if cnt <= int64(128*types.PtrSize) {
+		p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0)
+		p.Reg = loong64.REGSP
+		p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = ir.Syms.Duffzero
+		p.To.Offset = 8 * (128 - cnt/int64(types.PtrSize))
+	} else {
+		// ADDV $(8+frame+lo-8), SP, r1
+		// ADDV $cnt, r1, r2
+		// loop:
+		// MOVV R0, (Widthptr)r1
+		// ADDV $Widthptr, r1
+		// BNE r1, r2, loop
+		p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0)
+		p.Reg = loong64.REGSP
+		p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, loong64.REGRT2, 0)
+		p.Reg = loong64.REGRT1
+		p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, int64(types.PtrSize))
+		p1 := p
+		p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, int64(types.PtrSize), obj.TYPE_REG, loong64.REGRT1, 0)
+		p = pp.Append(p, loong64.ABNE, obj.TYPE_REG, loong64.REGRT1, 0, obj.TYPE_BRANCH, 0, 0)
+		p.Reg = loong64.REGRT2
+		p.To.SetTarget(p1)
+	}
+
+	return p
+}
+
+func ginsnop(pp *objw.Progs) *obj.Prog {
+	p := pp.Prog(loong64.ANOR)
+	p.From.Type = obj.TYPE_REG
+	p.From.Reg = loong64.REG_R0
+	p.To.Type = obj.TYPE_REG
+	p.To.Reg = loong64.REG_R0
+	return p
+}
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go
new file mode 100644
index 0000000000..ed1fcb35f2
--- /dev/null
+++ b/src/cmd/compile/internal/loong64/ssa.go
@@ -0,0 +1,861 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+	"math"
+
+	"cmd/compile/internal/base"
+	"cmd/compile/internal/ir"
+	"cmd/compile/internal/logopt"
+	"cmd/compile/internal/ssa"
+	"cmd/compile/internal/ssagen"
+	"cmd/compile/internal/types"
+	"cmd/internal/obj"
+	"cmd/internal/obj/loong64"
+)
+
+// isFPreg reports whether r is an FP register
+func isFPreg(r int16) bool {
+	return loong64.REG_F0 <= r && r <= loong64.REG_F31
+}
+
+// loadByType returns the load instruction of the given type.
+func loadByType(t *types.Type, r int16) obj.As {
+	if isFPreg(r) {
+		if t.Size() == 4 {
+			return loong64.AMOVF
+		} else {
+			return loong64.AMOVD
+		}
+	} else {
+		switch t.Size() {
+		case 1:
+			if t.IsSigned() {
+				return loong64.AMOVB
+			} else {
+				return loong64.AMOVBU
+			}
+		case 2:
+			if t.IsSigned() {
+				return loong64.AMOVH
+			} else {
+				return loong64.AMOVHU
+			}
+		case 4:
+			if t.IsSigned() {
+				return loong64.AMOVW
+			} else {
+				return loong64.AMOVWU
+			}
+		case 8:
+			return loong64.AMOVV
+		}
+	}
+	panic("bad load type")
+}
+
+// storeByType returns the store instruction of the given type.
+func storeByType(t *types.Type, r int16) obj.As {
+	if isFPreg(r) {
+		if t.Size() == 4 {
+			return loong64.AMOVF
+		} else {
+			return loong64.AMOVD
+		}
+	} else {
+		switch t.Size() {
+		case 1:
+			return loong64.AMOVB
+		case 2:
+			return loong64.AMOVH
+		case 4:
+			return loong64.AMOVW
+		case 8:
+			return loong64.AMOVV
+		}
+	}
+	panic("bad store type")
+}
+
+func ssaGenValue(s *ssagen.State, v *ssa.Value) {
+	switch v.Op {
+	case ssa.OpCopy, ssa.OpLOONG64MOVVreg:
+		if v.Type.IsMemory() {
+			return
+		}
+		x := v.Args[0].Reg()
+		y := v.Reg()
+		if x == y {
+			return
+		}
+		as := loong64.AMOVV
+		if isFPreg(x) && isFPreg(y) {
+			as = loong64.AMOVD
+		}
+		p := s.Prog(as)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = x
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = y
+	case ssa.OpLOONG64MOVVnop:
+		if v.Reg() != v.Args[0].Reg() {
+			v.Fatalf("input[0] and output not in same register %s", v.LongString())
+		}
+		// nothing to do
+	case ssa.OpLoadReg:
+		if v.Type.IsFlags() {
+			v.Fatalf("load flags not implemented: %v", v.LongString())
+			return
+		}
+		r := v.Reg()
+		p := s.Prog(loadByType(v.Type, r))
+		ssagen.AddrAuto(&p.From, v.Args[0])
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+	case ssa.OpStoreReg:
+		if v.Type.IsFlags() {
+			v.Fatalf("store flags not implemented: %v", v.LongString())
+			return
+		}
+		r := v.Args[0].Reg()
+		p := s.Prog(storeByType(v.Type, r))
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r
+		ssagen.AddrAuto(&p.To, v)
+	case ssa.OpLOONG64ADDV,
+		ssa.OpLOONG64SUBV,
+		ssa.OpLOONG64AND,
+		ssa.OpLOONG64OR,
+		ssa.OpLOONG64XOR,
+		ssa.OpLOONG64NOR,
+		ssa.OpLOONG64SLLV,
+		ssa.OpLOONG64SRLV,
+		ssa.OpLOONG64SRAV,
+		ssa.OpLOONG64ADDF,
+		ssa.OpLOONG64ADDD,
+		ssa.OpLOONG64SUBF,
+		ssa.OpLOONG64SUBD,
+		ssa.OpLOONG64MULF,
+		ssa.OpLOONG64MULD,
+		ssa.OpLOONG64DIVF,
+		ssa.OpLOONG64DIVD:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64SGT,
+		ssa.OpLOONG64SGTU:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.Reg = v.Args[1].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64ADDVconst,
+		ssa.OpLOONG64SUBVconst,
+		ssa.OpLOONG64ANDconst,
+		ssa.OpLOONG64ORconst,
+		ssa.OpLOONG64XORconst,
+		ssa.OpLOONG64NORconst,
+		ssa.OpLOONG64SLLVconst,
+		ssa.OpLOONG64SRLVconst,
+		ssa.OpLOONG64SRAVconst,
+		ssa.OpLOONG64SGTconst,
+		ssa.OpLOONG64SGTUconst:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.AuxInt
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
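+	// Full-width multiply and divide produce two results: Reg0 holds the
+	// high word (or remainder) and Reg1 the low word (or quotient).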
+	case ssa.OpLOONG64MULV:
+		p := s.Prog(loong64.AMULV)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg1()
+		p1 := s.Prog(loong64.AMULHV)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = v.Args[1].Reg()
+		p1.Reg = v.Args[0].Reg()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = v.Reg0()
+	case ssa.OpLOONG64MULVU:
+		p := s.Prog(loong64.AMULV)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg1()
+		p1 := s.Prog(loong64.AMULHVU)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = v.Args[1].Reg()
+		p1.Reg = v.Args[0].Reg()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = v.Reg0()
+	case ssa.OpLOONG64DIVV:
+		p := s.Prog(loong64.ADIVV)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg1()
+		p1 := s.Prog(loong64.AREMV)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = v.Args[1].Reg()
+		p1.Reg = v.Args[0].Reg()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = v.Reg0()
+	case ssa.OpLOONG64DIVVU:
+		p := s.Prog(loong64.ADIVVU)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg1()
+		p1 := s.Prog(loong64.AREMVU)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = v.Args[1].Reg()
+		p1.Reg = v.Args[0].Reg()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = v.Reg0()
+	case ssa.OpLOONG64MOVVconst:
+		r := v.Reg()
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = v.AuxInt
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+		if isFPreg(r) {
+			// cannot move into FP or special registers, use TMP as intermediate
+			p.To.Reg = loong64.REGTMP
+			p = s.Prog(loong64.AMOVV)
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = loong64.REGTMP
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = r
+		}
+	case ssa.OpLOONG64MOVFconst,
+		ssa.OpLOONG64MOVDconst:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_FCONST
+		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64CMPEQF,
+		ssa.OpLOONG64CMPEQD,
+		ssa.OpLOONG64CMPGEF,
+		ssa.OpLOONG64CMPGED,
+		ssa.OpLOONG64CMPGTF,
+		ssa.OpLOONG64CMPGTD:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.Reg = v.Args[1].Reg()
+	case ssa.OpLOONG64MOVVaddr:
+		p := s.Prog(loong64.AMOVV)
+		p.From.Type = obj.TYPE_ADDR
+		p.From.Reg = v.Args[0].Reg()
+		var wantreg string
+		// MOVV $sym+off(base), R
+		// the assembler expands it as the following:
+		// - base is SP: add constant offset to SP (R3)
+		// when constant is large, tmp register (R30) may be used
+		// - base is SB: load external address with relocation
+		switch v.Aux.(type) {
+		default:
+			v.Fatalf("aux is of unknown type %T", v.Aux)
+		case *obj.LSym:
+			wantreg = "SB"
+			ssagen.AddAux(&p.From, v)
+		case *ir.Name:
+			wantreg = "SP"
+			ssagen.AddAux(&p.From, v)
+		case nil:
+			// No sym, just MOVV $off(SP), R
+			wantreg = "SP"
+			p.From.Offset = v.AuxInt
+		}
+		if reg := v.Args[0].RegName(); reg != wantreg {
+			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
+		}
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64MOVBload,
+		ssa.OpLOONG64MOVBUload,
+		ssa.OpLOONG64MOVHload,
+		ssa.OpLOONG64MOVHUload,
+		ssa.OpLOONG64MOVWload,
+		ssa.OpLOONG64MOVWUload,
+		ssa.OpLOONG64MOVVload,
+		ssa.OpLOONG64MOVFload,
+		ssa.OpLOONG64MOVDload:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.From, v)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64MOVBstore,
+		ssa.OpLOONG64MOVHstore,
+		ssa.OpLOONG64MOVWstore,
+		ssa.OpLOONG64MOVVstore,
+		ssa.OpLOONG64MOVFstore,
+		ssa.OpLOONG64MOVDstore:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.To, v)
+	case ssa.OpLOONG64MOVBstorezero,
+		ssa.OpLOONG64MOVHstorezero,
+		ssa.OpLOONG64MOVWstorezero,
+		ssa.OpLOONG64MOVVstorezero:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = loong64.REGZERO
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.To, v)
+	case ssa.OpLOONG64MOVBreg,
+		ssa.OpLOONG64MOVBUreg,
+		ssa.OpLOONG64MOVHreg,
+		ssa.OpLOONG64MOVHUreg,
+		ssa.OpLOONG64MOVWreg,
+		ssa.OpLOONG64MOVWUreg:
+		a := v.Args[0]
+		for a.Op == ssa.OpCopy || a.Op == ssa.OpLOONG64MOVVreg {
+			a = a.Args[0]
+		}
+		if a.Op == ssa.OpLoadReg && loong64.REG_R0 <= a.Reg() && a.Reg() <= loong64.REG_R31 {
+			// LoadReg from a narrower type does an extension, except loading
+			// to a floating point register. So only eliminate the extension
+			// if it is loaded to an integer register.
+
+			t := a.Type
+			switch {
+			case v.Op == ssa.OpLOONG64MOVBreg && t.Size() == 1 && t.IsSigned(),
+				v.Op == ssa.OpLOONG64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
+				v.Op == ssa.OpLOONG64MOVHreg && t.Size() == 2 && t.IsSigned(),
+				v.Op == ssa.OpLOONG64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
+				v.Op == ssa.OpLOONG64MOVWreg && t.Size() == 4 && t.IsSigned(),
+				v.Op == ssa.OpLOONG64MOVWUreg && t.Size() == 4 && !t.IsSigned():
+				// arg is a proper-typed load, already zero/sign-extended, don't extend again
+				if v.Reg() == v.Args[0].Reg() {
+					return
+				}
+				p := s.Prog(loong64.AMOVV)
+				p.From.Type = obj.TYPE_REG
+				p.From.Reg = v.Args[0].Reg()
+				p.To.Type = obj.TYPE_REG
+				p.To.Reg = v.Reg()
+				return
+			default:
+			}
+		}
+		fallthrough
+	case ssa.OpLOONG64MOVWF,
+		ssa.OpLOONG64MOVWD,
+		ssa.OpLOONG64TRUNCFW,
+		ssa.OpLOONG64TRUNCDW,
+		ssa.OpLOONG64MOVVF,
+		ssa.OpLOONG64MOVVD,
+		ssa.OpLOONG64TRUNCFV,
+		ssa.OpLOONG64TRUNCDV,
+		ssa.OpLOONG64MOVFD,
+		ssa.OpLOONG64MOVDF,
+		ssa.OpLOONG64NEGF,
+		ssa.OpLOONG64NEGD,
+		ssa.OpLOONG64SQRTD,
+		ssa.OpLOONG64SQRTF:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64NEGV:
+		// SUB from REGZERO
+		p := s.Prog(loong64.ASUBVU)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.Reg = loong64.REGZERO
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64DUFFZERO:
+		// runtime.duffzero expects start address - 8 in R19
+		p := s.Prog(loong64.ASUBVU)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = 8
+		p.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = loong64.REG_R19
+		p = s.Prog(obj.ADUFFZERO)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = ir.Syms.Duffzero
+		p.To.Offset = v.AuxInt
+	case ssa.OpLOONG64LoweredZero:
+		// SUBV $8, R19
+		// MOVV R0, 8(R19)
+		// ADDV $8, R19
+		// BNE Rarg1, R19, -2(PC)
+		// arg1 is the address of the last element to zero
+		var sz int64
+		var mov obj.As
+		switch {
+		case v.AuxInt%8 == 0:
+			sz = 8
+			mov = loong64.AMOVV
+		case v.AuxInt%4 == 0:
+			sz = 4
+			mov = loong64.AMOVW
+		case v.AuxInt%2 == 0:
+			sz = 2
+			mov = loong64.AMOVH
+		default:
+			sz = 1
+			mov = loong64.AMOVB
+		}
+		p := s.Prog(loong64.ASUBVU)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = sz
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = loong64.REG_R19
+		p2 := s.Prog(mov)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = loong64.REGZERO
+		p2.To.Type = obj.TYPE_MEM
+		p2.To.Reg = loong64.REG_R19
+		p2.To.Offset = sz
+		p3 := s.Prog(loong64.AADDVU)
+		p3.From.Type = obj.TYPE_CONST
+		p3.From.Offset = sz
+		p3.To.Type = obj.TYPE_REG
+		p3.To.Reg = loong64.REG_R19
+		p4 := s.Prog(loong64.ABNE)
+		p4.From.Type = obj.TYPE_REG
+		p4.From.Reg = v.Args[1].Reg()
+		p4.Reg = loong64.REG_R19
+		p4.To.Type = obj.TYPE_BRANCH
+		p4.To.SetTarget(p2)
+	case ssa.OpLOONG64DUFFCOPY:
+		p := s.Prog(obj.ADUFFCOPY)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = ir.Syms.Duffcopy
+		p.To.Offset = v.AuxInt
+	case ssa.OpLOONG64LoweredMove:
+		// SUBV $8, R19
+		// MOVV 8(R19), Rtmp
+		// MOVV Rtmp, (R4)
+		// ADDV $8, R19
+		// ADDV $8, R4
+		// BNE Rarg2, R19, -4(PC)
+		// arg2 is the address of the last element of src
+		var sz int64
+		var mov obj.As
+		switch {
+		case v.AuxInt%8 == 0:
+			sz = 8
+			mov = loong64.AMOVV
+		case v.AuxInt%4 == 0:
+			sz = 4
+			mov = loong64.AMOVW
+		case v.AuxInt%2 == 0:
+			sz = 2
+			mov = loong64.AMOVH
+		default:
+			sz = 1
+			mov = loong64.AMOVB
+		}
+		p := s.Prog(loong64.ASUBVU)
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = sz
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = loong64.REG_R19
+		p2 := s.Prog(mov)
+		p2.From.Type = obj.TYPE_MEM
+		p2.From.Reg = loong64.REG_R19
+		p2.From.Offset = sz
+		p2.To.Type = obj.TYPE_REG
+		p2.To.Reg = loong64.REGTMP
+		p3 := s.Prog(mov)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = loong64.REGTMP
+		p3.To.Type = obj.TYPE_MEM
+		p3.To.Reg = loong64.REG_R4
+		p4 := s.Prog(loong64.AADDVU)
+		p4.From.Type = obj.TYPE_CONST
+		p4.From.Offset = sz
+		p4.To.Type = obj.TYPE_REG
+		p4.To.Reg = loong64.REG_R19
+		p5 := s.Prog(loong64.AADDVU)
+		p5.From.Type = obj.TYPE_CONST
+		p5.From.Offset = sz
+		p5.To.Type = obj.TYPE_REG
+		p5.To.Reg = loong64.REG_R4
+		p6 := s.Prog(loong64.ABNE)
+		p6.From.Type = obj.TYPE_REG
+		p6.From.Reg = v.Args[2].Reg()
+		p6.Reg = loong64.REG_R19
+		p6.To.Type = obj.TYPE_BRANCH
+		p6.To.SetTarget(p2)
+	case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter:
+		s.Call(v)
+	case ssa.OpLOONG64CALLtail:
+		s.TailCall(v)
+	case ssa.OpLOONG64LoweredWB:
+		p := s.Prog(obj.ACALL)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = v.Aux.(*obj.LSym)
+	case ssa.OpLOONG64LoweredPanicBoundsA, ssa.OpLOONG64LoweredPanicBoundsB, ssa.OpLOONG64LoweredPanicBoundsC:
+		p := s.Prog(obj.ACALL)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
+		s.UseArgs(16) // space used in callee args area by assembly stubs
+	case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
+		as := loong64.AMOVV
+		switch v.Op {
+		case ssa.OpLOONG64LoweredAtomicLoad8:
+			as = loong64.AMOVB
+		case ssa.OpLOONG64LoweredAtomicLoad32:
+			as = loong64.AMOVW
+		}
+		s.Prog(loong64.ADBAR)
+		p := s.Prog(as)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg0()
+		s.Prog(loong64.ADBAR)
+	case ssa.OpLOONG64LoweredAtomicStore8, ssa.OpLOONG64LoweredAtomicStore32, ssa.OpLOONG64LoweredAtomicStore64:
+		as := loong64.AMOVV
+		switch v.Op {
+		case ssa.OpLOONG64LoweredAtomicStore8:
+			as = loong64.AMOVB
+		case ssa.OpLOONG64LoweredAtomicStore32:
+			as = loong64.AMOVW
+		}
+		s.Prog(loong64.ADBAR)
+		p := s.Prog(as)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		s.Prog(loong64.ADBAR)
+	case ssa.OpLOONG64LoweredAtomicStorezero32, ssa.OpLOONG64LoweredAtomicStorezero64:
+		as := loong64.AMOVV
+		if v.Op == ssa.OpLOONG64LoweredAtomicStorezero32 {
+			as = loong64.AMOVW
+		}
+		s.Prog(loong64.ADBAR)
+		p := s.Prog(as)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = loong64.REGZERO
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = v.Args[0].Reg()
+		s.Prog(loong64.ADBAR)
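+	// The atomic read-modify-write operations below are lowered to LL/SC
+	// retry loops surrounded by DBAR barriers.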
+	case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
+		// DBAR
+		// MOVV Rarg1, Rtmp
+		// LL (Rarg0), Rout
+		// SC Rtmp, (Rarg0)
+		// BEQ Rtmp, -3(PC)
+		// DBAR
+		ll := loong64.ALLV
+		sc := loong64.ASCV
+		if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
+			ll = loong64.ALL
+			sc = loong64.ASC
+		}
+		s.Prog(loong64.ADBAR)
+		p := s.Prog(loong64.AMOVV)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[1].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = loong64.REGTMP
+		p1 := s.Prog(ll)
+		p1.From.Type = obj.TYPE_MEM
+		p1.From.Reg = v.Args[0].Reg()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = v.Reg0()
+		p2 := s.Prog(sc)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = loong64.REGTMP
+		p2.To.Type = obj.TYPE_MEM
+		p2.To.Reg = v.Args[0].Reg()
+		p3 := s.Prog(loong64.ABEQ)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = loong64.REGTMP
+		p3.To.Type = obj.TYPE_BRANCH
+		p3.To.SetTarget(p)
+		s.Prog(loong64.ADBAR)
+	case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
+		// DBAR
+		// LL (Rarg0), Rout
+		// ADDV Rarg1, Rout, Rtmp
+		// SC Rtmp, (Rarg0)
+		// BEQ Rtmp, -3(PC)
+		// DBAR
+		// ADDV Rarg1, Rout
+		ll := loong64.ALLV
+		sc := loong64.ASCV
+		if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 {
+			ll = loong64.ALL
+			sc = loong64.ASC
+		}
+		s.Prog(loong64.ADBAR)
+		p := s.Prog(ll)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg0()
+		p1 := s.Prog(loong64.AADDVU)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = v.Args[1].Reg()
+		p1.Reg = v.Reg0()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = loong64.REGTMP
+		p2 := s.Prog(sc)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = loong64.REGTMP
+		p2.To.Type = obj.TYPE_MEM
+		p2.To.Reg = v.Args[0].Reg()
+		p3 := s.Prog(loong64.ABEQ)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = loong64.REGTMP
+		p3.To.Type = obj.TYPE_BRANCH
+		p3.To.SetTarget(p)
+		s.Prog(loong64.ADBAR)
+		p4 := s.Prog(loong64.AADDVU)
+		p4.From.Type = obj.TYPE_REG
+		p4.From.Reg = v.Args[1].Reg()
+		p4.Reg = v.Reg0()
+		p4.To.Type = obj.TYPE_REG
+		p4.To.Reg = v.Reg0()
+	case ssa.OpLOONG64LoweredAtomicAddconst32, ssa.OpLOONG64LoweredAtomicAddconst64:
+		// DBAR
+		// LL (Rarg0), Rout
+		// ADDV $auxint, Rout, Rtmp
+		// SC Rtmp, (Rarg0)
+		// BEQ Rtmp, -3(PC)
+		// DBAR
+		// ADDV $auxint, Rout
+		ll := loong64.ALLV
+		sc := loong64.ASCV
+		if v.Op == ssa.OpLOONG64LoweredAtomicAddconst32 {
+			ll = loong64.ALL
+			sc = loong64.ASC
+		}
+		s.Prog(loong64.ADBAR)
+		p := s.Prog(ll)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg0()
+		p1 := s.Prog(loong64.AADDVU)
+		p1.From.Type = obj.TYPE_CONST
+		p1.From.Offset = v.AuxInt
+		p1.Reg = v.Reg0()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = loong64.REGTMP
+		p2 := s.Prog(sc)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = loong64.REGTMP
+		p2.To.Type = obj.TYPE_MEM
+		p2.To.Reg = v.Args[0].Reg()
+		p3 := s.Prog(loong64.ABEQ)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = loong64.REGTMP
+		p3.To.Type = obj.TYPE_BRANCH
+		p3.To.SetTarget(p)
+		s.Prog(loong64.ADBAR)
+		p4 := s.Prog(loong64.AADDVU)
+		p4.From.Type = obj.TYPE_CONST
+		p4.From.Offset = v.AuxInt
+		p4.Reg = v.Reg0()
+		p4.To.Type = obj.TYPE_REG
+		p4.To.Reg = v.Reg0()
+	case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
+		// MOVV $0, Rout
+		// DBAR
+		// LL (Rarg0), Rtmp
+		// BNE Rtmp, Rarg1, 4(PC)
+		// MOVV Rarg2, Rout
+		// SC Rout, (Rarg0)
+		// BEQ Rout, -4(PC)
+		// DBAR
+		ll := loong64.ALLV
+		sc := loong64.ASCV
+		if v.Op == ssa.OpLOONG64LoweredAtomicCas32 {
+			ll = loong64.ALL
+			sc = loong64.ASC
+		}
+		p := s.Prog(loong64.AMOVV)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = loong64.REGZERO
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg0()
+		s.Prog(loong64.ADBAR)
+		p1 := s.Prog(ll)
+		p1.From.Type = obj.TYPE_MEM
+		p1.From.Reg = v.Args[0].Reg()
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = loong64.REGTMP
+		p2 := s.Prog(loong64.ABNE)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = v.Args[1].Reg()
+		p2.Reg = loong64.REGTMP
+		p2.To.Type = obj.TYPE_BRANCH
+		p3 := s.Prog(loong64.AMOVV)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = v.Args[2].Reg()
+		p3.To.Type = obj.TYPE_REG
+		p3.To.Reg = v.Reg0()
+		p4 := s.Prog(sc)
+		p4.From.Type = obj.TYPE_REG
+		p4.From.Reg = v.Reg0()
+		p4.To.Type = obj.TYPE_MEM
+		p4.To.Reg = v.Args[0].Reg()
+		p5 := s.Prog(loong64.ABEQ)
+		p5.From.Type = obj.TYPE_REG
+		p5.From.Reg = v.Reg0()
+		p5.To.Type = obj.TYPE_BRANCH
+		p5.To.SetTarget(p1)
+		p6 := s.Prog(loong64.ADBAR)
+		p2.To.SetTarget(p6)
+	case ssa.OpLOONG64LoweredNilCheck:
+		// Issue a load which will fault if arg is nil.
+		p := s.Prog(loong64.AMOVB)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		ssagen.AddAux(&p.From, v)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = loong64.REGTMP
+		if logopt.Enabled() {
+			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
+		}
+		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
+			base.WarnfAt(v.Pos, "generated nil check")
+		}
+	case ssa.OpLOONG64FPFlagTrue,
+		ssa.OpLOONG64FPFlagFalse:
+		// MOVV $0, r
+		// BFPF 2(PC)
+		// MOVV $1, r
+		branch := loong64.ABFPF
+		if v.Op == ssa.OpLOONG64FPFlagFalse {
+			branch = loong64.ABFPT
+		}
+		p := s.Prog(loong64.AMOVV)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = loong64.REGZERO
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+		p2 := s.Prog(branch)
+		p2.To.Type = obj.TYPE_BRANCH
+		p3 := s.Prog(loong64.AMOVV)
+		p3.From.Type = obj.TYPE_CONST
+		p3.From.Offset = 1
+		p3.To.Type = obj.TYPE_REG
+		p3.To.Reg = v.Reg()
+		p4 := s.Prog(obj.ANOP) // not a machine instruction, for branch to land
+		p2.To.SetTarget(p4)
+	case ssa.OpLOONG64LoweredGetClosurePtr:
+		// Closure pointer is R22 (loong64.REGCTXT).
+		ssagen.CheckLoweredGetClosurePtr(v)
+	case ssa.OpLOONG64LoweredGetCallerSP:
+		// caller's SP is FixedFrameSize below the address of the first arg
+		p := s.Prog(loong64.AMOVV)
+		p.From.Type = obj.TYPE_ADDR
+		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
+		p.From.Name = obj.NAME_PARAM
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpLOONG64LoweredGetCallerPC:
+		p := s.Prog(obj.AGETCALLERPC)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+	case ssa.OpClobber, ssa.OpClobberReg:
+		// TODO: implement for clobberdead experiment. Nop is ok for now.
+	default:
+		v.Fatalf("genValue not implemented: %s", v.LongString())
+	}
+}
+
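+// blockJump gives, for each conditional block kind, the branch instruction
+// to emit and its inverted form, used when the code falls through to the
+// first successor.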
+var blockJump = map[ssa.BlockKind]struct {
+	asm, invasm obj.As
+}{
+	ssa.BlockLOONG64EQ:  {loong64.ABEQ, loong64.ABNE},
+	ssa.BlockLOONG64NE:  {loong64.ABNE, loong64.ABEQ},
+	ssa.BlockLOONG64LTZ: {loong64.ABLTZ, loong64.ABGEZ},
+	ssa.BlockLOONG64GEZ: {loong64.ABGEZ, loong64.ABLTZ},
+	ssa.BlockLOONG64LEZ: {loong64.ABLEZ, loong64.ABGTZ},
+	ssa.BlockLOONG64GTZ: {loong64.ABGTZ, loong64.ABLEZ},
+	ssa.BlockLOONG64FPT: {loong64.ABFPT, loong64.ABFPF},
+	ssa.BlockLOONG64FPF: {loong64.ABFPF, loong64.ABFPT},
+}
+
+func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
+	switch b.Kind {
+	case ssa.BlockPlain:
+		if b.Succs[0].Block() != next {
+			p := s.Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
+		}
+	case ssa.BlockDefer:
+		// defer returns in R19:
+		// 0 if we should continue executing
+		// 1 if we should jump to deferreturn call
+		p := s.Prog(loong64.ABNE)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = loong64.REGZERO
+		p.Reg = loong64.REG_R19
+		p.To.Type = obj.TYPE_BRANCH
+		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
+		if b.Succs[0].Block() != next {
+			p := s.Prog(obj.AJMP)
+			p.To.Type = obj.TYPE_BRANCH
+			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
+		}
+	case ssa.BlockExit, ssa.BlockRetJmp:
+	case ssa.BlockRet:
+		s.Prog(obj.ARET)
+	case ssa.BlockLOONG64EQ, ssa.BlockLOONG64NE,
+		ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
+		ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
+		ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:
+		jmp := blockJump[b.Kind]
+		var p *obj.Prog
+		switch next {
+		case b.Succs[0].Block():
+			p = s.Br(jmp.invasm, b.Succs[1].Block())
+		case b.Succs[1].Block():
+			p = s.Br(jmp.asm, b.Succs[0].Block())
+		default:
+			if b.Likely != ssa.BranchUnlikely {
+				p = s.Br(jmp.asm, b.Succs[0].Block())
+				s.Br(obj.AJMP, b.Succs[1].Block())
+			} else {
+				p = s.Br(jmp.invasm, b.Succs[1].Block())
+				s.Br(obj.AJMP, b.Succs[0].Block())
+			}
+		}
+		if !b.Controls[0].Type.IsFlags() {
+			p.From.Type = obj.TYPE_REG
+			p.From.Reg = b.Controls[0].Reg()
+		}
+	default:
+		b.Fatalf("branch not implemented: %s", b.LongString())
+	}
+}