mirror of
https://github.com/golang/go
synced 2024-11-25 10:17:57 -07:00
cmd/compile: optimize math/bits.OnesCount{16,32,64} implementation on loong64
Use Loong64's LSX instruction VPCNT to implement math/bits.OnesCount{16,32,64} and make it intrinsic. Benchmark results on loongson 3A5000 and 3A6000 machines: goos: linux goarch: loong64 pkg: math/bits cpu: Loongson-3A5000-HV @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | OnesCount 4.413n ± 0% 1.401n ± 0% -68.25% (p=0.000 n=10) OnesCount8 1.364n ± 0% 1.363n ± 0% ~ (p=0.130 n=10) OnesCount16 2.112n ± 0% 1.534n ± 0% -27.37% (p=0.000 n=10) OnesCount32 4.533n ± 0% 1.529n ± 0% -66.27% (p=0.000 n=10) OnesCount64 4.565n ± 0% 1.531n ± 1% -66.46% (p=0.000 n=10) geomean 3.048n 1.470n -51.78% goos: linux goarch: loong64 pkg: math/bits cpu: Loongson-3A6000 @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | OnesCount 3.553n ± 0% 1.201n ± 0% -66.20% (p=0.000 n=10) OnesCount8 0.8021n ± 0% 0.8004n ± 0% -0.21% (p=0.000 n=10) OnesCount16 1.216n ± 0% 1.000n ± 0% -17.76% (p=0.000 n=10) OnesCount32 3.006n ± 0% 1.035n ± 0% -65.57% (p=0.000 n=10) OnesCount64 3.503n ± 0% 1.035n ± 0% -70.45% (p=0.000 n=10) geomean 2.053n 1.006n -51.01% Change-Id: I07a5b8da2bb48711b896387ec7625145804affc8 Reviewed-on: https://go-review.googlesource.com/c/go/+/620978 Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Meidan Li <limeidan@loongson.cn> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
4c8ab993cd
commit
fb9b946adc
@ -61,6 +61,7 @@ type symsStruct struct {
|
||||
ARM64HasATOMICS *obj.LSym
|
||||
ARMHasVFPv4 *obj.LSym
|
||||
Loong64HasLAM_BH *obj.LSym
|
||||
Loong64HasLSX *obj.LSym
|
||||
X86HasFMA *obj.LSym
|
||||
X86HasPOPCNT *obj.LSym
|
||||
X86HasSSE41 *obj.LSym
|
||||
|
@ -493,6 +493,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
}
|
||||
}
|
||||
fallthrough
|
||||
|
||||
case ssa.OpLOONG64MOVWF,
|
||||
ssa.OpLOONG64MOVWD,
|
||||
ssa.OpLOONG64TRUNCFW,
|
||||
@ -525,6 +526,16 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
|
||||
case ssa.OpLOONG64VPCNT64,
|
||||
ssa.OpLOONG64VPCNT32,
|
||||
ssa.OpLOONG64VPCNT16:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = ((v.Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
|
||||
|
||||
case ssa.OpLOONG64NEGV:
|
||||
// SUB from REGZERO
|
||||
p := s.Prog(loong64.ASUBVU)
|
||||
@ -533,6 +544,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.Reg = loong64.REGZERO
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
|
||||
case ssa.OpLOONG64DUFFZERO:
|
||||
// runtime.duffzero expects start address in R20
|
||||
p := s.Prog(obj.ADUFFZERO)
|
||||
|
@ -153,6 +153,10 @@
|
||||
(BitRev32 ...) => (BITREVW ...)
|
||||
(BitRev64 ...) => (BITREVV ...)
|
||||
|
||||
(PopCount64 <t> x) => (MOVVfpgp <t> (VPCNT64 <typ.Float64> (MOVVgpfp <typ.Float64> x)))
|
||||
(PopCount32 <t> x) => (MOVWfpgp <t> (VPCNT32 <typ.Float32> (MOVWgpfp <typ.Float32> x)))
|
||||
(PopCount16 <t> x) => (MOVWfpgp <t> (VPCNT16 <typ.Float32> (MOVWgpfp <typ.Float32> (ZeroExt16to32 x))))
|
||||
|
||||
// math package intrinsics
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
(Sqrt32 ...) => (SQRTF ...)
|
||||
|
@ -162,6 +162,31 @@ func init() {
|
||||
readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
|
||||
)
|
||||
ops := []opData{
|
||||
// unary ops
|
||||
{name: "NEGV", argLength: 1, reg: gp11}, // -arg0
|
||||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
|
||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||
|
||||
{name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
|
||||
|
||||
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // Count leading (high order) zeroes (returns 0-32)
|
||||
{name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"}, // Count leading (high order) zeroes (returns 0-64)
|
||||
|
||||
{name: "REVB2H", argLength: 1, reg: gp11, asm: "REVB2H"}, // Swap bytes: 0x11223344 -> 0x22114433 (sign extends to 64 bits)
|
||||
{name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
|
||||
{name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211
|
||||
|
||||
{name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0]
|
||||
{name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"}, // Reverse the bits in a 32-bit arg[0]
|
||||
{name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"}, // Reverse the bits in a 64-bit arg[0]
|
||||
|
||||
{name: "VPCNT64", argLength: 1, reg: fp11, asm: "VPCNTV"}, // count set bits for each 64-bit unit and store the result in each 64-bit unit
|
||||
{name: "VPCNT32", argLength: 1, reg: fp11, asm: "VPCNTW"}, // count set bits for each 32-bit unit and store the result in each 32-bit unit
|
||||
{name: "VPCNT16", argLength: 1, reg: fp11, asm: "VPCNTH"}, // count set bits for each 16-bit unit and store the result in each 16-bit unit
|
||||
|
||||
// binary ops
|
||||
{name: "ADDV", argLength: 2, reg: gp21, asm: "ADDVU", commutative: true}, // arg0 + arg1
|
||||
{name: "ADDVconst", argLength: 1, reg: gp11sp, asm: "ADDVU", aux: "Int64"}, // arg0 + auxInt. auxInt is 32-bit, also in other *const ops.
|
||||
@ -203,32 +228,13 @@ func init() {
|
||||
{name: "FNMSUBF", argLength: 3, reg: fp31, asm: "FNMSUBF", commutative: true, typ: "Float32"}, // -((arg0 * arg1) - arg2)
|
||||
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD", commutative: true, typ: "Float64"}, // -((arg0 * arg1) - arg2)
|
||||
|
||||
{name: "NEGV", argLength: 1, reg: gp11}, // -arg0
|
||||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||
|
||||
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // Count leading (high order) zeroes (returns 0-32)
|
||||
{name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"}, // Count leading (high order) zeroes (returns 0-64)
|
||||
|
||||
{name: "REVB2H", argLength: 1, reg: gp11, asm: "REVB2H"}, // Swap bytes: 0x11223344 -> 0x22114433 (sign extends to 64 bits)
|
||||
{name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
|
||||
{name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211
|
||||
|
||||
{name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0]
|
||||
{name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"}, // Reverse the bits in a 32-bit arg[0]
|
||||
{name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"}, // Reverse the bits in a 64-bit arg[0]
|
||||
|
||||
{name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32
|
||||
{name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64
|
||||
{name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32
|
||||
{name: "FMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1), float64
|
||||
|
||||
{name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
|
||||
{name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0
|
||||
|
||||
{name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
|
||||
{name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
|
||||
{name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0
|
||||
{name: "FCOPYSGD", argLength: 2, reg: fp21, asm: "FCOPYSGD"}, // float64
|
||||
|
||||
// shifts
|
||||
|
@ -1756,6 +1756,23 @@ const (
|
||||
OpARM64PRFM
|
||||
OpARM64DMB
|
||||
|
||||
OpLOONG64NEGV
|
||||
OpLOONG64NEGF
|
||||
OpLOONG64NEGD
|
||||
OpLOONG64SQRTD
|
||||
OpLOONG64SQRTF
|
||||
OpLOONG64ABSD
|
||||
OpLOONG64CLZW
|
||||
OpLOONG64CLZV
|
||||
OpLOONG64REVB2H
|
||||
OpLOONG64REVB2W
|
||||
OpLOONG64REVBV
|
||||
OpLOONG64BITREV4B
|
||||
OpLOONG64BITREVW
|
||||
OpLOONG64BITREVV
|
||||
OpLOONG64VPCNT64
|
||||
OpLOONG64VPCNT32
|
||||
OpLOONG64VPCNT16
|
||||
OpLOONG64ADDV
|
||||
OpLOONG64ADDVconst
|
||||
OpLOONG64SUBV
|
||||
@ -1791,26 +1808,12 @@ const (
|
||||
OpLOONG64FNMADDD
|
||||
OpLOONG64FNMSUBF
|
||||
OpLOONG64FNMSUBD
|
||||
OpLOONG64NEGV
|
||||
OpLOONG64NEGF
|
||||
OpLOONG64NEGD
|
||||
OpLOONG64SQRTD
|
||||
OpLOONG64SQRTF
|
||||
OpLOONG64CLZW
|
||||
OpLOONG64CLZV
|
||||
OpLOONG64REVB2H
|
||||
OpLOONG64REVB2W
|
||||
OpLOONG64REVBV
|
||||
OpLOONG64BITREV4B
|
||||
OpLOONG64BITREVW
|
||||
OpLOONG64BITREVV
|
||||
OpLOONG64FMINF
|
||||
OpLOONG64FMIND
|
||||
OpLOONG64FMAXF
|
||||
OpLOONG64FMAXD
|
||||
OpLOONG64MASKEQZ
|
||||
OpLOONG64MASKNEZ
|
||||
OpLOONG64ABSD
|
||||
OpLOONG64FCOPYSGD
|
||||
OpLOONG64SLLV
|
||||
OpLOONG64SLLVconst
|
||||
@ -23557,6 +23560,226 @@ var opcodeTable = [...]opInfo{
|
||||
reg: regInfo{},
|
||||
},
|
||||
|
||||
{
|
||||
name: "NEGV",
|
||||
argLen: 1,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NEGF",
|
||||
argLen: 1,
|
||||
asm: loong64.ANEGF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NEGD",
|
||||
argLen: 1,
|
||||
asm: loong64.ANEGD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTD",
|
||||
argLen: 1,
|
||||
asm: loong64.ASQRTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTF",
|
||||
argLen: 1,
|
||||
asm: loong64.ASQRTF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ABSD",
|
||||
argLen: 1,
|
||||
asm: loong64.AABSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CLZW",
|
||||
argLen: 1,
|
||||
asm: loong64.ACLZW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CLZV",
|
||||
argLen: 1,
|
||||
asm: loong64.ACLZV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVB2H",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVB2H,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVB2W",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVB2W,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVBV",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVBV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BITREV4B",
|
||||
argLen: 1,
|
||||
asm: loong64.ABITREV4B,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BITREVW",
|
||||
argLen: 1,
|
||||
asm: loong64.ABITREVW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BITREVV",
|
||||
argLen: 1,
|
||||
asm: loong64.ABITREVV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPCNT64",
|
||||
argLen: 1,
|
||||
asm: loong64.AVPCNTV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPCNT32",
|
||||
argLen: 1,
|
||||
asm: loong64.AVPCNTW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "VPCNT16",
|
||||
argLen: 1,
|
||||
asm: loong64.AVPCNTH,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ADDV",
|
||||
argLen: 2,
|
||||
@ -24075,174 +24298,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NEGV",
|
||||
argLen: 1,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NEGF",
|
||||
argLen: 1,
|
||||
asm: loong64.ANEGF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NEGD",
|
||||
argLen: 1,
|
||||
asm: loong64.ANEGD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTD",
|
||||
argLen: 1,
|
||||
asm: loong64.ASQRTD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTF",
|
||||
argLen: 1,
|
||||
asm: loong64.ASQRTF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CLZW",
|
||||
argLen: 1,
|
||||
asm: loong64.ACLZW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CLZV",
|
||||
argLen: 1,
|
||||
asm: loong64.ACLZV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVB2H",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVB2H,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVB2W",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVB2W,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REVBV",
|
||||
argLen: 1,
|
||||
asm: loong64.AREVBV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BITREV4B",
|
||||
argLen: 1,
|
||||
asm: loong64.ABITREV4B,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BITREVW",
|
||||
argLen: 1,
|
||||
asm: loong64.ABITREVW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BITREVV",
|
||||
argLen: 1,
|
||||
asm: loong64.ABITREVV,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMINF",
|
||||
argLen: 2,
|
||||
@ -24335,19 +24390,6 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ABSD",
|
||||
argLen: 1,
|
||||
asm: loong64.AABSD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FCOPYSGD",
|
||||
argLen: 2,
|
||||
|
@ -628,6 +628,12 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
return true
|
||||
case OpPanicBounds:
|
||||
return rewriteValueLOONG64_OpPanicBounds(v)
|
||||
case OpPopCount16:
|
||||
return rewriteValueLOONG64_OpPopCount16(v)
|
||||
case OpPopCount32:
|
||||
return rewriteValueLOONG64_OpPopCount32(v)
|
||||
case OpPopCount64:
|
||||
return rewriteValueLOONG64_OpPopCount64(v)
|
||||
case OpPubBarrier:
|
||||
v.Op = OpLOONG64LoweredPubBarrier
|
||||
return true
|
||||
@ -8239,6 +8245,65 @@ func rewriteValueLOONG64_OpPanicBounds(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueLOONG64_OpPopCount16(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
// match: (PopCount16 <t> x)
|
||||
// result: (MOVWfpgp <t> (VPCNT16 <typ.Float32> (MOVWgpfp <typ.Float32> (ZeroExt16to32 x))))
|
||||
for {
|
||||
t := v.Type
|
||||
x := v_0
|
||||
v.reset(OpLOONG64MOVWfpgp)
|
||||
v.Type = t
|
||||
v0 := b.NewValue0(v.Pos, OpLOONG64VPCNT16, typ.Float32)
|
||||
v1 := b.NewValue0(v.Pos, OpLOONG64MOVWgpfp, typ.Float32)
|
||||
v2 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
|
||||
v2.AddArg(x)
|
||||
v1.AddArg(v2)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueLOONG64_OpPopCount32(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
// match: (PopCount32 <t> x)
|
||||
// result: (MOVWfpgp <t> (VPCNT32 <typ.Float32> (MOVWgpfp <typ.Float32> x)))
|
||||
for {
|
||||
t := v.Type
|
||||
x := v_0
|
||||
v.reset(OpLOONG64MOVWfpgp)
|
||||
v.Type = t
|
||||
v0 := b.NewValue0(v.Pos, OpLOONG64VPCNT32, typ.Float32)
|
||||
v1 := b.NewValue0(v.Pos, OpLOONG64MOVWgpfp, typ.Float32)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueLOONG64_OpPopCount64(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
// match: (PopCount64 <t> x)
|
||||
// result: (MOVVfpgp <t> (VPCNT64 <typ.Float64> (MOVVgpfp <typ.Float64> x)))
|
||||
for {
|
||||
t := v.Type
|
||||
x := v_0
|
||||
v.reset(OpLOONG64MOVVfpgp)
|
||||
v.Type = t
|
||||
v0 := b.NewValue0(v.Pos, OpLOONG64VPCNT64, typ.Float64)
|
||||
v1 := b.NewValue0(v.Pos, OpLOONG64MOVVgpfp, typ.Float64)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueLOONG64_OpRotateLeft16(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
|
@ -1021,9 +1021,43 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
return s.variable(n, types.Types[types.TINT])
|
||||
}
|
||||
}
|
||||
|
||||
makeOnesCountLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
|
||||
v := s.load(types.Types[types.TBOOL], addr)
|
||||
b := s.endBlock()
|
||||
b.Kind = ssa.BlockIf
|
||||
b.SetControl(v)
|
||||
bTrue := s.f.NewBlock(ssa.BlockPlain)
|
||||
bFalse := s.f.NewBlock(ssa.BlockPlain)
|
||||
bEnd := s.f.NewBlock(ssa.BlockPlain)
|
||||
b.AddEdgeTo(bTrue)
|
||||
b.AddEdgeTo(bFalse)
|
||||
b.Likely = ssa.BranchLikely // most loong64 machines support the LSX
|
||||
|
||||
// We have the intrinsic - use it directly.
|
||||
s.startBlock(bTrue)
|
||||
s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Call the pure Go version.
|
||||
s.startBlock(bFalse)
|
||||
s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT]
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Merge results.
|
||||
s.startBlock(bEnd)
|
||||
return s.variable(n, types.Types[types.TINT])
|
||||
}
|
||||
}
|
||||
|
||||
addF("math/bits", "OnesCount64",
|
||||
makeOnesCountAMD64(ssa.OpPopCount64),
|
||||
sys.AMD64)
|
||||
addF("math/bits", "OnesCount64",
|
||||
makeOnesCountLoong64(ssa.OpPopCount64),
|
||||
sys.Loong64)
|
||||
addF("math/bits", "OnesCount64",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
|
||||
@ -1032,6 +1066,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
addF("math/bits", "OnesCount32",
|
||||
makeOnesCountAMD64(ssa.OpPopCount32),
|
||||
sys.AMD64)
|
||||
addF("math/bits", "OnesCount32",
|
||||
makeOnesCountLoong64(ssa.OpPopCount32),
|
||||
sys.Loong64)
|
||||
addF("math/bits", "OnesCount32",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
|
||||
@ -1040,6 +1077,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
addF("math/bits", "OnesCount16",
|
||||
makeOnesCountAMD64(ssa.OpPopCount16),
|
||||
sys.AMD64)
|
||||
addF("math/bits", "OnesCount16",
|
||||
makeOnesCountLoong64(ssa.OpPopCount16),
|
||||
sys.Loong64)
|
||||
addF("math/bits", "OnesCount16",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
|
||||
|
@ -407,6 +407,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
||||
{"loong64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "Len64"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "Len8"}: struct{}{},
|
||||
{"loong64", "internal/runtime/sys", "OnesCount64"}: struct{}{},
|
||||
{"loong64", "math", "Abs"}: struct{}{},
|
||||
{"loong64", "math", "Copysign"}: struct{}{},
|
||||
{"loong64", "math", "FMA"}: struct{}{},
|
||||
@ -421,6 +422,9 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
||||
{"loong64", "math/bits", "Len16"}: struct{}{},
|
||||
{"loong64", "math/bits", "Len32"}: struct{}{},
|
||||
{"loong64", "math/bits", "Len64"}: struct{}{},
|
||||
{"loong64", "math/bits", "OnesCount16"}: struct{}{},
|
||||
{"loong64", "math/bits", "OnesCount32"}: struct{}{},
|
||||
{"loong64", "math/bits", "OnesCount64"}: struct{}{},
|
||||
{"loong64", "math/bits", "Reverse"}: struct{}{},
|
||||
{"loong64", "math/bits", "Reverse8"}: struct{}{},
|
||||
{"loong64", "math/bits", "Reverse16"}: struct{}{},
|
||||
|
@ -151,6 +151,7 @@ func InitConfig() {
|
||||
ir.Syms.ARMHasVFPv4 = typecheck.LookupRuntimeVar("armHasVFPv4") // bool
|
||||
ir.Syms.ARM64HasATOMICS = typecheck.LookupRuntimeVar("arm64HasATOMICS") // bool
|
||||
ir.Syms.Loong64HasLAM_BH = typecheck.LookupRuntimeVar("loong64HasLAM_BH") // bool
|
||||
ir.Syms.Loong64HasLSX = typecheck.LookupRuntimeVar("loong64HasLSX") // bool
|
||||
ir.Syms.Staticuint64s = typecheck.LookupRuntimeVar("staticuint64s")
|
||||
ir.Syms.Typedmemmove = typecheck.LookupRuntimeFunc("typedmemmove")
|
||||
ir.Syms.Udiv = typecheck.LookupRuntimeVar("udiv") // asm func with special ABI
|
||||
|
@ -290,5 +290,6 @@ var x86HasFMA bool
|
||||
var armHasVFPv4 bool
|
||||
var arm64HasATOMICS bool
|
||||
var loong64HasLAM_BH bool
|
||||
var loong64HasLSX bool
|
||||
|
||||
func asanregisterglobals(unsafe.Pointer, uintptr)
|
||||
|
@ -238,6 +238,7 @@ var runtimeDecls = [...]struct {
|
||||
{"armHasVFPv4", varTag, 6},
|
||||
{"arm64HasATOMICS", varTag, 6},
|
||||
{"loong64HasLAM_BH", varTag, 6},
|
||||
{"loong64HasLSX", varTag, 6},
|
||||
{"asanregisterglobals", funcTag, 130},
|
||||
}
|
||||
|
||||
|
@ -217,6 +217,7 @@ var builtins = [...]struct {
|
||||
{"runtime.armHasVFPv4", 0},
|
||||
{"runtime.arm64HasATOMICS", 0},
|
||||
{"runtime.loong64HasLAM_BH", 0},
|
||||
{"runtime.loong64HasLSX", 0},
|
||||
{"runtime.asanregisterglobals", 1},
|
||||
{"runtime.deferproc", 1},
|
||||
{"runtime.deferprocStack", 1},
|
||||
|
@ -82,6 +82,7 @@ var ARM64 struct {
|
||||
// The struct is padded to avoid false sharing.
|
||||
var Loong64 struct {
|
||||
_ CacheLinePad
|
||||
HasLSX bool // support 128-bit vector extension
|
||||
HasCRC32 bool // support CRC instruction
|
||||
HasLAMCAS bool // support AMCAS[_DB].{B/H/W/D}
|
||||
HasLAM_BH bool // support AM{SWAP/ADD}[_DB].{B/H} instruction
|
||||
|
@ -26,6 +26,7 @@ func get_cpucfg(reg uint32) uint32
|
||||
|
||||
func doinit() {
|
||||
options = []option{
|
||||
{Name: "lsx", Feature: &Loong64.HasLSX},
|
||||
{Name: "crc32", Feature: &Loong64.HasCRC32},
|
||||
{Name: "lamcas", Feature: &Loong64.HasLAMCAS},
|
||||
{Name: "lam_bh", Feature: &Loong64.HasLAM_BH},
|
||||
@ -41,13 +42,13 @@ func doinit() {
|
||||
cfg1 := get_cpucfg(1)
|
||||
cfg2 := get_cpucfg(2)
|
||||
|
||||
Loong64.HasCRC32 = isSet(cfg1, cpucfg1_CRC32)
|
||||
Loong64.HasLAMCAS = isSet(cfg2, cpucfg2_LAM_BH)
|
||||
Loong64.HasLAM_BH = isSet(cfg2, cpucfg2_LAMCAS)
|
||||
Loong64.HasCRC32 = cfgIsSet(cfg1, cpucfg1_CRC32)
|
||||
Loong64.HasLAMCAS = cfgIsSet(cfg2, cpucfg2_LAM_BH)
|
||||
Loong64.HasLAM_BH = cfgIsSet(cfg2, cpucfg2_LAMCAS)
|
||||
|
||||
osInit()
|
||||
}
|
||||
|
||||
func isSet(cfg uint32, val uint32) bool {
|
||||
func cfgIsSet(cfg uint32, val uint32) bool {
|
||||
return cfg&val != 0
|
||||
}
|
||||
|
@ -10,7 +10,17 @@ package cpu
|
||||
// initialized.
|
||||
var HWCap uint
|
||||
|
||||
// HWCAP bits. These are exposed by the Linux kernel.
|
||||
const (
|
||||
hwcap_LOONGARCH_LSX = 1 << 4
|
||||
)
|
||||
|
||||
func hwcapInit() {
|
||||
// TODO: Features that require kernel support like LSX and LASX can
|
||||
// be detected here once needed in std library or by the compiler.
|
||||
Loong64.HasLSX = hwcIsSet(HWCap, hwcap_LOONGARCH_LSX)
|
||||
}
|
||||
|
||||
func hwcIsSet(hwc uint, val uint) bool {
|
||||
return hwc&val != 0
|
||||
}
|
||||
|
@ -30,6 +30,8 @@ var (
|
||||
|
||||
armHasVFPv4 bool
|
||||
|
||||
arm64HasATOMICS bool
|
||||
arm64HasATOMICS bool
|
||||
|
||||
loong64HasLAM_BH bool
|
||||
loong64HasLSX bool
|
||||
)
|
||||
|
@ -750,8 +750,10 @@ func cpuinit(env string) {
|
||||
|
||||
case "arm64":
|
||||
arm64HasATOMICS = cpu.ARM64.HasATOMICS
|
||||
|
||||
case "loong64":
|
||||
loong64HasLAM_BH = cpu.Loong64.HasLAM_BH
|
||||
loong64HasLSX = cpu.Loong64.HasLSX
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -156,6 +156,7 @@ func OnesCount(n uint) int {
|
||||
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
|
||||
// amd64:"POPCNTQ"
|
||||
// arm64:"VCNT","VUADDLV"
|
||||
// loong64:"VPCNTV"
|
||||
// s390x:"POPCNT"
|
||||
// ppc64x:"POPCNTD"
|
||||
// wasm:"I64Popcnt"
|
||||
@ -166,6 +167,7 @@ func OnesCount64(n uint64) int {
|
||||
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
|
||||
// amd64:"POPCNTQ"
|
||||
// arm64:"VCNT","VUADDLV"
|
||||
// loong64:"VPCNTV"
|
||||
// s390x:"POPCNT"
|
||||
// ppc64x:"POPCNTD"
|
||||
// wasm:"I64Popcnt"
|
||||
@ -176,6 +178,7 @@ func OnesCount32(n uint32) int {
|
||||
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
|
||||
// amd64:"POPCNTL"
|
||||
// arm64:"VCNT","VUADDLV"
|
||||
// loong64:"VPCNTW"
|
||||
// s390x:"POPCNT"
|
||||
// ppc64x:"POPCNTW"
|
||||
// wasm:"I64Popcnt"
|
||||
@ -186,6 +189,7 @@ func OnesCount16(n uint16) int {
|
||||
// amd64/v2:-".*x86HasPOPCNT" amd64/v3:-".*x86HasPOPCNT"
|
||||
// amd64:"POPCNTL"
|
||||
// arm64:"VCNT","VUADDLV"
|
||||
// loong64:"VPCNTH"
|
||||
// s390x:"POPCNT"
|
||||
// ppc64x:"POPCNTW"
|
||||
// wasm:"I64Popcnt"
|
||||
|
Loading…
Reference in New Issue
Block a user