mirror of
https://github.com/golang/go
synced 2024-11-21 21:44:40 -07:00
cmd/compile: implement FMA codegen for loong64
Benchmark results on Loongson 3A5000 and 3A6000:

goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A6000 @ 2500.00MHz
    |  bench.old   |              bench.new              |
    |    sec/op    |   sec/op     vs base                |
FMA   25.930n ± 0%   2.002n ± 0%  -92.28% (p=0.000 n=10)

goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A5000 @ 2500.00MHz
    |  bench.old   |              bench.new              |
    |    sec/op    |   sec/op     vs base                |
FMA   32.840n ± 0%   2.002n ± 0%  -93.90% (p=0.000 n=10)

Updates #59120

This patch is a copy of CL 483355.

Co-authored-by: WANG Xuerui <git@xen0n.name>
Change-Id: I88b89d23f00864f9173a182a47ee135afec7ed6e
Reviewed-on: https://go-review.googlesource.com/c/go/+/625335
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
This commit is contained in:
parent
2751443e92
commit
e6cc9d228a
@ -123,7 +123,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.From.Reg = x
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = y
|
||||
case ssa.OpLOONG64MOVVnop:
|
||||
case ssa.OpLOONG64MOVVnop,
|
||||
ssa.OpLOONG64LoweredRound32F,
|
||||
ssa.OpLOONG64LoweredRound64F:
|
||||
// nothing to do
|
||||
case ssa.OpLoadReg:
|
||||
if v.Type.IsFlags() {
|
||||
@ -320,6 +322,30 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.Reg = v.Args[1].Reg()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = loong64.REG_FCC0
|
||||
|
||||
case ssa.OpLOONG64FMADDF,
|
||||
ssa.OpLOONG64FMADDD,
|
||||
ssa.OpLOONG64FMSUBF,
|
||||
ssa.OpLOONG64FMSUBD,
|
||||
ssa.OpLOONG64FNMADDF,
|
||||
ssa.OpLOONG64FNMADDD,
|
||||
ssa.OpLOONG64FNMSUBF,
|
||||
ssa.OpLOONG64FNMSUBD:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
// r=(FMA x y z) -> FMADDD z, y, x, r
|
||||
// the SSA operand order is for taking advantage of
|
||||
// commutativity (that only applies for the first two operands)
|
||||
r := v.Reg()
|
||||
x := v.Args[0].Reg()
|
||||
y := v.Args[1].Reg()
|
||||
z := v.Args[2].Reg()
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = z
|
||||
p.Reg = y
|
||||
p.AddRestSourceReg(x)
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
|
||||
case ssa.OpLOONG64MOVVaddr:
|
||||
p := s.Prog(loong64.AMOVV)
|
||||
p.From.Type = obj.TYPE_ADDR
|
||||
|
@ -211,7 +211,7 @@
|
||||
|
||||
(CvtBoolToUint8 ...) => (Copy ...)
|
||||
|
||||
(Round(32|64)F ...) => (Copy ...)
|
||||
(Round(32|64)F ...) => (LoweredRound(32|64)F ...)
|
||||
|
||||
// comparisons
|
||||
(Eq8 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)))
|
||||
@ -675,6 +675,21 @@
|
||||
(REMVU _ (MOVVconst [1])) => (MOVVconst [0]) // mod
|
||||
(REMVU x (MOVVconst [c])) && isPowerOfTwo(c) => (ANDconst [c-1] x) // mod
|
||||
|
||||
// FMA
|
||||
(FMA ...) => (FMADDD ...)
|
||||
((ADD|SUB)F (MULF x y) z) && z.Block.Func.useFMA(v) => (FM(ADD|SUB)F x y z)
|
||||
((ADD|SUB)D (MULD x y) z) && z.Block.Func.useFMA(v) => (FM(ADD|SUB)D x y z)
|
||||
// z - xy -> -(xy - z)
|
||||
(SUBF z (MULF x y)) && z.Block.Func.useFMA(v) => (FNMSUBF x y z)
|
||||
(SUBD z (MULD x y)) && z.Block.Func.useFMA(v) => (FNMSUBD x y z)
|
||||
// z + (-xy) -> -(xy - z)
|
||||
// z - (-xy) -> xy + z
|
||||
((ADD|SUB)F z (NEGF (MULF x y))) && z.Block.Func.useFMA(v) => (F(NMSUB|MADD)F x y z)
|
||||
((ADD|SUB)D z (NEGD (MULD x y))) && z.Block.Func.useFMA(v) => (F(NMSUB|MADD)D x y z)
|
||||
// -xy - z -> -(xy + z)
|
||||
(SUBF (NEGF (MULF x y)) z) && z.Block.Func.useFMA(v) => (FNMADDF x y z)
|
||||
(SUBD (NEGD (MULD x y)) z) && z.Block.Func.useFMA(v) => (FNMADDD x y z)
|
||||
|
||||
// generic simplifications
|
||||
(ADDV x (NEGV y)) => (SUBV x y)
|
||||
(SUBV x x) => (MOVVconst [0])
|
||||
|
@ -151,6 +151,7 @@ func init() {
|
||||
fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
|
||||
fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
|
||||
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
|
||||
fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
|
||||
fp2flags = regInfo{inputs: []regMask{fp, fp}}
|
||||
fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
|
||||
fp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}}
|
||||
@ -193,6 +194,15 @@ func init() {
|
||||
{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0 | arg1)
|
||||
{name: "NORconst", argLength: 1, reg: gp11, asm: "NOR", aux: "Int64"}, // ^(arg0 | auxInt)
|
||||
|
||||
{name: "FMADDF", argLength: 3, reg: fp31, asm: "FMADDF", commutative: true, typ: "Float32"}, // (arg0 * arg1) + arg2
|
||||
{name: "FMADDD", argLength: 3, reg: fp31, asm: "FMADDD", commutative: true, typ: "Float64"}, // (arg0 * arg1) + arg2
|
||||
{name: "FMSUBF", argLength: 3, reg: fp31, asm: "FMSUBF", commutative: true, typ: "Float32"}, // (arg0 * arg1) - arg2
|
||||
{name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD", commutative: true, typ: "Float64"}, // (arg0 * arg1) - arg2
|
||||
{name: "FNMADDF", argLength: 3, reg: fp31, asm: "FNMADDF", commutative: true, typ: "Float32"}, // -((arg0 * arg1) + arg2)
|
||||
{name: "FNMADDD", argLength: 3, reg: fp31, asm: "FNMADDD", commutative: true, typ: "Float64"}, // -((arg0 * arg1) + arg2)
|
||||
{name: "FNMSUBF", argLength: 3, reg: fp31, asm: "FNMSUBF", commutative: true, typ: "Float32"}, // -((arg0 * arg1) - arg2)
|
||||
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD", commutative: true, typ: "Float64"}, // -((arg0 * arg1) - arg2)
|
||||
|
||||
{name: "NEGV", argLength: 1, reg: gp11}, // -arg0
|
||||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
@ -330,6 +340,10 @@ func init() {
|
||||
{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64
|
||||
{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32
|
||||
|
||||
// Round ops to block fused-multiply-add extraction.
|
||||
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
|
||||
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
|
||||
|
||||
// function calls
|
||||
{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem
|
||||
{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem
|
||||
|
@ -1783,6 +1783,14 @@ const (
|
||||
OpLOONG64XORconst
|
||||
OpLOONG64NOR
|
||||
OpLOONG64NORconst
|
||||
OpLOONG64FMADDF
|
||||
OpLOONG64FMADDD
|
||||
OpLOONG64FMSUBF
|
||||
OpLOONG64FMSUBD
|
||||
OpLOONG64FNMADDF
|
||||
OpLOONG64FNMADDD
|
||||
OpLOONG64FNMSUBF
|
||||
OpLOONG64FNMSUBD
|
||||
OpLOONG64NEGV
|
||||
OpLOONG64NEGF
|
||||
OpLOONG64NEGD
|
||||
@ -1887,6 +1895,8 @@ const (
|
||||
OpLOONG64TRUNCDV
|
||||
OpLOONG64MOVFD
|
||||
OpLOONG64MOVDF
|
||||
OpLOONG64LoweredRound32F
|
||||
OpLOONG64LoweredRound64F
|
||||
OpLOONG64CALLstatic
|
||||
OpLOONG64CALLtail
|
||||
OpLOONG64CALLclosure
|
||||
@ -23928,6 +23938,134 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMADDF",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFMADDF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMADDD",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFMADDD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMSUBF",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFMSUBF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMSUBD",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFMSUBD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FNMADDF",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFNMADDF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FNMADDD",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFNMADDD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FNMSUBF",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFNMSUBF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FNMSUBD",
|
||||
argLen: 3,
|
||||
commutative: true,
|
||||
asm: loong64.AFNMSUBD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NEGV",
|
||||
argLen: 1,
|
||||
@ -25326,6 +25464,32 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredRound32F",
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredRound64F",
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CALLstatic",
|
||||
auxType: auxCallOff,
|
||||
|
@ -216,6 +216,9 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
return rewriteValueLOONG64_OpEqB(v)
|
||||
case OpEqPtr:
|
||||
return rewriteValueLOONG64_OpEqPtr(v)
|
||||
case OpFMA:
|
||||
v.Op = OpLOONG64FMADDD
|
||||
return true
|
||||
case OpGetCallerPC:
|
||||
v.Op = OpLOONG64LoweredGetCallerPC
|
||||
return true
|
||||
@ -244,6 +247,10 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
return rewriteValueLOONG64_OpIsNonNil(v)
|
||||
case OpIsSliceInBounds:
|
||||
return rewriteValueLOONG64_OpIsSliceInBounds(v)
|
||||
case OpLOONG64ADDD:
|
||||
return rewriteValueLOONG64_OpLOONG64ADDD(v)
|
||||
case OpLOONG64ADDF:
|
||||
return rewriteValueLOONG64_OpLOONG64ADDF(v)
|
||||
case OpLOONG64ADDV:
|
||||
return rewriteValueLOONG64_OpLOONG64ADDV(v)
|
||||
case OpLOONG64ADDVconst:
|
||||
@ -392,6 +399,10 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
return rewriteValueLOONG64_OpLOONG64SRLV(v)
|
||||
case OpLOONG64SRLVconst:
|
||||
return rewriteValueLOONG64_OpLOONG64SRLVconst(v)
|
||||
case OpLOONG64SUBD:
|
||||
return rewriteValueLOONG64_OpLOONG64SUBD(v)
|
||||
case OpLOONG64SUBF:
|
||||
return rewriteValueLOONG64_OpLOONG64SUBF(v)
|
||||
case OpLOONG64SUBV:
|
||||
return rewriteValueLOONG64_OpLOONG64SUBV(v)
|
||||
case OpLOONG64SUBVconst:
|
||||
@ -596,10 +607,10 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
case OpRotateLeft8:
|
||||
return rewriteValueLOONG64_OpRotateLeft8(v)
|
||||
case OpRound32F:
|
||||
v.Op = OpCopy
|
||||
v.Op = OpLOONG64LoweredRound32F
|
||||
return true
|
||||
case OpRound64F:
|
||||
v.Op = OpCopy
|
||||
v.Op = OpLOONG64LoweredRound64F
|
||||
return true
|
||||
case OpRsh16Ux16:
|
||||
return rewriteValueLOONG64_OpRsh16Ux16(v)
|
||||
@ -1410,6 +1421,104 @@ func rewriteValueLOONG64_OpIsSliceInBounds(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValueLOONG64_OpLOONG64ADDD(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (ADDD (MULD x y) z)
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FMADDD x y z)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
if v_0.Op != OpLOONG64MULD {
|
||||
continue
|
||||
}
|
||||
y := v_0.Args[1]
|
||||
x := v_0.Args[0]
|
||||
z := v_1
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpLOONG64FMADDD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADDD z (NEGD (MULD x y)))
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FNMSUBD x y z)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
z := v_0
|
||||
if v_1.Op != OpLOONG64NEGD {
|
||||
continue
|
||||
}
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpLOONG64MULD {
|
||||
continue
|
||||
}
|
||||
y := v_1_0.Args[1]
|
||||
x := v_1_0.Args[0]
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpLOONG64FNMSUBD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueLOONG64_OpLOONG64ADDF(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (ADDF (MULF x y) z)
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FMADDF x y z)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
if v_0.Op != OpLOONG64MULF {
|
||||
continue
|
||||
}
|
||||
y := v_0.Args[1]
|
||||
x := v_0.Args[0]
|
||||
z := v_1
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpLOONG64FMADDF)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADDF z (NEGF (MULF x y)))
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FNMSUBF x y z)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
z := v_0
|
||||
if v_1.Op != OpLOONG64NEGF {
|
||||
continue
|
||||
}
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpLOONG64MULF {
|
||||
continue
|
||||
}
|
||||
y := v_1_0.Args[1]
|
||||
x := v_1_0.Args[0]
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpLOONG64FNMSUBF)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueLOONG64_OpLOONG64ADDV(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@ -5944,6 +6053,168 @@ func rewriteValueLOONG64_OpLOONG64SRLVconst(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueLOONG64_OpLOONG64SUBD(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (SUBD (MULD x y) z)
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FMSUBD x y z)
|
||||
for {
|
||||
if v_0.Op != OpLOONG64MULD {
|
||||
break
|
||||
}
|
||||
y := v_0.Args[1]
|
||||
x := v_0.Args[0]
|
||||
z := v_1
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FMSUBD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
// match: (SUBD z (MULD x y))
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FNMSUBD x y z)
|
||||
for {
|
||||
z := v_0
|
||||
if v_1.Op != OpLOONG64MULD {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
x := v_1.Args[0]
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FNMSUBD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
// match: (SUBD z (NEGD (MULD x y)))
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FMADDD x y z)
|
||||
for {
|
||||
z := v_0
|
||||
if v_1.Op != OpLOONG64NEGD {
|
||||
break
|
||||
}
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpLOONG64MULD {
|
||||
break
|
||||
}
|
||||
y := v_1_0.Args[1]
|
||||
x := v_1_0.Args[0]
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FMADDD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
// match: (SUBD (NEGD (MULD x y)) z)
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FNMADDD x y z)
|
||||
for {
|
||||
if v_0.Op != OpLOONG64NEGD {
|
||||
break
|
||||
}
|
||||
v_0_0 := v_0.Args[0]
|
||||
if v_0_0.Op != OpLOONG64MULD {
|
||||
break
|
||||
}
|
||||
y := v_0_0.Args[1]
|
||||
x := v_0_0.Args[0]
|
||||
z := v_1
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FNMADDD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueLOONG64_OpLOONG64SUBF(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (SUBF (MULF x y) z)
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FMSUBF x y z)
|
||||
for {
|
||||
if v_0.Op != OpLOONG64MULF {
|
||||
break
|
||||
}
|
||||
y := v_0.Args[1]
|
||||
x := v_0.Args[0]
|
||||
z := v_1
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FMSUBF)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
// match: (SUBF z (MULF x y))
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FNMSUBF x y z)
|
||||
for {
|
||||
z := v_0
|
||||
if v_1.Op != OpLOONG64MULF {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[1]
|
||||
x := v_1.Args[0]
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FNMSUBF)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
// match: (SUBF z (NEGF (MULF x y)))
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FMADDF x y z)
|
||||
for {
|
||||
z := v_0
|
||||
if v_1.Op != OpLOONG64NEGF {
|
||||
break
|
||||
}
|
||||
v_1_0 := v_1.Args[0]
|
||||
if v_1_0.Op != OpLOONG64MULF {
|
||||
break
|
||||
}
|
||||
y := v_1_0.Args[1]
|
||||
x := v_1_0.Args[0]
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FMADDF)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
// match: (SUBF (NEGF (MULF x y)) z)
|
||||
// cond: z.Block.Func.useFMA(v)
|
||||
// result: (FNMADDF x y z)
|
||||
for {
|
||||
if v_0.Op != OpLOONG64NEGF {
|
||||
break
|
||||
}
|
||||
v_0_0 := v_0.Args[0]
|
||||
if v_0_0.Op != OpLOONG64MULF {
|
||||
break
|
||||
}
|
||||
y := v_0_0.Args[1]
|
||||
x := v_0_0.Args[0]
|
||||
z := v_1
|
||||
if !(z.Block.Func.useFMA(v)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpLOONG64FNMADDF)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueLOONG64_OpLOONG64SUBV(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
|
@ -689,7 +689,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
|
||||
},
|
||||
sys.ARM64, sys.PPC64, sys.RISCV64, sys.S390X)
|
||||
sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X)
|
||||
addF("math", "FMA",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
if !s.config.UseFMA {
|
||||
|
@ -399,6 +399,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
||||
{"loong64", "internal/runtime/sys", "Len8"}: struct{}{},
|
||||
{"loong64", "math", "Abs"}: struct{}{},
|
||||
{"loong64", "math", "Copysign"}: struct{}{},
|
||||
{"loong64", "math", "FMA"}: struct{}{},
|
||||
{"loong64", "math", "sqrt"}: struct{}{},
|
||||
{"loong64", "math/big", "mulWW"}: struct{}{},
|
||||
{"loong64", "math/bits", "Add"}: struct{}{},
|
||||
|
@ -72,6 +72,7 @@ func FusedAdd32(x, y, z float32) float32 {
|
||||
// s390x:"FMADDS\t"
|
||||
// ppc64x:"FMADDS\t"
|
||||
// arm64:"FMADDS"
|
||||
// loong64:"FMADDF\t"
|
||||
// riscv64:"FMADDS\t"
|
||||
return x*y + z
|
||||
}
|
||||
@ -80,11 +81,13 @@ func FusedSub32_a(x, y, z float32) float32 {
|
||||
// s390x:"FMSUBS\t"
|
||||
// ppc64x:"FMSUBS\t"
|
||||
// riscv64:"FMSUBS\t"
|
||||
// loong64:"FMSUBF\t"
|
||||
return x*y - z
|
||||
}
|
||||
|
||||
// FusedSub32_b returns z - x*y computed on float32 operands.
// NOTE(review): the arch-tagged comments inside the body look like
// per-architecture assembly expectations for the return statement (this hunk
// adds the loong64 one); keep them directly above the expression — confirm
// against the codegen test harness.
func FusedSub32_b(x, y, z float32) float32 {
|
||||
// arm64:"FMSUBS"
|
||||
// loong64:"FNMSUBF\t"
|
||||
// riscv64:"FNMSUBS\t"
|
||||
return z - x*y
|
||||
}
|
||||
@ -93,6 +96,7 @@ func FusedAdd64(x, y, z float64) float64 {
|
||||
// s390x:"FMADD\t"
|
||||
// ppc64x:"FMADD\t"
|
||||
// arm64:"FMADDD"
|
||||
// loong64:"FMADDD\t"
|
||||
// riscv64:"FMADDD\t"
|
||||
return x*y + z
|
||||
}
|
||||
@ -101,11 +105,13 @@ func FusedSub64_a(x, y, z float64) float64 {
|
||||
// s390x:"FMSUB\t"
|
||||
// ppc64x:"FMSUB\t"
|
||||
// riscv64:"FMSUBD\t"
|
||||
// loong64:"FMSUBD\t"
|
||||
return x*y - z
|
||||
}
|
||||
|
||||
// FusedSub64_b returns z - x*y computed on float64 operands.
// NOTE(review): the arch-tagged comments inside the body look like
// per-architecture assembly expectations for the return statement (this hunk
// adds the loong64 one); keep them directly above the expression — confirm
// against the codegen test harness.
func FusedSub64_b(x, y, z float64) float64 {
|
||||
// arm64:"FMSUBD"
|
||||
// loong64:"FNMSUBD\t"
|
||||
// riscv64:"FNMSUBD\t"
|
||||
return z - x*y
|
||||
}
|
||||
|
@ -132,6 +132,7 @@ func fma(x, y, z float64) float64 {
|
||||
// amd64:"VFMADD231SD"
|
||||
// arm/6:"FMULAD"
|
||||
// arm64:"FMADDD"
|
||||
// loong64:"FMADDD"
|
||||
// s390x:"FMADD"
|
||||
// ppc64x:"FMADD"
|
||||
// riscv64:"FMADDD"
|
||||
|
Loading…
Reference in New Issue
Block a user