mirror of
https://github.com/golang/go
synced 2024-11-25 07:47:56 -07:00
cmd/compile: support float min/max instructions on PPC64
This enables efficient use of the builtin min/max function for float64 and float32 types on GOPPC64 >= power9. Extend the assembler to support xsminjdp/xsmaxjdp and use them to implement float min/max. Simplify the VSX xx3 opcode rules to allow FPR arguments, if all arguments are an FPR.

Change-Id: I15882a4ce5dc46eba71d683cf1d184dc4236a328
Reviewed-on: https://go-review.googlesource.com/c/go/+/574535
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Than McIntosh <thanm@google.com>
This commit is contained in:
parent
a49952445f
commit
dfb17c126c
7
src/cmd/asm/internal/asm/testdata/ppc64.s
vendored
7
src/cmd/asm/internal/asm/testdata/ppc64.s
vendored
@ -1133,7 +1133,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
|
|||||||
PNOP // 0700000000000000
|
PNOP // 0700000000000000
|
||||||
|
|
||||||
SETB CR1,R3 // 7c640100
|
SETB CR1,R3 // 7c640100
|
||||||
VCLZLSBB V1, R2 // 10400e02
|
VCLZLSBB V1,R2 // 10400e02
|
||||||
VCTZLSBB V1, R2 // 10410e02
|
VCTZLSBB V1,R2 // 10410e02
|
||||||
|
|
||||||
|
XSMAXJDP VS1,VS2,VS3 // f0611480
|
||||||
|
XSMINJDP VS1,VS2,VS3 // f06114c0
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
@ -582,7 +582,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||||||
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
|
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
|
||||||
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
|
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
|
||||||
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
|
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
|
||||||
ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
|
ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
|
||||||
r := v.Reg()
|
r := v.Reg()
|
||||||
r1 := v.Args[0].Reg()
|
r1 := v.Args[0].Reg()
|
||||||
r2 := v.Args[1].Reg()
|
r2 := v.Args[1].Reg()
|
||||||
|
@ -14,6 +14,9 @@
|
|||||||
(Sub32F ...) => (FSUBS ...)
|
(Sub32F ...) => (FSUBS ...)
|
||||||
(Sub64F ...) => (FSUB ...)
|
(Sub64F ...) => (FSUB ...)
|
||||||
|
|
||||||
|
(Min(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMINJDP x y)
|
||||||
|
(Max(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMAXJDP x y)
|
||||||
|
|
||||||
// Combine 64 bit integer multiply and adds
|
// Combine 64 bit integer multiply and adds
|
||||||
(ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)
|
(ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)
|
||||||
|
|
||||||
|
@ -189,6 +189,10 @@ func init() {
|
|||||||
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
|
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
|
||||||
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
|
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
|
||||||
|
|
||||||
|
// Note, the FPU works with float64 in register.
|
||||||
|
{name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
|
||||||
|
{name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
|
||||||
|
|
||||||
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
|
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
|
||||||
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
|
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
|
||||||
{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
|
{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
|
||||||
|
@ -2116,6 +2116,8 @@ const (
|
|||||||
OpPPC64SUBFCconst
|
OpPPC64SUBFCconst
|
||||||
OpPPC64FSUB
|
OpPPC64FSUB
|
||||||
OpPPC64FSUBS
|
OpPPC64FSUBS
|
||||||
|
OpPPC64XSMINJDP
|
||||||
|
OpPPC64XSMAXJDP
|
||||||
OpPPC64MULLD
|
OpPPC64MULLD
|
||||||
OpPPC64MULLW
|
OpPPC64MULLW
|
||||||
OpPPC64MULLDconst
|
OpPPC64MULLDconst
|
||||||
@ -28397,6 +28399,34 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "XSMINJDP",
|
||||||
|
argLen: 2,
|
||||||
|
asm: ppc64.AXSMINJDP,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
|
||||||
|
{1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "XSMAXJDP",
|
||||||
|
argLen: 2,
|
||||||
|
asm: ppc64.AXSMAXJDP,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
|
||||||
|
{1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "MULLD",
|
name: "MULLD",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
@ -340,6 +340,14 @@ func rewriteValuePPC64(v *Value) bool {
|
|||||||
return rewriteValuePPC64_OpLsh8x64(v)
|
return rewriteValuePPC64_OpLsh8x64(v)
|
||||||
case OpLsh8x8:
|
case OpLsh8x8:
|
||||||
return rewriteValuePPC64_OpLsh8x8(v)
|
return rewriteValuePPC64_OpLsh8x8(v)
|
||||||
|
case OpMax32F:
|
||||||
|
return rewriteValuePPC64_OpMax32F(v)
|
||||||
|
case OpMax64F:
|
||||||
|
return rewriteValuePPC64_OpMax64F(v)
|
||||||
|
case OpMin32F:
|
||||||
|
return rewriteValuePPC64_OpMin32F(v)
|
||||||
|
case OpMin64F:
|
||||||
|
return rewriteValuePPC64_OpMin64F(v)
|
||||||
case OpMod16:
|
case OpMod16:
|
||||||
return rewriteValuePPC64_OpMod16(v)
|
return rewriteValuePPC64_OpMod16(v)
|
||||||
case OpMod16u:
|
case OpMod16u:
|
||||||
@ -3296,6 +3304,78 @@ func rewriteValuePPC64_OpLsh8x8(v *Value) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func rewriteValuePPC64_OpMax32F(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (Max32F x y)
|
||||||
|
// cond: buildcfg.GOPPC64 >= 9
|
||||||
|
// result: (XSMAXJDP x y)
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
if !(buildcfg.GOPPC64 >= 9) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64XSMAXJDP)
|
||||||
|
v.AddArg2(x, y)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpMax64F(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (Max64F x y)
|
||||||
|
// cond: buildcfg.GOPPC64 >= 9
|
||||||
|
// result: (XSMAXJDP x y)
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
if !(buildcfg.GOPPC64 >= 9) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64XSMAXJDP)
|
||||||
|
v.AddArg2(x, y)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpMin32F(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (Min32F x y)
|
||||||
|
// cond: buildcfg.GOPPC64 >= 9
|
||||||
|
// result: (XSMINJDP x y)
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
if !(buildcfg.GOPPC64 >= 9) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64XSMINJDP)
|
||||||
|
v.AddArg2(x, y)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
func rewriteValuePPC64_OpMin64F(v *Value) bool {
|
||||||
|
v_1 := v.Args[1]
|
||||||
|
v_0 := v.Args[0]
|
||||||
|
// match: (Min64F x y)
|
||||||
|
// cond: buildcfg.GOPPC64 >= 9
|
||||||
|
// result: (XSMINJDP x y)
|
||||||
|
for {
|
||||||
|
x := v_0
|
||||||
|
y := v_1
|
||||||
|
if !(buildcfg.GOPPC64 >= 9) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64XSMINJDP)
|
||||||
|
v.AddArg2(x, y)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
func rewriteValuePPC64_OpMod16(v *Value) bool {
|
func rewriteValuePPC64_OpMod16(v *Value) bool {
|
||||||
v_1 := v.Args[1]
|
v_1 := v.Args[1]
|
||||||
v_0 := v.Args[0]
|
v_0 := v.Args[0]
|
||||||
|
@ -3698,8 +3698,15 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
|
|||||||
// string comparisons during walk, not ssagen.
|
// string comparisons during walk, not ssagen.
|
||||||
|
|
||||||
if typ.IsFloat() {
|
if typ.IsFloat() {
|
||||||
|
hasIntrinsic := false
|
||||||
switch Arch.LinkArch.Family {
|
switch Arch.LinkArch.Family {
|
||||||
case sys.AMD64, sys.ARM64, sys.RISCV64:
|
case sys.AMD64, sys.ARM64, sys.RISCV64:
|
||||||
|
hasIntrinsic = true
|
||||||
|
case sys.PPC64:
|
||||||
|
hasIntrinsic = buildcfg.GOPPC64 >= 9
|
||||||
|
}
|
||||||
|
|
||||||
|
if hasIntrinsic {
|
||||||
var op ssa.Op
|
var op ssa.Op
|
||||||
switch {
|
switch {
|
||||||
case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:
|
case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:
|
||||||
|
@ -1074,6 +1074,8 @@ const (
|
|||||||
AXVCVSXWSP
|
AXVCVSXWSP
|
||||||
AXVCVUXDSP
|
AXVCVUXDSP
|
||||||
AXVCVUXWSP
|
AXVCVUXWSP
|
||||||
|
AXSMAXJDP
|
||||||
|
AXSMINJDP
|
||||||
ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map.
|
ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map.
|
||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
|
@ -610,5 +610,7 @@ var Anames = []string{
|
|||||||
"XVCVSXWSP",
|
"XVCVSXWSP",
|
||||||
"XVCVUXDSP",
|
"XVCVUXDSP",
|
||||||
"XVCVUXWSP",
|
"XVCVUXWSP",
|
||||||
|
"XSMAXJDP",
|
||||||
|
"XSMINJDP",
|
||||||
"LASTAOUT",
|
"LASTAOUT",
|
||||||
}
|
}
|
||||||
|
@ -429,9 +429,9 @@ var optabBase = []Optab{
|
|||||||
{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
|
{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
|
||||||
{as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
|
{as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
|
||||||
|
|
||||||
/* VSX logical */
|
/* VSX xx3-form */
|
||||||
{as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
|
{as: AXXLAND, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 90, size: 4}, /* vsx xx3-form (FPR usage) */
|
||||||
{as: AXXLOR, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx or, xx3-form */
|
{as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx xx3-form */
|
||||||
|
|
||||||
/* VSX select */
|
/* VSX select */
|
||||||
{as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */
|
{as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */
|
||||||
@ -1679,16 +1679,17 @@ func buildop(ctxt *obj.Link) {
|
|||||||
opset(AMTVSRWZ, r0)
|
opset(AMTVSRWZ, r0)
|
||||||
opset(AMTVSRWS, r0)
|
opset(AMTVSRWS, r0)
|
||||||
|
|
||||||
case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
|
case AXXLAND:
|
||||||
opset(AXXLANDC, r0)
|
opset(AXXLANDC, r0)
|
||||||
opset(AXXLEQV, r0)
|
opset(AXXLEQV, r0)
|
||||||
opset(AXXLNAND, r0)
|
opset(AXXLNAND, r0)
|
||||||
|
|
||||||
case AXXLOR: /* xxlorc, xxlnor, xxlor, xxlxor */
|
|
||||||
opset(AXXLORC, r0)
|
opset(AXXLORC, r0)
|
||||||
opset(AXXLNOR, r0)
|
opset(AXXLNOR, r0)
|
||||||
opset(AXXLORQ, r0)
|
opset(AXXLORQ, r0)
|
||||||
opset(AXXLXOR, r0)
|
opset(AXXLXOR, r0)
|
||||||
|
opset(AXXLOR, r0)
|
||||||
|
opset(AXSMAXJDP, r0)
|
||||||
|
opset(AXSMINJDP, r0)
|
||||||
|
|
||||||
case AXXSEL: /* xxsel */
|
case AXXSEL: /* xxsel */
|
||||||
opset(AXXSEL, r0)
|
opset(AXXSEL, r0)
|
||||||
@ -4769,6 +4770,10 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
|
|||||||
return OPVXX3(60, 146, 0) /* xxlor - v2.06 */
|
return OPVXX3(60, 146, 0) /* xxlor - v2.06 */
|
||||||
case AXXLXOR:
|
case AXXLXOR:
|
||||||
return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */
|
return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */
|
||||||
|
case AXSMINJDP:
|
||||||
|
return OPVXX3(60, 152, 0) /* xsminjdp - v3.0 */
|
||||||
|
case AXSMAXJDP:
|
||||||
|
return OPVXX3(60, 144, 0) /* xsmaxjdp - v3.0 */
|
||||||
|
|
||||||
case AXXSEL:
|
case AXXSEL:
|
||||||
return OPVXX4(60, 3, 0) /* xxsel - v2.06 */
|
return OPVXX4(60, 3, 0) /* xxsel - v2.06 */
|
||||||
|
@ -165,6 +165,8 @@ func Float64Min(a, b float64) float64 {
|
|||||||
// amd64:"MINSD"
|
// amd64:"MINSD"
|
||||||
// arm64:"FMIND"
|
// arm64:"FMIND"
|
||||||
// riscv64:"FMIN"
|
// riscv64:"FMIN"
|
||||||
|
// ppc64/power9:"XSMINJDP"
|
||||||
|
// ppc64/power10:"XSMINJDP"
|
||||||
return min(a, b)
|
return min(a, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,6 +174,8 @@ func Float64Max(a, b float64) float64 {
|
|||||||
// amd64:"MINSD"
|
// amd64:"MINSD"
|
||||||
// arm64:"FMAXD"
|
// arm64:"FMAXD"
|
||||||
// riscv64:"FMAX"
|
// riscv64:"FMAX"
|
||||||
|
// ppc64/power9:"XSMAXJDP"
|
||||||
|
// ppc64/power10:"XSMAXJDP"
|
||||||
return max(a, b)
|
return max(a, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -179,6 +183,8 @@ func Float32Min(a, b float32) float32 {
|
|||||||
// amd64:"MINSS"
|
// amd64:"MINSS"
|
||||||
// arm64:"FMINS"
|
// arm64:"FMINS"
|
||||||
// riscv64:"FMINS"
|
// riscv64:"FMINS"
|
||||||
|
// ppc64/power9:"XSMINJDP"
|
||||||
|
// ppc64/power10:"XSMINJDP"
|
||||||
return min(a, b)
|
return min(a, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -186,5 +192,7 @@ func Float32Max(a, b float32) float32 {
|
|||||||
// amd64:"MINSS"
|
// amd64:"MINSS"
|
||||||
// arm64:"FMAXS"
|
// arm64:"FMAXS"
|
||||||
// riscv64:"FMAXS"
|
// riscv64:"FMAXS"
|
||||||
|
// ppc64/power9:"XSMAXJDP"
|
||||||
|
// ppc64/power10:"XSMAXJDP"
|
||||||
return max(a, b)
|
return max(a, b)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user