1
0
mirror of https://github.com/golang/go synced 2024-11-25 07:47:56 -07:00

cmd/compile: support float min/max instructions on PPC64

This enables efficient use of the builtin min/max function
for float64 and float32 types on GOPPC64 >= power9.

Extend the assembler to support xsminjdp/xsmaxjdp and use
them to implement float min/max.

Simplify the VSX xx3 opcode rules to allow FPR arguments,
if all arguments are an FPR.

Change-Id: I15882a4ce5dc46eba71d683cf1d184dc4236a328
Reviewed-on: https://go-review.googlesource.com/c/go/+/574535
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Than McIntosh <thanm@google.com>
This commit is contained in:
Paul E. Murphy 2024-03-22 11:41:58 -05:00 committed by Paul Murphy
parent a49952445f
commit dfb17c126c
11 changed files with 153 additions and 9 deletions

View File

@ -1133,7 +1133,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
PNOP // 0700000000000000 PNOP // 0700000000000000
SETB CR1,R3 // 7c640100 SETB CR1,R3 // 7c640100
VCLZLSBB V1, R2 // 10400e02 VCLZLSBB V1,R2 // 10400e02
VCTZLSBB V1, R2 // 10410e02 VCTZLSBB V1,R2 // 10410e02
XSMAXJDP VS1,VS2,VS3 // f0611480
XSMINJDP VS1,VS2,VS3 // f06114c0
RET RET

View File

@ -582,7 +582,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN, ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV, ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW: ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
r := v.Reg() r := v.Reg()
r1 := v.Args[0].Reg() r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg() r2 := v.Args[1].Reg()

View File

@ -14,6 +14,9 @@
(Sub32F ...) => (FSUBS ...) (Sub32F ...) => (FSUBS ...)
(Sub64F ...) => (FSUB ...) (Sub64F ...) => (FSUB ...)
(Min(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMINJDP x y)
(Max(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMAXJDP x y)
// Combine 64 bit integer multiply and adds // Combine 64 bit integer multiply and adds
(ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z) (ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)

View File

@ -189,6 +189,10 @@ func init() {
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1 {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1 {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
// Note, the FPU works with float64 in register.
{name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
{name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit) {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit) {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit) {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)

View File

@ -2116,6 +2116,8 @@ const (
OpPPC64SUBFCconst OpPPC64SUBFCconst
OpPPC64FSUB OpPPC64FSUB
OpPPC64FSUBS OpPPC64FSUBS
OpPPC64XSMINJDP
OpPPC64XSMAXJDP
OpPPC64MULLD OpPPC64MULLD
OpPPC64MULLW OpPPC64MULLW
OpPPC64MULLDconst OpPPC64MULLDconst
@ -28397,6 +28399,34 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "XSMINJDP",
argLen: 2,
asm: ppc64.AXSMINJDP,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
{1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
},
outputs: []outputInfo{
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
},
},
},
{
name: "XSMAXJDP",
argLen: 2,
asm: ppc64.AXSMAXJDP,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
{1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
},
outputs: []outputInfo{
{0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
},
},
},
{ {
name: "MULLD", name: "MULLD",
argLen: 2, argLen: 2,

View File

@ -340,6 +340,14 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpLsh8x64(v) return rewriteValuePPC64_OpLsh8x64(v)
case OpLsh8x8: case OpLsh8x8:
return rewriteValuePPC64_OpLsh8x8(v) return rewriteValuePPC64_OpLsh8x8(v)
case OpMax32F:
return rewriteValuePPC64_OpMax32F(v)
case OpMax64F:
return rewriteValuePPC64_OpMax64F(v)
case OpMin32F:
return rewriteValuePPC64_OpMin32F(v)
case OpMin64F:
return rewriteValuePPC64_OpMin64F(v)
case OpMod16: case OpMod16:
return rewriteValuePPC64_OpMod16(v) return rewriteValuePPC64_OpMod16(v)
case OpMod16u: case OpMod16u:
@ -3296,6 +3304,78 @@ func rewriteValuePPC64_OpLsh8x8(v *Value) bool {
return true return true
} }
} }
func rewriteValuePPC64_OpMax32F(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Max32F x y)
// cond: buildcfg.GOPPC64 >= 9
// result: (XSMAXJDP x y)
for {
x := v_0
y := v_1
if !(buildcfg.GOPPC64 >= 9) {
break
}
v.reset(OpPPC64XSMAXJDP)
v.AddArg2(x, y)
return true
}
return false
}
func rewriteValuePPC64_OpMax64F(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Max64F x y)
// cond: buildcfg.GOPPC64 >= 9
// result: (XSMAXJDP x y)
for {
x := v_0
y := v_1
if !(buildcfg.GOPPC64 >= 9) {
break
}
v.reset(OpPPC64XSMAXJDP)
v.AddArg2(x, y)
return true
}
return false
}
func rewriteValuePPC64_OpMin32F(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Min32F x y)
// cond: buildcfg.GOPPC64 >= 9
// result: (XSMINJDP x y)
for {
x := v_0
y := v_1
if !(buildcfg.GOPPC64 >= 9) {
break
}
v.reset(OpPPC64XSMINJDP)
v.AddArg2(x, y)
return true
}
return false
}
func rewriteValuePPC64_OpMin64F(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (Min64F x y)
// cond: buildcfg.GOPPC64 >= 9
// result: (XSMINJDP x y)
for {
x := v_0
y := v_1
if !(buildcfg.GOPPC64 >= 9) {
break
}
v.reset(OpPPC64XSMINJDP)
v.AddArg2(x, y)
return true
}
return false
}
func rewriteValuePPC64_OpMod16(v *Value) bool { func rewriteValuePPC64_OpMod16(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]

View File

@ -3698,8 +3698,15 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
// string comparisons during walk, not ssagen. // string comparisons during walk, not ssagen.
if typ.IsFloat() { if typ.IsFloat() {
hasIntrinsic := false
switch Arch.LinkArch.Family { switch Arch.LinkArch.Family {
case sys.AMD64, sys.ARM64, sys.RISCV64: case sys.AMD64, sys.ARM64, sys.RISCV64:
hasIntrinsic = true
case sys.PPC64:
hasIntrinsic = buildcfg.GOPPC64 >= 9
}
if hasIntrinsic {
var op ssa.Op var op ssa.Op
switch { switch {
case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN: case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:

View File

@ -1074,6 +1074,8 @@ const (
AXVCVSXWSP AXVCVSXWSP
AXVCVUXDSP AXVCVUXDSP
AXVCVUXWSP AXVCVUXWSP
AXSMAXJDP
AXSMINJDP
ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map. ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map.
// aliases // aliases

View File

@ -610,5 +610,7 @@ var Anames = []string{
"XVCVSXWSP", "XVCVSXWSP",
"XVCVUXDSP", "XVCVUXDSP",
"XVCVUXWSP", "XVCVUXWSP",
"XSMAXJDP",
"XSMINJDP",
"LASTAOUT", "LASTAOUT",
} }

View File

@ -429,9 +429,9 @@ var optabBase = []Optab{
{as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4}, {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
{as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4}, {as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
/* VSX logical */ /* VSX xx3-form */
{as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */ {as: AXXLAND, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 90, size: 4}, /* vsx xx3-form (FPR usage) */
{as: AXXLOR, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx or, xx3-form */ {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx xx3-form */
/* VSX select */ /* VSX select */
{as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */ {as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */
@ -1679,16 +1679,17 @@ func buildop(ctxt *obj.Link) {
opset(AMTVSRWZ, r0) opset(AMTVSRWZ, r0)
opset(AMTVSRWS, r0) opset(AMTVSRWS, r0)
case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */ case AXXLAND:
opset(AXXLANDC, r0) opset(AXXLANDC, r0)
opset(AXXLEQV, r0) opset(AXXLEQV, r0)
opset(AXXLNAND, r0) opset(AXXLNAND, r0)
case AXXLOR: /* xxlorc, xxlnor, xxlor, xxlxor */
opset(AXXLORC, r0) opset(AXXLORC, r0)
opset(AXXLNOR, r0) opset(AXXLNOR, r0)
opset(AXXLORQ, r0) opset(AXXLORQ, r0)
opset(AXXLXOR, r0) opset(AXXLXOR, r0)
opset(AXXLOR, r0)
opset(AXSMAXJDP, r0)
opset(AXSMINJDP, r0)
case AXXSEL: /* xxsel */ case AXXSEL: /* xxsel */
opset(AXXSEL, r0) opset(AXXSEL, r0)
@ -4769,6 +4770,10 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
return OPVXX3(60, 146, 0) /* xxlor - v2.06 */ return OPVXX3(60, 146, 0) /* xxlor - v2.06 */
case AXXLXOR: case AXXLXOR:
return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */ return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */
case AXSMINJDP:
return OPVXX3(60, 152, 0) /* xsminjdp - v3.0 */
case AXSMAXJDP:
return OPVXX3(60, 144, 0) /* xsmaxjdp - v3.0 */
case AXXSEL: case AXXSEL:
return OPVXX4(60, 3, 0) /* xxsel - v2.06 */ return OPVXX4(60, 3, 0) /* xxsel - v2.06 */

View File

@ -165,6 +165,8 @@ func Float64Min(a, b float64) float64 {
// amd64:"MINSD" // amd64:"MINSD"
// arm64:"FMIND" // arm64:"FMIND"
// riscv64:"FMIN" // riscv64:"FMIN"
// ppc64/power9:"XSMINJDP"
// ppc64/power10:"XSMINJDP"
return min(a, b) return min(a, b)
} }
@ -172,6 +174,8 @@ func Float64Max(a, b float64) float64 {
// amd64:"MINSD" // amd64:"MINSD"
// arm64:"FMAXD" // arm64:"FMAXD"
// riscv64:"FMAX" // riscv64:"FMAX"
// ppc64/power9:"XSMAXJDP"
// ppc64/power10:"XSMAXJDP"
return max(a, b) return max(a, b)
} }
@ -179,6 +183,8 @@ func Float32Min(a, b float32) float32 {
// amd64:"MINSS" // amd64:"MINSS"
// arm64:"FMINS" // arm64:"FMINS"
// riscv64:"FMINS" // riscv64:"FMINS"
// ppc64/power9:"XSMINJDP"
// ppc64/power10:"XSMINJDP"
return min(a, b) return min(a, b)
} }
@ -186,5 +192,7 @@ func Float32Max(a, b float32) float32 {
// amd64:"MINSS" // amd64:"MINSS"
// arm64:"FMAXS" // arm64:"FMAXS"
// riscv64:"FMAXS" // riscv64:"FMAXS"
// ppc64/power9:"XSMAXJDP"
// ppc64/power10:"XSMAXJDP"
return max(a, b) return max(a, b)
} }