1
0
mirror of https://github.com/golang/go synced 2024-11-20 10:54:49 -07:00

cmd/compile,math: improve int<->float conversions on ppc64x

The functions Float64bits and Float64frombits perform
poorly on ppc64x because the int<->float conversions
often result in load and store sequences to handle the
type change. This patch adds more rules to recognize
those sequences and use register to register moves
and avoid unnecessary loads and stores where possible.

There were some existing rules to improve these conversions,
but this provides additional improvements. Included here:

- New instruction FCFIDS to improve on conversion to 32 bit
- Rename Xf2i64 and Xi2f64 as MTVSRD, MFVSRD, to match the asm
- Add rules to lower some of the load/store sequences for
- Added new go asm to ppc64.s testcase.
conversions

Improvements:

BenchmarkAbs-16                2.16          0.93          -56.94%
BenchmarkCopysign-16           2.66          1.18          -55.64%
BenchmarkRound-16              4.82          2.69          -44.19%
BenchmarkSignbit-16            1.71          1.14          -33.33%
BenchmarkFrexp-16              11.4          7.94          -30.35%
BenchmarkLogb-16               10.4          7.34          -29.42%
BenchmarkLdexp-16              15.7          11.2          -28.66%
BenchmarkIlogb-16              10.2          7.32          -28.24%
BenchmarkPowInt-16             69.6          55.9          -19.68%
BenchmarkModf-16               10.1          8.19          -18.91%
BenchmarkLog2-16               17.4          14.3          -17.82%
BenchmarkCbrt-16               45.0          37.3          -17.11%
BenchmarkAtanh-16              57.6          48.3          -16.15%
BenchmarkRemainder-16          76.6          65.4          -14.62%
BenchmarkGamma-16              26.0          22.5          -13.46%
BenchmarkPowFrac-16            197           174           -11.68%
BenchmarkMod-16                112           99.8          -10.89%
BenchmarkAsinh-16              59.9          53.7          -10.35%
BenchmarkAcosh-16              44.8          40.3          -10.04%

Updates #21390

Change-Id: I56cc991fc2e55249d69518d4e1ba76cc23904e35
Reviewed-on: https://go-review.googlesource.com/63290
Reviewed-by: Michael Munday <mike.munday@ibm.com>
This commit is contained in:
Lynn Boger 2017-09-12 12:33:21 -04:00
parent f351dbfa4d
commit 40e25895e3
9 changed files with 175 additions and 63 deletions

View File

@ -550,6 +550,14 @@ label1:
// ftsqrt BF, FRB
FTSQRT F2,$7
// FCFID
// FCFIDS
FCFID F2,F3
FCFIDCC F3,F3
FCFIDS F2,F3
FCFIDSCC F2,F3
//
// CMP
//

View File

@ -152,29 +152,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = y
}
case ssa.OpPPC64Xf2i64:
{
x := v.Args[0].Reg()
y := v.Reg()
p := s.Prog(ppc64.AMFVSRD)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
}
case ssa.OpPPC64Xi2f64:
{
x := v.Args[0].Reg()
y := v.Reg()
p := s.Prog(ppc64.AMTVSRD)
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
}
case ssa.OpPPC64LoweredAtomicAnd8,
ssa.OpPPC64LoweredAtomicOr8:
// SYNC
@ -597,7 +574,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // Ignored; this is for the carry effect.
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB:
case ssa.OpPPC64NEG, ssa.OpPPC64FNEG, ssa.OpPPC64FSQRT, ssa.OpPPC64FSQRTS, ssa.OpPPC64FFLOOR, ssa.OpPPC64FTRUNC, ssa.OpPPC64FCEIL, ssa.OpPPC64FCTIDZ, ssa.OpPPC64FCTIWZ, ssa.OpPPC64FCFID, ssa.OpPPC64FCFIDS, ssa.OpPPC64FRSP, ssa.OpPPC64CNTLZD, ssa.OpPPC64CNTLZW, ssa.OpPPC64POPCNTD, ssa.OpPPC64POPCNTW, ssa.OpPPC64POPCNTB, ssa.OpPPC64MFVSRD, ssa.OpPPC64MTVSRD:
r := v.Reg()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG

View File

@ -57,19 +57,25 @@
(Div64F x y) -> (FDIV x y)
// Lowering float <-> int
(Cvt32to32F x) -> (FRSP (FCFID (Xi2f64 (SignExt32to64 x))))
(Cvt32to64F x) -> (FCFID (Xi2f64 (SignExt32to64 x)))
(Cvt64to32F x) -> (FRSP (FCFID (Xi2f64 x)))
(Cvt64to64F x) -> (FCFID (Xi2f64 x))
(Cvt32to32F x) -> (FCFIDS (MTVSRD (SignExt32to64 x)))
(Cvt32to64F x) -> (FCFID (MTVSRD (SignExt32to64 x)))
(Cvt64to32F x) -> (FCFIDS (MTVSRD x))
(Cvt64to64F x) -> (FCFID (MTVSRD x))
(Cvt32Fto32 x) -> (Xf2i64 (FCTIWZ x))
(Cvt32Fto64 x) -> (Xf2i64 (FCTIDZ x))
(Cvt64Fto32 x) -> (Xf2i64 (FCTIWZ x))
(Cvt64Fto64 x) -> (Xf2i64 (FCTIDZ x))
(Cvt32Fto32 x) -> (MFVSRD (FCTIWZ x))
(Cvt32Fto64 x) -> (MFVSRD (FCTIDZ x))
(Cvt64Fto32 x) -> (MFVSRD (FCTIWZ x))
(Cvt64Fto64 x) -> (MFVSRD (FCTIDZ x))
(Cvt32Fto64F x) -> x // Note x will have the wrong type for patterns dependent on Float32/Float64
(Cvt64Fto32F x) -> (FRSP x)
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) -> (MFVSRD x)
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _)) -> (MTVSRD x)
(FMOVDstore [off] {sym} ptr (MTVSRD x) mem) -> (MOVDstore [off] {sym} ptr x mem)
(MOVDstore [off] {sym} ptr (MFVSRD x) mem) -> (FMOVDstore [off] {sym} ptr x mem)
(Round32F x) -> (LoweredRound32F x)
(Round64F x) -> (LoweredRound64F x)

View File

@ -223,6 +223,7 @@ func init() {
{name: "FCTIDZ", argLength: 1, reg: fp11, asm: "FCTIDZ", typ: "Float64"}, // convert float to 64-bit int round towards zero
{name: "FCTIWZ", argLength: 1, reg: fp11, asm: "FCTIWZ", typ: "Float64"}, // convert float to 32-bit int round towards zero
{name: "FCFID", argLength: 1, reg: fp11, asm: "FCFID", typ: "Float64"}, // convert 64-bit integer to float
{name: "FCFIDS", argLength: 1, reg: fp11, asm: "FCFIDS", typ: "Float32"}, // convert 32-bit integer to float
{name: "FRSP", argLength: 1, reg: fp11, asm: "FRSP", typ: "Float64"}, // round float to 32-bit value
// Movement between float and integer registers with no change in bits; accomplished with stores+loads on PPC.
@ -231,8 +232,8 @@ func init() {
// There are optimizations that should apply -- (Xi2f64 (MOVWload (not-ADD-ptr+offset) ) ) could use
// the word-load instructions. (Xi2f64 (MOVDload ptr )) can be (FMOVDload ptr)
{name: "Xf2i64", argLength: 1, reg: fpgp, typ: "Int64"}, // move 64 bits of F register into G register
{name: "Xi2f64", argLength: 1, reg: gpfp, typ: "Float64"}, // move 64 bits of G register into F register
{name: "MFVSRD", argLength: 1, reg: fpgp, asm: "MFVSRD", typ: "Int64"}, // move 64 bits of F register into G register
{name: "MTVSRD", argLength: 1, reg: gpfp, asm: "MTVSRD", typ: "Float64"}, // move 64 bits of G register into F register
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0&arg1
{name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // arg0&^arg1

View File

@ -1333,9 +1333,10 @@ const (
OpPPC64FCTIDZ
OpPPC64FCTIWZ
OpPPC64FCFID
OpPPC64FCFIDS
OpPPC64FRSP
OpPPC64Xf2i64
OpPPC64Xi2f64
OpPPC64MFVSRD
OpPPC64MTVSRD
OpPPC64AND
OpPPC64ANDN
OpPPC64OR
@ -17086,6 +17087,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FCFIDS",
argLen: 1,
asm: ppc64.AFCFIDS,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
outputs: []outputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
},
},
},
{
name: "FRSP",
argLen: 1,
@ -17100,8 +17114,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "Xf2i64",
name: "MFVSRD",
argLen: 1,
asm: ppc64.AMFVSRD,
reg: regInfo{
inputs: []inputInfo{
{0, 576460743713488896}, // F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26
@ -17112,8 +17127,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "Xi2f64",
name: "MTVSRD",
argLen: 1,
asm: ppc64.AMTVSRD,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29

View File

@ -1307,10 +1307,10 @@ func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
_ = typ
// match: (Cvt32Fto32 x)
// cond:
// result: (Xf2i64 (FCTIWZ x))
// result: (MFVSRD (FCTIWZ x))
for {
x := v.Args[0]
v.reset(OpPPC64Xf2i64)
v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIWZ, typ.Float64)
v0.AddArg(x)
v.AddArg(v0)
@ -1324,10 +1324,10 @@ func rewriteValuePPC64_OpCvt32Fto64_0(v *Value) bool {
_ = typ
// match: (Cvt32Fto64 x)
// cond:
// result: (Xf2i64 (FCTIDZ x))
// result: (MFVSRD (FCTIDZ x))
for {
x := v.Args[0]
v.reset(OpPPC64Xf2i64)
v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIDZ, typ.Float64)
v0.AddArg(x)
v.AddArg(v0)
@ -1353,15 +1353,13 @@ func rewriteValuePPC64_OpCvt32to32F_0(v *Value) bool {
_ = typ
// match: (Cvt32to32F x)
// cond:
// result: (FRSP (FCFID (Xi2f64 (SignExt32to64 x))))
// result: (FCFIDS (MTVSRD (SignExt32to64 x)))
for {
x := v.Args[0]
v.reset(OpPPC64FRSP)
v0 := b.NewValue0(v.Pos, OpPPC64FCFID, typ.Float64)
v1 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64)
v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v2.AddArg(x)
v1.AddArg(v2)
v.reset(OpPPC64FCFIDS)
v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
@ -1374,11 +1372,11 @@ func rewriteValuePPC64_OpCvt32to64F_0(v *Value) bool {
_ = typ
// match: (Cvt32to64F x)
// cond:
// result: (FCFID (Xi2f64 (SignExt32to64 x)))
// result: (FCFID (MTVSRD (SignExt32to64 x)))
for {
x := v.Args[0]
v.reset(OpPPC64FCFID)
v0 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64)
v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
@ -1393,10 +1391,10 @@ func rewriteValuePPC64_OpCvt64Fto32_0(v *Value) bool {
_ = typ
// match: (Cvt64Fto32 x)
// cond:
// result: (Xf2i64 (FCTIWZ x))
// result: (MFVSRD (FCTIWZ x))
for {
x := v.Args[0]
v.reset(OpPPC64Xf2i64)
v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIWZ, typ.Float64)
v0.AddArg(x)
v.AddArg(v0)
@ -1421,10 +1419,10 @@ func rewriteValuePPC64_OpCvt64Fto64_0(v *Value) bool {
_ = typ
// match: (Cvt64Fto64 x)
// cond:
// result: (Xf2i64 (FCTIDZ x))
// result: (MFVSRD (FCTIDZ x))
for {
x := v.Args[0]
v.reset(OpPPC64Xf2i64)
v.reset(OpPPC64MFVSRD)
v0 := b.NewValue0(v.Pos, OpPPC64FCTIDZ, typ.Float64)
v0.AddArg(x)
v.AddArg(v0)
@ -1438,14 +1436,12 @@ func rewriteValuePPC64_OpCvt64to32F_0(v *Value) bool {
_ = typ
// match: (Cvt64to32F x)
// cond:
// result: (FRSP (FCFID (Xi2f64 x)))
// result: (FCFIDS (MTVSRD x))
for {
x := v.Args[0]
v.reset(OpPPC64FRSP)
v0 := b.NewValue0(v.Pos, OpPPC64FCFID, typ.Float64)
v1 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64)
v1.AddArg(x)
v0.AddArg(v1)
v.reset(OpPPC64FCFIDS)
v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v0.AddArg(x)
v.AddArg(v0)
return true
}
@ -1457,11 +1453,11 @@ func rewriteValuePPC64_OpCvt64to64F_0(v *Value) bool {
_ = typ
// match: (Cvt64to64F x)
// cond:
// result: (FCFID (Xi2f64 x))
// result: (FCFID (MTVSRD x))
for {
x := v.Args[0]
v.reset(OpPPC64FCFID)
v0 := b.NewValue0(v.Pos, OpPPC64Xi2f64, typ.Float64)
v0 := b.NewValue0(v.Pos, OpPPC64MTVSRD, typ.Float64)
v0.AddArg(x)
v.AddArg(v0)
return true
@ -6242,6 +6238,33 @@ func rewriteValuePPC64_OpPPC64FADDS_0(v *Value) bool {
return false
}
func rewriteValuePPC64_OpPPC64FMOVDload_0(v *Value) bool {
// match: (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr x _))
// cond:
// result: (MTVSRD x)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[1]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64MOVDstore {
break
}
if v_1.AuxInt != off {
break
}
if v_1.Aux != sym {
break
}
_ = v_1.Args[2]
if ptr != v_1.Args[0] {
break
}
x := v_1.Args[1]
v.reset(OpPPC64MTVSRD)
v.AddArg(x)
return true
}
// match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2)
// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@ -6294,6 +6317,28 @@ func rewriteValuePPC64_OpPPC64FMOVDload_0(v *Value) bool {
return false
}
func rewriteValuePPC64_OpPPC64FMOVDstore_0(v *Value) bool {
// match: (FMOVDstore [off] {sym} ptr (MTVSRD x) mem)
// cond:
// result: (MOVDstore [off] {sym} ptr x mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64MTVSRD {
break
}
x := v_1.Args[0]
mem := v.Args[2]
v.reset(OpPPC64MOVDstore)
v.AuxInt = off
v.Aux = sym
v.AddArg(ptr)
v.AddArg(x)
v.AddArg(mem)
return true
}
// match: (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem)
// cond: is16Bit(off1+off2)
// result: (FMOVDstore [off1+off2] {sym} ptr val mem)
@ -7070,6 +7115,33 @@ func rewriteValuePPC64_OpPPC64MOVBstorezero_0(v *Value) bool {
return false
}
func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
// match: (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _))
// cond:
// result: (MFVSRD x)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[1]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64FMOVDstore {
break
}
if v_1.AuxInt != off {
break
}
if v_1.Aux != sym {
break
}
_ = v_1.Args[2]
if ptr != v_1.Args[0] {
break
}
x := v_1.Args[1]
v.reset(OpPPC64MFVSRD)
v.AddArg(x)
return true
}
// match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2)
// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@ -7122,6 +7194,28 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
return false
}
func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
// match: (MOVDstore [off] {sym} ptr (MFVSRD x) mem)
// cond:
// result: (FMOVDstore [off] {sym} ptr x mem)
for {
off := v.AuxInt
sym := v.Aux
_ = v.Args[2]
ptr := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpPPC64MFVSRD {
break
}
x := v_1.Args[0]
mem := v.Args[2]
v.reset(OpPPC64FMOVDstore)
v.AuxInt = off
v.Aux = sym
v.AddArg(ptr)
v.AddArg(x)
v.AddArg(mem)
return true
}
// match: (MOVDstore [off1] {sym} (ADDconst [off2] x) val mem)
// cond: is16Bit(off1+off2)
// result: (MOVDstore [off1+off2] {sym} x val mem)

View File

@ -650,6 +650,8 @@ const (
AFCFIDCC
AFCFIDU
AFCFIDUCC
AFCFIDS
AFCFIDSCC
AFCTID
AFCTIDCC
AFCTIDZ

View File

@ -271,6 +271,8 @@ var Anames = []string{
"FCFIDCC",
"FCFIDU",
"FCFIDUCC",
"FCFIDS",
"FCFIDSCC",
"FCTID",
"FCTIDCC",
"FCTIDZ",

View File

@ -1596,6 +1596,8 @@ func buildop(ctxt *obj.Link) {
opset(AFCFIDCC, r0)
opset(AFCFIDU, r0)
opset(AFCFIDUCC, r0)
opset(AFCFIDS, r0)
opset(AFCFIDSCC, r0)
opset(AFRES, r0)
opset(AFRESCC, r0)
opset(AFRIM, r0)
@ -3656,6 +3658,10 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
return OPVCC(63, 974, 0, 0)
case AFCFIDUCC:
return OPVCC(63, 974, 0, 1)
case AFCFIDS:
return OPVCC(59, 846, 0, 0)
case AFCFIDSCC:
return OPVCC(59, 846, 0, 1)
case AFCTIW:
return OPVCC(63, 14, 0, 0)
case AFCTIWCC: