mirror of
https://github.com/golang/go
synced 2024-11-14 09:10:27 -07:00
cmd/compile: fix FP accuracy issue introduced by FMA optimization on ARM64
Two ARM64 rules are added to avoid FP accuracy issue, which causes build failure. https://build.golang.org/log/1360f5c9ef3f37968216350283c1013e9681725d fixes #24033 Change-Id: I9b74b584ab5cc53fa49476de275dc549adf97610 Reviewed-on: https://go-review.googlesource.com/96355 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
ef3ab3f5e2
commit
7113d3a512
@ -590,6 +590,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
p.From.Reg = v.Args[0].Reg()
|
p.From.Reg = v.Args[0].Reg()
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = v.Reg()
|
p.To.Reg = v.Reg()
|
||||||
|
case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
|
||||||
|
// input is already rounded
|
||||||
case ssa.OpARM64VCNT:
|
case ssa.OpARM64VCNT:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
|
@ -237,8 +237,8 @@
|
|||||||
(Cvt32Fto64F x) -> (FCVTSD x)
|
(Cvt32Fto64F x) -> (FCVTSD x)
|
||||||
(Cvt64Fto32F x) -> (FCVTDS x)
|
(Cvt64Fto32F x) -> (FCVTDS x)
|
||||||
|
|
||||||
(Round32F x) -> x
|
(Round32F x) -> (LoweredRound32F x)
|
||||||
(Round64F x) -> x
|
(Round64F x) -> (LoweredRound64F x)
|
||||||
|
|
||||||
// comparisons
|
// comparisons
|
||||||
(Eq8 x y) -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
(Eq8 x y) -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
||||||
|
@ -216,6 +216,8 @@ func init() {
|
|||||||
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit
|
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit
|
||||||
{name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit
|
{name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit
|
||||||
{name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
|
{name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
|
||||||
|
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
|
||||||
|
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
|
||||||
|
|
||||||
// 3-operand, the addend comes first
|
// 3-operand, the addend comes first
|
||||||
{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // +arg0 + (arg1 * arg2)
|
{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // +arg0 + (arg1 * arg2)
|
||||||
|
@ -1003,6 +1003,8 @@ const (
|
|||||||
OpARM64CLZW
|
OpARM64CLZW
|
||||||
OpARM64VCNT
|
OpARM64VCNT
|
||||||
OpARM64VUADDLV
|
OpARM64VUADDLV
|
||||||
|
OpARM64LoweredRound32F
|
||||||
|
OpARM64LoweredRound64F
|
||||||
OpARM64FMADDS
|
OpARM64FMADDS
|
||||||
OpARM64FMADDD
|
OpARM64FMADDD
|
||||||
OpARM64FNMADDS
|
OpARM64FNMADDS
|
||||||
@ -12765,6 +12767,32 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "LoweredRound32F",
|
||||||
|
argLen: 1,
|
||||||
|
resultInArg0: true,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "LoweredRound64F",
|
||||||
|
argLen: 1,
|
||||||
|
resultInArg0: true,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "FMADDS",
|
name: "FMADDS",
|
||||||
argLen: 3,
|
argLen: 3,
|
||||||
|
@ -16318,11 +16318,10 @@ func rewriteValueARM64_OpRound_0(v *Value) bool {
|
|||||||
func rewriteValueARM64_OpRound32F_0(v *Value) bool {
|
func rewriteValueARM64_OpRound32F_0(v *Value) bool {
|
||||||
// match: (Round32F x)
|
// match: (Round32F x)
|
||||||
// cond:
|
// cond:
|
||||||
// result: x
|
// result: (LoweredRound32F x)
|
||||||
for {
|
for {
|
||||||
x := v.Args[0]
|
x := v.Args[0]
|
||||||
v.reset(OpCopy)
|
v.reset(OpARM64LoweredRound32F)
|
||||||
v.Type = x.Type
|
|
||||||
v.AddArg(x)
|
v.AddArg(x)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
@ -16330,11 +16329,10 @@ func rewriteValueARM64_OpRound32F_0(v *Value) bool {
|
|||||||
func rewriteValueARM64_OpRound64F_0(v *Value) bool {
|
func rewriteValueARM64_OpRound64F_0(v *Value) bool {
|
||||||
// match: (Round64F x)
|
// match: (Round64F x)
|
||||||
// cond:
|
// cond:
|
||||||
// result: x
|
// result: (LoweredRound64F x)
|
||||||
for {
|
for {
|
||||||
x := v.Args[0]
|
x := v.Args[0]
|
||||||
v.reset(OpCopy)
|
v.reset(OpARM64LoweredRound64F)
|
||||||
v.Type = x.Type
|
|
||||||
v.AddArg(x)
|
v.AddArg(x)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user