From 7113d3a512b17d1acea0904a2e5590fdfbd388bb Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Thu, 22 Feb 2018 13:55:01 +0000 Subject: [PATCH] cmd/compile: fix FP accuracy issue introduced by FMA optimization on ARM64 Two ARM64 rules are added to avoid FP accuracy issue, which causes build failure. https://build.golang.org/log/1360f5c9ef3f37968216350283c1013e9681725d fixes #24033 Change-Id: I9b74b584ab5cc53fa49476de275dc549adf97610 Reviewed-on: https://go-review.googlesource.com/96355 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- src/cmd/compile/internal/arm64/ssa.go | 2 ++ src/cmd/compile/internal/ssa/gen/ARM64.rules | 4 +-- src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 2 ++ src/cmd/compile/internal/ssa/opGen.go | 28 ++++++++++++++++++++ src/cmd/compile/internal/ssa/rewriteARM64.go | 10 +++---- 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 795b1a74c5..014e7fc57c 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -590,6 +590,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.From.Reg = v.Args[0].Reg() p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F: + // input is already rounded case ssa.OpARM64VCNT: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 3822a378be..5b4d8b04f9 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -237,8 +237,8 @@ (Cvt32Fto64F x) -> (FCVTSD x) (Cvt64Fto32F x) -> (FCVTDS x) -(Round32F x) -> x -(Round64F x) -> x +(Round32F x) -> (LoweredRound32F x) +(Round64F x) -> (LoweredRound64F x) // comparisons (Eq8 x y) -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index 1d70c4e864..d712988bec 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -216,6 +216,8 @@ func init() { {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit. + {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true}, + {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true}, // 3-operand, the addend comes first {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // +arg0 + (arg1 * arg2) diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 5131e8d834..09008d3032 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1003,6 +1003,8 @@ const ( OpARM64CLZW OpARM64VCNT OpARM64VUADDLV + OpARM64LoweredRound32F + OpARM64LoweredRound64F OpARM64FMADDS OpARM64FMADDD OpARM64FNMADDS @@ -12765,6 +12767,32 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "LoweredRound32F", + argLen: 1, + resultInArg0: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { + name: "LoweredRound64F", + argLen: 1, + resultInArg0: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "FMADDS", argLen: 3, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index f711aade36..306b1339ee 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -16318,11 +16318,10 @@ func rewriteValueARM64_OpRound_0(v *Value) bool { func rewriteValueARM64_OpRound32F_0(v *Value) bool { // match: (Round32F x) // cond: - // result: x + // result: (LoweredRound32F x) for { x := v.Args[0] - v.reset(OpCopy) - v.Type = x.Type + v.reset(OpARM64LoweredRound32F) v.AddArg(x) return true } @@ -16330,11 +16329,10 @@ func rewriteValueARM64_OpRound32F_0(v *Value) bool { func rewriteValueARM64_OpRound64F_0(v *Value) bool { // match: (Round64F x) // cond: - // result: x + // result: (LoweredRound64F x) for { x := v.Args[0] - v.reset(OpCopy) - v.Type = x.Type + v.reset(OpARM64LoweredRound64F) v.AddArg(x) return true }