From 93a601dd2acc7401564eae59b5e6927da4146e22 Mon Sep 17 00:00:00 2001 From: Akhil Indurti Date: Wed, 1 Aug 2018 23:22:46 -0400 Subject: [PATCH] math: add guaranteed-precision FMA implementation Currently, the precision of the float64 multiply-add operation (x * y) + z varies across architectures. While generated code for ppc64, s390x, and arm64 can guarantee that there is no intermediate rounding on those platforms, other architectures like x86, mips, and arm will exhibit different behavior depending on available instruction set. Consequently, applications cannot rely on results being identical across GOARCH-dependent codepaths. This CL introduces a software implementation that performs an IEEE 754 double-precision fused-multiply-add operation. The only supported rounding mode is round-to-nearest ties-to-even. Separate CLs include hardware implementations when available. Otherwise, this software fallback is given as the default implementation. Specifically, - arm64, ppc64, s390x: Uses the FMA instruction provided by all of these ISAs. - mips[64][le]: Falls back to this software implementation. Only release 6 of the ISA includes a strict FMA instruction with MADDF.D (not implementation defined). Because the number of R6 processors in the wild is scarce, the assembly implementation is left as a future optimization. - x86: Guards the use of VFMADD213SD by checking cpu.X86.HasFMA. - arm: Guards the use of VFMA by checking cpu.ARM.HasVFPv4. - software fallback: Uses mostly integer arithmetic except for input that involves Inf, NaN, or zero. Updates #25819. Change-Id: Iadadff2219638bacc9fec78d3ab885393fea4a08 Reviewed-on: https://go-review.googlesource.com/c/go/+/127458 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/math/all_test.go | 75 +++++++++++++++++++ src/math/fma.go | 169 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 src/math/fma.go diff --git a/src/math/all_test.go b/src/math/all_test.go index 208c8233e0d..e8fa2b8b66a 100644 --- a/src/math/all_test.go +++ b/src/math/all_test.go @@ -2005,6 +2005,64 @@ var logbBC = []float64{ 1023, } +// Test cases were generated with Berkeley TestFloat-3e/testfloat_gen. +// http://www.jhauser.us/arithmetic/TestFloat.html. +// The default rounding mode is selected (nearest/even), and exception flags are ignored. +var fmaC = []struct{ x, y, z, want float64 }{ + // Large exponent spread + {-3.999999999999087, -1.1123914289620494e-16, -7.999877929687506, -7.999877929687505}, + {-262112.0000004768, -0.06251525855623184, 1.1102230248837136e-16, 16385.99945072085}, + {-6.462348523533467e-27, -2.3763644720331857e-211, 4.000000000931324, 4.000000000931324}, + + // Effective addition + {-2.0000000037252907, 6.7904383376e-313, -3.3951933161e-313, -1.697607001654e-312}, + {-0.12499999999999999, 512.007568359375, -1.4193627164960366e-16, -64.00094604492188}, + {-2.7550648847397148e-39, -3.4028301595800694e+38, 0.9960937495343386, 1.9335955376735676}, + {5.723369164769208e+24, 3.8149300927159385e-06, 1.84489958778182e+19, 4.028324913621874e+19}, + {-0.4843749999990904, -3.6893487872543293e+19, 9.223653786709391e+18, 2.7093936974938993e+19}, + {-3.8146972665201165e-06, 4.2949672959999385e+09, -2.2204460489938386e-16, -16384.000003844263}, + {6.98156394130982e-309, -1.1072962560000002e+09, -4.4414561548793455e-308, -7.73065965765153e-300}, + + // Effective subtraction + {5e-324, 4.5, -2e-323, 0}, + {5e-324, 7, -3.5e-323, 0}, + {5e-324, 0.5000000000000001, -5e-324, Copysign(0, -1)}, + {-2.1240680525e-314, -1.233647078189316e+308, -0.25781249999954525, -0.25780987964919844}, + {8.579992955364441e-308, 0.6037391876780558, -4.4501307410480706e-308, 7.29947236107098e-309}, + {-4.450143471986689e-308, -0.9960937499927239, -4.450419332475649e-308, -1.7659233458788e-310}, + {1.4932076393918112, -2.2248022430460833e-308, 4.449875571054211e-308, 1.127783865601762e-308}, + + // Overflow + {-2.288020632214759e+38, -8.98846570988901e+307, 1.7696041796300924e+308, Inf(0)}, + {1.4888652783208255e+308, -9.007199254742012e+15, -6.807282911929205e+38, Inf(-1)}, + {9.142703268902826e+192, -1.3504889569802838e+296, -1.9082200803806996e-89, Inf(-1)}, + + // Finite x and y, but non-finite z. + {31.99218749627471, -1.7976930544991702e+308, Inf(0), Inf(0)}, + {-1.7976931281784667e+308, -2.0009765625002265, Inf(-1), Inf(-1)}, + + // Special + {0, 0, 0, 0}, + {-1.1754226043408471e-38, NaN(), Inf(0), NaN()}, + {0, 0, 2.22507385643494e-308, 2.22507385643494e-308}, + {-8.65697792e+09, NaN(), -7.516192799999999e+09, NaN()}, + {-0.00012207403779029757, 3.221225471996093e+09, NaN(), NaN()}, + {Inf(-1), 0.1252441407414153, -1.387184532981584e-76, Inf(-1)}, + {Inf(0), 1.525878907671432e-05, -9.214364835452549e+18, Inf(0)}, + + // Random + {0.1777916152213626, -32.000015266239636, -2.2204459148334633e-16, -5.689334401293007}, + {-2.0816681711722314e-16, -0.4997558592585846, -0.9465627129124969, -0.9465627129124968}, + {-1.9999997615814211, 1.8518819259933516e+19, 16.874999999999996, -3.703763410463646e+19}, + {-0.12499994039717421, 32767.99999976135, -2.0752587082923246e+19, -2.075258708292325e+19}, + {7.705600568510257e-34, -1.801432979000528e+16, -0.17224197722973714, -0.17224197722973716}, + {3.8988133103758913e-308, -0.9848632812499999, 3.893879244098556e-308, 5.40811742605814e-310}, + {-0.012651981190687427, 6.911985574912436e+38, 6.669240527007144e+18, -8.745031148409496e+36}, + {4.612811918325842e+18, 1.4901161193847641e-08, 2.6077032311277997e-08, 6.873625395187494e+10}, + {-9.094947033611148e-13, 4.450691014249257e-308, 2.086006742350485e-308, 2.086006742346437e-308}, + {-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308}, +} + func tolerance(a, b, e float64) bool { // Multiplying by e here can underflow denormal values to zero. // Check a==b so that at least if a and b are small and identical @@ -2995,6 +3053,15 @@ func TestYn(t *testing.T) { } } +func TestFma(t *testing.T) { + for _, c := range fmaC { + got := Fma(c.x, c.y, c.z) + if !alike(got, c.want) { + t.Errorf("Fma(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want) + } + } +} + // Check that math functions of high angle values // return accurate results. [Since (vf[i] + large) - large != vf[i], // testing for Trig(vf[i] + large) == Trig(vf[i]), where large is @@ -3725,3 +3792,11 @@ func BenchmarkFloat32frombits(b *testing.B) { } GlobalF = float64(x) } + +func BenchmarkFma(b *testing.B) { + x := 0.0 + for i := 0; i < b.N; i++ { + x = Fma(E, Pi, x) + } + GlobalF = x +} diff --git a/src/math/fma.go b/src/math/fma.go new file mode 100644 index 00000000000..76249229b2b --- /dev/null +++ b/src/math/fma.go @@ -0,0 +1,169 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package math + +import "math/bits" + +func zero(x uint64) uint64 { + if x == 0 { + return 1 + } + return 0 + // branchless: + // return ((x>>1 | x&1) - 1) >> 63 +} + +func nonzero(x uint64) uint64 { + if x != 0 { + return 1 + } + return 0 + // branchless: + // return 1 - ((x>>1|x&1)-1)>>63 +} + +func shl(u1, u2 uint64, n uint) (r1, r2 uint64) { + r1 = u1<>(64-n) | u2<<(n-64) + r2 = u2 << n + return +} + +func shr(u1, u2 uint64, n uint) (r1, r2 uint64) { + r2 = u2>>n | u1<<(64-n) | u1>>(n-64) + r1 = u1 >> n + return +} + +// shrcompress compresses the bottom n+1 bits of the two-word +// value into a single bit. the result is equal to the value +// shifted to the right by n, except the result's 0th bit is +// set to the bitwise OR of the bottom n+1 bits. +func shrcompress(u1, u2 uint64, n uint) (r1, r2 uint64) { + // TODO: Performance here is really sensitive to the + // order/placement of these branches. n == 0 is common + // enough to be in the fast path. Perhaps more measurement + // needs to be done to find the optimal order/placement? + switch { + case n == 0: + return u1, u2 + case n == 64: + return 0, u1 | nonzero(u2) + case n >= 128: + return 0, nonzero(u1 | u2) + case n < 64: + r1, r2 = shr(u1, u2, n) + r2 |= nonzero(u2 & (1<> 63) + exp = int32(b>>52) & mask + mantissa = b & fracMask + + if exp == 0 { + // Normalize value if subnormal. + shift := uint(bits.LeadingZeros64(mantissa) - 11) + mantissa <<= shift + exp = 1 - int32(shift) + } else { + // Add implicit 1 bit + mantissa |= 1 << 52 + } + return +} + +// Fma returns x * y + z, computed with only one rounding. +func Fma(x, y, z float64) float64 { + bx, by, bz := Float64bits(x), Float64bits(y), Float64bits(z) + + // Inf or NaN or zero involved. At most one rounding will occur. + if x == 0.0 || y == 0.0 || z == 0.0 || bx&uvinf == uvinf || by&uvinf == uvinf { + return x*y + z + } + // Handle non-finite z separately. Evaluating x*y+z where + // x and y are finite, but z is infinite, should always result in z. + if bz&uvinf == uvinf { + return z + } + + // Inputs are (sub)normal. + // Split x, y, z into sign, exponent, mantissa. + xs, xe, xm := split(bx) + ys, ye, ym := split(by) + zs, ze, zm := split(bz) + + // Compute product p = x*y as sign, exponent, two-word mantissa. + // Start with exponent. "is normal" bit isn't subtracted yet. + pe := xe + ye - bias + 1 + + // pm1:pm2 is the double-word mantissa for the product p. + // Shift left to leave top bit in product. Effectively + // shifts the 106-bit product to the left by 21. + pm1, pm2 := bits.Mul64(xm<<10, ym<<11) + zm1, zm2 := zm<<10, uint64(0) + ps := xs ^ ys // product sign + + // normalize to 62nd bit + is62zero := uint((^pm1 >> 62) & 1) + pm1, pm2 = shl(pm1, pm2, is62zero) + pe -= int32(is62zero) + + // Swap addition operands so |p| >= |z| + if pe < ze || (pe == ze && (pm1 < zm1 || (pm1 == zm1 && pm2 < zm2))) { + ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2 + } + + // Align significands + zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze)) + + // Compute resulting significands, normalizing if necessary. + var m, c uint64 + if ps == zs { + // Adding (pm1:pm2) + (zm1:zm2) + pm2, c = bits.Add64(pm2, zm2, 0) + pm1, _ = bits.Add64(pm1, zm1, c) + pe -= int32(^pm1 >> 63) + pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63)) + } else { + // Subtracting (pm1:pm2) - (zm1:zm2) + // TODO: should we special-case cancellation? + pm2, c = bits.Sub64(pm2, zm2, 0) + pm1, _ = bits.Sub64(pm1, zm1, c) + nz := lz(pm1, pm2) + pe -= nz + m, pm2 = shl(pm1, pm2, uint(nz-1)) + m |= nonzero(pm2) + } + + // Round and break ties to even + if pe > 1022+bias || pe == 1022+bias && (m+1<<9)>>63 == 1 { + // rounded value overflows exponent range + return Float64frombits(uint64(ps)<<63 | uvinf) + } + if pe < 0 { + n := uint(-pe) + m = m>>n | nonzero(m&(1<> 10) & ^zero((m&(1<<10-1))^1<<9) + pe &= -int32(nonzero(m)) + return Float64frombits(uint64(ps)<<63 + uint64(pe)<<52 + m) +}