diff --git a/src/math/all_test.go b/src/math/all_test.go
index 208c8233e0d..e8fa2b8b66a 100644
--- a/src/math/all_test.go
+++ b/src/math/all_test.go
@@ -2005,6 +2005,64 @@ var logbBC = []float64{
 	1023,
 }
 
+// Test cases were generated with Berkeley TestFloat-3e/testfloat_gen.
+// http://www.jhauser.us/arithmetic/TestFloat.html.
+// The default rounding mode is selected (nearest/even), and exception flags are ignored.
+var fmaC = []struct{ x, y, z, want float64 }{
+	// Large exponent spread
+	{-3.999999999999087, -1.1123914289620494e-16, -7.999877929687506, -7.999877929687505},
+	{-262112.0000004768, -0.06251525855623184, 1.1102230248837136e-16, 16385.99945072085},
+	{-6.462348523533467e-27, -2.3763644720331857e-211, 4.000000000931324, 4.000000000931324},
+
+	// Effective addition
+	{-2.0000000037252907, 6.7904383376e-313, -3.3951933161e-313, -1.697607001654e-312},
+	{-0.12499999999999999, 512.007568359375, -1.4193627164960366e-16, -64.00094604492188},
+	{-2.7550648847397148e-39, -3.4028301595800694e+38, 0.9960937495343386, 1.9335955376735676},
+	{5.723369164769208e+24, 3.8149300927159385e-06, 1.84489958778182e+19, 4.028324913621874e+19},
+	{-0.4843749999990904, -3.6893487872543293e+19, 9.223653786709391e+18, 2.7093936974938993e+19},
+	{-3.8146972665201165e-06, 4.2949672959999385e+09, -2.2204460489938386e-16, -16384.000003844263},
+	{6.98156394130982e-309, -1.1072962560000002e+09, -4.4414561548793455e-308, -7.73065965765153e-300},
+
+	// Effective subtraction
+	{5e-324, 4.5, -2e-323, 0},
+	{5e-324, 7, -3.5e-323, 0},
+	{5e-324, 0.5000000000000001, -5e-324, Copysign(0, -1)},
+	{-2.1240680525e-314, -1.233647078189316e+308, -0.25781249999954525, -0.25780987964919844},
+	{8.579992955364441e-308, 0.6037391876780558, -4.4501307410480706e-308, 7.29947236107098e-309},
+	{-4.450143471986689e-308, -0.9960937499927239, -4.450419332475649e-308, -1.7659233458788e-310},
+	{1.4932076393918112, -2.2248022430460833e-308, 4.449875571054211e-308, 1.127783865601762e-308},
+
+	// Overflow
+	{-2.288020632214759e+38, -8.98846570988901e+307, 1.7696041796300924e+308, Inf(0)},
+	{1.4888652783208255e+308, -9.007199254742012e+15, -6.807282911929205e+38, Inf(-1)},
+	{9.142703268902826e+192, -1.3504889569802838e+296, -1.9082200803806996e-89, Inf(-1)},
+
+	// Finite x and y, but non-finite z.
+	{31.99218749627471, -1.7976930544991702e+308, Inf(0), Inf(0)},
+	{-1.7976931281784667e+308, -2.0009765625002265, Inf(-1), Inf(-1)},
+
+	// Special
+	{0, 0, 0, 0},
+	{-1.1754226043408471e-38, NaN(), Inf(0), NaN()},
+	{0, 0, 2.22507385643494e-308, 2.22507385643494e-308},
+	{-8.65697792e+09, NaN(), -7.516192799999999e+09, NaN()},
+	{-0.00012207403779029757, 3.221225471996093e+09, NaN(), NaN()},
+	{Inf(-1), 0.1252441407414153, -1.387184532981584e-76, Inf(-1)},
+	{Inf(0), 1.525878907671432e-05, -9.214364835452549e+18, Inf(0)},
+
+	// Random
+	{0.1777916152213626, -32.000015266239636, -2.2204459148334633e-16, -5.689334401293007},
+	{-2.0816681711722314e-16, -0.4997558592585846, -0.9465627129124969, -0.9465627129124968},
+	{-1.9999997615814211, 1.8518819259933516e+19, 16.874999999999996, -3.703763410463646e+19},
+	{-0.12499994039717421, 32767.99999976135, -2.0752587082923246e+19, -2.075258708292325e+19},
+	{7.705600568510257e-34, -1.801432979000528e+16, -0.17224197722973714, -0.17224197722973716},
+	{3.8988133103758913e-308, -0.9848632812499999, 3.893879244098556e-308, 5.40811742605814e-310},
+	{-0.012651981190687427, 6.911985574912436e+38, 6.669240527007144e+18, -8.745031148409496e+36},
+	{4.612811918325842e+18, 1.4901161193847641e-08, 2.6077032311277997e-08, 6.873625395187494e+10},
+	{-9.094947033611148e-13, 4.450691014249257e-308, 2.086006742350485e-308, 2.086006742346437e-308},
+	{-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308},
+}
+
 func tolerance(a, b, e float64) bool {
 	// Multiplying by e here can underflow denormal values to zero.
 	// Check a==b so that at least if a and b are small and identical
@@ -2995,6 +3053,15 @@ func TestYn(t *testing.T) {
 	}
 }
 
+func TestFma(t *testing.T) {
+	for _, c := range fmaC {
+		got := Fma(c.x, c.y, c.z)
+		if !alike(got, c.want) {
+			t.Errorf("Fma(%g,%g,%g) == %g; want %g", c.x, c.y, c.z, got, c.want)
+		}
+	}
+}
+
 // Check that math functions of high angle values
 // return accurate results. [Since (vf[i] + large) - large != vf[i],
 // testing for Trig(vf[i] + large) == Trig(vf[i]), where large is
@@ -3725,3 +3792,11 @@ func BenchmarkFloat32frombits(b *testing.B) {
 	}
 	GlobalF = float64(x)
 }
+
+func BenchmarkFma(b *testing.B) {
+	x := 0.0
+	for i := 0; i < b.N; i++ {
+		x = Fma(E, Pi, x)
+	}
+	GlobalF = x
+}
diff --git a/src/math/fma.go b/src/math/fma.go
new file mode 100644
index 00000000000..76249229b2b
--- /dev/null
+++ b/src/math/fma.go
@@ -0,0 +1,169 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import "math/bits"
+
+func zero(x uint64) uint64 {
+	if x == 0 {
+		return 1
+	}
+	return 0
+	// branchless:
+	// return ((x>>1 | x&1) - 1) >> 63
+}
+
+func nonzero(x uint64) uint64 {
+	if x != 0 {
+		return 1
+	}
+	return 0
+	// branchless:
+	// return 1 - ((x>>1|x&1)-1)>>63
+}
+
+func shl(u1, u2 uint64, n uint) (r1, r2 uint64) {
+	r1 = u1<<n | u2>>(64-n) | u2<<(n-64)
+	r2 = u2 << n
+	return
+}
+
+func shr(u1, u2 uint64, n uint) (r1, r2 uint64) {
+	r2 = u2>>n | u1<<(64-n) | u1>>(n-64)
+	r1 = u1 >> n
+	return
+}
+
+// shrcompress compresses the bottom n+1 bits of the two-word
+// value into a single bit. the result is equal to the value
+// shifted to the right by n, except the result's 0th bit is
+// set to the bitwise OR of the bottom n+1 bits.
+func shrcompress(u1, u2 uint64, n uint) (r1, r2 uint64) {
+	// TODO: Performance here is really sensitive to the
+	// order/placement of these branches. n == 0 is common
+	// enough to be in the fast path. Perhaps more measurement
+	// needs to be done to find the optimal order/placement?
+	switch {
+	case n == 0:
+		return u1, u2
+	case n == 64:
+		return 0, u1 | nonzero(u2)
+	case n >= 128:
+		return 0, nonzero(u1 | u2)
+	case n < 64:
+		r1, r2 = shr(u1, u2, n)
+		r2 |= nonzero(u2 & (1<<n - 1))
+	case n < 128:
+		r1, r2 = shr(u1, u2, n)
+		r2 |= nonzero(u1&(1<<(n-64)-1) | u2)
+	}
+	return
+}
+
+func lz(u1, u2 uint64) (l int32) {
+	l = int32(bits.LeadingZeros64(u1))
+	if l == 64 {
+		l += int32(bits.LeadingZeros64(u2))
+	}
+	return l
+}
+
+// split splits b into sign, biased exponent, and mantissa.
+// It adds the implicit 1 bit to the mantissa for normal values,
+// and normalizes subnormal values.
+func split(b uint64) (sign uint32, exp int32, mantissa uint64) {
+	sign = uint32(b >> 63)
+	exp = int32(b>>52) & mask
+	mantissa = b & fracMask
+
+	if exp == 0 {
+		// Normalize value if subnormal.
+		shift := uint(bits.LeadingZeros64(mantissa) - 11)
+		mantissa <<= shift
+		exp = 1 - int32(shift)
+	} else {
+		// Add implicit 1 bit
+		mantissa |= 1 << 52
+	}
+	return
+}
+
+// Fma returns x * y + z, computed with only one rounding.
+func Fma(x, y, z float64) float64 {
+	bx, by, bz := Float64bits(x), Float64bits(y), Float64bits(z)
+
+	// Inf or NaN or zero involved. At most one rounding will occur.
+	if x == 0.0 || y == 0.0 || z == 0.0 || bx&uvinf == uvinf || by&uvinf == uvinf {
+		return x*y + z
+	}
+	// Handle non-finite z separately. Evaluating x*y+z where
+	// x and y are finite, but z is infinite, should always result in z.
+	if bz&uvinf == uvinf {
+		return z
+	}
+
+	// Inputs are (sub)normal.
+	// Split x, y, z into sign, exponent, mantissa.
+	xs, xe, xm := split(bx)
+	ys, ye, ym := split(by)
+	zs, ze, zm := split(bz)
+
+	// Compute product p = x*y as sign, exponent, two-word mantissa.
+	// Start with exponent. "is normal" bit isn't subtracted yet.
+	pe := xe + ye - bias + 1
+
+	// pm1:pm2 is the double-word mantissa for the product p.
+	// Shift left to leave top bit in product. Effectively
+	// shifts the 106-bit product to the left by 21.
+	pm1, pm2 := bits.Mul64(xm<<10, ym<<11)
+	zm1, zm2 := zm<<10, uint64(0)
+	ps := xs ^ ys // product sign
+
+	// normalize to 62nd bit
+	is62zero := uint((^pm1 >> 62) & 1)
+	pm1, pm2 = shl(pm1, pm2, is62zero)
+	pe -= int32(is62zero)
+
+	// Swap addition operands so |p| >= |z|
+	if pe < ze || (pe == ze && (pm1 < zm1 || (pm1 == zm1 && pm2 < zm2))) {
+		ps, pe, pm1, pm2, zs, ze, zm1, zm2 = zs, ze, zm1, zm2, ps, pe, pm1, pm2
+	}
+
+	// Align significands
+	zm1, zm2 = shrcompress(zm1, zm2, uint(pe-ze))
+
+	// Compute resulting significands, normalizing if necessary.
+	var m, c uint64
+	if ps == zs {
+		// Adding (pm1:pm2) + (zm1:zm2)
+		pm2, c = bits.Add64(pm2, zm2, 0)
+		pm1, _ = bits.Add64(pm1, zm1, c)
+		pe -= int32(^pm1 >> 63)
+		pm1, m = shrcompress(pm1, pm2, uint(64+pm1>>63))
+	} else {
+		// Subtracting (pm1:pm2) - (zm1:zm2)
+		// TODO: should we special-case cancellation?
+		pm2, c = bits.Sub64(pm2, zm2, 0)
+		pm1, _ = bits.Sub64(pm1, zm1, c)
+		nz := lz(pm1, pm2)
+		pe -= nz
+		m, pm2 = shl(pm1, pm2, uint(nz-1))
+		m |= nonzero(pm2)
+	}
+
+	// Round and break ties to even
+	if pe > 1022+bias || pe == 1022+bias && (m+1<<9)>>63 == 1 {
+		// rounded value overflows exponent range
+		return Float64frombits(uint64(ps)<<63 | uvinf)
+	}
+	if pe < 0 {
+		n := uint(-pe)
+		m = m>>n | nonzero(m&(1<<n-1))
+		pe = 0
+	}
+	m = ((m + 1<<9) >> 10) & ^zero((m&(1<<10-1))^1<<9)
+	pe &= -int32(nonzero(m))
+	return Float64frombits(uint64(ps)<<63 + uint64(pe)<<52 + m)
+}