diff --git a/src/math/big/int.go b/src/math/big/int.go
index caebde92fa..b1d09cdad8 100644
--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -485,162 +485,223 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
 		}
 		return z
 	}
-	if x == nil && y == nil {
-		return z.lehmerGCD(a, b)
+
+	return z.lehmerGCD(x, y, a, b)
+}
+
+// lehmerSimulate attempts to simulate several Euclidean update steps
+// using the leading digits of A and B.  It returns u0, u1, v0, v1
+// such that A and B can be updated as:
+//		A = u0*A + v0*B
+//		B = u1*A + v1*B
+// Requirements: A >= B and len(B.abs) >= 2
+// Since we are calculating with full words to avoid overflow,
+// we use 'even' to track the sign of the cosequences.
+// For even iterations: u0, v1 >= 0 && u1, v0 <= 0
+// For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
+func lehmerSimulate(A, B *Int) (u0, u1, v0, v1 Word, even bool) {
+	// initialize the digits
+	var a1, a2, u2, v2 Word
+
+	m := len(B.abs) // m >= 2
+	n := len(A.abs) // n >= m >= 2
+
+	// extract the top Word of bits from A and B
+	h := nlz(A.abs[n-1])
+	a1 = A.abs[n-1]<<h | A.abs[n-2]>>(_W-h)
+	// B may have implicit zero words in the high bits if the lengths differ
+	switch {
+	case n == m:
+		a2 = B.abs[n-1]<<h | B.abs[n-2]>>(_W-h)
+	case n == m+1:
+		a2 = B.abs[n-2] >> (_W - h)
+	default:
+		a2 = 0
 	}
 
-	A := new(Int).Set(a)
-	B := new(Int).Set(b)
+	// Since we are calculating with full words to avoid overflow,
+	// we use 'even' to track the sign of the cosequences.
+	// For even iterations: u0, v1 >= 0 && u1, v0 <= 0
+	// For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
+	// The first iteration starts with k=1 (odd).
+	even = false
+	// variables to track the cosequences
+	u0, u1, u2 = 0, 1, 0
+	v0, v1, v2 = 0, 0, 1
 
-	X := new(Int)
-	lastX := new(Int).SetInt64(1)
-
-	q := new(Int)
-	temp := new(Int)
-
-	r := new(Int)
-	for len(B.abs) > 0 {
-		q, r = q.QuoRem(A, B, r)
-
-		A, B, r = B, r, A
-
-		temp.Set(X)
-		X.Mul(X, q)
-		X.Sub(lastX, X)
-		lastX.Set(temp)
+	// Calculate the quotient and cosequences using Collins' stopping condition.
+	// Note that overflow of a Word is not possible when computing the remainder
+	// sequence and cosequences since the cosequence size is bounded by the input size.
+	// See section 4.2 of Jebelean for details.
+	for a2 >= v2 && a1-a2 >= v1+v2 {
+		q, r := a1/a2, a1%a2
+		a1, a2 = a2, r
+		u0, u1, u2 = u1, u2, u1+q*u2
+		v0, v1, v2 = v1, v2, v1+q*v2
+		even = !even
 	}
+	return
+}
 
-	if x != nil {
-		*x = *lastX
+// lehmerUpdate updates the inputs A and B such that:
+//		A = u0*A + v0*B
+//		B = u1*A + v1*B
+// where the signs of u0, u1, v0, v1 are given by even
+// For even == true: u0, v1 >= 0 && u1, v0 <= 0
+// For even == false: u0, v1 <= 0 && u1, v0 >= 0
+// q, r, s, t are temporary variables to avoid allocations in the multiplication
+func lehmerUpdate(A, B, q, r, s, t *Int, u0, u1, v0, v1 Word, even bool) {
+
+	t.abs = t.abs.setWord(u0)
+	s.abs = s.abs.setWord(v0)
+	t.neg = !even
+	s.neg = even
+
+	t.Mul(A, t)
+	s.Mul(B, s)
+
+	r.abs = r.abs.setWord(u1)
+	q.abs = q.abs.setWord(v1)
+	r.neg = even
+	q.neg = !even
+
+	r.Mul(A, r)
+	q.Mul(B, q)
+
+	A.Add(t, s)
+	B.Add(r, q)
+}
+
+// euclidUpdate performs a single step of the Euclidean GCD algorithm
+// if extended is true, it also updates the cosequence Ua, Ub
+func euclidUpdate(A, B, Ua, Ub, q, r, s, t *Int, extended bool) {
+	q, r = q.QuoRem(A, B, r)
+
+	*A, *B, *r = *B, *r, *A
+
+	if extended {
+		// Ua, Ub = Ub, Ua - q*Ub
+		t.Set(Ub)
+		s.Mul(Ub, q)
+		Ub.Sub(Ua, s)
+		Ua.Set(t)
 	}
-
-	if y != nil {
-		// y = (z - a*x)/b
-		y.Mul(a, lastX)
-		y.Sub(A, y)
-		y.Div(y, b)
-	}
-
-	*z = *A
-	return z
 }
 
 // lehmerGCD sets z to the greatest common divisor of a and b,
 // which both must be > 0, and returns z.
+// If x or y are not nil, their values are set such that z = a*x + b*y.
 // See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L.
 // This implementation uses the improved condition by Collins requiring only one
 // quotient and avoiding the possibility of single Word overflow.
 // See Jebelean, "Improving the multiprecision Euclidean algorithm",
 // Design and Implementation of Symbolic Computation Systems, pp 45-58.
-func (z *Int) lehmerGCD(a, b *Int) *Int {
-	// ensure a >= b
-	if a.abs.cmp(b.abs) < 0 {
-		a, b = b, a
+// The cosequences are updated according to Algorithm 10.45 from
+// Cohen et al. "Handbook of Elliptic and Hyperelliptic Curve Cryptography" pp 192.
+func (z *Int) lehmerGCD(x, y, a, b *Int) *Int {
+	var A, B, Ua, Ub *Int
+
+	A = new(Int).Set(a)
+	B = new(Int).Set(b)
+
+	extended := x != nil || y != nil
+
+	if extended {
+		// Ua (Ub) tracks how many times input a has been accumulated into A (B).
+		Ua = new(Int).SetInt64(1)
+		Ub = new(Int)
 	}
 
-	// don't destroy incoming values of a and b
-	B := new(Int).Set(b) // must be set first in case b is an alias of z
-	A := z.Set(a)
-
 	// temp variables for multiprecision update
-	t := new(Int)
+	q := new(Int)
 	r := new(Int)
 	s := new(Int)
-	w := new(Int)
+	t := new(Int)
+
+	// ensure A >= B
+	if A.abs.cmp(B.abs) < 0 {
+		A, B = B, A
+		Ub, Ua = Ua, Ub
+	}
 
 	// loop invariant A >= B
 	for len(B.abs) > 1 {
-		// initialize the digits
-		var a1, a2, u0, u1, u2, v0, v1, v2 Word
+		// Attempt to calculate in single-precision using leading words of A and B.
+		u0, u1, v0, v1, even := lehmerSimulate(A, B)
 
-		m := len(B.abs) // m >= 2
-		n := len(A.abs) // n >= m >= 2
-
-		// extract the top Word of bits from A and B
-		h := nlz(A.abs[n-1])
-		a1 = (A.abs[n-1] << h) | (A.abs[n-2] >> (_W - h))
-		// B may have implicit zero words in the high bits if the lengths differ
-		switch {
-		case n == m:
-			a2 = (B.abs[n-1] << h) | (B.abs[n-2] >> (_W - h))
-		case n == m+1:
-			a2 = (B.abs[n-2] >> (_W - h))
-		default:
-			a2 = 0
-		}
-
-		// Since we are calculating with full words to avoid overflow,
-		// we use 'even' to track the sign of the cosequences.
-		// For even iterations: u0, v1 >= 0 && u1, v0 <= 0
-		// For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
-		// The first iteration starts with k=1 (odd).
-		even := false
-		// variables to track the cosequences
-		u0, u1, u2 = 0, 1, 0
-		v0, v1, v2 = 0, 0, 1
-
-		// Calculate the quotient and cosequences using Collins' stopping condition.
-		// Note that overflow of a Word is not possible when computing the remainder
-		// sequence and cosequences since the cosequence size is bounded by the input size.
-		// See section 4.2 of Jebelean for details.
-		for a2 >= v2 && a1-a2 >= v1+v2 {
-			q := a1 / a2
-			a1, a2 = a2, a1-q*a2
-			u0, u1, u2 = u1, u2, u1+q*u2
-			v0, v1, v2 = v1, v2, v1+q*v2
-			even = !even
-		}
-
-		// multiprecision step
+		// multiprecision Step
 		if v0 != 0 {
-			// simulate the effect of the single precision steps using the cosequences
+			// Simulate the effect of the single-precision steps using the cosequences.
 			// A = u0*A + v0*B
 			// B = u1*A + v1*B
+			lehmerUpdate(A, B, q, r, s, t, u0, u1, v0, v1, even)
 
-			t.abs = t.abs.setWord(u0)
-			s.abs = s.abs.setWord(v0)
-			t.neg = !even
-			s.neg = even
-
-			t.Mul(A, t)
-			s.Mul(B, s)
-
-			r.abs = r.abs.setWord(u1)
-			w.abs = w.abs.setWord(v1)
-			r.neg = even
-			w.neg = !even
-
-			r.Mul(A, r)
-			w.Mul(B, w)
-
-			A.Add(t, s)
-			B.Add(r, w)
+			if extended {
+				// Ua = u0*Ua + v0*Ub
+				// Ub = u1*Ua + v1*Ub
+				lehmerUpdate(Ua, Ub, q, r, s, t, u0, u1, v0, v1, even)
+			}
 
 		} else {
-			// single-digit calculations failed to simulate any quotients
-			// do a standard Euclidean step
-			t.Rem(A, B)
-			A, B, t = B, t, A
+			// Single-digit calculations failed to simulate any quotients.
+			// Do a standard Euclidean step.
+			euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended)
 		}
 	}
 
 	if len(B.abs) > 0 {
-		// standard Euclidean algorithm base case for B a single Word
+		// extended Euclidean algorithm base case if B is a single Word
 		if len(A.abs) > 1 {
-			// A is longer than a single Word
-			t.Rem(A, B)
-			A, B, t = B, t, A
+			// A is longer than a single Word, so one update is needed.
+			euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended)
 		}
 		if len(B.abs) > 0 {
-			// A and B are both a single Word
-			a1, a2 := A.abs[0], B.abs[0]
-			for a2 != 0 {
-				a1, a2 = a2, a1%a2
+			// A and B are both a single Word.
+			aWord, bWord := A.abs[0], B.abs[0]
+			if extended {
+				var ua, ub, va, vb Word
+				ua, ub = 1, 0
+				va, vb = 0, 1
+				even := true
+				for bWord != 0 {
+					q, r := aWord/bWord, aWord%bWord
+					aWord, bWord = bWord, r
+					ua, ub = ub, ua+q*ub
+					va, vb = vb, va+q*vb
+					even = !even
+				}
+
+				t.abs = t.abs.setWord(ua)
+				s.abs = s.abs.setWord(va)
+				t.neg = !even
+				s.neg = even
+
+				t.Mul(Ua, t)
+				s.Mul(Ub, s)
+
+				Ua.Add(t, s)
+			} else {
+				for bWord != 0 {
+					aWord, bWord = bWord, aWord%bWord
+				}
 			}
-			A.abs[0] = a1
+			A.abs[0] = aWord
 		}
 	}
+
+	if x != nil {
+		*x = *Ua
+	}
+
+	if y != nil {
+		// y = (z - a*x)/b
+		y.Mul(a, Ua)
+		y.Sub(A, y)
+		y.Div(y, b)
+	}
+
 	*z = *A
+
 	return z
 }
 
diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go
index b660d53523..1ef4d150b8 100644
--- a/src/math/big/int_test.go
+++ b/src/math/big/int_test.go
@@ -675,21 +675,43 @@ func checkGcd(aBytes, bBytes []byte) bool {
 	return x.Cmp(d) == 0
 }
 
-// euclidGCD is a reference implementation of Euclid's GCD
-// algorithm for testing against optimized algorithms.
+// euclidExtGCD is a reference implementation of Euclid's
+// extended GCD algorithm for testing against optimized algorithms.
 // Requirements: a, b > 0
-func euclidGCD(a, b *Int) *Int {
-
+func euclidExtGCD(a, b *Int) (g, x, y *Int) {
 	A := new(Int).Set(a)
 	B := new(Int).Set(b)
-	t := new(Int)
 
+	// A = Ua*a + Va*b
+	// B = Ub*a + Vb*b
+	Ua := new(Int).SetInt64(1)
+	Va := new(Int)
+
+	Ub := new(Int)
+	Vb := new(Int).SetInt64(1)
+
+	q := new(Int)
+	temp := new(Int)
+
+	r := new(Int)
 	for len(B.abs) > 0 {
-		// A, B = B, A % B
-		t.Rem(A, B)
-		A, B, t = B, t, A
+		q, r = q.QuoRem(A, B, r)
+
+		A, B, r = B, r, A
+
+		// Ua, Ub = Ub, Ua-q*Ub
+		temp.Set(Ub)
+		Ub.Mul(Ub, q)
+		Ub.Sub(Ua, Ub)
+		Ua.Set(temp)
+
+		// Va, Vb = Vb, Va-q*Vb
+		temp.Set(Vb)
+		Vb.Mul(Vb, q)
+		Vb.Sub(Va, Vb)
+		Va.Set(temp)
 	}
-	return A
+	return A, Ua, Va
 }
 
 func checkLehmerGcd(aBytes, bBytes []byte) bool {
@@ -700,12 +722,28 @@ func checkLehmerGcd(aBytes, bBytes []byte) bool {
 		return true // can only test positive arguments
 	}
 
-	d := new(Int).lehmerGCD(a, b)
-	d0 := euclidGCD(a, b)
+	d := new(Int).lehmerGCD(nil, nil, a, b)
+	d0, _, _ := euclidExtGCD(a, b)
 
 	return d.Cmp(d0) == 0
 }
 
+func checkLehmerExtGcd(aBytes, bBytes []byte) bool {
+	a := new(Int).SetBytes(aBytes)
+	b := new(Int).SetBytes(bBytes)
+	x := new(Int)
+	y := new(Int)
+
+	if a.Sign() <= 0 || b.Sign() <= 0 {
+		return true // can only test positive arguments
+	}
+
+	d := new(Int).lehmerGCD(x, y, a, b)
+	d0, x0, y0 := euclidExtGCD(a, b)
+
+	return d.Cmp(d0) == 0 && x.Cmp(x0) == 0 && y.Cmp(y0) == 0
+}
+
 var gcdTests = []struct {
 	d, x, y, a, b string
 }{
@@ -785,6 +823,10 @@ func TestGcd(t *testing.T) {
 	if err := quick.Check(checkLehmerGcd, nil); err != nil {
 		t.Error(err)
 	}
+
+	if err := quick.Check(checkLehmerExtGcd, nil); err != nil {
+		t.Error(err)
+	}
 }
 
 type intShiftTest struct {
diff --git a/src/math/big/rat.go b/src/math/big/rat.go
index b33fc696ad..46d58fcf36 100644
--- a/src/math/big/rat.go
+++ b/src/math/big/rat.go
@@ -422,7 +422,7 @@ func (z *Rat) norm() *Rat {
 		neg := z.a.neg
 		z.a.neg = false
 		z.b.neg = false
-		if f := NewInt(0).lehmerGCD(&z.a, &z.b); f.Cmp(intOne) != 0 {
+		if f := NewInt(0).lehmerGCD(nil, nil, &z.a, &z.b); f.Cmp(intOne) != 0 {
 			z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs)
 			z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs)
 			if z.b.abs.cmp(natOne) == 0 {