diff --git a/src/math/big/int.go b/src/math/big/int.go index caebde92fa..b1d09cdad8 100644 --- a/src/math/big/int.go +++ b/src/math/big/int.go @@ -485,162 +485,223 @@ func (z *Int) GCD(x, y, a, b *Int) *Int { } return z } - if x == nil && y == nil { - return z.lehmerGCD(a, b) + + return z.lehmerGCD(x, y, a, b) +} + +// lehmerSimulate attempts to simulate several Euclidean update steps +// using the leading digits of A and B. It returns u0, u1, v0, v1 +// such that A and B can be updated as: +// A = u0*A + v0*B +// B = u1*A + v1*B +// Requirements: A >= B and len(B.abs) >= 2 +// Since we are calculating with full words to avoid overflow, +// we use 'even' to track the sign of the cosequences. +// For even iterations: u0, v1 >= 0 && u1, v0 <= 0 +// For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 +func lehmerSimulate(A, B *Int) (u0, u1, v0, v1 Word, even bool) { + // initialize the digits + var a1, a2, u2, v2 Word + + m := len(B.abs) // m >= 2 + n := len(A.abs) // n >= m >= 2 + + // extract the top Word of bits from A and B + h := nlz(A.abs[n-1]) + a1 = A.abs[n-1]<>(_W-h) + // B may have implicit zero words in the high bits if the lengths differ + switch { + case n == m: + a2 = B.abs[n-1]<>(_W-h) + case n == m+1: + a2 = B.abs[n-2] >> (_W - h) + default: + a2 = 0 } - A := new(Int).Set(a) - B := new(Int).Set(b) + // Since we are calculating with full words to avoid overflow, + // we use 'even' to track the sign of the cosequences. + // For even iterations: u0, v1 >= 0 && u1, v0 <= 0 + // For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 + // The first iteration starts with k=1 (odd). + even = false + // variables to track the cosequences + u0, u1, u2 = 0, 1, 0 + v0, v1, v2 = 0, 0, 1 - X := new(Int) - lastX := new(Int).SetInt64(1) - - q := new(Int) - temp := new(Int) - - r := new(Int) - for len(B.abs) > 0 { - q, r = q.QuoRem(A, B, r) - - A, B, r = B, r, A - - temp.Set(X) - X.Mul(X, q) - X.Sub(lastX, X) - lastX.Set(temp) + // Calculate the quotient and cosequences using Collins' stopping condition. + // Note that overflow of a Word is not possible when computing the remainder + // sequence and cosequences since the cosequence size is bounded by the input size. + // See section 4.2 of Jebelean for details. + for a2 >= v2 && a1-a2 >= v1+v2 { + q, r := a1/a2, a1%a2 + a1, a2 = a2, r + u0, u1, u2 = u1, u2, u1+q*u2 + v0, v1, v2 = v1, v2, v1+q*v2 + even = !even } + return +} - if x != nil { - *x = *lastX +// lehmerUpdate updates the inputs A and B such that: +// A = u0*A + v0*B +// B = u1*A + v1*B +// where the signs of u0, u1, v0, v1 are given by even +// For even == true: u0, v1 >= 0 && u1, v0 <= 0 +// For even == false: u0, v1 <= 0 && u1, v0 >= 0 +// q, r, s, t are temporary variables to avoid allocations in the multiplication +func lehmerUpdate(A, B, q, r, s, t *Int, u0, u1, v0, v1 Word, even bool) { + + t.abs = t.abs.setWord(u0) + s.abs = s.abs.setWord(v0) + t.neg = !even + s.neg = even + + t.Mul(A, t) + s.Mul(B, s) + + r.abs = r.abs.setWord(u1) + q.abs = q.abs.setWord(v1) + r.neg = even + q.neg = !even + + r.Mul(A, r) + q.Mul(B, q) + + A.Add(t, s) + B.Add(r, q) +} + +// euclidUpdate performs a single step of the Euclidean GCD algorithm +// if extended is true, it also updates the cosequence Ua, Ub +func euclidUpdate(A, B, Ua, Ub, q, r, s, t *Int, extended bool) { + q, r = q.QuoRem(A, B, r) + + *A, *B, *r = *B, *r, *A + + if extended { + // Ua, Ub = Ub, Ua - q*Ub + t.Set(Ub) + s.Mul(Ub, q) + Ub.Sub(Ua, s) + Ua.Set(t) } - - if y != nil { - // y = (z - a*x)/b - y.Mul(a, lastX) - y.Sub(A, y) - y.Div(y, b) - } - - *z = *A - return z } // lehmerGCD sets z to the greatest common divisor of a and b, // which both must be > 0, and returns z. +// If x or y are not nil, their values are set such that z = a*x + b*y. // See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L. // This implementation uses the improved condition by Collins requiring only one // quotient and avoiding the possibility of single Word overflow. // See Jebelean, "Improving the multiprecision Euclidean algorithm", // Design and Implementation of Symbolic Computation Systems, pp 45-58. -func (z *Int) lehmerGCD(a, b *Int) *Int { - // ensure a >= b - if a.abs.cmp(b.abs) < 0 { - a, b = b, a +// The cosequences are updated according to Algorithm 10.45 from +// Cohen et al. "Handbook of Elliptic and Hyperelliptic Curve Cryptography" pp 192. +func (z *Int) lehmerGCD(x, y, a, b *Int) *Int { + var A, B, Ua, Ub *Int + + A = new(Int).Set(a) + B = new(Int).Set(b) + + extended := x != nil || y != nil + + if extended { + // Ua (Ub) tracks how many times input a has been accumulated into A (B). + Ua = new(Int).SetInt64(1) + Ub = new(Int) } - // don't destroy incoming values of a and b - B := new(Int).Set(b) // must be set first in case b is an alias of z - A := z.Set(a) - // temp variables for multiprecision update - t := new(Int) + q := new(Int) r := new(Int) s := new(Int) - w := new(Int) + t := new(Int) + + // ensure A >= B + if A.abs.cmp(B.abs) < 0 { + A, B = B, A + Ub, Ua = Ua, Ub + } // loop invariant A >= B for len(B.abs) > 1 { - // initialize the digits - var a1, a2, u0, u1, u2, v0, v1, v2 Word + // Attempt to calculate in single-precision using leading words of A and B. + u0, u1, v0, v1, even := lehmerSimulate(A, B) - m := len(B.abs) // m >= 2 - n := len(A.abs) // n >= m >= 2 - - // extract the top Word of bits from A and B - h := nlz(A.abs[n-1]) - a1 = (A.abs[n-1] << h) | (A.abs[n-2] >> (_W - h)) - // B may have implicit zero words in the high bits if the lengths differ - switch { - case n == m: - a2 = (B.abs[n-1] << h) | (B.abs[n-2] >> (_W - h)) - case n == m+1: - a2 = (B.abs[n-2] >> (_W - h)) - default: - a2 = 0 - } - - // Since we are calculating with full words to avoid overflow, - // we use 'even' to track the sign of the cosequences. - // For even iterations: u0, v1 >= 0 && u1, v0 <= 0 - // For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 - // The first iteration starts with k=1 (odd). - even := false - // variables to track the cosequences - u0, u1, u2 = 0, 1, 0 - v0, v1, v2 = 0, 0, 1 - - // Calculate the quotient and cosequences using Collins' stopping condition. - // Note that overflow of a Word is not possible when computing the remainder - // sequence and cosequences since the cosequence size is bounded by the input size. - // See section 4.2 of Jebelean for details. - for a2 >= v2 && a1-a2 >= v1+v2 { - q := a1 / a2 - a1, a2 = a2, a1-q*a2 - u0, u1, u2 = u1, u2, u1+q*u2 - v0, v1, v2 = v1, v2, v1+q*v2 - even = !even - } - - // multiprecision step + // multiprecision Step if v0 != 0 { - // simulate the effect of the single precision steps using the cosequences + // Simulate the effect of the single-precision steps using the cosequences. // A = u0*A + v0*B // B = u1*A + v1*B + lehmerUpdate(A, B, q, r, s, t, u0, u1, v0, v1, even) - t.abs = t.abs.setWord(u0) - s.abs = s.abs.setWord(v0) - t.neg = !even - s.neg = even - - t.Mul(A, t) - s.Mul(B, s) - - r.abs = r.abs.setWord(u1) - w.abs = w.abs.setWord(v1) - r.neg = even - w.neg = !even - - r.Mul(A, r) - w.Mul(B, w) - - A.Add(t, s) - B.Add(r, w) + if extended { + // Ua = u0*Ua + v0*Ub + // Ub = u1*Ua + v1*Ub + lehmerUpdate(Ua, Ub, q, r, s, t, u0, u1, v0, v1, even) + } } else { - // single-digit calculations failed to simulate any quotients - // do a standard Euclidean step - t.Rem(A, B) - A, B, t = B, t, A + // Single-digit calculations failed to simulate any quotients. + // Do a standard Euclidean step. + euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended) } } if len(B.abs) > 0 { - // standard Euclidean algorithm base case for B a single Word + // extended Euclidean algorithm base case if B is a single Word if len(A.abs) > 1 { - // A is longer than a single Word - t.Rem(A, B) - A, B, t = B, t, A + // A is longer than a single Word, so one update is needed. + euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended) } if len(B.abs) > 0 { - // A and B are both a single Word - a1, a2 := A.abs[0], B.abs[0] - for a2 != 0 { - a1, a2 = a2, a1%a2 + // A and B are both a single Word. + aWord, bWord := A.abs[0], B.abs[0] + if extended { + var ua, ub, va, vb Word + ua, ub = 1, 0 + va, vb = 0, 1 + even := true + for bWord != 0 { + q, r := aWord/bWord, aWord%bWord + aWord, bWord = bWord, r + ua, ub = ub, ua+q*ub + va, vb = vb, va+q*vb + even = !even + } + + t.abs = t.abs.setWord(ua) + s.abs = s.abs.setWord(va) + t.neg = !even + s.neg = even + + t.Mul(Ua, t) + s.Mul(Ub, s) + + Ua.Add(t, s) + } else { + for bWord != 0 { + aWord, bWord = bWord, aWord%bWord + } } - A.abs[0] = a1 + A.abs[0] = aWord } } + + if x != nil { + *x = *Ua + } + + if y != nil { + // y = (z - a*x)/b + y.Mul(a, Ua) + y.Sub(A, y) + y.Div(y, b) + } + *z = *A + return z } diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go index b660d53523..1ef4d150b8 100644 --- a/src/math/big/int_test.go +++ b/src/math/big/int_test.go @@ -675,21 +675,43 @@ func checkGcd(aBytes, bBytes []byte) bool { return x.Cmp(d) == 0 } -// euclidGCD is a reference implementation of Euclid's GCD -// algorithm for testing against optimized algorithms. +// euclidExtGCD is a reference implementation of Euclid's +// extended GCD algorithm for testing against optimized algorithms. // Requirements: a, b > 0 -func euclidGCD(a, b *Int) *Int { - +func euclidExtGCD(a, b *Int) (g, x, y *Int) { A := new(Int).Set(a) B := new(Int).Set(b) - t := new(Int) + // A = Ua*a + Va*b + // B = Ub*a + Vb*b + Ua := new(Int).SetInt64(1) + Va := new(Int) + + Ub := new(Int) + Vb := new(Int).SetInt64(1) + + q := new(Int) + temp := new(Int) + + r := new(Int) for len(B.abs) > 0 { - // A, B = B, A % B - t.Rem(A, B) - A, B, t = B, t, A + q, r = q.QuoRem(A, B, r) + + A, B, r = B, r, A + + // Ua, Ub = Ub, Ua-q*Ub + temp.Set(Ub) + Ub.Mul(Ub, q) + Ub.Sub(Ua, Ub) + Ua.Set(temp) + + // Va, Vb = Vb, Va-q*Vb + temp.Set(Vb) + Vb.Mul(Vb, q) + Vb.Sub(Va, Vb) + Va.Set(temp) } - return A + return A, Ua, Va } func checkLehmerGcd(aBytes, bBytes []byte) bool { @@ -700,12 +722,28 @@ func checkLehmerGcd(aBytes, bBytes []byte) bool { return true // can only test positive arguments } - d := new(Int).lehmerGCD(a, b) - d0 := euclidGCD(a, b) + d := new(Int).lehmerGCD(nil, nil, a, b) + d0, _, _ := euclidExtGCD(a, b) return d.Cmp(d0) == 0 } +func checkLehmerExtGcd(aBytes, bBytes []byte) bool { + a := new(Int).SetBytes(aBytes) + b := new(Int).SetBytes(bBytes) + x := new(Int) + y := new(Int) + + if a.Sign() <= 0 || b.Sign() <= 0 { + return true // can only test positive arguments + } + + d := new(Int).lehmerGCD(x, y, a, b) + d0, x0, y0 := euclidExtGCD(a, b) + + return d.Cmp(d0) == 0 && x.Cmp(x0) == 0 && y.Cmp(y0) == 0 +} + var gcdTests = []struct { d, x, y, a, b string }{ @@ -785,6 +823,10 @@ func TestGcd(t *testing.T) { if err := quick.Check(checkLehmerGcd, nil); err != nil { t.Error(err) } + + if err := quick.Check(checkLehmerExtGcd, nil); err != nil { + t.Error(err) + } } type intShiftTest struct { diff --git a/src/math/big/rat.go b/src/math/big/rat.go index b33fc696ad..46d58fcf36 100644 --- a/src/math/big/rat.go +++ b/src/math/big/rat.go @@ -422,7 +422,7 @@ func (z *Rat) norm() *Rat { neg := z.a.neg z.a.neg = false z.b.neg = false - if f := NewInt(0).lehmerGCD(&z.a, &z.b); f.Cmp(intOne) != 0 { + if f := NewInt(0).lehmerGCD(nil, nil, &z.a, &z.b); f.Cmp(intOne) != 0 { z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs) z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs) if z.b.abs.cmp(natOne) == 0 {