diff --git a/src/pkg/math/big/nat.go b/src/pkg/math/big/nat.go index 2e5c56d461..17985c248e 100644 --- a/src/pkg/math/big/nat.go +++ b/src/pkg/math/big/nat.go @@ -1248,6 +1248,15 @@ func (z nat) expNN(x, y, m nat) nat { } z = z.set(x) + // If the base is non-trivial and the exponent is large, we use + // 4-bit, windowed exponentiation. This involves precomputing 14 values + // (x^2...x^15) but then reduces the number of multiply-reduces by a + // third. Even for a 32-bit exponent, this reduces the number of + // operations. + if len(x) > 1 && len(y) > 1 && len(m) > 0 { + return z.expNNWindowed(x, y, m) + } + v := y[len(y)-1] // v > 0 because y is normalized and y > 0 shift := leadingZeros(v) + 1 v <<= shift @@ -1304,6 +1313,69 @@ func (z nat) expNN(x, y, m nat) nat { return z.norm() } +// expNNWindowed calculates x**y mod m using a fixed, 4-bit window. +func (z nat) expNNWindowed(x, y, m nat) nat { + // zz and r are used to avoid allocating in mul and div as otherwise + // the arguments would alias. + var zz, r nat + + const n = 4 + // powers[i] contains x^i. + var powers [1 << n]nat + powers[0] = natOne + powers[1] = x + for i := 2; i < 1<= 0; i-- { + yi := y[i] + for j := 0; j < _W; j += n { + if i != len(y)-1 || j != 0 { + // Unrolled loop for significant performance + // gain. Use go test -bench=".*" in crypto/rsa + // to check performance before making changes. + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + zz = zz.mul(z, z) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + } + + zz = zz.mul(z, powers[yi>>(_W-n)]) + zz, z = z, zz + zz, r = zz.div(r, z, m) + z, r = r, z + + yi <<= n + } + } + + return z.norm() +} + // probablyPrime performs reps Miller-Rabin tests to check whether n is prime. // If it returns true, n is prime with probability 1 - 1/4^reps. // If it returns false, n is not prime.