diff --git a/src/crypto/internal/nistec/nistec_test.go b/src/crypto/internal/nistec/nistec_test.go index 9103608c18a..1a82b22286f 100644 --- a/src/crypto/internal/nistec/nistec_test.go +++ b/src/crypto/internal/nistec/nistec_test.go @@ -19,7 +19,7 @@ func TestAllocations(t *testing.T) { testenv.SkipIfOptimizationOff(t) t.Run("P224", func(t *testing.T) { - if allocs := testing.AllocsPerRun(100, func() { + if allocs := testing.AllocsPerRun(10, func() { p := nistec.NewP224Point().SetGenerator() scalar := make([]byte, 28) rand.Read(scalar) @@ -38,7 +38,7 @@ func TestAllocations(t *testing.T) { } }) t.Run("P256", func(t *testing.T) { - if allocs := testing.AllocsPerRun(100, func() { + if allocs := testing.AllocsPerRun(10, func() { p := nistec.NewP256Point().SetGenerator() scalar := make([]byte, 32) rand.Read(scalar) @@ -57,7 +57,7 @@ func TestAllocations(t *testing.T) { } }) t.Run("P384", func(t *testing.T) { - if allocs := testing.AllocsPerRun(100, func() { + if allocs := testing.AllocsPerRun(10, func() { p := nistec.NewP384Point().SetGenerator() scalar := make([]byte, 48) rand.Read(scalar) @@ -76,7 +76,7 @@ func TestAllocations(t *testing.T) { } }) t.Run("P521", func(t *testing.T) { - if allocs := testing.AllocsPerRun(100, func() { + if allocs := testing.AllocsPerRun(10, func() { p := nistec.NewP521Point().SetGenerator() scalar := make([]byte, 66) rand.Read(scalar) @@ -133,21 +133,13 @@ func testEquivalents[P nistPoint[P]](t *testing.T, newPoint func() P, c elliptic p1 := newPoint().Double(p) p2 := newPoint().Add(p, p) p3, err := newPoint().ScalarMult(p, two) - if err != nil { - t.Fatal(err) - } + fatalIfErr(t, err) p4, err := newPoint().ScalarBaseMult(two) - if err != nil { - t.Fatal(err) - } + fatalIfErr(t, err) p5, err := newPoint().ScalarMult(p, nPlusTwo) - if err != nil { - t.Fatal(err) - } + fatalIfErr(t, err) p6, err := newPoint().ScalarBaseMult(nPlusTwo) - if err != nil { - t.Fatal(err) - } + fatalIfErr(t, err) if !bytes.Equal(p1.Bytes(), p2.Bytes()) { t.Error("P+P != 2*P") @@ -230,10 +222,10 @@ func testScalarMult[P nistPoint[P]](t *testing.T, newPoint func() P, c elliptic. checkScalar(t, s.FillBytes(make([]byte, byteLen))) }) } - // Test N+1...N+32 since they risk overlapping with precomputed table values + // Test N-32...N+32 since they risk overlapping with precomputed table values // in the final additions. - for i := int64(2); i <= 32; i++ { - t.Run(fmt.Sprintf("N+%d", i), func(t *testing.T) { + for i := int64(-32); i <= 32; i++ { + t.Run(fmt.Sprintf("N%+d", i), func(t *testing.T) { checkScalar(t, new(big.Int).Add(c.Params().N, big.NewInt(i)).Bytes()) }) } diff --git a/src/crypto/internal/nistec/p256_asm.go b/src/crypto/internal/nistec/p256_asm.go index 99a22b833f0..aa1ceba6bb4 100644 --- a/src/crypto/internal/nistec/p256_asm.go +++ b/src/crypto/internal/nistec/p256_asm.go @@ -294,8 +294,9 @@ func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement) // [0]P is the point at infinity and it's not stored. type p256Table [16]P256Point -// p256Select sets res to the point at index idx in the table. -// idx must be in [0, 15]. It executes in constant time. +// p256Select sets res to the point at index idx - 1 in the table. +// idx must be in [1, 16] or res will be set to an undefined value. +// It executes in constant time. // //go:noescape func p256Select(res *P256Point, table *p256Table, idx int) @@ -335,22 +336,25 @@ func init() { p256Precomputed = (*[43]p256AffineTable)(*p256PrecomputedPtr) } -// p256SelectAffine sets res to the point at index idx in the table. -// idx must be in [0, 31]. It executes in constant time. +// p256SelectAffine sets res to the point at index idx - 1 in the table. +// idx must be in [1, 32] or res will be set to an undefined value. +// It executes in constant time. // //go:noescape func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int) // Point addition with an affine point and constant time conditions. // If zero is 0, sets res = in2. If sel is 0, sets res = in1. -// If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2 +// If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2. +// If neither sel nor zero are 0 and in1 = in2, or both zero and sel are 0, +// or in1 is the infinity, res is undefined. // //go:noescape func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int) -// Point addition. Sets res = in1 + in2. Returns one if the two input points -// were equal and zero otherwise. If in1 or in2 are the point at infinity, res -// and the return value are undefined. +// Point addition. Sets res = in1 + in2 and returns zero if in1 and in2 are not +// equal. Otherwise, returns one and res is undefined. If in1 or in2 are the +// point at infinity, res and the return value are undefined. // //go:noescape func p256PointAddAsm(res, in1, in2 *P256Point) int @@ -603,58 +607,93 @@ func p256Inverse(out, in *p256Element) { p256Mul(out, in, z) } -func boothW5(in uint) (int, int) { - var s uint = ^((in >> 5) - 1) - var d uint = (1 << 6) - in - 1 +// p256OrdRsh returns the 64 least significant bits of x >> n. n must be lower +// than 256. The value of n leaks through timing side-channels. +func p256OrdRsh(x *p256OrdElement, n int) uint64 { + i := n / 64 + n = n % 64 + res := x[i] >> n + // Shift in the more significant limb, if present. + if i := i + 1; i < len(x) { + res |= x[i] << (64 - n) + } + return res +} + +func boothW5(in uint64) (int, int) { + s := ^((in >> 5) - 1) + d := (1 << 6) - in - 1 d = (d & s) | (in & (^s)) d = (d >> 1) + (d & 1) return int(d), int(s & 1) } -func boothW6(in uint) (int, int) { - var s uint = ^((in >> 6) - 1) - var d uint = (1 << 7) - in - 1 +func boothW6(in uint64) (int, int) { + s := ^((in >> 6) - 1) + d := (1 << 7) - in - 1 d = (d & s) | (in & (^s)) d = (d >> 1) + (d & 1) return int(d), int(s & 1) } func (p *P256Point) p256BaseMult(scalar *p256OrdElement) { + // This function works like p256ScalarMult below, but the table is fixed and + // "pre-doubled" for each iteration, so instead of doubling we move to the + // next table at each iteration. + + // Start scanning the window from the most significant bits. We move by + // 6 bits at a time and need to finish at -1, so -1 + 6 * 42 = 251. + index := 251 + + sel, sign := boothW6(p256OrdRsh(scalar, index)) + // sign is always zero because the boothW6 input here is at + // most five bits long, so the top bit is never set. + _ = sign + var t0 p256AffinePoint - - wvalue := (scalar[0] << 1) & 0x7f - sel, sign := boothW6(uint(wvalue)) - p256SelectAffine(&t0, &p256Precomputed[0], sel) + p256SelectAffine(&t0, &p256Precomputed[(index+1)/6], sel) p.x, p.y, p.z = t0.x, t0.y, p256One - p256NegCond(&p.y, sign) - - index := uint(5) zero := sel - for i := 1; i < 43; i++ { - if index < 192 { - wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f + for index >= 5 { + index -= 6 + + if index >= 0 { + sel, sign = boothW6(p256OrdRsh(scalar, index) & 0b1111111) } else { - wvalue = (scalar[index/64] >> (index % 64)) & 0x7f + // Booth encoding considers a virtual zero bit at index -1, + // so we shift left the least significant limb. + wvalue := (scalar[0] << 1) & 0b1111111 + sel, sign = boothW6(wvalue) } - index += 6 - sel, sign = boothW6(uint(wvalue)) - p256SelectAffine(&t0, &p256Precomputed[i], sel) + + table := &p256Precomputed[(index+1)/6] + p256SelectAffine(&t0, table, sel) + + // See p256ScalarMult for the behavior of sign, sel, and zero, that here + // is all rolled into the p256PointAddAffineAsm function. We also know + // that (if sel and zero are not 0) p != t0 for a similar reason. p256PointAddAffineAsm(p, p, &t0, sign, sel, zero) zero |= sel } - // If the whole scalar was zero, set to the point at infinity. - p256MovCond(p, p, NewP256Point(), zero) + // If zero is 0, the whole scalar was zero, p is undefined, + // and the correct result is the infinity. + infinity := NewP256Point() + p256MovCond(p, p, infinity, zero) } func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) { - // precomp is a table of precomputed points that stores powers of p - // from p^1 to p^16. + // If p is the point at infinity, p256PointAddAsm's behavior below is + // undefined. We'll just return the infinity at the end. + isInfinity := p.isInfinity() + + // precomp is a table of precomputed points that stores + // powers of p from p^1 to p^16. var precomp p256Table var t0, t1, t2, t3 P256Point - // Prepare the table + // Prepare the table by double and adding. precomp[0] = *p // 1 p256PointDoubleAsm(&t0, p) @@ -693,52 +732,56 @@ func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) { precomp[12] = t0 // 13 precomp[14] = t2 // 15 - // Start scanning the window from top bit - index := uint(254) - var sel, sign int + // Start scanning the window from the most significant bits. We move by + // 5 bits at a time and need to finish at -1, so -1 + 5 * 51 = 254. + index := 254 - wvalue := (scalar[index/64] >> (index % 64)) & 0x3f - sel, _ = boothW5(uint(wvalue)) + sel, sign := boothW5(p256OrdRsh(scalar, index)) + // sign is always zero because the boothW5 input here is at + // most two bits long, so the top bit is never set. + _ = sign p256Select(p, &precomp, sel) zero := sel - for index > 4 { + for index >= 4 { index -= 5 + p256PointDoubleAsm(p, p) p256PointDoubleAsm(p, p) p256PointDoubleAsm(p, p) p256PointDoubleAsm(p, p) p256PointDoubleAsm(p, p) - if index < 192 { - wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f + if index >= 0 { + sel, sign = boothW5(p256OrdRsh(scalar, index) & 0b111111) } else { - wvalue = (scalar[index/64] >> (index % 64)) & 0x3f + // Booth encoding considers a virtual zero bit at index -1, + // so we shift left the least significant limb. + wvalue := (scalar[0] << 1) & 0b111111 + sel, sign = boothW5(wvalue) } - sel, sign = boothW5(uint(wvalue)) - p256Select(&t0, &precomp, sel) p256NegCond(&t0.y, sign) + + // We don't check the return value of p256PointAddAsm because t0 is + // [±1-16]P, while p was just doubled five times and can't have wrapped + // around because scalar is less than the group order. p256PointAddAsm(&t1, p, &t0) + + // If sel is 0, t0 was undefined and the correct result is p unmodified. + // If zero is 0, all previous sel were 0 and the correct result is t0. + // If both are 0, the result doesn't matter as it will be thrown out. p256MovCond(&t1, &t1, p, sel) p256MovCond(p, &t1, &t0, zero) zero |= sel } - p256PointDoubleAsm(p, p) - p256PointDoubleAsm(p, p) - p256PointDoubleAsm(p, p) - p256PointDoubleAsm(p, p) - p256PointDoubleAsm(p, p) - - wvalue = (scalar[0] << 1) & 0x3f - sel, sign = boothW5(uint(wvalue)) - - p256Select(&t0, &precomp, sel) - p256NegCond(&t0.y, sign) - p256PointAddAsm(&t1, p, &t0) - p256MovCond(&t1, &t1, p, sel) - p256MovCond(p, &t1, &t0, zero) + // If zero is 0, the whole scalar was zero. + // If isInfinity is 1, the input point was the infinity. + // In both cases, p is undefined and the correct result is the infinity. + infinity := NewP256Point() + wantInfinity := zero & (isInfinity - 1) + p256MovCond(p, p, infinity, wantInfinity) }