diff --git a/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s new file mode 100644 index 0000000000..797f9b051d --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/const_amd64.s @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +DATA ·REDMASK51(SB)/8, $0x0007FFFFFFFFFFFF +GLOBL ·REDMASK51(SB), 8, $8 + +DATA ·_121666_213(SB)/8, $996687872 +GLOBL ·_121666_213(SB), 8, $8 + +DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA +GLOBL ·_2P0(SB), 8, $8 + +DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE +GLOBL ·_2P1234(SB), 8, $8 diff --git a/src/vendor/golang_org/x/crypto/curve25519/cswap_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/cswap_amd64.s new file mode 100644 index 0000000000..45484d1b59 --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/cswap_amd64.s @@ -0,0 +1,88 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func cswap(inout *[5]uint64, v uint64) +TEXT ·cswap(SB),7,$0 + MOVQ inout+0(FP),DI + MOVQ v+8(FP),SI + + CMPQ SI,$1 + MOVQ 0(DI),SI + MOVQ 80(DI),DX + MOVQ 8(DI),CX + MOVQ 88(DI),R8 + MOVQ SI,R9 + CMOVQEQ DX,SI + CMOVQEQ R9,DX + MOVQ CX,R9 + CMOVQEQ R8,CX + CMOVQEQ R9,R8 + MOVQ SI,0(DI) + MOVQ DX,80(DI) + MOVQ CX,8(DI) + MOVQ R8,88(DI) + MOVQ 16(DI),SI + MOVQ 96(DI),DX + MOVQ 24(DI),CX + MOVQ 104(DI),R8 + MOVQ SI,R9 + CMOVQEQ DX,SI + CMOVQEQ R9,DX + MOVQ CX,R9 + CMOVQEQ R8,CX + CMOVQEQ R9,R8 + MOVQ SI,16(DI) + MOVQ DX,96(DI) + MOVQ CX,24(DI) + MOVQ R8,104(DI) + MOVQ 32(DI),SI + MOVQ 112(DI),DX + MOVQ 40(DI),CX + MOVQ 120(DI),R8 + MOVQ SI,R9 + CMOVQEQ DX,SI + CMOVQEQ R9,DX + MOVQ CX,R9 + CMOVQEQ R8,CX + CMOVQEQ R9,R8 + MOVQ SI,32(DI) + MOVQ DX,112(DI) + MOVQ CX,40(DI) + MOVQ R8,120(DI) + MOVQ 48(DI),SI + MOVQ 128(DI),DX + MOVQ 56(DI),CX + MOVQ 136(DI),R8 + MOVQ SI,R9 + CMOVQEQ DX,SI + CMOVQEQ R9,DX + MOVQ CX,R9 + CMOVQEQ R8,CX + CMOVQEQ R9,R8 + MOVQ SI,48(DI) + MOVQ DX,128(DI) + MOVQ CX,56(DI) + MOVQ R8,136(DI) + MOVQ 64(DI),SI + MOVQ 144(DI),DX + MOVQ 72(DI),CX + MOVQ 152(DI),R8 + MOVQ SI,R9 + CMOVQEQ DX,SI + CMOVQEQ R9,DX + MOVQ CX,R9 + CMOVQEQ R8,CX + CMOVQEQ R9,R8 + MOVQ SI,64(DI) + MOVQ DX,144(DI) + MOVQ CX,72(DI) + MOVQ R8,152(DI) + MOVQ DI,AX + MOVQ SI,DX + RET diff --git a/src/vendor/golang_org/x/crypto/curve25519/curve25519.go b/src/vendor/golang_org/x/crypto/curve25519/curve25519.go new file mode 100644 index 0000000000..6918c47fc2 --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/curve25519.go @@ -0,0 +1,841 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// We have a implementation in amd64 assembly so this code is only run on +// non-amd64 platforms. The amd64 assembly does not support gccgo. +// +build !amd64 gccgo appengine + +package curve25519 + +// This code is a port of the public domain, "ref10" implementation of +// curve25519 from SUPERCOP 20130419 by D. J. Bernstein. + +// fieldElement represents an element of the field GF(2^255 - 19). An element +// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 +// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on +// context. +type fieldElement [10]int32 + +func feZero(fe *fieldElement) { + for i := range fe { + fe[i] = 0 + } +} + +func feOne(fe *fieldElement) { + feZero(fe) + fe[0] = 1 +} + +func feAdd(dst, a, b *fieldElement) { + for i := range dst { + dst[i] = a[i] + b[i] + } +} + +func feSub(dst, a, b *fieldElement) { + for i := range dst { + dst[i] = a[i] - b[i] + } +} + +func feCopy(dst, src *fieldElement) { + for i := range dst { + dst[i] = src[i] + } +} + +// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0. +// +// Preconditions: b in {0,1}. +func feCSwap(f, g *fieldElement, b int32) { + var x fieldElement + b = -b + for i := range x { + x[i] = b & (f[i] ^ g[i]) + } + + for i := range f { + f[i] ^= x[i] + } + for i := range g { + g[i] ^= x[i] + } +} + +// load3 reads a 24-bit, little-endian value from in. +func load3(in []byte) int64 { + var r int64 + r = int64(in[0]) + r |= int64(in[1]) << 8 + r |= int64(in[2]) << 16 + return r +} + +// load4 reads a 32-bit, little-endian value from in. +func load4(in []byte) int64 { + var r int64 + r = int64(in[0]) + r |= int64(in[1]) << 8 + r |= int64(in[2]) << 16 + r |= int64(in[3]) << 24 + return r +} + +func feFromBytes(dst *fieldElement, src *[32]byte) { + h0 := load4(src[:]) + h1 := load3(src[4:]) << 6 + h2 := load3(src[7:]) << 5 + h3 := load3(src[10:]) << 3 + h4 := load3(src[13:]) << 2 + h5 := load4(src[16:]) + h6 := load3(src[20:]) << 7 + h7 := load3(src[23:]) << 5 + h8 := load3(src[26:]) << 4 + h9 := load3(src[29:]) << 2 + + var carry [10]int64 + carry[9] = (h9 + 1<<24) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + carry[1] = (h1 + 1<<24) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[3] = (h3 + 1<<24) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[5] = (h5 + 1<<24) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + carry[7] = (h7 + 1<<24) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[0] = (h0 + 1<<25) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[2] = (h2 + 1<<25) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[4] = (h4 + 1<<25) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[6] = (h6 + 1<<25) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + carry[8] = (h8 + 1<<25) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + dst[0] = int32(h0) + dst[1] = int32(h1) + dst[2] = int32(h2) + dst[3] = int32(h3) + dst[4] = int32(h4) + dst[5] = int32(h5) + dst[6] = int32(h6) + dst[7] = int32(h7) + dst[8] = int32(h8) + dst[9] = int32(h9) +} + +// feToBytes marshals h to s. +// Preconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +// +// Write p=2^255-19; q=floor(h/p). +// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). +// +// Proof: +// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. +// Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. +// +// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). +// Then 0> 25 + q = (h[0] + q) >> 26 + q = (h[1] + q) >> 25 + q = (h[2] + q) >> 26 + q = (h[3] + q) >> 25 + q = (h[4] + q) >> 26 + q = (h[5] + q) >> 25 + q = (h[6] + q) >> 26 + q = (h[7] + q) >> 25 + q = (h[8] + q) >> 26 + q = (h[9] + q) >> 25 + + // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. + h[0] += 19 * q + // Goal: Output h-2^255 q, which is between 0 and 2^255-20. + + carry[0] = h[0] >> 26 + h[1] += carry[0] + h[0] -= carry[0] << 26 + carry[1] = h[1] >> 25 + h[2] += carry[1] + h[1] -= carry[1] << 25 + carry[2] = h[2] >> 26 + h[3] += carry[2] + h[2] -= carry[2] << 26 + carry[3] = h[3] >> 25 + h[4] += carry[3] + h[3] -= carry[3] << 25 + carry[4] = h[4] >> 26 + h[5] += carry[4] + h[4] -= carry[4] << 26 + carry[5] = h[5] >> 25 + h[6] += carry[5] + h[5] -= carry[5] << 25 + carry[6] = h[6] >> 26 + h[7] += carry[6] + h[6] -= carry[6] << 26 + carry[7] = h[7] >> 25 + h[8] += carry[7] + h[7] -= carry[7] << 25 + carry[8] = h[8] >> 26 + h[9] += carry[8] + h[8] -= carry[8] << 26 + carry[9] = h[9] >> 25 + h[9] -= carry[9] << 25 + // h10 = carry9 + + // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + // Have h[0]+...+2^230 h[9] between 0 and 2^255-1; + // evidently 2^255 h10-2^255 q = 0. + // Goal: Output h[0]+...+2^230 h[9]. + + s[0] = byte(h[0] >> 0) + s[1] = byte(h[0] >> 8) + s[2] = byte(h[0] >> 16) + s[3] = byte((h[0] >> 24) | (h[1] << 2)) + s[4] = byte(h[1] >> 6) + s[5] = byte(h[1] >> 14) + s[6] = byte((h[1] >> 22) | (h[2] << 3)) + s[7] = byte(h[2] >> 5) + s[8] = byte(h[2] >> 13) + s[9] = byte((h[2] >> 21) | (h[3] << 5)) + s[10] = byte(h[3] >> 3) + s[11] = byte(h[3] >> 11) + s[12] = byte((h[3] >> 19) | (h[4] << 6)) + s[13] = byte(h[4] >> 2) + s[14] = byte(h[4] >> 10) + s[15] = byte(h[4] >> 18) + s[16] = byte(h[5] >> 0) + s[17] = byte(h[5] >> 8) + s[18] = byte(h[5] >> 16) + s[19] = byte((h[5] >> 24) | (h[6] << 1)) + s[20] = byte(h[6] >> 7) + s[21] = byte(h[6] >> 15) + s[22] = byte((h[6] >> 23) | (h[7] << 3)) + s[23] = byte(h[7] >> 5) + s[24] = byte(h[7] >> 13) + s[25] = byte((h[7] >> 21) | (h[8] << 4)) + s[26] = byte(h[8] >> 4) + s[27] = byte(h[8] >> 12) + s[28] = byte((h[8] >> 20) | (h[9] << 6)) + s[29] = byte(h[9] >> 2) + s[30] = byte(h[9] >> 10) + s[31] = byte(h[9] >> 18) +} + +// feMul calculates h = f * g +// Can overlap h with f or g. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +// +// Notes on implementation strategy: +// +// Using schoolbook multiplication. +// Karatsuba would save a little in some cost models. +// +// Most multiplications by 2 and 19 are 32-bit precomputations; +// cheaper than 64-bit postcomputations. +// +// There is one remaining multiplication by 19 in the carry chain; +// one *19 precomputation can be merged into this, +// but the resulting data flow is considerably less clean. +// +// There are 12 carries below. +// 10 of them are 2-way parallelizable and vectorizable. +// Can get away with 11 carries, but then data flow is much deeper. +// +// With tighter constraints on inputs can squeeze carries into int32. +func feMul(h, f, g *fieldElement) { + f0 := f[0] + f1 := f[1] + f2 := f[2] + f3 := f[3] + f4 := f[4] + f5 := f[5] + f6 := f[6] + f7 := f[7] + f8 := f[8] + f9 := f[9] + g0 := g[0] + g1 := g[1] + g2 := g[2] + g3 := g[3] + g4 := g[4] + g5 := g[5] + g6 := g[6] + g7 := g[7] + g8 := g[8] + g9 := g[9] + g1_19 := 19 * g1 // 1.4*2^29 + g2_19 := 19 * g2 // 1.4*2^30; still ok + g3_19 := 19 * g3 + g4_19 := 19 * g4 + g5_19 := 19 * g5 + g6_19 := 19 * g6 + g7_19 := 19 * g7 + g8_19 := 19 * g8 + g9_19 := 19 * g9 + f1_2 := 2 * f1 + f3_2 := 2 * f3 + f5_2 := 2 * f5 + f7_2 := 2 * f7 + f9_2 := 2 * f9 + f0g0 := int64(f0) * int64(g0) + f0g1 := int64(f0) * int64(g1) + f0g2 := int64(f0) * int64(g2) + f0g3 := int64(f0) * int64(g3) + f0g4 := int64(f0) * int64(g4) + f0g5 := int64(f0) * int64(g5) + f0g6 := int64(f0) * int64(g6) + f0g7 := int64(f0) * int64(g7) + f0g8 := int64(f0) * int64(g8) + f0g9 := int64(f0) * int64(g9) + f1g0 := int64(f1) * int64(g0) + f1g1_2 := int64(f1_2) * int64(g1) + f1g2 := int64(f1) * int64(g2) + f1g3_2 := int64(f1_2) * int64(g3) + f1g4 := int64(f1) * int64(g4) + f1g5_2 := int64(f1_2) * int64(g5) + f1g6 := int64(f1) * int64(g6) + f1g7_2 := int64(f1_2) * int64(g7) + f1g8 := int64(f1) * int64(g8) + f1g9_38 := int64(f1_2) * int64(g9_19) + f2g0 := int64(f2) * int64(g0) + f2g1 := int64(f2) * int64(g1) + f2g2 := int64(f2) * int64(g2) + f2g3 := int64(f2) * int64(g3) + f2g4 := int64(f2) * int64(g4) + f2g5 := int64(f2) * int64(g5) + f2g6 := int64(f2) * int64(g6) + f2g7 := int64(f2) * int64(g7) + f2g8_19 := int64(f2) * int64(g8_19) + f2g9_19 := int64(f2) * int64(g9_19) + f3g0 := int64(f3) * int64(g0) + f3g1_2 := int64(f3_2) * int64(g1) + f3g2 := int64(f3) * int64(g2) + f3g3_2 := int64(f3_2) * int64(g3) + f3g4 := int64(f3) * int64(g4) + f3g5_2 := int64(f3_2) * int64(g5) + f3g6 := int64(f3) * int64(g6) + f3g7_38 := int64(f3_2) * int64(g7_19) + f3g8_19 := int64(f3) * int64(g8_19) + f3g9_38 := int64(f3_2) * int64(g9_19) + f4g0 := int64(f4) * int64(g0) + f4g1 := int64(f4) * int64(g1) + f4g2 := int64(f4) * int64(g2) + f4g3 := int64(f4) * int64(g3) + f4g4 := int64(f4) * int64(g4) + f4g5 := int64(f4) * int64(g5) + f4g6_19 := int64(f4) * int64(g6_19) + f4g7_19 := int64(f4) * int64(g7_19) + f4g8_19 := int64(f4) * int64(g8_19) + f4g9_19 := int64(f4) * int64(g9_19) + f5g0 := int64(f5) * int64(g0) + f5g1_2 := int64(f5_2) * int64(g1) + f5g2 := int64(f5) * int64(g2) + f5g3_2 := int64(f5_2) * int64(g3) + f5g4 := int64(f5) * int64(g4) + f5g5_38 := int64(f5_2) * int64(g5_19) + f5g6_19 := int64(f5) * int64(g6_19) + f5g7_38 := int64(f5_2) * int64(g7_19) + f5g8_19 := int64(f5) * int64(g8_19) + f5g9_38 := int64(f5_2) * int64(g9_19) + f6g0 := int64(f6) * int64(g0) + f6g1 := int64(f6) * int64(g1) + f6g2 := int64(f6) * int64(g2) + f6g3 := int64(f6) * int64(g3) + f6g4_19 := int64(f6) * int64(g4_19) + f6g5_19 := int64(f6) * int64(g5_19) + f6g6_19 := int64(f6) * int64(g6_19) + f6g7_19 := int64(f6) * int64(g7_19) + f6g8_19 := int64(f6) * int64(g8_19) + f6g9_19 := int64(f6) * int64(g9_19) + f7g0 := int64(f7) * int64(g0) + f7g1_2 := int64(f7_2) * int64(g1) + f7g2 := int64(f7) * int64(g2) + f7g3_38 := int64(f7_2) * int64(g3_19) + f7g4_19 := int64(f7) * int64(g4_19) + f7g5_38 := int64(f7_2) * int64(g5_19) + f7g6_19 := int64(f7) * int64(g6_19) + f7g7_38 := int64(f7_2) * int64(g7_19) + f7g8_19 := int64(f7) * int64(g8_19) + f7g9_38 := int64(f7_2) * int64(g9_19) + f8g0 := int64(f8) * int64(g0) + f8g1 := int64(f8) * int64(g1) + f8g2_19 := int64(f8) * int64(g2_19) + f8g3_19 := int64(f8) * int64(g3_19) + f8g4_19 := int64(f8) * int64(g4_19) + f8g5_19 := int64(f8) * int64(g5_19) + f8g6_19 := int64(f8) * int64(g6_19) + f8g7_19 := int64(f8) * int64(g7_19) + f8g8_19 := int64(f8) * int64(g8_19) + f8g9_19 := int64(f8) * int64(g9_19) + f9g0 := int64(f9) * int64(g0) + f9g1_38 := int64(f9_2) * int64(g1_19) + f9g2_19 := int64(f9) * int64(g2_19) + f9g3_38 := int64(f9_2) * int64(g3_19) + f9g4_19 := int64(f9) * int64(g4_19) + f9g5_38 := int64(f9_2) * int64(g5_19) + f9g6_19 := int64(f9) * int64(g6_19) + f9g7_38 := int64(f9_2) * int64(g7_19) + f9g8_19 := int64(f9) * int64(g8_19) + f9g9_38 := int64(f9_2) * int64(g9_19) + h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38 + h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19 + h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38 + h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19 + h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38 + h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19 + h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38 + h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19 + h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38 + h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 + var carry [10]int64 + + // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38)) + // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8 + // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19)) + // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + // |h0| <= 2^25 + // |h4| <= 2^25 + // |h1| <= 1.51*2^58 + // |h5| <= 1.51*2^58 + + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + // |h1| <= 2^24; from now on fits into int32 + // |h5| <= 2^24; from now on fits into int32 + // |h2| <= 1.21*2^59 + // |h6| <= 1.21*2^59 + + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + // |h2| <= 2^25; from now on fits into int32 unchanged + // |h6| <= 2^25; from now on fits into int32 unchanged + // |h3| <= 1.51*2^58 + // |h7| <= 1.51*2^58 + + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + // |h3| <= 2^24; from now on fits into int32 unchanged + // |h7| <= 2^24; from now on fits into int32 unchanged + // |h4| <= 1.52*2^33 + // |h8| <= 1.52*2^33 + + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + // |h4| <= 2^25; from now on fits into int32 unchanged + // |h8| <= 2^25; from now on fits into int32 unchanged + // |h5| <= 1.01*2^24 + // |h9| <= 1.51*2^58 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + // |h9| <= 2^24; from now on fits into int32 unchanged + // |h0| <= 1.8*2^37 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + // |h0| <= 2^25; from now on fits into int32 unchanged + // |h1| <= 1.01*2^24 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feSquare calculates h = f*f. Can overlap h with f. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +func feSquare(h, f *fieldElement) { + f0 := f[0] + f1 := f[1] + f2 := f[2] + f3 := f[3] + f4 := f[4] + f5 := f[5] + f6 := f[6] + f7 := f[7] + f8 := f[8] + f9 := f[9] + f0_2 := 2 * f0 + f1_2 := 2 * f1 + f2_2 := 2 * f2 + f3_2 := 2 * f3 + f4_2 := 2 * f4 + f5_2 := 2 * f5 + f6_2 := 2 * f6 + f7_2 := 2 * f7 + f5_38 := 38 * f5 // 1.31*2^30 + f6_19 := 19 * f6 // 1.31*2^30 + f7_38 := 38 * f7 // 1.31*2^30 + f8_19 := 19 * f8 // 1.31*2^30 + f9_38 := 38 * f9 // 1.31*2^30 + f0f0 := int64(f0) * int64(f0) + f0f1_2 := int64(f0_2) * int64(f1) + f0f2_2 := int64(f0_2) * int64(f2) + f0f3_2 := int64(f0_2) * int64(f3) + f0f4_2 := int64(f0_2) * int64(f4) + f0f5_2 := int64(f0_2) * int64(f5) + f0f6_2 := int64(f0_2) * int64(f6) + f0f7_2 := int64(f0_2) * int64(f7) + f0f8_2 := int64(f0_2) * int64(f8) + f0f9_2 := int64(f0_2) * int64(f9) + f1f1_2 := int64(f1_2) * int64(f1) + f1f2_2 := int64(f1_2) * int64(f2) + f1f3_4 := int64(f1_2) * int64(f3_2) + f1f4_2 := int64(f1_2) * int64(f4) + f1f5_4 := int64(f1_2) * int64(f5_2) + f1f6_2 := int64(f1_2) * int64(f6) + f1f7_4 := int64(f1_2) * int64(f7_2) + f1f8_2 := int64(f1_2) * int64(f8) + f1f9_76 := int64(f1_2) * int64(f9_38) + f2f2 := int64(f2) * int64(f2) + f2f3_2 := int64(f2_2) * int64(f3) + f2f4_2 := int64(f2_2) * int64(f4) + f2f5_2 := int64(f2_2) * int64(f5) + f2f6_2 := int64(f2_2) * int64(f6) + f2f7_2 := int64(f2_2) * int64(f7) + f2f8_38 := int64(f2_2) * int64(f8_19) + f2f9_38 := int64(f2) * int64(f9_38) + f3f3_2 := int64(f3_2) * int64(f3) + f3f4_2 := int64(f3_2) * int64(f4) + f3f5_4 := int64(f3_2) * int64(f5_2) + f3f6_2 := int64(f3_2) * int64(f6) + f3f7_76 := int64(f3_2) * int64(f7_38) + f3f8_38 := int64(f3_2) * int64(f8_19) + f3f9_76 := int64(f3_2) * int64(f9_38) + f4f4 := int64(f4) * int64(f4) + f4f5_2 := int64(f4_2) * int64(f5) + f4f6_38 := int64(f4_2) * int64(f6_19) + f4f7_38 := int64(f4) * int64(f7_38) + f4f8_38 := int64(f4_2) * int64(f8_19) + f4f9_38 := int64(f4) * int64(f9_38) + f5f5_38 := int64(f5) * int64(f5_38) + f5f6_38 := int64(f5_2) * int64(f6_19) + f5f7_76 := int64(f5_2) * int64(f7_38) + f5f8_38 := int64(f5_2) * int64(f8_19) + f5f9_76 := int64(f5_2) * int64(f9_38) + f6f6_19 := int64(f6) * int64(f6_19) + f6f7_38 := int64(f6) * int64(f7_38) + f6f8_38 := int64(f6_2) * int64(f8_19) + f6f9_38 := int64(f6) * int64(f9_38) + f7f7_38 := int64(f7) * int64(f7_38) + f7f8_38 := int64(f7_2) * int64(f8_19) + f7f9_76 := int64(f7_2) * int64(f9_38) + f8f8_19 := int64(f8) * int64(f8_19) + f8f9_38 := int64(f8) * int64(f9_38) + f9f9_38 := int64(f9) * int64(f9_38) + h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38 + h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38 + h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19 + h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38 + h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38 + h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38 + h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19 + h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38 + h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38 + h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2 + var carry [10]int64 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feMul121666 calculates h = f * 121666. Can overlap h with f. +// +// Preconditions: +// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. +// +// Postconditions: +// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. +func feMul121666(h, f *fieldElement) { + h0 := int64(f[0]) * 121666 + h1 := int64(f[1]) * 121666 + h2 := int64(f[2]) * 121666 + h3 := int64(f[3]) * 121666 + h4 := int64(f[4]) * 121666 + h5 := int64(f[5]) * 121666 + h6 := int64(f[6]) * 121666 + h7 := int64(f[7]) * 121666 + h8 := int64(f[8]) * 121666 + h9 := int64(f[9]) * 121666 + var carry [10]int64 + + carry[9] = (h9 + (1 << 24)) >> 25 + h0 += carry[9] * 19 + h9 -= carry[9] << 25 + carry[1] = (h1 + (1 << 24)) >> 25 + h2 += carry[1] + h1 -= carry[1] << 25 + carry[3] = (h3 + (1 << 24)) >> 25 + h4 += carry[3] + h3 -= carry[3] << 25 + carry[5] = (h5 + (1 << 24)) >> 25 + h6 += carry[5] + h5 -= carry[5] << 25 + carry[7] = (h7 + (1 << 24)) >> 25 + h8 += carry[7] + h7 -= carry[7] << 25 + + carry[0] = (h0 + (1 << 25)) >> 26 + h1 += carry[0] + h0 -= carry[0] << 26 + carry[2] = (h2 + (1 << 25)) >> 26 + h3 += carry[2] + h2 -= carry[2] << 26 + carry[4] = (h4 + (1 << 25)) >> 26 + h5 += carry[4] + h4 -= carry[4] << 26 + carry[6] = (h6 + (1 << 25)) >> 26 + h7 += carry[6] + h6 -= carry[6] << 26 + carry[8] = (h8 + (1 << 25)) >> 26 + h9 += carry[8] + h8 -= carry[8] << 26 + + h[0] = int32(h0) + h[1] = int32(h1) + h[2] = int32(h2) + h[3] = int32(h3) + h[4] = int32(h4) + h[5] = int32(h5) + h[6] = int32(h6) + h[7] = int32(h7) + h[8] = int32(h8) + h[9] = int32(h9) +} + +// feInvert sets out = z^-1. +func feInvert(out, z *fieldElement) { + var t0, t1, t2, t3 fieldElement + var i int + + feSquare(&t0, z) + for i = 1; i < 1; i++ { + feSquare(&t0, &t0) + } + feSquare(&t1, &t0) + for i = 1; i < 2; i++ { + feSquare(&t1, &t1) + } + feMul(&t1, z, &t1) + feMul(&t0, &t0, &t1) + feSquare(&t2, &t0) + for i = 1; i < 1; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t1, &t2) + feSquare(&t2, &t1) + for i = 1; i < 5; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t2, &t1) + for i = 1; i < 10; i++ { + feSquare(&t2, &t2) + } + feMul(&t2, &t2, &t1) + feSquare(&t3, &t2) + for i = 1; i < 20; i++ { + feSquare(&t3, &t3) + } + feMul(&t2, &t3, &t2) + feSquare(&t2, &t2) + for i = 1; i < 10; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t2, &t1) + for i = 1; i < 50; i++ { + feSquare(&t2, &t2) + } + feMul(&t2, &t2, &t1) + feSquare(&t3, &t2) + for i = 1; i < 100; i++ { + feSquare(&t3, &t3) + } + feMul(&t2, &t3, &t2) + feSquare(&t2, &t2) + for i = 1; i < 50; i++ { + feSquare(&t2, &t2) + } + feMul(&t1, &t2, &t1) + feSquare(&t1, &t1) + for i = 1; i < 5; i++ { + feSquare(&t1, &t1) + } + feMul(out, &t1, &t0) +} + +func scalarMult(out, in, base *[32]byte) { + var e [32]byte + + copy(e[:], in[:]) + e[0] &= 248 + e[31] &= 127 + e[31] |= 64 + + var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement + feFromBytes(&x1, base) + feOne(&x2) + feCopy(&x3, &x1) + feOne(&z3) + + swap := int32(0) + for pos := 254; pos >= 0; pos-- { + b := e[pos/8] >> uint(pos&7) + b &= 1 + swap ^= int32(b) + feCSwap(&x2, &x3, swap) + feCSwap(&z2, &z3, swap) + swap = int32(b) + + feSub(&tmp0, &x3, &z3) + feSub(&tmp1, &x2, &z2) + feAdd(&x2, &x2, &z2) + feAdd(&z2, &x3, &z3) + feMul(&z3, &tmp0, &x2) + feMul(&z2, &z2, &tmp1) + feSquare(&tmp0, &tmp1) + feSquare(&tmp1, &x2) + feAdd(&x3, &z3, &z2) + feSub(&z2, &z3, &z2) + feMul(&x2, &tmp1, &tmp0) + feSub(&tmp1, &tmp1, &tmp0) + feSquare(&z2, &z2) + feMul121666(&z3, &tmp1) + feSquare(&x3, &x3) + feAdd(&tmp0, &tmp0, &z3) + feMul(&z3, &x1, &z2) + feMul(&z2, &tmp1, &tmp0) + } + + feCSwap(&x2, &x3, swap) + feCSwap(&z2, &z3, swap) + + feInvert(&z2, &z2) + feMul(&x2, &x2, &z2) + feToBytes(out, &x2) +} diff --git a/src/vendor/golang_org/x/crypto/curve25519/curve25519_test.go b/src/vendor/golang_org/x/crypto/curve25519/curve25519_test.go new file mode 100644 index 0000000000..14b0ee87cd --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/curve25519_test.go @@ -0,0 +1,29 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package curve25519 + +import ( + "fmt" + "testing" +) + +const expectedHex = "89161fde887b2b53de549af483940106ecc114d6982daa98256de23bdf77661a" + +func TestBaseScalarMult(t *testing.T) { + var a, b [32]byte + in := &a + out := &b + a[0] = 1 + + for i := 0; i < 200; i++ { + ScalarBaseMult(out, in) + in, out = out, in + } + + result := fmt.Sprintf("%x", in[:]) + if result != expectedHex { + t.Errorf("incorrect result: got %s, want %s", result, expectedHex) + } +} diff --git a/src/vendor/golang_org/x/crypto/curve25519/doc.go b/src/vendor/golang_org/x/crypto/curve25519/doc.go new file mode 100644 index 0000000000..ebeea3c2d6 --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/doc.go @@ -0,0 +1,23 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package curve25519 provides an implementation of scalar multiplication on +// the elliptic curve known as curve25519. See http://cr.yp.to/ecdh.html +package curve25519 // import "golang.org/x/crypto/curve25519" + +// basePoint is the x coordinate of the generator of the curve. +var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + +// ScalarMult sets dst to the product in*base where dst and base are the x +// coordinates of group points and all values are in little-endian form. +func ScalarMult(dst, in, base *[32]byte) { + scalarMult(dst, in, base) +} + +// ScalarBaseMult sets dst to the product in*base where dst and base are the x +// coordinates of group points, base is the standard generator and all values +// are in little-endian form. +func ScalarBaseMult(dst, in *[32]byte) { + ScalarMult(dst, in, &basePoint) +} diff --git a/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s new file mode 100644 index 0000000000..37599fac04 --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/freeze_amd64.s @@ -0,0 +1,94 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func freeze(inout *[5]uint64) +TEXT ·freeze(SB),7,$96-8 + MOVQ inout+0(FP), DI + + MOVQ SP,R11 + MOVQ $31,CX + NOTQ CX + ANDQ CX,SP + ADDQ $32,SP + + MOVQ R11,0(SP) + MOVQ R12,8(SP) + MOVQ R13,16(SP) + MOVQ R14,24(SP) + MOVQ R15,32(SP) + MOVQ BX,40(SP) + MOVQ BP,48(SP) + MOVQ 0(DI),SI + MOVQ 8(DI),DX + MOVQ 16(DI),CX + MOVQ 24(DI),R8 + MOVQ 32(DI),R9 + MOVQ ·REDMASK51(SB),AX + MOVQ AX,R10 + SUBQ $18,R10 + MOVQ $3,R11 +REDUCELOOP: + MOVQ SI,R12 + SHRQ $51,R12 + ANDQ AX,SI + ADDQ R12,DX + MOVQ DX,R12 + SHRQ $51,R12 + ANDQ AX,DX + ADDQ R12,CX + MOVQ CX,R12 + SHRQ $51,R12 + ANDQ AX,CX + ADDQ R12,R8 + MOVQ R8,R12 + SHRQ $51,R12 + ANDQ AX,R8 + ADDQ R12,R9 + MOVQ R9,R12 + SHRQ $51,R12 + ANDQ AX,R9 + IMUL3Q $19,R12,R12 + ADDQ R12,SI + SUBQ $1,R11 + JA REDUCELOOP + MOVQ $1,R12 + CMPQ R10,SI + CMOVQLT R11,R12 + CMPQ AX,DX + CMOVQNE R11,R12 + CMPQ AX,CX + CMOVQNE R11,R12 + CMPQ AX,R8 + CMOVQNE R11,R12 + CMPQ AX,R9 + CMOVQNE R11,R12 + NEGQ R12 + ANDQ R12,AX + ANDQ R12,R10 + SUBQ R10,SI + SUBQ AX,DX + SUBQ AX,CX + SUBQ AX,R8 + SUBQ AX,R9 + MOVQ SI,0(DI) + MOVQ DX,8(DI) + MOVQ CX,16(DI) + MOVQ R8,24(DI) + MOVQ R9,32(DI) + MOVQ 0(SP),R11 + MOVQ 8(SP),R12 + MOVQ 16(SP),R13 + MOVQ 24(SP),R14 + MOVQ 32(SP),R15 + MOVQ 40(SP),BX + MOVQ 48(SP),BP + MOVQ R11,SP + MOVQ DI,AX + MOVQ SI,DX + RET diff --git a/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s new file mode 100644 index 0000000000..3949f9cfaf --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/ladderstep_amd64.s @@ -0,0 +1,1398 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func ladderstep(inout *[5][5]uint64) +TEXT ·ladderstep(SB),0,$384-8 + MOVQ inout+0(FP),DI + + MOVQ SP,R11 + MOVQ $31,CX + NOTQ CX + ANDQ CX,SP + ADDQ $32,SP + + MOVQ R11,0(SP) + MOVQ R12,8(SP) + MOVQ R13,16(SP) + MOVQ R14,24(SP) + MOVQ R15,32(SP) + MOVQ BX,40(SP) + MOVQ BP,48(SP) + MOVQ 40(DI),SI + MOVQ 48(DI),DX + MOVQ 56(DI),CX + MOVQ 64(DI),R8 + MOVQ 72(DI),R9 + MOVQ SI,AX + MOVQ DX,R10 + MOVQ CX,R11 + MOVQ R8,R12 + MOVQ R9,R13 + ADDQ ·_2P0(SB),AX + ADDQ ·_2P1234(SB),R10 + ADDQ ·_2P1234(SB),R11 + ADDQ ·_2P1234(SB),R12 + ADDQ ·_2P1234(SB),R13 + ADDQ 80(DI),SI + ADDQ 88(DI),DX + ADDQ 96(DI),CX + ADDQ 104(DI),R8 + ADDQ 112(DI),R9 + SUBQ 80(DI),AX + SUBQ 88(DI),R10 + SUBQ 96(DI),R11 + SUBQ 104(DI),R12 + SUBQ 112(DI),R13 + MOVQ SI,56(SP) + MOVQ DX,64(SP) + MOVQ CX,72(SP) + MOVQ R8,80(SP) + MOVQ R9,88(SP) + MOVQ AX,96(SP) + MOVQ R10,104(SP) + MOVQ R11,112(SP) + MOVQ R12,120(SP) + MOVQ R13,128(SP) + MOVQ 96(SP),AX + MULQ 96(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 96(SP),AX + SHLQ $1,AX + MULQ 104(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 96(SP),AX + SHLQ $1,AX + MULQ 112(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 96(SP),AX + SHLQ $1,AX + MULQ 120(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 96(SP),AX + SHLQ $1,AX + MULQ 128(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 104(SP),AX + MULQ 104(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 104(SP),AX + SHLQ $1,AX + MULQ 112(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 104(SP),AX + SHLQ $1,AX + MULQ 120(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 104(SP),DX + IMUL3Q $38,DX,AX + MULQ 128(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 112(SP),AX + MULQ 112(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 112(SP),DX + IMUL3Q $38,DX,AX + MULQ 120(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 112(SP),DX + IMUL3Q $38,DX,AX + MULQ 128(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 120(SP),DX + IMUL3Q $19,DX,AX + MULQ 120(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 120(SP),DX + IMUL3Q $38,DX,AX + MULQ 128(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 128(SP),DX + IMUL3Q $19,DX,AX + MULQ 128(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,136(SP) + MOVQ R8,144(SP) + MOVQ R9,152(SP) + MOVQ AX,160(SP) + MOVQ R10,168(SP) + MOVQ 56(SP),AX + MULQ 56(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 56(SP),AX + SHLQ $1,AX + MULQ 64(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 56(SP),AX + SHLQ $1,AX + MULQ 72(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 56(SP),AX + SHLQ $1,AX + MULQ 80(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 56(SP),AX + SHLQ $1,AX + MULQ 88(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 64(SP),AX + MULQ 64(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 64(SP),AX + SHLQ $1,AX + MULQ 72(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 64(SP),AX + SHLQ $1,AX + MULQ 80(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 64(SP),DX + IMUL3Q $38,DX,AX + MULQ 88(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 72(SP),AX + MULQ 72(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 72(SP),DX + IMUL3Q $38,DX,AX + MULQ 80(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 72(SP),DX + IMUL3Q $38,DX,AX + MULQ 88(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 80(SP),DX + IMUL3Q $19,DX,AX + MULQ 80(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 80(SP),DX + IMUL3Q $38,DX,AX + MULQ 88(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 88(SP),DX + IMUL3Q $19,DX,AX + MULQ 88(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,176(SP) + MOVQ R8,184(SP) + MOVQ R9,192(SP) + MOVQ AX,200(SP) + MOVQ R10,208(SP) + MOVQ SI,SI + MOVQ R8,DX + MOVQ R9,CX + MOVQ AX,R8 + MOVQ R10,R9 + ADDQ ·_2P0(SB),SI + ADDQ ·_2P1234(SB),DX + ADDQ ·_2P1234(SB),CX + ADDQ ·_2P1234(SB),R8 + ADDQ ·_2P1234(SB),R9 + SUBQ 136(SP),SI + SUBQ 144(SP),DX + SUBQ 152(SP),CX + SUBQ 160(SP),R8 + SUBQ 168(SP),R9 + MOVQ SI,216(SP) + MOVQ DX,224(SP) + MOVQ CX,232(SP) + MOVQ R8,240(SP) + MOVQ R9,248(SP) + MOVQ 120(DI),SI + MOVQ 128(DI),DX + MOVQ 136(DI),CX + MOVQ 144(DI),R8 + MOVQ 152(DI),R9 + MOVQ SI,AX + MOVQ DX,R10 + MOVQ CX,R11 + MOVQ R8,R12 + MOVQ R9,R13 + ADDQ ·_2P0(SB),AX + ADDQ ·_2P1234(SB),R10 + ADDQ ·_2P1234(SB),R11 + ADDQ ·_2P1234(SB),R12 + ADDQ ·_2P1234(SB),R13 + ADDQ 160(DI),SI + ADDQ 168(DI),DX + ADDQ 176(DI),CX + ADDQ 184(DI),R8 + ADDQ 192(DI),R9 + SUBQ 160(DI),AX + SUBQ 168(DI),R10 + SUBQ 176(DI),R11 + SUBQ 184(DI),R12 + SUBQ 192(DI),R13 + MOVQ SI,256(SP) + MOVQ DX,264(SP) + MOVQ CX,272(SP) + MOVQ R8,280(SP) + MOVQ R9,288(SP) + MOVQ AX,296(SP) + MOVQ R10,304(SP) + MOVQ R11,312(SP) + MOVQ R12,320(SP) + MOVQ R13,328(SP) + MOVQ 280(SP),SI + IMUL3Q $19,SI,AX + MOVQ AX,336(SP) + MULQ 112(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 288(SP),DX + IMUL3Q $19,DX,AX + MOVQ AX,344(SP) + MULQ 104(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 256(SP),AX + MULQ 96(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 256(SP),AX + MULQ 104(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 256(SP),AX + MULQ 112(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 256(SP),AX + MULQ 120(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 256(SP),AX + MULQ 128(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 264(SP),AX + MULQ 96(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 264(SP),AX + MULQ 104(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 264(SP),AX + MULQ 112(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 264(SP),AX + MULQ 120(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 264(SP),DX + IMUL3Q $19,DX,AX + MULQ 128(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 272(SP),AX + MULQ 96(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 272(SP),AX + MULQ 104(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 272(SP),AX + MULQ 112(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 272(SP),DX + IMUL3Q $19,DX,AX + MULQ 120(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 272(SP),DX + IMUL3Q $19,DX,AX + MULQ 128(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 280(SP),AX + MULQ 96(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 280(SP),AX + MULQ 104(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 336(SP),AX + MULQ 120(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 336(SP),AX + MULQ 128(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 288(SP),AX + MULQ 96(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 344(SP),AX + MULQ 112(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 344(SP),AX + MULQ 120(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 344(SP),AX + MULQ 128(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,96(SP) + MOVQ R8,104(SP) + MOVQ R9,112(SP) + MOVQ AX,120(SP) + MOVQ R10,128(SP) + MOVQ 320(SP),SI + IMUL3Q $19,SI,AX + MOVQ AX,256(SP) + MULQ 72(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 328(SP),DX + IMUL3Q $19,DX,AX + MOVQ AX,264(SP) + MULQ 64(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 296(SP),AX + MULQ 56(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 296(SP),AX + MULQ 64(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 296(SP),AX + MULQ 72(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 296(SP),AX + MULQ 80(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 296(SP),AX + MULQ 88(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 304(SP),AX + MULQ 56(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 304(SP),AX + MULQ 64(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 304(SP),AX + MULQ 72(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 304(SP),AX + MULQ 80(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 304(SP),DX + IMUL3Q $19,DX,AX + MULQ 88(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 312(SP),AX + MULQ 56(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 312(SP),AX + MULQ 64(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 312(SP),AX + MULQ 72(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 312(SP),DX + IMUL3Q $19,DX,AX + MULQ 80(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 312(SP),DX + IMUL3Q $19,DX,AX + MULQ 88(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 320(SP),AX + MULQ 56(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 320(SP),AX + MULQ 64(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 256(SP),AX + MULQ 80(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 256(SP),AX + MULQ 88(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 328(SP),AX + MULQ 56(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 264(SP),AX + MULQ 72(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 264(SP),AX + MULQ 80(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 264(SP),AX + MULQ 88(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,DX + MOVQ R8,CX + MOVQ R9,R11 + MOVQ AX,R12 + MOVQ R10,R13 + ADDQ ·_2P0(SB),DX + ADDQ ·_2P1234(SB),CX + ADDQ ·_2P1234(SB),R11 + ADDQ ·_2P1234(SB),R12 + ADDQ ·_2P1234(SB),R13 + ADDQ 96(SP),SI + ADDQ 104(SP),R8 + ADDQ 112(SP),R9 + ADDQ 120(SP),AX + ADDQ 128(SP),R10 + SUBQ 96(SP),DX + SUBQ 104(SP),CX + SUBQ 112(SP),R11 + SUBQ 120(SP),R12 + SUBQ 128(SP),R13 + MOVQ SI,120(DI) + MOVQ R8,128(DI) + MOVQ R9,136(DI) + MOVQ AX,144(DI) + MOVQ R10,152(DI) + MOVQ DX,160(DI) + MOVQ CX,168(DI) + MOVQ R11,176(DI) + MOVQ R12,184(DI) + MOVQ R13,192(DI) + MOVQ 120(DI),AX + MULQ 120(DI) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 128(DI) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 136(DI) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 144(DI) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 120(DI),AX + SHLQ $1,AX + MULQ 152(DI) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 128(DI),AX + MULQ 128(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 128(DI),AX + SHLQ $1,AX + MULQ 136(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 128(DI),AX + SHLQ $1,AX + MULQ 144(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 128(DI),DX + IMUL3Q $38,DX,AX + MULQ 152(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 136(DI),AX + MULQ 136(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 136(DI),DX + IMUL3Q $38,DX,AX + MULQ 144(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 136(DI),DX + IMUL3Q $38,DX,AX + MULQ 152(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 144(DI),DX + IMUL3Q $19,DX,AX + MULQ 144(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 144(DI),DX + IMUL3Q $38,DX,AX + MULQ 152(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 152(DI),DX + IMUL3Q $19,DX,AX + MULQ 152(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,120(DI) + MOVQ R8,128(DI) + MOVQ R9,136(DI) + MOVQ AX,144(DI) + MOVQ R10,152(DI) + MOVQ 160(DI),AX + MULQ 160(DI) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 168(DI) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 176(DI) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 184(DI) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 160(DI),AX + SHLQ $1,AX + MULQ 192(DI) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 168(DI),AX + MULQ 168(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 168(DI),AX + SHLQ $1,AX + MULQ 176(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 168(DI),AX + SHLQ $1,AX + MULQ 184(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 168(DI),DX + IMUL3Q $38,DX,AX + MULQ 192(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),AX + MULQ 176(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 176(DI),DX + IMUL3Q $38,DX,AX + MULQ 184(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),DX + IMUL3Q $38,DX,AX + MULQ 192(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(DI),DX + IMUL3Q $19,DX,AX + MULQ 184(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(DI),DX + IMUL3Q $38,DX,AX + MULQ 192(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 192(DI),DX + IMUL3Q $19,DX,AX + MULQ 192(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + ANDQ DX,SI + MOVQ CX,R8 + SHRQ $51,CX + ADDQ R10,CX + ANDQ DX,R8 + MOVQ CX,R9 + SHRQ $51,CX + ADDQ R12,CX + ANDQ DX,R9 + MOVQ CX,AX + SHRQ $51,CX + ADDQ R14,CX + ANDQ DX,AX + MOVQ CX,R10 + SHRQ $51,CX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,160(DI) + MOVQ R8,168(DI) + MOVQ R9,176(DI) + MOVQ AX,184(DI) + MOVQ R10,192(DI) + MOVQ 184(DI),SI + IMUL3Q $19,SI,AX + MOVQ AX,56(SP) + MULQ 16(DI) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 192(DI),DX + IMUL3Q $19,DX,AX + MOVQ AX,64(SP) + MULQ 8(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 160(DI),AX + MULQ 0(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 160(DI),AX + MULQ 8(DI) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 160(DI),AX + MULQ 16(DI) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 160(DI),AX + MULQ 24(DI) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 160(DI),AX + MULQ 32(DI) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 168(DI),AX + MULQ 0(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 168(DI),AX + MULQ 8(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 168(DI),AX + MULQ 16(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 168(DI),AX + MULQ 24(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 168(DI),DX + IMUL3Q $19,DX,AX + MULQ 32(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),AX + MULQ 0(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 176(DI),AX + MULQ 8(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 176(DI),AX + MULQ 16(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 176(DI),DX + IMUL3Q $19,DX,AX + MULQ 24(DI) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(DI),DX + IMUL3Q $19,DX,AX + MULQ 32(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(DI),AX + MULQ 0(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 184(DI),AX + MULQ 8(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 56(SP),AX + MULQ 24(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 56(SP),AX + MULQ 32(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 192(DI),AX + MULQ 0(DI) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 64(SP),AX + MULQ 16(DI) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 64(SP),AX + MULQ 24(DI) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 64(SP),AX + MULQ 32(DI) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,160(DI) + MOVQ R8,168(DI) + MOVQ R9,176(DI) + MOVQ AX,184(DI) + MOVQ R10,192(DI) + MOVQ 200(SP),SI + IMUL3Q $19,SI,AX + MOVQ AX,56(SP) + MULQ 152(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 208(SP),DX + IMUL3Q $19,DX,AX + MOVQ AX,64(SP) + MULQ 144(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(SP),AX + MULQ 136(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 176(SP),AX + MULQ 144(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 176(SP),AX + MULQ 152(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 176(SP),AX + MULQ 160(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 176(SP),AX + MULQ 168(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 184(SP),AX + MULQ 136(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 184(SP),AX + MULQ 144(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 184(SP),AX + MULQ 152(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 184(SP),AX + MULQ 160(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 184(SP),DX + IMUL3Q $19,DX,AX + MULQ 168(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 192(SP),AX + MULQ 136(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 192(SP),AX + MULQ 144(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 192(SP),AX + MULQ 152(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 192(SP),DX + IMUL3Q $19,DX,AX + MULQ 160(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 192(SP),DX + IMUL3Q $19,DX,AX + MULQ 168(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 200(SP),AX + MULQ 136(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 200(SP),AX + MULQ 144(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 56(SP),AX + MULQ 160(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 56(SP),AX + MULQ 168(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 208(SP),AX + MULQ 136(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 64(SP),AX + MULQ 152(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 64(SP),AX + MULQ 160(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 64(SP),AX + MULQ 168(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,40(DI) + MOVQ R8,48(DI) + MOVQ R9,56(DI) + MOVQ AX,64(DI) + MOVQ R10,72(DI) + MOVQ 216(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + MOVQ AX,SI + MOVQ DX,CX + MOVQ 224(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,CX + MOVQ DX,R8 + MOVQ 232(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,R8 + MOVQ DX,R9 + MOVQ 240(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,R9 + MOVQ DX,R10 + MOVQ 248(SP),AX + MULQ ·_121666_213(SB) + SHRQ $13,AX + ADDQ AX,R10 + IMUL3Q $19,DX,DX + ADDQ DX,SI + ADDQ 136(SP),SI + ADDQ 144(SP),CX + ADDQ 152(SP),R8 + ADDQ 160(SP),R9 + ADDQ 168(SP),R10 + MOVQ SI,80(DI) + MOVQ CX,88(DI) + MOVQ R8,96(DI) + MOVQ R9,104(DI) + MOVQ R10,112(DI) + MOVQ 104(DI),SI + IMUL3Q $19,SI,AX + MOVQ AX,56(SP) + MULQ 232(SP) + MOVQ AX,SI + MOVQ DX,CX + MOVQ 112(DI),DX + IMUL3Q $19,DX,AX + MOVQ AX,64(SP) + MULQ 224(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 80(DI),AX + MULQ 216(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 80(DI),AX + MULQ 224(SP) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 80(DI),AX + MULQ 232(SP) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 80(DI),AX + MULQ 240(SP) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 80(DI),AX + MULQ 248(SP) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 88(DI),AX + MULQ 216(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 88(DI),AX + MULQ 224(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 88(DI),AX + MULQ 232(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 88(DI),AX + MULQ 240(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 88(DI),DX + IMUL3Q $19,DX,AX + MULQ 248(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 96(DI),AX + MULQ 216(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 96(DI),AX + MULQ 224(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 96(DI),AX + MULQ 232(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 96(DI),DX + IMUL3Q $19,DX,AX + MULQ 240(SP) + ADDQ AX,SI + ADCQ DX,CX + MOVQ 96(DI),DX + IMUL3Q $19,DX,AX + MULQ 248(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 104(DI),AX + MULQ 216(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 104(DI),AX + MULQ 224(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 56(SP),AX + MULQ 240(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 56(SP),AX + MULQ 248(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 112(DI),AX + MULQ 216(SP) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 64(SP),AX + MULQ 232(SP) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 64(SP),AX + MULQ 240(SP) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 64(SP),AX + MULQ 248(SP) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ ·REDMASK51(SB),DX + SHLQ $13,CX:SI + ANDQ DX,SI + SHLQ $13,R9:R8 + ANDQ DX,R8 + ADDQ CX,R8 + SHLQ $13,R11:R10 + ANDQ DX,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ DX,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ DX,R14 + ADDQ R13,R14 + IMUL3Q $19,R15,CX + ADDQ CX,SI + MOVQ SI,CX + SHRQ $51,CX + ADDQ R8,CX + MOVQ CX,R8 + SHRQ $51,CX + ANDQ DX,SI + ADDQ R10,CX + MOVQ CX,R9 + SHRQ $51,CX + ANDQ DX,R8 + ADDQ R12,CX + MOVQ CX,AX + SHRQ $51,CX + ANDQ DX,R9 + ADDQ R14,CX + MOVQ CX,R10 + SHRQ $51,CX + ANDQ DX,AX + IMUL3Q $19,CX,CX + ADDQ CX,SI + ANDQ DX,R10 + MOVQ SI,80(DI) + MOVQ R8,88(DI) + MOVQ R9,96(DI) + MOVQ AX,104(DI) + MOVQ R10,112(DI) + MOVQ 0(SP),R11 + MOVQ 8(SP),R12 + MOVQ 16(SP),R13 + MOVQ 24(SP),R14 + MOVQ 32(SP),R15 + MOVQ 40(SP),BX + MOVQ 48(SP),BP + MOVQ R11,SP + MOVQ DI,AX + MOVQ SI,DX + RET diff --git a/src/vendor/golang_org/x/crypto/curve25519/mont25519_amd64.go b/src/vendor/golang_org/x/crypto/curve25519/mont25519_amd64.go new file mode 100644 index 0000000000..5822bd5338 --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/mont25519_amd64.go @@ -0,0 +1,240 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64,!gccgo,!appengine + +package curve25519 + +// These functions are implemented in the .s files. The names of the functions +// in the rest of the file are also taken from the SUPERCOP sources to help +// people following along. + +//go:noescape + +func cswap(inout *[5]uint64, v uint64) + +//go:noescape + +func ladderstep(inout *[5][5]uint64) + +//go:noescape + +func freeze(inout *[5]uint64) + +//go:noescape + +func mul(dest, a, b *[5]uint64) + +//go:noescape + +func square(out, in *[5]uint64) + +// mladder uses a Montgomery ladder to calculate (xr/zr) *= s. +func mladder(xr, zr *[5]uint64, s *[32]byte) { + var work [5][5]uint64 + + work[0] = *xr + setint(&work[1], 1) + setint(&work[2], 0) + work[3] = *xr + setint(&work[4], 1) + + j := uint(6) + var prevbit byte + + for i := 31; i >= 0; i-- { + for j < 8 { + bit := ((*s)[i] >> j) & 1 + swap := bit ^ prevbit + prevbit = bit + cswap(&work[1], uint64(swap)) + ladderstep(&work) + j-- + } + j = 7 + } + + *xr = work[1] + *zr = work[2] +} + +func scalarMult(out, in, base *[32]byte) { + var e [32]byte + copy(e[:], (*in)[:]) + e[0] &= 248 + e[31] &= 127 + e[31] |= 64 + + var t, z [5]uint64 + unpack(&t, base) + mladder(&t, &z, &e) + invert(&z, &z) + mul(&t, &t, &z) + pack(out, &t) +} + +func setint(r *[5]uint64, v uint64) { + r[0] = v + r[1] = 0 + r[2] = 0 + r[3] = 0 + r[4] = 0 +} + +// unpack sets r = x where r consists of 5, 51-bit limbs in little-endian +// order. +func unpack(r *[5]uint64, x *[32]byte) { + r[0] = uint64(x[0]) | + uint64(x[1])<<8 | + uint64(x[2])<<16 | + uint64(x[3])<<24 | + uint64(x[4])<<32 | + uint64(x[5])<<40 | + uint64(x[6]&7)<<48 + + r[1] = uint64(x[6])>>3 | + uint64(x[7])<<5 | + uint64(x[8])<<13 | + uint64(x[9])<<21 | + uint64(x[10])<<29 | + uint64(x[11])<<37 | + uint64(x[12]&63)<<45 + + r[2] = uint64(x[12])>>6 | + uint64(x[13])<<2 | + uint64(x[14])<<10 | + uint64(x[15])<<18 | + uint64(x[16])<<26 | + uint64(x[17])<<34 | + uint64(x[18])<<42 | + uint64(x[19]&1)<<50 + + r[3] = uint64(x[19])>>1 | + uint64(x[20])<<7 | + uint64(x[21])<<15 | + uint64(x[22])<<23 | + uint64(x[23])<<31 | + uint64(x[24])<<39 | + uint64(x[25]&15)<<47 + + r[4] = uint64(x[25])>>4 | + uint64(x[26])<<4 | + uint64(x[27])<<12 | + uint64(x[28])<<20 | + uint64(x[29])<<28 | + uint64(x[30])<<36 | + uint64(x[31]&127)<<44 +} + +// pack sets out = x where out is the usual, little-endian form of the 5, +// 51-bit limbs in x. +func pack(out *[32]byte, x *[5]uint64) { + t := *x + freeze(&t) + + out[0] = byte(t[0]) + out[1] = byte(t[0] >> 8) + out[2] = byte(t[0] >> 16) + out[3] = byte(t[0] >> 24) + out[4] = byte(t[0] >> 32) + out[5] = byte(t[0] >> 40) + out[6] = byte(t[0] >> 48) + + out[6] ^= byte(t[1]<<3) & 0xf8 + out[7] = byte(t[1] >> 5) + out[8] = byte(t[1] >> 13) + out[9] = byte(t[1] >> 21) + out[10] = byte(t[1] >> 29) + out[11] = byte(t[1] >> 37) + out[12] = byte(t[1] >> 45) + + out[12] ^= byte(t[2]<<6) & 0xc0 + out[13] = byte(t[2] >> 2) + out[14] = byte(t[2] >> 10) + out[15] = byte(t[2] >> 18) + out[16] = byte(t[2] >> 26) + out[17] = byte(t[2] >> 34) + out[18] = byte(t[2] >> 42) + out[19] = byte(t[2] >> 50) + + out[19] ^= byte(t[3]<<1) & 0xfe + out[20] = byte(t[3] >> 7) + out[21] = byte(t[3] >> 15) + out[22] = byte(t[3] >> 23) + out[23] = byte(t[3] >> 31) + out[24] = byte(t[3] >> 39) + out[25] = byte(t[3] >> 47) + + out[25] ^= byte(t[4]<<4) & 0xf0 + out[26] = byte(t[4] >> 4) + out[27] = byte(t[4] >> 12) + out[28] = byte(t[4] >> 20) + out[29] = byte(t[4] >> 28) + out[30] = byte(t[4] >> 36) + out[31] = byte(t[4] >> 44) +} + +// invert calculates r = x^-1 mod p using Fermat's little theorem. +func invert(r *[5]uint64, x *[5]uint64) { + var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64 + + square(&z2, x) /* 2 */ + square(&t, &z2) /* 4 */ + square(&t, &t) /* 8 */ + mul(&z9, &t, x) /* 9 */ + mul(&z11, &z9, &z2) /* 11 */ + square(&t, &z11) /* 22 */ + mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */ + + square(&t, &z2_5_0) /* 2^6 - 2^1 */ + for i := 1; i < 5; i++ { /* 2^20 - 2^10 */ + square(&t, &t) + } + mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */ + + square(&t, &z2_10_0) /* 2^11 - 2^1 */ + for i := 1; i < 10; i++ { /* 2^20 - 2^10 */ + square(&t, &t) + } + mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */ + + square(&t, &z2_20_0) /* 2^21 - 2^1 */ + for i := 1; i < 20; i++ { /* 2^40 - 2^20 */ + square(&t, &t) + } + mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */ + + square(&t, &t) /* 2^41 - 2^1 */ + for i := 1; i < 10; i++ { /* 2^50 - 2^10 */ + square(&t, &t) + } + mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */ + + square(&t, &z2_50_0) /* 2^51 - 2^1 */ + for i := 1; i < 50; i++ { /* 2^100 - 2^50 */ + square(&t, &t) + } + mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */ + + square(&t, &z2_100_0) /* 2^101 - 2^1 */ + for i := 1; i < 100; i++ { /* 2^200 - 2^100 */ + square(&t, &t) + } + mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */ + + square(&t, &t) /* 2^201 - 2^1 */ + for i := 1; i < 50; i++ { /* 2^250 - 2^50 */ + square(&t, &t) + } + mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */ + + square(&t, &t) /* 2^251 - 2^1 */ + square(&t, &t) /* 2^252 - 2^2 */ + square(&t, &t) /* 2^253 - 2^3 */ + + square(&t, &t) /* 2^254 - 2^4 */ + + square(&t, &t) /* 2^255 - 2^5 */ + mul(r, &t, &z11) /* 2^255 - 21 */ +} diff --git a/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s new file mode 100644 index 0000000000..e48d183ee5 --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/mul_amd64.s @@ -0,0 +1,191 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func mul(dest, a, b *[5]uint64) +TEXT ·mul(SB),0,$128-24 + MOVQ dest+0(FP), DI + MOVQ a+8(FP), SI + MOVQ b+16(FP), DX + + MOVQ SP,R11 + MOVQ $31,CX + NOTQ CX + ANDQ CX,SP + ADDQ $32,SP + + MOVQ R11,0(SP) + MOVQ R12,8(SP) + MOVQ R13,16(SP) + MOVQ R14,24(SP) + MOVQ R15,32(SP) + MOVQ BX,40(SP) + MOVQ BP,48(SP) + MOVQ DI,56(SP) + MOVQ DX,CX + MOVQ 24(SI),DX + IMUL3Q $19,DX,AX + MOVQ AX,64(SP) + MULQ 16(CX) + MOVQ AX,R8 + MOVQ DX,R9 + MOVQ 32(SI),DX + IMUL3Q $19,DX,AX + MOVQ AX,72(SP) + MULQ 8(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SI),AX + MULQ 0(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 0(SI),AX + MULQ 8(CX) + MOVQ AX,R10 + MOVQ DX,R11 + MOVQ 0(SI),AX + MULQ 16(CX) + MOVQ AX,R12 + MOVQ DX,R13 + MOVQ 0(SI),AX + MULQ 24(CX) + MOVQ AX,R14 + MOVQ DX,R15 + MOVQ 0(SI),AX + MULQ 32(CX) + MOVQ AX,BX + MOVQ DX,BP + MOVQ 8(SI),AX + MULQ 0(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 8(SI),AX + MULQ 8(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 8(SI),AX + MULQ 16(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 8(SI),AX + MULQ 24(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 8(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 16(SI),AX + MULQ 0(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 16(SI),AX + MULQ 8(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 16(SI),AX + MULQ 16(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 16(SI),DX + IMUL3Q $19,DX,AX + MULQ 24(CX) + ADDQ AX,R8 + ADCQ DX,R9 + MOVQ 16(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 24(SI),AX + MULQ 0(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ 24(SI),AX + MULQ 8(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 64(SP),AX + MULQ 24(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 64(SP),AX + MULQ 32(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 32(SI),AX + MULQ 0(CX) + ADDQ AX,BX + ADCQ DX,BP + MOVQ 72(SP),AX + MULQ 16(CX) + ADDQ AX,R10 + ADCQ DX,R11 + MOVQ 72(SP),AX + MULQ 24(CX) + ADDQ AX,R12 + ADCQ DX,R13 + MOVQ 72(SP),AX + MULQ 32(CX) + ADDQ AX,R14 + ADCQ DX,R15 + MOVQ ·REDMASK51(SB),SI + SHLQ $13,R9:R8 + ANDQ SI,R8 + SHLQ $13,R11:R10 + ANDQ SI,R10 + ADDQ R9,R10 + SHLQ $13,R13:R12 + ANDQ SI,R12 + ADDQ R11,R12 + SHLQ $13,R15:R14 + ANDQ SI,R14 + ADDQ R13,R14 + SHLQ $13,BP:BX + ANDQ SI,BX + ADDQ R15,BX + IMUL3Q $19,BP,DX + ADDQ DX,R8 + MOVQ R8,DX + SHRQ $51,DX + ADDQ R10,DX + MOVQ DX,CX + SHRQ $51,DX + ANDQ SI,R8 + ADDQ R12,DX + MOVQ DX,R9 + SHRQ $51,DX + ANDQ SI,CX + ADDQ R14,DX + MOVQ DX,AX + SHRQ $51,DX + ANDQ SI,R9 + ADDQ BX,DX + MOVQ DX,R10 + SHRQ $51,DX + ANDQ SI,AX + IMUL3Q $19,DX,DX + ADDQ DX,R8 + ANDQ SI,R10 + MOVQ R8,0(DI) + MOVQ CX,8(DI) + MOVQ R9,16(DI) + MOVQ AX,24(DI) + MOVQ R10,32(DI) + MOVQ 0(SP),R11 + MOVQ 8(SP),R12 + MOVQ 16(SP),R13 + MOVQ 24(SP),R14 + MOVQ 32(SP),R15 + MOVQ 40(SP),BX + MOVQ 48(SP),BP + MOVQ R11,SP + MOVQ DI,AX + MOVQ SI,DX + RET diff --git a/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s b/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s new file mode 100644 index 0000000000..78d1a50ddc --- /dev/null +++ b/src/vendor/golang_org/x/crypto/curve25519/square_amd64.s @@ -0,0 +1,153 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This code was translated into a form compatible with 6a from the public +// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html + +// +build amd64,!gccgo,!appengine + +// func square(out, in *[5]uint64) +TEXT ·square(SB),7,$96-16 + MOVQ out+0(FP), DI + MOVQ in+8(FP), SI + + MOVQ SP,R11 + MOVQ $31,CX + NOTQ CX + ANDQ CX,SP + ADDQ $32, SP + + MOVQ R11,0(SP) + MOVQ R12,8(SP) + MOVQ R13,16(SP) + MOVQ R14,24(SP) + MOVQ R15,32(SP) + MOVQ BX,40(SP) + MOVQ BP,48(SP) + MOVQ 0(SI),AX + MULQ 0(SI) + MOVQ AX,CX + MOVQ DX,R8 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 8(SI) + MOVQ AX,R9 + MOVQ DX,R10 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 16(SI) + MOVQ AX,R11 + MOVQ DX,R12 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 24(SI) + MOVQ AX,R13 + MOVQ DX,R14 + MOVQ 0(SI),AX + SHLQ $1,AX + MULQ 32(SI) + MOVQ AX,R15 + MOVQ DX,BX + MOVQ 8(SI),AX + MULQ 8(SI) + ADDQ AX,R11 + ADCQ DX,R12 + MOVQ 8(SI),AX + SHLQ $1,AX + MULQ 16(SI) + ADDQ AX,R13 + ADCQ DX,R14 + MOVQ 8(SI),AX + SHLQ $1,AX + MULQ 24(SI) + ADDQ AX,R15 + ADCQ DX,BX + MOVQ 8(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,CX + ADCQ DX,R8 + MOVQ 16(SI),AX + MULQ 16(SI) + ADDQ AX,R15 + ADCQ DX,BX + MOVQ 16(SI),DX + IMUL3Q $38,DX,AX + MULQ 24(SI) + ADDQ AX,CX + ADCQ DX,R8 + MOVQ 16(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,R9 + ADCQ DX,R10 + MOVQ 24(SI),DX + IMUL3Q $19,DX,AX + MULQ 24(SI) + ADDQ AX,R9 + ADCQ DX,R10 + MOVQ 24(SI),DX + IMUL3Q $38,DX,AX + MULQ 32(SI) + ADDQ AX,R11 + ADCQ DX,R12 + MOVQ 32(SI),DX + IMUL3Q $19,DX,AX + MULQ 32(SI) + ADDQ AX,R13 + ADCQ DX,R14 + MOVQ ·REDMASK51(SB),SI + SHLQ $13,R8:CX + ANDQ SI,CX + SHLQ $13,R10:R9 + ANDQ SI,R9 + ADDQ R8,R9 + SHLQ $13,R12:R11 + ANDQ SI,R11 + ADDQ R10,R11 + SHLQ $13,R14:R13 + ANDQ SI,R13 + ADDQ R12,R13 + SHLQ $13,BX:R15 + ANDQ SI,R15 + ADDQ R14,R15 + IMUL3Q $19,BX,DX + ADDQ DX,CX + MOVQ CX,DX + SHRQ $51,DX + ADDQ R9,DX + ANDQ SI,CX + MOVQ DX,R8 + SHRQ $51,DX + ADDQ R11,DX + ANDQ SI,R8 + MOVQ DX,R9 + SHRQ $51,DX + ADDQ R13,DX + ANDQ SI,R9 + MOVQ DX,AX + SHRQ $51,DX + ADDQ R15,DX + ANDQ SI,AX + MOVQ DX,R10 + SHRQ $51,DX + IMUL3Q $19,DX,DX + ADDQ DX,CX + ANDQ SI,R10 + MOVQ CX,0(DI) + MOVQ R8,8(DI) + MOVQ R9,16(DI) + MOVQ AX,24(DI) + MOVQ R10,32(DI) + MOVQ 0(SP),R11 + MOVQ 8(SP),R12 + MOVQ 16(SP),R13 + MOVQ 24(SP),R14 + MOVQ 32(SP),R15 + MOVQ 40(SP),BX + MOVQ 48(SP),BP + MOVQ R11,SP + MOVQ DI,AX + MOVQ SI,DX + RET