2014-02-12 11:24:52 -07:00
|
|
|
// Copyright 2014 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
//
|
|
|
|
// ARM version of sha1block.go
|
|
|
|
|
2014-08-12 18:04:45 -06:00
|
|
|
#include "textflag.h"
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// SHA1 block routine. See sha1block.go for Go equivalent.
|
|
|
|
//
|
|
|
|
// There are 80 rounds of 4 types:
|
|
|
|
// - rounds 0-15 are type 1 and load data (ROUND1 macro).
|
|
|
|
// - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
|
|
|
|
// - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
|
|
|
|
// - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
|
|
|
|
// - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
|
|
|
|
//
|
|
|
|
// Each round loads or shuffles the data, then computes a per-round
|
|
|
|
// function of b, c, d, and then mixes the result into and rotates the
|
|
|
|
// five registers a, b, c, d, e holding the intermediate results.
|
|
|
|
//
|
|
|
|
// The register rotation is implemented by rotating the arguments to
|
|
|
|
// the round macros instead of by explicit move instructions.
|
|
|
|
|
|
|
|
// Register definitions
// Symbolic names for the ARM registers used by the macros and by ·block below.
|
2015-02-13 15:21:18 -07:00
|
|
|
#define Rdata R0 // Pointer to incoming data; advanced by LOAD
|
|
|
|
#define Rconst R1 // Current constant for SHA round; added to e by MIX
|
|
|
|
#define Ra R2 // SHA1 accumulator a
|
|
|
|
#define Rb R3 // SHA1 accumulator b
|
|
|
|
#define Rc R4 // SHA1 accumulator c
|
|
|
|
#define Rd R5 // SHA1 accumulator d
|
|
|
|
#define Re R6 // SHA1 accumulator e
|
|
|
|
#define Rt0 R7 // Temporary
|
|
|
|
#define Rt1 R8 // Temporary; holds t1, the round function result, between FUNCn and MIX
|
2014-02-12 11:24:52 -07:00
|
|
|
// r9, r10 are forbidden
|
|
|
|
// r11 is OK provided you check the assembler that no synthetic instructions use it
|
2015-02-13 15:21:18 -07:00
|
|
|
#define Rt2 R11 // Temporary
|
|
|
|
#define Rctr R12 // Loop counter for the round loops
|
|
|
|
#define Rw R14 // Pointer to the next free word of the w buffer; advanced by LOAD and SHUFFLE
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// func block(dig *digest, p []byte)
|
|
|
|
// 0(FP) is *digest
|
|
|
|
// 4(FP) is p.array (struct Slice)
|
|
|
|
// 8(FP) is p.len
|
|
|
|
//12(FP) is p.cap
|
|
|
|
//
|
|
|
|
// Stack frame
// Byte offsets of the locals; each offset is derived from the previous one.
// NOTE(review): p_data and saved are not referenced in the code visible here;
// the accumulators are saved and restored with MOVM.IB on (R13) directly.
|
2015-02-13 15:21:18 -07:00
|
|
|
#define p_end -4 // -4(SP) pointer to the end of data
|
|
|
|
#define p_data (p_end - 4) // -8(SP) current data pointer
|
|
|
|
#define w_buf (p_data - 4*80) // -328(SP) 80 words temporary buffer w uint32[80]
|
|
|
|
#define saved (w_buf - 4*5) // -348(SP) saved sha1 registers a,b,c,d,e - these must be last
|
2014-02-12 11:24:52 -07:00
|
|
|
// Total size +4 for saved LR is 352
|
|
|
|
|
|
|
|
// LOAD(Re) reads the next four bytes at Rdata one byte at a time, assembles
// them into a big-endian 32-bit word, stores that word at Rw and adds it to
// Re. Rdata and Rw are each advanced by 4 (the .P forms); Rt0, Rt1 and Rt2
// are clobbered.
//
// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
|
|
|
|
// e += w[i]
|
2015-02-13 15:21:18 -07:00
|
|
|
#define LOAD(Re) \
|
|
|
|
MOVBU 2(Rdata), Rt0 ; \
|
|
|
|
MOVBU 3(Rdata), Rt1 ; \
|
|
|
|
MOVBU 1(Rdata), Rt2 ; \
|
|
|
|
ORR Rt0<<8, Rt1, Rt0 ; \
|
|
|
|
MOVBU.P 4(Rdata), Rt1 ; \
|
|
|
|
ORR Rt2<<16, Rt0, Rt0 ; \
|
|
|
|
ORR Rt1<<24, Rt0, Rt0 ; \
|
|
|
|
MOVW.P Rt0, 4(Rw) ; \
|
|
|
|
ADD Rt0, Re, Re
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// SHUFFLE(Re) computes the next message-schedule word from four earlier
// words of the w buffer, stores it at Rw and adds it to Re. The buffer is
// addressed relative to Rw (which points at w[i]), so the operands are the
// words 16, 14, 8 and 3 slots behind Rw. Rw is advanced by 4; Rt0, Rt1 and
// Rt2 are clobbered.
//
// tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]
|
|
|
|
// w[i] = tmp<<1 | tmp>>(32-1)
|
|
|
|
// e += w[i]
|
2015-02-13 15:21:18 -07:00
|
|
|
#define SHUFFLE(Re) \
|
|
|
|
MOVW (-16*4)(Rw), Rt0 ; \
|
|
|
|
MOVW (-14*4)(Rw), Rt1 ; \
|
|
|
|
MOVW (-8*4)(Rw), Rt2 ; \
|
|
|
|
EOR Rt0, Rt1, Rt0 ; \
|
|
|
|
MOVW (-3*4)(Rw), Rt1 ; \
|
|
|
|
EOR Rt2, Rt0, Rt0 ; \
|
|
|
|
EOR Rt0, Rt1, Rt0 ; \
|
|
|
|
MOVW Rt0@>(32-1), Rt0 ; \
|
|
|
|
MOVW.P Rt0, 4(Rw) ; \
|
|
|
|
ADD Rt0, Re, Re
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// FUNC1 is the type 1 round function: it selects bits of c where b is set
// and bits of d where b is clear. The result is left in Rt1; Rt0 is
// clobbered. Ra and Re are accepted for a uniform signature but not used.
//
// t1 = (b & c) | ((~b) & d)
|
2015-02-13 15:21:18 -07:00
|
|
|
#define FUNC1(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
MVN Rb, Rt1 ; \
|
|
|
|
AND Rb, Rc, Rt0 ; \
|
|
|
|
AND Rd, Rt1, Rt1 ; \
|
|
|
|
ORR Rt0, Rt1, Rt1
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// FUNC2 is the type 2 round function (also used for type 4 via FUNC4).
// The result is left in Rt1; no other temporary is touched. Ra and Re are
// accepted for a uniform signature but not used.
//
// t1 = b ^ c ^ d
|
2015-02-13 15:21:18 -07:00
|
|
|
#define FUNC2(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
EOR Rb, Rc, Rt1 ; \
|
|
|
|
EOR Rd, Rt1, Rt1
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// FUNC3 is the type 3 round function, the bitwise majority of b, c and d,
// computed in the factored form on the second line below. The result is
// left in Rt1; Rt0 is clobbered. Ra and Re are accepted for a uniform
// signature but not used.
//
// t1 = (b & c) | (b & d) | (c & d) =
|
|
|
|
// t1 = (b & c) | ((b | c) & d)
|
2015-02-13 15:21:18 -07:00
|
|
|
#define FUNC3(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
ORR Rb, Rc, Rt0 ; \
|
|
|
|
AND Rb, Rc, Rt1 ; \
|
|
|
|
AND Rd, Rt0, Rt0 ; \
|
|
|
|
ORR Rt0, Rt1, Rt1
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
// FUNC4, the type 4 round function, is the same as type 2: t1 = b ^ c ^ d
#define FUNC4 FUNC2
|
|
|
|
|
|
|
|
// MIX finishes a round. It adds t1 (left in Rt1 by FUNCn), a rotated left
// by 5, and the round constant in Rconst into Re, and rotates Rb left by 30
// in place. The rotations are written as right rotations by 32-n. Re
// already contains w[i], added by LOAD or SHUFFLE earlier in the round.
// Rc and Rd are accepted for a uniform signature but not used.
//
// a5 := a<<5 | a>>(32-5)
|
|
|
|
// b = b<<30 | b>>(32-30)
|
|
|
|
// e = a5 + t1 + e + const
|
2015-02-13 15:21:18 -07:00
|
|
|
#define MIX(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
ADD Rt1, Re, Re ; \
|
|
|
|
MOVW Rb@>(32-30), Rb ; \
|
|
|
|
ADD Ra@>(32-5), Re, Re ; \
|
|
|
|
ADD Rconst, Re, Re
|
|
|
|
|
|
|
|
// ROUND1 is one type 1 round that loads data: LOAD reads the next input
// word into the w buffer and adds it to Re, FUNC1 leaves the round function
// in Rt1, and MIX completes the update of Re and rotates Rb.
#define ROUND1(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
LOAD(Re) ; \
|
|
|
|
FUNC1(Ra, Rb, Rc, Rd, Re) ; \
|
|
|
|
MIX(Ra, Rb, Rc, Rd, Re)
|
|
|
|
|
|
|
|
// ROUND1x is one type 1 round that does not load data: identical to ROUND1
// except that the schedule word comes from SHUFFLE instead of LOAD.
#define ROUND1x(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
SHUFFLE(Re) ; \
|
|
|
|
FUNC1(Ra, Rb, Rc, Rd, Re) ; \
|
|
|
|
MIX(Ra, Rb, Rc, Rd, Re)
|
|
|
|
|
|
|
|
// ROUND2 is one type 2 round: SHUFFLE produces the schedule word and adds
// it to Re, FUNC2 leaves b ^ c ^ d in Rt1, and MIX completes the round.
#define ROUND2(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
SHUFFLE(Re) ; \
|
|
|
|
FUNC2(Ra, Rb, Rc, Rd, Re) ; \
|
|
|
|
MIX(Ra, Rb, Rc, Rd, Re)
|
|
|
|
|
|
|
|
// ROUND3 is one type 3 round: SHUFFLE produces the schedule word and adds
// it to Re, FUNC3 leaves the majority of b, c, d in Rt1, and MIX completes
// the round.
#define ROUND3(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
SHUFFLE(Re) ; \
|
|
|
|
FUNC3(Ra, Rb, Rc, Rd, Re) ; \
|
|
|
|
MIX(Ra, Rb, Rc, Rd, Re)
|
|
|
|
|
|
|
|
// ROUND4 is one type 4 round: SHUFFLE produces the schedule word and adds
// it to Re, FUNC4 (defined as FUNC2) leaves b ^ c ^ d in Rt1, and MIX
// completes the round.
#define ROUND4(Ra, Rb, Rc, Rd, Re) \
|
|
|
|
SHUFFLE(Re) ; \
|
|
|
|
FUNC4(Ra, Rb, Rc, Rd, Re) ; \
|
|
|
|
MIX(Ra, Rb, Rc, Rd, Re)
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
|
|
|
|
// func block(dig *digest, p []byte)
//
// block folds the bytes of p into the five 32-bit words at dig. Each pass of
// the outer loop consumes 64 bytes (16 LOADs of 4 bytes) and runs 80 rounds;
// the pass repeats while Rdata is below the end pointer computed from p.
// NOTE(review): the loop body runs once before the end pointer is compared,
// so this presumably relies on len(p) being a non-zero multiple of 64 -
// confirm against the caller.
|
|
|
|
TEXT ·block(SB), 0, $352-16
|
2015-02-13 15:21:18 -07:00
|
|
|
MOVW p+4(FP), Rdata // pointer to the data
|
|
|
|
MOVW p_len+8(FP), Rt0 // number of bytes
|
|
|
|
ADD Rdata, Rt0
|
2015-02-17 19:30:27 -07:00
|
|
|
MOVW Rt0, p_end(R13) // pointer to end of data
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
|
|
|
// Load up initial SHA1 accumulator
|
2015-02-13 15:21:18 -07:00
|
|
|
MOVW dig+0(FP), Rt0
|
|
|
|
MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
|
|
|
loop:
|
|
|
|
// Save registers at SP+4 onwards
|
// (the values of a..e on entry to this chunk are added back in after round 79)
2015-02-13 15:21:18 -07:00
|
|
|
MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
|
|
|
|
|
2015-02-17 19:30:27 -07:00
|
|
|
// Point Rw at the start of the w buffer for this chunk.
MOVW $w_buf(R13), Rw
|
2015-02-13 15:21:18 -07:00
|
|
|
// Rounds 0-14: three passes of five data-loading type 1 rounds. The register
// arguments rotate by one position per round, so after five rounds they are
// back in their starting order.
MOVW $0x5A827999, Rconst
|
|
|
|
MOVW $3, Rctr
|
|
|
|
loop1: ROUND1(Ra, Rb, Rc, Rd, Re)
|
|
|
|
ROUND1(Re, Ra, Rb, Rc, Rd)
|
|
|
|
ROUND1(Rd, Re, Ra, Rb, Rc)
|
|
|
|
ROUND1(Rc, Rd, Re, Ra, Rb)
|
|
|
|
ROUND1(Rb, Rc, Rd, Re, Ra)
|
|
|
|
SUB.S $1, Rctr
|
2014-02-12 11:24:52 -07:00
|
|
|
BNE loop1
|
|
|
|
|
2015-02-13 15:21:18 -07:00
|
|
|
// Round 15 (the last data load) followed by rounds 16-19, which are type 1
// but take their word from SHUFFLE.
ROUND1(Ra, Rb, Rc, Rd, Re)
|
|
|
|
ROUND1x(Re, Ra, Rb, Rc, Rd)
|
|
|
|
ROUND1x(Rd, Re, Ra, Rb, Rc)
|
|
|
|
ROUND1x(Rc, Rd, Re, Ra, Rb)
|
|
|
|
ROUND1x(Rb, Rc, Rd, Re, Ra)
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
2015-02-13 15:21:18 -07:00
|
|
|
// Rounds 20-39: four passes of five type 2 rounds.
MOVW $0x6ED9EBA1, Rconst
|
|
|
|
MOVW $4, Rctr
|
|
|
|
loop2: ROUND2(Ra, Rb, Rc, Rd, Re)
|
|
|
|
ROUND2(Re, Ra, Rb, Rc, Rd)
|
|
|
|
ROUND2(Rd, Re, Ra, Rb, Rc)
|
|
|
|
ROUND2(Rc, Rd, Re, Ra, Rb)
|
|
|
|
ROUND2(Rb, Rc, Rd, Re, Ra)
|
|
|
|
SUB.S $1, Rctr
|
2014-02-12 11:24:52 -07:00
|
|
|
BNE loop2
|
|
|
|
|
2015-02-13 15:21:18 -07:00
|
|
|
// Rounds 40-59: four passes of five type 3 rounds.
MOVW $0x8F1BBCDC, Rconst
|
|
|
|
MOVW $4, Rctr
|
|
|
|
loop3: ROUND3(Ra, Rb, Rc, Rd, Re)
|
|
|
|
ROUND3(Re, Ra, Rb, Rc, Rd)
|
|
|
|
ROUND3(Rd, Re, Ra, Rb, Rc)
|
|
|
|
ROUND3(Rc, Rd, Re, Ra, Rb)
|
|
|
|
ROUND3(Rb, Rc, Rd, Re, Ra)
|
|
|
|
SUB.S $1, Rctr
|
2014-02-12 11:24:52 -07:00
|
|
|
BNE loop3
|
|
|
|
|
2015-02-13 15:21:18 -07:00
|
|
|
// Rounds 60-79: four passes of five type 4 rounds.
MOVW $0xCA62C1D6, Rconst
|
|
|
|
MOVW $4, Rctr
|
|
|
|
loop4: ROUND4(Ra, Rb, Rc, Rd, Re)
|
|
|
|
ROUND4(Re, Ra, Rb, Rc, Rd)
|
|
|
|
ROUND4(Rd, Re, Ra, Rb, Rc)
|
|
|
|
ROUND4(Rc, Rd, Re, Ra, Rb)
|
|
|
|
ROUND4(Rb, Rc, Rd, Re, Ra)
|
|
|
|
SUB.S $1, Rctr
|
2014-02-12 11:24:52 -07:00
|
|
|
BNE loop4
|
|
|
|
|
|
|
|
// Accumulate - restoring registers from SP+4
|
// The temporaries, the loop counter and Rw are all free at this point, so
// they receive the saved a, b, c, d, e, which are then added to the new values.
2015-02-13 15:21:18 -07:00
|
|
|
MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
|
|
|
|
ADD Rt0, Ra
|
|
|
|
ADD Rt1, Rb
|
|
|
|
ADD Rt2, Rc
|
|
|
|
ADD Rctr, Rd
|
|
|
|
ADD Rw, Re
|
|
|
|
|
2015-02-17 19:30:27 -07:00
|
|
|
// Process another chunk while Rdata is still below the end pointer.
MOVW p_end(R13), Rt0
|
2015-02-13 15:21:18 -07:00
|
|
|
CMP Rt0, Rdata
|
2014-02-12 11:24:52 -07:00
|
|
|
BLO loop
|
|
|
|
|
|
|
|
// Save final SHA1 accumulator
|
2015-02-13 15:21:18 -07:00
|
|
|
MOVW dig+0(FP), Rt0
|
|
|
|
MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
|
2014-02-12 11:24:52 -07:00
|
|
|
|
|
|
|
|
|
|
RET
|