1
0
mirror of https://github.com/golang/go synced 2024-11-25 10:17:57 -07:00

crypto/md5: Avo port of md5block_amd64.s

This implementation utilizes the same registers found in the reference
implementation, aiming to produce a minimal semantic diff between the
Avo-generated output and the original hand-written assembly.

To verify the Avo implementation, the reference and Avo-generated
assembly files are fed to `go tool asm`, capturing the debug output into
corresponding temp files. The debug output contains supplementary
metadata (line numbers, instruction offsets, and source file references)
that must be removed in order to obtain a semantic diff of the two
files. This is accomplished via a small utility script written in awk.

Metadata not found in the reference assembly file has been added to one
parameter symbol, resulting in a single line diff.

Commands used to verify Avo output:

GOROOT=$(go env GOROOT)
ASM_PATH="src/crypto/md5/md5block_amd64.s"
REFERENCE="54fe0fd43fcf8609666c16ae6d15ed92873b1564"

go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
  <(git cat-file -p "$REFERENCE:$ASM_PATH") \
  > /tmp/reference.s

go tool asm -o /dev/null -I "$GOROOT"/src/runtime -debug \
  "$ASM_PATH" \
  > /tmp/avo.s

normalize(){
  awk '{
    $1=$2=$3="";
    print substr($0,4)
  }'
}

diff <(normalize < /tmp/reference.s) <(normalize < /tmp/avo.s)

3c3
< MOVQ p+8(FP), SI
---
> MOVQ p_base+8(FP), SI

Change-Id: Ifecc84fd0f5a39a88350e6eaffb45ed3fdacf2fb
Reviewed-on: https://go-review.googlesource.com/c/go/+/599935
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
This commit is contained in:
Garrett Bodley 2024-07-21 01:00:56 -04:00 committed by Roland Shoemaker
parent 3b78ca0626
commit 1e0cba8ee1
6 changed files with 928 additions and 187 deletions

View File

@ -357,6 +357,7 @@ var excluded = map[string]bool{
// go.dev/issue/46027: some imports are missing for this submodule.
"crypto/internal/bigmod/_asm": true,
"crypto/internal/edwards25519/field/_asm": true,
"crypto/md5/_asm": true,
"crypto/sha1/_asm": true,
"crypto/sha256/_asm": true,
"crypto/sha512/_asm": true,

View File

@ -0,0 +1,11 @@
module std/crypto/md5/_asm
go 1.24
require github.com/mmcloughlin/avo v0.6.0
require (
golang.org/x/mod v0.20.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/tools v0.24.0 // indirect
)

View File

@ -0,0 +1,8 @@
github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY=
github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8=
golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=

View File

@ -0,0 +1,228 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Original source:
// http://www.zorinaq.com/papers/md5-amd64.html
// http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// Translated from Perl generating GNU assembly into
// #defines generating 6a assembly by the Go Authors.
package main
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"
)
//go:generate go run . -out ../md5block_amd64.s -pkg md5
func main() {
Package("crypto/md5")
ConstraintExpr("!purego")
block()
Generate()
}
// MD5 optimized for AMD64.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.
func block() {
Implement("block")
Attributes(NOSPLIT)
AllocLocal(8)
Load(Param("dig"), RBP)
Load(Param("p").Base(), RSI)
Load(Param("p").Len(), RDX)
SHRQ(Imm(6), RDX)
SHLQ(Imm(6), RDX)
LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI)
MOVL(Mem{Base: BP}.Offset(0*4), EAX)
MOVL(Mem{Base: BP}.Offset(1*4), EBX)
MOVL(Mem{Base: BP}.Offset(2*4), ECX)
MOVL(Mem{Base: BP}.Offset(3*4), EDX)
MOVL(Imm(0xffffffff), R11L)
CMPQ(RSI, RDI)
JEQ(LabelRef("end"))
loop()
end()
}
func loop() {
Label("loop")
MOVL(EAX, R12L)
MOVL(EBX, R13L)
MOVL(ECX, R14L)
MOVL(EDX, R15L)
MOVL(Mem{Base: SI}.Offset(0*4), R8L)
MOVL(EDX, R9L)
ROUND1(EAX, EBX, ECX, EDX, 1, 0xd76aa478, 7)
ROUND1(EDX, EAX, EBX, ECX, 2, 0xe8c7b756, 12)
ROUND1(ECX, EDX, EAX, EBX, 3, 0x242070db, 17)
ROUND1(EBX, ECX, EDX, EAX, 4, 0xc1bdceee, 22)
ROUND1(EAX, EBX, ECX, EDX, 5, 0xf57c0faf, 7)
ROUND1(EDX, EAX, EBX, ECX, 6, 0x4787c62a, 12)
ROUND1(ECX, EDX, EAX, EBX, 7, 0xa8304613, 17)
ROUND1(EBX, ECX, EDX, EAX, 8, 0xfd469501, 22)
ROUND1(EAX, EBX, ECX, EDX, 9, 0x698098d8, 7)
ROUND1(EDX, EAX, EBX, ECX, 10, 0x8b44f7af, 12)
ROUND1(ECX, EDX, EAX, EBX, 11, 0xffff5bb1, 17)
ROUND1(EBX, ECX, EDX, EAX, 12, 0x895cd7be, 22)
ROUND1(EAX, EBX, ECX, EDX, 13, 0x6b901122, 7)
ROUND1(EDX, EAX, EBX, ECX, 14, 0xfd987193, 12)
ROUND1(ECX, EDX, EAX, EBX, 15, 0xa679438e, 17)
ROUND1(EBX, ECX, EDX, EAX, 1, 0x49b40821, 22)
MOVL(EDX, R9L)
MOVL(EDX, R10L)
ROUND2(EAX, EBX, ECX, EDX, 6, 0xf61e2562, 5)
ROUND2(EDX, EAX, EBX, ECX, 11, 0xc040b340, 9)
ROUND2(ECX, EDX, EAX, EBX, 0, 0x265e5a51, 14)
ROUND2(EBX, ECX, EDX, EAX, 5, 0xe9b6c7aa, 20)
ROUND2(EAX, EBX, ECX, EDX, 10, 0xd62f105d, 5)
ROUND2(EDX, EAX, EBX, ECX, 15, 0x2441453, 9)
ROUND2(ECX, EDX, EAX, EBX, 4, 0xd8a1e681, 14)
ROUND2(EBX, ECX, EDX, EAX, 9, 0xe7d3fbc8, 20)
ROUND2(EAX, EBX, ECX, EDX, 14, 0x21e1cde6, 5)
ROUND2(EDX, EAX, EBX, ECX, 3, 0xc33707d6, 9)
ROUND2(ECX, EDX, EAX, EBX, 8, 0xf4d50d87, 14)
ROUND2(EBX, ECX, EDX, EAX, 13, 0x455a14ed, 20)
ROUND2(EAX, EBX, ECX, EDX, 2, 0xa9e3e905, 5)
ROUND2(EDX, EAX, EBX, ECX, 7, 0xfcefa3f8, 9)
ROUND2(ECX, EDX, EAX, EBX, 12, 0x676f02d9, 14)
ROUND2(EBX, ECX, EDX, EAX, 5, 0x8d2a4c8a, 20)
MOVL(ECX, R9L)
ROUND3FIRST(EAX, EBX, ECX, EDX, 8, 0xfffa3942, 4)
ROUND3(EDX, EAX, EBX, ECX, 11, 0x8771f681, 11)
ROUND3(ECX, EDX, EAX, EBX, 14, 0x6d9d6122, 16)
ROUND3(EBX, ECX, EDX, EAX, 1, 0xfde5380c, 23)
ROUND3(EAX, EBX, ECX, EDX, 4, 0xa4beea44, 4)
ROUND3(EDX, EAX, EBX, ECX, 7, 0x4bdecfa9, 11)
ROUND3(ECX, EDX, EAX, EBX, 10, 0xf6bb4b60, 16)
ROUND3(EBX, ECX, EDX, EAX, 13, 0xbebfbc70, 23)
ROUND3(EAX, EBX, ECX, EDX, 0, 0x289b7ec6, 4)
ROUND3(EDX, EAX, EBX, ECX, 3, 0xeaa127fa, 11)
ROUND3(ECX, EDX, EAX, EBX, 6, 0xd4ef3085, 16)
ROUND3(EBX, ECX, EDX, EAX, 9, 0x4881d05, 23)
ROUND3(EAX, EBX, ECX, EDX, 12, 0xd9d4d039, 4)
ROUND3(EDX, EAX, EBX, ECX, 15, 0xe6db99e5, 11)
ROUND3(ECX, EDX, EAX, EBX, 2, 0x1fa27cf8, 16)
ROUND3(EBX, ECX, EDX, EAX, 0, 0xc4ac5665, 23)
MOVL(R11L, R9L)
XORL(EDX, R9L)
ROUND4(EAX, EBX, ECX, EDX, 7, 0xf4292244, 6)
ROUND4(EDX, EAX, EBX, ECX, 14, 0x432aff97, 10)
ROUND4(ECX, EDX, EAX, EBX, 5, 0xab9423a7, 15)
ROUND4(EBX, ECX, EDX, EAX, 12, 0xfc93a039, 21)
ROUND4(EAX, EBX, ECX, EDX, 3, 0x655b59c3, 6)
ROUND4(EDX, EAX, EBX, ECX, 10, 0x8f0ccc92, 10)
ROUND4(ECX, EDX, EAX, EBX, 1, 0xffeff47d, 15)
ROUND4(EBX, ECX, EDX, EAX, 8, 0x85845dd1, 21)
ROUND4(EAX, EBX, ECX, EDX, 15, 0x6fa87e4f, 6)
ROUND4(EDX, EAX, EBX, ECX, 6, 0xfe2ce6e0, 10)
ROUND4(ECX, EDX, EAX, EBX, 13, 0xa3014314, 15)
ROUND4(EBX, ECX, EDX, EAX, 4, 0x4e0811a1, 21)
ROUND4(EAX, EBX, ECX, EDX, 11, 0xf7537e82, 6)
ROUND4(EDX, EAX, EBX, ECX, 2, 0xbd3af235, 10)
ROUND4(ECX, EDX, EAX, EBX, 9, 0x2ad7d2bb, 15)
ROUND4(EBX, ECX, EDX, EAX, 0, 0xeb86d391, 21)
ADDL(R12L, EAX)
ADDL(R13L, EBX)
ADDL(R14L, ECX)
ADDL(R15L, EDX)
ADDQ(Imm(64), RSI)
CMPQ(RSI, RDI)
JB(LabelRef("loop"))
}
func end() {
Label("end")
MOVL(EAX, Mem{Base: BP}.Offset(0*4))
MOVL(EBX, Mem{Base: BP}.Offset(1*4))
MOVL(ECX, Mem{Base: BP}.Offset(2*4))
MOVL(EDX, Mem{Base: BP}.Offset(3*4))
RET()
}
func ROUND1(a, b, c, d GPPhysical, index int, konst, shift uint64) {
XORL(c, R9L)
ADDL(Imm(konst), a)
ADDL(R8L, a)
ANDL(b, R9L)
XORL(d, R9L)
MOVL(Mem{Base: SI}.Offset(index*4), R8L)
ADDL(R9L, a)
ROLL(Imm(shift), a)
MOVL(c, R9L)
ADDL(b, a)
}
// Uses https://github.com/animetosho/md5-optimisation#dependency-shortcut-in-g-function
func ROUND2(a, b, c, d GPPhysical, index int, konst, shift uint64) {
XORL(R11L, R9L)
ADDL(Imm(konst), a)
ADDL(R8L, a)
ANDL(b, R10L)
ANDL(c, R9L)
MOVL(Mem{Base: SI}.Offset(index*4), R8L)
ADDL(R9L, a)
ADDL(R10L, a)
MOVL(c, R9L)
MOVL(c, R10L)
ROLL(Imm(shift), a)
ADDL(b, a)
}
// Uses https://github.com/animetosho/md5-optimisation#h-function-re-use
func ROUND3FIRST(a, b, c, d GPPhysical, index int, konst, shift uint64) {
MOVL(d, R9L)
XORL(c, R9L)
XORL(b, R9L)
ADDL(Imm(konst), a)
ADDL(R8L, a)
MOVL(Mem{Base: SI}.Offset(index*4), R8L)
ADDL(R9L, a)
ROLL(Imm(shift), a)
ADDL(b, a)
}
func ROUND3(a, b, c, d GPPhysical, index int, konst, shift uint64) {
XORL(a, R9L)
XORL(b, R9L)
ADDL(Imm(konst), a)
ADDL(R8L, a)
MOVL(Mem{Base: SI}.Offset(index*4), R8L)
ADDL(R9L, a)
ROLL(Imm(shift), a)
ADDL(b, a)
}
func ROUND4(a, b, c, d GPPhysical, index int, konst, shift uint64) {
ADDL(Imm(konst), a)
ADDL(R8L, a)
ORL(b, R9L)
XORL(c, R9L)
ADDL(R9L, a)
MOVL(Mem{Base: SI}.Offset(index*4), R8L)
MOVL(Imm(0xffffffff), R9L)
ROLL(Imm(shift), a)
XORL(c, R9L)
ADDL(b, a)
}

View File

@ -1,197 +1,689 @@
// Original source:
// http://www.zorinaq.com/papers/md5-amd64.html
// http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// Translated from Perl generating GNU assembly into
// #defines generating 6a assembly by the Go Authors.
// Code generated by command: go run md5block_amd64_asm.go -out ../md5block_amd64.s -pkg md5. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
// MD5 optimized for AMD64.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.
TEXT ·block(SB),NOSPLIT,$8-32
MOVQ dig+0(FP), BP
MOVQ p+8(FP), SI
MOVQ p_len+16(FP), DX
SHRQ $6, DX
SHLQ $6, DX
LEAQ (SI)(DX*1), DI
MOVL (0*4)(BP), AX
MOVL (1*4)(BP), BX
MOVL (2*4)(BP), CX
MOVL (3*4)(BP), DX
MOVL $0xffffffff, R11
CMPQ SI, DI
JEQ end
// func block(dig *digest, p []byte)
TEXT ·block(SB), NOSPLIT, $8-32
MOVQ dig+0(FP), BP
MOVQ p_base+8(FP), SI
MOVQ p_len+16(FP), DX
SHRQ $0x06, DX
SHLQ $0x06, DX
LEAQ (SI)(DX*1), DI
MOVL (BP), AX
MOVL 4(BP), BX
MOVL 8(BP), CX
MOVL 12(BP), DX
MOVL $0xffffffff, R11
CMPQ SI, DI
JEQ end
loop:
MOVL AX, R12
MOVL BX, R13
MOVL CX, R14
MOVL DX, R15
MOVL (0*4)(SI), R8
MOVL DX, R9
#define ROUND1(a, b, c, d, index, const, shift) \
XORL c, R9; \
ADDL $const, a; \
ADDL R8, a; \
ANDL b, R9; \
XORL d, R9; \
MOVL (index*4)(SI), R8; \
ADDL R9, a; \
ROLL $shift, a; \
MOVL c, R9; \
ADDL b, a
ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7);
ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12);
ROUND1(CX,DX,AX,BX, 3,0x242070db,17);
ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22);
ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7);
ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12);
ROUND1(CX,DX,AX,BX, 7,0xa8304613,17);
ROUND1(BX,CX,DX,AX, 8,0xfd469501,22);
ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7);
ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12);
ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17);
ROUND1(BX,CX,DX,AX,12,0x895cd7be,22);
ROUND1(AX,BX,CX,DX,13,0x6b901122, 7);
ROUND1(DX,AX,BX,CX,14,0xfd987193,12);
ROUND1(CX,DX,AX,BX,15,0xa679438e,17);
ROUND1(BX,CX,DX,AX, 1,0x49b40821,22);
MOVL DX, R9
MOVL DX, R10
// Uses https://github.com/animetosho/md5-optimisation#dependency-shortcut-in-g-function
#define ROUND2(a, b, c, d, index, const, shift) \
XORL R11, R9; \
ADDL $const, a; \
ADDL R8, a; \
ANDL b, R10; \
ANDL c, R9; \
MOVL (index*4)(SI),R8; \
ADDL R9, a; \
ADDL R10, a; \
MOVL c, R9; \
MOVL c, R10; \
ROLL $shift, a; \
ADDL b, a
ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5);
ROUND2(DX,AX,BX,CX,11,0xc040b340, 9);
ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14);
ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20);
ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5);
ROUND2(DX,AX,BX,CX,15, 0x2441453, 9);
ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14);
ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20);
ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5);
ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9);
ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14);
ROUND2(BX,CX,DX,AX,13,0x455a14ed,20);
ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5);
ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9);
ROUND2(CX,DX,AX,BX,12,0x676f02d9,14);
ROUND2(BX,CX,DX,AX, 5,0x8d2a4c8a,20);
MOVL CX, R9
// Uses https://github.com/animetosho/md5-optimisation#h-function-re-use
#define ROUND3FIRST(a, b, c, d, index, const, shift) \
MOVL d, R9; \
XORL c, R9; \
XORL b, R9; \
ADDL $const, a; \
ADDL R8, a; \
MOVL (index*4)(SI),R8; \
ADDL R9, a; \
ROLL $shift, a; \
ADDL b, a
#define ROUND3(a, b, c, d, index, const, shift) \
XORL a, R9; \
XORL b, R9; \
ADDL $const, a; \
ADDL R8, a; \
MOVL (index*4)(SI),R8; \
ADDL R9, a; \
ROLL $shift, a; \
ADDL b, a
ROUND3FIRST(AX,BX,CX,DX, 8,0xfffa3942, 4);
ROUND3(DX,AX,BX,CX,11,0x8771f681,11);
ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16);
ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23);
ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4);
ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11);
ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16);
ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23);
ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4);
ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11);
ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16);
ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23);
ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4);
ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11);
ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16);
ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23);
MOVL R11, R9
XORL DX, R9
#define ROUND4(a, b, c, d, index, const, shift) \
ADDL $const, a; \
ADDL R8, a; \
ORL b, R9; \
XORL c, R9; \
ADDL R9, a; \
MOVL (index*4)(SI),R8; \
MOVL $0xffffffff, R9; \
ROLL $shift, a; \
XORL c, R9; \
ADDL b, a
ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6);
ROUND4(DX,AX,BX,CX,14,0x432aff97,10);
ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15);
ROUND4(BX,CX,DX,AX,12,0xfc93a039,21);
ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6);
ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10);
ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15);
ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21);
ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6);
ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10);
ROUND4(CX,DX,AX,BX,13,0xa3014314,15);
ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21);
ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6);
ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10);
ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15);
ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21);
ADDL R12, AX
ADDL R13, BX
ADDL R14, CX
ADDL R15, DX
ADDQ $64, SI
CMPQ SI, DI
JB loop
MOVL AX, R12
MOVL BX, R13
MOVL CX, R14
MOVL DX, R15
MOVL (SI), R8
MOVL DX, R9
XORL CX, R9
ADDL $0xd76aa478, AX
ADDL R8, AX
ANDL BX, R9
XORL DX, R9
MOVL 4(SI), R8
ADDL R9, AX
ROLL $0x07, AX
MOVL CX, R9
ADDL BX, AX
XORL BX, R9
ADDL $0xe8c7b756, DX
ADDL R8, DX
ANDL AX, R9
XORL CX, R9
MOVL 8(SI), R8
ADDL R9, DX
ROLL $0x0c, DX
MOVL BX, R9
ADDL AX, DX
XORL AX, R9
ADDL $0x242070db, CX
ADDL R8, CX
ANDL DX, R9
XORL BX, R9
MOVL 12(SI), R8
ADDL R9, CX
ROLL $0x11, CX
MOVL AX, R9
ADDL DX, CX
XORL DX, R9
ADDL $0xc1bdceee, BX
ADDL R8, BX
ANDL CX, R9
XORL AX, R9
MOVL 16(SI), R8
ADDL R9, BX
ROLL $0x16, BX
MOVL DX, R9
ADDL CX, BX
XORL CX, R9
ADDL $0xf57c0faf, AX
ADDL R8, AX
ANDL BX, R9
XORL DX, R9
MOVL 20(SI), R8
ADDL R9, AX
ROLL $0x07, AX
MOVL CX, R9
ADDL BX, AX
XORL BX, R9
ADDL $0x4787c62a, DX
ADDL R8, DX
ANDL AX, R9
XORL CX, R9
MOVL 24(SI), R8
ADDL R9, DX
ROLL $0x0c, DX
MOVL BX, R9
ADDL AX, DX
XORL AX, R9
ADDL $0xa8304613, CX
ADDL R8, CX
ANDL DX, R9
XORL BX, R9
MOVL 28(SI), R8
ADDL R9, CX
ROLL $0x11, CX
MOVL AX, R9
ADDL DX, CX
XORL DX, R9
ADDL $0xfd469501, BX
ADDL R8, BX
ANDL CX, R9
XORL AX, R9
MOVL 32(SI), R8
ADDL R9, BX
ROLL $0x16, BX
MOVL DX, R9
ADDL CX, BX
XORL CX, R9
ADDL $0x698098d8, AX
ADDL R8, AX
ANDL BX, R9
XORL DX, R9
MOVL 36(SI), R8
ADDL R9, AX
ROLL $0x07, AX
MOVL CX, R9
ADDL BX, AX
XORL BX, R9
ADDL $0x8b44f7af, DX
ADDL R8, DX
ANDL AX, R9
XORL CX, R9
MOVL 40(SI), R8
ADDL R9, DX
ROLL $0x0c, DX
MOVL BX, R9
ADDL AX, DX
XORL AX, R9
ADDL $0xffff5bb1, CX
ADDL R8, CX
ANDL DX, R9
XORL BX, R9
MOVL 44(SI), R8
ADDL R9, CX
ROLL $0x11, CX
MOVL AX, R9
ADDL DX, CX
XORL DX, R9
ADDL $0x895cd7be, BX
ADDL R8, BX
ANDL CX, R9
XORL AX, R9
MOVL 48(SI), R8
ADDL R9, BX
ROLL $0x16, BX
MOVL DX, R9
ADDL CX, BX
XORL CX, R9
ADDL $0x6b901122, AX
ADDL R8, AX
ANDL BX, R9
XORL DX, R9
MOVL 52(SI), R8
ADDL R9, AX
ROLL $0x07, AX
MOVL CX, R9
ADDL BX, AX
XORL BX, R9
ADDL $0xfd987193, DX
ADDL R8, DX
ANDL AX, R9
XORL CX, R9
MOVL 56(SI), R8
ADDL R9, DX
ROLL $0x0c, DX
MOVL BX, R9
ADDL AX, DX
XORL AX, R9
ADDL $0xa679438e, CX
ADDL R8, CX
ANDL DX, R9
XORL BX, R9
MOVL 60(SI), R8
ADDL R9, CX
ROLL $0x11, CX
MOVL AX, R9
ADDL DX, CX
XORL DX, R9
ADDL $0x49b40821, BX
ADDL R8, BX
ANDL CX, R9
XORL AX, R9
MOVL 4(SI), R8
ADDL R9, BX
ROLL $0x16, BX
MOVL DX, R9
ADDL CX, BX
MOVL DX, R9
MOVL DX, R10
XORL R11, R9
ADDL $0xf61e2562, AX
ADDL R8, AX
ANDL BX, R10
ANDL CX, R9
MOVL 24(SI), R8
ADDL R9, AX
ADDL R10, AX
MOVL CX, R9
MOVL CX, R10
ROLL $0x05, AX
ADDL BX, AX
XORL R11, R9
ADDL $0xc040b340, DX
ADDL R8, DX
ANDL AX, R10
ANDL BX, R9
MOVL 44(SI), R8
ADDL R9, DX
ADDL R10, DX
MOVL BX, R9
MOVL BX, R10
ROLL $0x09, DX
ADDL AX, DX
XORL R11, R9
ADDL $0x265e5a51, CX
ADDL R8, CX
ANDL DX, R10
ANDL AX, R9
MOVL (SI), R8
ADDL R9, CX
ADDL R10, CX
MOVL AX, R9
MOVL AX, R10
ROLL $0x0e, CX
ADDL DX, CX
XORL R11, R9
ADDL $0xe9b6c7aa, BX
ADDL R8, BX
ANDL CX, R10
ANDL DX, R9
MOVL 20(SI), R8
ADDL R9, BX
ADDL R10, BX
MOVL DX, R9
MOVL DX, R10
ROLL $0x14, BX
ADDL CX, BX
XORL R11, R9
ADDL $0xd62f105d, AX
ADDL R8, AX
ANDL BX, R10
ANDL CX, R9
MOVL 40(SI), R8
ADDL R9, AX
ADDL R10, AX
MOVL CX, R9
MOVL CX, R10
ROLL $0x05, AX
ADDL BX, AX
XORL R11, R9
ADDL $0x02441453, DX
ADDL R8, DX
ANDL AX, R10
ANDL BX, R9
MOVL 60(SI), R8
ADDL R9, DX
ADDL R10, DX
MOVL BX, R9
MOVL BX, R10
ROLL $0x09, DX
ADDL AX, DX
XORL R11, R9
ADDL $0xd8a1e681, CX
ADDL R8, CX
ANDL DX, R10
ANDL AX, R9
MOVL 16(SI), R8
ADDL R9, CX
ADDL R10, CX
MOVL AX, R9
MOVL AX, R10
ROLL $0x0e, CX
ADDL DX, CX
XORL R11, R9
ADDL $0xe7d3fbc8, BX
ADDL R8, BX
ANDL CX, R10
ANDL DX, R9
MOVL 36(SI), R8
ADDL R9, BX
ADDL R10, BX
MOVL DX, R9
MOVL DX, R10
ROLL $0x14, BX
ADDL CX, BX
XORL R11, R9
ADDL $0x21e1cde6, AX
ADDL R8, AX
ANDL BX, R10
ANDL CX, R9
MOVL 56(SI), R8
ADDL R9, AX
ADDL R10, AX
MOVL CX, R9
MOVL CX, R10
ROLL $0x05, AX
ADDL BX, AX
XORL R11, R9
ADDL $0xc33707d6, DX
ADDL R8, DX
ANDL AX, R10
ANDL BX, R9
MOVL 12(SI), R8
ADDL R9, DX
ADDL R10, DX
MOVL BX, R9
MOVL BX, R10
ROLL $0x09, DX
ADDL AX, DX
XORL R11, R9
ADDL $0xf4d50d87, CX
ADDL R8, CX
ANDL DX, R10
ANDL AX, R9
MOVL 32(SI), R8
ADDL R9, CX
ADDL R10, CX
MOVL AX, R9
MOVL AX, R10
ROLL $0x0e, CX
ADDL DX, CX
XORL R11, R9
ADDL $0x455a14ed, BX
ADDL R8, BX
ANDL CX, R10
ANDL DX, R9
MOVL 52(SI), R8
ADDL R9, BX
ADDL R10, BX
MOVL DX, R9
MOVL DX, R10
ROLL $0x14, BX
ADDL CX, BX
XORL R11, R9
ADDL $0xa9e3e905, AX
ADDL R8, AX
ANDL BX, R10
ANDL CX, R9
MOVL 8(SI), R8
ADDL R9, AX
ADDL R10, AX
MOVL CX, R9
MOVL CX, R10
ROLL $0x05, AX
ADDL BX, AX
XORL R11, R9
ADDL $0xfcefa3f8, DX
ADDL R8, DX
ANDL AX, R10
ANDL BX, R9
MOVL 28(SI), R8
ADDL R9, DX
ADDL R10, DX
MOVL BX, R9
MOVL BX, R10
ROLL $0x09, DX
ADDL AX, DX
XORL R11, R9
ADDL $0x676f02d9, CX
ADDL R8, CX
ANDL DX, R10
ANDL AX, R9
MOVL 48(SI), R8
ADDL R9, CX
ADDL R10, CX
MOVL AX, R9
MOVL AX, R10
ROLL $0x0e, CX
ADDL DX, CX
XORL R11, R9
ADDL $0x8d2a4c8a, BX
ADDL R8, BX
ANDL CX, R10
ANDL DX, R9
MOVL 20(SI), R8
ADDL R9, BX
ADDL R10, BX
MOVL DX, R9
MOVL DX, R10
ROLL $0x14, BX
ADDL CX, BX
MOVL CX, R9
MOVL DX, R9
XORL CX, R9
XORL BX, R9
ADDL $0xfffa3942, AX
ADDL R8, AX
MOVL 32(SI), R8
ADDL R9, AX
ROLL $0x04, AX
ADDL BX, AX
XORL DX, R9
XORL AX, R9
ADDL $0x8771f681, DX
ADDL R8, DX
MOVL 44(SI), R8
ADDL R9, DX
ROLL $0x0b, DX
ADDL AX, DX
XORL CX, R9
XORL DX, R9
ADDL $0x6d9d6122, CX
ADDL R8, CX
MOVL 56(SI), R8
ADDL R9, CX
ROLL $0x10, CX
ADDL DX, CX
XORL BX, R9
XORL CX, R9
ADDL $0xfde5380c, BX
ADDL R8, BX
MOVL 4(SI), R8
ADDL R9, BX
ROLL $0x17, BX
ADDL CX, BX
XORL AX, R9
XORL BX, R9
ADDL $0xa4beea44, AX
ADDL R8, AX
MOVL 16(SI), R8
ADDL R9, AX
ROLL $0x04, AX
ADDL BX, AX
XORL DX, R9
XORL AX, R9
ADDL $0x4bdecfa9, DX
ADDL R8, DX
MOVL 28(SI), R8
ADDL R9, DX
ROLL $0x0b, DX
ADDL AX, DX
XORL CX, R9
XORL DX, R9
ADDL $0xf6bb4b60, CX
ADDL R8, CX
MOVL 40(SI), R8
ADDL R9, CX
ROLL $0x10, CX
ADDL DX, CX
XORL BX, R9
XORL CX, R9
ADDL $0xbebfbc70, BX
ADDL R8, BX
MOVL 52(SI), R8
ADDL R9, BX
ROLL $0x17, BX
ADDL CX, BX
XORL AX, R9
XORL BX, R9
ADDL $0x289b7ec6, AX
ADDL R8, AX
MOVL (SI), R8
ADDL R9, AX
ROLL $0x04, AX
ADDL BX, AX
XORL DX, R9
XORL AX, R9
ADDL $0xeaa127fa, DX
ADDL R8, DX
MOVL 12(SI), R8
ADDL R9, DX
ROLL $0x0b, DX
ADDL AX, DX
XORL CX, R9
XORL DX, R9
ADDL $0xd4ef3085, CX
ADDL R8, CX
MOVL 24(SI), R8
ADDL R9, CX
ROLL $0x10, CX
ADDL DX, CX
XORL BX, R9
XORL CX, R9
ADDL $0x04881d05, BX
ADDL R8, BX
MOVL 36(SI), R8
ADDL R9, BX
ROLL $0x17, BX
ADDL CX, BX
XORL AX, R9
XORL BX, R9
ADDL $0xd9d4d039, AX
ADDL R8, AX
MOVL 48(SI), R8
ADDL R9, AX
ROLL $0x04, AX
ADDL BX, AX
XORL DX, R9
XORL AX, R9
ADDL $0xe6db99e5, DX
ADDL R8, DX
MOVL 60(SI), R8
ADDL R9, DX
ROLL $0x0b, DX
ADDL AX, DX
XORL CX, R9
XORL DX, R9
ADDL $0x1fa27cf8, CX
ADDL R8, CX
MOVL 8(SI), R8
ADDL R9, CX
ROLL $0x10, CX
ADDL DX, CX
XORL BX, R9
XORL CX, R9
ADDL $0xc4ac5665, BX
ADDL R8, BX
MOVL (SI), R8
ADDL R9, BX
ROLL $0x17, BX
ADDL CX, BX
MOVL R11, R9
XORL DX, R9
ADDL $0xf4292244, AX
ADDL R8, AX
ORL BX, R9
XORL CX, R9
ADDL R9, AX
MOVL 28(SI), R8
MOVL $0xffffffff, R9
ROLL $0x06, AX
XORL CX, R9
ADDL BX, AX
ADDL $0x432aff97, DX
ADDL R8, DX
ORL AX, R9
XORL BX, R9
ADDL R9, DX
MOVL 56(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0a, DX
XORL BX, R9
ADDL AX, DX
ADDL $0xab9423a7, CX
ADDL R8, CX
ORL DX, R9
XORL AX, R9
ADDL R9, CX
MOVL 20(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0f, CX
XORL AX, R9
ADDL DX, CX
ADDL $0xfc93a039, BX
ADDL R8, BX
ORL CX, R9
XORL DX, R9
ADDL R9, BX
MOVL 48(SI), R8
MOVL $0xffffffff, R9
ROLL $0x15, BX
XORL DX, R9
ADDL CX, BX
ADDL $0x655b59c3, AX
ADDL R8, AX
ORL BX, R9
XORL CX, R9
ADDL R9, AX
MOVL 12(SI), R8
MOVL $0xffffffff, R9
ROLL $0x06, AX
XORL CX, R9
ADDL BX, AX
ADDL $0x8f0ccc92, DX
ADDL R8, DX
ORL AX, R9
XORL BX, R9
ADDL R9, DX
MOVL 40(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0a, DX
XORL BX, R9
ADDL AX, DX
ADDL $0xffeff47d, CX
ADDL R8, CX
ORL DX, R9
XORL AX, R9
ADDL R9, CX
MOVL 4(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0f, CX
XORL AX, R9
ADDL DX, CX
ADDL $0x85845dd1, BX
ADDL R8, BX
ORL CX, R9
XORL DX, R9
ADDL R9, BX
MOVL 32(SI), R8
MOVL $0xffffffff, R9
ROLL $0x15, BX
XORL DX, R9
ADDL CX, BX
ADDL $0x6fa87e4f, AX
ADDL R8, AX
ORL BX, R9
XORL CX, R9
ADDL R9, AX
MOVL 60(SI), R8
MOVL $0xffffffff, R9
ROLL $0x06, AX
XORL CX, R9
ADDL BX, AX
ADDL $0xfe2ce6e0, DX
ADDL R8, DX
ORL AX, R9
XORL BX, R9
ADDL R9, DX
MOVL 24(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0a, DX
XORL BX, R9
ADDL AX, DX
ADDL $0xa3014314, CX
ADDL R8, CX
ORL DX, R9
XORL AX, R9
ADDL R9, CX
MOVL 52(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0f, CX
XORL AX, R9
ADDL DX, CX
ADDL $0x4e0811a1, BX
ADDL R8, BX
ORL CX, R9
XORL DX, R9
ADDL R9, BX
MOVL 16(SI), R8
MOVL $0xffffffff, R9
ROLL $0x15, BX
XORL DX, R9
ADDL CX, BX
ADDL $0xf7537e82, AX
ADDL R8, AX
ORL BX, R9
XORL CX, R9
ADDL R9, AX
MOVL 44(SI), R8
MOVL $0xffffffff, R9
ROLL $0x06, AX
XORL CX, R9
ADDL BX, AX
ADDL $0xbd3af235, DX
ADDL R8, DX
ORL AX, R9
XORL BX, R9
ADDL R9, DX
MOVL 8(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0a, DX
XORL BX, R9
ADDL AX, DX
ADDL $0x2ad7d2bb, CX
ADDL R8, CX
ORL DX, R9
XORL AX, R9
ADDL R9, CX
MOVL 36(SI), R8
MOVL $0xffffffff, R9
ROLL $0x0f, CX
XORL AX, R9
ADDL DX, CX
ADDL $0xeb86d391, BX
ADDL R8, BX
ORL CX, R9
XORL DX, R9
ADDL R9, BX
MOVL (SI), R8
MOVL $0xffffffff, R9
ROLL $0x15, BX
XORL DX, R9
ADDL CX, BX
ADDL R12, AX
ADDL R13, BX
ADDL R14, CX
ADDL R15, DX
ADDQ $0x40, SI
CMPQ SI, DI
JB loop
end:
MOVL AX, (0*4)(BP)
MOVL BX, (1*4)(BP)
MOVL CX, (2*4)(BP)
MOVL DX, (3*4)(BP)
MOVL AX, (BP)
MOVL BX, 4(BP)
MOVL CX, 8(BP)
MOVL DX, 12(BP)
RET

View File

@ -359,6 +359,7 @@ var excluded = map[string]bool{
// See go.dev/issue/46027: some imports are missing for this submodule.
"crypto/internal/bigmod/_asm": true,
"crypto/internal/edwards25519/field/_asm": true,
"crypto/md5/_asm": true,
"crypto/sha1/_asm": true,
"crypto/sha256/_asm": true,
"crypto/sha512/_asm": true,