diff --git a/src/cmd/compile/internal/types2/stdlib_test.go b/src/cmd/compile/internal/types2/stdlib_test.go index d91d60d1241..00f9fab6139 100644 --- a/src/cmd/compile/internal/types2/stdlib_test.go +++ b/src/cmd/compile/internal/types2/stdlib_test.go @@ -357,6 +357,7 @@ var excluded = map[string]bool{ // go.dev/issue/46027: some imports are missing for this submodule. "crypto/internal/bigmod/_asm": true, "crypto/internal/edwards25519/field/_asm": true, + "crypto/md5/_asm": true, "crypto/sha1/_asm": true, "crypto/sha256/_asm": true, "crypto/sha512/_asm": true, diff --git a/src/crypto/md5/_asm/go.mod b/src/crypto/md5/_asm/go.mod new file mode 100644 index 00000000000..04e6e30a84f --- /dev/null +++ b/src/crypto/md5/_asm/go.mod @@ -0,0 +1,11 @@ +module std/crypto/md5/_asm + +go 1.24 + +require github.com/mmcloughlin/avo v0.6.0 + +require ( + golang.org/x/mod v0.20.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/tools v0.24.0 // indirect +) diff --git a/src/crypto/md5/_asm/go.sum b/src/crypto/md5/_asm/go.sum new file mode 100644 index 00000000000..76af484b2eb --- /dev/null +++ b/src/crypto/md5/_asm/go.sum @@ -0,0 +1,8 @@ +github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY= +github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= diff --git a/src/crypto/md5/_asm/md5block_amd64_asm.go b/src/crypto/md5/_asm/md5block_amd64_asm.go new file mode 100644 index 00000000000..45133be83b7 --- /dev/null +++ b/src/crypto/md5/_asm/md5block_amd64_asm.go @@ -0,0 +1,228 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Original source: +// http://www.zorinaq.com/papers/md5-amd64.html +// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 +// +// Translated from Perl generating GNU assembly into +// #defines generating 6a assembly by the Go Authors. + +package main + +import ( + . "github.com/mmcloughlin/avo/build" + . "github.com/mmcloughlin/avo/operand" + . "github.com/mmcloughlin/avo/reg" +) + +//go:generate go run . -out ../md5block_amd64.s -pkg md5 + +func main() { + Package("crypto/md5") + ConstraintExpr("!purego") + block() + Generate() +} + +// MD5 optimized for AMD64. +// +// Author: Marc Bevand +// Licence: I hereby disclaim the copyright on this code and place it +// in the public domain. +func block() { + Implement("block") + Attributes(NOSPLIT) + AllocLocal(8) + + Load(Param("dig"), RBP) + Load(Param("p").Base(), RSI) + Load(Param("p").Len(), RDX) + SHRQ(Imm(6), RDX) + SHLQ(Imm(6), RDX) + + LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI) + MOVL(Mem{Base: BP}.Offset(0*4), EAX) + MOVL(Mem{Base: BP}.Offset(1*4), EBX) + MOVL(Mem{Base: BP}.Offset(2*4), ECX) + MOVL(Mem{Base: BP}.Offset(3*4), EDX) + MOVL(Imm(0xffffffff), R11L) + + CMPQ(RSI, RDI) + JEQ(LabelRef("end")) + + loop() + end() +} + +func loop() { + Label("loop") + MOVL(EAX, R12L) + MOVL(EBX, R13L) + MOVL(ECX, R14L) + MOVL(EDX, R15L) + + MOVL(Mem{Base: SI}.Offset(0*4), R8L) + MOVL(EDX, R9L) + + ROUND1(EAX, EBX, ECX, EDX, 1, 0xd76aa478, 7) + ROUND1(EDX, EAX, EBX, ECX, 2, 0xe8c7b756, 12) + ROUND1(ECX, EDX, EAX, EBX, 3, 0x242070db, 17) + ROUND1(EBX, ECX, EDX, EAX, 4, 0xc1bdceee, 22) + ROUND1(EAX, EBX, ECX, EDX, 5, 0xf57c0faf, 7) + ROUND1(EDX, EAX, EBX, ECX, 6, 0x4787c62a, 12) + ROUND1(ECX, EDX, EAX, EBX, 7, 0xa8304613, 17) + ROUND1(EBX, ECX, EDX, EAX, 8, 0xfd469501, 22) + ROUND1(EAX, EBX, ECX, EDX, 9, 0x698098d8, 7) + ROUND1(EDX, EAX, EBX, ECX, 10, 0x8b44f7af, 12) + ROUND1(ECX, EDX, EAX, EBX, 11, 0xffff5bb1, 17) + ROUND1(EBX, ECX, EDX, EAX, 12, 0x895cd7be, 22) + ROUND1(EAX, EBX, ECX, EDX, 13, 0x6b901122, 7) + ROUND1(EDX, EAX, EBX, ECX, 14, 0xfd987193, 12) + ROUND1(ECX, EDX, EAX, EBX, 15, 0xa679438e, 17) + ROUND1(EBX, ECX, EDX, EAX, 1, 0x49b40821, 22) + + MOVL(EDX, R9L) + MOVL(EDX, R10L) + + ROUND2(EAX, EBX, ECX, EDX, 6, 0xf61e2562, 5) + ROUND2(EDX, EAX, EBX, ECX, 11, 0xc040b340, 9) + ROUND2(ECX, EDX, EAX, EBX, 0, 0x265e5a51, 14) + ROUND2(EBX, ECX, EDX, EAX, 5, 0xe9b6c7aa, 20) + ROUND2(EAX, EBX, ECX, EDX, 10, 0xd62f105d, 5) + ROUND2(EDX, EAX, EBX, ECX, 15, 0x2441453, 9) + ROUND2(ECX, EDX, EAX, EBX, 4, 0xd8a1e681, 14) + ROUND2(EBX, ECX, EDX, EAX, 9, 0xe7d3fbc8, 20) + ROUND2(EAX, EBX, ECX, EDX, 14, 0x21e1cde6, 5) + ROUND2(EDX, EAX, EBX, ECX, 3, 0xc33707d6, 9) + ROUND2(ECX, EDX, EAX, EBX, 8, 0xf4d50d87, 14) + ROUND2(EBX, ECX, EDX, EAX, 13, 0x455a14ed, 20) + ROUND2(EAX, EBX, ECX, EDX, 2, 0xa9e3e905, 5) + ROUND2(EDX, EAX, EBX, ECX, 7, 0xfcefa3f8, 9) + ROUND2(ECX, EDX, EAX, EBX, 12, 0x676f02d9, 14) + ROUND2(EBX, ECX, EDX, EAX, 5, 0x8d2a4c8a, 20) + + MOVL(ECX, R9L) + + ROUND3FIRST(EAX, EBX, ECX, EDX, 8, 0xfffa3942, 4) + ROUND3(EDX, EAX, EBX, ECX, 11, 0x8771f681, 11) + ROUND3(ECX, EDX, EAX, EBX, 14, 0x6d9d6122, 16) + ROUND3(EBX, ECX, EDX, EAX, 1, 0xfde5380c, 23) + ROUND3(EAX, EBX, ECX, EDX, 4, 0xa4beea44, 4) + ROUND3(EDX, EAX, EBX, ECX, 7, 0x4bdecfa9, 11) + ROUND3(ECX, EDX, EAX, EBX, 10, 0xf6bb4b60, 16) + ROUND3(EBX, ECX, EDX, EAX, 13, 0xbebfbc70, 23) + ROUND3(EAX, EBX, ECX, EDX, 0, 0x289b7ec6, 4) + ROUND3(EDX, EAX, EBX, ECX, 3, 0xeaa127fa, 11) + ROUND3(ECX, EDX, EAX, EBX, 6, 0xd4ef3085, 16) + ROUND3(EBX, ECX, EDX, EAX, 9, 0x4881d05, 23) + ROUND3(EAX, EBX, ECX, EDX, 12, 0xd9d4d039, 4) + ROUND3(EDX, EAX, EBX, ECX, 15, 0xe6db99e5, 11) + ROUND3(ECX, EDX, EAX, EBX, 2, 0x1fa27cf8, 16) + ROUND3(EBX, ECX, EDX, EAX, 0, 0xc4ac5665, 23) + + MOVL(R11L, R9L) + XORL(EDX, R9L) + + ROUND4(EAX, EBX, ECX, EDX, 7, 0xf4292244, 6) + ROUND4(EDX, EAX, EBX, ECX, 14, 0x432aff97, 10) + ROUND4(ECX, EDX, EAX, EBX, 5, 0xab9423a7, 15) + ROUND4(EBX, ECX, EDX, EAX, 12, 0xfc93a039, 21) + ROUND4(EAX, EBX, ECX, EDX, 3, 0x655b59c3, 6) + ROUND4(EDX, EAX, EBX, ECX, 10, 0x8f0ccc92, 10) + ROUND4(ECX, EDX, EAX, EBX, 1, 0xffeff47d, 15) + ROUND4(EBX, ECX, EDX, EAX, 8, 0x85845dd1, 21) + ROUND4(EAX, EBX, ECX, EDX, 15, 0x6fa87e4f, 6) + ROUND4(EDX, EAX, EBX, ECX, 6, 0xfe2ce6e0, 10) + ROUND4(ECX, EDX, EAX, EBX, 13, 0xa3014314, 15) + ROUND4(EBX, ECX, EDX, EAX, 4, 0x4e0811a1, 21) + ROUND4(EAX, EBX, ECX, EDX, 11, 0xf7537e82, 6) + ROUND4(EDX, EAX, EBX, ECX, 2, 0xbd3af235, 10) + ROUND4(ECX, EDX, EAX, EBX, 9, 0x2ad7d2bb, 15) + ROUND4(EBX, ECX, EDX, EAX, 0, 0xeb86d391, 21) + + ADDL(R12L, EAX) + ADDL(R13L, EBX) + ADDL(R14L, ECX) + ADDL(R15L, EDX) + + ADDQ(Imm(64), RSI) + CMPQ(RSI, RDI) + JB(LabelRef("loop")) +} + +func end() { + Label("end") + MOVL(EAX, Mem{Base: BP}.Offset(0*4)) + MOVL(EBX, Mem{Base: BP}.Offset(1*4)) + MOVL(ECX, Mem{Base: BP}.Offset(2*4)) + MOVL(EDX, Mem{Base: BP}.Offset(3*4)) + RET() +} + +func ROUND1(a, b, c, d GPPhysical, index int, konst, shift uint64) { + XORL(c, R9L) + ADDL(Imm(konst), a) + ADDL(R8L, a) + ANDL(b, R9L) + XORL(d, R9L) + MOVL(Mem{Base: SI}.Offset(index*4), R8L) + ADDL(R9L, a) + ROLL(Imm(shift), a) + MOVL(c, R9L) + ADDL(b, a) +} + +// Uses https://github.com/animetosho/md5-optimisation#dependency-shortcut-in-g-function +func ROUND2(a, b, c, d GPPhysical, index int, konst, shift uint64) { + XORL(R11L, R9L) + ADDL(Imm(konst), a) + ADDL(R8L, a) + ANDL(b, R10L) + ANDL(c, R9L) + MOVL(Mem{Base: SI}.Offset(index*4), R8L) + ADDL(R9L, a) + ADDL(R10L, a) + MOVL(c, R9L) + MOVL(c, R10L) + ROLL(Imm(shift), a) + ADDL(b, a) +} + +// Uses https://github.com/animetosho/md5-optimisation#h-function-re-use +func ROUND3FIRST(a, b, c, d GPPhysical, index int, konst, shift uint64) { + MOVL(d, R9L) + XORL(c, R9L) + XORL(b, R9L) + ADDL(Imm(konst), a) + ADDL(R8L, a) + MOVL(Mem{Base: SI}.Offset(index*4), R8L) + ADDL(R9L, a) + ROLL(Imm(shift), a) + ADDL(b, a) +} + +func ROUND3(a, b, c, d GPPhysical, index int, konst, shift uint64) { + XORL(a, R9L) + XORL(b, R9L) + ADDL(Imm(konst), a) + ADDL(R8L, a) + MOVL(Mem{Base: SI}.Offset(index*4), R8L) + ADDL(R9L, a) + ROLL(Imm(shift), a) + ADDL(b, a) +} + +func ROUND4(a, b, c, d GPPhysical, index int, konst, shift uint64) { + ADDL(Imm(konst), a) + ADDL(R8L, a) + ORL(b, R9L) + XORL(c, R9L) + ADDL(R9L, a) + MOVL(Mem{Base: SI}.Offset(index*4), R8L) + MOVL(Imm(0xffffffff), R9L) + ROLL(Imm(shift), a) + XORL(c, R9L) + ADDL(b, a) +} diff --git a/src/crypto/md5/md5block_amd64.s b/src/crypto/md5/md5block_amd64.s index 652506ae27f..979eb22dbf7 100644 --- a/src/crypto/md5/md5block_amd64.s +++ b/src/crypto/md5/md5block_amd64.s @@ -1,197 +1,689 @@ -// Original source: -// http://www.zorinaq.com/papers/md5-amd64.html -// http://www.zorinaq.com/papers/md5-amd64.tar.bz2 -// -// Translated from Perl generating GNU assembly into -// #defines generating 6a assembly by the Go Authors. +// Code generated by command: go run md5block_amd64_asm.go -out ../md5block_amd64.s -pkg md5. DO NOT EDIT. //go:build !purego #include "textflag.h" -// MD5 optimized for AMD64. -// -// Author: Marc Bevand -// Licence: I hereby disclaim the copyright on this code and place it -// in the public domain. - -TEXT ·block(SB),NOSPLIT,$8-32 - MOVQ dig+0(FP), BP - MOVQ p+8(FP), SI - MOVQ p_len+16(FP), DX - SHRQ $6, DX - SHLQ $6, DX - - LEAQ (SI)(DX*1), DI - MOVL (0*4)(BP), AX - MOVL (1*4)(BP), BX - MOVL (2*4)(BP), CX - MOVL (3*4)(BP), DX - MOVL $0xffffffff, R11 - - CMPQ SI, DI - JEQ end +// func block(dig *digest, p []byte) +TEXT ·block(SB), NOSPLIT, $8-32 + MOVQ dig+0(FP), BP + MOVQ p_base+8(FP), SI + MOVQ p_len+16(FP), DX + SHRQ $0x06, DX + SHLQ $0x06, DX + LEAQ (SI)(DX*1), DI + MOVL (BP), AX + MOVL 4(BP), BX + MOVL 8(BP), CX + MOVL 12(BP), DX + MOVL $0xffffffff, R11 + CMPQ SI, DI + JEQ end loop: - MOVL AX, R12 - MOVL BX, R13 - MOVL CX, R14 - MOVL DX, R15 - - MOVL (0*4)(SI), R8 - MOVL DX, R9 - -#define ROUND1(a, b, c, d, index, const, shift) \ - XORL c, R9; \ - ADDL $const, a; \ - ADDL R8, a; \ - ANDL b, R9; \ - XORL d, R9; \ - MOVL (index*4)(SI), R8; \ - ADDL R9, a; \ - ROLL $shift, a; \ - MOVL c, R9; \ - ADDL b, a - - ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7); - ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12); - ROUND1(CX,DX,AX,BX, 3,0x242070db,17); - ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22); - ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7); - ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12); - ROUND1(CX,DX,AX,BX, 7,0xa8304613,17); - ROUND1(BX,CX,DX,AX, 8,0xfd469501,22); - ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7); - ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12); - ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17); - ROUND1(BX,CX,DX,AX,12,0x895cd7be,22); - ROUND1(AX,BX,CX,DX,13,0x6b901122, 7); - ROUND1(DX,AX,BX,CX,14,0xfd987193,12); - ROUND1(CX,DX,AX,BX,15,0xa679438e,17); - ROUND1(BX,CX,DX,AX, 1,0x49b40821,22); - - MOVL DX, R9 - MOVL DX, R10 - -// Uses https://github.com/animetosho/md5-optimisation#dependency-shortcut-in-g-function - -#define ROUND2(a, b, c, d, index, const, shift) \ - XORL R11, R9; \ - ADDL $const, a; \ - ADDL R8, a; \ - ANDL b, R10; \ - ANDL c, R9; \ - MOVL (index*4)(SI),R8; \ - ADDL R9, a; \ - ADDL R10, a; \ - MOVL c, R9; \ - MOVL c, R10; \ - ROLL $shift, a; \ - ADDL b, a - - ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5); - ROUND2(DX,AX,BX,CX,11,0xc040b340, 9); - ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14); - ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20); - ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5); - ROUND2(DX,AX,BX,CX,15, 0x2441453, 9); - ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14); - ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20); - ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5); - ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9); - ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14); - ROUND2(BX,CX,DX,AX,13,0x455a14ed,20); - ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5); - ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9); - ROUND2(CX,DX,AX,BX,12,0x676f02d9,14); - ROUND2(BX,CX,DX,AX, 5,0x8d2a4c8a,20); - - MOVL CX, R9 - -// Uses https://github.com/animetosho/md5-optimisation#h-function-re-use - -#define ROUND3FIRST(a, b, c, d, index, const, shift) \ - MOVL d, R9; \ - XORL c, R9; \ - XORL b, R9; \ - ADDL $const, a; \ - ADDL R8, a; \ - MOVL (index*4)(SI),R8; \ - ADDL R9, a; \ - ROLL $shift, a; \ - ADDL b, a - -#define ROUND3(a, b, c, d, index, const, shift) \ - XORL a, R9; \ - XORL b, R9; \ - ADDL $const, a; \ - ADDL R8, a; \ - MOVL (index*4)(SI),R8; \ - ADDL R9, a; \ - ROLL $shift, a; \ - ADDL b, a - - ROUND3FIRST(AX,BX,CX,DX, 8,0xfffa3942, 4); - ROUND3(DX,AX,BX,CX,11,0x8771f681,11); - ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16); - ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23); - ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4); - ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11); - ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16); - ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23); - ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4); - ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11); - ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16); - ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23); - ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4); - ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11); - ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16); - ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23); - - MOVL R11, R9 - XORL DX, R9 - -#define ROUND4(a, b, c, d, index, const, shift) \ - ADDL $const, a; \ - ADDL R8, a; \ - ORL b, R9; \ - XORL c, R9; \ - ADDL R9, a; \ - MOVL (index*4)(SI),R8; \ - MOVL $0xffffffff, R9; \ - ROLL $shift, a; \ - XORL c, R9; \ - ADDL b, a - - ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6); - ROUND4(DX,AX,BX,CX,14,0x432aff97,10); - ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15); - ROUND4(BX,CX,DX,AX,12,0xfc93a039,21); - ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6); - ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10); - ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15); - ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21); - ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6); - ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10); - ROUND4(CX,DX,AX,BX,13,0xa3014314,15); - ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21); - ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6); - ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10); - ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15); - ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21); - - ADDL R12, AX - ADDL R13, BX - ADDL R14, CX - ADDL R15, DX - - ADDQ $64, SI - CMPQ SI, DI - JB loop + MOVL AX, R12 + MOVL BX, R13 + MOVL CX, R14 + MOVL DX, R15 + MOVL (SI), R8 + MOVL DX, R9 + XORL CX, R9 + ADDL $0xd76aa478, AX + ADDL R8, AX + ANDL BX, R9 + XORL DX, R9 + MOVL 4(SI), R8 + ADDL R9, AX + ROLL $0x07, AX + MOVL CX, R9 + ADDL BX, AX + XORL BX, R9 + ADDL $0xe8c7b756, DX + ADDL R8, DX + ANDL AX, R9 + XORL CX, R9 + MOVL 8(SI), R8 + ADDL R9, DX + ROLL $0x0c, DX + MOVL BX, R9 + ADDL AX, DX + XORL AX, R9 + ADDL $0x242070db, CX + ADDL R8, CX + ANDL DX, R9 + XORL BX, R9 + MOVL 12(SI), R8 + ADDL R9, CX + ROLL $0x11, CX + MOVL AX, R9 + ADDL DX, CX + XORL DX, R9 + ADDL $0xc1bdceee, BX + ADDL R8, BX + ANDL CX, R9 + XORL AX, R9 + MOVL 16(SI), R8 + ADDL R9, BX + ROLL $0x16, BX + MOVL DX, R9 + ADDL CX, BX + XORL CX, R9 + ADDL $0xf57c0faf, AX + ADDL R8, AX + ANDL BX, R9 + XORL DX, R9 + MOVL 20(SI), R8 + ADDL R9, AX + ROLL $0x07, AX + MOVL CX, R9 + ADDL BX, AX + XORL BX, R9 + ADDL $0x4787c62a, DX + ADDL R8, DX + ANDL AX, R9 + XORL CX, R9 + MOVL 24(SI), R8 + ADDL R9, DX + ROLL $0x0c, DX + MOVL BX, R9 + ADDL AX, DX + XORL AX, R9 + ADDL $0xa8304613, CX + ADDL R8, CX + ANDL DX, R9 + XORL BX, R9 + MOVL 28(SI), R8 + ADDL R9, CX + ROLL $0x11, CX + MOVL AX, R9 + ADDL DX, CX + XORL DX, R9 + ADDL $0xfd469501, BX + ADDL R8, BX + ANDL CX, R9 + XORL AX, R9 + MOVL 32(SI), R8 + ADDL R9, BX + ROLL $0x16, BX + MOVL DX, R9 + ADDL CX, BX + XORL CX, R9 + ADDL $0x698098d8, AX + ADDL R8, AX + ANDL BX, R9 + XORL DX, R9 + MOVL 36(SI), R8 + ADDL R9, AX + ROLL $0x07, AX + MOVL CX, R9 + ADDL BX, AX + XORL BX, R9 + ADDL $0x8b44f7af, DX + ADDL R8, DX + ANDL AX, R9 + XORL CX, R9 + MOVL 40(SI), R8 + ADDL R9, DX + ROLL $0x0c, DX + MOVL BX, R9 + ADDL AX, DX + XORL AX, R9 + ADDL $0xffff5bb1, CX + ADDL R8, CX + ANDL DX, R9 + XORL BX, R9 + MOVL 44(SI), R8 + ADDL R9, CX + ROLL $0x11, CX + MOVL AX, R9 + ADDL DX, CX + XORL DX, R9 + ADDL $0x895cd7be, BX + ADDL R8, BX + ANDL CX, R9 + XORL AX, R9 + MOVL 48(SI), R8 + ADDL R9, BX + ROLL $0x16, BX + MOVL DX, R9 + ADDL CX, BX + XORL CX, R9 + ADDL $0x6b901122, AX + ADDL R8, AX + ANDL BX, R9 + XORL DX, R9 + MOVL 52(SI), R8 + ADDL R9, AX + ROLL $0x07, AX + MOVL CX, R9 + ADDL BX, AX + XORL BX, R9 + ADDL $0xfd987193, DX + ADDL R8, DX + ANDL AX, R9 + XORL CX, R9 + MOVL 56(SI), R8 + ADDL R9, DX + ROLL $0x0c, DX + MOVL BX, R9 + ADDL AX, DX + XORL AX, R9 + ADDL $0xa679438e, CX + ADDL R8, CX + ANDL DX, R9 + XORL BX, R9 + MOVL 60(SI), R8 + ADDL R9, CX + ROLL $0x11, CX + MOVL AX, R9 + ADDL DX, CX + XORL DX, R9 + ADDL $0x49b40821, BX + ADDL R8, BX + ANDL CX, R9 + XORL AX, R9 + MOVL 4(SI), R8 + ADDL R9, BX + ROLL $0x16, BX + MOVL DX, R9 + ADDL CX, BX + MOVL DX, R9 + MOVL DX, R10 + XORL R11, R9 + ADDL $0xf61e2562, AX + ADDL R8, AX + ANDL BX, R10 + ANDL CX, R9 + MOVL 24(SI), R8 + ADDL R9, AX + ADDL R10, AX + MOVL CX, R9 + MOVL CX, R10 + ROLL $0x05, AX + ADDL BX, AX + XORL R11, R9 + ADDL $0xc040b340, DX + ADDL R8, DX + ANDL AX, R10 + ANDL BX, R9 + MOVL 44(SI), R8 + ADDL R9, DX + ADDL R10, DX + MOVL BX, R9 + MOVL BX, R10 + ROLL $0x09, DX + ADDL AX, DX + XORL R11, R9 + ADDL $0x265e5a51, CX + ADDL R8, CX + ANDL DX, R10 + ANDL AX, R9 + MOVL (SI), R8 + ADDL R9, CX + ADDL R10, CX + MOVL AX, R9 + MOVL AX, R10 + ROLL $0x0e, CX + ADDL DX, CX + XORL R11, R9 + ADDL $0xe9b6c7aa, BX + ADDL R8, BX + ANDL CX, R10 + ANDL DX, R9 + MOVL 20(SI), R8 + ADDL R9, BX + ADDL R10, BX + MOVL DX, R9 + MOVL DX, R10 + ROLL $0x14, BX + ADDL CX, BX + XORL R11, R9 + ADDL $0xd62f105d, AX + ADDL R8, AX + ANDL BX, R10 + ANDL CX, R9 + MOVL 40(SI), R8 + ADDL R9, AX + ADDL R10, AX + MOVL CX, R9 + MOVL CX, R10 + ROLL $0x05, AX + ADDL BX, AX + XORL R11, R9 + ADDL $0x02441453, DX + ADDL R8, DX + ANDL AX, R10 + ANDL BX, R9 + MOVL 60(SI), R8 + ADDL R9, DX + ADDL R10, DX + MOVL BX, R9 + MOVL BX, R10 + ROLL $0x09, DX + ADDL AX, DX + XORL R11, R9 + ADDL $0xd8a1e681, CX + ADDL R8, CX + ANDL DX, R10 + ANDL AX, R9 + MOVL 16(SI), R8 + ADDL R9, CX + ADDL R10, CX + MOVL AX, R9 + MOVL AX, R10 + ROLL $0x0e, CX + ADDL DX, CX + XORL R11, R9 + ADDL $0xe7d3fbc8, BX + ADDL R8, BX + ANDL CX, R10 + ANDL DX, R9 + MOVL 36(SI), R8 + ADDL R9, BX + ADDL R10, BX + MOVL DX, R9 + MOVL DX, R10 + ROLL $0x14, BX + ADDL CX, BX + XORL R11, R9 + ADDL $0x21e1cde6, AX + ADDL R8, AX + ANDL BX, R10 + ANDL CX, R9 + MOVL 56(SI), R8 + ADDL R9, AX + ADDL R10, AX + MOVL CX, R9 + MOVL CX, R10 + ROLL $0x05, AX + ADDL BX, AX + XORL R11, R9 + ADDL $0xc33707d6, DX + ADDL R8, DX + ANDL AX, R10 + ANDL BX, R9 + MOVL 12(SI), R8 + ADDL R9, DX + ADDL R10, DX + MOVL BX, R9 + MOVL BX, R10 + ROLL $0x09, DX + ADDL AX, DX + XORL R11, R9 + ADDL $0xf4d50d87, CX + ADDL R8, CX + ANDL DX, R10 + ANDL AX, R9 + MOVL 32(SI), R8 + ADDL R9, CX + ADDL R10, CX + MOVL AX, R9 + MOVL AX, R10 + ROLL $0x0e, CX + ADDL DX, CX + XORL R11, R9 + ADDL $0x455a14ed, BX + ADDL R8, BX + ANDL CX, R10 + ANDL DX, R9 + MOVL 52(SI), R8 + ADDL R9, BX + ADDL R10, BX + MOVL DX, R9 + MOVL DX, R10 + ROLL $0x14, BX + ADDL CX, BX + XORL R11, R9 + ADDL $0xa9e3e905, AX + ADDL R8, AX + ANDL BX, R10 + ANDL CX, R9 + MOVL 8(SI), R8 + ADDL R9, AX + ADDL R10, AX + MOVL CX, R9 + MOVL CX, R10 + ROLL $0x05, AX + ADDL BX, AX + XORL R11, R9 + ADDL $0xfcefa3f8, DX + ADDL R8, DX + ANDL AX, R10 + ANDL BX, R9 + MOVL 28(SI), R8 + ADDL R9, DX + ADDL R10, DX + MOVL BX, R9 + MOVL BX, R10 + ROLL $0x09, DX + ADDL AX, DX + XORL R11, R9 + ADDL $0x676f02d9, CX + ADDL R8, CX + ANDL DX, R10 + ANDL AX, R9 + MOVL 48(SI), R8 + ADDL R9, CX + ADDL R10, CX + MOVL AX, R9 + MOVL AX, R10 + ROLL $0x0e, CX + ADDL DX, CX + XORL R11, R9 + ADDL $0x8d2a4c8a, BX + ADDL R8, BX + ANDL CX, R10 + ANDL DX, R9 + MOVL 20(SI), R8 + ADDL R9, BX + ADDL R10, BX + MOVL DX, R9 + MOVL DX, R10 + ROLL $0x14, BX + ADDL CX, BX + MOVL CX, R9 + MOVL DX, R9 + XORL CX, R9 + XORL BX, R9 + ADDL $0xfffa3942, AX + ADDL R8, AX + MOVL 32(SI), R8 + ADDL R9, AX + ROLL $0x04, AX + ADDL BX, AX + XORL DX, R9 + XORL AX, R9 + ADDL $0x8771f681, DX + ADDL R8, DX + MOVL 44(SI), R8 + ADDL R9, DX + ROLL $0x0b, DX + ADDL AX, DX + XORL CX, R9 + XORL DX, R9 + ADDL $0x6d9d6122, CX + ADDL R8, CX + MOVL 56(SI), R8 + ADDL R9, CX + ROLL $0x10, CX + ADDL DX, CX + XORL BX, R9 + XORL CX, R9 + ADDL $0xfde5380c, BX + ADDL R8, BX + MOVL 4(SI), R8 + ADDL R9, BX + ROLL $0x17, BX + ADDL CX, BX + XORL AX, R9 + XORL BX, R9 + ADDL $0xa4beea44, AX + ADDL R8, AX + MOVL 16(SI), R8 + ADDL R9, AX + ROLL $0x04, AX + ADDL BX, AX + XORL DX, R9 + XORL AX, R9 + ADDL $0x4bdecfa9, DX + ADDL R8, DX + MOVL 28(SI), R8 + ADDL R9, DX + ROLL $0x0b, DX + ADDL AX, DX + XORL CX, R9 + XORL DX, R9 + ADDL $0xf6bb4b60, CX + ADDL R8, CX + MOVL 40(SI), R8 + ADDL R9, CX + ROLL $0x10, CX + ADDL DX, CX + XORL BX, R9 + XORL CX, R9 + ADDL $0xbebfbc70, BX + ADDL R8, BX + MOVL 52(SI), R8 + ADDL R9, BX + ROLL $0x17, BX + ADDL CX, BX + XORL AX, R9 + XORL BX, R9 + ADDL $0x289b7ec6, AX + ADDL R8, AX + MOVL (SI), R8 + ADDL R9, AX + ROLL $0x04, AX + ADDL BX, AX + XORL DX, R9 + XORL AX, R9 + ADDL $0xeaa127fa, DX + ADDL R8, DX + MOVL 12(SI), R8 + ADDL R9, DX + ROLL $0x0b, DX + ADDL AX, DX + XORL CX, R9 + XORL DX, R9 + ADDL $0xd4ef3085, CX + ADDL R8, CX + MOVL 24(SI), R8 + ADDL R9, CX + ROLL $0x10, CX + ADDL DX, CX + XORL BX, R9 + XORL CX, R9 + ADDL $0x04881d05, BX + ADDL R8, BX + MOVL 36(SI), R8 + ADDL R9, BX + ROLL $0x17, BX + ADDL CX, BX + XORL AX, R9 + XORL BX, R9 + ADDL $0xd9d4d039, AX + ADDL R8, AX + MOVL 48(SI), R8 + ADDL R9, AX + ROLL $0x04, AX + ADDL BX, AX + XORL DX, R9 + XORL AX, R9 + ADDL $0xe6db99e5, DX + ADDL R8, DX + MOVL 60(SI), R8 + ADDL R9, DX + ROLL $0x0b, DX + ADDL AX, DX + XORL CX, R9 + XORL DX, R9 + ADDL $0x1fa27cf8, CX + ADDL R8, CX + MOVL 8(SI), R8 + ADDL R9, CX + ROLL $0x10, CX + ADDL DX, CX + XORL BX, R9 + XORL CX, R9 + ADDL $0xc4ac5665, BX + ADDL R8, BX + MOVL (SI), R8 + ADDL R9, BX + ROLL $0x17, BX + ADDL CX, BX + MOVL R11, R9 + XORL DX, R9 + ADDL $0xf4292244, AX + ADDL R8, AX + ORL BX, R9 + XORL CX, R9 + ADDL R9, AX + MOVL 28(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x06, AX + XORL CX, R9 + ADDL BX, AX + ADDL $0x432aff97, DX + ADDL R8, DX + ORL AX, R9 + XORL BX, R9 + ADDL R9, DX + MOVL 56(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0a, DX + XORL BX, R9 + ADDL AX, DX + ADDL $0xab9423a7, CX + ADDL R8, CX + ORL DX, R9 + XORL AX, R9 + ADDL R9, CX + MOVL 20(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0f, CX + XORL AX, R9 + ADDL DX, CX + ADDL $0xfc93a039, BX + ADDL R8, BX + ORL CX, R9 + XORL DX, R9 + ADDL R9, BX + MOVL 48(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x15, BX + XORL DX, R9 + ADDL CX, BX + ADDL $0x655b59c3, AX + ADDL R8, AX + ORL BX, R9 + XORL CX, R9 + ADDL R9, AX + MOVL 12(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x06, AX + XORL CX, R9 + ADDL BX, AX + ADDL $0x8f0ccc92, DX + ADDL R8, DX + ORL AX, R9 + XORL BX, R9 + ADDL R9, DX + MOVL 40(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0a, DX + XORL BX, R9 + ADDL AX, DX + ADDL $0xffeff47d, CX + ADDL R8, CX + ORL DX, R9 + XORL AX, R9 + ADDL R9, CX + MOVL 4(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0f, CX + XORL AX, R9 + ADDL DX, CX + ADDL $0x85845dd1, BX + ADDL R8, BX + ORL CX, R9 + XORL DX, R9 + ADDL R9, BX + MOVL 32(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x15, BX + XORL DX, R9 + ADDL CX, BX + ADDL $0x6fa87e4f, AX + ADDL R8, AX + ORL BX, R9 + XORL CX, R9 + ADDL R9, AX + MOVL 60(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x06, AX + XORL CX, R9 + ADDL BX, AX + ADDL $0xfe2ce6e0, DX + ADDL R8, DX + ORL AX, R9 + XORL BX, R9 + ADDL R9, DX + MOVL 24(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0a, DX + XORL BX, R9 + ADDL AX, DX + ADDL $0xa3014314, CX + ADDL R8, CX + ORL DX, R9 + XORL AX, R9 + ADDL R9, CX + MOVL 52(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0f, CX + XORL AX, R9 + ADDL DX, CX + ADDL $0x4e0811a1, BX + ADDL R8, BX + ORL CX, R9 + XORL DX, R9 + ADDL R9, BX + MOVL 16(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x15, BX + XORL DX, R9 + ADDL CX, BX + ADDL $0xf7537e82, AX + ADDL R8, AX + ORL BX, R9 + XORL CX, R9 + ADDL R9, AX + MOVL 44(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x06, AX + XORL CX, R9 + ADDL BX, AX + ADDL $0xbd3af235, DX + ADDL R8, DX + ORL AX, R9 + XORL BX, R9 + ADDL R9, DX + MOVL 8(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0a, DX + XORL BX, R9 + ADDL AX, DX + ADDL $0x2ad7d2bb, CX + ADDL R8, CX + ORL DX, R9 + XORL AX, R9 + ADDL R9, CX + MOVL 36(SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x0f, CX + XORL AX, R9 + ADDL DX, CX + ADDL $0xeb86d391, BX + ADDL R8, BX + ORL CX, R9 + XORL DX, R9 + ADDL R9, BX + MOVL (SI), R8 + MOVL $0xffffffff, R9 + ROLL $0x15, BX + XORL DX, R9 + ADDL CX, BX + ADDL R12, AX + ADDL R13, BX + ADDL R14, CX + ADDL R15, DX + ADDQ $0x40, SI + CMPQ SI, DI + JB loop end: - MOVL AX, (0*4)(BP) - MOVL BX, (1*4)(BP) - MOVL CX, (2*4)(BP) - MOVL DX, (3*4)(BP) + MOVL AX, (BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) RET diff --git a/src/go/types/stdlib_test.go b/src/go/types/stdlib_test.go index b82fd0ddc1a..d41a3d10df6 100644 --- a/src/go/types/stdlib_test.go +++ b/src/go/types/stdlib_test.go @@ -359,6 +359,7 @@ var excluded = map[string]bool{ // See go.dev/issue/46027: some imports are missing for this submodule. "crypto/internal/bigmod/_asm": true, "crypto/internal/edwards25519/field/_asm": true, + "crypto/md5/_asm": true, "crypto/sha1/_asm": true, "crypto/sha256/_asm": true, "crypto/sha512/_asm": true,