mirror of
https://github.com/golang/go
synced 2024-11-13 13:00:32 -07:00
crypto/md5: implement md5block in hardware on loong64
goos: linux goarch: loong64 pkg: crypto/md5 cpu: Loongson-3A6000 @ 2500.00MHz │ bench.old │ bench.new │ │ sec/op │ sec/op vs base │ Hash8Bytes 276.6n ± 0% 219.7n ± 0% -20.57% (p=0.000 n=20) Hash64 445.8n ± 0% 339.9n ± 0% -23.76% (p=0.000 n=20) Hash128 632.0n ± 0% 468.1n ± 0% -25.93% (p=0.000 n=20) Hash256 1005.0n ± 0% 723.8n ± 0% -27.98% (p=0.000 n=20) Hash512 1.749µ ± 0% 1.238µ ± 0% -29.22% (p=0.000 n=20) Hash1K 3.238µ ± 0% 2.265µ ± 0% -30.05% (p=0.000 n=20) Hash8K 24.09µ ± 0% 16.66µ ± 0% -30.83% (p=0.000 n=20) Hash1M 3.049m ± 0% 2.105m ± 0% -30.97% (p=0.000 n=20) Hash8M 24.39m ± 0% 16.84m ± 0% -30.97% (p=0.000 n=20) Hash8BytesUnaligned 284.1n ± 0% 227.2n ± 0% -20.03% (p=0.000 n=20) Hash1KUnaligned 3.238µ ± 0% 2.265µ ± 0% -30.05% (p=0.000 n=20) Hash8KUnaligned 24.09µ ± 0% 16.66µ ± 0% -30.82% (p=0.000 n=20) geomean 7.142µ 5.164µ -27.70% │ bench.old │ bench.new │ │ B/s │ B/s vs base │ Hash8Bytes 27.58Mi ± 0% 34.73Mi ± 0% +25.93% (p=0.000 n=20) Hash64 136.9Mi ± 0% 179.6Mi ± 0% +31.15% (p=0.000 n=20) Hash128 193.1Mi ± 0% 260.8Mi ± 0% +35.03% (p=0.000 n=20) Hash256 243.0Mi ± 0% 337.3Mi ± 0% +38.82% (p=0.000 n=20) Hash512 279.1Mi ± 0% 394.3Mi ± 0% +41.25% (p=0.000 n=20) Hash1K 301.6Mi ± 0% 431.1Mi ± 0% +42.94% (p=0.000 n=20) Hash8K 324.3Mi ± 0% 468.9Mi ± 0% +44.56% (p=0.000 n=20) Hash1M 327.9Mi ± 0% 475.0Mi ± 0% +44.86% (p=0.000 n=20) Hash8M 328.0Mi ± 0% 475.1Mi ± 0% +44.86% (p=0.000 n=20) Hash8BytesUnaligned 26.86Mi ± 0% 33.58Mi ± 0% +25.04% (p=0.000 n=20) Hash1KUnaligned 301.6Mi ± 0% 431.1Mi ± 0% +42.95% (p=0.000 n=20) Hash8KUnaligned 324.3Mi ± 0% 468.9Mi ± 0% +44.56% (p=0.000 n=20) geomean 182.5Mi 252.4Mi +38.31% goos: linux goarch: loong64 pkg: crypto/md5 cpu: Loongson-3A5000 @ 2500.00MHz │ bench.old │ bench.new │ │ sec/op │ sec/op vs base │ Hash8Bytes 346.0n ± 0% 289.1n ± 0% -16.45% (p=0.000 n=20) Hash64 521.2n ± 0% 409.3n ± 0% -21.47% (p=0.000 n=20) Hash128 707.1n ± 0% 537.8n ± 0% -23.94% (p=0.000 n=20) Hash256 1080.0n ± 0% 795.8n ± 0% -26.31% (p=0.000 n=20) Hash512 1.826µ ± 0% 1.311µ ± 0% -28.20% (p=0.000 n=20) Hash1K 3.315µ ± 0% 2.342µ ± 0% -29.35% (p=0.000 n=20) Hash8K 24.19µ ± 0% 16.78µ ± 0% -30.65% (p=0.000 n=20) Hash1M 3.052m ± 0% 2.110m ± 0% -30.86% (p=0.000 n=20) Hash8M 24.41m ± 0% 16.88m ± 0% -30.85% (p=0.000 n=20) Hash8BytesUnaligned 345.9n ± 0% 289.0n ± 0% -16.45% (p=0.000 n=20) Hash1KUnaligned 3.316µ ± 0% 2.342µ ± 0% -29.37% (p=0.000 n=20) Hash8KUnaligned 24.19µ ± 0% 16.78µ ± 0% -30.66% (p=0.000 n=20) geomean 7.673µ 5.648µ -26.39% │ bench.old │ bench.new │ │ B/s │ B/s vs base │ Hash8Bytes 22.05Mi ± 0% 26.39Mi ± 0% +19.68% (p=0.000 n=20) Hash64 117.1Mi ± 0% 149.1Mi ± 0% +27.32% (p=0.000 n=20) Hash128 172.6Mi ± 0% 227.0Mi ± 0% +31.49% (p=0.000 n=20) Hash256 226.0Mi ± 0% 306.8Mi ± 0% +35.77% (p=0.000 n=20) Hash512 267.4Mi ± 0% 372.5Mi ± 0% +39.26% (p=0.000 n=20) Hash1K 294.6Mi ± 0% 417.0Mi ± 0% +41.53% (p=0.000 n=20) Hash8K 322.9Mi ± 0% 465.7Mi ± 0% +44.20% (p=0.000 n=20) Hash1M 327.7Mi ± 0% 474.0Mi ± 0% +44.64% (p=0.000 n=20) Hash8M 327.8Mi ± 0% 474.1Mi ± 0% +44.62% (p=0.000 n=20) Hash8BytesUnaligned 22.06Mi ± 0% 26.40Mi ± 0% +19.67% (p=0.000 n=20) Hash1KUnaligned 294.5Mi ± 0% 417.0Mi ± 0% +41.60% (p=0.000 n=20) Hash8KUnaligned 322.9Mi ± 0% 465.7Mi ± 0% +44.21% (p=0.000 n=20) geomean 169.9Mi 230.8Mi +35.85% Change-Id: Iffddd60e3fc0b3bb265289f836a2d875f0805f64 Reviewed-on: https://go-review.googlesource.com/c/go/+/589540 Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: abner chenc <chenguoqi@loongson.cn> Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
This commit is contained in:
parent
4a9d98d49a
commit
6edc1c23ed
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (amd64 || 386 || arm || ppc64le || ppc64 || s390x || arm64) && !purego
|
||||
//go:build (386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le || s390x) && !purego
|
||||
|
||||
package md5
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (!amd64 && !386 && !arm && !ppc64le && !ppc64 && !s390x && !arm64) || purego
|
||||
//go:build (!386 && !amd64 && !arm && !arm64 && !loong64 && !ppc64 && !ppc64le && !s390x) || purego
|
||||
|
||||
package md5
|
||||
|
||||
|
180
src/crypto/md5/md5block_loong64.s
Normal file
180
src/crypto/md5/md5block_loong64.s
Normal file
@ -0,0 +1,180 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
//
|
||||
// Loong64 version of md5block.go
|
||||
// derived from crypto/md5/md5block_amd64.s
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#define REGTMP R30
|
||||
#define REGTMP1 R12
|
||||
#define REGTMP2 R18
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func block(dig *digest, p []byte)
|
||||
TEXT ·block(SB),NOSPLIT,$0-32
|
||||
MOVV dig+0(FP), R4
|
||||
MOVV p+8(FP), R5
|
||||
MOVV p_len+16(FP), R6
|
||||
AND $~63, R6
|
||||
BEQ R6, zero
|
||||
|
||||
// p_len >= 64
|
||||
ADDV R5, R6, R24
|
||||
MOVW (0*4)(R4), R7
|
||||
MOVW (1*4)(R4), R8
|
||||
MOVW (2*4)(R4), R9
|
||||
MOVW (3*4)(R4), R10
|
||||
|
||||
loop:
|
||||
MOVW R7, R14
|
||||
MOVW R8, R15
|
||||
MOVW R9, R16
|
||||
MOVW R10, R17
|
||||
|
||||
MOVW (0*4)(R5), R11
|
||||
MOVW R10, REGTMP1
|
||||
|
||||
// F = ((c ^ d) & b) ^ d
|
||||
#define ROUND1(a, b, c, d, index, const, shift) \
|
||||
ADDV $const, a; \
|
||||
ADD R11, a; \
|
||||
MOVW (index*4)(R5), R11; \
|
||||
XOR c, REGTMP1; \
|
||||
AND b, REGTMP1; \
|
||||
XOR d, REGTMP1; \
|
||||
ADD REGTMP1, a; \
|
||||
ROTR $(32-shift), a; \
|
||||
MOVW c, REGTMP1; \
|
||||
ADD b, a
|
||||
|
||||
ROUND1(R7, R8, R9, R10, 1, 0xd76aa478, 7);
|
||||
ROUND1(R10, R7, R8, R9, 2, 0xe8c7b756, 12);
|
||||
ROUND1(R9, R10, R7, R8, 3, 0x242070db, 17);
|
||||
ROUND1(R8, R9, R10, R7, 4, 0xc1bdceee, 22);
|
||||
ROUND1(R7, R8, R9, R10, 5, 0xf57c0faf, 7);
|
||||
ROUND1(R10, R7, R8, R9, 6, 0x4787c62a, 12);
|
||||
ROUND1(R9, R10, R7, R8, 7, 0xa8304613, 17);
|
||||
ROUND1(R8, R9, R10, R7, 8, 0xfd469501, 22);
|
||||
ROUND1(R7, R8, R9, R10, 9, 0x698098d8, 7);
|
||||
ROUND1(R10, R7, R8, R9, 10, 0x8b44f7af, 12);
|
||||
ROUND1(R9, R10, R7, R8, 11, 0xffff5bb1, 17);
|
||||
ROUND1(R8, R9, R10, R7, 12, 0x895cd7be, 22);
|
||||
ROUND1(R7, R8, R9, R10, 13, 0x6b901122, 7);
|
||||
ROUND1(R10, R7, R8, R9, 14, 0xfd987193, 12);
|
||||
ROUND1(R9, R10, R7, R8, 15, 0xa679438e, 17);
|
||||
ROUND1(R8, R9, R10, R7, 1, 0x49b40821, 22);
|
||||
|
||||
MOVW (1*4)(R5), R11
|
||||
|
||||
// F = ((b ^ c) & d) ^ c
|
||||
#define ROUND2(a, b, c, d, index, const, shift) \
|
||||
ADDV $const, a; \
|
||||
ADD R11, a; \
|
||||
MOVW (index*4)(R5), R11; \
|
||||
XOR b, c, REGTMP; \
|
||||
AND REGTMP, d, REGTMP; \
|
||||
XOR REGTMP, c, REGTMP; \
|
||||
ADD REGTMP, a; \
|
||||
ROTR $(32-shift), a; \
|
||||
ADD b, a
|
||||
|
||||
ROUND2(R7, R8, R9, R10, 6, 0xf61e2562, 5);
|
||||
ROUND2(R10, R7, R8, R9, 11, 0xc040b340, 9);
|
||||
ROUND2(R9, R10, R7, R8, 0, 0x265e5a51, 14);
|
||||
ROUND2(R8, R9, R10, R7, 5, 0xe9b6c7aa, 20);
|
||||
ROUND2(R7, R8, R9, R10, 10, 0xd62f105d, 5);
|
||||
ROUND2(R10, R7, R8, R9, 15, 0x2441453, 9);
|
||||
ROUND2(R9, R10, R7, R8, 4, 0xd8a1e681, 14);
|
||||
ROUND2(R8, R9, R10, R7, 9, 0xe7d3fbc8, 20);
|
||||
ROUND2(R7, R8, R9, R10, 14, 0x21e1cde6, 5);
|
||||
ROUND2(R10, R7, R8, R9, 3, 0xc33707d6, 9);
|
||||
ROUND2(R9, R10, R7, R8, 8, 0xf4d50d87, 14);
|
||||
ROUND2(R8, R9, R10, R7, 13, 0x455a14ed, 20);
|
||||
ROUND2(R7, R8, R9, R10, 2, 0xa9e3e905, 5);
|
||||
ROUND2(R10, R7, R8, R9, 7, 0xfcefa3f8, 9);
|
||||
ROUND2(R9, R10, R7, R8, 12, 0x676f02d9, 14);
|
||||
ROUND2(R8, R9, R10, R7, 5, 0x8d2a4c8a, 20);
|
||||
|
||||
MOVW (5*4)(R5), R11
|
||||
MOVW R9, REGTMP1
|
||||
|
||||
// F = b ^ c ^ d
|
||||
#define ROUND3(a, b, c, d, index, const, shift) \
|
||||
ADDV $const, a; \
|
||||
ADD R11, a; \
|
||||
MOVW (index*4)(R5), R11; \
|
||||
XOR d, REGTMP1; \
|
||||
XOR b, REGTMP1; \
|
||||
ADD REGTMP1, a; \
|
||||
ROTR $(32-shift), a; \
|
||||
MOVW b, REGTMP1; \
|
||||
ADD b, a
|
||||
|
||||
ROUND3(R7, R8, R9, R10, 8, 0xfffa3942, 4);
|
||||
ROUND3(R10, R7, R8, R9, 11, 0x8771f681, 11);
|
||||
ROUND3(R9, R10, R7, R8, 14, 0x6d9d6122, 16);
|
||||
ROUND3(R8, R9, R10, R7, 1, 0xfde5380c, 23);
|
||||
ROUND3(R7, R8, R9, R10, 4, 0xa4beea44, 4);
|
||||
ROUND3(R10, R7, R8, R9, 7, 0x4bdecfa9, 11);
|
||||
ROUND3(R9, R10, R7, R8, 10, 0xf6bb4b60, 16);
|
||||
ROUND3(R8, R9, R10, R7, 13, 0xbebfbc70, 23);
|
||||
ROUND3(R7, R8, R9, R10, 0, 0x289b7ec6, 4);
|
||||
ROUND3(R10, R7, R8, R9, 3, 0xeaa127fa, 11);
|
||||
ROUND3(R9, R10, R7, R8, 6, 0xd4ef3085, 16);
|
||||
ROUND3(R8, R9, R10, R7, 9, 0x4881d05, 23);
|
||||
ROUND3(R7, R8, R9, R10, 12, 0xd9d4d039, 4);
|
||||
ROUND3(R10, R7, R8, R9, 15, 0xe6db99e5, 11);
|
||||
ROUND3(R9, R10, R7, R8, 2, 0x1fa27cf8, 16);
|
||||
ROUND3(R8, R9, R10, R7, 0, 0xc4ac5665, 23);
|
||||
|
||||
MOVW (0*4)(R5), R11
|
||||
MOVV $0xffffffff, REGTMP2
|
||||
XOR R10, REGTMP2, REGTMP1 // REGTMP1 = ~d
|
||||
|
||||
// F = c ^ (b | (~d))
|
||||
#define ROUND4(a, b, c, d, index, const, shift) \
|
||||
ADDV $const, a; \
|
||||
ADD R11, a; \
|
||||
MOVW (index*4)(R5), R11; \
|
||||
OR b, REGTMP1; \
|
||||
XOR c, REGTMP1; \
|
||||
ADD REGTMP1, a; \
|
||||
ROTR $(32-shift), a; \
|
||||
MOVV $0xffffffff, REGTMP2; \
|
||||
XOR c, REGTMP2, REGTMP1; \
|
||||
ADD b, a
|
||||
|
||||
ROUND4(R7, R8, R9, R10, 7, 0xf4292244, 6);
|
||||
ROUND4(R10, R7, R8, R9, 14, 0x432aff97, 10);
|
||||
ROUND4(R9, R10, R7, R8, 5, 0xab9423a7, 15);
|
||||
ROUND4(R8, R9, R10, R7, 12, 0xfc93a039, 21);
|
||||
ROUND4(R7, R8, R9, R10, 3, 0x655b59c3, 6);
|
||||
ROUND4(R10, R7, R8, R9, 10, 0x8f0ccc92, 10);
|
||||
ROUND4(R9, R10, R7, R8, 1, 0xffeff47d, 15);
|
||||
ROUND4(R8, R9, R10, R7, 8, 0x85845dd1, 21);
|
||||
ROUND4(R7, R8, R9, R10, 15, 0x6fa87e4f, 6);
|
||||
ROUND4(R10, R7, R8, R9, 6, 0xfe2ce6e0, 10);
|
||||
ROUND4(R9, R10, R7, R8, 13, 0xa3014314, 15);
|
||||
ROUND4(R8, R9, R10, R7, 4, 0x4e0811a1, 21);
|
||||
ROUND4(R7, R8, R9, R10, 11, 0xf7537e82, 6);
|
||||
ROUND4(R10, R7, R8, R9, 2, 0xbd3af235, 10);
|
||||
ROUND4(R9, R10, R7, R8, 9, 0x2ad7d2bb, 15);
|
||||
ROUND4(R8, R9, R10, R7, 0, 0xeb86d391, 21);
|
||||
|
||||
ADD R14, R7
|
||||
ADD R15, R8
|
||||
ADD R16, R9
|
||||
ADD R17, R10
|
||||
|
||||
ADDV $64, R5
|
||||
BNE R5, R24, loop
|
||||
|
||||
MOVW R7, (0*4)(R4)
|
||||
MOVW R8, (1*4)(R4)
|
||||
MOVW R9, (2*4)(R4)
|
||||
MOVW R10, (3*4)(R4)
|
||||
zero:
|
||||
RET
|
Loading…
Reference in New Issue
Block a user