From 107d18299cb6e222ec26f86a28a4454dc1eb8888 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 12 Dec 2013 11:26:36 -0800 Subject: [PATCH] crypto/sha1: Optimise FUNC1 with alternate formulation According to Wikipedia: http://en.wikipedia.org/wiki/SHA-1 there is an alternate formulation for the FUNC1 transform, namely f1 = d xor (b and (c xor d)) instead of f1 = (b and c) or ((not b) and d) This reduces the instruction count of FUNC1 from 6 to 4 and makes about 5% speed improvement on amd64 and suprisingly 17% on 386. amd64 Intel(R) Core(TM) i7 CPU Q 820 @ 1.73GHz: benchmark old ns/op new ns/op delta BenchmarkHash8Bytes 506 499 -1.38% BenchmarkHash1K 3099 2961 -4.45% BenchmarkHash8K 22292 21243 -4.71% benchmark old MB/s new MB/s speedup BenchmarkHash8Bytes 15.80 16.00 1.01x BenchmarkHash1K 330.40 345.82 1.05x BenchmarkHash8K 367.48 385.63 1.05x i386 Intel(R) Core(TM) i7 CPU Q 820 @ 1.73GHz: benchmark old ns/op new ns/op delta BenchmarkHash8Bytes 647 615 -4.95% BenchmarkHash1K 3673 3161 -13.94% BenchmarkHash8K 26141 22374 -14.41% benchmark old MB/s new MB/s speedup BenchmarkHash8Bytes 12.35 13.01 1.05x BenchmarkHash1K 278.74 323.94 1.16x BenchmarkHash8K 313.37 366.13 1.17x The improvements on an Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz were almost identical. R=golang-dev, r, hanwen CC=golang-dev, rsc https://golang.org/cl/19910043 --- src/pkg/crypto/sha1/sha1block_386.s | 10 ++++------ src/pkg/crypto/sha1/sha1block_amd64.s | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/pkg/crypto/sha1/sha1block_386.s b/src/pkg/crypto/sha1/sha1block_386.s index 890b3ae818..688851c31e 100644 --- a/src/pkg/crypto/sha1/sha1block_386.s +++ b/src/pkg/crypto/sha1/sha1block_386.s @@ -46,12 +46,10 @@ ADDL DI, e #define FUNC1(a, b, c, d, e) \ - MOVL b, SI; \ - ANDL c, SI; \ - MOVL b, DI; \ - NOTL DI; \ - ANDL d, DI; \ - ORL SI, DI + MOVL d, DI; \ + XORL c, DI; \ + ANDL b, DI; \ + XORL d, DI #define FUNC2(a, b, c, d, e) \ MOVL b, DI; \ diff --git a/src/pkg/crypto/sha1/sha1block_amd64.s b/src/pkg/crypto/sha1/sha1block_amd64.s index 0bb6c204c5..8ffb9d5d68 100644 --- a/src/pkg/crypto/sha1/sha1block_amd64.s +++ b/src/pkg/crypto/sha1/sha1block_amd64.s @@ -34,12 +34,10 @@ MOVL R10, (((index)&0xf)*4)(SP) #define FUNC1(a, b, c, d, e) \ - MOVL b, R8; \ - ANDL c, R8; \ - MOVL b, R9; \ - NOTL R9; \ - ANDL d, R9; \ - ORL R8, R9 + MOVL d, R9; \ + XORL c, R9; \ + ANDL b, R9; \ + XORL d, R9 #define FUNC2(a, b, c, d, e) \ MOVL b, R9; \