mirror of
https://github.com/golang/go
synced 2024-11-22 18:44:54 -07:00
e8f0340fa4
Benchmark on crypto/sha256 (provided by Xiaodong Liu): name old time/op new time/op delta Hash8Bytes/New 1.19µs ± 0% 0.97µs ± 0% -18.75% (p=0.000 n=9+9) Hash8Bytes/Sum224 1.21µs ± 0% 0.97µs ± 0% -20.04% (p=0.000 n=9+10) Hash8Bytes/Sum256 1.21µs ± 0% 0.98µs ± 0% -19.16% (p=0.000 n=10+7) Hash1K/New 15.9µs ± 0% 12.4µs ± 0% -22.10% (p=0.000 n=10+10) Hash1K/Sum224 15.9µs ± 0% 12.4µs ± 0% -22.18% (p=0.000 n=8+10) Hash1K/Sum256 15.9µs ± 0% 12.4µs ± 0% -22.15% (p=0.000 n=10+9) Hash8K/New 119µs ± 0% 92µs ± 0% -22.40% (p=0.000 n=10+9) Hash8K/Sum224 119µs ± 0% 92µs ± 0% -22.41% (p=0.000 n=9+10) Hash8K/Sum256 119µs ± 0% 92µs ± 0% -22.40% (p=0.000 n=9+9) name old speed new speed delta Hash8Bytes/New 6.70MB/s ± 0% 8.25MB/s ± 0% +23.13% (p=0.000 n=10+10) Hash8Bytes/Sum224 6.60MB/s ± 0% 8.26MB/s ± 0% +25.06% (p=0.000 n=10+10) Hash8Bytes/Sum256 6.59MB/s ± 0% 8.15MB/s ± 0% +23.67% (p=0.000 n=10+7) Hash1K/New 64.3MB/s ± 0% 82.5MB/s ± 0% +28.36% (p=0.000 n=10+10) Hash1K/Sum224 64.3MB/s ± 0% 82.6MB/s ± 0% +28.51% (p=0.000 n=10+10) Hash1K/Sum256 64.3MB/s ± 0% 82.6MB/s ± 0% +28.46% (p=0.000 n=9+9) Hash8K/New 69.0MB/s ± 0% 89.0MB/s ± 0% +28.87% (p=0.000 n=10+8) Hash8K/Sum224 69.0MB/s ± 0% 89.0MB/s ± 0% +28.88% (p=0.000 n=9+10) Hash8K/Sum256 69.0MB/s ± 0% 88.9MB/s ± 0% +28.87% (p=0.000 n=8+9) Benchmark on crypto/sha512 (provided by Xiaodong Liu): name old time/op new time/op delta Hash8Bytes/New 1.55µs ± 0% 1.31µs ± 0% -15.67% (p=0.000 n=10+10) Hash8Bytes/Sum384 1.59µs ± 0% 1.35µs ± 0% -14.97% (p=0.000 n=10+10) Hash8Bytes/Sum512 1.62µs ± 0% 1.39µs ± 0% -14.02% (p=0.000 n=10+10) Hash1K/New 10.7µs ± 0% 8.6µs ± 0% -19.60% (p=0.000 n=8+8) Hash1K/Sum384 10.8µs ± 0% 8.7µs ± 0% -19.40% (p=0.000 n=9+9) Hash1K/Sum512 10.8µs ± 0% 8.7µs ± 0% -19.35% (p=0.000 n=9+10) Hash8K/New 74.6µs ± 0% 59.6µs ± 0% -20.08% (p=0.000 n=10+9) Hash8K/Sum384 74.7µs ± 0% 59.7µs ± 0% -20.04% (p=0.000 n=9+8) Hash8K/Sum512 74.7µs ± 0% 59.7µs ± 0% -20.01% (p=0.000 n=10+10) name old speed new speed delta Hash8Bytes/New 
5.16MB/s ± 0% 6.12MB/s ± 0% +18.60% (p=0.000 n=10+8) Hash8Bytes/Sum384 5.02MB/s ± 0% 5.90MB/s ± 0% +17.56% (p=0.000 n=10+10) Hash8Bytes/Sum512 4.94MB/s ± 0% 5.74MB/s ± 0% +16.29% (p=0.000 n=10+9) Hash1K/New 95.4MB/s ± 0% 118.6MB/s ± 0% +24.38% (p=0.000 n=10+10) Hash1K/Sum384 95.0MB/s ± 0% 117.9MB/s ± 0% +24.06% (p=0.000 n=8+9) Hash1K/Sum512 94.8MB/s ± 0% 117.5MB/s ± 0% +23.99% (p=0.000 n=8+9) Hash8K/New 110MB/s ± 0% 137MB/s ± 0% +25.11% (p=0.000 n=9+6) Hash8K/Sum384 110MB/s ± 0% 137MB/s ± 0% +25.07% (p=0.000 n=9+8) Hash8K/Sum512 110MB/s ± 0% 137MB/s ± 0% +25.01% (p=0.000 n=10+10) Change-Id: I28ccfce634659305a336c8e0a3f8589f7361d661 Reviewed-on: https://go-review.googlesource.com/c/go/+/422317 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org> Reviewed-by: Ian Lance Taylor <iant@google.com> Reviewed-by: David Chase <drchase@google.com>
272 lines
5.9 KiB
Go
272 lines
5.9 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "math/bits"
|
|
|
|
// ------------------- //
|
|
// const rotates //
|
|
// ------------------- //
|
|
|
|
// rot64 exercises 64-bit rotates by a constant amount.
//
// Each statement spells a rotate-left of x in a different way: a shift pair
// joined with |, with + and with ^, and finally a call to bits.RotateLeft64.
// The asmcheck directives placed directly above a statement list, per
// architecture, the rotate instruction expected in the code generated for
// that statement. The four rotated values (by 7, 8, 9 and 10 bits) are
// accumulated into a and returned.
func rot64(x uint64) uint64 {
	var a uint64

	// amd64:"ROLQ\t[$]7"
	// ppc64:"ROTL\t[$]7"
	// ppc64le:"ROTL\t[$]7"
	// loong64: "ROTRV\t[$]57"
	a += x<<7 | x>>57

	// amd64:"ROLQ\t[$]8"
	// arm64:"ROR\t[$]56"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
	// ppc64:"ROTL\t[$]8"
	// ppc64le:"ROTL\t[$]8"
	// loong64: "ROTRV\t[$]56"
	a += x<<8 + x>>56

	// amd64:"ROLQ\t[$]9"
	// arm64:"ROR\t[$]55"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
	// ppc64:"ROTL\t[$]9"
	// ppc64le:"ROTL\t[$]9"
	// loong64: "ROTRV\t[$]55"
	a += x<<9 ^ x>>55

	// amd64:"ROLQ\t[$]10"
	// arm64:"ROR\t[$]54"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
	// ppc64:"ROTL\t[$]10"
	// ppc64le:"ROTL\t[$]10"
	// loong64: "ROTRV\t[$]54"
	a += bits.RotateLeft64(x, 10)

	return a
}
|
|
|
|
// rot32 exercises 32-bit rotates by a constant amount.
//
// Each statement spells a rotate-left of x in a different way: a shift pair
// joined with |, with + and with ^, and finally a call to bits.RotateLeft32.
// The asmcheck directives placed directly above a statement list, per
// architecture, the rotate instruction expected in the code generated for
// that statement. The four rotated values (by 7, 8, 9 and 10 bits) are
// accumulated into a and returned.
func rot32(x uint32) uint32 {
	var a uint32

	// amd64:"ROLL\t[$]7"
	// arm:"MOVW\tR\\d+@>25"
	// ppc64:"ROTLW\t[$]7"
	// ppc64le:"ROTLW\t[$]7"
	// loong64: "ROTR\t[$]25"
	a += x<<7 | x>>25

	// amd64:`ROLL\t[$]8`
	// arm:"MOVW\tR\\d+@>24"
	// arm64:"RORW\t[$]24"
	// s390x:"RLL\t[$]8"
	// ppc64:"ROTLW\t[$]8"
	// ppc64le:"ROTLW\t[$]8"
	// loong64: "ROTR\t[$]24"
	a += x<<8 + x>>24

	// amd64:"ROLL\t[$]9"
	// arm:"MOVW\tR\\d+@>23"
	// arm64:"RORW\t[$]23"
	// s390x:"RLL\t[$]9"
	// ppc64:"ROTLW\t[$]9"
	// ppc64le:"ROTLW\t[$]9"
	// loong64: "ROTR\t[$]23"
	a += x<<9 ^ x>>23

	// amd64:"ROLL\t[$]10"
	// arm:"MOVW\tR\\d+@>22"
	// arm64:"RORW\t[$]22"
	// s390x:"RLL\t[$]10"
	// ppc64:"ROTLW\t[$]10"
	// ppc64le:"ROTLW\t[$]10"
	// loong64: "ROTR\t[$]22"
	a += bits.RotateLeft32(x, 10)

	return a
}
|
|
|
|
// rot16 exercises 16-bit rotates by a constant amount.
//
// x is rotated left by 7, 8 and 9 bits using a shift pair joined with |, +
// and ^ respectively. The asmcheck directive above each statement names the
// amd64 rotate instruction expected for it. The three rotated values are
// accumulated into a and returned.
func rot16(x uint16) uint16 {
	var a uint16

	// amd64:"ROLW\t[$]7"
	a += x<<7 | x>>9

	// amd64:`ROLW\t[$]8`
	a += x<<8 + x>>8

	// amd64:"ROLW\t[$]9"
	a += x<<9 ^ x>>7

	return a
}
|
|
|
|
// rot8 exercises 8-bit rotates by a constant amount.
//
// x is rotated left by 5, 6 and 7 bits using a shift pair joined with |, +
// and ^ respectively. The asmcheck directive above each statement names the
// amd64 rotate instruction expected for it. The three rotated values are
// accumulated into a and returned.
func rot8(x uint8) uint8 {
	var a uint8

	// amd64:"ROLB\t[$]5"
	a += x<<5 | x>>3

	// amd64:`ROLB\t[$]6`
	a += x<<6 + x>>2

	// amd64:"ROLB\t[$]7"
	a += x<<7 ^ x>>1

	return a
}
|
|
|
|
// ----------------------- //
|
|
// non-const rotates //
|
|
// ----------------------- //
|
|
|
|
// rot64nc exercises 64-bit rotates by a non-constant amount.
//
// z is first reduced to the range 0..63. x is then rotated left by z and
// rotated right by z, each written as a pair of opposite shifts joined with
// |, and the two results are summed and returned. When z is 0 the
// complementary shift is by 64 bits, which yields 0 in Go, so both terms
// equal x.
//
// In the asmcheck directives a quoted pattern must appear in the generated
// code for the following statement, while a pattern prefixed with '-' must
// not appear.
func rot64nc(x uint64, z uint) uint64 {
	var a uint64

	z &= 63

	// amd64:"ROLQ",-"AND"
	// arm64:"ROR","NEG",-"AND"
	// ppc64:"ROTL",-"NEG",-"AND"
	// ppc64le:"ROTL",-"NEG",-"AND"
	// loong64: "ROTRV", -"AND"
	a += x<<z | x>>(64-z)

	// amd64:"RORQ",-"AND"
	// arm64:"ROR",-"NEG",-"AND"
	// ppc64:"ROTL","NEG",-"AND"
	// ppc64le:"ROTL","NEG",-"AND"
	// loong64: "ROTRV", -"AND"
	a += x>>z | x<<(64-z)

	return a
}
|
|
|
|
// rot32nc exercises 32-bit rotates by a non-constant amount.
//
// z is first reduced to the range 0..31. x is then rotated left by z and
// rotated right by z, each written as a pair of opposite shifts joined with
// |, and the two results are summed and returned. When z is 0 the
// complementary shift is by 32 bits, which yields 0 in Go, so both terms
// equal x.
//
// In the asmcheck directives a quoted pattern must appear in the generated
// code for the following statement, while a pattern prefixed with '-' must
// not appear.
func rot32nc(x uint32, z uint) uint32 {
	var a uint32

	z &= 31

	// amd64:"ROLL",-"AND"
	// arm64:"ROR","NEG",-"AND"
	// ppc64:"ROTLW",-"NEG",-"AND"
	// ppc64le:"ROTLW",-"NEG",-"AND"
	// loong64: "ROTR", -"AND"
	a += x<<z | x>>(32-z)

	// amd64:"RORL",-"AND"
	// arm64:"ROR",-"NEG",-"AND"
	// ppc64:"ROTLW","NEG",-"AND"
	// ppc64le:"ROTLW","NEG",-"AND"
	// loong64: "ROTR", -"AND"
	a += x>>z | x<<(32-z)

	return a
}
|
|
|
|
// rot16nc exercises 16-bit rotates by a non-constant amount.
//
// z is first reduced to the range 0..15. x is then rotated left by z and
// rotated right by z, each written as a pair of opposite shifts joined with
// |, and the two results are summed and returned.
//
// The asmcheck directives require the named amd64 rotate instruction for the
// following statement and, through the '-' prefix, forbid an ANDQ.
func rot16nc(x uint16, z uint) uint16 {
	var a uint16

	z &= 15

	// amd64:"ROLW",-"ANDQ"
	a += x<<z | x>>(16-z)

	// amd64:"RORW",-"ANDQ"
	a += x>>z | x<<(16-z)

	return a
}
|
|
|
|
// rot8nc exercises 8-bit rotates by a non-constant amount.
//
// z is first reduced to the range 0..7. x is then rotated left by z and
// rotated right by z, each written as a pair of opposite shifts joined with
// |, and the two results are summed and returned.
//
// The asmcheck directives require the named amd64 rotate instruction for the
// following statement and, through the '-' prefix, forbid an ANDQ.
func rot8nc(x uint8, z uint) uint8 {
	var a uint8

	z &= 7

	// amd64:"ROLB",-"ANDQ"
	a += x<<z | x>>(8-z)

	// amd64:"RORB",-"ANDQ"
	a += x>>z | x<<(8-z)

	return a
}
|
|
|
|
// Issue 18254: rotate after inlining
|
|
func f32(x uint32) uint32 {
|
|
// amd64:"ROLL\t[$]7"
|
|
return rot32nc(x, 7)
|
|
}
|
|
|
|
// --------------------------------------- //
|
|
// Combined Rotate + Masking operations //
|
|
// --------------------------------------- //
|
|
|
|
// checkMaskedRotate32 exercises 32-bit rotates combined with a constant mask.
//
// It overwrites a[0] through a[7] in order, so a must hold at least 8
// elements (a shorter slice panics on an out-of-range index). The first four
// stores rotate by a constant and mask either the rotate result or the rotate
// input. The next three rotate by the variable amount r. The last three
// stores read a[3] (already rewritten by the fourth store) rather than a[i],
// and the final two use the mask 0xFFF00FFF, whose set bits wrap around the
// top of the word.
//
// The asmcheck directive above each store gives the single RLWNM instruction
// expected for it on ppc64 and ppc64le.
func checkMaskedRotate32(a []uint32, r int) {
	i := 0

	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
	i++
	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
	i++
	// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
	// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
	i++
	// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
	// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
	i++

	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
	a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
	i++
	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
	a[i] = bits.RotateLeft32(a[3], r) & 0xFF00
	i++

	// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
	// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
	a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF
	i++
	// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
	// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
	a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF
	i++
}
|
|
|
|
// combined arithmetic and rotate on arm64
//
// checkArithmeticWithRotate combines bits.RotateLeft64(v, 13) with one
// logical operation per statement (&, |, ^, unary ^, &^, ^ with a
// complemented operand, | with a complemented operand, and an &-against-zero
// test) and stores each result into a. The asmcheck directive above each
// statement gives the arm64 instruction expected for it, with the rotate
// appearing as the operand modifier @>51.
//
// The final test reads a[19] after the preceding statement has overwritten
// it, and sets a[20] to 1 only when the masked value is zero.
func checkArithmeticWithRotate(a *[1000]uint64) {
	// arm64: "AND\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
	a[2] = a[1] & bits.RotateLeft64(a[0], 13)
	// arm64: "ORR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
	a[5] = a[4] | bits.RotateLeft64(a[3], 13)
	// arm64: "EOR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
	a[8] = a[7] ^ bits.RotateLeft64(a[6], 13)
	// arm64: "MVN\tR[0-9]+@>51, R[0-9]+"
	a[10] = ^bits.RotateLeft64(a[9], 13)
	// arm64: "BIC\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
	a[13] = a[12] &^ bits.RotateLeft64(a[11], 13)
	// arm64: "EON\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
	a[16] = a[15] ^ ^bits.RotateLeft64(a[14], 13)
	// arm64: "ORN\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
	a[19] = a[18] | ^bits.RotateLeft64(a[17], 13)
	// arm64: "TST\tR[0-9]+@>51, R[0-9]+"
	if a[18]&bits.RotateLeft64(a[19], 13) == 0 {
		a[20] = 1
	}
}
|