1
0
mirror of https://github.com/golang/go synced 2024-11-08 18:26:14 -07:00
go/test/codegen/rotate.go

280 lines
6.1 KiB
Go
Raw Normal View History

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
import "math/bits"
// ------------------- //
// const rotates //
// ------------------- //
// rot64 returns the wrapping sum of x rotated left by 7, 8, 9 and 10 bits.
// Each rotation is spelled differently on purpose (shift pair joined with |,
// with +, with ^, and a direct bits.RotateLeft64 call) so that every source
// form is exercised. The architecture-tagged comments are asmcheck
// expectations (this file is marked asmcheck) for the statement that
// immediately follows them, so they must stay attached to that statement.
func rot64(x uint64) uint64 {
	var a uint64

	// amd64:"ROLQ\t[$]7"
	// ppc64:"ROTL\t[$]7"
	// ppc64le:"ROTL\t[$]7"
	// loong64: "ROTRV\t[$]57"
	a += x<<7 | x>>57

	// amd64:"ROLQ\t[$]8"
	// arm64:"ROR\t[$]56"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
	// ppc64:"ROTL\t[$]8"
	// ppc64le:"ROTL\t[$]8"
	// loong64: "ROTRV\t[$]56"
	a += x<<8 + x>>56

	// amd64:"ROLQ\t[$]9"
	// arm64:"ROR\t[$]55"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
	// ppc64:"ROTL\t[$]9"
	// ppc64le:"ROTL\t[$]9"
	// loong64: "ROTRV\t[$]55"
	a += x<<9 ^ x>>55

	// amd64:"ROLQ\t[$]10"
	// arm64:"ROR\t[$]54"
	// s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
	// ppc64:"ROTL\t[$]10"
	// ppc64le:"ROTL\t[$]10"
	// loong64: "ROTRV\t[$]54"
	a += bits.RotateLeft64(x, 10)

	return a
}
// rot32 returns the wrapping sum of x rotated left by 7, 8, 9 and 10 bits.
// Each rotation is spelled differently on purpose (shift pair joined with |,
// with +, with ^, and a direct bits.RotateLeft32 call) so that every source
// form is exercised. The architecture-tagged comments are asmcheck
// expectations (this file is marked asmcheck) for the statement that
// immediately follows them, so they must stay attached to that statement.
func rot32(x uint32) uint32 {
	var a uint32

	// amd64:"ROLL\t[$]7"
	// arm:"MOVW\tR\\d+@>25"
	// ppc64:"ROTLW\t[$]7"
	// ppc64le:"ROTLW\t[$]7"
	// loong64: "ROTR\t[$]25"
	a += x<<7 | x>>25

	// amd64:`ROLL\t[$]8`
	// arm:"MOVW\tR\\d+@>24"
	// arm64:"RORW\t[$]24"
	// s390x:"RLL\t[$]8"
	// ppc64:"ROTLW\t[$]8"
	// ppc64le:"ROTLW\t[$]8"
	// loong64: "ROTR\t[$]24"
	a += x<<8 + x>>24

	// amd64:"ROLL\t[$]9"
	// arm:"MOVW\tR\\d+@>23"
	// arm64:"RORW\t[$]23"
	// s390x:"RLL\t[$]9"
	// ppc64:"ROTLW\t[$]9"
	// ppc64le:"ROTLW\t[$]9"
	// loong64: "ROTR\t[$]23"
	a += x<<9 ^ x>>23

	// amd64:"ROLL\t[$]10"
	// arm:"MOVW\tR\\d+@>22"
	// arm64:"RORW\t[$]22"
	// s390x:"RLL\t[$]10"
	// ppc64:"ROTLW\t[$]10"
	// ppc64le:"ROTLW\t[$]10"
	// loong64: "ROTR\t[$]22"
	a += bits.RotateLeft32(x, 10)

	return a
}
// rot16 returns the wrapping sum of x rotated left by 7, 8 and 9 bits,
// with the two shifts of each rotation joined by |, + and ^ respectively.
// The tagged comments are asmcheck expectations for the statement that
// follows them and must stay directly above it.
func rot16(x uint16) uint16 {
	var sum uint16

	// amd64:"ROLW\t[$]7"
	sum += x<<7 | x>>9

	// amd64:`ROLW\t[$]8`
	sum += x<<8 + x>>8

	// amd64:"ROLW\t[$]9"
	sum += x<<9 ^ x>>7

	return sum
}
// rot8 returns the wrapping sum of x rotated left by 5, 6 and 7 bits,
// with the two shifts of each rotation joined by |, + and ^ respectively.
// The tagged comments are asmcheck expectations for the statement that
// follows them and must stay directly above it.
func rot8(x uint8) uint8 {
	var sum uint8

	// amd64:"ROLB\t[$]5"
	sum += x<<5 | x>>3

	// amd64:`ROLB\t[$]6`
	sum += x<<6 + x>>2

	// amd64:"ROLB\t[$]7"
	sum += x<<7 ^ x>>1

	return sum
}
// ----------------------- //
// non-const rotates //
// ----------------------- //
// rot64nc returns the sum of x rotated left by z and x rotated right by z,
// where z is first reduced to the range 0..63. Both rotations are spelled as
// a pair of opposite shifts; for z == 0 the complementary shift is by 64 bits,
// which yields 0 in Go, so each term is simply x.
//
// The directive comments above each statement list, per architecture, the
// instruction expected for that source line and the instructions that must
// not appear (entries prefixed with -). The expressions are the patterns
// under test and must be kept exactly as written.
func rot64nc(x uint64, z uint) uint64 {
	var a uint64

	// Masking up front bounds the shift count, so no extra AND should be
	// needed in the generated code.
	z &= 63

	// amd64:"ROLQ",-"AND"
	// arm64:"ROR","NEG",-"AND"
	// ppc64:"ROTL",-"NEG",-"AND"
	// ppc64le:"ROTL",-"NEG",-"AND"
	// loong64: "ROTRV", -"AND"
	a += x<<z | x>>(64-z)

	// amd64:"RORQ",-"AND"
	// arm64:"ROR",-"NEG",-"AND"
	// ppc64:"ROTL","NEG",-"AND"
	// ppc64le:"ROTL","NEG",-"AND"
	// loong64: "ROTRV", -"AND"
	a += x>>z | x<<(64-z)

	return a
}
// rot32nc returns the sum of x rotated left by z and x rotated right by z,
// where z is first reduced to the range 0..31. Both rotations are spelled as
// a pair of opposite shifts; for z == 0 the complementary shift is by 32 bits,
// which yields 0 in Go, so each term is simply x.
//
// The directive comments above each statement list, per architecture, the
// instruction expected for that source line and the instructions that must
// not appear (entries prefixed with -). The expressions are the patterns
// under test and must be kept exactly as written.
func rot32nc(x uint32, z uint) uint32 {
	var a uint32

	// Masking up front bounds the shift count, so no extra AND should be
	// needed in the generated code.
	z &= 31

	// amd64:"ROLL",-"AND"
	// arm64:"ROR","NEG",-"AND"
	// ppc64:"ROTLW",-"NEG",-"AND"
	// ppc64le:"ROTLW",-"NEG",-"AND"
	// loong64: "ROTR", -"AND"
	a += x<<z | x>>(32-z)

	// amd64:"RORL",-"AND"
	// arm64:"ROR",-"NEG",-"AND"
	// ppc64:"ROTLW","NEG",-"AND"
	// ppc64le:"ROTLW","NEG",-"AND"
	// loong64: "ROTR", -"AND"
	a += x>>z | x<<(32-z)

	return a
}
// rot16nc returns the sum of x rotated left by z and x rotated right by z,
// with z first reduced to the range 0..15. Each rotation is written as a pair
// of opposite shifts. The directive comments expect a 16-bit rotate
// instruction on amd64 for each statement and no ANDQ, so the expressions
// must be kept exactly as written.
func rot16nc(x uint16, z uint) uint16 {
var a uint16
// Bound the shift count before the shift pairs below use it.
z &= 15
// amd64:"ROLW",-"ANDQ"
a += x<<z | x>>(16-z)
// amd64:"RORW",-"ANDQ"
a += x>>z | x<<(16-z)
return a
}
// rot8nc returns the sum of x rotated left by z and x rotated right by z,
// with z first reduced to the range 0..7. Each rotation is written as a pair
// of opposite shifts. The directive comments expect an 8-bit rotate
// instruction on amd64 for each statement and no ANDQ, so the expressions
// must be kept exactly as written.
func rot8nc(x uint8, z uint) uint8 {
var a uint8
// Bound the shift count before the shift pairs below use it.
z &= 7
// amd64:"ROLB",-"ANDQ"
a += x<<z | x>>(8-z)
// amd64:"RORB",-"ANDQ"
a += x>>z | x<<(8-z)
return a
}
// Issue 18254: rotate after inlining
//
// f32 calls rot32nc with the constant count 7. The directive below expects
// the call site to contain a rotate by the immediate 7 on amd64, which
// presumably requires rot32nc to be inlined and its shift pairs folded with
// the constant.
func f32(x uint32) uint32 {
// amd64:"ROLL\t[$]7"
return rot32nc(x, 7)
}
// doubleRotate rotates x left by 5 bits and then left by a further 10 bits,
// each step written as a pair of opposite shifts. The directives on the
// second statement expect the two steps to be merged into one rotate, which
// is left by 15 on amd64 and right by 49 (64 - 15) on arm64.
func doubleRotate(x uint64) uint64 {
x = (x << 5) | (x >> 59)
// amd64:"ROLQ\t[$]15"
// arm64:"ROR\t[$]49"
x = (x << 10) | (x >> 54)
return x
}
// --------------------------------------- //
// Combined Rotate + Masking operations //
// --------------------------------------- //
// checkMaskedRotate32 overwrites a[0] through a[7] in place, each with a
// 32-bit rotate of the element combined with a constant mask. The slice must
// hold at least eight elements, otherwise indexing panics. r is the rotate
// count used by the variable-count cases.
//
// The directive comments above each assignment expect the rotate and the
// mask to be emitted as one RLWNM instruction on ppc64 and ppc64le, with the
// operands shown. The expressions are the patterns under test and must be
// kept exactly as written.
func checkMaskedRotate32(a []uint32, r int) {
i := 0
// Constant rotate followed by a mask.
// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
i++
// Mask applied before the constant rotate.
// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]8, [$]15, R[0-9]+"
a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
i++
// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]27, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
i++
// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]24, [$]31, R[0-9]+"
a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
i++
// Variable rotate count r followed by a mask.
// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]8, [$]15, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
i++
// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16, [$]23, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], r) & 0xFF00
i++
// The mask 0xFFF00FFF has its set bits at both ends of the word; the
// expected mask operands (20, 11) have begin greater than end.
// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]20, [$]11, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], r) & 0xFFF00FFF
i++
// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]20, [$]11, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], 4) & 0xFFF00FFF
i++
}
// combined arithmetic and rotate on arm64
//
// checkArithmeticWithRotate stores into fixed slots of a the result of a
// bitwise operation whose operand is another slot rotated left by 13 bits.
// The directive above each statement expects arm64 to emit a single
// instruction with a rotated register operand; the operand is written with
// the amount 51, which is 64 - 13, i.e. the same rotation expressed as a
// rotate right. The expressions are the patterns under test and must be kept
// exactly as written.
func checkArithmeticWithRotate(a *[1000]uint64) {
// arm64: "AND\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
a[2] = a[1] & bits.RotateLeft64(a[0], 13)
// arm64: "ORR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
a[5] = a[4] | bits.RotateLeft64(a[3], 13)
// arm64: "EOR\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
a[8] = a[7] ^ bits.RotateLeft64(a[6], 13)
// arm64: "MVN\tR[0-9]+@>51, R[0-9]+"
a[10] = ^bits.RotateLeft64(a[9], 13)
// arm64: "BIC\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
a[13] = a[12] &^ bits.RotateLeft64(a[11], 13)
// arm64: "EON\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
a[16] = a[15] ^ ^bits.RotateLeft64(a[14], 13)
// arm64: "ORN\tR[0-9]+@>51, R[0-9]+, R[0-9]+"
a[19] = a[18] | ^bits.RotateLeft64(a[17], 13)
// The AND result is only compared against zero, so a flag-setting test
// instruction is expected rather than a stored AND.
// arm64: "TST\tR[0-9]+@>51, R[0-9]+"
if a[18]&bits.RotateLeft64(a[19], 13) == 0 {
a[20] = 1
}
}