1
0
mirror of https://github.com/golang/go synced 2024-11-23 20:50:04 -07:00
go/test/codegen/copy.go
Lynn Boger 815509ae31 cmd/compile: improve lowered moves and zeros for ppc64le
This change includes the following:
- Generate LXV/STXV sequences instead of LXVD2X/STXVD2X on power9.
These instructions do not require an index register, which
allows more loads and stores within a loop without initializing
multiple index registers. The LoweredQuadXXX ops generate LXV/STXV.
- Create LoweredMoveXXXShort and LoweredZeroXXXShort for short
moves that don't generate loops, and therefore don't clobber the
address registers or flags.
- Use registers other than R3 and R4 so that the lowered moves and zeros
do not conflict with registers that have already been allocated, which
avoids unnecessary register moves.
- Eliminate the use of R14 as a scratch register and use R31
instead.
- Add PCALIGN when the LoweredMoveXXX or LoweredZeroXXX generates a
loop with more than 3 iterations.

This performance opportunity was noticed in github.com/golang/snappy
benchmarks. Results on power9:

WordsDecode1e1    54.1ns ± 0%    53.8ns ± 0%   -0.51%  (p=0.029 n=4+4)
WordsDecode1e2     287ns ± 0%     282ns ± 1%   -1.83%  (p=0.029 n=4+4)
WordsDecode1e3    3.98µs ± 0%    3.64µs ± 0%   -8.52%  (p=0.029 n=4+4)
WordsDecode1e4    66.9µs ± 0%    67.0µs ± 0%   +0.20%  (p=0.029 n=4+4)
WordsDecode1e5     723µs ± 0%     723µs ± 0%   -0.01%  (p=0.200 n=4+4)
WordsDecode1e6    7.21ms ± 0%    7.21ms ± 0%   -0.02%  (p=1.000 n=4+4)
WordsEncode1e1    29.9ns ± 0%    29.4ns ± 0%   -1.51%  (p=0.029 n=4+4)
WordsEncode1e2    2.12µs ± 0%    1.75µs ± 0%  -17.70%  (p=0.029 n=4+4)
WordsEncode1e3    11.7µs ± 0%    11.2µs ± 0%   -4.61%  (p=0.029 n=4+4)
WordsEncode1e4     119µs ± 0%     120µs ± 0%   +0.36%  (p=0.029 n=4+4)
WordsEncode1e5    1.21ms ± 0%    1.22ms ± 0%   +0.41%  (p=0.029 n=4+4)
WordsEncode1e6    12.0ms ± 0%    12.0ms ± 0%   +0.57%  (p=0.029 n=4+4)
RandomEncode       286µs ± 0%     203µs ± 0%  -28.82%  (p=0.029 n=4+4)
ExtendMatch       47.4µs ± 0%    47.0µs ± 0%   -0.85%  (p=0.029 n=4+4)

Change-Id: Iecad3a39ae55280286e42760a5c9d5c1168f5858
Reviewed-on: https://go-review.googlesource.com/c/go/+/226539
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-04-06 12:09:39 +00:00

125 lines
2.6 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
import "runtime"
// Check small copies are replaced with moves.
// movesmall4 copies a local 4-byte array onto itself shifted by one byte,
// so source and destination overlap. The directive comments in front of the
// copy list, per architecture, a pattern for the generated assembly; under
// the test/codegen convention a leading '-' means the pattern must NOT
// match, so here no listed architecture may emit a reference to memmove.
// Note that the local x shadows the package-level x.
func movesmall4() {
x := [...]byte{1, 2, 3, 4}
// 386:-".*memmove"
// amd64:-".*memmove"
// arm:-".*memmove"
// arm64:-".*memmove"
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
copy(x[1:], x[:])
}
// movesmall7 is the 7-byte variant of the small overlapping copy: a local
// array is copied onto itself shifted by one byte. The directives require
// that no memmove reference appears on 386, amd64, arm64, ppc64 and ppc64le
// (a leading '-' marks a pattern that must not match). Unlike the 4-byte
// case, there is no arm directive here.
func movesmall7() {
x := [...]byte{1, 2, 3, 4, 5, 6, 7}
// 386:-".*memmove"
// amd64:-".*memmove"
// arm64:-".*memmove"
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
copy(x[1:], x[:])
}
// movesmall16 is the 16-byte variant of the small overlapping copy. The
// expectations differ by architecture: amd64 must not reference memmove
// (pattern prefixed with '-'), while ppc64 and ppc64le carry a positive
// pattern, i.e. a memmove reference is expected to be present there.
func movesmall16() {
x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
// amd64:-".*memmove"
// ppc64:".*memmove"
// ppc64le:".*memmove"
copy(x[1:], x[:])
}
// x is a package-level 256-byte array. The moveDisjointStack* functions use
// it as the copy source, and the ptr* functions use it as both source and
// destination. The movesmall* functions declare their own local x instead.
var x [256]byte
// Check that large disjoint copies are replaced with moves.
// moveDisjointStack32 copies the first 32 bytes of the package-level x into
// a local 32-byte array. The directives require no memmove reference on
// ppc64/ppc64le. On ppc64le/power8 they expect LXVD2X with no ADD or BC; on
// ppc64le/power9 they expect LXV with no LXVD2X, ADD or BC (presumably
// meaning a straight-line sequence without a loop — confirm against the
// ppc64 lowering rules).
// The address of s is passed to runtime.KeepAlive after the copy,
// presumably so the destination is not optimized away — TODO confirm.
func moveDisjointStack32() {
var s [32]byte
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
// ppc64le/power8:"LXVD2X",-"ADD",-"BC"
// ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
copy(s[:], x[:32])
runtime.KeepAlive(&s)
}
// moveDisjointStack64 copies the first 96 bytes of the package-level x into
// a local 96-byte array.
// NOTE(review): despite the "64" in the name, both the array and the copy
// length are 96 bytes — confirm whether the name is simply historical.
// The directives require no memmove reference on ppc64/ppc64le. On
// ppc64le/power8 they expect LXVD2X together with ADD and BC (presumably a
// loop — confirm); on ppc64le/power9 they expect LXV with no LXVD2X, ADD or
// BC.
// The address of s is passed to runtime.KeepAlive after the copy,
// presumably so the destination is not optimized away — TODO confirm.
func moveDisjointStack64() {
var s [96]byte
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
// ppc64le/power8:"LXVD2X","ADD","BC"
// ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
copy(s[:], x[:96])
runtime.KeepAlive(&s)
}
// moveDisjointStack copies all 256 bytes of the package-level x into a
// local array of the same size. The directives require no memmove reference
// on s390x, amd64, ppc64 and ppc64le. On ppc64le/power8 they expect LXVD2X;
// on ppc64le/power9 they expect LXV and no LXVD2X.
// The address of s is passed to runtime.KeepAlive after the copy,
// presumably so the destination is not optimized away — TODO confirm.
func moveDisjointStack() {
var s [256]byte
// s390x:-".*memmove"
// amd64:-".*memmove"
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
// ppc64le/power8:"LXVD2X"
// ppc64le/power9:"LXV",-"LXVD2X"
copy(s[:], x[:])
runtime.KeepAlive(&s)
}
// moveDisjointArg copies the 256-byte array pointed to by b into a local
// array of the same size. The directives are the same as for the 256-byte
// copy from a global: no memmove reference on s390x, amd64, ppc64 and
// ppc64le; LXVD2X expected on ppc64le/power8; LXV and no LXVD2X expected on
// ppc64le/power9.
// The address of s is passed to runtime.KeepAlive after the copy,
// presumably so the destination is not optimized away — TODO confirm.
func moveDisjointArg(b *[256]byte) {
var s [256]byte
// s390x:-".*memmove"
// amd64:-".*memmove"
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
// ppc64le/power8:"LXVD2X"
// ppc64le/power9:"LXV",-"LXVD2X"
copy(s[:], b[:])
runtime.KeepAlive(&s)
}
// moveDisjointNoOverlap copies the upper half of *a (bytes 128..255) onto
// its lower half. Source and destination are in the same array, but only
// 128 bytes are copied (the length of the shorter slice, a[128:]), so the
// two ranges do not overlap. The directives require no memmove reference on
// s390x, amd64, ppc64 and ppc64le; LXVD2X is expected on ppc64le/power8;
// LXV and no LXVD2X is expected on ppc64le/power9.
func moveDisjointNoOverlap(a *[256]byte) {
// s390x:-".*memmove"
// amd64:-".*memmove"
// ppc64:-".*memmove"
// ppc64le:-".*memmove"
// ppc64le/power8:"LXVD2X"
// ppc64le/power9:"LXV",-"LXVD2X"
copy(a[:], a[128:])
}
// Check that no branches are generated when the pointers are [not] equal.
// ptrEqual copies the package-level x onto itself, so the source and
// destination slices start at the same address. The directives require that
// no equality branch is emitted for this copy: no JEQ/JNE on amd64 and no
// BEQ/BNE on ppc64, ppc64le and s390x.
func ptrEqual() {
// amd64:-"JEQ",-"JNE"
// ppc64:-"BEQ",-"BNE"
// ppc64le:-"BEQ",-"BNE"
// s390x:-"BEQ",-"BNE"
copy(x[:], x[:])
}
// ptrOneOffset copies the package-level x into itself with the destination
// starting one byte after the source, so the two start addresses differ by
// a constant. The directives require that no equality branch is emitted:
// no JEQ/JNE on amd64 and no BEQ/BNE on ppc64, ppc64le and s390x.
func ptrOneOffset() {
// amd64:-"JEQ",-"JNE"
// ppc64:-"BEQ",-"BNE"
// ppc64le:-"BEQ",-"BNE"
// s390x:-"BEQ",-"BNE"
copy(x[1:], x[:])
}
// ptrBothOffset copies within the package-level x with both slices offset
// from the start of the array: destination at byte 1, source at byte 2. The
// directives require that no equality branch is emitted: no JEQ/JNE on
// amd64 and no BEQ/BNE on ppc64, ppc64le and s390x.
func ptrBothOffset() {
// amd64:-"JEQ",-"JNE"
// ppc64:-"BEQ",-"BNE"
// ppc64le:-"BEQ",-"BNE"
// s390x:-"BEQ",-"BNE"
copy(x[1:], x[2:])
}