mirror of
https://github.com/golang/go
synced 2024-11-25 07:37:57 -07:00
5f88755f43
goos: linux goarch: loong64 pkg: runtime cpu: Loongson-3A6000 @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | Memmove/0 0.8004n ± 0% 0.4002n ± 0% -50.00% (p=0.000 n=20) Memmove/1 2.494n ± 0% 2.136n ± 0% -14.35% (p=0.000 n=20) Memmove/2 2.802n ± 0% 2.512n ± 0% -10.35% (p=0.000 n=20) Memmove/3 2.802n ± 0% 2.497n ± 0% -10.92% (p=0.000 n=20) Memmove/4 3.202n ± 0% 2.808n ± 0% -12.30% (p=0.000 n=20) Memmove/5 2.821n ± 0% 2.658n ± 0% -5.76% (p=0.000 n=20) Memmove/6 2.819n ± 0% 2.657n ± 0% -5.73% (p=0.000 n=20) Memmove/7 2.820n ± 0% 2.654n ± 0% -5.87% (p=0.000 n=20) Memmove/8 3.202n ± 0% 2.814n ± 0% -12.12% (p=0.000 n=20) Memmove/9 3.202n ± 0% 3.009n ± 0% -6.03% (p=0.000 n=20) Memmove/10 3.202n ± 0% 3.009n ± 0% -6.03% (p=0.000 n=20) Memmove/11 3.202n ± 0% 3.009n ± 0% -6.03% (p=0.000 n=20) Memmove/12 3.202n ± 0% 3.010n ± 0% -6.01% (p=0.000 n=20) Memmove/13 3.202n ± 0% 3.009n ± 0% -6.03% (p=0.000 n=20) Memmove/14 3.202n ± 0% 3.009n ± 0% -6.03% (p=0.000 n=20) Memmove/15 3.202n ± 0% 3.010n ± 0% -6.01% (p=0.000 n=20) Memmove/16 3.202n ± 0% 3.009n ± 0% -6.03% (p=0.000 n=20) Memmove/32 3.602n ± 0% 3.603n ± 0% +0.03% (p=0.000 n=20) Memmove/64 4.202n ± 0% 4.204n ± 0% +0.05% (p=0.000 n=20) Memmove/128 8.005n ± 0% 8.007n ± 0% +0.02% (p=0.000 n=20) Memmove/256 11.21n ± 0% 10.81n ± 0% -3.57% (p=0.000 n=20) Memmove/512 17.65n ± 0% 17.96n ± 0% +1.73% (p=0.000 n=20) Memmove/1024 30.48n ± 0% 30.46n ± 0% -0.07% (p=0.000 n=20) Memmove/2048 56.43n ± 0% 56.30n ± 0% -0.24% (p=0.000 n=20) Memmove/4096 107.7n ± 0% 107.6n ± 0% -0.09% (p=0.000 n=20) MemmoveOverlap/32 4.002n ± 0% 4.003n ± 0% +0.02% (p=0.002 n=20) MemmoveOverlap/64 4.603n ± 0% 4.603n ± 0% ~ (p=0.286 n=20) MemmoveOverlap/128 8.704n ± 0% 8.699n ± 0% ~ (p=0.180 n=20) MemmoveOverlap/256 12.01n ± 0% 11.76n ± 0% -2.08% (p=0.000 n=20) MemmoveOverlap/512 18.42n ± 0% 18.36n ± 0% -0.33% (p=0.000 n=20) MemmoveOverlap/1024 31.23n ± 0% 31.16n ± 0% -0.21% (p=0.000 n=20) MemmoveOverlap/2048 57.42n ± 0% 56.82n ± 0% -1.04% (p=0.000 
n=20) MemmoveOverlap/4096 108.5n ± 0% 108.0n ± 0% -0.46% (p=0.000 n=20) MemmoveUnalignedDst/0 2.804n ± 0% 2.447n ± 0% -12.70% (p=0.000 n=20) MemmoveUnalignedDst/1 2.802n ± 0% 2.491n ± 0% -11.12% (p=0.000 n=20) MemmoveUnalignedDst/2 3.202n ± 0% 2.808n ± 0% -12.29% (p=0.000 n=20) MemmoveUnalignedDst/3 3.202n ± 0% 2.814n ± 0% -12.12% (p=0.000 n=20) MemmoveUnalignedDst/4 3.602n ± 0% 3.202n ± 0% -11.10% (p=0.000 n=20) MemmoveUnalignedDst/5 3.202n ± 0% 3.203n ± 0% +0.03% (p=0.014 n=20) MemmoveUnalignedDst/6 3.202n ± 0% 3.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedDst/7 3.202n ± 0% 3.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedDst/8 3.602n ± 0% 3.202n ± 0% -11.10% (p=0.000 n=20) MemmoveUnalignedDst/9 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedDst/10 3.602n ± 0% 3.602n ± 0% ~ (p=0.091 n=20) MemmoveUnalignedDst/11 3.602n ± 0% 3.602n ± 0% ~ (p=0.613 n=20) MemmoveUnalignedDst/12 3.602n ± 0% 3.602n ± 0% ~ (p=0.165 n=20) MemmoveUnalignedDst/13 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedDst/14 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedDst/15 3.602n ± 0% 3.602n ± 0% 0.00% (p=0.027 n=20) MemmoveUnalignedDst/16 3.602n ± 0% 3.602n ± 0% ~ (p=0.661 n=20) MemmoveUnalignedDst/32 4.002n ± 0% 4.002n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedDst/64 6.804n ± 0% 6.804n ± 0% ~ (p=0.204 n=20) MemmoveUnalignedDst/128 12.61n ± 0% 12.61n ± 0% ~ (p=1.000 n=20) ¹ MemmoveUnalignedDst/256 16.33n ± 2% 16.32n ± 2% ~ (p=0.839 n=20) MemmoveUnalignedDst/512 25.61n ± 0% 24.71n ± 0% -3.51% (p=0.000 n=20) MemmoveUnalignedDst/1024 42.81n ± 0% 42.82n ± 0% ~ (p=0.973 n=20) MemmoveUnalignedDst/2048 74.86n ± 0% 76.03n ± 0% +1.56% (p=0.000 n=20) MemmoveUnalignedDst/4096 152.0n ± 11% 152.0n ± 0% 0.00% (p=0.013 n=20) MemmoveUnalignedDstOverlap/32 5.319n ± 0% 5.558n ± 1% +4.50% (p=0.000 n=20) MemmoveUnalignedDstOverlap/64 8.006n ± 0% 8.025n ± 0% +0.24% (p=0.000 n=20) MemmoveUnalignedDstOverlap/128 9.631n ± 0% 9.601n ± 0% -0.31% (p=0.000 n=20) MemmoveUnalignedDstOverlap/256 13.79n 
± 2% 13.58n ± 1% ~ (p=0.234 n=20) MemmoveUnalignedDstOverlap/512 21.38n ± 0% 21.30n ± 0% -0.37% (p=0.000 n=20) MemmoveUnalignedDstOverlap/1024 41.71n ± 0% 41.70n ± 0% ~ (p=0.887 n=20) MemmoveUnalignedDstOverlap/2048 81.63n ± 0% 81.61n ± 0% ~ (p=0.481 n=20) MemmoveUnalignedDstOverlap/4096 162.6n ± 0% 162.6n ± 0% ~ (p=0.171 n=20) MemmoveUnalignedSrc/0 2.808n ± 0% 2.482n ± 0% -11.61% (p=0.000 n=20) MemmoveUnalignedSrc/1 2.804n ± 0% 2.577n ± 0% -8.08% (p=0.000 n=20) MemmoveUnalignedSrc/2 3.202n ± 0% 2.806n ± 0% -12.37% (p=0.000 n=20) MemmoveUnalignedSrc/3 3.202n ± 0% 2.808n ± 0% -12.30% (p=0.000 n=20) MemmoveUnalignedSrc/4 3.602n ± 0% 3.202n ± 0% -11.10% (p=0.000 n=20) MemmoveUnalignedSrc/5 3.202n ± 0% 3.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/6 3.202n ± 0% 3.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/7 3.202n ± 0% 3.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/8 3.602n ± 0% 3.202n ± 0% -11.10% (p=0.000 n=20) MemmoveUnalignedSrc/9 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/10 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/11 3.602n ± 0% 3.602n ± 0% ~ (p=0.746 n=20) MemmoveUnalignedSrc/12 3.602n ± 0% 3.602n ± 0% ~ (p=0.407 n=20) MemmoveUnalignedSrc/13 3.603n ± 0% 3.602n ± 0% -0.03% (p=0.001 n=20) MemmoveUnalignedSrc/14 3.603n ± 0% 3.602n ± 0% -0.01% (p=0.013 n=20) MemmoveUnalignedSrc/15 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/16 3.602n ± 0% 3.602n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/32 4.002n ± 0% 4.002n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrc/64 4.803n ± 0% 4.803n ± 0% 0.00% (p=0.008 n=20) MemmoveUnalignedSrc/128 8.405n ± 0% 8.405n ± 0% 0.00% (p=0.003 n=20) MemmoveUnalignedSrc/256 12.04n ± 3% 12.20n ± 2% ~ (p=0.151 n=20) MemmoveUnalignedSrc/512 19.11n ± 0% 19.10n ± 3% ~ (p=0.621 n=20) MemmoveUnalignedSrc/1024 35.62n ± 0% 35.62n ± 0% ~ (p=0.407 n=20) MemmoveUnalignedSrc/2048 68.04n ± 0% 68.35n ± 0% +0.46% (p=0.000 n=20) MemmoveUnalignedSrc/4096 133.2n ± 1% 133.3n ± 0% ~ (p=0.131 n=20) 
MemmoveUnalignedSrcDst/f_16_0 4.202n ± 0% 4.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/b_16_0 4.202n ± 0% 4.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/f_16_1 4.202n ± 0% 4.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/b_16_1 4.202n ± 0% 4.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/f_16_4 4.202n ± 0% 4.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/b_16_4 4.202n ± 0% 4.202n ± 0% ~ (p=0.661 n=20) MemmoveUnalignedSrcDst/f_16_7 4.202n ± 0% 4.202n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/b_16_7 4.203n ± 0% 4.202n ± 0% -0.02% (p=0.008 n=20) MemmoveUnalignedSrcDst/f_64_0 6.103n ± 0% 6.100n ± 0% ~ (p=0.595 n=20) MemmoveUnalignedSrcDst/b_64_0 6.103n ± 0% 6.102n ± 0% ~ (p=0.973 n=20) MemmoveUnalignedSrcDst/f_64_1 7.419n ± 0% 7.226n ± 0% -2.59% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_64_1 6.745n ± 0% 6.941n ± 0% +2.89% (p=0.000 n=20) MemmoveUnalignedSrcDst/f_64_4 7.420n ± 0% 7.223n ± 0% -2.65% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_64_4 6.753n ± 0% 6.941n ± 0% +2.79% (p=0.000 n=20) MemmoveUnalignedSrcDst/f_64_7 7.423n ± 0% 7.204n ± 0% -2.96% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_64_7 6.750n ± 0% 6.941n ± 0% +2.83% (p=0.000 n=20) MemmoveUnalignedSrcDst/f_256_0 12.96n ± 0% 12.99n ± 0% +0.27% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_256_0 12.91n ± 0% 12.94n ± 0% +0.23% (p=0.000 n=20) MemmoveUnalignedSrcDst/f_256_1 17.21n ± 0% 17.21n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/b_256_1 17.61n ± 0% 17.61n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/f_256_4 16.21n ± 0% 16.21n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/b_256_4 16.41n ± 0% 16.41n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/f_256_7 14.12n ± 0% 14.10n ± 0% ~ (p=0.307 n=20) MemmoveUnalignedSrcDst/b_256_7 14.81n ± 0% 14.81n ± 0% ~ (p=1.000 n=20) ¹ MemmoveUnalignedSrcDst/f_4096_0 109.3n ± 0% 109.4n ± 0% +0.09% (p=0.004 n=20) MemmoveUnalignedSrcDst/b_4096_0 109.6n ± 0% 109.6n ± 0% ~ (p=1.000 n=20) MemmoveUnalignedSrcDst/f_4096_1 113.5n ± 0% 113.5n ± 0% ~ (p=1.000 n=20) 
MemmoveUnalignedSrcDst/b_4096_1 113.7n ± 0% 113.7n ± 0% ~ (p=1.000 n=20) ¹ MemmoveUnalignedSrcDst/f_4096_4 112.3n ± 0% 112.3n ± 0% ~ (p=0.763 n=20) MemmoveUnalignedSrcDst/b_4096_4 112.6n ± 0% 112.9n ± 1% +0.31% (p=0.032 n=20) MemmoveUnalignedSrcDst/f_4096_7 110.6n ± 0% 110.6n ± 0% ~ (p=1.000 n=20) ¹ MemmoveUnalignedSrcDst/b_4096_7 111.1n ± 0% 111.1n ± 0% ~ (p=1.000 n=20) ¹ MemmoveUnalignedSrcDst/f_65536_0 4.801µ ± 0% 4.818µ ± 0% +0.34% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_65536_0 5.027µ ± 0% 5.036µ ± 0% +0.19% (p=0.007 n=20) MemmoveUnalignedSrcDst/f_65536_1 4.815µ ± 0% 4.729µ ± 0% -1.78% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_65536_1 4.659µ ± 0% 4.737µ ± 1% +1.69% (p=0.000 n=20) MemmoveUnalignedSrcDst/f_65536_4 4.807µ ± 0% 4.721µ ± 0% -1.78% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_65536_4 4.659µ ± 0% 4.601µ ± 0% -1.23% (p=0.000 n=20) MemmoveUnalignedSrcDst/f_65536_7 4.868µ ± 0% 4.759µ ± 0% -2.23% (p=0.000 n=20) MemmoveUnalignedSrcDst/b_65536_7 4.665µ ± 0% 4.709µ ± 0% +0.93% (p=0.000 n=20) MemmoveUnalignedSrcOverlap/32 6.804n ± 0% 6.810n ± 0% +0.09% (p=0.000 n=20) MemmoveUnalignedSrcOverlap/64 10.41n ± 0% 10.42n ± 0% +0.10% (p=0.000 n=20) MemmoveUnalignedSrcOverlap/128 11.59n ± 0% 11.58n ± 0% ~ (p=0.414 n=20) MemmoveUnalignedSrcOverlap/256 14.22n ± 0% 14.29n ± 0% +0.46% (p=0.000 n=20) MemmoveUnalignedSrcOverlap/512 23.11n ± 0% 23.04n ± 0% -0.28% (p=0.001 n=20) MemmoveUnalignedSrcOverlap/1024 41.44n ± 0% 41.47n ± 0% ~ (p=0.693 n=20) MemmoveUnalignedSrcOverlap/2048 81.25n ± 0% 81.25n ± 0% ~ (p=0.405 n=20) MemmoveUnalignedSrcOverlap/4096 166.1n ± 0% 166.1n ± 0% ~ (p=0.451 n=20) geomean 13.02n 12.69n -2.51% ¹ all samples are equal Change-Id: I712adc7670f6ae360714ec5a770d00d76c8700ed Reviewed-on: https://go-review.googlesource.com/c/go/+/618815 Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Carlos Amedee <carlos@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: abner chenc 
<chenguoqi@loongson.cn>
163 lines
3.2 KiB
Go
163 lines
3.2 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
import "runtime"
|
|
|
|
// Check small copies are replaced with moves.
|
|
|
|
// movesmall4 performs an overlapping copy inside a 4-byte local array:
// the destination starts one byte after the source, so 3 bytes are moved.
// The directives below require that no call to memmove appears in the
// generated code on 386, amd64, arm, arm64 and ppc64x.
func movesmall4() {
|
|
x := [...]byte{1, 2, 3, 4}
|
|
// 386:-".*memmove"
|
|
// amd64:-".*memmove"
|
|
// arm:-".*memmove"
|
|
// arm64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
copy(x[1:], x[:])
|
|
}
|
|
|
|
// movesmall7 performs an overlapping copy inside a 7-byte local array:
// the destination starts one byte after the source, so 6 bytes are moved.
// The directives below require that no call to memmove appears in the
// generated code on 386, amd64, arm64 and ppc64x (arm is not checked here).
func movesmall7() {
|
|
x := [...]byte{1, 2, 3, 4, 5, 6, 7}
|
|
// 386:-".*memmove"
|
|
// amd64:-".*memmove"
|
|
// arm64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
copy(x[1:], x[:])
|
|
}
|
|
|
|
// movesmall16 performs an overlapping copy inside a 16-byte local array:
// the destination starts one byte after the source, so 15 bytes are moved.
// The two directives differ in polarity: on amd64 no memmove call may
// appear, whereas on ppc64x a memmove call is expected to be present.
func movesmall16() {
|
|
x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
|
|
// amd64:-".*memmove"
|
|
// ppc64x:".*memmove"
|
|
copy(x[1:], x[:])
|
|
}
|
|
|
|
// x is a package-level 256-byte array. The moveDisjointStack* functions
// below copy out of it, and the ptr* functions copy within it. Functions
// that declare their own local x shadow this variable.
var x [256]byte
|
|
|
|
// Check that large disjoint copies are replaced with moves.
|
|
|
|
// moveDisjointStack32 copies the first 32 bytes of the package-level
// array x into a 32-byte local array s.
//
// The directives require, on ppc64x, that no memmove call is emitted.
// For power8 an LXVD2X instruction must be present while ADD and BC must
// be absent; for power9 an LXV instruction must be present while LXVD2X,
// ADD and BC must be absent.
// NOTE(review): the absence of ADD and BC presumably means the copy is
// emitted as straight-line code rather than a loop - confirm.
// NOTE(review): runtime.KeepAlive(&s) presumably keeps s live so the
// copy is not optimized away - confirm.
func moveDisjointStack32() {
|
|
var s [32]byte
|
|
// ppc64x:-".*memmove"
|
|
// ppc64x/power8:"LXVD2X",-"ADD",-"BC"
|
|
// ppc64x/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
|
|
copy(s[:], x[:32])
|
|
runtime.KeepAlive(&s)
|
|
}
|
|
|
|
// moveDisjointStack64 copies the first 96 bytes of the package-level
// array x into a 96-byte local array s. Note that, despite the 64 in the
// function name, the size actually copied is 96 bytes.
//
// The directives require, on ppc64x, that no memmove call is emitted.
// For power8 the instructions LXVD2X, ADD and BC must all be present;
// for power9 an LXV instruction must be present while LXVD2X, ADD and BC
// must be absent.
// NOTE(review): the presence of ADD and BC on power8 presumably
// indicates the copy is emitted as a loop at this size - confirm.
// NOTE(review): runtime.KeepAlive(&s) presumably keeps s live so the
// copy is not optimized away - confirm.
func moveDisjointStack64() {
|
|
var s [96]byte
|
|
// ppc64x:-".*memmove"
|
|
// ppc64x/power8:"LXVD2X","ADD","BC"
|
|
// ppc64x/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
|
|
copy(s[:], x[:96])
|
|
runtime.KeepAlive(&s)
|
|
}
|
|
|
|
// moveDisjointStack copies all 256 bytes of the package-level array x
// into a 256-byte local array s.
//
// The directives require that no memmove call is emitted on s390x, amd64
// and ppc64x. For power8 an LXVD2X instruction must be present; for
// power9 an LXV instruction must be present and LXVD2X must be absent.
// NOTE(review): runtime.KeepAlive(&s) presumably keeps s live so the
// copy is not optimized away - confirm.
func moveDisjointStack() {
|
|
var s [256]byte
|
|
// s390x:-".*memmove"
|
|
// amd64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
// ppc64x/power8:"LXVD2X"
|
|
// ppc64x/power9:"LXV",-"LXVD2X"
|
|
copy(s[:], x[:])
|
|
runtime.KeepAlive(&s)
|
|
}
|
|
|
|
// moveDisjointArg copies all 256 bytes of the array pointed to by the
// argument b into a 256-byte local array s.
//
// The directives require that no memmove call is emitted on s390x, amd64
// and ppc64x. For power8 an LXVD2X instruction must be present; for
// power9 an LXV instruction must be present and LXVD2X must be absent.
// NOTE(review): runtime.KeepAlive(&s) presumably keeps s live so the
// copy is not optimized away - confirm.
func moveDisjointArg(b *[256]byte) {
|
|
var s [256]byte
|
|
// s390x:-".*memmove"
|
|
// amd64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
// ppc64x/power8:"LXVD2X"
|
|
// ppc64x/power9:"LXV",-"LXVD2X"
|
|
copy(s[:], b[:])
|
|
runtime.KeepAlive(&s)
|
|
}
|
|
|
|
// moveDisjointNoOverlap copies the upper 128 bytes of the array pointed
// to by a onto its lower 128 bytes. Source and destination live in the
// same array, but because only 128 bytes are moved the two ranges do not
// overlap.
//
// The directives require that no memmove call is emitted on s390x, amd64
// and ppc64x. For power8 an LXVD2X instruction must be present; for
// power9 an LXV instruction must be present and LXVD2X must be absent.
func moveDisjointNoOverlap(a *[256]byte) {
|
|
// s390x:-".*memmove"
|
|
// amd64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
// ppc64x/power8:"LXVD2X"
|
|
// ppc64x/power9:"LXV",-"LXVD2X"
|
|
copy(a[:], a[128:])
|
|
}
|
|
|
|
// Check arch-specific memmove lowering. See issue 41662 for details.
|
|
|
|
// moveArchLowering1 copies the 1-byte array pointed to by x into the
// slice b. The preceding index expression b[1] establishes that b has at
// least 2 elements, so the copy length is exactly 1 byte.
//
// The directives require that no memmove call is emitted on amd64,
// arm64, loong64 and ppc64x.
func moveArchLowering1(b []byte, x *[1]byte) {
|
|
_ = b[1]
|
|
// amd64:-".*memmove"
|
|
// arm64:-".*memmove"
|
|
// loong64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
copy(b, x[:])
|
|
}
|
|
|
|
// moveArchLowering2 copies the 2-byte array pointed to by x into the
// slice b. The preceding index expression b[2] establishes that b has at
// least 3 elements, so the copy length is exactly 2 bytes.
//
// The directives require that no memmove call is emitted on amd64,
// arm64, loong64 and ppc64x.
func moveArchLowering2(b []byte, x *[2]byte) {
|
|
_ = b[2]
|
|
// amd64:-".*memmove"
|
|
// arm64:-".*memmove"
|
|
// loong64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
copy(b, x[:])
|
|
}
|
|
|
|
// moveArchLowering4 copies the 4-byte array pointed to by x into the
// slice b. The preceding index expression b[4] establishes that b has at
// least 5 elements, so the copy length is exactly 4 bytes.
//
// The directives require that no memmove call is emitted on amd64,
// arm64, loong64 and ppc64x.
func moveArchLowering4(b []byte, x *[4]byte) {
|
|
_ = b[4]
|
|
// amd64:-".*memmove"
|
|
// arm64:-".*memmove"
|
|
// loong64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
copy(b, x[:])
|
|
}
|
|
|
|
// moveArchLowering8 copies the 8-byte array pointed to by x into the
// slice b. The preceding index expression b[8] establishes that b has at
// least 9 elements, so the copy length is exactly 8 bytes.
//
// The directives require that no memmove call is emitted on amd64, arm64
// and ppc64x. Unlike the 1-, 2- and 4-byte variants, there is no loong64
// directive on this function.
func moveArchLowering8(b []byte, x *[8]byte) {
|
|
_ = b[8]
|
|
// amd64:-".*memmove"
|
|
// arm64:-".*memmove"
|
|
// ppc64x:-".*memmove"
|
|
copy(b, x[:])
|
|
}
|
|
|
|
// moveArchLowering16 copies the 16-byte array pointed to by x into the
// slice b. The preceding index expression b[16] establishes that b has
// at least 17 elements, so the copy length is exactly 16 bytes.
//
// Only amd64 is checked here: no memmove call may be emitted.
func moveArchLowering16(b []byte, x *[16]byte) {
|
|
_ = b[16]
|
|
// amd64:-".*memmove"
|
|
copy(b, x[:])
|
|
}
|
|
|
|
// Check that no branches are generated when the pointers are [not] equal.
|
|
|
|
// ptrEqual copies the package-level array x onto itself, so the source
// and destination pointers are identical.
//
// The directives require that no JEQ or JNE instruction is emitted on
// amd64, and no BEQ or BNE instruction on ppc64x and s390x.
func ptrEqual() {
|
|
// amd64:-"JEQ",-"JNE"
|
|
// ppc64x:-"BEQ",-"BNE"
|
|
// s390x:-"BEQ",-"BNE"
|
|
copy(x[:], x[:])
|
|
}
|
|
|
|
// ptrOneOffset copies within the package-level array x with the
// destination starting one byte after the source, so the two pointers
// are known to differ.
//
// The directives require that no JEQ or JNE instruction is emitted on
// amd64, and no BEQ or BNE instruction on ppc64x and s390x.
func ptrOneOffset() {
|
|
// amd64:-"JEQ",-"JNE"
|
|
// ppc64x:-"BEQ",-"BNE"
|
|
// s390x:-"BEQ",-"BNE"
|
|
copy(x[1:], x[:])
|
|
}
|
|
|
|
// ptrBothOffset copies within the package-level array x with both
// operands offset from the start of the array: the destination begins at
// index 1 and the source at index 2, so the two pointers are known to
// differ.
//
// The directives require that no JEQ or JNE instruction is emitted on
// amd64, and no BEQ or BNE instruction on ppc64x and s390x.
func ptrBothOffset() {
|
|
// amd64:-"JEQ",-"JNE"
|
|
// ppc64x:-"BEQ",-"BNE"
|
|
// s390x:-"BEQ",-"BNE"
|
|
copy(x[1:], x[2:])
|
|
}
|
|
|
|
// Verify #62698 on PPC64.
|
|
// noMaskOnCopy returns a[x &^ n], where n is the result of copying the
// string s into an empty byte slice. Because the destination has length
// zero, that copy always returns 0, so the index is simply x.
//
// The directive requires that, on ppc64x, the generated code contains
// neither a MOVD of the constant -1 nor an AND instruction.
// NOTE(review): presumably this guards against the regression reported
// in issue 62698, referenced in the comment above - confirm against the
// issue.
func noMaskOnCopy(a []int, s string, x int) int {
|
|
// ppc64x:-"MOVD\t$-1", -"AND"
|
|
return a[x&^copy([]byte{}, s)]
|
|
}
|