mirror of
https://github.com/golang/go
synced 2024-11-17 07:54:41 -07:00
runtime: remove AVX2less code in memclrNoHeapPointers for GOAMD64 >= 3
Optimize memclr by removing simple case loop along with the runtime check since AVX2 is guaranteed to be available when compiling with GOAMD64 >= 3 name old speed new speed delta Memclr/5-12 2.70GB/s ± 1% 2.73GB/s ± 1% ~ (p=0.056 n=5+5) Memclr/16-12 7.00GB/s ± 2% 7.03GB/s ± 1% ~ (p=1.000 n=5+5) Memclr/64-12 25.5GB/s ± 1% 25.5GB/s ± 1% ~ (p=0.548 n=5+5) Memclr/256-12 53.4GB/s ± 1% 52.7GB/s ± 2% ~ (p=0.222 n=5+5) Memclr/4096-12 109GB/s ± 1% 129GB/s ± 0% +18.57% (p=0.008 n=5+5) Memclr/65536-12 75.2GB/s ± 2% 78.3GB/s ± 3% +4.14% (p=0.008 n=5+5) Memclr/1M-12 53.5GB/s ± 2% 54.1GB/s ± 2% ~ (p=0.310 n=5+5) Memclr/4M-12 53.1GB/s ± 3% 52.9GB/s ± 2% ~ (p=1.000 n=5+5) Memclr/8M-12 44.6GB/s ± 3% 45.1GB/s ± 3% ~ (p=0.310 n=5+5) Memclr/16M-12 24.8GB/s ± 2% 24.2GB/s ± 2% ~ (p=0.056 n=5+5) Memclr/64M-12 38.3GB/s ± 1% 37.8GB/s ± 1% ~ (p=0.056 n=5+5) [Geo mean] 31.0GB/s 31.5GB/s +1.78% Change-Id: I6f3014f6338cb3b5a1b94503faa205f043fe2de8 Reviewed-on: https://go-review.googlesource.com/c/go/+/367494 Trust: Cherry Mui <cherryyz@google.com> Trust: Daniel Martí <mvdan@mvdan.cc> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
cc3a3519af
commit
12acf9b0f0
2
src/cmd/dist/build.go
vendored
2
src/cmd/dist/build.go
vendored
@ -732,6 +732,8 @@ func runInstall(pkg string, ch chan struct{}) {
|
||||
pathf("%s/src/runtime/funcdata.h", goroot), 0)
|
||||
copyfile(pathf("%s/pkg/include/asm_ppc64x.h", goroot),
|
||||
pathf("%s/src/runtime/asm_ppc64x.h", goroot), 0)
|
||||
copyfile(pathf("%s/pkg/include/asm_amd64.h", goroot),
|
||||
pathf("%s/src/runtime/asm_amd64.h", goroot), 0)
|
||||
}
|
||||
|
||||
// Generate any missing files; regenerate existing ones.
|
||||
|
14
src/runtime/asm_amd64.h
Normal file
14
src/runtime/asm_amd64.h
Normal file
@ -0,0 +1,14 @@
|
||||
// Copyright 2021 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Define features that are guaranteed to be supported by setting the AMD64 variable.
|
||||
// If a feature is supported, there's no need to check it at runtime every time.
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
#define hasAVX2
|
||||
#endif
|
||||
|
||||
#ifdef GOAMD64_v4
|
||||
#define hasAVX2
|
||||
#endif
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
#include "asm_amd64.h"
|
||||
|
||||
// See memclrNoHeapPointers Go doc for important implementation constraints.
|
||||
|
||||
@ -39,6 +40,8 @@ tail:
|
||||
JBE _65through128
|
||||
CMPQ BX, $256
|
||||
JBE _129through256
|
||||
|
||||
#ifndef hasAVX2
|
||||
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
|
||||
JE loop_preheader_avx2
|
||||
// TODO: for really big clears, use MOVNTDQ, even without AVX2.
|
||||
@ -65,6 +68,7 @@ loop:
|
||||
CMPQ BX, $256
|
||||
JAE loop
|
||||
JMP tail
|
||||
#endif
|
||||
|
||||
loop_preheader_avx2:
|
||||
VPXOR Y0, Y0, Y0
|
||||
|
Loading…
Reference in New Issue
Block a user