1
0
mirror of https://github.com/golang/go synced 2024-11-17 07:54:41 -07:00

runtime: remove AVX2-less code in memclrNoHeapPointers for GOAMD64 >= 3

Optimize memclr by removing the simple-case loop along with the
runtime check, since AVX2 is guaranteed to be available when
compiling with GOAMD64 >= 3.

name             old speed      new speed      delta
Memclr/5-12      2.70GB/s ± 1%  2.73GB/s ± 1%     ~     (p=0.056 n=5+5)
Memclr/16-12     7.00GB/s ± 2%  7.03GB/s ± 1%     ~     (p=1.000 n=5+5)
Memclr/64-12     25.5GB/s ± 1%  25.5GB/s ± 1%     ~     (p=0.548 n=5+5)
Memclr/256-12    53.4GB/s ± 1%  52.7GB/s ± 2%     ~     (p=0.222 n=5+5)
Memclr/4096-12    109GB/s ± 1%   129GB/s ± 0%  +18.57%  (p=0.008 n=5+5)
Memclr/65536-12  75.2GB/s ± 2%  78.3GB/s ± 3%   +4.14%  (p=0.008 n=5+5)
Memclr/1M-12     53.5GB/s ± 2%  54.1GB/s ± 2%     ~     (p=0.310 n=5+5)
Memclr/4M-12     53.1GB/s ± 3%  52.9GB/s ± 2%     ~     (p=1.000 n=5+5)
Memclr/8M-12     44.6GB/s ± 3%  45.1GB/s ± 3%     ~     (p=0.310 n=5+5)
Memclr/16M-12    24.8GB/s ± 2%  24.2GB/s ± 2%     ~     (p=0.056 n=5+5)
Memclr/64M-12    38.3GB/s ± 1%  37.8GB/s ± 1%     ~     (p=0.056 n=5+5)
[Geo mean]       31.0GB/s       31.5GB/s        +1.78%

Change-Id: I6f3014f6338cb3b5a1b94503faa205f043fe2de8
Reviewed-on: https://go-review.googlesource.com/c/go/+/367494
Trust: Cherry Mui <cherryyz@google.com>
Trust: Daniel Martí <mvdan@mvdan.cc>
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
vpachkov 2021-11-29 15:20:37 +03:00 committed by Keith Randall
parent cc3a3519af
commit 12acf9b0f0
3 changed files with 20 additions and 0 deletions

View File

@ -732,6 +732,8 @@ func runInstall(pkg string, ch chan struct{}) {
pathf("%s/src/runtime/funcdata.h", goroot), 0)
copyfile(pathf("%s/pkg/include/asm_ppc64x.h", goroot),
pathf("%s/src/runtime/asm_ppc64x.h", goroot), 0)
copyfile(pathf("%s/pkg/include/asm_amd64.h", goroot),
pathf("%s/src/runtime/asm_amd64.h", goroot), 0)
}
// Generate any missing files; regenerate existing ones.

14
src/runtime/asm_amd64.h Normal file
View File

@ -0,0 +1,14 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Define macros for CPU features that are guaranteed to be available at the
// GOAMD64 level selected for the build. When a feature's macro is defined,
// assembly that includes this header can skip the runtime check for that
// feature and compile only the code path that uses it.
// GOAMD64_v3: AVX2 is guaranteed to be available.
#ifdef GOAMD64_v3
#define hasAVX2
#endif
// GOAMD64_v4: also defines hasAVX2. NOTE(review): the separate block suggests
// GOAMD64_v3 is not defined for v4 builds — confirm against the toolchain.
#ifdef GOAMD64_v4
#define hasAVX2
#endif

View File

@ -6,6 +6,7 @@
#include "go_asm.h"
#include "textflag.h"
#include "asm_amd64.h"
// See memclrNoHeapPointers Go doc for important implementation constraints.
@ -39,6 +40,8 @@ tail:
JBE _65through128
CMPQ BX, $256
JBE _129through256
#ifndef hasAVX2
CMPB internalcpu·X86+const_offsetX86HasAVX2(SB), $1
JE loop_preheader_avx2
// TODO: for really big clears, use MOVNTDQ, even without AVX2.
@ -65,6 +68,7 @@ loop:
CMPQ BX, $256
JAE loop
JMP tail
#endif
loop_preheader_avx2:
VPXOR Y0, Y0, Y0