1
0
mirror of https://github.com/golang/go synced 2024-11-26 08:17:59 -07:00

internal/bytealg: port more performance-critical functions to ABIInternal

CL 308931 ported several runtime assembly functions to ABIInternal so
that compiler-generated ABIInternal calls don't go through ABI
wrappers, but it missed the runtime assembly functions that are
actually defined in internal/bytealg.

This eliminates the cost of wrappers for the BleveQuery and
GopherLuaKNucleotide benchmarks, but there's still more to do for
Tile38.

                                      0-base                1-wrappers
                                     sec/op        sec/op            vs base
BleveQuery                          6.507 ± 0%    6.477 ± 0%  -0.46% (p=0.004 n=20)
GopherLuaKNucleotide                30.39 ± 1%    30.34 ± 0%       ~ (p=0.301 n=20)
Tile38IntersectsCircle100kmRequest 1.038m ± 1%   1.080m ± 2%  +4.03% (p=0.000 n=20)

For #40724.

Change-Id: I0b722443f684fcb997b1d70802c5ed4b8d8f9829
Reviewed-on: https://go-review.googlesource.com/c/go/+/310184
Trust: Austin Clements <austin@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
Austin Clements 2021-04-14 19:15:42 -04:00
parent 48b7432e3f
commit 8f4c5068e0
4 changed files with 107 additions and 10 deletions

View File

@@ -1764,8 +1764,9 @@ func cmdlist() {
// IsRuntimePackagePath examines 'pkgpath' and returns TRUE if it // IsRuntimePackagePath examines 'pkgpath' and returns TRUE if it
// belongs to the collection of "runtime-related" packages, including // belongs to the collection of "runtime-related" packages, including
// "runtime" itself, "reflect", "syscall", and the // "runtime" itself, "reflect", "syscall", and the
// "runtime/internal/*" packages. See also the function of the same // "runtime/internal/*" packages.
// name in cmd/internal/objabi/path.go. //
// Keep in sync with cmd/internal/objabi/path.go:IsRuntimePackagePath.
func IsRuntimePackagePath(pkgpath string) bool { func IsRuntimePackagePath(pkgpath string) bool {
rval := false rval := false
switch pkgpath { switch pkgpath {
@@ -1777,6 +1778,8 @@ func IsRuntimePackagePath(pkgpath string) bool {
rval = true rval = true
case "crypto/x509/internal/macos": // libc function wrappers need to be ABIInternal case "crypto/x509/internal/macos": // libc function wrappers need to be ABIInternal
rval = true rval = true
case "internal/bytealg":
rval = true
default: default:
rval = strings.HasPrefix(pkgpath, "runtime/internal") rval = strings.HasPrefix(pkgpath, "runtime/internal")
} }

View File

@@ -47,6 +47,8 @@ func PathToPrefix(s string) string {
// some cases need to be aware of when they are building such a // some cases need to be aware of when they are building such a
// package, for example to enable features such as ABI selectors in // package, for example to enable features such as ABI selectors in
// assembly sources. // assembly sources.
//
// Keep in sync with cmd/dist/build.go:IsRuntimePackagePath.
func IsRuntimePackagePath(pkgpath string) bool { func IsRuntimePackagePath(pkgpath string) bool {
rval := false rval := false
switch pkgpath { switch pkgpath {
@@ -58,6 +60,8 @@ func IsRuntimePackagePath(pkgpath string) bool {
rval = true rval = true
case "crypto/x509/internal/macos": // libc function wrappers need to be ABIInternal case "crypto/x509/internal/macos": // libc function wrappers need to be ABIInternal
rval = true rval = true
case "internal/bytealg":
rval = true
default: default:
rval = strings.HasPrefix(pkgpath, "runtime/internal") rval = strings.HasPrefix(pkgpath, "runtime/internal")
} }

View File

@@ -5,20 +5,41 @@
#include "go_asm.h" #include "go_asm.h"
#include "textflag.h" #include "textflag.h"
TEXT ·Compare(SB),NOSPLIT,$0-56 TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
#ifdef GOEXPERIMENT_regabiargs
// AX = a_base (want in SI)
// BX = a_len (want in BX)
// CX = a_cap (unused)
// DI = b_base (want in DI)
// SI = b_len (want in DX)
// R8 = b_cap (unused)
MOVQ SI, DX
MOVQ AX, SI
#else
MOVQ a_base+0(FP), SI MOVQ a_base+0(FP), SI
MOVQ a_len+8(FP), BX MOVQ a_len+8(FP), BX
MOVQ b_base+24(FP), DI MOVQ b_base+24(FP), DI
MOVQ b_len+32(FP), DX MOVQ b_len+32(FP), DX
LEAQ ret+48(FP), R9 LEAQ ret+48(FP), R9
#endif
JMP cmpbody<>(SB) JMP cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
#ifdef GOEXPERIMENT_regabiargs
// AX = a_base (want in SI)
// BX = a_len (want in BX)
// CX = b_base (want in DI)
// DI = b_len (want in DX)
MOVQ AX, SI
MOVQ DI, DX
MOVQ CX, DI
#else
MOVQ a_base+0(FP), SI MOVQ a_base+0(FP), SI
MOVQ a_len+8(FP), BX MOVQ a_len+8(FP), BX
MOVQ b_base+16(FP), DI MOVQ b_base+16(FP), DI
MOVQ b_len+24(FP), DX MOVQ b_len+24(FP), DX
LEAQ ret+32(FP), R9 LEAQ ret+32(FP), R9
#endif
JMP cmpbody<>(SB) JMP cmpbody<>(SB)
// input: // input:
@@ -26,7 +47,12 @@ TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
// DI = b // DI = b
// BX = alen // BX = alen
// DX = blen // DX = blen
#ifndef GOEXPERIMENT_regabiargs
// R9 = address of output word (stores -1/0/1 here) // R9 = address of output word (stores -1/0/1 here)
#else
// output:
// AX = output (-1/0/1)
#endif
TEXT cmpbody<>(SB),NOSPLIT,$0-0 TEXT cmpbody<>(SB),NOSPLIT,$0-0
CMPQ SI, DI CMPQ SI, DI
JEQ allsame JEQ allsame
@@ -74,7 +100,9 @@ diff16:
CMPB CX, (DI)(BX*1) CMPB CX, (DI)(BX*1)
SETHI AX SETHI AX
LEAQ -1(AX*2), AX // convert 1/0 to +1/-1 LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
#ifndef GOEXPERIMENT_regabiargs
MOVQ AX, (R9) MOVQ AX, (R9)
#endif
RET RET
// 0 through 16 bytes left, alen>=8, blen>=8 // 0 through 16 bytes left, alen>=8, blen>=8
@@ -100,7 +128,9 @@ diff8:
SHRQ CX, AX // move a's bit to bottom SHRQ CX, AX // move a's bit to bottom
ANDQ $1, AX // mask bit ANDQ $1, AX // mask bit
LEAQ -1(AX*2), AX // 1/0 => +1/-1 LEAQ -1(AX*2), AX // 1/0 => +1/-1
#ifndef GOEXPERIMENT_regabiargs
MOVQ AX, (R9) MOVQ AX, (R9)
#endif
RET RET
// 0-7 bytes in common // 0-7 bytes in common
@@ -139,7 +169,9 @@ di_finish:
SHRQ CX, SI // move a's bit to bottom SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1 LEAQ -1(SI*2), AX // 1/0 => +1/-1
#ifndef GOEXPERIMENT_regabiargs
MOVQ AX, (R9) MOVQ AX, (R9)
#endif
RET RET
allsame: allsame:
@@ -149,7 +181,9 @@ allsame:
SETGT AX // 1 if alen > blen SETGT AX // 1 if alen > blen
SETEQ CX // 1 if alen == blen SETEQ CX // 1 if alen == blen
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
#ifndef GOEXPERIMENT_regabiargs
MOVQ AX, (R9) MOVQ AX, (R9)
#endif
RET RET
// this works for >= 64 bytes of data. // this works for >= 64 bytes of data.

View File

@@ -6,7 +6,21 @@
#include "textflag.h" #include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool // memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal(SB),NOSPLIT,$0-25 TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
#ifdef GOEXPERIMENT_regabiargs
// AX = a (want in SI)
// BX = b (want in DI)
// CX = size (want in BX)
CMPQ AX, BX
JNE neq
MOVQ $1, AX // return 1
RET
neq:
MOVQ AX, SI
MOVQ BX, DI
MOVQ CX, BX
JMP memeqbody<>(SB)
#else
MOVQ a+0(FP), SI MOVQ a+0(FP), SI
MOVQ b+8(FP), DI MOVQ b+8(FP), DI
CMPQ SI, DI CMPQ SI, DI
@@ -17,9 +31,24 @@ TEXT runtime·memequal(SB),NOSPLIT,$0-25
eq: eq:
MOVB $1, ret+24(FP) MOVB $1, ret+24(FP)
RET RET
#endif
// memequal_varlen(a, b unsafe.Pointer) bool // memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17 TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
#ifdef GOEXPERIMENT_regabiargs
// AX = a (want in SI)
// BX = b (want in DI)
// 8(DX) = size (want in BX)
CMPQ AX, BX
JNE neq
MOVQ $1, AX // return 1
RET
neq:
MOVQ AX, SI
MOVQ BX, DI
MOVQ 8(DX), BX // compiler stores size at offset 8 in the closure
JMP memeqbody<>(SB)
#else
MOVQ a+0(FP), SI MOVQ a+0(FP), SI
MOVQ b+8(FP), DI MOVQ b+8(FP), DI
CMPQ SI, DI CMPQ SI, DI
@@ -30,11 +59,18 @@ TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
eq: eq:
MOVB $1, ret+16(FP) MOVB $1, ret+16(FP)
RET RET
#endif
// a in SI // Input:
// b in DI // a in SI
// count in BX // b in DI
// address of result byte in AX // count in BX
#ifndef GOEXPERIMENT_regabiargs
// address of result byte in AX
#else
// Output:
// result in AX
#endif
TEXT memeqbody<>(SB),NOSPLIT,$0-0 TEXT memeqbody<>(SB),NOSPLIT,$0-0
CMPQ BX, $8 CMPQ BX, $8
JB small JB small
@@ -68,7 +104,11 @@ hugeloop:
SUBQ $64, BX SUBQ $64, BX
CMPL DX, $0xffff CMPL DX, $0xffff
JEQ hugeloop JEQ hugeloop
#ifdef GOEXPERIMENT_regabiargs
XORQ AX, AX // return 0
#else
MOVB $0, (AX) MOVB $0, (AX)
#endif
RET RET
// 64 bytes at a time using ymm registers // 64 bytes at a time using ymm registers
@@ -89,7 +129,11 @@ hugeloop_avx2:
CMPL DX, $0xffffffff CMPL DX, $0xffffffff
JEQ hugeloop_avx2 JEQ hugeloop_avx2
VZEROUPPER VZEROUPPER
#ifdef GOEXPERIMENT_regabiargs
XORQ AX, AX // return 0
#else
MOVB $0, (AX) MOVB $0, (AX)
#endif
RET RET
bigloop_avx2: bigloop_avx2:
@@ -106,7 +150,11 @@ bigloop:
SUBQ $8, BX SUBQ $8, BX
CMPQ CX, DX CMPQ CX, DX
JEQ bigloop JEQ bigloop
#ifdef GOEXPERIMENT_regabiargs
XORQ AX, AX // return 0
#else
MOVB $0, (AX) MOVB $0, (AX)
#endif
RET RET
// remaining 0-8 bytes // remaining 0-8 bytes
@@ -114,7 +162,11 @@ leftover:
MOVQ -8(SI)(BX*1), CX MOVQ -8(SI)(BX*1), CX
MOVQ -8(DI)(BX*1), DX MOVQ -8(DI)(BX*1), DX
CMPQ CX, DX CMPQ CX, DX
#ifdef GOEXPERIMENT_regabiargs
SETEQ AX
#else
SETEQ (AX) SETEQ (AX)
#endif
RET RET
small: small:
@@ -149,6 +201,10 @@ di_finish:
SUBQ SI, DI SUBQ SI, DI
SHLQ CX, DI SHLQ CX, DI
equal: equal:
#ifdef GOEXPERIMENT_regabiargs
SETEQ AX
#else
SETEQ (AX) SETEQ (AX)
#endif
RET RET