mirror of
https://github.com/golang/go
synced 2024-11-18 08:04:40 -07:00
reflect, runtime: optimize Value.Call on s390x and add benchmark
Use an MVC loop to copy arguments in runtime.call* rather than copying bytes individually. I've added the benchmark CallArgCopy to test the speed of Value.Call for various argument sizes. name old speed new speed delta CallArgCopy/size=128 439MB/s ± 1% 582MB/s ± 1% +32.41% (p=0.000 n=10+10) CallArgCopy/size=256 695MB/s ± 1% 1172MB/s ± 1% +68.67% (p=0.000 n=10+10) CallArgCopy/size=1024 573MB/s ± 8% 4175MB/s ± 2% +628.11% (p=0.000 n=10+10) CallArgCopy/size=4096 1.46GB/s ± 2% 10.19GB/s ± 1% +600.52% (p=0.000 n=10+10) CallArgCopy/size=65536 1.51GB/s ± 0% 12.30GB/s ± 1% +716.30% (p=0.000 n=9+10) Change-Id: I87dae4809330e7964f6cb4a9e40e5b3254dd519d Reviewed-on: https://go-review.googlesource.com/28096 Run-TryBot: Michael Munday <munday@ca.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Bill O'Farrell <billotosyr@gmail.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
6c43c0c2fd
commit
3436f0776f
@ -1535,6 +1535,34 @@ func BenchmarkCall(b *testing.B) {
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkCallArgCopy(b *testing.B) {
|
||||
byteArray := func(n int) Value {
|
||||
return Zero(ArrayOf(n, TypeOf(byte(0))))
|
||||
}
|
||||
sizes := [...]struct {
|
||||
fv Value
|
||||
arg Value
|
||||
}{
|
||||
{ValueOf(func(a [128]byte) {}), byteArray(128)},
|
||||
{ValueOf(func(a [256]byte) {}), byteArray(256)},
|
||||
{ValueOf(func(a [1024]byte) {}), byteArray(1024)},
|
||||
{ValueOf(func(a [4096]byte) {}), byteArray(4096)},
|
||||
{ValueOf(func(a [65536]byte) {}), byteArray(65536)},
|
||||
}
|
||||
for _, size := range sizes {
|
||||
bench := func(b *testing.B) {
|
||||
args := []Value{size.arg}
|
||||
b.SetBytes(int64(size.arg.Len()))
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
size.fv.Call(args)
|
||||
}
|
||||
}
|
||||
name := fmt.Sprintf("size=%v", size.arg.Len())
|
||||
b.Run(name, bench)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMakeFunc(t *testing.T) {
|
||||
f := dummy
|
||||
fv := MakeFunc(TypeOf(f), func(in []Value) []Value { return in })
|
||||
|
@ -387,53 +387,55 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-32
|
||||
TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
|
||||
NO_LOCAL_POINTERS; \
|
||||
/* copy arguments to stack */ \
|
||||
MOVD arg+16(FP), R3; \
|
||||
MOVWZ argsize+24(FP), R4; \
|
||||
MOVD R15, R5; \
|
||||
ADD $(8-1), R5; \
|
||||
SUB $1, R3; \
|
||||
ADD R5, R4; \
|
||||
CMP R5, R4; \
|
||||
BEQ 6(PC); \
|
||||
ADD $1, R3; \
|
||||
ADD $1, R5; \
|
||||
MOVBZ 0(R3), R6; \
|
||||
MOVBZ R6, 0(R5); \
|
||||
BR -6(PC); \
|
||||
/* call function */ \
|
||||
MOVD arg+16(FP), R4; \
|
||||
MOVWZ argsize+24(FP), R5; \
|
||||
MOVD $stack-MAXSIZE(SP), R6; \
|
||||
loopArgs: /* copy 256 bytes at a time */ \
|
||||
CMP R5, $256; \
|
||||
BLT tailArgs; \
|
||||
SUB $256, R5; \
|
||||
MVC $256, 0(R4), 0(R6); \
|
||||
MOVD $256(R4), R4; \
|
||||
MOVD $256(R6), R6; \
|
||||
BR loopArgs; \
|
||||
tailArgs: /* copy remaining bytes */ \
|
||||
CMP R5, $0; \
|
||||
BEQ callFunction; \
|
||||
SUB $1, R5; \
|
||||
EXRL $callfnMVC<>(SB), R5; \
|
||||
callFunction: \
|
||||
MOVD f+8(FP), R12; \
|
||||
MOVD (R12), R8; \
|
||||
PCDATA $PCDATA_StackMapIndex, $0; \
|
||||
BL (R8); \
|
||||
/* copy return values back */ \
|
||||
MOVD arg+16(FP), R3; \
|
||||
MOVWZ n+24(FP), R4; \
|
||||
MOVWZ retoffset+28(FP), R6; \
|
||||
MOVD R15, R5; \
|
||||
ADD R6, R5; \
|
||||
ADD R6, R3; \
|
||||
SUB R6, R4; \
|
||||
ADD $(8-1), R5; \
|
||||
SUB $1, R3; \
|
||||
ADD R5, R4; \
|
||||
loop: \
|
||||
CMP R5, R4; \
|
||||
BEQ end; \
|
||||
ADD $1, R5; \
|
||||
ADD $1, R3; \
|
||||
MOVBZ 0(R5), R6; \
|
||||
MOVBZ R6, 0(R3); \
|
||||
BR loop; \
|
||||
end: \
|
||||
MOVD arg+16(FP), R6; \
|
||||
MOVWZ n+24(FP), R5; \
|
||||
MOVD $stack-MAXSIZE(SP), R4; \
|
||||
MOVWZ retoffset+28(FP), R1; \
|
||||
ADD R1, R4; \
|
||||
ADD R1, R6; \
|
||||
SUB R1, R5; \
|
||||
loopRets: /* copy 256 bytes at a time */ \
|
||||
CMP R5, $256; \
|
||||
BLT tailRets; \
|
||||
SUB $256, R5; \
|
||||
MVC $256, 0(R4), 0(R6); \
|
||||
MOVD $256(R4), R4; \
|
||||
MOVD $256(R6), R6; \
|
||||
BR loopRets; \
|
||||
tailRets: /* copy remaining bytes */ \
|
||||
CMP R5, $0; \
|
||||
BEQ writeBarrierUpdates; \
|
||||
SUB $1, R5; \
|
||||
EXRL $callfnMVC<>(SB), R5; \
|
||||
writeBarrierUpdates: \
|
||||
/* execute write barrier updates */ \
|
||||
MOVD argtype+0(FP), R7; \
|
||||
MOVD arg+16(FP), R3; \
|
||||
MOVWZ n+24(FP), R4; \
|
||||
MOVWZ retoffset+28(FP), R6; \
|
||||
MOVD R7, 8(R15); \
|
||||
MOVD R3, 16(R15); \
|
||||
MOVD R4, 24(R15); \
|
||||
MOVD R6, 32(R15); \
|
||||
MOVD argtype+0(FP), R1; \
|
||||
MOVD arg+16(FP), R2; \
|
||||
MOVWZ n+24(FP), R3; \
|
||||
MOVWZ retoffset+28(FP), R4; \
|
||||
STMG R1, R4, stack-MAXSIZE(SP); \
|
||||
BL runtime·callwritebarrier(SB); \
|
||||
RET
|
||||
|
||||
@ -464,6 +466,10 @@ CALLFN(·call268435456, 268435456)
|
||||
CALLFN(·call536870912, 536870912)
|
||||
CALLFN(·call1073741824, 1073741824)
|
||||
|
||||
// Not a function: target for EXRL (execute relative long) instruction.
|
||||
TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
|
||||
MVC $1, 0(R4), 0(R6)
|
||||
|
||||
TEXT runtime·procyield(SB),NOSPLIT,$0-0
|
||||
RET
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user