From e945623930d2c85b9a81476203451ca8f4092875 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Tue, 11 Sep 2018 11:42:15 -0400 Subject: [PATCH] runtime: improve CALLFN macro for ppc64x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous CALLFN macro was copying a single byte at a time which is extremely inefficient on ppc64x. This changes the macro so it copies 8 bytes at a time. benchmark in reflect: name old time/op new time/op delta Call-8 177ns ± 0% 165ns ± 0% -6.78% (p=1.000 n=1+1) CallArgCopy/size=128-8 194ns ± 0% 140ns ± 0% -27.84% (p=1.000 n=1+1) CallArgCopy/size=256-8 253ns ± 0% 159ns ± 0% -37.15% (p=1.000 n=1+1) CallArgCopy/size=1024-8 612ns ± 0% 222ns ± 0% -63.73% (p=1.000 n=1+1) CallArgCopy/size=4096-8 2.14µs ± 0% 0.53µs ± 0% -75.01% (p=1.000 n=1+1) CallArgCopy/size=65536-8 33.0µs ± 0% 7.3µs ± 0% -77.72% (p=1.000 n=1+1) Change-Id: I71f6ee788264e61bb072264d21b77b83592c9dca Reviewed-on: https://go-review.googlesource.com/134635 Run-TryBot: Lynn Boger TryBot-Result: Gobot Gobot Reviewed-by: Carlos Eduardo Seo Reviewed-by: Michael Munday --- src/runtime/asm_ppc64x.s | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 57877c0194..b180cb06ab 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -390,15 +390,36 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ /* copy arguments to stack */ \ MOVD arg+16(FP), R3; \ MOVWZ argsize+24(FP), R4; \ - MOVD R1, R5; \ - ADD $(FIXED_FRAME-1), R5; \ - SUB $1, R3; \ - ADD R5, R4; \ - CMP R5, R4; \ - BEQ 4(PC); \ - MOVBZU 1(R3), R6; \ - MOVBZU R6, 1(R5); \ - BR -4(PC); \ + MOVD R1, R5; \ + CMP R4, $8; \ + BLT tailsetup; \ + /* copy 8 at a time if possible */ \ + ADD $(FIXED_FRAME-8), R5; \ + SUB $8, R3; \ +top: \ + MOVDU 8(R3), R7; \ + MOVDU R7, 8(R5); \ + SUB $8, R4; \ + CMP R4, $8; \ + BGE top; \ + /* handle remaining bytes */ \ + CMP $0, R4; \ + BEQ callfn; \ + ADD $7, R3; \ + ADD $7, R5; \ + BR tail; \ +tailsetup: \ + CMP $0, R4; \ + BEQ callfn; \ + ADD $(FIXED_FRAME-1), R5; \ + SUB $1, R3; \ +tail: \ + MOVBU 1(R3), R6; \ + MOVBU R6, 1(R5); \ + SUB $1, R4; \ + CMP $0, R4; \ + BGT tail; \ +callfn: \ /* call function */ \ MOVD f+8(FP), R11; \ MOVD (R11), R12; \