From 46bc9a6e1e94d0c7929a4835a3d163b721085329 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 10 Mar 2023 16:05:47 -0600 Subject: [PATCH] runtime,runtime/cgo: fix tsan/pprof/cgo issues The tsan13 test highlighted a few bugs. The first being runtime.sigprofNonGoWrapper was being called from C code and violating the C ABI. The second was a missed tsan acquire/release after thread creation. The third was runtime.cgoSigtramp violating ELFv2 ABI constraints when loading g. It is reworked to avoid clobbering R30 and R31 via runtime.load_g. Change-Id: Ib2d98047fa1b4e72b8045767e86457a8ddfe492e Reviewed-on: https://go-review.googlesource.com/c/go/+/475935 Reviewed-by: Lynn Boger Reviewed-by: Ian Lance Taylor Run-TryBot: Paul Murphy Reviewed-by: Bryan Mills Reviewed-by: Archana Ravindar TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui --- src/runtime/cgo/abi_ppc64x.h | 161 +++++++++++++++++++++++++++++++++ src/runtime/cgo/gcc_ppc64x.c | 2 + src/runtime/sys_linux_ppc64x.s | 57 +++++++++--- 3 files changed, 206 insertions(+), 14 deletions(-) create mode 100644 src/runtime/cgo/abi_ppc64x.h diff --git a/src/runtime/cgo/abi_ppc64x.h b/src/runtime/cgo/abi_ppc64x.h new file mode 100644 index 00000000000..5982c857b34 --- /dev/null +++ b/src/runtime/cgo/abi_ppc64x.h @@ -0,0 +1,161 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Macros for transitioning from the host ABI to Go ABI +// +// On PPC64/ELFv2 targets, the following registers are callee +// saved when called from C. They must be preserved before +// calling into Go which does not preserve any of them. +// +// R14-R31 +// CR2-4 +// VR20-31 +// F14-F31 +// +// xcoff(aix) and ELFv1 are similar, but may only require a +// subset of these. +// +// These macros assume a 16 byte aligned stack pointer. This +// is required by ELFv1, ELFv2, and AIX PPC64. + +#define SAVE_GPR_SIZE (18*8) +#define SAVE_GPR(offset) \ + MOVD R14, (offset+8*0)(R1) \ + MOVD R15, (offset+8*1)(R1) \ + MOVD R16, (offset+8*2)(R1) \ + MOVD R17, (offset+8*3)(R1) \ + MOVD R18, (offset+8*4)(R1) \ + MOVD R19, (offset+8*5)(R1) \ + MOVD R20, (offset+8*6)(R1) \ + MOVD R21, (offset+8*7)(R1) \ + MOVD R22, (offset+8*8)(R1) \ + MOVD R23, (offset+8*9)(R1) \ + MOVD R24, (offset+8*10)(R1) \ + MOVD R25, (offset+8*11)(R1) \ + MOVD R26, (offset+8*12)(R1) \ + MOVD R27, (offset+8*13)(R1) \ + MOVD R28, (offset+8*14)(R1) \ + MOVD R29, (offset+8*15)(R1) \ + MOVD g, (offset+8*16)(R1) \ + MOVD R31, (offset+8*17)(R1) + +#define RESTORE_GPR(offset) \ + MOVD (offset+8*0)(R1), R14 \ + MOVD (offset+8*1)(R1), R15 \ + MOVD (offset+8*2)(R1), R16 \ + MOVD (offset+8*3)(R1), R17 \ + MOVD (offset+8*4)(R1), R18 \ + MOVD (offset+8*5)(R1), R19 \ + MOVD (offset+8*6)(R1), R20 \ + MOVD (offset+8*7)(R1), R21 \ + MOVD (offset+8*8)(R1), R22 \ + MOVD (offset+8*9)(R1), R23 \ + MOVD (offset+8*10)(R1), R24 \ + MOVD (offset+8*11)(R1), R25 \ + MOVD (offset+8*12)(R1), R26 \ + MOVD (offset+8*13)(R1), R27 \ + MOVD (offset+8*14)(R1), R28 \ + MOVD (offset+8*15)(R1), R29 \ + MOVD (offset+8*16)(R1), g \ + MOVD (offset+8*17)(R1), R31 + +#define SAVE_FPR_SIZE (18*8) +#define SAVE_FPR(offset) \ + FMOVD F14, (offset+8*0)(R1) \ + FMOVD F15, (offset+8*1)(R1) \ + FMOVD F16, (offset+8*2)(R1) \ + FMOVD F17, (offset+8*3)(R1) \ + FMOVD F18, (offset+8*4)(R1) \ + FMOVD F19, (offset+8*5)(R1) \ + FMOVD F20, (offset+8*6)(R1) \ + FMOVD F21, (offset+8*7)(R1) \ + FMOVD F22, (offset+8*8)(R1) \ + FMOVD F23, (offset+8*9)(R1) \ + FMOVD F24, (offset+8*10)(R1) \ + FMOVD F25, (offset+8*11)(R1) \ + FMOVD F26, (offset+8*12)(R1) \ + FMOVD F27, (offset+8*13)(R1) \ + FMOVD F28, (offset+8*14)(R1) \ + FMOVD F29, (offset+8*15)(R1) \ + FMOVD F30, (offset+8*16)(R1) \ + FMOVD F31, (offset+8*17)(R1) + +#define RESTORE_FPR(offset) \ + FMOVD (offset+8*0)(R1), F14 \ + FMOVD (offset+8*1)(R1), F15 \ + FMOVD (offset+8*2)(R1), F16 \ + FMOVD (offset+8*3)(R1), F17 \ + FMOVD (offset+8*4)(R1), F18 \ + FMOVD (offset+8*5)(R1), F19 \ + FMOVD (offset+8*6)(R1), F20 \ + FMOVD (offset+8*7)(R1), F21 \ + FMOVD (offset+8*8)(R1), F22 \ + FMOVD (offset+8*9)(R1), F23 \ + FMOVD (offset+8*10)(R1), F24 \ + FMOVD (offset+8*11)(R1), F25 \ + FMOVD (offset+8*12)(R1), F26 \ + FMOVD (offset+8*13)(R1), F27 \ + FMOVD (offset+8*14)(R1), F28 \ + FMOVD (offset+8*15)(R1), F29 \ + FMOVD (offset+8*16)(R1), F30 \ + FMOVD (offset+8*17)(R1), F31 + +// Save and restore VR20-31 (aka VSR56-63). These +// macros must point to a 16B aligned offset. +#define SAVE_VR_SIZE (12*16) +#define SAVE_VR(offset, rtmp) \ + MOVD $(offset), rtmp \ + STVX V20, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V21, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V22, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V23, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V24, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V25, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V26, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V27, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V28, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V29, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V30, (rtmp)(R1) \ + ADD $16, rtmp \ + STVX V31, (rtmp)(R1) + +#define RESTORE_VR(offset, rtmp) \ + MOVD $(offset), rtmp \ + LVX (rtmp)(R1), V20 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V21 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V22 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V23 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V24 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V25 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V26 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V27 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V28 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V29 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V30 \ + ADD $16, rtmp \ + LVX (rtmp)(R1), V31 + +// LR and CR are saved in the caller's frame. The callee must +// make space for all other callee-save registers. +#define SAVE_ALL_REG_SIZE (SAVE_GPR_SIZE+SAVE_FPR_SIZE+SAVE_VR_SIZE) diff --git a/src/runtime/cgo/gcc_ppc64x.c b/src/runtime/cgo/gcc_ppc64x.c index 7e34fe4e028..bfdcf650143 100644 --- a/src/runtime/cgo/gcc_ppc64x.c +++ b/src/runtime/cgo/gcc_ppc64x.c @@ -61,7 +61,9 @@ threadentry(void *v) ThreadStart ts; ts = *(ThreadStart*)v; + _cgo_tsan_acquire(); free(v); + _cgo_tsan_release(); // Save g for this thread in C TLS setg_gcc((void*)ts.g); diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s index d0427a4807f..492a02bd380 100644 --- a/src/runtime/sys_linux_ppc64x.s +++ b/src/runtime/sys_linux_ppc64x.s @@ -12,6 +12,7 @@ #include "go_tls.h" #include "textflag.h" #include "asm_ppc64x.h" +#include "cgo/abi_ppc64x.h" #define SYS_exit 1 #define SYS_read 3 @@ -633,11 +634,11 @@ TEXT sigtramp<>(SB),NOSPLIT|NOFRAME|TOPFRAME,$0 TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0 // The stack unwinder, presumably written in C, may not be able to // handle Go frame correctly. So, this function is NOFRAME, and we - // save/restore LR manually. + // save/restore LR manually, and obey ELFv2 calling conventions. MOVD LR, R10 - // We're coming from C code, initialize essential registers. - CALL runtime·reginit(SB) + // We're coming from C code, initialize R0 + MOVD $0, R0 // If no traceback function, do usual sigtramp. MOVD runtime·cgoTraceback(SB), R6 @@ -650,15 +651,18 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT|NOFRAME,$0 CMP $0, R6 BEQ sigtramp - // Set up g register. - CALL runtime·load_g(SB) + // Inspect the g in TLS without clobbering R30/R31 via runtime.load_g. + MOVD runtime·tls_g(SB), R9 + MOVD 0(R9), R9 // Figure out if we are currently in a cgo call. // If not, just do usual sigtramp. // compared to ARM64 and others. - CMP $0, g + CMP $0, R9 BEQ sigtrampnog // g == nil - MOVD g_m(g), R6 + + // g is not nil. Check further. + MOVD g_m(R9), R6 CMP $0, R6 BEQ sigtramp // g.m == nil MOVW m_ncgo(R6), R7 @@ -731,13 +735,38 @@ TEXT cgoSigtramp<>(SB),NOSPLIT,$0 JMP sigtramp<>(SB) #endif -TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT,$0 - // We're coming from C code, set up essential register, then call sigprofNonGo. - CALL runtime·reginit(SB) - MOVW R3, FIXED_FRAME+0(R1) // sig - MOVD R4, FIXED_FRAME+8(R1) // info - MOVD R5, FIXED_FRAME+16(R1) // ctx - CALL runtime·sigprofNonGo(SB) +// Used by cgoSigtramp to inspect without clobbering R30/R31 via runtime.load_g. +GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8 + +TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0 + // This is called from C code. Callee save registers must be saved. + // R3,R4,R5 hold arguments. + // Save LR into R0 and stack a big frame. + MOVD LR, R0 + MOVD R0, 16(R1) + MOVW CR, R0 + MOVD R0, 8(R1) + // Don't save a back chain pointer when calling into Go. It will be overwritten. + // Go stores LR where ELF stores a back chain pointer. + ADD $-(32+SAVE_ALL_REG_SIZE), R1 + + SAVE_GPR(32) + SAVE_FPR(32+SAVE_GPR_SIZE) + SAVE_VR(32+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R6) + + MOVD $0, R0 + CALL runtime·sigprofNonGo(SB) + + RESTORE_GPR(32) + RESTORE_FPR(32+SAVE_GPR_SIZE) + RESTORE_VR(32+SAVE_GPR_SIZE+SAVE_FPR_SIZE, R6) + + // Clear frame, restore LR, return + ADD $(32+SAVE_ALL_REG_SIZE), R1 + MOVD 16(R1), R0 + MOVD R0, LR + MOVD 8(R1), R0 + MOVW R0, CR RET TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0