From 4d048194cd0323e1deffce96e88e8a672a08732d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Nov 2020 21:38:57 +0100 Subject: [PATCH] runtime: support new callbackasm1 calling convention on windows/arm This updates the callbacks implementation on windows/arm for the changes made in CL 258938. At the time, that was left as a TODO. At the same time, it also extends the previous support for only 4 arguments to also support additional arguments on the stack. This is required for functions like SetWinEventHook, which take 7 arguments. It does this by pushing r0-r3 onto the stack before the normal prologue, and then pointing the args struct to that location. This is derived from CL 270077 and CL 270078. Updates #40724. Fixes #42591. Change-Id: Icc199e7f2c24205e41be4e00015283c7e2a9b797 Reviewed-on: https://go-review.googlesource.com/c/go/+/271178 Run-TryBot: Jason A. Donenfeld Trust: Jason A. Donenfeld Reviewed-by: Austin Clements --- src/runtime/sys_windows_arm.s | 66 ++++++++++++++++------------------ src/runtime/syscall_windows.go | 29 +++++++++++---- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index 3fc6d27cb0..fe267080cc 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -314,48 +314,42 @@ TEXT runtime·externalthreadhandler(SB),NOSPLIT|NOFRAME,$0 GLOBL runtime·cbctxts(SB), NOPTR, $4 TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0 - // TODO(austin): This needs to be converted to match changes - // in cgocallback, but I have no way to test. See CL 258938, - // and callbackasm1 on amd64 and 386. - MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} - SUB $36, R13 // space for locals + // On entry, the trampoline in zcallback_windows_arm.s left + // the callback index in R12 (which is volatile in the C ABI). - // save callback arguments to stack. We currently support up to 4 arguments - ADD $16, R13, R4 - MOVM.IA [R0-R3], (R4) + // Push callback register arguments r0-r3. We do this first so + // they're contiguous with stack arguments. + MOVM.DB.W [R0-R3], (R13) + // Push C callee-save registers r4-r11 and lr. + MOVM.DB.W [R4-R11, R14], (R13) + SUB $(16 + callbackArgs__size), R13 // space for locals - // load cbctxts[i]. The trampoline in zcallback_windows.s puts the callback - // index in R12 - MOVW runtime·cbctxts(SB), R4 - MOVW R12<<2(R4), R4 // R4 holds pointer to wincallbackcontext structure - - // extract callback context - MOVW wincallbackcontext_argsize(R4), R5 - MOVW wincallbackcontext_gobody(R4), R4 - - // we currently support up to 4 arguments - CMP $(4 * 4), R5 - BL.GT runtime·abort(SB) - - // extend argsize by size of return value - ADD $4, R5 - - // Build 'type args struct' - MOVW R4, 4(R13) // fn - ADD $16, R13, R0 // arg (points to r0-r3, ret on stack) - MOVW R0, 8(R13) - MOVW R5, 12(R13) // argsize + // Create a struct callbackArgs on our stack. + MOVW R12, (16+callbackArgs_index)(R13) // callback index + MOVW $(16+callbackArgs__size+4*9)(R13), R0 + MOVW R0, (16+callbackArgs_args)(R13) // address of args vector + MOVW $0, R0 + MOVW R0, (16+callbackArgs_result)(R13) // result + // Prepare for entry to Go. BL runtime·load_g(SB) - BL runtime·cgocallback_gofunc(SB) - ADD $16, R13, R0 // load arg - MOVW 12(R13), R1 // load argsize - SUB $4, R1 // offset to return value - MOVW R1<<0(R0), R0 // load return value + // Call cgocallback, which will call callbackWrap(frame). + MOVW $0, R0 + MOVW R0, 12(R13) // context + MOVW $16(R13), R1 // R1 = &callbackArgs{...} + MOVW R1, 8(R13) // frame (address of callbackArgs) + MOVW $·callbackWrap(SB), R1 + MOVW R1, 4(R13) // PC of function to call + BL runtime·cgocallback(SB) - ADD $36, R13 // free locals - MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc} + // Get callback result. + MOVW (16+callbackArgs_result)(R13), R0 + + ADD $(16 + callbackArgs__size), R13 // free locals + MOVM.IA.W (R13), [R4-R11, R12] // pop {r4-r11, lr=>r12} + ADD $(4*4), R13 // skip r0-r3 + B (R12) // return // uint32 tstart_stdcall(M *newm); TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0 diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 21f2452b5a..7835b492f7 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -109,14 +109,19 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { // passed as two words (little endian); and // structs are pushed on the stack. In // fastcall, arguments larger than the word - // size are passed by reference. + // size are passed by reference. On arm, + // 8-byte aligned arguments round up to the + // next even register and can be split across + // registers and the stack. panic("compileCallback: argument size is larger than uintptr") } - if k := t.kind & kindMask; GOARCH == "amd64" && (k == kindFloat32 || k == kindFloat64) { + if k := t.kind & kindMask; (GOARCH == "amd64" || GOARCH == "arm") && (k == kindFloat32 || k == kindFloat64) { // In fastcall, floating-point arguments in // the first four positions are passed in // floating-point registers, which we don't - // currently spill. + // currently spill. arm passes floating-point + // arguments in VFP registers, which we also + // don't support. panic("compileCallback: float arguments not supported") } @@ -128,6 +133,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { // argument word and all supported Windows // architectures are little endian, so src is already // pointing to the right place for smaller arguments. + // The same is true on arm. // Copy just the size of the argument. Note that this // could be a small by-value struct, but C and Go @@ -139,7 +145,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { abiMap = append(abiMap, part) } - // cdecl, stdcall, and fastcall pad arguments to word size. + // cdecl, stdcall, fastcall, and arm pad arguments to word size. src += sys.PtrSize // The Go ABI packs arguments. dst += t.size @@ -205,7 +211,18 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { type callbackArgs struct { index uintptr - args unsafe.Pointer // Arguments in stdcall/cdecl convention, with registers spilled + // args points to the argument block. + // + // For cdecl and stdcall, all arguments are on the stack. + // + // For fastcall, the trampoline spills register arguments to + // the reserved spill slots below the stack arguments, + // resulting in a layout equivalent to stdcall. + // + // For arm, the trampoline stores the register arguments just + // below the stack arguments, so again we can treat it as one + // big stack arguments frame. + args unsafe.Pointer // Below are out-args from callbackWrap result uintptr retPop uintptr // For 386 cdecl, how many bytes to pop on return @@ -216,7 +233,7 @@ func callbackWrap(a *callbackArgs) { c := cbs.ctxt[a.index] a.retPop = c.retPop - // Convert from stdcall to Go ABI. + // Convert from C to Go ABI. var frame [callbackMaxFrame]byte goArgs := unsafe.Pointer(&frame) for _, part := range c.abiMap {