diff --git a/src/runtime/defs_windows_arm64.go b/src/runtime/defs_windows_arm64.go new file mode 100644 index 0000000000..9ccce46f09 --- /dev/null +++ b/src/runtime/defs_windows_arm64.go @@ -0,0 +1,83 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +// NOTE(rsc): _CONTEXT_CONTROL is actually 0x400001 and should include PC, SP, and LR. +// However, empirically, LR doesn't come along on Windows 10 +// unless you also set _CONTEXT_INTEGER (0x400002). +// Without LR, we skip over the next-to-bottom function in profiles +// when the bottom function is frameless. +// So we set both here, to make a working _CONTEXT_CONTROL. +const _CONTEXT_CONTROL = 0x400003 + +type neon128 struct { + low uint64 + high int64 +} + +// See https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-arm64_nt_context +type context struct { + contextflags uint32 + cpsr uint32 + x [31]uint64 // fp is x[29], lr is x[30] + xsp uint64 + pc uint64 + v [32]neon128 + fpcr uint32 + fpsr uint32 + bcr [8]uint32 + bvr [8]uint64 + wcr [2]uint32 + wvr [2]uint64 +} + +func (c *context) ip() uintptr { return uintptr(c.pc) } +func (c *context) sp() uintptr { return uintptr(c.xsp) } +func (c *context) lr() uintptr { return uintptr(c.x[30]) } + +func (c *context) set_ip(x uintptr) { c.pc = uint64(x) } +func (c *context) set_sp(x uintptr) { c.xsp = uint64(x) } +func (c *context) set_lr(x uintptr) { c.x[30] = uint64(x) } + +func dumpregs(r *context) { + print("r0 ", hex(r.x[0]), "\n") + print("r1 ", hex(r.x[1]), "\n") + print("r2 ", hex(r.x[2]), "\n") + print("r3 ", hex(r.x[3]), "\n") + print("r4 ", hex(r.x[4]), "\n") + print("r5 ", hex(r.x[5]), "\n") + print("r6 ", hex(r.x[6]), "\n") + print("r7 ", hex(r.x[7]), "\n") + print("r8 ", hex(r.x[8]), "\n") + print("r9 ", hex(r.x[9]), "\n") + print("r10 ", hex(r.x[10]), "\n") + print("r11 ", hex(r.x[11]), "\n") + print("r12 ", hex(r.x[12]), "\n") + print("r13 ", hex(r.x[13]), "\n") + print("r14 ", hex(r.x[14]), "\n") + print("r15 ", hex(r.x[15]), "\n") + print("r16 ", hex(r.x[16]), "\n") + print("r17 ", hex(r.x[17]), "\n") + print("r18 ", hex(r.x[18]), "\n") + print("r19 ", hex(r.x[19]), "\n") + print("r20 ", hex(r.x[20]), "\n") + print("r21 ", hex(r.x[21]), "\n") + print("r22 ", hex(r.x[22]), "\n") + print("r23 ", hex(r.x[23]), "\n") + print("r24 ", hex(r.x[24]), "\n") + print("r25 ", hex(r.x[25]), "\n") + print("r26 ", hex(r.x[26]), "\n") + print("r27 ", hex(r.x[27]), "\n") + print("r28 ", hex(r.x[28]), "\n") + print("r29 ", hex(r.x[29]), "\n") + print("lr ", hex(r.x[30]), "\n") + print("sp ", hex(r.xsp), "\n") + print("pc ", hex(r.pc), "\n") + print("cpsr ", hex(r.cpsr), "\n") +} + +func stackcheck() { + // TODO: not implemented on ARM +} diff --git a/src/runtime/os_windows_arm64.go b/src/runtime/os_windows_arm64.go new file mode 100644 index 0000000000..7e413445ba --- /dev/null +++ b/src/runtime/os_windows_arm64.go @@ -0,0 +1,14 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import "unsafe" + +//go:nosplit +func cputicks() int64 { + var counter int64 + stdcall1(_QueryPerformanceCounter, uintptr(unsafe.Pointer(&counter))) + return counter +} diff --git a/src/runtime/rt0_windows_arm64.s b/src/runtime/rt0_windows_arm64.s new file mode 100644 index 0000000000..1e71a068d3 --- /dev/null +++ b/src/runtime/rt0_windows_arm64.s @@ -0,0 +1,12 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "go_tls.h" +#include "textflag.h" + +// This is the entry point for the program from the +// kernel for an ordinary -buildmode=exe program. +TEXT _rt0_arm64_windows(SB),NOSPLIT|NOFRAME,$0 + B ·rt0_go(SB) diff --git a/src/runtime/sys_windows_arm64.s b/src/runtime/sys_windows_arm64.s new file mode 100644 index 0000000000..b279f25de8 --- /dev/null +++ b/src/runtime/sys_windows_arm64.s @@ -0,0 +1,588 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "go_asm.h" +#include "go_tls.h" +#include "textflag.h" + +#ifdef NOT_PORTED + +// Note: For system ABI, R0-R3 are args, R4-R11 are callee-save. + +// void runtime·asmstdcall(void *c); +TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R4, R5, R14], (R13) // push {r4, r5, lr} + MOVW R0, R4 // put libcall * in r4 + MOVW R13, R5 // save stack pointer in r5 + + // SetLastError(0) + MOVW $0, R0 + MRC 15, 0, R1, C13, C0, 2 + MOVW R0, 0x34(R1) + + MOVW 8(R4), R12 // libcall->args + + // Do we have more than 4 arguments? + MOVW 4(R4), R0 // libcall->n + SUB.S $4, R0, R2 + BLE loadregs + + // Reserve stack space for remaining args + SUB R2<<2, R13 + BIC $0x7, R13 // alignment for ABI + + // R0: count of arguments + // R1: + // R2: loop counter, from 0 to (n-4) + // R3: scratch + // R4: pointer to libcall struct + // R12: libcall->args + MOVW $0, R2 +stackargs: + ADD $4, R2, R3 // r3 = args[4 + i] + MOVW R3<<2(R12), R3 + MOVW R3, R2<<2(R13) // stack[i] = r3 + + ADD $1, R2 // i++ + SUB $4, R0, R3 // while (i < (n - 4)) + CMP R3, R2 + BLT stackargs + +loadregs: + CMP $3, R0 + MOVW.GT 12(R12), R3 + + CMP $2, R0 + MOVW.GT 8(R12), R2 + + CMP $1, R0 + MOVW.GT 4(R12), R1 + + CMP $0, R0 + MOVW.GT 0(R12), R0 + + BIC $0x7, R13 // alignment for ABI + MOVW 0(R4), R12 // branch to libcall->fn + BL (R12) + + MOVW R5, R13 // free stack space + MOVW R0, 12(R4) // save return value to libcall->r1 + MOVW R1, 16(R4) + + // GetLastError + MRC 15, 0, R1, C13, C0, 2 + MOVW 0x34(R1), R0 + MOVW R0, 20(R4) // store in libcall->err + + MOVM.IA.W (R13), [R4, R5, R15] + +TEXT runtime·badsignal2(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R4, R14], (R13) // push {r4, lr} + MOVW R13, R4 // save original stack pointer + SUB $8, R13 // space for 2 variables + BIC $0x7, R13 // alignment for ABI + + // stderr + MOVW runtime·_GetStdHandle(SB), R1 + MOVW $-12, R0 + BL (R1) + + MOVW $runtime·badsignalmsg(SB), R1 // lpBuffer + MOVW $runtime·badsignallen(SB), R2 // lpNumberOfBytesToWrite + MOVW (R2), R2 + ADD $0x4, R13, R3 // lpNumberOfBytesWritten + MOVW $0, R12 // lpOverlapped + MOVW R12, (R13) + + MOVW runtime·_WriteFile(SB), R12 + BL (R12) + + MOVW R4, R13 // restore SP + MOVM.IA.W (R13), [R4, R15] // pop {r4, pc} + +TEXT runtime·getlasterror(SB),NOSPLIT,$0 + MRC 15, 0, R0, C13, C0, 2 + MOVW 0x34(R0), R0 + MOVW R0, ret+0(FP) + RET + +// Called by Windows as a Vectored Exception Handler (VEH). +// First argument is pointer to struct containing +// exception record and context pointers. +// Handler function is stored in R1 +// Return 0 for 'not handled', -1 for handled. +// int32_t sigtramp( +// PEXCEPTION_POINTERS ExceptionInfo, +// func *GoExceptionHandler); +TEXT sigtramp<>(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R0, R4-R11, R14], (R13) // push {r0, r4-r11, lr} (SP-=40) + SUB $(8+20), R13 // reserve space for g, sp, and + // parameters/retval to go call + + MOVW R0, R6 // Save param0 + MOVW R1, R7 // Save param1 + + BL runtime·load_g(SB) + CMP $0, g // is there a current g? + BL.EQ runtime·badsignal2(SB) + + // save g and SP in case of stack switch + MOVW R13, 24(R13) + MOVW g, 20(R13) + + // do we need to switch to the g0 stack? + MOVW g, R5 // R5 = g + MOVW g_m(R5), R2 // R2 = m + MOVW m_g0(R2), R4 // R4 = g0 + CMP R5, R4 // if curg == g0 + BEQ g0 + + // switch to g0 stack + MOVW R4, g // g = g0 + MOVW (g_sched+gobuf_sp)(g), R3 // R3 = g->gobuf.sp + BL runtime·save_g(SB) + + // make room for sighandler arguments + // and re-save old SP for restoring later. + // (note that the 24(R3) here must match the 24(R13) above.) + SUB $40, R3 + MOVW R13, 24(R3) // save old stack pointer + MOVW R3, R13 // switch stack + +g0: + MOVW 0(R6), R2 // R2 = ExceptionPointers->ExceptionRecord + MOVW 4(R6), R3 // R3 = ExceptionPointers->ContextRecord + + MOVW $0, R4 + MOVW R4, 0(R13) // No saved link register. + MOVW R2, 4(R13) // Move arg0 (ExceptionRecord) into position + MOVW R3, 8(R13) // Move arg1 (ContextRecord) into position + MOVW R5, 12(R13) // Move arg2 (original g) into position + BL (R7) // Call the go routine + MOVW 16(R13), R4 // Fetch return value from stack + + // switch back to original stack and g + MOVW 24(R13), R13 + MOVW 20(R13), g + BL runtime·save_g(SB) + +done: + MOVW R4, R0 // move retval into position + ADD $(8 + 20), R13 // free locals + MOVM.IA.W (R13), [R3, R4-R11, R14] // pop {r3, r4-r11, lr} + + // if return value is CONTINUE_SEARCH, do not set up control + // flow guard workaround + CMP $0, R0 + BEQ return + + // Check if we need to set up the control flow guard workaround. + // On Windows/ARM, the stack pointer must lie within system + // stack limits when we resume from exception. + // Store the resume SP and PC on the g0 stack, + // and return to returntramp on the g0 stack. returntramp + // pops the saved PC and SP from the g0 stack, resuming execution + // at the desired location. + // If returntramp has already been set up by a previous exception + // handler, don't clobber the stored SP and PC on the stack. + MOVW 4(R3), R3 // PEXCEPTION_POINTERS->Context + MOVW context_pc(R3), R2 // load PC from context record + MOVW $returntramp<>(SB), R1 + CMP R1, R2 + B.EQ return // do not clobber saved SP/PC + + // Save resume SP and PC into R0, R1. + MOVW context_spr(R3), R2 + MOVW R2, context_r0(R3) + MOVW context_pc(R3), R2 + MOVW R2, context_r1(R3) + + // Set up context record to return to returntramp on g0 stack + MOVW R12, context_spr(R3) + MOVW $returntramp<>(SB), R2 + MOVW R2, context_pc(R3) + +return: + B (R14) // return + +// Trampoline to resume execution from exception handler. +// This is part of the control flow guard workaround. +// It switches stacks and jumps to the continuation address. +// R0 and R1 are set above at the end of sigtramp<> +// in the context that starts executing at returntramp<>. +TEXT returntramp<>(SB),NOSPLIT|NOFRAME,$0 + // Important: do not smash LR, + // which is set to a live value when handling + // a signal by pushing a call to sigpanic onto the stack. + MOVW R0, R13 + B (R1) + +TEXT runtime·exceptiontramp(SB),NOSPLIT|NOFRAME,$0 + MOVW $runtime·exceptionhandler(SB), R1 + B sigtramp<>(SB) + +TEXT runtime·firstcontinuetramp(SB),NOSPLIT|NOFRAME,$0 + MOVW $runtime·firstcontinuehandler(SB), R1 + B sigtramp<>(SB) + +TEXT runtime·lastcontinuetramp(SB),NOSPLIT|NOFRAME,$0 + MOVW $runtime·lastcontinuehandler(SB), R1 + B sigtramp<>(SB) + +TEXT runtime·ctrlhandler(SB),NOSPLIT|NOFRAME,$0 + MOVW $runtime·ctrlhandler1(SB), R1 + B runtime·externalthreadhandler(SB) + +TEXT runtime·profileloop(SB),NOSPLIT|NOFRAME,$0 + MOVW $runtime·profileloop1(SB), R1 + B runtime·externalthreadhandler(SB) + +// int32 externalthreadhandler(uint32 arg, int (*func)(uint32)) +// stack layout: +// +----------------+ +// | callee-save | +// | registers | +// +----------------+ +// | m | +// +----------------+ +// 20| g | +// +----------------+ +// 16| func ptr (r1) | +// +----------------+ +// 12| argument (r0) | +//---+----------------+ +// 8 | param1 | (also return value for called Go function) +// +----------------+ +// 4 | param0 | +// +----------------+ +// 0 | slot for LR | +// +----------------+ +// +TEXT runtime·externalthreadhandler(SB),NOSPLIT|NOFRAME|TOPFRAME,$0 + MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} + SUB $(m__size + g__size + 20), R13 // space for locals + MOVW R14, 0(R13) // push LR again for anything unwinding the stack + MOVW R0, 12(R13) + MOVW R1, 16(R13) + + // zero out m and g structures + ADD $20, R13, R0 // compute pointer to g + MOVW R0, 4(R13) + MOVW $(m__size + g__size), R0 + MOVW R0, 8(R13) + BL runtime·memclrNoHeapPointers(SB) + + // initialize m and g structures + ADD $20, R13, R2 // R2 = g + ADD $(20 + g__size), R13, R3 // R3 = m + MOVW R2, m_g0(R3) // m->g0 = g + MOVW R3, g_m(R2) // g->m = m + MOVW R2, m_curg(R3) // m->curg = g + + MOVW R2, g + BL runtime·save_g(SB) + + // set up stackguard stuff + MOVW R13, R0 + MOVW R0, g_stack+stack_hi(g) + SUB $(32*1024), R0 + MOVW R0, (g_stack+stack_lo)(g) + MOVW R0, g_stackguard0(g) + MOVW R0, g_stackguard1(g) + + // move argument into position and call function + MOVW 12(R13), R0 + MOVW R0, 4(R13) + MOVW 16(R13), R1 + BL (R1) + + // clear g + MOVW $0, g + BL runtime·save_g(SB) + + MOVW 8(R13), R0 // load return value + ADD $(m__size + g__size + 20), R13 // free locals + MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc} + +GLOBL runtime·cbctxts(SB), NOPTR, $4 + +TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0 + // On entry, the trampoline in zcallback_windows_arm.s left + // the callback index in R12 (which is volatile in the C ABI). + + // Push callback register arguments r0-r3. We do this first so + // they're contiguous with stack arguments. + MOVM.DB.W [R0-R3], (R13) + // Push C callee-save registers r4-r11 and lr. + MOVM.DB.W [R4-R11, R14], (R13) + SUB $(16 + callbackArgs__size), R13 // space for locals + + // Create a struct callbackArgs on our stack. + MOVW R12, (16+callbackArgs_index)(R13) // callback index + MOVW $(16+callbackArgs__size+4*9)(R13), R0 + MOVW R0, (16+callbackArgs_args)(R13) // address of args vector + MOVW $0, R0 + MOVW R0, (16+callbackArgs_result)(R13) // result + + // Prepare for entry to Go. + BL runtime·load_g(SB) + + // Call cgocallback, which will call callbackWrap(frame). + MOVW $0, R0 + MOVW R0, 12(R13) // context + MOVW $16(R13), R1 // R1 = &callbackArgs{...} + MOVW R1, 8(R13) // frame (address of callbackArgs) + MOVW $·callbackWrap(SB), R1 + MOVW R1, 4(R13) // PC of function to call + BL runtime·cgocallback(SB) + + // Get callback result. + MOVW (16+callbackArgs_result)(R13), R0 + + ADD $(16 + callbackArgs__size), R13 // free locals + MOVM.IA.W (R13), [R4-R11, R12] // pop {r4-r11, lr=>r12} + ADD $(4*4), R13 // skip r0-r3 + B (R12) // return + +// uint32 tstart_stdcall(M *newm); +TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr} + + MOVW m_g0(R0), g + MOVW R0, g_m(g) + BL runtime·save_g(SB) + + // Layout new m scheduler stack on os stack. + MOVW R13, R0 + MOVW R0, g_stack+stack_hi(g) + SUB $(64*1024), R0 + MOVW R0, (g_stack+stack_lo)(g) + MOVW R0, g_stackguard0(g) + MOVW R0, g_stackguard1(g) + + BL runtime·emptyfunc(SB) // fault if stack check is wrong + BL runtime·mstart(SB) + + // Exit the thread. + MOVW $0, R0 + MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc} + +// Runs on OS stack. +// duration (in -100ns units) is in dt+0(FP). +// g may be nil. +TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$0-4 + MOVW dt+0(FP), R0 + MOVM.DB.W [R4, R14], (R13) // push {r4, lr} + MOVW R13, R4 // Save SP + SUB $8, R13 // R13 = R13 - 8 + BIC $0x7, R13 // Align SP for ABI + RSB $0, R0, R3 // R3 = -R0 + MOVW $0, R1 // R1 = FALSE (alertable) + MOVW $-1, R0 // R0 = handle + MOVW R13, R2 // R2 = pTime + MOVW R3, 0(R2) // time_lo + MOVW R0, 4(R2) // time_hi + MOVW runtime·_NtWaitForSingleObject(SB), R3 + BL (R3) + MOVW R4, R13 // Restore SP + MOVM.IA.W (R13), [R4, R15] // pop {R4, pc} + +// Runs on OS stack. +// duration (in -100ns units) is in dt+0(FP). +// g is valid. +// TODO: neeeds to be implemented properly. +TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$0-4 + B runtime·abort(SB) + +// Runs on OS stack. +TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R4, R14], (R13) // push {R4, lr} + MOVW R13, R4 + BIC $0x7, R13 // alignment for ABI + MOVW runtime·_SwitchToThread(SB), R0 + BL (R0) + MOVW R4, R13 // restore stack pointer + MOVM.IA.W (R13), [R4, R15] // pop {R4, pc} + +TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0 + B runtime·armPublicationBarrier(SB) + +// never called (cgo not supported) +TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0 + MOVW $0xabcd, R0 + MOVW R0, (R0) + RET + +// See http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/ +// Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2. +#define _INTERRUPT_TIME 0x7ffe0008 +#define _SYSTEM_TIME 0x7ffe0014 +#define time_lo 0 +#define time_hi1 4 +#define time_hi2 8 + +TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 + MOVW $0, R0 + MOVB runtime·useQPCTime(SB), R0 + CMP $0, R0 + BNE useQPC + MOVW $_INTERRUPT_TIME, R3 +loop: + MOVW time_hi1(R3), R1 + MOVW time_lo(R3), R0 + MOVW time_hi2(R3), R2 + CMP R1, R2 + BNE loop + + // wintime = R1:R0, multiply by 100 + MOVW $100, R2 + MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 + MULA R1, R2, R4, R4 + + // wintime*100 = R4:R3 + MOVW R3, ret_lo+0(FP) + MOVW R4, ret_hi+4(FP) + RET +useQPC: + B runtime·nanotimeQPC(SB) // tail call + +TEXT time·now(SB),NOSPLIT|NOFRAME,$0-20 + MOVW $0, R0 + MOVB runtime·useQPCTime(SB), R0 + CMP $0, R0 + BNE useQPC + MOVW $_INTERRUPT_TIME, R3 +loop: + MOVW time_hi1(R3), R1 + MOVW time_lo(R3), R0 + MOVW time_hi2(R3), R2 + CMP R1, R2 + BNE loop + + // wintime = R1:R0, multiply by 100 + MOVW $100, R2 + MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 + MULA R1, R2, R4, R4 + + // wintime*100 = R4:R3 + MOVW R3, mono+12(FP) + MOVW R4, mono+16(FP) + + MOVW $_SYSTEM_TIME, R3 +wall: + MOVW time_hi1(R3), R1 + MOVW time_lo(R3), R0 + MOVW time_hi2(R3), R2 + CMP R1, R2 + BNE wall + + // w = R1:R0 in 100ns untis + // convert to Unix epoch (but still 100ns units) + #define delta 116444736000000000 + SUB.S $(delta & 0xFFFFFFFF), R0 + SBC $(delta >> 32), R1 + + // Convert to nSec + MOVW $100, R2 + MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2 + MULA R1, R2, R4, R4 + // w = R2:R1 in nSec + MOVW R3, R1 // R4:R3 -> R2:R1 + MOVW R4, R2 + + // multiply nanoseconds by reciprocal of 10**9 (scaled by 2**61) + // to get seconds (96 bit scaled result) + MOVW $0x89705f41, R3 // 2**61 * 10**-9 + MULLU R1,R3,(R6,R5) // R7:R6:R5 = R2:R1 * R3 + MOVW $0,R7 + MULALU R2,R3,(R7,R6) + + // unscale by discarding low 32 bits, shifting the rest by 29 + MOVW R6>>29,R6 // R7:R6 = (R7:R6:R5 >> 61) + ORR R7<<3,R6 + MOVW R7>>29,R7 + + // subtract (10**9 * sec) from nsec to get nanosecond remainder + MOVW $1000000000, R5 // 10**9 + MULLU R6,R5,(R9,R8) // R9:R8 = R7:R6 * R5 + MULA R7,R5,R9,R9 + SUB.S R8,R1 // R2:R1 -= R9:R8 + SBC R9,R2 + + // because reciprocal was a truncated repeating fraction, quotient + // may be slightly too small -- adjust to make remainder < 10**9 + CMP R5,R1 // if remainder > 10**9 + SUB.HS R5,R1 // remainder -= 10**9 + ADD.HS $1,R6 // sec += 1 + + MOVW R6,sec_lo+0(FP) + MOVW R7,sec_hi+4(FP) + MOVW R1,nsec+8(FP) + RET +useQPC: + B runtime·nowQPC(SB) // tail call + +// save_g saves the g register (R10) into thread local memory +// so that we can call externally compiled +// ARM code that will overwrite those registers. +// NOTE: runtime.gogo assumes that R1 is preserved by this function. +// runtime.mcall assumes this function only clobbers R0 and R11. +// Returns with g in R0. +// Save the value in the _TEB->TlsSlots array. +// Effectively implements TlsSetValue(). +// tls_g stores the TLS slot allocated TlsAlloc(). +TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0 + MRC 15, 0, R0, C13, C0, 2 + ADD $0xe10, R0 + MOVW $runtime·tls_g(SB), R11 + MOVW (R11), R11 + MOVW g, R11<<2(R0) + MOVW g, R0 // preserve R0 across call to setg<> + RET + +// load_g loads the g register from thread-local memory, +// for use after calling externally compiled +// ARM code that overwrote those registers. +// Get the value from the _TEB->TlsSlots array. +// Effectively implements TlsGetValue(). +TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0 + MRC 15, 0, R0, C13, C0, 2 + ADD $0xe10, R0 + MOVW $runtime·tls_g(SB), g + MOVW (g), g + MOVW g<<2(R0), g + RET + +// This is called from rt0_go, which runs on the system stack +// using the initial stack allocated by the OS. +// It calls back into standard C using the BL below. +// To do that, the stack pointer must be 8-byte-aligned. +TEXT runtime·_initcgo(SB),NOSPLIT|NOFRAME,$0 + MOVM.DB.W [R4, R14], (R13) // push {r4, lr} + + // Ensure stack is 8-byte aligned before calling C code + MOVW R13, R4 + BIC $0x7, R13 + + // Allocate a TLS slot to hold g across calls to external code + MOVW $runtime·_TlsAlloc(SB), R0 + MOVW (R0), R0 + BL (R0) + + // Assert that slot is less than 64 so we can use _TEB->TlsSlots + CMP $64, R0 + MOVW $runtime·abort(SB), R1 + BL.GE (R1) + + // Save Slot into tls_g + MOVW $runtime·tls_g(SB), R1 + MOVW R0, (R1) + + MOVW R4, R13 + MOVM.IA.W (R13), [R4, R15] // pop {r4, pc} + +// Holds the TLS Slot, which was allocated by TlsAlloc() +GLOBL runtime·tls_g+0(SB), NOPTR, $4 + +#endif