go/src/pkg/runtime/sys_linux_arm.s
Elias Naur 45233734e2 runtime.cmd/ld: Add ARM external linking and implement -shared in terms of external linking
This CL is an aggregate of 10271047, 10499043, 9733044. Descriptions of each follow:

10499043
runtime,cmd/ld: Merge TLS symbols and teach 5l about ARM TLS

This CL prepares for external linking support on ARM.

The pseudo-symbols runtime.g and runtime.m are merged into a single
runtime.tlsgm symbol. When linking externally, the offset of a thread-local
variable is stored at a memory location instead of being embedded into the offset
of an ldr instruction. With a single runtime.tlsgm symbol for both g and m, only
one such offset is needed.

The larger part of this CL moves the TLS code from gcc-compiled code to internally
compiled code. The TLS code now uses the modern MRC instruction, and 5l is taught
about TLS fallbacks in case the instruction is not available or appropriate.
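
As background (not part of this CL), a minimal C sketch of what the MRC-based TLS read amounts to: on ARMv6K and newer cores the thread pointer lives in the CP15 register TPIDRURO, which is exactly why older hardware needs a fallback. The function name is illustrative.

/* Reads the thread pointer from TPIDRURO (mrc p15, 0, Rt, c13, c0, 3).
 * ARM-only; older cores must use the kernel __kuser_get_tls helper. */
static void *read_thread_pointer(void) {
    void *tp;
    __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tp));
    return tp;
}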

10271047
This CL adds support for -linkmode external to 5l.

For 5l itself, use addrel to allow D_CALL relocations to be handled by the
host linker. Of the cases listed in rsc's comment in issue 4069, only cases 5 and
63 needed an update. One of the TODO: addrel cases has since been replaced, and the
rest of the cases are either covered by indirection through addpool (cases with
LTO or LFROM flags) or stubs (case 74). The addpool cases are covered because
addpool emits AWORD instructions, which in turn are handled by case 11.

In the runtime, change the argv argument in the rt0* functions slightly to be a
pointer to the argv list, instead of relying on a particular location of argv.

9733044
The -shared flag to 6l outputs a shared library, implemented in Go
and callable from non-Go programs such as C programs.

The main part of this CL changes the thread-local storage model. Go uses the
fastest and least general mode, local exec. TLS data in shared libraries normally
requires at least the local dynamic mode; however, this CL opts for the initial
exec mode instead. Initial exec mode is faster than local dynamic mode and can be
used on Linux because the linker has reserved a limited amount of TLS space for
performance-sensitive TLS code.

Initial exec mode requires an extra load from the GOT table to determine the
TLS offset. This penalty will not be paid if ld is not in -shared mode, since
TLS accesses will be reduced to local exec.
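
A hedged illustration (not from this CL) of the difference at the C level, using gcc's TLS-model controls; the variable and function names are made up:

/* Built into a shared object with
 *   gcc -shared -fPIC -ftls-model=initial-exec -o libtls.so tls.c
 * every __thread variable uses initial exec; the attribute below
 * requests the same model for a single variable. */
__thread int counter __attribute__((tls_model("initial-exec")));

int bump(void) {
    /* Initial exec fetches the variable's TLS offset with one GOT
     * load; local/general dynamic would call __tls_get_addr instead. */
    return ++counter;
}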

The ELF sections .init_array and .rela.init_array are added to register the Go
runtime entry with cgo at library load time.
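
For comparison, this is what an .init_array entry amounts to in C; the sketch is illustrative only, and register_runtime_entry is not the symbol the Go runtime actually uses:

#include <stdio.h>

/* The compiler places this constructor's address in .init_array, so it
 * runs when the shared library is loaded, before main(). */
__attribute__((constructor))
static void register_runtime_entry(void) {
    fprintf(stderr, "library loaded; runtime entry would be registered here\n");
}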

The "hidden" attribute is added to Cgo functions called from Go, since Go
does not generate call through the GOT table, and adding non-GOT relocations for
a global function is not supported by gcc. Cgo symbols don't need to be global
and avoiding the GOT table is also faster.
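
A small hedged example of the "hidden" attribute; crosscall_fn is a made-up name, not an actual cgo symbol:

/* A hidden symbol is local to the shared object, so calls to it use
 * direct (non-GOT) relocations, which is what the Go-generated calls
 * described above require. */
__attribute__((visibility("hidden")))
void crosscall_fn(void) {
    /* callable from Go without going through the GOT/PLT */
}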

The changes to 8l only remove code relevant to the old -shared mode, where
internal linking was used.

This CL only addresses the low-level linker work. It can be submitted by itself,
but to be useful, the runtime changes in CL 9738047 are also needed.

Design discussion at
https://groups.google.com/forum/?fromgroups#!topic/golang-nuts/zmjXkGrEx6Q

Fixes #5590.

R=rsc
CC=golang-dev
https://golang.org/cl/12871044
2013-08-14 15:38:54 +00:00

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// System calls and other sys.stuff for arm, Linux
//
#include "zasm_GOOS_GOARCH.h"
#include "../../cmd/ld/textflag.h"
// for EABI, as we don't support OABI
#define SYS_BASE 0x0
#define SYS_exit (SYS_BASE + 1)
#define SYS_read (SYS_BASE + 3)
#define SYS_write (SYS_BASE + 4)
#define SYS_open (SYS_BASE + 5)
#define SYS_close (SYS_BASE + 6)
#define SYS_gettimeofday (SYS_BASE + 78)
#define SYS_clone (SYS_BASE + 120)
#define SYS_rt_sigreturn (SYS_BASE + 173)
#define SYS_rt_sigaction (SYS_BASE + 174)
#define SYS_rt_sigprocmask (SYS_BASE + 175)
#define SYS_sigaltstack (SYS_BASE + 186)
#define SYS_mmap2 (SYS_BASE + 192)
#define SYS_futex (SYS_BASE + 240)
#define SYS_exit_group (SYS_BASE + 248)
#define SYS_munmap (SYS_BASE + 91)
#define SYS_madvise (SYS_BASE + 220)
#define SYS_setitimer (SYS_BASE + 104)
#define SYS_mincore (SYS_BASE + 219)
#define SYS_gettid (SYS_BASE + 224)
#define SYS_tkill (SYS_BASE + 238)
#define SYS_sched_yield (SYS_BASE + 158)
#define SYS_select (SYS_BASE + 142) // newselect
#define SYS_ugetrlimit (SYS_BASE + 191)
#define SYS_sched_getaffinity (SYS_BASE + 242)
#define SYS_clock_gettime (SYS_BASE + 263)
#define SYS_epoll_create (SYS_BASE + 250)
#define SYS_epoll_ctl (SYS_BASE + 251)
#define SYS_epoll_wait (SYS_BASE + 252)
#define SYS_epoll_create1 (SYS_BASE + 357)
#define SYS_fcntl (SYS_BASE + 55)
#define ARM_BASE (SYS_BASE + 0x0f0000)
TEXT runtime·open(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_open, R7
SWI $0
RET
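
For reference, a hedged C equivalent of the wrapper above: on ARM EABI the syscall number travels in R7 and the arguments in R0-R6, which is also what glibc's syscall(2) sets up before trapping.

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void) {
    long fd = syscall(SYS_open, "/dev/null", O_RDONLY, 0);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    syscall(SYS_close, fd);
    printf("opened and closed fd %ld\n", fd);
    return 0;
}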
TEXT runtime·close(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW $SYS_close, R7
SWI $0
RET
TEXT runtime·write(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_write, R7
SWI $0
RET
TEXT runtime·read(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_read, R7
SWI $0
RET
TEXT runtime·getrlimit(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW $SYS_ugetrlimit, R7
SWI $0
RET
TEXT runtime·exit(SB),NOSPLIT,$-4
MOVW 0(FP), R0
MOVW $SYS_exit_group, R7
SWI $0
MOVW $1234, R0
MOVW $1002, R1
MOVW R0, (R1) // fail hard
TEXT runtime·exit1(SB),NOSPLIT,$-4
MOVW 0(FP), R0
MOVW $SYS_exit, R7
SWI $0
MOVW $1234, R0
MOVW $1003, R1
MOVW R0, (R1) // fail hard
TEXT runtime·raise(SB),NOSPLIT,$-4
MOVW $SYS_gettid, R7
SWI $0
// arg 1 tid already in R0 from gettid
MOVW sig+0(FP), R1 // arg 2 - signal
MOVW $SYS_tkill, R7
SWI $0
RET
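
A hedged C equivalent of the gettid+tkill pair above; raise_signal is an illustrative name (neither syscall has a dedicated libc wrapper here):

#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

static void raise_signal(int sig) {
    pid_t tid = (pid_t)syscall(SYS_gettid);   /* thread id, as in the assembly */
    syscall(SYS_tkill, tid, sig);             /* deliver sig to this thread */
}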
TEXT runtime·mmap(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW 12(FP), R3
MOVW 16(FP), R4
MOVW 20(FP), R5
MOVW $SYS_mmap2, R7
SWI $0
MOVW $0xfffff001, R6
CMP R6, R0
RSB.HI $0, R0
RET
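
The $0xfffff001 comparison encodes the raw Linux return convention: a syscall returns -errno, and errno values are confined to 1..4095, so any unsigned result at or above (unsigned)-4095 = 0xfffff001 is an error; RSB then negates it back into a positive errno. A hedged C restatement of that rule (helper names are illustrative; glibc's own wrappers perform the same test before setting errno):

#include <stdint.h>
#include <stdio.h>

/* Returns 1 if a raw 32-bit syscall return value denotes an error. */
static int is_syscall_error(uint32_t r) {
    return r >= 0xfffff001u;            /* same constant as the CMP above */
}

/* Negating the raw value recovers errno, as RSB.HI $0, R0 does. */
static uint32_t syscall_errno(uint32_t r) {
    return (uint32_t)-r;
}

int main(void) {
    uint32_t r = (uint32_t)-12;         /* what the kernel returns for ENOMEM */
    printf("error=%d errno=%u\n", is_syscall_error(r), syscall_errno(r));
    return 0;
}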
TEXT runtime·munmap(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW $SYS_munmap, R7
SWI $0
MOVW $0xfffff001, R6
CMP R6, R0
MOVW.HI $0, R8 // crash on syscall failure
MOVW.HI R8, (R8)
RET
TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_madvise, R7
SWI $0
// ignore failure - maybe pages are locked
RET
TEXT runtime·setitimer(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_setitimer, R7
SWI $0
RET
TEXT runtime·mincore(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_mincore, R7
SWI $0
RET
TEXT time·now(SB), NOSPLIT, $32
MOVW $0, R0 // CLOCK_REALTIME
MOVW $8(R13), R1 // timespec
MOVW $SYS_clock_gettime, R7
SWI $0
MOVW 8(R13), R0 // sec
MOVW 12(R13), R2 // nsec
MOVW R0, 0(FP)
MOVW $0, R1
MOVW R1, 4(FP)
MOVW R2, 8(FP)
RET
// int64 nanotime(void) so really
// void nanotime(int64 *nsec)
TEXT runtime·nanotime(SB),NOSPLIT,$32
MOVW $0, R0 // CLOCK_REALTIME
MOVW $8(R13), R1 // timespec
MOVW $SYS_clock_gettime, R7
SWI $0
MOVW 8(R13), R0 // sec
MOVW 12(R13), R2 // nsec
MOVW $1000000000, R3
MULLU R0, R3, (R1, R0)
MOVW $0, R4
ADD.S R2, R0
ADC R4, R1
MOVW 0(FP), R3
MOVW R0, 0(R3)
MOVW R1, 4(R3)
RET
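
A hedged C restatement of the arithmetic above: the 32-bit second count is widened and scaled by 1e9, which is what the MULLU/ADC pair builds in the R1:R0 register pair.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int64_t nanotime(void) {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void) {
    printf("%lld ns since the epoch\n", (long long)nanotime());
    return 0;
}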
// int32 futex(int32 *uaddr, int32 op, int32 val,
// struct timespec *timeout, int32 *uaddr2, int32 val2);
TEXT runtime·futex(SB),NOSPLIT,$0
MOVW 4(SP), R0
MOVW 8(SP), R1
MOVW 12(SP), R2
MOVW 16(SP), R3
MOVW 20(SP), R4
MOVW 24(SP), R5
MOVW $SYS_futex, R7
SWI $0
RET
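
A hedged sketch of the same call from C: futex has no libc wrapper, so user code issues the raw syscall just as the runtime does (the helper names are illustrative):

#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sleep while *uaddr == val; a NULL timeout means wait indefinitely. */
static long futex_wait(int32_t *uaddr, int32_t val) {
    return syscall(SYS_futex, uaddr, FUTEX_WAIT, val, NULL, NULL, 0);
}

/* Wake up to nwake waiters blocked on uaddr. */
static long futex_wake(int32_t *uaddr, int32_t nwake) {
    return syscall(SYS_futex, uaddr, FUTEX_WAKE, nwake, NULL, NULL, 0);
}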
// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
TEXT runtime·clone(SB),NOSPLIT,$0
MOVW flags+0(FP), R0
MOVW stack+4(FP), R1
MOVW $0, R2 // parent tid ptr
MOVW $0, R3 // tls_val
MOVW $0, R4 // child tid ptr
MOVW $0, R5
// Copy mp, gp, fn off parent stack for use by child.
// TODO(kaib): figure out which registers are clobbered by clone and avoid stack copying
MOVW $-16(R1), R1
MOVW mm+8(FP), R6
MOVW R6, 0(R1)
MOVW gg+12(FP), R6
MOVW R6, 4(R1)
MOVW fn+16(FP), R6
MOVW R6, 8(R1)
MOVW $1234, R6
MOVW R6, 12(R1)
MOVW $SYS_clone, R7
SWI $0
// In parent, return.
CMP $0, R0
BEQ 2(PC)
RET
// Paranoia: check that SP is as we expect. Use R13 to avoid linker 'fixup'
MOVW 12(R13), R0
MOVW $1234, R1
CMP R0, R1
BEQ 2(PC)
BL runtime·abort(SB)
MOVW 0(R13), m
MOVW 4(R13), g
// paranoia; check they are not nil
MOVW 0(m), R0
MOVW 0(g), R0
BL runtime·emptyfunc(SB) // fault if stack check is wrong
// Initialize m->procid to Linux tid
MOVW $SYS_gettid, R7
SWI $0
MOVW R0, m_procid(m)
// Call fn
MOVW 8(R13), R0
MOVW $16(R13), R13
BL (R0)
MOVW $0, R0
MOVW R0, 4(R13)
BL runtime·exit1(SB)
// It shouldn't return
MOVW $1234, R0
MOVW $1005, R1
MOVW R0, (R1)
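
A hedged C sketch of clone(2) with a caller-supplied stack, loosely mirroring the routine above; the flag set is an assumption (the runtime's actual flags are passed in by its caller, not visible in this file):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

static int child(void *arg) {
    printf("child running with arg %p\n", arg);
    return 0;
}

int main(void) {
    const size_t stack_size = 64 * 1024;
    char *stack = malloc(stack_size);
    /* The stack grows down, so pass a pointer to its high end. */
    int tid = clone(child, stack + stack_size,
                    CLONE_VM | CLONE_FS | CLONE_FILES | SIGCHLD, NULL);
    if (tid < 0) {
        perror("clone");
        return 1;
    }
    waitpid(tid, NULL, 0);   /* SIGCHLD above makes the child waitable */
    free(stack);
    return 0;
}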
TEXT runtime·sigaltstack(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW $SYS_sigaltstack, R7
SWI $0
MOVW $0xfffff001, R6
CMP R6, R0
MOVW.HI $0, R8 // crash on syscall failure
MOVW.HI R8, (R8)
RET
TEXT runtime·sigtramp(SB),NOSPLIT,$24
// this might be called in external code context,
// where g and m are not set.
// first save R0, because runtime·load_gm will clobber it
MOVW R0, 4(R13)
MOVB runtime·iscgo(SB), R0
CMP $0, R0
BL.NE runtime·load_gm(SB)
CMP $0, m
BNE 4(PC)
// signal number is already prepared in 4(R13)
MOVW $runtime·badsignal(SB), R11
BL (R11)
RET
// save g
MOVW g, R3
MOVW g, 20(R13)
// g = m->gsignal
MOVW m_gsignal(m), g
// copy arguments for call to sighandler
// R0 is already saved above
MOVW R1, 8(R13)
MOVW R2, 12(R13)
MOVW R3, 16(R13)
BL runtime·sighandler(SB)
// restore g
MOVW 20(R13), g
RET
TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW 12(FP), R3
MOVW $SYS_rt_sigprocmask, R7
SWI $0
RET
TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW 12(FP), R3
MOVW $SYS_rt_sigaction, R7
SWI $0
RET
TEXT runtime·sigreturn(SB),NOSPLIT,$0
MOVW $SYS_rt_sigreturn, R7
SWI $0
RET
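
A hedged C sketch tying the signal routines above together: an alternate stack plus a SA_SIGINFO/SA_ONSTACK handler is the C-level analogue of sigaltstack + rt_sigaction + sigtramp.

#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *info, void *ctx) {
    /* sig, info and ctx mirror the three arguments sigtramp saves
     * before calling runtime.sighandler. */
    (void)sig; (void)info; (void)ctx;
    const char msg[] = "caught a signal on the alternate stack\n";
    write(2, msg, sizeof msg - 1);
}

int main(void) {
    stack_t ss;
    ss.ss_sp = malloc(SIGSTKSZ);
    ss.ss_size = SIGSTKSZ;
    ss.ss_flags = 0;
    sigaltstack(&ss, NULL);

    struct sigaction sa;
    memset(&sa, 0, sizeof sa);
    sa.sa_sigaction = handler;
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGUSR1, &sa, NULL);

    raise(SIGUSR1);
    return 0;
}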
TEXT runtime·usleep(SB),NOSPLIT,$12
MOVW usec+0(FP), R0
MOVW R0, R1
MOVW $1000000, R2
DIV R2, R0
MOD R2, R1
MOVW R0, 4(SP)
MOVW R1, 8(SP)
MOVW $0, R0
MOVW $0, R1
MOVW $0, R2
MOVW $0, R3
MOVW $4(SP), R4
MOVW $SYS_select, R7
SWI $0
RET
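
A hedged C equivalent of the wrapper above: select(2) with empty fd sets and only a timeout is an old, portable way to sleep, and the DIV/MOD pair corresponds to splitting microseconds into a timeval.

#include <sys/select.h>

static void sleep_usec(unsigned int usec) {
    struct timeval tv;
    tv.tv_sec  = usec / 1000000;   /* the DIV in the assembly */
    tv.tv_usec = usec % 1000000;   /* the MOD in the assembly */
    select(0, NULL, NULL, NULL, &tv);
}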
// Use kernel version instead of native armcas in asm_arm.s.
// See ../sync/atomic/asm_linux_arm.s for details.
TEXT cas<>(SB),NOSPLIT,$0
MOVW $0xffff0fc0, PC
TEXT runtime·cas(SB),NOSPLIT,$0
MOVW valptr+0(FP), R2
MOVW old+4(FP), R0
casagain:
MOVW new+8(FP), R1
BL cas<>(SB)
BCC cascheck
MOVW $1, R0
RET
cascheck:
// Kernel lies; double-check.
MOVW valptr+0(FP), R2
MOVW old+4(FP), R0
MOVW 0(R2), R3
CMP R0, R3
BEQ casagain
MOVW $0, R0
RET
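
cas<> jumps to the fixed address 0xffff0fc0, the kernel-provided __kernel_cmpxchg helper in the vector page (see the kernel's kuser helper documentation). A hedged C sketch of the same call through a function pointer; the wrapper name is illustrative:

#include <stdbool.h>

typedef int (*kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
#define KUSER_CMPXCHG ((kuser_cmpxchg_t)0xffff0fc0)

/* Returns true if *ptr held old and has been replaced by newval. */
static bool cas32(volatile int *ptr, int old, int newval) {
    return KUSER_CMPXCHG(old, newval, ptr) == 0;
}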
TEXT runtime·casp(SB),NOSPLIT,$0
B runtime·cas(SB)
TEXT runtime·osyield(SB),NOSPLIT,$0
MOVW $SYS_sched_yield, R7
SWI $0
RET
TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW $SYS_sched_getaffinity, R7
SWI $0
RET
// int32 runtime·epollcreate(int32 size)
TEXT runtime·epollcreate(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW $SYS_epoll_create, R7
SWI $0
RET
// int32 runtime·epollcreate1(int32 flags)
TEXT runtime·epollcreate1(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW $SYS_epoll_create1, R7
SWI $0
RET
// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev)
TEXT runtime·epollctl(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW 12(FP), R3
MOVW $SYS_epoll_ctl, R7
SWI $0
RET
// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout)
TEXT runtime·epollwait(SB),NOSPLIT,$0
MOVW 0(FP), R0
MOVW 4(FP), R1
MOVW 8(FP), R2
MOVW 12(FP), R3
MOVW $SYS_epoll_wait, R7
SWI $0
RET
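
A hedged C sketch of the create/ctl/wait sequence behind the four wrappers above:

#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void) {
    int epfd = epoll_create1(0);
    if (epfd < 0) {
        perror("epoll_create1");
        return 1;
    }

    struct epoll_event ev = { .events = EPOLLIN, .data.fd = 0 };
    epoll_ctl(epfd, EPOLL_CTL_ADD, 0, &ev);    /* watch stdin */

    struct epoll_event out[8];
    int n = epoll_wait(epfd, out, 8, 1000);    /* 1 second timeout */
    printf("%d event(s) ready\n", n);
    close(epfd);
    return 0;
}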
// void runtime·closeonexec(int32 fd)
TEXT runtime·closeonexec(SB),NOSPLIT,$0
MOVW 0(FP), R0 // fd
MOVW $2, R1 // F_SETFD
MOVW $1, R2 // FD_CLOEXEC
MOVW $SYS_fcntl, R7
SWI $0
RET
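
The literals above are the fcntl constants F_SETFD (2) and FD_CLOEXEC (1); a hedged C equivalent spells them out:

#include <fcntl.h>

static void close_on_exec(int fd) {
    fcntl(fd, F_SETFD, FD_CLOEXEC);
}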
// b __kuser_get_tls @ 0xffff0fe0
TEXT runtime·read_tls_fallback(SB),NOSPLIT,$-4
MOVW $0xffff0fe0, R0
B (R0)
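
read_tls_fallback branches to 0xffff0fe0, the kernel's __kuser_get_tls helper, which returns the same thread pointer that the MRC fast path reads from TPIDRURO. A hedged C sketch of calling it through a function pointer:

typedef void *(*kuser_get_tls_t)(void);
#define KUSER_GET_TLS ((kuser_get_tls_t)0xffff0fe0)

/* Returns the current thread's TLS pointer via the kernel helper. */
static void *read_tls(void) {
    return KUSER_GET_TLS();
}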