// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.
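//
// For orientation (a hedged sketch, not part of this file): the Go side is
// assumed to declare these thunks as ordinary Go functions, e.g.
//	func raceread(addr uintptr)
//	func racewrite(addr uintptr)
// and the compiler's -race instrumentation inserts calls to them before the
// corresponding memory accesses.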

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9, the rest is on stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// Gcc-compiled race runtime does not try to use that space.
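//
// The RARG0-RARG3 macros below name the first four argument registers of
// whichever convention applies, so the thunks can share one body on all
// supported platforms.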

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
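// For example (a hedged sketch of the instrumentation, not actual compiler
// output): under -race, a Go read such as
//	v := *p
// is compiled roughly as
//	raceread(uintptr(unsafe.Pointer(p)))
//	v = *p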
TEXT runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ addr+0(FP), RARG1
	MOVQ (SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ $__tsan_read(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ callpc+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ $__tsan_read_pc(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ addr+0(FP), RARG1
	MOVQ (SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ $__tsan_write(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ callpc+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ $__tsan_write_pc(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ (SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_read_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_read_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ (SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_write_range(SB), AX
	JMP racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ addr+0(FP), RARG1
	MOVQ size+8(FP), RARG2
	MOVQ pc+16(FP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ $__tsan_write_range(SB), AX
	JMP racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up goroutine context and invoke racecall. Other arguments already set.
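// (Rationale, stated as an assumption: the race runtime only keeps shadow
// state for the Go arena and data/bss ranges recorded in racearenastart,
// racedatastart and friends, so accesses outside them are silently skipped.)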
TEXT racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ RARG1, runtime·racearenastart(SB)
	JB data
	CMPQ RARG1, runtime·racearenaend(SB)
	JB call
data:
	CMPQ RARG1, runtime·racedatastart(SB)
	JB ret
	CMPQ RARG1, runtime·racedataend(SB)
	JAE ret
call:
	MOVQ AX, AX // w/o this 6a miscompiles this function
	JMP racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
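// For example (a hedged sketch of the instrumentation): every function built
// with -race is roughly bracketed as
//	racefuncenter(callerpc)
//	... original body ...
//	racefuncexit()
// which lets the race runtime keep a per-goroutine call stack for its reports.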
TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ DX, R15 // save function entry context (for closures)
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ callpc+0(FP), RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ $__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL racecall<>(SB)
	MOVQ R15, DX // restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ $__tsan_func_exit(SB), AX
	JMP racecall<>(SB)

// Atomic operations for sync/atomic package.
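//
// In the race build these definitions are assumed to take over the sync∕atomic
// entry points (the non-race assembly versions being excluded by build tags).
// Each wrapper just loads the matching __tsan_go_atomic* routine into AX;
// racecallatomic<> below forwards the wrapper's Go argument frame to it.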

// Load
TEXT sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_load(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_load(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt32(SB)

TEXT sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt64(SB)

TEXT sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt64(SB)

TEXT sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP sync∕atomic·LoadInt64(SB)

// Store
TEXT sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_store(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_store(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·StoreInt32(SB)

TEXT sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·StoreInt64(SB)

TEXT sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·StoreInt64(SB)

// Swap
TEXT sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·SwapInt32(SB)

TEXT sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·SwapInt64(SB)

TEXT sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·SwapInt64(SB)

// Add
TEXT sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_fetch_add(SB), AX
	CALL racecallatomic<>(SB)
	MOVL add+8(FP), AX // convert fetch_add to add_fetch
	ADDL AX, ret+16(FP)
	RET

TEXT sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_fetch_add(SB), AX
	CALL racecallatomic<>(SB)
	MOVQ add+8(FP), AX // convert fetch_add to add_fetch
	ADDQ AX, ret+16(FP)
	RET

TEXT sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·AddInt32(SB)

TEXT sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·AddInt64(SB)

TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic32_compare_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ $__tsan_go_atomic64_compare_exchange(SB), AX
	CALL racecallatomic<>(SB)
	RET

TEXT sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP sync∕atomic·CompareAndSwapInt32(SB)

TEXT sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP sync∕atomic·CompareAndSwapInt64(SB)

TEXT sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
TEXT racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
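	// (Added note, based on the code below: 16(SP) is the first Go argument of
	// the sync∕atomic wrapper, i.e. the target address, so a bad pointer is
	// assumed to fault here in Go code rather than deep inside the C runtime.)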
	MOVQ 16(SP), R12
	MOVL (R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ R12, runtime·racearenastart(SB)
	JB racecallatomic_data
	CMPQ R12, runtime·racearenaend(SB)
	JB racecallatomic_ok
racecallatomic_data:
	CMPQ R12, runtime·racedatastart(SB)
	JB racecallatomic_ignore
	CMPQ R12, runtime·racedataend(SB)
	JAE racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ 8(SP), RARG1 // caller pc
	MOVQ (SP), RARG2 // pc
	LEAQ 16(SP), RARG3 // arguments
	JMP racecall<>(SB) // does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ AX, R15 // remember the original function
	MOVQ $__tsan_go_ignore_sync_begin(SB), AX
	MOVQ g(R12), R14
	MOVQ g_racectx(R14), RARG0 // goroutine context
	CALL racecall<>(SB)
	MOVQ R15, AX // restore the original function
	// Call the atomic function.
	MOVQ g_racectx(R14), RARG0 // goroutine context
	MOVQ 8(SP), RARG1 // caller pc
	MOVQ (SP), RARG2 // pc
	LEAQ 16(SP), RARG3 // arguments
	CALL racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ $__tsan_go_ignore_sync_end(SB), AX
	MOVQ g_racectx(R14), RARG0 // goroutine context
	JMP racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
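//
// A hedged sketch of the Go-side counterpart (an assumption about race.go, not
// shown here): a declaration along the lines of
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)
// invoked with the address of one of the __tsan_* symbols above; this routine
// then spreads the four arguments across RARG0-RARG3 and jumps to racecall<>.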
TEXT runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ fn+0(FP), AX
	MOVQ arg0+8(FP), RARG0
	MOVQ arg1+16(FP), RARG1
	MOVQ arg2+24(FP), RARG2
	MOVQ arg3+32(FP), RARG3
	JMP racecall<>(SB)

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ g(R12), R14
	MOVQ g_m(R14), R13
	// Switch to g0 stack.
	MOVQ SP, R12 // callee-saved, preserved across the CALL
	MOVQ m_g0(R13), R10
	CMPQ R10, R14
	JE call // already on g0
	MOVQ (g_sched+gobuf_sp)(R10), SP
call:
	ANDQ $~15, SP // alignment for gcc ABI
	CALL AX
	MOVQ R12, SP
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
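// (For orientation, stated as an assumption: the C race runtime invokes this
// thunk when it needs to turn PCs into function/file/line for a report; the
// single argument arrives in RARG0 and is pushed through to
// runtime·racesymbolize below.)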
TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ BX
	PUSHQ BP
	PUSHQ DI
	PUSHQ SI
	PUSHQ R12
	PUSHQ R13
	PUSHQ R14
	PUSHQ R15
	// Set g = g0.
	get_tls(R12)
	MOVQ g(R12), R13
	MOVQ g_m(R13), R13
	MOVQ m_g0(R13), R14
	MOVQ R14, g(R12) // g = m->g0
	PUSHQ RARG0 // func arg
	CALL runtime·racesymbolize(SB)
	POPQ R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ g(R12), R13
	MOVQ g_m(R13), R13
	MOVQ m_curg(R13), R14
	MOVQ R14, g(R12) // g = m->curg
	// Restore callee-saved registers.
	POPQ R15
	POPQ R14
	POPQ R13
	POPQ R12
	POPQ SI
	POPQ DI
	POPQ BP
	POPQ BX
	RET