2011-06-29 01:37:56 -06:00
|
|
|
// Copyright 2011 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
2011-12-19 13:51:13 -07:00
|
|
|
#include "zasm_GOOS_GOARCH.h"
|
2013-08-07 13:20:05 -06:00
|
|
|
#include "../../cmd/ld/textflag.h"
|
2011-06-29 01:37:56 -06:00
|
|
|
|
2011-11-28 18:57:20 -07:00
|
|
|
// maxargs should be divisible by 2, as Windows stack
|
|
|
|
// must be kept 16-byte aligned on syscall entry.
|
|
|
|
#define maxargs 16
|
2011-08-27 07:17:00 -06:00
|
|
|
|
|
|
|
// void runtime·asmstdcall(void *c);
//
// Invokes the function described by the libcall record c: libcall_fn is
// the target, libcall_args points at an array of libcall_n 8-byte
// arguments. The value the target leaves in AX is stored to libcall_r1
// and the thread's last-error value to libcall_err.
TEXT runtime·asmstdcall(SB),NOSPLIT,$0
	// asmcgocall hands us the libcall pointer in CX. Keep a copy on the
	// stack because CX is reused below as count and first argument.
	PUSHQ	CX
	MOVQ	libcall_args(CX), SI	// SI = argument array
	MOVQ	libcall_fn(CX), AX	// AX = function to call
	MOVQ	libcall_n(CX), CX	// CX = argument count (overwrites c)

	// SetLastError(0): clear the 32-bit last-error slot at offset 0x68
	// of the per-thread block whose address is kept at 0x30(GS).
	MOVQ	0x30(GS), DI
	MOVL	$0, 0x68(DI)

	// Reserve maxargs 8-byte slots for outgoing arguments.
	SUBQ	$(maxargs*8), SP

	// With at most 4 arguments everything travels in registers, so the
	// copy to the stack is skipped and SI keeps pointing at the caller's
	// array.
	CMPL	CX, $4
	JLE	asmstdcall_loadregs

	// More arguments than reserved slots cannot be handled: trap.
	CMPL	CX, $maxargs
	JLE	asmstdcall_copyargs
	INT	$3

asmstdcall_copyargs:
	// Copy CX quadwords from the argument array to the reserved area,
	// then read the register arguments from that copy.
	MOVQ	SP, DI
	CLD
	REP; MOVSQ
	MOVQ	SP, SI

asmstdcall_loadregs:
	// The first four arguments go in CX, DX, R8 and R9.
	MOVQ	24(SI), R9
	MOVQ	16(SI), R8
	MOVQ	8(SI), DX
	MOVQ	0(SI), CX

	// Call the target.
	CALL	AX

	// Release the argument area.
	ADDQ	$(maxargs*8), SP

	// Recover the libcall pointer and record the result.
	POPQ	CX
	MOVQ	AX, libcall_r1(CX)

	// GetLastError(): read the last-error slot back. MOVL zero-extends
	// the 32-bit value before it is stored as a quadword.
	MOVQ	0x30(GS), DI
	MOVL	0x68(DI), AX
	MOVQ	AX, libcall_err(CX)

	RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
// runtime·badsignal2 writes the message at runtime·badsignalmsg to the
// handle returned by GetStdHandle(-12) (stderr) using WriteFile.
// sigtramp calls it when the current thread has TLS but no g.
//
// The 48-byte frame holds five outgoing argument slots at 0..39(SP)
// and the "bytes written" result at 40(SP). Every argument is stored
// both in its register and in its stack slot.
TEXT runtime·badsignal2(SB),NOSPLIT,$48
	// GetStdHandle(-12)
	MOVQ	$-12, CX	// stderr
	MOVQ	CX, 0(SP)
	MOVQ	runtime·GetStdHandle(SB), AX
	CALL	AX

	// WriteFile(handle, msg, len, &written, nil)
	MOVQ	AX, CX	// handle
	MOVQ	CX, 0(SP)
	MOVQ	$runtime·badsignalmsg(SB), DX	// pointer
	MOVQ	DX, 8(SP)
	// Load the length itself. The previous `$runtime·badsignallen(SB)`
	// form passed the symbol's address as the byte count.
	// NOTE(review): assumes runtime·badsignallen is a 32-bit variable
	// holding the message length - confirm against its definition.
	MOVL	runtime·badsignallen(SB), R8	// count
	MOVQ	R8, 16(SP)
	LEAQ	40(SP), R9	// written count
	MOVQ	$0, 0(R9)
	MOVQ	R9, 24(SP)
	MOVQ	$0, 32(SP)	// overlapped
	MOVQ	runtime·WriteFile(SB), AX
	CALL	AX

	RET
|
|
|
|
|
2011-06-29 01:37:56 -06:00
|
|
|
// faster get/set last error
//
// runtime·getlasterror returns, in AX, the 32-bit value stored at offset
// 0x68 of the per-thread block whose address is kept at 0x30(GS). This
// is the same slot asmstdcall reads for GetLastError().
TEXT runtime·getlasterror(SB),NOSPLIT,$0
	MOVQ	0x30(GS), AX	// AX = per-thread block
	MOVL	0x68(AX), AX	// AX = last-error value (zero-extended)
	RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
// runtime·setlasterror stores its 32-bit argument err into the
// last-error slot: offset 0x68 of the per-thread block whose address is
// kept at 0x30(GS).
TEXT runtime·setlasterror(SB),NOSPLIT,$0
	MOVQ	0x30(GS), CX		// CX = per-thread block
	MOVL	err+0(FP), AX		// AX = new last-error value
	MOVL	AX, 0x68(CX)
	RET
|
|
|
|
|
2014-03-25 18:13:50 -06:00
|
|
|
// Called by Windows as a Vectored Exception Handler (VEH).
// On entry CX holds the PEXCEPTION_POINTERS argument: a pointer to a
// struct whose first word is the exception record pointer and whose
// second word is the context pointer.
// The result goes back to Windows in AX: 0 for 'not handled',
// -1 for handled.
TEXT runtime·sigtramp(SB),NOSPLIT,$0-0
	// The windows callback convention requires DI SI BP BX R12 R13 R14
	// R15 and the DF flag to be preserved. Save the flags, then park the
	// registers in the upper part of an 88-byte scratch area. The low
	// 24 bytes are left free for the sighandler arguments.
	PUSHFQ
	SUBQ	$88, SP
	MOVQ	R15, 24(SP)
	MOVQ	R14, 32(SP)
	MOVQ	R13, 40(SP)
	MOVQ	R12, 48(SP)
	MOVQ	BX, 56(SP)
	MOVQ	BP, 64(SP)
	MOVQ	SI, 72(SP)
	MOVQ	DI, 80(SP)

	// Unpack the argument struct. CX is read last as the base register.
	MOVQ	0(CX), BX	// ExceptionRecord*
	MOVQ	8(CX), CX	// Context*

	// Find g. Without TLS, report "not handled" and leave.
	get_tls(DX)
	CMPQ	DX, $0
	JNE	sigtramp_havetls
	MOVQ	$0, AX		// continue
	JMP	sigtramp_done

sigtramp_havetls:
	MOVQ	g(DX), DX
	CMPQ	DX, $0
	JNE	sigtramp_haveg
	// TLS is set up but there is no g: print the bad-signal message.
	// NOTE(review): badsignal2 returns here and execution falls through
	// to sighandler with CX and DX overwritten by badsignal2 - confirm
	// that this is intended.
	CALL	runtime·badsignal2(SB)

sigtramp_haveg:
	// sighandler(ExceptionRecord*, Context*, G*)
	MOVQ	DX, 16(SP)
	MOVQ	CX, 8(SP)
	MOVQ	BX, 0(SP)
	CALL	runtime·sighandler(SB)
	// sighandler's AX is passed through to Windows unchanged.

sigtramp_done:
	// Put the callee-saved registers and the flags back.
	MOVQ	80(SP), DI
	MOVQ	72(SP), SI
	MOVQ	64(SP), BP
	MOVQ	56(SP), BX
	MOVQ	48(SP), R12
	MOVQ	40(SP), R13
	MOVQ	32(SP), R14
	MOVQ	24(SP), R15
	ADDQ	$88, SP
	POPFQ

	RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
// runtime·ctrlhandler forwards to runtime·ctrlhandler1 through
// runtime·externalthreadhandler.
//
// The incoming argument in CX is parked at 16(SP); with the 8-byte
// frame that is the first slot above the return address.
// externalthreadhandler later reads it as 32(BP) and reads the handler
// address, stored at 0(SP) here, as 16(BP).
TEXT runtime·ctrlhandler(SB),NOSPLIT,$8
	MOVQ	CX, 16(SP)				// spill the argument
	MOVQ	$runtime·ctrlhandler1(SB), CX
	MOVQ	CX, 0(SP)				// handler to run
	CALL	runtime·externalthreadhandler(SB)
	RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
// runtime·profileloop forwards to runtime·profileloop1 through
// runtime·externalthreadhandler. The handler address is stored at 0(SP),
// where externalthreadhandler finds it as 16(BP). Unlike ctrlhandler, no
// argument is spilled.
TEXT runtime·profileloop(SB),NOSPLIT,$8
	MOVQ	$runtime·profileloop1(SB), CX
	MOVQ	CX, 0(SP)				// handler to run
	CALL	runtime·externalthreadhandler(SB)
	RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
// runtime·externalthreadhandler runs a handler with a temporary m and g
// carved out of the current stack.
//
// Inputs, relative to BP after the prologue:
//	16(BP)	address of the handler (the caller's 0(SP))
//	32(BP)	argument for the handler (the caller's 16(SP))
// BP, BX, SI, DI and the TLS slot at 0x28(GS) are saved on entry and
// restored before returning.
TEXT runtime·externalthreadhandler(SB),NOSPLIT,$0
	PUSHQ	BP
	MOVQ	SP, BP
	PUSHQ	BX
	PUSHQ	SI
	PUSHQ	DI
	PUSHQ	0x28(GS)		// previous TLS base
	MOVQ	SP, DX			// DX = SP to return to; kept in g_stackbase

	// Dummy m: reserve m_end bytes and zero them. The memclr arguments
	// sit at the bottom of the block being cleared.
	SUBQ	$m_end, SP
	MOVQ	SP, 0(SP)
	MOVQ	$m_end, 8(SP)
	CALL	runtime·memclr(SB)	// smashes AX,BX,CX

	// Point the TLS slot at the new m's tls area.
	LEAQ	m_tls(SP), CX
	MOVQ	CX, 0x28(GS)

	// Dummy g: reserve g_end bytes right below m and link it in as both
	// the current g and m->g0.
	MOVQ	SP, BX			// BX = m
	SUBQ	$g_end, SP
	MOVQ	SP, m_g0(BX)
	MOVQ	SP, g(CX)

	MOVQ	SP, 0(SP)
	MOVQ	$g_end, 8(SP)
	CALL	runtime·memclr(SB)	// smashes AX,BX,CX

	// g->m: m starts immediately above g.
	LEAQ	g_end(SP), BX
	MOVQ	BX, g_m(SP)

	// Stack bounds for g: base is the SP captured in DX, guard is 8192
	// bytes below the current SP.
	MOVQ	DX, g_stackbase(SP)
	LEAQ	-8192(SP), CX
	MOVQ	CX, g_stackguard(SP)

	// Run the handler with its single argument.
	PUSHQ	32(BP)			// arg for handler
	CALL	16(BP)
	POPQ	CX

	// Drop the dummy m and g by reloading SP from g_stackbase, then undo
	// the prologue.
	get_tls(CX)
	MOVQ	g(CX), CX
	MOVQ	g_stackbase(CX), SP
	POPQ	0x28(GS)
	POPQ	DI
	POPQ	SI
	POPQ	BX
	POPQ	BP
	RET
|
2011-08-30 06:02:02 -06:00
|
|
|
|
2013-06-24 01:17:45 -06:00
|
|
|
// runtime·cbctxts is an 8-byte global. callbackasm1 loads it as the base
// address of a table of 8-byte entries, each the address of a callback
// context (fields cbctxt_argsize and cbctxt_gobody).
GLOBL runtime·cbctxts(SB), $8
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
// runtime·callbackasm1 is reached by a CALL from the table of call
// instructions at runtime·callbackasm. The return address of that CALL
// identifies the callback. The callback context is looked up in
// runtime·cbctxts, and the Go function is run via
// runtime·cgocallback_gofunc with the Windows arguments as its frame.
// The Go function's result is returned to Windows in AX.
TEXT runtime·callbackasm1(SB),NOSPLIT,$0
	// Construct args vector for cgocallback().
	// By windows/amd64 calling convention first 4 args are in CX, DX, R8, R9
	// args from the 5th on are on the stack.
	// In any case, even if function has 0,1,2,3,4 args, there is reserved
	// but uninitialized "shadow space" for the first 4 args.
	// The values are in registers, so store them into that space. The
	// offset 16 skips the two return addresses currently on the stack.
	MOVQ	CX, (16+0)(SP)
	MOVQ	DX, (16+8)(SP)
	MOVQ	R8, (16+16)(SP)
	MOVQ	R9, (16+24)(SP)

	// remove return address from stack, we are not returning there
	MOVQ	0(SP), AX
	ADDQ	$8, SP

	// determine index into runtime·cbctxts table
	SUBQ	$runtime·callbackasm(SB), AX
	MOVQ	$0, DX	// high half of the dividend must be zero
	MOVQ	$5, CX	// divide by 5 because each call instruction in runtime·callbacks is 5 bytes long
	DIVL	CX

	// find corresponding runtime·cbctxts table entry.
	// The return address lies past the CALL, so the quotient is
	// presumably one more than the slot number; the -8 displacement
	// compensates.
	MOVQ	runtime·cbctxts(SB), CX
	MOVQ	-8(CX)(AX*8), AX

	// extract callback context
	MOVQ	cbctxt_argsize(AX), DX
	MOVQ	cbctxt_gobody(AX), AX

	// preserve whatever's at the memory location that
	// the callback will use to store the return value
	LEAQ	8(SP), CX	// args vector, skip return address
	PUSHQ	0(CX)(DX*1)	// store 8 bytes from just after the args array
	ADDQ	$8, DX		// extend argsize by size of return value

	// DI SI BP BX R12 R13 R14 R15 registers and DF flag are preserved
	// as required by windows callback convention.
	PUSHFQ
	SUBQ	$64, SP
	MOVQ	DI, 56(SP)
	MOVQ	SI, 48(SP)
	MOVQ	BP, 40(SP)
	MOVQ	BX, 32(SP)
	MOVQ	R12, 24(SP)
	MOVQ	R13, 16(SP)
	MOVQ	R14, 8(SP)
	MOVQ	R15, 0(SP)

	// prepare call stack.  use SUBQ to hide from stack frame checks
	// cgocallback(Go func, void *frame, uintptr framesize)
	SUBQ	$24, SP
	MOVQ	DX, 16(SP)	// argsize (including return value)
	MOVQ	CX, 8(SP)	// callback parameters
	MOVQ	AX, 0(SP)	// address of target Go function
	CLD
	CALL	runtime·cgocallback_gofunc(SB)
	// Reload the three values; the call does not preserve registers.
	MOVQ	0(SP), AX
	MOVQ	8(SP), CX
	MOVQ	16(SP), DX
	ADDQ	$24, SP

	// restore registers as required for windows callback
	MOVQ	0(SP), R15
	MOVQ	8(SP), R14
	MOVQ	16(SP), R13
	MOVQ	24(SP), R12
	MOVQ	32(SP), BX
	MOVQ	40(SP), BP
	MOVQ	48(SP), SI
	MOVQ	56(SP), DI
	ADDQ	$64, SP
	POPFQ

	// NOTE(review): only the low 32 bits of the 8-byte result slot are
	// returned (MOVL zero-extends) - confirm that truncating the
	// callback result is intended on amd64.
	MOVL	-8(CX)(DX*1), AX	// return value
	POPQ	-8(CX)(DX*1)		// restore bytes just after the args
	RET
|
|
|
|
|
2011-08-29 06:12:56 -06:00
|
|
|
// uint32 tstart_stdcall(M *newm);
//
// Thread entry point: newm arrives in CX. Sets up newm->g0 to use the
// current OS stack, installs newm's tls area as the thread's TLS, and
// calls runtime·mstart. Returns 0 in AX.
TEXT runtime·tstart_stdcall(SB),NOSPLIT,$0
	MOVQ	m_g0(CX), DX		// DX = newm->g0

	// Describe the OS stack to g0: the base is the current SP and the
	// guard lies 64 kB below it.
	MOVQ	SP, AX
	MOVQ	AX, g_stackbase(DX)
	SUBQ	$(64*1024), AX		// stack size
	MOVQ	AX, g_stackguard(DX)

	// Link g0 to its m, then publish the tls area and the current g.
	MOVQ	CX, g_m(DX)
	LEAQ	m_tls(CX), SI
	MOVQ	SI, 0x28(GS)
	MOVQ	DX, g(SI)

	// Someday the convention will be D is always cleared.
	CLD

	CALL	runtime·stackcheck(SB)	// clobbers AX,CX
	CALL	runtime·mstart(SB)

	XORL	AX, AX			// return 0 == success
	RET
|
|
|
|
|
|
|
|
// runtime·settls sets the tls base: the value passed in DI is stored
// into the slot at 0x28(GS).
TEXT runtime·settls(SB),NOSPLIT,$0
	MOVQ	DI, 0x28(GS)
	RET
|
2012-05-29 23:10:54 -06:00
|
|
|
|
2013-07-15 20:36:05 -06:00
|
|
|
// runtime·usleep1 sleeps by calling runtime·usleep2.
// Sleep duration is in 100ns units; it is passed on to usleep2 in BX.
// On a Go-managed thread the call is made on the m->g0 stack, and the
// caller's pc, g and sp are published in m for the cpu profiler while
// the call is in progress.
TEXT runtime·usleep1(SB),NOSPLIT,$0
	MOVQ	$runtime·usleep2(SB), AX	// to hide from 6l
	MOVL	duration+0(FP), BX

	// Execute call on m->g0 stack, in case we are not actually
	// calling a system call wrapper, like when running under WINE.
	get_tls(R15)
	CMPQ	R15, $0
	JNE	usleep1_gothread
	// Not a Go-managed thread. Do not switch stack.
	CALL	AX
	RET

usleep1_gothread:
	MOVQ	g(R15), R13
	MOVQ	g_m(R13), R13		// R13 = m, used again after the call

	// leave pc/sp for cpu profiler
	MOVQ	g(R15), R12
	MOVQ	R12, m_libcallg(R13)
	MOVQ	(SP), R12		// return address of usleep1
	MOVQ	R12, m_libcallpc(R13)
	// sp must be the last, because once async cpu profiler finds
	// all three values to be non-zero, it will use them
	LEAQ	8(SP), R12
	MOVQ	R12, m_libcallsp(R13)

	MOVQ	m_g0(R13), R14
	CMPQ	g(R15), R14
	JNE	usleep1_switch
	// executing on m->g0 already
	CALL	AX
	JMP	usleep1_ret

usleep1_switch:
	// Switch to m->g0 stack and back: the old SP is stored in the
	// first slot below g0's saved sp and reloaded from there afterwards.
	MOVQ	(g_sched+gobuf_sp)(R14), R14
	MOVQ	SP, -8(R14)
	LEAQ	-8(R14), SP
	CALL	AX
	MOVQ	0(SP), SP

usleep1_ret:
	// Clearing sp withdraws the pc/g/sp record published above.
	MOVQ	$0, m_libcallsp(R13)
	RET
|
|
|
|
|
|
|
|
// Runs on OS stack. duration (in 100ns units) is in BX.
//
// Calls NtWaitForSingleObject(-1, 0, &time) with time = -duration.
//
// Frame layout after SP has been aligned:
//	0..31(SP)	shadow space the Windows x64 convention lets the
//			callee overwrite
//	32(SP)		time value passed by pointer (ptime)
//	40(SP)		SP as it was before alignment
// ptime and the saved SP used to live at 0(SP) and 8(SP), inside the
// shadow space, where a callee that spills its register arguments
// could destroy them.
TEXT runtime·usleep2(SB),NOSPLIT,$48
	MOVQ	SP, AX
	ANDQ	$~15, SP	// alignment as per Windows requirement
	MOVQ	AX, 40(SP)	// remember the unaligned SP
	// Want negative 100ns units.
	NEGQ	BX
	LEAQ	32(SP), R8	// ptime
	MOVQ	BX, (R8)
	MOVQ	$-1, CX		// handle
	MOVQ	$0, DX		// alertable
	MOVQ	runtime·NtWaitForSingleObject(SB), AX
	CALL	AX
	MOVQ	40(SP), SP	// undo the alignment
	RET
|