1
0
mirror of https://github.com/golang/go synced 2024-10-04 12:21:26 -06:00
go/src/pkg/runtime/sys_linux_amd64.s
Dmitriy Vyukov 49e0300854 runtime: integrated network poller for linux
vs tip:
BenchmarkTCP4OneShot                    172994        40485  -76.60%
BenchmarkTCP4OneShot-2                   96581        30028  -68.91%
BenchmarkTCP4OneShot-4                   52615        18454  -64.93%
BenchmarkTCP4OneShot-8                   26351        12289  -53.36%
BenchmarkTCP4OneShot-16                  12258        16093  +31.29%
BenchmarkTCP4OneShot-32                  13200        17045  +29.13%

BenchmarkTCP4OneShotTimeout             124814        42932  -65.60%
BenchmarkTCP4OneShotTimeout-2            99090        29040  -70.69%
BenchmarkTCP4OneShotTimeout-4            51860        18455  -64.41%
BenchmarkTCP4OneShotTimeout-8            26100        12073  -53.74%
BenchmarkTCP4OneShotTimeout-16           12198        16654  +36.53%
BenchmarkTCP4OneShotTimeout-32           13438        17143  +27.57%

BenchmarkTCP4Persistent                 115647         7782  -93.27%
BenchmarkTCP4Persistent-2                58024         4808  -91.71%
BenchmarkTCP4Persistent-4                24715         3674  -85.13%
BenchmarkTCP4Persistent-8                16431         2407  -85.35%
BenchmarkTCP4Persistent-16                2336         1875  -19.73%
BenchmarkTCP4Persistent-32                1689         1637   -3.08%

BenchmarkTCP4PersistentTimeout           79754         7859  -90.15%
BenchmarkTCP4PersistentTimeout-2         57708         5952  -89.69%
BenchmarkTCP4PersistentTimeout-4         26907         3823  -85.79%
BenchmarkTCP4PersistentTimeout-8         15036         2567  -82.93%
BenchmarkTCP4PersistentTimeout-16         2507         1903  -24.09%
BenchmarkTCP4PersistentTimeout-32         1717         1627   -5.24%

vs old scheduler:
benchmark                           old ns/op    new ns/op    delta
BenchmarkTCPOneShot                    192244        40485  -78.94%
BenchmarkTCPOneShot-2                   63835        30028  -52.96%
BenchmarkTCPOneShot-4                   35443        18454  -47.93%
BenchmarkTCPOneShot-8                   22140        12289  -44.49%
BenchmarkTCPOneShot-16                  16930        16093   -4.94%
BenchmarkTCPOneShot-32                  16719        17045   +1.95%

BenchmarkTCPOneShotTimeout             190495        42932  -77.46%
BenchmarkTCPOneShotTimeout-2            64828        29040  -55.20%
BenchmarkTCPOneShotTimeout-4            34591        18455  -46.65%
BenchmarkTCPOneShotTimeout-8            21989        12073  -45.10%
BenchmarkTCPOneShotTimeout-16           16848        16654   -1.15%
BenchmarkTCPOneShotTimeout-32           16796        17143   +2.07%

BenchmarkTCPPersistent                  81670         7782  -90.47%
BenchmarkTCPPersistent-2                26598         4808  -81.92%
BenchmarkTCPPersistent-4                15633         3674  -76.50%
BenchmarkTCPPersistent-8                18093         2407  -86.70%
BenchmarkTCPPersistent-16               17472         1875  -89.27%
BenchmarkTCPPersistent-32                7679         1637  -78.68%

BenchmarkTCPPersistentTimeout           83186         7859  -90.55%
BenchmarkTCPPersistentTimeout-2         26883         5952  -77.86%
BenchmarkTCPPersistentTimeout-4         15776         3823  -75.77%
BenchmarkTCPPersistentTimeout-8         18180         2567  -85.88%
BenchmarkTCPPersistentTimeout-16        17454         1903  -89.10%
BenchmarkTCPPersistentTimeout-32         7798         1627  -79.14%

R=golang-dev, iant, bradfitz, dave, rsc
CC=golang-dev
https://golang.org/cl/7579044
2013-03-14 19:06:35 +04:00

393 lines
7.2 KiB
ArmAsm

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// System calls and other sys.stuff for AMD64, Linux
//
#include "zasm_GOOS_GOARCH.h"
TEXT runtime·exit(SB),7,$0-8
MOVL 8(SP), DI
MOVL $231, AX // exitgroup - force all os threads to exit
SYSCALL
RET
TEXT runtime·exit1(SB),7,$0-8
MOVL 8(SP), DI
MOVL $60, AX // exit - exit the current os thread
SYSCALL
RET
TEXT runtime·open(SB),7,$0-16
MOVQ 8(SP), DI
MOVL 16(SP), SI
MOVL 20(SP), DX
MOVL $2, AX // syscall entry
SYSCALL
RET
TEXT runtime·close(SB),7,$0-16
MOVL 8(SP), DI
MOVL $3, AX // syscall entry
SYSCALL
RET
TEXT runtime·write(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVL 24(SP), DX
MOVL $1, AX // syscall entry
SYSCALL
RET
TEXT runtime·read(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVL 24(SP), DX
MOVL $0, AX // syscall entry
SYSCALL
RET
TEXT runtime·getrlimit(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVL $97, AX // syscall entry
SYSCALL
RET
TEXT runtime·usleep(SB),7,$16
MOVL $0, DX
MOVL usec+0(FP), AX
MOVL $1000000, CX
DIVL CX
MOVQ AX, 0(SP)
MOVQ DX, 8(SP)
// select(0, 0, 0, 0, &tv)
MOVL $0, DI
MOVL $0, SI
MOVL $0, DX
MOVL $0, R10
MOVQ SP, R8
MOVL $23, AX
SYSCALL
RET
TEXT runtime·raisesigpipe(SB),7,$12
MOVL $186, AX // syscall - gettid
SYSCALL
MOVL AX, DI // arg 1 tid
MOVL $13, SI // arg 2 SIGPIPE
MOVL $200, AX // syscall - tkill
SYSCALL
RET
TEXT runtime·setitimer(SB),7,$0-24
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVQ 24(SP), DX
MOVL $38, AX // syscall entry
SYSCALL
RET
TEXT runtime·mincore(SB),7,$0-24
MOVQ 8(SP), DI
MOVQ 16(SP), SI
MOVQ 24(SP), DX
MOVL $27, AX // syscall entry
SYSCALL
RET
// func now() (sec int64, nsec int32)
TEXT time·now(SB),7,$16
// Be careful. We're calling a function with gcc calling convention here.
// We're guaranteed 128 bytes on entry, and we've taken 16, and the
// call uses another 8.
// That leaves 104 for the gettime code to use. Hope that's enough!
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
JEQ fallback_gtod
MOVL $0, DI // CLOCK_REALTIME
LEAQ 0(SP), SI
CALL AX
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
fallback_gtod:
LEAQ 0(SP), DI
MOVQ $0, SI
MOVQ runtime·__vdso_gettimeofday_sym(SB), AX
CALL AX
MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec
IMULQ $1000, DX
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
TEXT runtime·nanotime(SB),7,$16
// Duplicate time.now here to avoid using up precious stack space.
// See comment above in time.now.
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
JEQ fallback_gtod_nt
MOVL $0, DI // CLOCK_REALTIME
LEAQ 0(SP), SI
CALL AX
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
// sec is in AX, nsec in DX
// return nsec in AX
IMULQ $1000000000, AX
ADDQ DX, AX
RET
fallback_gtod_nt:
LEAQ 0(SP), DI
MOVQ $0, SI
MOVQ runtime·__vdso_gettimeofday_sym(SB), AX
CALL AX
MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec
IMULQ $1000, DX
// sec is in AX, nsec in DX
// return nsec in AX
IMULQ $1000000000, AX
ADDQ DX, AX
RET
TEXT runtime·rtsigprocmask(SB),7,$0-32
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVQ 24(SP), DX
MOVL 32(SP), R10
MOVL $14, AX // syscall entry
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
TEXT runtime·rt_sigaction(SB),7,$0-32
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVQ 24(SP), DX
MOVQ 32(SP), R10
MOVL $13, AX // syscall entry
SYSCALL
RET
TEXT runtime·sigtramp(SB),7,$64
get_tls(BX)
// check that m exists
MOVQ m(BX), BP
CMPQ BP, $0
JNE 4(PC)
MOVQ DI, 0(SP)
CALL runtime·badsignal(SB)
RET
// save g
MOVQ g(BX), R10
MOVQ R10, 40(SP)
// g = m->gsignal
MOVQ m_gsignal(BP), BP
MOVQ BP, g(BX)
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ DX, 16(SP)
MOVQ R10, 24(SP)
CALL runtime·sighandler(SB)
// restore g
get_tls(BX)
MOVQ 40(SP), R10
MOVQ R10, g(BX)
RET
TEXT runtime·sigreturn(SB),7,$0
MOVL $15, AX // rt_sigreturn
SYSCALL
INT $3 // not reached
TEXT runtime·mmap(SB),7,$0
MOVQ 8(SP), DI
MOVQ $0, SI
MOVQ 16(SP), SI
MOVL 24(SP), DX
MOVL 28(SP), R10
MOVL 32(SP), R8
MOVL 36(SP), R9
MOVL $9, AX // mmap
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 3(PC)
NOTQ AX
INCQ AX
RET
TEXT runtime·munmap(SB),7,$0
MOVQ 8(SP), DI
MOVQ 16(SP), SI
MOVQ $11, AX // munmap
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
TEXT runtime·madvise(SB),7,$0
MOVQ 8(SP), DI
MOVQ 16(SP), SI
MOVQ 24(SP), DX
MOVQ $28, AX // madvise
SYSCALL
// ignore failure - maybe pages are locked
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,
// struct timespec *timeout, int32 *uaddr2, int32 val2);
TEXT runtime·futex(SB),7,$0
MOVQ 8(SP), DI
MOVL 16(SP), SI
MOVL 20(SP), DX
MOVQ 24(SP), R10
MOVQ 32(SP), R8
MOVL 40(SP), R9
MOVL $202, AX
SYSCALL
RET
// int64 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
TEXT runtime·clone(SB),7,$0
MOVL flags+8(SP), DI
MOVQ stack+16(SP), SI
// Copy mp, gp, fn off parent stack for use by child.
// Careful: Linux system call clobbers CX and R11.
MOVQ mm+24(SP), R8
MOVQ gg+32(SP), R9
MOVQ fn+40(SP), R12
MOVL $56, AX
SYSCALL
// In parent, return.
CMPQ AX, $0
JEQ 2(PC)
RET
// In child, on new stack.
MOVQ SI, SP
// Initialize m->procid to Linux tid
MOVL $186, AX // gettid
SYSCALL
MOVQ AX, m_procid(R8)
// Set FS to point at m->tls.
LEAQ m_tls(R8), DI
CALL runtime·settls(SB)
// In child, set up new stack
get_tls(CX)
MOVQ R8, m(CX)
MOVQ R9, g(CX)
CALL runtime·stackcheck(SB)
// Call fn
CALL R12
// It shouldn't return. If it does, exit
MOVL $111, DI
MOVL $60, AX
SYSCALL
JMP -3(PC) // keep exiting
TEXT runtime·sigaltstack(SB),7,$-8
MOVQ new+8(SP), DI
MOVQ old+16(SP), SI
MOVQ $131, AX
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
// set tls base to DI
TEXT runtime·settls(SB),7,$32
ADDQ $16, DI // ELF wants to use -16(FS), -8(FS)
MOVQ DI, SI
MOVQ $0x1002, DI // ARCH_SET_FS
MOVQ $158, AX // arch_prctl
SYSCALL
CMPQ AX, $0xfffffffffffff001
JLS 2(PC)
MOVL $0xf1, 0xf1 // crash
RET
TEXT runtime·osyield(SB),7,$0
MOVL $24, AX
SYSCALL
RET
TEXT runtime·sched_getaffinity(SB),7,$0
MOVQ 8(SP), DI
MOVL 16(SP), SI
MOVQ 24(SP), DX
MOVL $204, AX // syscall entry
SYSCALL
RET
// int32 runtime·epollcreate(int32 size);
TEXT runtime·epollcreate(SB),7,$0
MOVL 8(SP), DI
MOVL $213, AX // syscall entry
SYSCALL
RET
// int32 runtime·epollcreate1(int32 flags);
TEXT runtime·epollcreate1(SB),7,$0
MOVL 8(SP), DI
MOVL $291, AX // syscall entry
SYSCALL
RET
// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
TEXT runtime·epollctl(SB),7,$0
MOVL 8(SP), DI
MOVL 12(SP), SI
MOVL 16(SP), DX
MOVQ 24(SP), R10
MOVL $233, AX // syscall entry
SYSCALL
RET
// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
TEXT runtime·epollwait(SB),7,$0
MOVL 8(SP), DI
MOVQ 16(SP), SI
MOVL 24(SP), DX
MOVL 28(SP), R10
MOVL $232, AX // syscall entry
SYSCALL
RET
// void runtime·closeonexec(int32 fd);
TEXT runtime·closeonexec(SB),7,$0
MOVL 8(SP), DI // fd
MOVQ $2, SI // F_SETFD
MOVQ $1, DX // FD_CLOEXEC
MOVL $72, AX // fcntl
SYSCALL
RET