1
0
mirror of https://github.com/golang/go synced 2024-10-02 10:28:34 -06:00

runtime: improve cpu profiles for GC/syscalls/cgo

The current "System->etext" attribution is not very informative.
Add a parent "GC" frame.
Replace un-unwindable syscall/cgo frames with the Go stack that leads to the call.

LGTM=rsc
R=rsc, alex.brainman, ality
CC=golang-codereviews
https://golang.org/cl/61270043
This commit is contained in:
Dmitriy Vyukov 2014-02-12 22:31:36 +04:00
parent 44c252bda2
commit 5e72fae9b2
5 changed files with 77 additions and 13 deletions

View File

@ -291,7 +291,14 @@ runtime·stdcall(void *fn, int32 count, ...)
m->libcall.fn = fn; m->libcall.fn = fn;
m->libcall.n = count; m->libcall.n = count;
m->libcall.args = (uintptr*)&count + 1; m->libcall.args = (uintptr*)&count + 1;
if(m->profilehz != 0) {
// leave pc/sp for cpu profiler
m->libcallpc = (uintptr)runtime·getcallerpc(&fn);
m->libcallsp = (uintptr)runtime·getcallersp(&fn);
m->libcallg = g;
}
runtime·asmcgocall(runtime·asmstdcall, &m->libcall); runtime·asmcgocall(runtime·asmstdcall, &m->libcall);
m->libcallsp = 0;
return (void*)m->libcall.r1; return (void*)m->libcall.r1;
} }

View File

@ -2104,10 +2104,10 @@ static struct {
uintptr pcbuf[100]; uintptr pcbuf[100];
} prof; } prof;
static void static void System(void) {}
System(void) static void ExternalCode(void) {}
{ static void GC(void) {}
} extern byte etext[];
// Called if we receive a SIGPROF signal. // Called if we receive a SIGPROF signal.
void void
@ -2221,9 +2221,35 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
if(traceback) if(traceback)
n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false); n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
if(!traceback || n <= 0) { if(!traceback || n <= 0) {
n = 2; // Normal traceback is impossible or has failed.
prof.pcbuf[0] = (uintptr)pc; // See if it falls into several common cases.
prof.pcbuf[1] = (uintptr)System + 1; n = 0;
if(mp->ncgo > 0 && mp->curg != nil &&
mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
// Cgo, we can't unwind and symbolize arbitrary C code,
// so instead collect Go stack that leads to the cgo call.
// This is especially important on windows, since all syscalls are cgo calls.
n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
}
#ifdef GOOS_windows
if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
// Libcall, i.e. runtime syscall on windows.
// Collect Go stack that leads to the call.
n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
}
#endif
if(n == 0) {
// If all of the above has failed, account it against abstract "System" or "GC".
n = 2;
// "ExternalCode" is better than "etext".
if((uintptr)pc > (uintptr)etext)
pc = (byte*)ExternalCode + PCQuantum;
prof.pcbuf[0] = (uintptr)pc;
if(mp->gcing || mp->helpgc)
prof.pcbuf[1] = (uintptr)GC + PCQuantum;
else
prof.pcbuf[1] = (uintptr)System + PCQuantum;
}
} }
prof.fn(prof.pcbuf, n); prof.fn(prof.pcbuf, n);
runtime·unlock(&prof); runtime·unlock(&prof);

View File

@ -350,6 +350,9 @@ struct M
// these are here because they are too large to be on the stack // these are here because they are too large to be on the stack
// of low-level NOSPLIT functions. // of low-level NOSPLIT functions.
LibCall libcall; LibCall libcall;
uintptr libcallpc; // for cpu profiler
uintptr libcallsp;
G* libcallg;
#endif #endif
#ifdef GOOS_solaris #ifdef GOOS_solaris
int32* perrno; // pointer to TLS errno int32* perrno; // pointer to TLS errno

View File

@ -343,19 +343,34 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
RET RET
MOVL m(CX), BP MOVL m(CX), BP
// leave pc/sp for cpu profiler
MOVL (SP), SI
MOVL SI, m_libcallpc(BP)
LEAL 4(SP), SI
MOVL SI, m_libcallsp(BP)
MOVL g(BP), SI
MOVL SI, m_libcallg(BP)
MOVL m_g0(BP), SI MOVL m_g0(BP), SI
CMPL g(CX), SI CMPL g(CX), SI
JNE 3(PC) JNE usleep1_switch
// executing on m->g0 already // executing on m->g0 already
CALL AX CALL AX
RET JMP usleep1_ret
usleep1_switch:
// Switch to m->g0 stack and back. // Switch to m->g0 stack and back.
MOVL (g_sched+gobuf_sp)(SI), SI MOVL (g_sched+gobuf_sp)(SI), SI
MOVL SP, -4(SI) MOVL SP, -4(SI)
LEAL -4(SI), SP LEAL -4(SI), SP
CALL AX CALL AX
MOVL 0(SP), SP MOVL 0(SP), SP
usleep1_ret:
get_tls(CX)
MOVL m(CX), BP
MOVL $0, m_libcallsp(BP)
RET RET
// Runs on OS stack. duration (in 100ns units) is in BX. // Runs on OS stack. duration (in 100ns units) is in BX.

View File

@ -337,20 +337,33 @@ TEXT runtime·usleep1(SB),NOSPLIT,$0
CALL AX CALL AX
RET RET
MOVQ m(R15), R14 MOVQ m(R15), R13
MOVQ m_g0(R14), R14
// leave pc/sp for cpu profiler
MOVQ (SP), R12
MOVQ R12, m_libcallpc(R13)
LEAQ 8(SP), R12
MOVQ R12, m_libcallsp(R13)
MOVQ g(R13), R12
MOVQ R12, m_libcallg(R13)
MOVQ m_g0(R13), R14
CMPQ g(R15), R14 CMPQ g(R15), R14
JNE 3(PC) JNE usleep1_switch
// executing on m->g0 already // executing on m->g0 already
CALL AX CALL AX
RET JMP usleep1_ret
usleep1_switch:
// Switch to m->g0 stack and back. // Switch to m->g0 stack and back.
MOVQ (g_sched+gobuf_sp)(R14), R14 MOVQ (g_sched+gobuf_sp)(R14), R14
MOVQ SP, -8(R14) MOVQ SP, -8(R14)
LEAQ -8(R14), SP LEAQ -8(R14), SP
CALL AX CALL AX
MOVQ 0(SP), SP MOVQ 0(SP), SP
usleep1_ret:
MOVQ $0, m_libcallsp(R13)
RET RET
// Runs on OS stack. duration (in 100ns units) is in BX. // Runs on OS stack. duration (in 100ns units) is in BX.