mirror of
https://github.com/golang/go
synced 2024-11-23 21:40:05 -07:00
runtime: fix and improve CPU profiling
- do not lose profiling signals when we have no mcache (possible for syscalls/cgo) - do not lose any profiling signals on windows - fix profiling of cgo programs on windows (they had no m->thread setup) - properly setup tls in cgo programs on windows - check _beginthread return value Fixes #6417. Fixes #6986. R=alex.brainman, rsc CC=golang-codereviews https://golang.org/cl/44820047
This commit is contained in:
parent
f69391dd9e
commit
8a3c587dc1
@ -5,6 +5,8 @@
|
|||||||
#define WIN32_LEAN_AND_MEAN
|
#define WIN32_LEAN_AND_MEAN
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <process.h>
|
#include <process.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
#include "libcgo.h"
|
#include "libcgo.h"
|
||||||
|
|
||||||
static void threadentry(void*);
|
static void threadentry(void*);
|
||||||
@ -25,14 +27,19 @@ x_cgo_init(G *g)
|
|||||||
void
|
void
|
||||||
_cgo_sys_thread_start(ThreadStart *ts)
|
_cgo_sys_thread_start(ThreadStart *ts)
|
||||||
{
|
{
|
||||||
_beginthread(threadentry, 0, ts);
|
uintptr_t thandle;
|
||||||
|
|
||||||
|
thandle = _beginthread(threadentry, 0, ts);
|
||||||
|
if(thandle == -1) {
|
||||||
|
fprintf(stderr, "runtime: failed to create new OS thread (%d)\n", errno);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
threadentry(void *v)
|
threadentry(void *v)
|
||||||
{
|
{
|
||||||
ThreadStart ts;
|
ThreadStart ts;
|
||||||
void *tls0;
|
|
||||||
|
|
||||||
ts = *(ThreadStart*)v;
|
ts = *(ThreadStart*)v;
|
||||||
free(v);
|
free(v);
|
||||||
@ -43,16 +50,13 @@ threadentry(void *v)
|
|||||||
/*
|
/*
|
||||||
* Set specific keys in thread local storage.
|
* Set specific keys in thread local storage.
|
||||||
*/
|
*/
|
||||||
tls0 = (void*)LocalAlloc(LPTR, 32);
|
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movl %0, %%fs:0x14\n" // MOVL tls0, 0x14(FS)
|
"movl %0, %%fs:0x14\n" // MOVL tls0, 0x14(FS)
|
||||||
"movl %%fs:0x14, %%eax\n" // MOVL 0x14(FS), tmp
|
"movl %%fs:0x14, %%eax\n" // MOVL 0x14(FS), tmp
|
||||||
"movl %1, 0(%%eax)\n" // MOVL g, 0(FS)
|
"movl %1, 0(%%eax)\n" // MOVL g, 0(FS)
|
||||||
"movl %2, 4(%%eax)\n" // MOVL m, 4(FS)
|
"movl %2, 4(%%eax)\n" // MOVL m, 4(FS)
|
||||||
:: "r"(tls0), "r"(ts.g), "r"(ts.m) : "%eax"
|
:: "r"(ts.tls), "r"(ts.g), "r"(ts.m) : "%eax"
|
||||||
);
|
);
|
||||||
|
|
||||||
crosscall_386(ts.fn);
|
crosscall_386(ts.fn);
|
||||||
|
|
||||||
LocalFree(tls0);
|
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#define WIN64_LEAN_AND_MEAN
|
#define WIN64_LEAN_AND_MEAN
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <process.h>
|
#include <process.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
#include "libcgo.h"
|
#include "libcgo.h"
|
||||||
|
|
||||||
static void threadentry(void*);
|
static void threadentry(void*);
|
||||||
@ -25,14 +27,19 @@ x_cgo_init(G *g)
|
|||||||
void
|
void
|
||||||
_cgo_sys_thread_start(ThreadStart *ts)
|
_cgo_sys_thread_start(ThreadStart *ts)
|
||||||
{
|
{
|
||||||
_beginthread(threadentry, 0, ts);
|
uintptr_t thandle;
|
||||||
|
|
||||||
|
thandle = _beginthread(threadentry, 0, ts);
|
||||||
|
if(thandle == -1) {
|
||||||
|
fprintf(stderr, "runtime: failed to create new OS thread (%d)\n", errno);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
threadentry(void *v)
|
threadentry(void *v)
|
||||||
{
|
{
|
||||||
ThreadStart ts;
|
ThreadStart ts;
|
||||||
void *tls0;
|
|
||||||
|
|
||||||
ts = *(ThreadStart*)v;
|
ts = *(ThreadStart*)v;
|
||||||
free(v);
|
free(v);
|
||||||
@ -43,13 +50,12 @@ threadentry(void *v)
|
|||||||
/*
|
/*
|
||||||
* Set specific keys in thread local storage.
|
* Set specific keys in thread local storage.
|
||||||
*/
|
*/
|
||||||
tls0 = (void*)LocalAlloc(LPTR, 64);
|
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movq %0, %%gs:0x28\n" // MOVL tls0, 0x28(GS)
|
"movq %0, %%gs:0x28\n" // MOVL tls0, 0x28(GS)
|
||||||
"movq %%gs:0x28, %%rax\n" // MOVQ 0x28(GS), tmp
|
"movq %%gs:0x28, %%rax\n" // MOVQ 0x28(GS), tmp
|
||||||
"movq %1, 0(%%rax)\n" // MOVQ g, 0(GS)
|
"movq %1, 0(%%rax)\n" // MOVQ g, 0(GS)
|
||||||
"movq %2, 8(%%rax)\n" // MOVQ m, 8(GS)
|
"movq %2, 8(%%rax)\n" // MOVQ m, 8(GS)
|
||||||
:: "r"(tls0), "r"(ts.g), "r"(ts.m) : "%rax"
|
:: "r"(ts.tls), "r"(ts.g), "r"(ts.m) : "%rax"
|
||||||
);
|
);
|
||||||
|
|
||||||
crosscall_amd64(ts.fn);
|
crosscall_amd64(ts.fn);
|
||||||
|
@ -34,6 +34,7 @@ struct ThreadStart
|
|||||||
{
|
{
|
||||||
uintptr m;
|
uintptr m;
|
||||||
G *g;
|
G *g;
|
||||||
|
uintptr *tls;
|
||||||
void (*fn)(void);
|
void (*fn)(void);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -86,10 +86,6 @@ runtime·osinit(void)
|
|||||||
void *kernel32;
|
void *kernel32;
|
||||||
void *SetProcessPriorityBoost;
|
void *SetProcessPriorityBoost;
|
||||||
|
|
||||||
// -1 = current process, -2 = current thread
|
|
||||||
runtime·stdcall(runtime·DuplicateHandle, 7,
|
|
||||||
(uintptr)-1, (uintptr)-2, (uintptr)-1, &m->thread,
|
|
||||||
(uintptr)0, (uintptr)0, (uintptr)DUPLICATE_SAME_ACCESS);
|
|
||||||
runtime·stdcall(runtime·SetConsoleCtrlHandler, 2, runtime·ctrlhandler, (uintptr)1);
|
runtime·stdcall(runtime·SetConsoleCtrlHandler, 2, runtime·ctrlhandler, (uintptr)1);
|
||||||
runtime·stdcall(runtime·timeBeginPeriod, 1, (uintptr)1);
|
runtime·stdcall(runtime·timeBeginPeriod, 1, (uintptr)1);
|
||||||
runtime·ncpu = getproccount();
|
runtime·ncpu = getproccount();
|
||||||
@ -229,7 +225,6 @@ runtime·newosproc(M *mp, void *stk)
|
|||||||
runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror());
|
runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), runtime·getlasterror());
|
||||||
runtime·throw("runtime.newosproc");
|
runtime·throw("runtime.newosproc");
|
||||||
}
|
}
|
||||||
runtime·atomicstorep(&mp->thread, thandle);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Called to initialize a new m (including the bootstrap m).
|
// Called to initialize a new m (including the bootstrap m).
|
||||||
@ -245,6 +240,14 @@ runtime·mpreinit(M *mp)
|
|||||||
void
|
void
|
||||||
runtime·minit(void)
|
runtime·minit(void)
|
||||||
{
|
{
|
||||||
|
void *thandle;
|
||||||
|
|
||||||
|
// -1 = current process, -2 = current thread
|
||||||
|
runtime·stdcall(runtime·DuplicateHandle, 7,
|
||||||
|
(uintptr)-1, (uintptr)-2, (uintptr)-1, &thandle,
|
||||||
|
(uintptr)0, (uintptr)0, (uintptr)DUPLICATE_SAME_ACCESS);
|
||||||
|
runtime·atomicstorep(&m->thread, thandle);
|
||||||
|
|
||||||
runtime·install_exception_handler();
|
runtime·install_exception_handler();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,7 +386,7 @@ runtime·ctrlhandler1(uint32 type)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern void runtime·dosigprof(Context *r, G *gp);
|
extern void runtime·dosigprof(Context *r, G *gp, M *mp);
|
||||||
extern void runtime·profileloop(void);
|
extern void runtime·profileloop(void);
|
||||||
static void *profiletimer;
|
static void *profiletimer;
|
||||||
|
|
||||||
@ -402,13 +405,11 @@ profilem(M *mp)
|
|||||||
tls = runtime·tls0;
|
tls = runtime·tls0;
|
||||||
gp = *(G**)tls;
|
gp = *(G**)tls;
|
||||||
|
|
||||||
if(gp != nil && gp != mp->g0 && gp->status != Gsyscall) {
|
// align Context to 16 bytes
|
||||||
// align Context to 16 bytes
|
r = (Context*)((uintptr)(&rbuf[15]) & ~15);
|
||||||
r = (Context*)((uintptr)(&rbuf[15]) & ~15);
|
r->ContextFlags = CONTEXT_CONTROL;
|
||||||
r->ContextFlags = CONTEXT_CONTROL;
|
runtime·stdcall(runtime·GetThreadContext, 2, mp->thread, r);
|
||||||
runtime·stdcall(runtime·GetThreadContext, 2, mp->thread, r);
|
runtime·dosigprof(r, gp, mp);
|
||||||
runtime·dosigprof(r, gp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -102,7 +102,7 @@ runtime·sigdisable(uint32 sig)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
runtime·dosigprof(Context *r, G *gp)
|
runtime·dosigprof(Context *r, G *gp, M *mp)
|
||||||
{
|
{
|
||||||
runtime·sigprof((uint8*)r->Eip, (uint8*)r->Esp, nil, gp);
|
runtime·sigprof((uint8*)r->Eip, (uint8*)r->Esp, nil, gp, mp);
|
||||||
}
|
}
|
||||||
|
@ -108,7 +108,7 @@ runtime·sigdisable(uint32 sig)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
runtime·dosigprof(Context *r, G *gp)
|
runtime·dosigprof(Context *r, G *gp, M *mp)
|
||||||
{
|
{
|
||||||
runtime·sigprof((uint8*)r->Rip, (uint8*)r->Rsp, nil, gp);
|
runtime·sigprof((uint8*)r->Rip, (uint8*)r->Rsp, nil, gp, mp);
|
||||||
}
|
}
|
||||||
|
@ -33,10 +33,6 @@ func TestCPUProfile(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCPUProfileMultithreaded(t *testing.T) {
|
func TestCPUProfileMultithreaded(t *testing.T) {
|
||||||
// TODO(brainman): delete when issue 6986 is fixed.
|
|
||||||
if runtime.GOOS == "windows" && runtime.GOARCH == "amd64" {
|
|
||||||
t.Skip("skipping broken test on windows-amd64-race")
|
|
||||||
}
|
|
||||||
buf := make([]byte, 100000)
|
buf := make([]byte, 100000)
|
||||||
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
|
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
|
||||||
testCPUProfile(t, []string{"crc32.ChecksumIEEE", "crc32.Update"}, func() {
|
testCPUProfile(t, []string{"crc32.ChecksumIEEE", "crc32.Update"}, func() {
|
||||||
@ -197,9 +193,6 @@ func TestCPUProfileWithFork(t *testing.T) {
|
|||||||
// If it did, it would see inconsistent state and would either record an incorrect stack
|
// If it did, it would see inconsistent state and would either record an incorrect stack
|
||||||
// or crash because the stack was malformed.
|
// or crash because the stack was malformed.
|
||||||
func TestGoroutineSwitch(t *testing.T) {
|
func TestGoroutineSwitch(t *testing.T) {
|
||||||
if runtime.GOOS == "windows" {
|
|
||||||
t.Skip("flaky test; see http://golang.org/issue/6417")
|
|
||||||
}
|
|
||||||
// How much to try. These defaults take about 1 seconds
|
// How much to try. These defaults take about 1 seconds
|
||||||
// on a 2012 MacBook Pro. The ones in short mode take
|
// on a 2012 MacBook Pro. The ones in short mode take
|
||||||
// about 0.1 seconds.
|
// about 0.1 seconds.
|
||||||
@ -252,10 +245,6 @@ func TestGoroutineSwitch(t *testing.T) {
|
|||||||
|
|
||||||
// Test that profiling of division operations is okay, especially on ARM. See issue 6681.
|
// Test that profiling of division operations is okay, especially on ARM. See issue 6681.
|
||||||
func TestMathBigDivide(t *testing.T) {
|
func TestMathBigDivide(t *testing.T) {
|
||||||
// TODO(brainman): delete when issue 6986 is fixed.
|
|
||||||
if runtime.GOOS == "windows" && runtime.GOARCH == "amd64" {
|
|
||||||
t.Skip("skipping broken test on windows-amd64-race")
|
|
||||||
}
|
|
||||||
testCPUProfile(t, nil, func() {
|
testCPUProfile(t, nil, func() {
|
||||||
t := time.After(5 * time.Second)
|
t := time.After(5 * time.Second)
|
||||||
pi := new(big.Int)
|
pi := new(big.Int)
|
||||||
|
@ -638,6 +638,7 @@ struct CgoThreadStart
|
|||||||
{
|
{
|
||||||
M *m;
|
M *m;
|
||||||
G *g;
|
G *g;
|
||||||
|
uintptr *tls;
|
||||||
void (*fn)(void);
|
void (*fn)(void);
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -916,6 +917,7 @@ newm(void(*fn)(void), P *p)
|
|||||||
runtime·throw("_cgo_thread_start missing");
|
runtime·throw("_cgo_thread_start missing");
|
||||||
ts.m = mp;
|
ts.m = mp;
|
||||||
ts.g = mp->g0;
|
ts.g = mp->g0;
|
||||||
|
ts.tls = mp->tls;
|
||||||
ts.fn = runtime·mstart;
|
ts.fn = runtime·mstart;
|
||||||
runtime·asmcgocall(_cgo_thread_start, &ts);
|
runtime·asmcgocall(_cgo_thread_start, &ts);
|
||||||
return;
|
return;
|
||||||
@ -2074,18 +2076,25 @@ System(void)
|
|||||||
|
|
||||||
// Called if we receive a SIGPROF signal.
|
// Called if we receive a SIGPROF signal.
|
||||||
void
|
void
|
||||||
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
|
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
|
||||||
{
|
{
|
||||||
int32 n;
|
int32 n;
|
||||||
bool traceback;
|
bool traceback;
|
||||||
|
MCache *mcache;
|
||||||
|
// Do not use global m in this function, use mp instead.
|
||||||
|
// On windows one m is sending reports about all the g's, so m means a wrong thing.
|
||||||
|
byte m;
|
||||||
|
|
||||||
|
m = 0;
|
||||||
|
USED(m);
|
||||||
|
|
||||||
if(prof.fn == nil || prof.hz == 0)
|
if(prof.fn == nil || prof.hz == 0)
|
||||||
return;
|
return;
|
||||||
traceback = true;
|
|
||||||
// Windows does profiling in a dedicated thread w/o m.
|
// Profiling runs concurrently with GC, so it must not allocate.
|
||||||
if(!Windows && (m == nil || m->mcache == nil))
|
mcache = mp->mcache;
|
||||||
traceback = false;
|
mp->mcache = nil;
|
||||||
|
|
||||||
// Define that a "user g" is a user-created goroutine, and a "system g"
|
// Define that a "user g" is a user-created goroutine, and a "system g"
|
||||||
// is one that is m->g0 or m->gsignal. We've only made sure that we
|
// is one that is m->g0 or m->gsignal. We've only made sure that we
|
||||||
// can unwind user g's, so exclude the system g's.
|
// can unwind user g's, so exclude the system g's.
|
||||||
@ -2158,24 +2167,21 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
|
|||||||
// To recap, there are no constraints on the assembly being used for the
|
// To recap, there are no constraints on the assembly being used for the
|
||||||
// transition. We simply require that g and SP match and that the PC is not
|
// transition. We simply require that g and SP match and that the PC is not
|
||||||
// in runtime.gogo.
|
// in runtime.gogo.
|
||||||
//
|
traceback = true;
|
||||||
// On Windows, one m is sending reports about all the g's, so gp == m->curg
|
if(gp == nil || gp != mp->curg ||
|
||||||
// is not a useful comparison. The profilem function in os_windows.c has
|
|
||||||
// already checked that gp is a user g.
|
|
||||||
if(gp == nil ||
|
|
||||||
(!Windows && gp != m->curg) ||
|
|
||||||
(uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
|
(uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
|
||||||
((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
|
((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
|
||||||
traceback = false;
|
traceback = false;
|
||||||
|
|
||||||
// Race detector calls asmcgocall w/o entersyscall/exitsyscall,
|
// Race detector calls asmcgocall w/o entersyscall/exitsyscall,
|
||||||
// we can not currently unwind through asmcgocall.
|
// we can not currently unwind through asmcgocall.
|
||||||
if(m != nil && m->racecall)
|
if(mp != nil && mp->racecall)
|
||||||
traceback = false;
|
traceback = false;
|
||||||
|
|
||||||
runtime·lock(&prof);
|
runtime·lock(&prof);
|
||||||
if(prof.fn == nil) {
|
if(prof.fn == nil) {
|
||||||
runtime·unlock(&prof);
|
runtime·unlock(&prof);
|
||||||
|
mp->mcache = mcache;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
n = 0;
|
n = 0;
|
||||||
@ -2188,6 +2194,7 @@ runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
|
|||||||
}
|
}
|
||||||
prof.fn(prof.pcbuf, n);
|
prof.fn(prof.pcbuf, n);
|
||||||
runtime·unlock(&prof);
|
runtime·unlock(&prof);
|
||||||
|
mp->mcache = mcache;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Arrange to call fn with a traceback hz times a second.
|
// Arrange to call fn with a traceback hz times a second.
|
||||||
|
@ -864,7 +864,7 @@ void runtime·dopanic(int32);
|
|||||||
void runtime·startpanic(void);
|
void runtime·startpanic(void);
|
||||||
void runtime·freezetheworld(void);
|
void runtime·freezetheworld(void);
|
||||||
void runtime·unwindstack(G*, byte*);
|
void runtime·unwindstack(G*, byte*);
|
||||||
void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp);
|
void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp);
|
||||||
void runtime·resetcpuprofiler(int32);
|
void runtime·resetcpuprofiler(int32);
|
||||||
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
||||||
void runtime·usleep(uint32);
|
void runtime·usleep(uint32);
|
||||||
|
@ -39,7 +39,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
|
|||||||
bool crash;
|
bool crash;
|
||||||
|
|
||||||
if(sig == SIGPROF) {
|
if(sig == SIGPROF) {
|
||||||
runtime·sigprof((byte*)SIG_EIP(info, ctxt), (byte*)SIG_ESP(info, ctxt), nil, gp);
|
runtime·sigprof((byte*)SIG_EIP(info, ctxt), (byte*)SIG_ESP(info, ctxt), nil, gp, m);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
|
|||||||
bool crash;
|
bool crash;
|
||||||
|
|
||||||
if(sig == SIGPROF) {
|
if(sig == SIGPROF) {
|
||||||
runtime·sigprof((byte*)SIG_RIP(info, ctxt), (byte*)SIG_RSP(info, ctxt), nil, gp);
|
runtime·sigprof((byte*)SIG_RIP(info, ctxt), (byte*)SIG_RSP(info, ctxt), nil, gp, m);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
|
|||||||
bool crash;
|
bool crash;
|
||||||
|
|
||||||
if(sig == SIGPROF) {
|
if(sig == SIGPROF) {
|
||||||
runtime·sigprof((uint8*)SIG_PC(info, ctxt), (uint8*)SIG_SP(info, ctxt), (uint8*)SIG_LR(info, ctxt), gp);
|
runtime·sigprof((uint8*)SIG_PC(info, ctxt), (uint8*)SIG_SP(info, ctxt), (uint8*)SIG_LR(info, ctxt), gp, m);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user