1
0
mirror of https://github.com/golang/go synced 2024-10-04 04:31:21 -06:00
go/src/pkg/runtime/proc.c

1730 lines
44 KiB
C
Raw Normal View History

2008-07-14 15:34:27 -06:00
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "runtime.h"
#include "arch_GOARCH.h"
#include "malloc.h"
ld: detect stack overflow due to NOSPLIT Fix problems found. On amd64, various library routines had bigger stack frames than expected, because large function calls had been added. runtime.assertI2T: nosplit stack overflow 120 assumed on entry to runtime.assertI2T 8 after runtime.assertI2T uses 112 0 on entry to runtime.newTypeAssertionError -8 on entry to runtime.morestack01 runtime.assertE2E: nosplit stack overflow 120 assumed on entry to runtime.assertE2E 16 after runtime.assertE2E uses 104 8 on entry to runtime.panic 0 on entry to runtime.morestack16 -8 after runtime.morestack16 uses 8 runtime.assertE2T: nosplit stack overflow 120 assumed on entry to runtime.assertE2T 16 after runtime.assertE2T uses 104 8 on entry to runtime.panic 0 on entry to runtime.morestack16 -8 after runtime.morestack16 uses 8 runtime.newselect: nosplit stack overflow 120 assumed on entry to runtime.newselect 56 after runtime.newselect uses 64 48 on entry to runtime.printf 8 after runtime.printf uses 40 0 on entry to vprintf -8 on entry to runtime.morestack16 runtime.selectdefault: nosplit stack overflow 120 assumed on entry to runtime.selectdefault 56 after runtime.selectdefault uses 64 48 on entry to runtime.printf 8 after runtime.printf uses 40 0 on entry to vprintf -8 on entry to runtime.morestack16 runtime.selectgo: nosplit stack overflow 120 assumed on entry to runtime.selectgo 0 after runtime.selectgo uses 120 -8 on entry to runtime.gosched On arm, 5c was tagging functions NOSPLIT that should not have been, like the recursive function printpanics: printpanics: nosplit stack overflow 124 assumed on entry to printpanics 112 after printpanics uses 12 108 on entry to printpanics 96 after printpanics uses 12 92 on entry to printpanics 80 after printpanics uses 12 76 on entry to printpanics 64 after printpanics uses 12 60 on entry to printpanics 48 after printpanics uses 12 44 on entry to printpanics 32 after printpanics uses 12 28 on entry to printpanics 16 after printpanics uses 12 12 on entry to printpanics 0 after printpanics uses 12 -4 on entry to printpanics R=r, r2 CC=golang-dev https://golang.org/cl/4188061
2011-02-22 15:40:40 -07:00
#include "stack.h"
#include "race.h"
#include "type.h"
2008-07-14 15:34:27 -06:00
bool runtime·iscgo;
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
static void schedule(G*);
typedef struct Sched Sched;
M runtime·m0;
G runtime·g0; // idle goroutine for m0
2008-07-14 15:34:27 -06:00
static int32 debug = 0;
int32 runtime·gcwaiting;
G* runtime·allg;
G* runtime·lastg;
M* runtime·allm;
M* runtime·extram;
int8* runtime·goos;
int32 runtime·ncpu;
// Go scheduler
//
// The go scheduler's job is to match ready-to-run goroutines (`g's)
// with waiting-for-work schedulers (`m's). If there are ready g's
// and no waiting m's, ready() will start a new m running in a new
// OS thread, so that all ready g's can run simultaneously, up to a limit.
// For now, m's never go away.
//
// By default, Go keeps only one kernel thread (m) running user code
// at a single time; other threads may be blocked in the operating system.
// Setting the environment variable $GOMAXPROCS or calling
// runtime.GOMAXPROCS() will change the number of user threads
// allowed to execute simultaneously. $GOMAXPROCS is thus an
// approximation of the maximum number of cores to use.
//
// Even a program that can run without deadlock in a single process
// might use more m's if given the chance. For example, the prime
// sieve will use as many m's as there are primes (up to runtime·sched.mmax),
// allowing different stages of the pipeline to execute in parallel.
// We could revisit this choice, only kicking off new m's for blocking
// system calls, but that would limit the amount of parallel computation
// that go would try to do.
//
// In general, one could imagine all sorts of refinements to the
// scheduler, but the goal now is just to get something working on
// Linux and OS X.
struct Sched {
Lock;
G *gfree; // available g's (status == Gdead)
int64 goidgen;
G *ghead; // g's waiting to run
G *gtail;
int32 gwait; // number of g's waiting to run
int32 gcount; // number of g's that are alive
int32 grunning; // number of g's running on cpu or in syscall
M *mhead; // m's waiting for work
int32 mwait; // number of m's waiting for work
int32 mcount; // number of m's that have been created
volatile uint32 atomic; // atomic scheduling word (see below)
int32 profilehz; // cpu profiling rate
bool init; // running initialization
Note stopped; // one g can set waitstop and wait here for m's to stop
};
// The atomic word in sched is an atomic uint32 that
// holds these fields.
//
// [15 bits] mcpu number of m's executing on cpu
// [15 bits] mcpumax max number of m's allowed on cpu
// [1 bit] waitstop some g is waiting on stopped
// [1 bit] gwaiting gwait != 0
//
// These fields are the information needed by entersyscall
// and exitsyscall to decide whether to coordinate with the
// scheduler. Packing them into a single machine word lets
// them use a fast path with a single atomic read/write and
// no lock/unlock. This greatly reduces contention in
// syscall- or cgo-heavy multithreaded programs.
//
// Except for entersyscall and exitsyscall, the manipulations
// to these fields only happen while holding the schedlock,
// so the routines holding schedlock only need to worry about
// what entersyscall and exitsyscall do, not the other routines
// (which also use the schedlock).
//
// In particular, entersyscall and exitsyscall only read mcpumax,
// waitstop, and gwaiting. They never write them. Thus, writes to those
// fields can be done (holding schedlock) without fear of write conflicts.
// There may still be logic conflicts: for example, the set of waitstop must
// be conditioned on mcpu >= mcpumax or else the wait may be a
// spurious sleep. The Promela model in proc.p verifies these accesses.
enum {
mcpuWidth = 15,
mcpuMask = (1<<mcpuWidth) - 1,
mcpuShift = 0,
mcpumaxShift = mcpuShift + mcpuWidth,
waitstopShift = mcpumaxShift + mcpuWidth,
gwaitingShift = waitstopShift+1,
// The max value of GOMAXPROCS is constrained
// by the max value we can store in the bit fields
// of the atomic word. Reserve a few high values
// so that we can detect accidental decrement
// beyond zero.
maxgomaxprocs = mcpuMask - 10,
};
#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask)
#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask)
#define atomic_waitstop(v) (((v)>>waitstopShift)&1)
#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1)
Sched runtime·sched;
int32 runtime·gomaxprocs;
bool runtime·singleproc;
static bool canaddmcpu(void);
// An m that is waiting for notewakeup(&m->havenextg). This may
// only be accessed while the scheduler lock is held. This is used to
// minimize the number of times we call notewakeup while the scheduler
// lock is held, since the m will normally move quickly to lock the
// scheduler itself, producing lock contention.
static M* mwakeup;
// Scheduling helpers. Sched must be locked.
static void gput(G*); // put/get on ghead/gtail
static G* gget(void);
static void mput(M*); // put/get on mhead
static M* mget(G*);
static void gfput(G*); // put/get on gfree
static G* gfget(void);
static void matchmg(void); // match m's to g's
static void readylocked(G*); // ready, but sched is locked
static void mnextg(M*, G*);
static void mcommoninit(M*);
void
setmcpumax(uint32 n)
{
uint32 v, w;
for(;;) {
v = runtime·sched.atomic;
w = v;
w &= ~(mcpuMask<<mcpumaxShift);
w |= n<<mcpumaxShift;
if(runtime·cas(&runtime·sched.atomic, v, w))
break;
}
}
// Keep trace of scavenger's goroutine for deadlock detection.
static G *scvg;
// The bootstrap sequence is:
//
// call osinit
// call schedinit
// make & queue new G
// call runtime·mstart
//
// The new G calls runtime·main.
void
runtime·schedinit(void)
{
int32 n;
byte *p;
m->nomemprof++;
runtime·mprofinit();
runtime·mallocinit();
mcommoninit(m);
runtime·goargs();
runtime·goenvs();
// For debugging:
// Allocate internal symbol table representation now,
// so that we don't need to call malloc when we crash.
// runtime·findfunc(0);
runtime·gomaxprocs = 1;
p = runtime·getenv("GOMAXPROCS");
if(p != nil && (n = runtime·atoi(p)) != 0) {
if(n > maxgomaxprocs)
n = maxgomaxprocs;
runtime·gomaxprocs = n;
}
// wait for the main goroutine to start before taking
// GOMAXPROCS into account.
setmcpumax(1);
runtime·singleproc = runtime·gomaxprocs == 1;
canaddmcpu(); // mcpu++ to account for bootstrap m
m->helpgc = 1; // flag to tell schedule() to mcpu--
runtime·sched.grunning++;
mstats.enablegc = 1;
m->nomemprof--;
if(raceenabled)
g->racectx = runtime·raceinit();
}
extern void main·init(void);
extern void main·main(void);
// The main goroutine.
void
runtime·main(void)
{
// Lock the main goroutine onto this, the main OS thread,
// during initialization. Most programs won't care, but a few
// do require certain calls to be made by the main thread.
// Those can arrange for main.main to run in the main thread
// by calling runtime.LockOSThread during initialization
// to preserve the lock.
runtime·lockOSThread();
// From now on, newgoroutines may use non-main threads.
setmcpumax(runtime·gomaxprocs);
runtime·sched.init = true;
scvg = runtime·newproc1((byte*)runtime·MHeap_Scavenger, nil, 0, 0, runtime·main);
scvg->issystem = true;
// The deadlock detection has false negatives.
// Let scvg start up, to eliminate the false negative
// for the trivial program func main() { select{} }.
runtime·gosched();
main·init();
runtime·sched.init = false;
runtime·unlockOSThread();
main·main();
if(raceenabled)
runtime·racefini();
// Make racy client program work: if panicking on
// another goroutine at the same time as main returns,
// let the other goroutine finish printing the panic trace.
// Once it does, it will exit. See issue 3934.
if(runtime·panicking)
runtime·park(nil, nil, "panicwait");
runtime·exit(0);
for(;;)
*(int32*)runtime·main = 0;
}
// Lock the scheduler.
static void
schedlock(void)
{
runtime·lock(&runtime·sched);
}
// Unlock the scheduler.
static void
schedunlock(void)
{
M *mp;
mp = mwakeup;
mwakeup = nil;
runtime·unlock(&runtime·sched);
if(mp != nil)
runtime·notewakeup(&mp->havenextg);
}
2008-07-14 15:34:27 -06:00
void
runtime·goexit(void)
2008-07-14 15:34:27 -06:00
{
if(raceenabled)
runtime·racegoend();
g->status = Gmoribund;
runtime·gosched();
2008-07-14 15:34:27 -06:00
}
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
void
runtime·goroutineheader(G *gp)
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
{
int8 *status;
switch(gp->status) {
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
case Gidle:
status = "idle";
break;
case Grunnable:
status = "runnable";
break;
case Grunning:
status = "running";
break;
case Gsyscall:
status = "syscall";
break;
case Gwaiting:
if(gp->waitreason)
status = gp->waitreason;
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
else
status = "waiting";
break;
case Gmoribund:
status = "moribund";
break;
default:
status = "???";
break;
}
runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
}
void
runtime·tracebackothers(G *me)
{
G *gp;
int32 traceback;
traceback = runtime·gotraceback();
for(gp = runtime·allg; gp != nil; gp = gp->alllink) {
if(gp == me || gp->status == Gdead)
continue;
if(gp->issystem && traceback < 2)
continue;
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
runtime·printf("\n");
runtime·goroutineheader(gp);
runtime·traceback(gp->sched.pc, (byte*)gp->sched.sp, 0, gp);
}
}
// Mark this g as m's idle goroutine.
// This functionality might be used in environments where programs
// are limited to a single thread, to simulate a select-driven
// network server. It is not exposed via the standard runtime API.
void
runtime·idlegoroutine(void)
{
if(g->idlem != nil)
runtime·throw("g is already an idle goroutine");
g->idlem = m;
}
static void
mcommoninit(M *mp)
{
mp->id = runtime·sched.mcount++;
mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
if(mp->mcache == nil)
mp->mcache = runtime·allocmcache();
runtime·callers(1, mp->createstack, nelem(mp->createstack));
runtime·mpreinit(mp);
// Add to runtime·allm so garbage collector doesn't free m
// when it is just in a register or thread-local storage.
mp->alllink = runtime·allm;
// runtime·NumCgoCall() iterates over allm w/o schedlock,
// so we need to publish it safely.
runtime·atomicstorep(&runtime·allm, mp);
}
// Try to increment mcpu. Report whether succeeded.
static bool
canaddmcpu(void)
{
uint32 v;
for(;;) {
v = runtime·sched.atomic;
if(atomic_mcpu(v) >= atomic_mcpumax(v))
return 0;
if(runtime·cas(&runtime·sched.atomic, v, v+(1<<mcpuShift)))
return 1;
}
}
// Put on `g' queue. Sched must be locked.
static void
gput(G *gp)
2008-07-14 15:34:27 -06:00
{
// If g is the idle goroutine for an m, hand it off.
if(gp->idlem != nil) {
if(gp->idlem->idleg != nil) {
runtime·printf("m%d idle out of sync: g%D g%D\n",
gp->idlem->id,
gp->idlem->idleg->goid, gp->goid);
runtime·throw("runtime: double idle");
}
gp->idlem->idleg = gp;
return;
}
gp->schedlink = nil;
if(runtime·sched.ghead == nil)
runtime·sched.ghead = gp;
else
runtime·sched.gtail->schedlink = gp;
runtime·sched.gtail = gp;
// increment gwait.
// if it transitions to nonzero, set atomic gwaiting bit.
if(runtime·sched.gwait++ == 0)
runtime·xadd(&runtime·sched.atomic, 1<<gwaitingShift);
}
// Report whether gget would return something.
static bool
haveg(void)
{
return runtime·sched.ghead != nil || m->idleg != nil;
}
// Get from `g' queue. Sched must be locked.
static G*
gget(void)
{
G *gp;
gp = runtime·sched.ghead;
if(gp) {
runtime·sched.ghead = gp->schedlink;
if(runtime·sched.ghead == nil)
runtime·sched.gtail = nil;
// decrement gwait.
// if it transitions to zero, clear atomic gwaiting bit.
if(--runtime·sched.gwait == 0)
runtime·xadd(&runtime·sched.atomic, -1<<gwaitingShift);
} else if(m->idleg != nil) {
gp = m->idleg;
m->idleg = nil;
}
return gp;
}
// Put on `m' list. Sched must be locked.
static void
mput(M *mp)
{
mp->schedlink = runtime·sched.mhead;
runtime·sched.mhead = mp;
runtime·sched.mwait++;
}
// Get an `m' to run `g'. Sched must be locked.
static M*
mget(G *gp)
{
M *mp;
// if g has its own m, use it.
if(gp && (mp = gp->lockedm) != nil)
return mp;
// otherwise use general m pool.
if((mp = runtime·sched.mhead) != nil) {
runtime·sched.mhead = mp->schedlink;
runtime·sched.mwait--;
}
return mp;
}
// Mark g ready to run.
void
runtime·ready(G *gp)
{
schedlock();
readylocked(gp);
schedunlock();
}
// Mark g ready to run. Sched is already locked.
// G might be running already and about to stop.
// The sched lock protects g->status from changing underfoot.
static void
readylocked(G *gp)
{
if(gp->m) {
// Running on another machine.
// Ready it when it stops.
gp->readyonstop = 1;
return;
}
// Mark runnable.
if(gp->status == Grunnable || gp->status == Grunning) {
runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
runtime·throw("bad g->status in ready");
runtime: always run stackalloc on scheduler stack Avoids deadlocks like the one below, in which a stack split happened in order to call lock(&stacks), but then the stack unsplit cannot run because stacks is now locked. The only code calling stackalloc that wasn't on a scheduler stack already was malg, which creates a new goroutine. runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139 runtime.futex() futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51 futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2) futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119 futexlock(0x5b0188, 0x5b0188) runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x7f0d27b4a000) runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336 runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218) runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705 runtime.oldstack() runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224 runtime.lessstack() ----- lessstack called from goroutine 2 ----- runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x40a5e2) runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316 runtime.stackalloc(0x1000, 0x4055b0) runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803 runtime.malg(0x1000, 0x40add9) runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854 runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...) runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831 runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059) ... R=r, r2 CC=golang-dev https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
}
gp->status = Grunnable;
gput(gp);
matchmg();
}
static void
nop(void)
{
}
// Same as readylocked but a different symbol so that
// debuggers can set a breakpoint here and catch all
// new goroutines.
static void
newprocreadylocked(G *gp)
{
nop(); // avoid inlining in 6l
readylocked(gp);
}
// Pass g to m for running.
// Caller has already incremented mcpu.
static void
mnextg(M *mp, G *gp)
{
runtime·sched.grunning++;
mp->nextg = gp;
if(mp->waitnextg) {
mp->waitnextg = 0;
if(mwakeup != nil)
runtime·notewakeup(&mwakeup->havenextg);
mwakeup = mp;
}
}
// Get the next goroutine that m should run.
// Sched must be locked on entry, is unlocked on exit.
// Makes sure that at most $GOMAXPROCS g's are
// running on cpus (not in system calls) at any given time.
static G*
nextgandunlock(void)
{
G *gp;
uint32 v;
top:
if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs)
runtime·throw("negative mcpu");
// If there is a g waiting as m->nextg, the mcpu++
// happened before it was passed to mnextg.
if(m->nextg != nil) {
gp = m->nextg;
m->nextg = nil;
schedunlock();
return gp;
}
if(m->lockedg != nil) {
// We can only run one g, and it's not available.
// Make sure some other cpu is running to handle
// the ordinary run queue.
if(runtime·sched.gwait != 0) {
matchmg();
// m->lockedg might have been on the queue.
if(m->nextg != nil) {
gp = m->nextg;
m->nextg = nil;
schedunlock();
return gp;
}
}
} else {
// Look for work on global queue.
while(haveg() && canaddmcpu()) {
gp = gget();
if(gp == nil)
runtime·throw("gget inconsistency");
if(gp->lockedm) {
mnextg(gp->lockedm, gp);
continue;
}
runtime·sched.grunning++;
schedunlock();
return gp;
}
// The while loop ended either because the g queue is empty
// or because we have maxed out our m procs running go
// code (mcpu >= mcpumax). We need to check that
// concurrent actions by entersyscall/exitsyscall cannot
// invalidate the decision to end the loop.
//
// We hold the sched lock, so no one else is manipulating the
// g queue or changing mcpumax. Entersyscall can decrement
// mcpu, but if does so when there is something on the g queue,
// the gwait bit will be set, so entersyscall will take the slow path
// and use the sched lock. So it cannot invalidate our decision.
//
// Wait on global m queue.
mput(m);
2008-07-14 15:34:27 -06:00
}
// Look for deadlock situation.
// There is a race with the scavenger that causes false negatives:
// if the scavenger is just starting, then we have
// scvg != nil && grunning == 0 && gwait == 0
// and we do not detect a deadlock. It is possible that we should
// add that case to the if statement here, but it is too close to Go 1
// to make such a subtle change. Instead, we work around the
// false negative in trivial programs by calling runtime.gosched
// from the main goroutine just before main.main.
// See runtime·main above.
//
// On a related note, it is also possible that the scvg == nil case is
// wrong and should include gwait, but that does not happen in
// standard Go programs, which all start the scavenger.
//
if((scvg == nil && runtime·sched.grunning == 0) ||
(scvg != nil && runtime·sched.grunning == 1 && runtime·sched.gwait == 0 &&
(scvg->status == Grunning || scvg->status == Gsyscall))) {
m->throwing = -1; // do not dump full stacks
runtime·throw("all goroutines are asleep - deadlock!");
}
m->nextg = nil;
m->waitnextg = 1;
runtime·noteclear(&m->havenextg);
// Stoptheworld is waiting for all but its cpu to go to stop.
// Entersyscall might have decremented mcpu too, but if so
// it will see the waitstop and take the slow path.
// Exitsyscall never increments mcpu beyond mcpumax.
v = runtime·atomicload(&runtime·sched.atomic);
if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
// set waitstop = 0 (known to be 1)
runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
runtime·notewakeup(&runtime·sched.stopped);
}
schedunlock();
runtime·notesleep(&m->havenextg);
if(m->helpgc) {
runtime·gchelper();
m->helpgc = 0;
runtime·lock(&runtime·sched);
goto top;
}
if((gp = m->nextg) == nil)
runtime·throw("bad m->nextg in nextgoroutine");
m->nextg = nil;
return gp;
2008-07-14 15:34:27 -06:00
}
int32
runtime·gcprocs(void)
{
int32 n;
// Figure out how many CPUs to use during GC.
// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
n = runtime·gomaxprocs;
if(n > runtime·ncpu)
n = runtime·ncpu;
if(n > MaxGcproc)
n = MaxGcproc;
if(n > runtime·sched.mwait+1) // one M is currently running
n = runtime·sched.mwait+1;
return n;
}
void
runtime·helpgc(int32 nproc)
{
M *mp;
int32 n;
runtime·lock(&runtime·sched);
for(n = 1; n < nproc; n++) { // one M is currently running
mp = mget(nil);
if(mp == nil)
runtime·throw("runtime·gcprocs inconsistency");
mp->helpgc = 1;
mp->waitnextg = 0;
runtime·notewakeup(&mp->havenextg);
}
runtime·unlock(&runtime·sched);
}
void
runtime·stoptheworld(void)
{
uint32 v;
schedlock();
runtime·gcwaiting = 1;
setmcpumax(1);
// while mcpu > 1
for(;;) {
v = runtime·sched.atomic;
if(atomic_mcpu(v) <= 1)
break;
// It would be unsafe for multiple threads to be using
// the stopped note at once, but there is only
// ever one thread doing garbage collection.
runtime·noteclear(&runtime·sched.stopped);
if(atomic_waitstop(v))
runtime·throw("invalid waitstop");
// atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
// still being true.
if(!runtime·cas(&runtime·sched.atomic, v, v+(1<<waitstopShift)))
continue;
schedunlock();
runtime·notesleep(&runtime·sched.stopped);
schedlock();
}
runtime·singleproc = runtime·gomaxprocs == 1;
schedunlock();
}
void
runtime·starttheworld(void)
{
M *mp;
int32 max;
// Figure out how many CPUs GC could possibly use.
max = runtime·gomaxprocs;
if(max > runtime·ncpu)
max = runtime·ncpu;
if(max > MaxGcproc)
max = MaxGcproc;
schedlock();
runtime·gcwaiting = 0;
setmcpumax(runtime·gomaxprocs);
matchmg();
if(runtime·gcprocs() < max && canaddmcpu()) {
// If GC could have used another helper proc, start one now,
// in the hope that it will be available next time.
// It would have been even better to start it before the collection,
// but doing so requires allocating memory, so it's tricky to
// coordinate. This lazy approach works out in practice:
// we don't mind if the first couple gc rounds don't have quite
// the maximum number of procs.
// canaddmcpu above did mcpu++
// (necessary, because m will be doing various
// initialization work so is definitely running),
// but m is not running a specific goroutine,
// so set the helpgc flag as a signal to m's
// first schedule(nil) to mcpu-- and grunning--.
mp = runtime·newm();
mp->helpgc = 1;
runtime·sched.grunning++;
}
schedunlock();
}
// Called to start an M.
void
runtime·mstart(void)
{
// It is used by windows-386 only. Unfortunately, seh needs
// to be located on os stack, and mstart runs on os stack
// for both m0 and m.
SEH seh;
if(g != m->g0)
runtime·throw("bad runtime·mstart");
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
// Record top of stack for use by mcall.
// Once we call schedule we're never coming back,
// so other calls can reuse this stack space.
runtime·gosave(&m->g0->sched);
m->g0->sched.pc = (void*)-1; // make sure it is never used
m->seh = &seh;
runtime·asminit();
runtime·minit();
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
if(m == &runtime·m0) {
runtime·initsig();
if(runtime·iscgo)
runtime·newextram();
}
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
schedule(nil);
// TODO(brainman): This point is never reached, because scheduler
// does not release os threads at the moment. But once this path
// is enabled, we must remove our seh here.
}
// When running with cgo, we call libcgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
void (*libcgo_thread_start)(void*);
typedef struct CgoThreadStart CgoThreadStart;
struct CgoThreadStart
{
M *m;
G *g;
void (*fn)(void);
};
// Kick off new m's as needed (up to mcpumax).
// Sched is locked.
static void
matchmg(void)
{
G *gp;
M *mp;
if(m->mallocing || m->gcing)
return;
while(haveg() && canaddmcpu()) {
gp = gget();
if(gp == nil)
runtime·throw("gget inconsistency");
// Find the m that will run gp.
if((mp = mget(gp)) == nil)
mp = runtime·newm();
mnextg(mp, gp);
}
}
// Allocate a new m unassociated with any thread.
M*
runtime·allocm(void)
{
M *mp;
static Type *mtype; // The Go type M
if(mtype == nil) {
Eface e;
runtime·gc_m_ptr(&e);
mtype = ((PtrType*)e.type)->elem;
}
mp = runtime·cnew(mtype);
mcommoninit(mp);
if(runtime·iscgo || Windows)
mp->g0 = runtime·malg(-1);
else
mp->g0 = runtime·malg(8192);
return mp;
}
static M* lockextra(bool nilokay);
static void unlockextra(M*);
// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via casp) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
#pragma textflag 7
void
runtime·needm(byte x)
{
M *mp;
// Lock extra list, take head, unlock popped list.
// nilokay=false is safe here because of the invariant above,
// that the extra list always contains or will soon contain
// at least one m.
mp = lockextra(false);
// Set needextram when we've just emptied the list,
// so that the eventual call into cgocallbackg will
// allocate a new m for the extra list. We delay the
// allocation until then so that it can be done
// after exitsyscall makes sure it is okay to be
// running at all (that is, there's no garbage collection
// running right now).
mp->needextram = mp->schedlink == nil;
unlockextra(mp->schedlink);
// Install m and g (= m->g0) and set the stack bounds
// to match the current stack. We don't actually know
// how big the stack is, like we don't know how big any
// scheduling stack is, but we assume there's at least 32 kB,
// which is more than enough for us.
runtime·setmg(mp, mp->g0);
g->stackbase = (uintptr)(&x + 1024);
g->stackguard = (uintptr)(&x - 32*1024);
// On windows/386, we need to put an SEH frame (two words)
// somewhere on the current stack. We are called
// from needm, and we know there is some available
// space one word into the argument frame. Use that.
m->seh = (SEH*)((uintptr*)&x + 1);
// Initialize this thread to use the m.
runtime·asminit();
runtime·minit();
}
// newextram allocates an m and puts it on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
void
runtime·newextram(void)
{
M *mp, *mnext;
G *gp;
// Scheduler protects allocation of new m's and g's.
// Create extra goroutine locked to extra m.
// The goroutine is the context in which the cgo callback will run.
// The sched.pc will never be returned to, but setting it to
// runtime.goexit makes clear to the traceback routines where
// the goroutine stack ends.
schedlock();
mp = runtime·allocm();
gp = runtime·malg(4096);
gp->sched.pc = (void*)runtime·goexit;
gp->sched.sp = gp->stackbase;
gp->sched.g = gp;
gp->status = Gsyscall;
mp->curg = gp;
mp->locked = LockInternal;
mp->lockedg = gp;
gp->lockedm = mp;
// put on allg for garbage collector
if(runtime·lastg == nil)
runtime·allg = gp;
else
runtime·lastg->alllink = gp;
runtime·lastg = gp;
schedunlock();
// Add m to the extra list.
mnext = lockextra(true);
mp->schedlink = mnext;
unlockextra(mp);
}
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
// It puts the current m back onto the extra list.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
// variable using pthread_key_create. Unlike the pthread keys we already use
// on OS X, this dummy key would never be read by Go code. It would exist
// only so that we could register at thread-exit-time destructor.
// That destructor would put the m back onto the extra list.
// This is purely a performance optimization. The current version,
// in which dropm happens on each cgo call, is still correct too.
// We may have to keep the current version on systems with cgo
// but without pthreads, like Windows.
void
runtime·dropm(void)
{
M *mp, *mnext;
// Undo whatever initialization minit did during needm.
runtime·unminit();
// Clear m and g, and return m to the extra list.
// After the call to setmg we can only call nosplit functions.
mp = m;
runtime·setmg(nil, nil);
mnext = lockextra(true);
mp->schedlink = mnext;
unlockextra(mp);
}
#define MLOCKED ((M*)1)
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to runtime.extram. If nilokay is true, then lockextra will
// return a nil list head if that's what it finds. If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.
#pragma textflag 7
static M*
lockextra(bool nilokay)
{
M *mp;
void (*yield)(void);
for(;;) {
mp = runtime·atomicloadp(&runtime·extram);
if(mp == MLOCKED) {
yield = runtime·osyield;
yield();
continue;
}
if(mp == nil && !nilokay) {
runtime·usleep(1);
continue;
}
if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
yield = runtime·osyield;
yield();
continue;
}
break;
}
return mp;
}
#pragma textflag 7
static void
unlockextra(M *mp)
{
runtime·atomicstorep(&runtime·extram, mp);
}
// Create a new m. It will start off with a call to runtime·mstart.
M*
runtime·newm(void)
{
M *mp;
mp = runtime·allocm();
if(runtime·iscgo) {
CgoThreadStart ts;
if(libcgo_thread_start == nil)
runtime·throw("libcgo_thread_start missing");
ts.m = mp;
ts.g = mp->g0;
ts.fn = runtime·mstart;
runtime·asmcgocall(libcgo_thread_start, &ts);
} else {
runtime·newosproc(mp, mp->g0, (byte*)mp->g0->stackbase, runtime·mstart);
}
return mp;
}
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
// One round of scheduler: find a goroutine and run it.
// The argument is the goroutine that was running before
// schedule was called, or nil if this is the first call.
// Never returns.
static void
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
schedule(G *gp)
2008-07-14 15:34:27 -06:00
{
int32 hz;
uint32 v;
schedlock();
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
if(gp != nil) {
// Just finished running gp.
gp->m = nil;
runtime·sched.grunning--;
// atomic { mcpu-- }
v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
if(atomic_mcpu(v) > maxgomaxprocs)
runtime·throw("negative mcpu in scheduler");
switch(gp->status) {
case Grunnable:
case Gdead:
// Shouldn't have been running!
runtime·throw("bad gp->status in sched");
case Grunning:
gp->status = Grunnable;
gput(gp);
break;
case Gmoribund:
gp->status = Gdead;
if(gp->lockedm) {
gp->lockedm = nil;
m->lockedg = nil;
m->locked = 0;
}
gp->idlem = nil;
runtime·unwindstack(gp, nil);
gfput(gp);
if(--runtime·sched.gcount == 0)
runtime·exit(0);
break;
}
if(gp->readyonstop) {
gp->readyonstop = 0;
readylocked(gp);
}
} else if(m->helpgc) {
// Bootstrap m or new m started by starttheworld.
// atomic { mcpu-- }
v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
if(atomic_mcpu(v) > maxgomaxprocs)
runtime·throw("negative mcpu in scheduler");
// Compensate for increment in starttheworld().
runtime·sched.grunning--;
m->helpgc = 0;
} else if(m->nextg != nil) {
// New m started by matchmg.
} else {
runtime·throw("invalid m state in scheduler");
}
// Find (or wait for) g to run. Unlocks runtime·sched.
gp = nextgandunlock();
gp->readyonstop = 0;
gp->status = Grunning;
2008-07-14 15:34:27 -06:00
m->curg = gp;
gp->m = m;
// Check whether the profiler needs to be turned on or off.
hz = runtime·sched.profilehz;
if(m->profilehz != hz)
runtime·resetcpuprofiler(hz);
if(gp->sched.pc == (byte*)runtime·goexit) { // kickoff
runtime·gogocall(&gp->sched, (void(*)(void))gp->entry);
}
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
runtime·gogo(&gp->sched, 0);
2008-07-14 15:34:27 -06:00
}
// Enter scheduler. If g->status is Grunning,
// re-queues g and runs everyone else who is waiting
// before running g again. If g->status is Gmoribund,
// kills off g.
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
// Cannot split stack because it is called from exitsyscall.
// See comment below.
#pragma textflag 7
void
runtime·gosched(void)
{
if(m->locks != 0)
runtime·throw("gosched holding locks");
if(g == m->g0)
runtime·throw("gosched of g0");
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
runtime·mcall(schedule);
}
// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling runtime·ready(gp).
void
runtime·park(void (*unlockf)(Lock*), Lock *lock, int8 *reason)
{
g->status = Gwaiting;
g->waitreason = reason;
if(unlockf)
unlockf(lock);
runtime·gosched();
}
// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
//
// Entersyscall cannot split the stack: the runtime·gosave must
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
// It's okay to call matchmg and notewakeup even after
// decrementing mcpu, because we haven't released the
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
// sched lock yet, so the garbage collector cannot be running.
#pragma textflag 7
void
runtime·entersyscall(void)
{
uint32 v;
if(m->profilehz > 0)
runtime·setprof(false);
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
// Leave SP around for gc and traceback.
runtime·gosave(&g->sched);
g->gcsp = g->sched.sp;
g->gcstack = g->stackbase;
g->gcguard = g->stackguard;
g->status = Gsyscall;
if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
// g->gcsp, g->gcguard-StackGuard, g->gcstack);
runtime·throw("entersyscall");
}
// Fast path.
// The slow path inside the schedlock/schedunlock will get
// through without stopping if it does:
// mcpu--
// gwait not true
// waitstop && mcpu <= mcpumax not true
// If we can do the same with a single atomic add,
// then we can skip the locks.
v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
return;
schedlock();
v = runtime·atomicload(&runtime·sched.atomic);
if(atomic_gwaiting(v)) {
matchmg();
v = runtime·atomicload(&runtime·sched.atomic);
}
if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
runtime·notewakeup(&runtime·sched.stopped);
}
// Re-save sched in case one of the calls
// (notewakeup, matchmg) triggered something using it.
runtime·gosave(&g->sched);
schedunlock();
}
// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
// The hint is ignored at the moment, and it's just a copy of runtime·entersyscall().
#pragma textflag 7
void
runtime·entersyscallblock(void)
{
uint32 v;
if(m->profilehz > 0)
runtime·setprof(false);
// Leave SP around for gc and traceback.
runtime·gosave(&g->sched);
g->gcsp = g->sched.sp;
g->gcstack = g->stackbase;
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
g->gcguard = g->stackguard;
g->status = Gsyscall;
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
// g->gcsp, g->gcguard-StackGuard, g->gcstack);
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
runtime·throw("entersyscall");
}
// Fast path.
// The slow path inside the schedlock/schedunlock will get
// through without stopping if it does:
// mcpu--
// gwait not true
// waitstop && mcpu <= mcpumax not true
// If we can do the same with a single atomic add,
// then we can skip the locks.
v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
return;
schedlock();
v = runtime·atomicload(&runtime·sched.atomic);
if(atomic_gwaiting(v)) {
matchmg();
v = runtime·atomicload(&runtime·sched.atomic);
}
if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
runtime·notewakeup(&runtime·sched.stopped);
}
// Re-save sched in case one of the calls
// (notewakeup, matchmg) triggered something using it.
runtime·gosave(&g->sched);
schedunlock();
}
// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
void
runtime·exitsyscall(void)
{
uint32 v;
// Fast path.
// If we can do the mcpu++ bookkeeping and
// find that we still have mcpu <= mcpumax, then we can
// start executing Go code immediately, without having to
// schedlock/schedunlock.
v = runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift));
if(m->profilehz == runtime·sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) {
// There's a cpu for us, so we can run.
g->status = Grunning;
// Garbage collector isn't running (since we are),
// so okay to clear gcstack.
g->gcstack = (uintptr)nil;
if(m->profilehz > 0)
runtime·setprof(true);
return;
}
// Tell scheduler to put g back on the run queue:
// mostly equivalent to g->status = Grunning,
// but keeps the garbage collector from thinking
// that g is running right now, which it's not.
g->readyonstop = 1;
// All the cpus are taken.
// The scheduler will ready g and put this m to sleep.
// When the scheduler takes g away from m,
// it will undo the runtime·sched.mcpu++ above.
runtime·gosched();
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
// Gosched returned, so we're allowed to run now.
// Delete the gcstack information that we left for
// the garbage collector during the system call.
// Must wait until now because until gosched returns
// we don't know for sure that the garbage collector
// is not running.
g->gcstack = (uintptr)nil;
}
// Hook used by runtime·malg to call runtime·stackalloc on the
// scheduler stack. This exists because runtime·stackalloc insists
// on being called on the scheduler stack, to avoid trying to grow
// the stack while allocating a new stack segment.
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
static void
mstackalloc(G *gp)
{
gp->param = runtime·stackalloc((uintptr)gp->param);
runtime·gogo(&gp->sched, 0);
}
// Allocate a new g, with a stack big enough for stacksize bytes.
G*
runtime·malg(int32 stacksize)
{
runtime: always run stackalloc on scheduler stack Avoids deadlocks like the one below, in which a stack split happened in order to call lock(&stacks), but then the stack unsplit cannot run because stacks is now locked. The only code calling stackalloc that wasn't on a scheduler stack already was malg, which creates a new goroutine. runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139 runtime.futex() futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51 futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2) futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119 futexlock(0x5b0188, 0x5b0188) runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x7f0d27b4a000) runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336 runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218) runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705 runtime.oldstack() runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224 runtime.lessstack() ----- lessstack called from goroutine 2 ----- runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x40a5e2) runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316 runtime.stackalloc(0x1000, 0x4055b0) runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803 runtime.malg(0x1000, 0x40add9) runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854 runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...) runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831 runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059) ... R=r, r2 CC=golang-dev https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
G *newg;
byte *stk;
if(StackTop < sizeof(Stktop)) {
runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
runtime·throw("runtime: bad stack.h");
}
runtime: always run stackalloc on scheduler stack Avoids deadlocks like the one below, in which a stack split happened in order to call lock(&stacks), but then the stack unsplit cannot run because stacks is now locked. The only code calling stackalloc that wasn't on a scheduler stack already was malg, which creates a new goroutine. runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139 runtime.futex() futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51 futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2) futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119 futexlock(0x5b0188, 0x5b0188) runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x7f0d27b4a000) runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336 runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218) runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705 runtime.oldstack() runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224 runtime.lessstack() ----- lessstack called from goroutine 2 ----- runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x40a5e2) runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316 runtime.stackalloc(0x1000, 0x4055b0) runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803 runtime.malg(0x1000, 0x40add9) runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854 runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...) runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831 runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059) ... R=r, r2 CC=golang-dev https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
newg = runtime·malloc(sizeof(G));
if(stacksize >= 0) {
runtime: always run stackalloc on scheduler stack Avoids deadlocks like the one below, in which a stack split happened in order to call lock(&stacks), but then the stack unsplit cannot run because stacks is now locked. The only code calling stackalloc that wasn't on a scheduler stack already was malg, which creates a new goroutine. runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139 runtime.futex() futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51 futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2) futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119 futexlock(0x5b0188, 0x5b0188) runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x7f0d27b4a000) runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336 runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218) runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705 runtime.oldstack() runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224 runtime.lessstack() ----- lessstack called from goroutine 2 ----- runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x40a5e2) runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316 runtime.stackalloc(0x1000, 0x4055b0) runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803 runtime.malg(0x1000, 0x40add9) runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854 runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...) runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831 runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059) ... R=r, r2 CC=golang-dev https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
if(g == m->g0) {
// running on scheduler stack already.
stk = runtime·stackalloc(StackSystem + stacksize);
} else {
// have to call stackalloc on scheduler stack.
g->param = (void*)(StackSystem + stacksize);
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
runtime·mcall(mstackalloc);
runtime: always run stackalloc on scheduler stack Avoids deadlocks like the one below, in which a stack split happened in order to call lock(&stacks), but then the stack unsplit cannot run because stacks is now locked. The only code calling stackalloc that wasn't on a scheduler stack already was malg, which creates a new goroutine. runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139 runtime.futex() futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51 futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2) futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119 futexlock(0x5b0188, 0x5b0188) runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x7f0d27b4a000) runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336 runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218) runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705 runtime.oldstack() runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224 runtime.lessstack() ----- lessstack called from goroutine 2 ----- runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x40a5e2) runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316 runtime.stackalloc(0x1000, 0x4055b0) runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803 runtime.malg(0x1000, 0x40add9) runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854 runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...) runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831 runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059) ... R=r, r2 CC=golang-dev https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
stk = g->param;
g->param = nil;
}
newg->stack0 = (uintptr)stk;
newg->stackguard = (uintptr)stk + StackGuard;
newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
}
runtime: always run stackalloc on scheduler stack Avoids deadlocks like the one below, in which a stack split happened in order to call lock(&stacks), but then the stack unsplit cannot run because stacks is now locked. The only code calling stackalloc that wasn't on a scheduler stack already was malg, which creates a new goroutine. runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139 runtime.futex() futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51 futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2) futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119 futexlock(0x5b0188, 0x5b0188) runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x7f0d27b4a000) runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336 runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218) runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705 runtime.oldstack() runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224 runtime.lessstack() ----- lessstack called from goroutine 2 ----- runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158 runtime.lock(0x5b0188, 0x40a5e2) runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316 runtime.stackalloc(0x1000, 0x4055b0) runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803 runtime.malg(0x1000, 0x40add9) runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854 runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...) runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831 runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059) ... R=r, r2 CC=golang-dev https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
return newg;
}
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
// Cannot split the stack because it assumes that the arguments
// are available sequentially after &fn; they would not be
// copied if a stack split occurred. It's OK for this to call
// functions that split the stack.
#pragma textflag 7
void
runtime·newproc(int32 siz, byte* fn, ...)
{
byte *argp;
if(thechar == '5')
argp = (byte*)(&fn+2); // skip caller's saved LR
else
argp = (byte*)(&fn+1);
runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
}
// Create a new g running fn with narg bytes of arguments starting
// at argp and returning nret bytes of results. callerpc is the
// address of the go statement that created this. The new g is put
// on the queue of g's waiting to run.
G*
runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
{
byte *sp;
G *newg;
int32 siz;
uintptr racectx;
//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
siz = narg + nret;
siz = (siz+7) & ~7;
// We could instead create a secondary stack frame
// and make it look like goexit was on the original but
// the call to the actual goroutine function was split.
// Not worth it: this is almost always an error.
if(siz > StackMin - 1024)
runtime·throw("runtime.newproc: function arguments too large for new goroutine");
if(raceenabled)
racectx = runtime·racegostart(callerpc);
schedlock();
if((newg = gfget()) != nil) {
if(newg->stackguard - StackGuard != newg->stack0)
runtime·throw("invalid stack in newg");
} else {
ld: detect stack overflow due to NOSPLIT Fix problems found. On amd64, various library routines had bigger stack frames than expected, because large function calls had been added. runtime.assertI2T: nosplit stack overflow 120 assumed on entry to runtime.assertI2T 8 after runtime.assertI2T uses 112 0 on entry to runtime.newTypeAssertionError -8 on entry to runtime.morestack01 runtime.assertE2E: nosplit stack overflow 120 assumed on entry to runtime.assertE2E 16 after runtime.assertE2E uses 104 8 on entry to runtime.panic 0 on entry to runtime.morestack16 -8 after runtime.morestack16 uses 8 runtime.assertE2T: nosplit stack overflow 120 assumed on entry to runtime.assertE2T 16 after runtime.assertE2T uses 104 8 on entry to runtime.panic 0 on entry to runtime.morestack16 -8 after runtime.morestack16 uses 8 runtime.newselect: nosplit stack overflow 120 assumed on entry to runtime.newselect 56 after runtime.newselect uses 64 48 on entry to runtime.printf 8 after runtime.printf uses 40 0 on entry to vprintf -8 on entry to runtime.morestack16 runtime.selectdefault: nosplit stack overflow 120 assumed on entry to runtime.selectdefault 56 after runtime.selectdefault uses 64 48 on entry to runtime.printf 8 after runtime.printf uses 40 0 on entry to vprintf -8 on entry to runtime.morestack16 runtime.selectgo: nosplit stack overflow 120 assumed on entry to runtime.selectgo 0 after runtime.selectgo uses 120 -8 on entry to runtime.gosched On arm, 5c was tagging functions NOSPLIT that should not have been, like the recursive function printpanics: printpanics: nosplit stack overflow 124 assumed on entry to printpanics 112 after printpanics uses 12 108 on entry to printpanics 96 after printpanics uses 12 92 on entry to printpanics 80 after printpanics uses 12 76 on entry to printpanics 64 after printpanics uses 12 60 on entry to printpanics 48 after printpanics uses 12 44 on entry to printpanics 32 after printpanics uses 12 28 on entry to printpanics 16 after printpanics uses 12 12 on entry to printpanics 0 after printpanics uses 12 -4 on entry to printpanics R=r, r2 CC=golang-dev https://golang.org/cl/4188061
2011-02-22 15:40:40 -07:00
newg = runtime·malg(StackMin);
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
if(runtime·lastg == nil)
runtime·allg = newg;
else
runtime·lastg->alllink = newg;
runtime·lastg = newg;
}
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
newg->status = Gwaiting;
newg->waitreason = "new goroutine";
sp = (byte*)newg->stackbase;
sp -= siz;
runtime·memmove(sp, argp, narg);
if(thechar == '5') {
// caller's LR
sp -= sizeof(void*);
*(void**)sp = nil;
}
newg->sched.sp = (uintptr)sp;
newg->sched.pc = (byte*)runtime·goexit;
newg->sched.g = newg;
newg->entry = fn;
newg->gopc = (uintptr)callerpc;
if(raceenabled)
newg->racectx = racectx;
runtime·sched.gcount++;
newg->goid = ++runtime·sched.goidgen;
newprocreadylocked(newg);
schedunlock();
return newg;
//printf(" goid=%d\n", newg->goid);
}
// Put on gfree list. Sched must be locked.
static void
gfput(G *gp)
{
if(gp->stackguard - StackGuard != gp->stack0)
runtime·throw("invalid stack in gfput");
gp->schedlink = runtime·sched.gfree;
runtime·sched.gfree = gp;
}
// Get from gfree list. Sched must be locked.
static G*
gfget(void)
{
G *gp;
gp = runtime·sched.gfree;
if(gp)
runtime·sched.gfree = gp->schedlink;
return gp;
}
void
runtime·Breakpoint(void)
{
runtime·breakpoint();
}
void
runtime·Gosched(void)
{
runtime·gosched();
}
// Implementation of runtime.GOMAXPROCS.
// delete when scheduler is stronger
int32
runtime·gomaxprocsfunc(int32 n)
{
int32 ret;
uint32 v;
schedlock();
ret = runtime·gomaxprocs;
if(n <= 0)
n = ret;
if(n > maxgomaxprocs)
n = maxgomaxprocs;
runtime·gomaxprocs = n;
if(runtime·gomaxprocs > 1)
runtime·singleproc = false;
if(runtime·gcwaiting != 0) {
if(atomic_mcpumax(runtime·sched.atomic) != 1)
runtime·throw("invalid mcpumax during gc");
schedunlock();
return ret;
}
setmcpumax(n);
// If there are now fewer allowed procs
// than procs running, stop.
v = runtime·atomicload(&runtime·sched.atomic);
if(atomic_mcpu(v) > n) {
schedunlock();
runtime·gosched();
return ret;
}
// handle more procs
matchmg();
schedunlock();
return ret;
}
static void
LockOSThread(void)
{
m->lockedg = g;
g->lockedm = m;
}
void
runtime·LockOSThread(void)
{
m->locked |= LockExternal;
LockOSThread();
}
void
runtime·lockOSThread(void)
{
m->locked += LockInternal;
LockOSThread();
}
static void
UnlockOSThread(void)
{
if(m->locked != 0)
return;
m->lockedg = nil;
g->lockedm = nil;
}
void
runtime·UnlockOSThread(void)
{
m->locked &= ~LockExternal;
UnlockOSThread();
}
void
runtime·unlockOSThread(void)
{
if(m->locked < LockInternal)
runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
m->locked -= LockInternal;
UnlockOSThread();
}
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
bool
runtime·lockedOSThread(void)
{
return g->lockedm != nil && m->lockedg != nil;
}
// for testing of callbacks
void
runtime·golockedOSThread(bool ret)
{
ret = runtime·lockedOSThread();
FLUSH(&ret);
}
// for testing of wire, unwire
void
runtime·mid(uint32 ret)
{
ret = m->id;
FLUSH(&ret);
}
void
runtime·NumGoroutine(intgo ret)
{
ret = runtime·sched.gcount;
FLUSH(&ret);
}
int32
runtime·gcount(void)
{
return runtime·sched.gcount;
}
int32
runtime·mcount(void)
{
return runtime·sched.mcount;
}
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
void
runtime·badmcall(void) // called from assembly
{
runtime·throw("runtime: mcall called on m->g0 stack");
}
void
runtime·badmcall2(void) // called from assembly
{
runtime·throw("runtime: mcall function returned");
}
static struct {
Lock;
void (*fn)(uintptr*, int32);
int32 hz;
uintptr pcbuf[100];
} prof;
// Called if we receive a SIGPROF signal.
void
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp)
{
int32 n;
if(prof.fn == nil || prof.hz == 0)
return;
runtime·lock(&prof);
if(prof.fn == nil) {
runtime·unlock(&prof);
return;
}
n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
if(n > 0)
prof.fn(prof.pcbuf, n);
runtime·unlock(&prof);
}
// Arrange to call fn with a traceback hz times a second.
void
runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
{
// Force sane arguments.
if(hz < 0)
hz = 0;
if(hz == 0)
fn = nil;
if(fn == nil)
hz = 0;
// Stop profiler on this cpu so that it is safe to lock prof.
// if a profiling signal came in while we had prof locked,
// it would deadlock.
runtime·resetcpuprofiler(0);
runtime·lock(&prof);
prof.fn = fn;
prof.hz = hz;
runtime·unlock(&prof);
runtime·lock(&runtime·sched);
runtime·sched.profilehz = hz;
runtime·unlock(&runtime·sched);
if(hz != 0)
runtime·resetcpuprofiler(hz);
}