1
0
mirror of https://github.com/golang/go synced 2024-11-21 21:14:47 -07:00

runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042
This commit is contained in:
Russ Cox 2011-07-19 11:01:17 -04:00
parent 9f636598ba
commit 025abd530e
4 changed files with 857 additions and 100 deletions

View File

@ -15,3 +15,9 @@ var F32to64 = f32to64
var Fcmp64 = fcmp64
var Fintto64 = fintto64
var F64toint = f64toint
func entersyscall()
func exitsyscall()
var Entersyscall = entersyscall
var Exitsyscall = exitsyscall

View File

@ -28,10 +28,10 @@ int32 runtime·gcwaiting;
// Go scheduler
//
// The go scheduler's job is to match ready-to-run goroutines (`g's)
// with waiting-for-work schedulers (`m's). If there are ready gs
// and no waiting ms, ready() will start a new m running in a new
// OS thread, so that all ready gs can run simultaneously, up to a limit.
// For now, ms never go away.
// with waiting-for-work schedulers (`m's). If there are ready g's
// and no waiting m's, ready() will start a new m running in a new
// OS thread, so that all ready g's can run simultaneously, up to a limit.
// For now, m's never go away.
//
// By default, Go keeps only one kernel thread (m) running user code
// at a single time; other threads may be blocked in the operating system.
@ -41,10 +41,10 @@ int32 runtime·gcwaiting;
// approximation of the maximum number of cores to use.
//
// Even a program that can run without deadlock in a single process
// might use more ms if given the chance. For example, the prime
// sieve will use as many ms as there are primes (up to runtime·sched.mmax),
// might use more m's if given the chance. For example, the prime
// sieve will use as many m's as there are primes (up to runtime·sched.mmax),
// allowing different stages of the pipeline to execute in parallel.
// We could revisit this choice, only kicking off new ms for blocking
// We could revisit this choice, only kicking off new m's for blocking
// system calls, but that would limit the amount of parallel computation
// that go would try to do.
//
@ -55,28 +55,75 @@ int32 runtime·gcwaiting;
struct Sched {
Lock;
G *gfree; // available gs (status == Gdead)
G *gfree; // available g's (status == Gdead)
int32 goidgen;
G *ghead; // gs waiting to run
G *ghead; // g's waiting to run
G *gtail;
int32 gwait; // number of gs waiting to run
int32 gcount; // number of gs that are alive
int32 grunning; // number of gs running on cpu or in syscall
int32 gwait; // number of g's waiting to run
int32 gcount; // number of g's that are alive
int32 grunning; // number of g's running on cpu or in syscall
M *mhead; // ms waiting for work
int32 mwait; // number of ms waiting for work
int32 mcount; // number of ms that have been created
int32 mcpu; // number of ms executing on cpu
int32 mcpumax; // max number of ms allowed on cpu
M *mhead; // m's waiting for work
int32 mwait; // number of m's waiting for work
int32 mcount; // number of m's that have been created
int32 predawn; // running initialization, don't run new gs.
volatile uint32 atomic; // atomic scheduling word (see below)
int32 predawn; // running initialization, don't run new g's.
int32 profilehz; // cpu profiling rate
Note stopped; // one g can wait here for ms to stop
int32 waitstop; // after setting this flag
Note stopped; // one g can set waitstop and wait here for m's to stop
};
// The atomic word in sched is an atomic uint32 that
// holds these fields.
//
// [15 bits] mcpu number of m's executing on cpu
// [15 bits] mcpumax max number of m's allowed on cpu
// [1 bit] waitstop some g is waiting on stopped
// [1 bit] gwaiting gwait != 0
//
// These fields are the information needed by entersyscall
// and exitsyscall to decide whether to coordinate with the
// scheduler. Packing them into a single machine word lets
// them use a fast path with a single atomic read/write and
// no lock/unlock. This greatly reduces contention in
// syscall- or cgo-heavy multithreaded programs.
//
// Except for entersyscall and exitsyscall, the manipulations
// to these fields only happen while holding the schedlock,
// so the routines holding schedlock only need to worry about
// what entersyscall and exitsyscall do, not the other routines
// (which also use the schedlock).
//
// In particular, entersyscall and exitsyscall only read mcpumax,
// waitstop, and gwaiting. They never write them. Thus, writes to those
// fields can be done (holding schedlock) without fear of write conflicts.
// There may still be logic conflicts: for example, the set of waitstop must
// be conditioned on mcpu >= mcpumax or else the wait may be a
// spurious sleep. The Promela model in proc.p verifies these accesses.
enum {
mcpuWidth = 15,
mcpuMask = (1<<mcpuWidth) - 1,
mcpuShift = 0,
mcpumaxShift = mcpuShift + mcpuWidth,
waitstopShift = mcpumaxShift + mcpuWidth,
gwaitingShift = waitstopShift+1,
// The max value of GOMAXPROCS is constrained
// by the max value we can store in the bit fields
// of the atomic word. Reserve a few high values
// so that we can detect accidental decrement
// beyond zero.
maxgomaxprocs = mcpuMask - 10,
};
#define atomic_mcpu(v) (((v)>>mcpuShift)&mcpuMask)
#define atomic_mcpumax(v) (((v)>>mcpumaxShift)&mcpuMask)
#define atomic_waitstop(v) (((v)>>waitstopShift)&1)
#define atomic_gwaiting(v) (((v)>>gwaitingShift)&1)
Sched runtime·sched;
int32 runtime·gomaxprocs;
@ -94,11 +141,26 @@ static void mput(M*); // put/get on mhead
static M* mget(G*);
static void gfput(G*); // put/get on gfree
static G* gfget(void);
static void matchmg(void); // match ms to gs
static void matchmg(void); // match m's to g's
static void readylocked(G*); // ready, but sched is locked
static void mnextg(M*, G*);
static void mcommoninit(M*);
void
setmcpumax(uint32 n)
{
uint32 v, w;
for(;;) {
v = runtime·sched.atomic;
w = v;
w &= ~(mcpuMask<<mcpumaxShift);
w |= n<<mcpumaxShift;
if(runtime·cas(&runtime·sched.atomic, v, w))
break;
}
}
// The bootstrap sequence is:
//
// call osinit
@ -131,9 +193,12 @@ runtime·schedinit(void)
runtime·gomaxprocs = 1;
p = runtime·getenv("GOMAXPROCS");
if(p != nil && (n = runtime·atoi(p)) != 0)
if(p != nil && (n = runtime·atoi(p)) != 0) {
if(n > maxgomaxprocs)
n = maxgomaxprocs;
runtime·gomaxprocs = n;
runtime·sched.mcpumax = runtime·gomaxprocs;
}
setmcpumax(runtime·gomaxprocs);
runtime·sched.predawn = 1;
m->nomemprof--;
@ -168,7 +233,7 @@ runtime·initdone(void)
mstats.enablegc = 1;
// If main·init_function started other goroutines,
// kick off new ms to handle them, like ready
// kick off new m's to handle them, like ready
// would have, had it not been pre-dawn.
schedlock();
matchmg();
@ -221,6 +286,21 @@ mcommoninit(M *m)
runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil);
}
// Try to increment mcpu. Report whether succeeded.
static bool
canaddmcpu(void)
{
uint32 v;
for(;;) {
v = runtime·sched.atomic;
if(atomic_mcpu(v) >= atomic_mcpumax(v))
return 0;
if(runtime·cas(&runtime·sched.atomic, v, v+(1<<mcpuShift)))
return 1;
}
}
// Put on `g' queue. Sched must be locked.
static void
gput(G *g)
@ -228,7 +308,7 @@ gput(G *g)
M *m;
// If g is wired, hand it off directly.
if(runtime·sched.mcpu < runtime·sched.mcpumax && (m = g->lockedm) != nil) {
if((m = g->lockedm) != nil && canaddmcpu()) {
mnextg(m, g);
return;
}
@ -251,7 +331,18 @@ gput(G *g)
else
runtime·sched.gtail->schedlink = g;
runtime·sched.gtail = g;
runtime·sched.gwait++;
// increment gwait.
// if it transitions to nonzero, set atomic gwaiting bit.
if(runtime·sched.gwait++ == 0)
runtime·xadd(&runtime·sched.atomic, 1<<gwaitingShift);
}
// Report whether gget would return something.
static bool
haveg(void)
{
return runtime·sched.ghead != nil || m->idleg != nil;
}
// Get from `g' queue. Sched must be locked.
@ -265,7 +356,10 @@ gget(void)
runtime·sched.ghead = g->schedlink;
if(runtime·sched.ghead == nil)
runtime·sched.gtail = nil;
runtime·sched.gwait--;
// decrement gwait.
// if it transitions to zero, clear atomic gwaiting bit.
if(--runtime·sched.gwait == 0)
runtime·xadd(&runtime·sched.atomic, -1<<gwaitingShift);
} else if(m->idleg != nil) {
g = m->idleg;
m->idleg = nil;
@ -350,11 +444,11 @@ newprocreadylocked(G *g)
}
// Pass g to m for running.
// Caller has already incremented mcpu.
static void
mnextg(M *m, G *g)
{
runtime·sched.grunning++;
runtime·sched.mcpu++;
m->nextg = g;
if(m->waitnextg) {
m->waitnextg = 0;
@ -366,18 +460,19 @@ mnextg(M *m, G *g)
// Get the next goroutine that m should run.
// Sched must be locked on entry, is unlocked on exit.
// Makes sure that at most $GOMAXPROCS gs are
// Makes sure that at most $GOMAXPROCS g's are
// running on cpus (not in system calls) at any given time.
static G*
nextgandunlock(void)
{
G *gp;
uint32 v;
if(runtime·sched.mcpu < 0)
runtime·throw("negative runtime·sched.mcpu");
if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs)
runtime·throw("negative mcpu");
// If there is a g waiting as m->nextg,
// mnextg took care of the runtime·sched.mcpu++.
// If there is a g waiting as m->nextg, the mcpu++
// happened before it was passed to mnextg.
if(m->nextg != nil) {
gp = m->nextg;
m->nextg = nil;
@ -393,26 +488,50 @@ nextgandunlock(void)
matchmg();
} else {
// Look for work on global queue.
while(runtime·sched.mcpu < runtime·sched.mcpumax && (gp=gget()) != nil) {
while(haveg() && canaddmcpu()) {
gp = gget();
if(gp == nil)
runtime·throw("gget inconsistency");
if(gp->lockedm) {
mnextg(gp->lockedm, gp);
continue;
}
runtime·sched.grunning++;
runtime·sched.mcpu++; // this m will run gp
schedunlock();
return gp;
}
// Otherwise, wait on global m queue.
// The while loop ended either because the g queue is empty
// or because we have maxed out our m procs running go
// code (mcpu >= mcpumax). We need to check that
// concurrent actions by entersyscall/exitsyscall cannot
// invalidate the decision to end the loop.
//
// We hold the sched lock, so no one else is manipulating the
// g queue or changing mcpumax. Entersyscall can decrement
// mcpu, but if does so when there is something on the g queue,
// the gwait bit will be set, so entersyscall will take the slow path
// and use the sched lock. So it cannot invalidate our decision.
//
// Wait on global m queue.
mput(m);
}
v = runtime·atomicload(&runtime·sched.atomic);
if(runtime·sched.grunning == 0)
runtime·throw("all goroutines are asleep - deadlock!");
m->nextg = nil;
m->waitnextg = 1;
runtime·noteclear(&m->havenextg);
if(runtime·sched.waitstop && runtime·sched.mcpu <= runtime·sched.mcpumax) {
runtime·sched.waitstop = 0;
// Stoptheworld is waiting for all but its cpu to go to stop.
// Entersyscall might have decremented mcpu too, but if so
// it will see the waitstop and take the slow path.
// Exitsyscall never increments mcpu beyond mcpumax.
if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
// set waitstop = 0 (known to be 1)
runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
runtime·notewakeup(&runtime·sched.stopped);
}
schedunlock();
@ -424,21 +543,34 @@ nextgandunlock(void)
return gp;
}
// TODO(rsc): Remove. This is only temporary,
// for the mark and sweep collector.
void
runtime·stoptheworld(void)
{
uint32 v;
schedlock();
runtime·gcwaiting = 1;
runtime·sched.mcpumax = 1;
while(runtime·sched.mcpu > 1) {
setmcpumax(1);
// while mcpu > 1
for(;;) {
v = runtime·sched.atomic;
if(atomic_mcpu(v) <= 1)
break;
// It would be unsafe for multiple threads to be using
// the stopped note at once, but there is only
// ever one thread doing garbage collection,
// so this is okay.
// ever one thread doing garbage collection.
runtime·noteclear(&runtime·sched.stopped);
runtime·sched.waitstop = 1;
if(atomic_waitstop(v))
runtime·throw("invalid waitstop");
// atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
// still being true.
if(!runtime·cas(&runtime·sched.atomic, v, v+(1<<waitstopShift)))
continue;
schedunlock();
runtime·notesleep(&runtime·sched.stopped);
schedlock();
@ -453,7 +585,7 @@ runtime·starttheworld(void)
{
schedlock();
runtime·gcwaiting = 0;
runtime·sched.mcpumax = runtime·gomaxprocs;
setmcpumax(runtime·gomaxprocs);
matchmg();
schedunlock();
}
@ -490,7 +622,7 @@ struct CgoThreadStart
void (*fn)(void);
};
// Kick off new ms as needed (up to mcpumax).
// Kick off new m's as needed (up to mcpumax).
// There are already `other' other cpus that will
// start looking for goroutines shortly.
// Sched is locked.
@ -501,10 +633,14 @@ matchmg(void)
if(m->mallocing || m->gcing)
return;
while(runtime·sched.mcpu < runtime·sched.mcpumax && (g = gget()) != nil){
M *m;
while(haveg() && canaddmcpu()) {
g = gget();
if(g == nil)
runtime·throw("gget inconsistency");
// Find the m that will run g.
M *m;
if((m = mget(g)) == nil){
m = runtime·malloc(sizeof(M));
mcommoninit(m);
@ -541,6 +677,7 @@ static void
schedule(G *gp)
{
int32 hz;
uint32 v;
schedlock();
if(gp != nil) {
@ -549,11 +686,13 @@ schedule(G *gp)
// Just finished running gp.
gp->m = nil;
runtime·sched.mcpu--;
runtime·sched.grunning--;
if(runtime·sched.mcpu < 0)
runtime·throw("runtime·sched.mcpu < 0 in scheduler");
// atomic { mcpu-- }
v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
if(atomic_mcpu(v) > maxgomaxprocs)
runtime·throw("negative mcpu in scheduler");
switch(gp->status){
case Grunnable:
case Gdead:
@ -632,30 +771,60 @@ runtime·gosched(void)
void
runtime·entersyscall(void)
{
uint32 v, w;
if(runtime·sched.predawn)
return;
schedlock();
g->status = Gsyscall;
runtime·sched.mcpu--;
if(runtime·sched.gwait != 0)
matchmg();
if(runtime·sched.waitstop && runtime·sched.mcpu <= runtime·sched.mcpumax) {
runtime·sched.waitstop = 0;
runtime·notewakeup(&runtime·sched.stopped);
}
// Leave SP around for gc and traceback.
// Do before schedunlock so that gc
// never sees Gsyscall with wrong stack.
runtime·gosave(&g->sched);
g->gcsp = g->sched.sp;
g->gcstack = g->stackbase;
g->gcguard = g->stackguard;
g->status = Gsyscall;
if(g->gcsp < g->gcguard-StackGuard || g->gcstack < g->gcsp) {
runtime·printf("entersyscall inconsistent %p [%p,%p]\n", g->gcsp, g->gcguard-StackGuard, g->gcstack);
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
// g->gcsp, g->gcguard-StackGuard, g->gcstack);
runtime·throw("entersyscall");
}
// Fast path.
// The slow path inside the schedlock/schedunlock will get
// through without stopping if it does:
// mcpu--
// gwait not true
// waitstop && mcpu <= mcpumax not true
// If we can do the same with a single atomic read/write,
// then we can skip the locks.
for(;;) {
v = runtime·sched.atomic;
if(atomic_gwaiting(v))
break;
if(atomic_waitstop(v) && atomic_mcpu(v)-1 <= atomic_mcpumax(v))
break;
w = v;
w += (-1<<mcpuShift);
if(runtime·cas(&runtime·sched.atomic, v, w))
return;
}
schedlock();
// atomic { mcpu--; }
v = runtime·xadd(&runtime·sched.atomic, (-1<<mcpuShift));
if(atomic_gwaiting(v)) {
matchmg();
v = runtime·atomicload(&runtime·sched.atomic);
}
if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
runtime·xadd(&runtime·sched.atomic, -1<<waitstopShift);
runtime·notewakeup(&runtime·sched.stopped);
}
// Re-save sched in case one of the calls
// (notewakeup, matchmg) triggered something using it.
runtime·gosave(&g->sched);
schedunlock();
}
@ -666,21 +835,43 @@ runtime·entersyscall(void)
void
runtime·exitsyscall(void)
{
uint32 v, w;
if(runtime·sched.predawn)
return;
schedlock();
runtime·sched.mcpu++;
// Fast path - if there's room for this m, we're done.
if(m->profilehz == runtime·sched.profilehz && runtime·sched.mcpu <= runtime·sched.mcpumax) {
// Fast path.
// If we can do the mcpu-- bookkeeping and
// find that we still have mcpu <= mcpumax, then we can
// start executing Go code immediately, without having to
// schedlock/schedunlock.
for(;;) {
// If the profiler frequency needs updating,
// take the slow path.
if(m->profilehz != runtime·sched.profilehz)
break;
v = runtime·sched.atomic;
if(atomic_mcpu(v) >= atomic_mcpumax(v))
break;
w = v;
w += (1<<mcpuShift);
if(runtime·cas(&runtime·sched.atomic, v, w)) {
// There's a cpu for us, so we can run.
g->status = Grunning;
// Garbage collector isn't running (since we are),
// so okay to clear gcstack.
g->gcstack = nil;
schedunlock();
return;
}
}
schedlock();
// atomic { mcpu++; }
runtime·xadd(&runtime·sched.atomic, (1<<mcpuShift));
// Tell scheduler to put g back on the run queue:
// mostly equivalent to g->status = Grunning,
// but keeps the garbage collector from thinking
@ -688,7 +879,7 @@ runtime·exitsyscall(void)
g->readyonstop = 1;
schedunlock();
// Slow path - all the cpus are taken.
// All the cpus are taken.
// The scheduler will ready g and put this m to sleep.
// When the scheduler takes g away from m,
// it will undo the runtime·sched.mcpu++ above.
@ -1229,25 +1420,29 @@ int32
runtime·gomaxprocsfunc(int32 n)
{
int32 ret;
uint32 v;
schedlock();
ret = runtime·gomaxprocs;
if(n <= 0)
n = ret;
if(n > maxgomaxprocs)
n = maxgomaxprocs;
runtime·gomaxprocs = n;
if(runtime·gcwaiting != 0) {
if (runtime·sched.mcpumax != 1)
runtime·throw("invalid runtime·sched.mcpumax during gc");
if(atomic_mcpumax(runtime·sched.atomic) != 1)
runtime·throw("invalid mcpumax during gc");
schedunlock();
return ret;
}
runtime·sched.mcpumax = n;
// handle fewer procs?
if(runtime·sched.mcpu > runtime·sched.mcpumax) {
setmcpumax(n);
// If there are now fewer allowed procs
// than procs running, stop.
v = runtime·atomicload(&runtime·sched.atomic);
if(atomic_mcpu(v) > n) {
schedunlock();
// just give up the cpu.
// we'll only get rescheduled once the
// number has come down.
runtime·gosched();
return ret;
}

506
src/pkg/runtime/proc.p Normal file
View File

@ -0,0 +1,506 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
model for proc.c as of 2011/07/15.
takes 4300 seconds to explore 1128130 states
with G=3, var_gomaxprocs=1
on a Core i7 L640 2.13 GHz Lenovo X201s.
rm -f proc.p.trail pan.* pan
spin -a proc.p
gcc -DSAFETY -DREACH -DMEMLIM'='4000 -o pan pan.c
pan -w28 -n -i -m500000
test -f proc.p.trail && pan -r proc.p.trail
*/
/*
* scheduling parameters
*/
/*
* the number of goroutines G doubles as the maximum
* number of OS threads; the max is reachable when all
* the goroutines are blocked in system calls.
*/
#define G 3
/*
* whether to allow gomaxprocs to vary during execution.
* enabling this checks the scheduler even when code is
* calling GOMAXPROCS, but it also slows down the verification
* by about 10x.
*/
#define var_gomaxprocs 1 /* allow gomaxprocs to vary */
/* gomaxprocs */
#if var_gomaxprocs
byte gomaxprocs = 3;
#else
#define gomaxprocs 3
#endif
/* queue of waiting M's: sched_mhead[:mwait] */
byte mwait;
byte sched_mhead[G];
/* garbage collector state */
bit gc_lock, gcwaiting;
/* goroutines sleeping, waiting to run */
byte gsleep, gwait;
/* scheduler state */
bit sched_lock;
bit sched_stopped;
bit atomic_gwaiting, atomic_waitstop;
byte atomic_mcpu, atomic_mcpumax;
/* M struct fields - state for handing off g to m. */
bit m_waitnextg[G];
bit m_havenextg[G];
bit m_nextg[G];
/*
* opt_atomic/opt_dstep mark atomic/deterministics
* sequences that are marked only for reasons of
* optimization, not for correctness of the algorithms.
*
* in general any code that runs while holding the
* schedlock and does not refer to or modify the atomic_*
* fields can be marked atomic/dstep without affecting
* the usefulness of the model. since we trust the lock
* implementation, what we really want to test is the
* interleaving of the atomic fast paths with entersyscall
* and exitsyscall.
*/
#define opt_atomic atomic
#define opt_dstep d_step
/* locks */
inline lock(x) {
d_step { x == 0; x = 1 }
}
inline unlock(x) {
d_step { assert x == 1; x = 0 }
}
/* notes */
inline noteclear(x) {
x = 0
}
inline notesleep(x) {
x == 1
}
inline notewakeup(x) {
opt_dstep { assert x == 0; x = 1 }
}
/*
* scheduler
*/
inline schedlock() {
lock(sched_lock)
}
inline schedunlock() {
unlock(sched_lock)
}
/*
* canaddmcpu is like the C function but takes
* an extra argument to include in the test, to model
* "cannget() && canaddmcpu()" as "canaddmcpu(cangget())"
*/
inline canaddmcpu(g) {
d_step {
g && atomic_mcpu < atomic_mcpumax;
atomic_mcpu++;
}
}
/*
* gput is like the C function.
* instead of tracking goroutines explicitly we
* maintain only the count of the number of
* waiting goroutines.
*/
inline gput() {
/* omitted: lockedm, idlem concerns */
opt_dstep {
gwait++;
if
:: gwait == 1 ->
atomic_gwaiting = 1
:: else
fi
}
}
/*
* cangget is a macro so it can be passed to
* canaddmcpu (see above).
*/
#define cangget() (gwait>0)
/*
* gget is like the C function.
*/
inline gget() {
opt_dstep {
assert gwait > 0;
gwait--;
if
:: gwait == 0 ->
atomic_gwaiting = 0
:: else
fi
}
}
/*
* mput is like the C function.
* here we do keep an explicit list of waiting M's,
* so that we know which ones can be awakened.
* we use _pid-1 because the monitor is proc 0.
*/
inline mput() {
opt_dstep {
sched_mhead[mwait] = _pid - 1;
mwait++
}
}
/*
* mnextg is like the C function mnextg(m, g).
* it passes an unspecified goroutine to m to start running.
*/
inline mnextg(m) {
opt_dstep {
m_nextg[m] = 1;
if
:: m_waitnextg[m] ->
m_waitnextg[m] = 0;
notewakeup(m_havenextg[m])
:: else
fi
}
}
/*
* mgetnextg handles the main m handoff in matchmg.
* it is like mget() || new M followed by mnextg(m, g),
* but combined to avoid a local variable.
* unlike the C code, a new M simply assumes it is
* running a g instead of using the mnextg coordination
* to obtain one.
*/
inline mgetnextg() {
opt_atomic {
if
:: mwait > 0 ->
mwait--;
mnextg(sched_mhead[mwait]);
sched_mhead[mwait] = 0;
:: else ->
run mstart();
fi
}
}
/*
* nextgandunlock is like the C function.
* it pulls a g off the queue or else waits for one.
*/
inline nextgandunlock() {
assert atomic_mcpu <= G;
if
:: m_nextg[_pid-1] ->
m_nextg[_pid-1] = 0;
schedunlock();
:: canaddmcpu(!m_nextg[_pid-1] && cangget()) ->
gget();
schedunlock();
:: else ->
opt_dstep {
mput();
m_nextg[_pid-1] = 0;
m_waitnextg[_pid-1] = 1;
noteclear(m_havenextg[_pid-1]);
}
if
:: atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
atomic_waitstop = 0;
notewakeup(sched_stopped)
:: else
fi;
schedunlock();
opt_dstep {
notesleep(m_havenextg[_pid-1]);
assert m_nextg[_pid-1];
m_nextg[_pid-1] = 0;
}
fi
}
/*
* stoptheworld is like the C function.
*/
inline stoptheworld() {
schedlock();
gcwaiting = 1;
atomic_mcpumax = 1;
do
:: d_step { atomic_mcpu > 1 ->
noteclear(sched_stopped);
assert !atomic_waitstop;
atomic_waitstop = 1 }
schedunlock();
notesleep(sched_stopped);
schedlock();
:: else ->
break
od;
schedunlock();
}
/*
* starttheworld is like the C function.
*/
inline starttheworld() {
schedlock();
gcwaiting = 0;
atomic_mcpumax = gomaxprocs;
matchmg();
schedunlock();
}
/*
* matchmg is like the C function.
*/
inline matchmg() {
do
:: canaddmcpu(cangget()) ->
gget();
mgetnextg();
:: else -> break
od
}
/*
* ready is like the C function.
* it puts a g on the run queue.
*/
inline ready() {
schedlock();
gput()
matchmg()
schedunlock()
}
/*
* schedule simulates the C scheduler.
* it assumes that there is always a goroutine
* running already, and the goroutine has entered
* the scheduler for an unspecified reason,
* either to yield or to block.
*/
inline schedule() {
schedlock();
mustsched = 0;
atomic_mcpu--;
assert atomic_mcpu <= G;
if
:: skip ->
// goroutine yields, still runnable
gput();
:: gsleep+1 < G ->
// goroutine goes to sleep (but there is another that can wake it)
gsleep++
fi;
// Find goroutine to run.
nextgandunlock()
}
/*
* entersyscall is like the C function.
*/
inline entersyscall() {
/*
* Fast path. Check all the conditions tested during schedlock/schedunlock
* below, and if we can get through the whole thing without stopping, run it
* in one atomic cas-based step.
*/
atomic {
if
:: atomic_gwaiting ->
skip
:: atomic_waitstop && atomic_mcpu-1 <= atomic_mcpumax ->
skip
:: else ->
atomic_mcpu--;
goto Lreturn_entersyscall;
fi
}
/*
* Normal path.
*/
schedlock()
d_step {
atomic_mcpu--;
}
if
:: atomic_gwaiting ->
matchmg()
:: else
fi;
if
:: atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
atomic_waitstop = 0;
notewakeup(sched_stopped)
:: else
fi;
schedunlock();
Lreturn_entersyscall:
skip
}
/*
* exitsyscall is like the C function.
*/
inline exitsyscall() {
/*
* Fast path. If there's a cpu available, use it.
*/
atomic {
// omitted profilehz check
if
:: atomic_mcpu >= atomic_mcpumax ->
skip
:: else ->
atomic_mcpu++;
goto Lreturn_exitsyscall
fi
}
/*
* Normal path.
*/
schedlock();
d_step {
atomic_mcpu++;
if
:: atomic_mcpu <= atomic_mcpumax ->
skip
:: else ->
mustsched = 1
fi
}
schedunlock()
Lreturn_exitsyscall:
skip
}
#if var_gomaxprocs
inline gomaxprocsfunc() {
schedlock();
opt_atomic {
if
:: gomaxprocs != 1 -> gomaxprocs = 1
:: gomaxprocs != 2 -> gomaxprocs = 2
:: gomaxprocs != 3 -> gomaxprocs = 3
fi;
}
if
:: gcwaiting != 0 ->
assert atomic_mcpumax == 1
:: else ->
atomic_mcpumax = gomaxprocs;
if
:: atomic_mcpu > gomaxprocs ->
mustsched = 1
:: else ->
matchmg()
fi
fi;
schedunlock();
}
#endif
/*
* mstart is the entry point for a new M.
* our model of an M is always running some
* unspecified goroutine.
*/
proctype mstart() {
/*
* mustsched is true if the goroutine must enter the
* scheduler instead of continuing to execute.
*/
bit mustsched;
do
:: skip ->
// goroutine reschedules.
schedule()
:: !mustsched ->
// goroutine does something.
if
:: skip ->
// goroutine executes system call
entersyscall();
exitsyscall()
:: atomic { gsleep > 0; gsleep-- } ->
// goroutine wakes another goroutine
ready()
:: lock(gc_lock) ->
// goroutine runs a garbage collection
stoptheworld();
starttheworld();
unlock(gc_lock)
#if var_gomaxprocs
:: skip ->
// goroutine picks a new gomaxprocs
gomaxprocsfunc()
#endif
fi
od;
assert 0;
}
/*
* monitor initializes the scheduler state
* and then watches for impossible conditions.
*/
active proctype monitor() {
opt_dstep {
byte i = 1;
do
:: i < G ->
gput();
i++
:: else -> break
od;
atomic_mcpu = 1;
atomic_mcpumax = 1;
}
run mstart();
do
// Should never have goroutines waiting with procs available.
:: !sched_lock && gwait > 0 && atomic_mcpu < atomic_mcpumax ->
assert 0
// Should never have gc waiting for stop if things have already stopped.
:: !sched_lock && atomic_waitstop && atomic_mcpu <= atomic_mcpumax ->
assert 0
od
}

View File

@ -73,3 +73,53 @@ func BenchmarkStackGrowth(b *testing.B) {
<-c
}
}
func BenchmarkSyscall(b *testing.B) {
const CallsPerSched = 1000
procs := runtime.GOMAXPROCS(-1)
N := int32(b.N / CallsPerSched)
c := make(chan bool, procs)
for p := 0; p < procs; p++ {
go func() {
for atomic.AddInt32(&N, -1) >= 0 {
runtime.Gosched()
for g := 0; g < CallsPerSched; g++ {
runtime.Entersyscall()
runtime.Exitsyscall()
}
}
c <- true
}()
}
for p := 0; p < procs; p++ {
<-c
}
}
func BenchmarkSyscallWork(b *testing.B) {
const CallsPerSched = 1000
const LocalWork = 100
procs := runtime.GOMAXPROCS(-1)
N := int32(b.N / CallsPerSched)
c := make(chan bool, procs)
for p := 0; p < procs; p++ {
go func() {
foo := 42
for atomic.AddInt32(&N, -1) >= 0 {
runtime.Gosched()
for g := 0; g < CallsPerSched; g++ {
runtime.Entersyscall()
for i := 0; i < LocalWork; i++ {
foo *= 2
foo /= 2
}
runtime.Exitsyscall()
}
}
c <- foo == 42
}()
}
for p := 0; p < procs; p++ {
<-c
}
}