go/src/pkg/runtime/proc.c

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "runtime.h"
#include "arch_GOARCH.h"
#include "zaexperiment.h"
#include "malloc.h"
#include "stack.h"
#include "race.h"
#include "type.h"
#include "../../cmd/ld/textflag.h"

// Goroutine scheduler
// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
//
// The main concepts are:
// G - goroutine.
// M - worker thread, or machine.
// P - processor, a resource that is required to execute Go code.
//     M must have an associated P to execute Go code, however it can be
//     blocked or in a syscall w/o an associated P.
//
// Design doc at http://golang.org/s/go11sched.

typedef struct Sched Sched;
struct Sched {
	Lock;

	uint64	goidgen;

	M*	midle;	 // idle m's waiting for work
	int32	nmidle;	 // number of idle m's waiting for work
	int32	nmidlelocked; // number of locked m's waiting for work
	int32	mcount;	 // number of m's that have been created
	int32	maxmcount;	// maximum number of m's allowed (or die)

	P*	pidle;  // idle P's
	uint32	npidle;
	uint32	nmspinning;

	// Global runnable queue.
	G*	runqhead;
	G*	runqtail;
	int32	runqsize;

	// Global cache of dead G's.
	Lock	gflock;
	G*	gfree;

	uint32	gcwaiting;	// gc is waiting to run
	int32	stopwait;
	Note	stopnote;
	uint32	sysmonwait;
	Note	sysmonnote;
	uint64	lastpoll;

	int32	profilehz;	// cpu profiling rate
};

enum
{
	// The max value of GOMAXPROCS.
	// There are no fundamental restrictions on the value.
	MaxGomaxprocs = 1<<8,

	// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
	// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
	GoidCacheBatch = 16,
};

Sched	runtime·sched;
int32	runtime·gomaxprocs;
uint32	runtime·needextram;
bool	runtime·iscgo;
M	runtime·m0;
G	runtime·g0;	// idle goroutine for m0
G*	runtime·lastg;
M*	runtime·allm;
M*	runtime·extram;
int8*	runtime·goos;
int32	runtime·ncpu;
static int32	newprocs;

static	Lock allglock;	// the following vars are protected by this lock or by stoptheworld
G**	runtime·allg;
uintptr runtime·allglen;
static	uintptr allgcap;

void runtime·mstart(void);
static void runqput(P*, G*);
static G* runqget(P*);
static bool runqputslow(P*, G*, uint32, uint32);
static G* runqsteal(P*, P*);
static void mput(M*);
static M* mget(void);
static void mcommoninit(M*);
static void schedule(void);
static void procresize(int32);
static void acquirep(P*);
static P* releasep(void);
static void newm(void(*)(void), P*);
static void stopm(void);
static void startm(P*, bool);
static void handoffp(P*);
static void wakep(void);
static void stoplockedm(void);
static void startlockedm(G*);
static void sysmon(void);
static uint32 retake(int64);
static void incidlelocked(int32);
static void checkdead(void);
static void exitsyscall0(G*);
static void park0(G*);
static void goexit0(G*);
static void gfput(P*, G*);
static G* gfget(P*);
static void gfpurge(P*);
static void globrunqput(G*);
static void globrunqputbatch(G*, G*, int32);
static G* globrunqget(P*, int32);
static P* pidleget(void);
static void pidleput(P*);
static void injectglist(G*);
static bool preemptall(void);
static bool preemptone(P*);
static bool exitsyscallfast(void);
static bool haveexperiment(int8*);
static void allgadd(G*);

// The bootstrap sequence is:
//
//	call osinit
//	call schedinit
//	make & queue new G
//	call runtime·mstart
//
// The new G calls runtime·main.
void
runtime·schedinit(void)
{
	int32 n, procs;
	byte *p;
	Eface i;

	runtime·sched.maxmcount = 10000;
	runtime·precisestack = haveexperiment("precisestack");

	runtime·mallocinit();
	mcommoninit(m);
	
	// Initialize the itable value for newErrorCString,
	// so that the next time it gets called, possibly
	// in a fault during a garbage collection, it will not
	// need to allocated memory.
	runtime·newErrorCString(0, &i);

	runtime·goargs();
	runtime·goenvs();
	runtime·parsedebugvars();

	// Allocate internal symbol table representation now, we need it for GC anyway.
	runtime·symtabinit();

	runtime·sched.lastpoll = runtime·nanotime();
	procs = 1;
	p = runtime·getenv("GOMAXPROCS");
	if(p != nil && (n = runtime·atoi(p)) > 0) {
		if(n > MaxGomaxprocs)
			n = MaxGomaxprocs;
		procs = n;
	}
	runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
	procresize(procs);

	mstats.enablegc = 1;

	if(raceenabled)
		g->racectx = runtime·raceinit();
}

extern void main·init(void);
extern void main·main(void);

static FuncVal scavenger = {runtime·MHeap_Scavenger};

static FuncVal initDone = { runtime·unlockOSThread };

// The main goroutine.
void
runtime·main(void)
{
	Defer d;
	
	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
	if(sizeof(void*) == 8)
		runtime·maxstacksize = 1000000000;
	else
		runtime·maxstacksize = 250000000;

	newm(sysmon, nil);

	// Lock the main goroutine onto this, the main OS thread,
	// during initialization.  Most programs won't care, but a few
	// do require certain calls to be made by the main thread.
	// Those can arrange for main.main to run in the main thread
	// by calling runtime.LockOSThread during initialization
	// to preserve the lock.
	runtime·lockOSThread();
	
	// Defer unlock so that runtime.Goexit during init does the unlock too.
	d.fn = &initDone;
	d.siz = 0;
	d.link = g->defer;
	d.argp = (void*)-1;
	d.special = true;
	g->defer = &d;

	if(m != &runtime·m0)
		runtime·throw("runtime·main not on m0");
	runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
	main·init();

	if(g->defer != &d || d.fn != &initDone)
		runtime·throw("runtime: bad defer entry after init");
	g->defer = d.link;
	runtime·unlockOSThread();

	main·main();
	if(raceenabled)
		runtime·racefini();

	// Make racy client program work: if panicking on
	// another goroutine at the same time as main returns,
	// let the other goroutine finish printing the panic trace.
	// Once it does, it will exit. See issue 3934.
	if(runtime·panicking)
		runtime·park(nil, nil, "panicwait");

	runtime·exit(0);
	for(;;)
		*(int32*)runtime·main = 0;
}

void
runtime·goroutineheader(G *gp)
{
	int8 *status;
	int64 waitfor;

	switch(gp->status) {
	case Gidle:
		status = "idle";
		break;
	case Grunnable:
		status = "runnable";
		break;
	case Grunning:
		status = "running";
		break;
	case Gsyscall:
		status = "syscall";
		break;
	case Gwaiting:
		if(gp->waitreason)
			status = gp->waitreason;
		else
			status = "waiting";
		break;
	default:
		status = "???";
		break;
	}

	// approx time the G is blocked, in minutes
	waitfor = 0;
	if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
		waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000);

	if(waitfor < 1)
		runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
	else
		runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
}

void
runtime·tracebackothers(G *me)
{
	G *gp;
	int32 traceback;
	uintptr i;

	traceback = runtime·gotraceback(nil);
	
	// Show the current goroutine first, if we haven't already.
	if((gp = m->curg) != nil && gp != me) {
		runtime·printf("\n");
		runtime·goroutineheader(gp);
		runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
	}

	runtime·lock(&allglock);
	for(i = 0; i < runtime·allglen; i++) {
		gp = runtime·allg[i];
		if(gp == me || gp == m->curg || gp->status == Gdead)
			continue;
		if(gp->issystem && traceback < 2)
			continue;
		runtime·printf("\n");
		runtime·goroutineheader(gp);
		if(gp->status == Grunning) {
			runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
			runtime·printcreatedby(gp);
		} else
			runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
	}
	runtime·unlock(&allglock);
}

static void
checkmcount(void)
{
	// sched lock is held
	if(runtime·sched.mcount > runtime·sched.maxmcount) {
		runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
		runtime·throw("thread exhaustion");
	}
}

static void
mcommoninit(M *mp)
{
	// If there is no mcache runtime·callers() will crash,
	// and we are most likely in sysmon thread so the stack is senseless anyway.
	if(m->mcache)
		runtime·callers(1, mp->createstack, nelem(mp->createstack));

	mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();

	runtime·lock(&runtime·sched);
	mp->id = runtime·sched.mcount++;
	checkmcount();
	runtime·mpreinit(mp);

	// Add to runtime·allm so garbage collector doesn't free m
	// when it is just in a register or thread-local storage.
	mp->alllink = runtime·allm;
	// runtime·NumCgoCall() iterates over allm w/o schedlock,
	// so we need to publish it safely.
	runtime·atomicstorep(&runtime·allm, mp);
	runtime·unlock(&runtime·sched);
}

// Mark gp ready to run.
void
runtime·ready(G *gp)
{
	// Mark runnable.
	m->locks++;  // disable preemption because it can be holding p in a local var
	if(gp->status != Gwaiting) {
		runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
		runtime·throw("bad g->status in ready");
	}
	gp->status = Grunnable;
	runqput(m->p, gp);
	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
		wakep();
	m->locks--;
	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
		g->stackguard0 = StackPreempt;
}

int32
runtime·gcprocs(void)
{
	int32 n;

	// Figure out how many CPUs to use during GC.
	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
	runtime·lock(&runtime·sched);
	n = runtime·gomaxprocs;
	if(n > runtime·ncpu)
		n = runtime·ncpu;
	if(n > MaxGcproc)
		n = MaxGcproc;
	if(n > runtime·sched.nmidle+1) // one M is currently running
		n = runtime·sched.nmidle+1;
	runtime·unlock(&runtime·sched);
	return n;
}

static bool
needaddgcproc(void)
{
	int32 n;

	runtime·lock(&runtime·sched);
	n = runtime·gomaxprocs;
	if(n > runtime·ncpu)
		n = runtime·ncpu;
	if(n > MaxGcproc)
		n = MaxGcproc;
	n -= runtime·sched.nmidle+1; // one M is currently running
	runtime·unlock(&runtime·sched);
	return n > 0;
}

void
runtime·helpgc(int32 nproc)
{
	M *mp;
	int32 n, pos;

	runtime·lock(&runtime·sched);
	pos = 0;
	for(n = 1; n < nproc; n++) {  // one M is currently running
		if(runtime·allp[pos]->mcache == m->mcache)
			pos++;
		mp = mget();
		if(mp == nil)
			runtime·throw("runtime·gcprocs inconsistency");
		mp->helpgc = n;
		mp->mcache = runtime·allp[pos]->mcache;
		pos++;
		runtime·notewakeup(&mp->park);
	}
	runtime·unlock(&runtime·sched);
}

// Similar to stoptheworld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
void
runtime·freezetheworld(void)
{
	int32 i;

	if(runtime·gomaxprocs == 1)
		return;
	// stopwait and preemption requests can be lost
	// due to races with concurrently executing threads,
	// so try several times
	for(i = 0; i < 5; i++) {
		// this should tell the scheduler to not start any new goroutines
		runtime·sched.stopwait = 0x7fffffff;
		runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
		// this should stop running goroutines
		if(!preemptall())
			break;  // no running goroutines
		runtime·usleep(1000);
	}
	// to be sure
	runtime·usleep(1000);
	preemptall();
	runtime·usleep(1000);
}

void
runtime·stoptheworld(void)
{
	int32 i;
	uint32 s;
	P *p;
	bool wait;

	runtime·lock(&runtime·sched);
	runtime·sched.stopwait = runtime·gomaxprocs;
	runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
	preemptall();
	// stop current P
	m->p->status = Pgcstop;
	runtime·sched.stopwait--;
	// try to retake all P's in Psyscall status
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		s = p->status;
		if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
			runtime·sched.stopwait--;
	}
	// stop idle P's
	while(p = pidleget()) {
		p->status = Pgcstop;
		runtime·sched.stopwait--;
	}
	wait = runtime·sched.stopwait > 0;
	runtime·unlock(&runtime·sched);

	// wait for remaining P's to stop voluntarily
	if(wait) {
		for(;;) {
			// wait for 100us, then try to re-preempt in case of any races
			if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
				runtime·noteclear(&runtime·sched.stopnote);
				break;
			}
			preemptall();
		}
	}
	if(runtime·sched.stopwait)
		runtime·throw("stoptheworld: not stopped");
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p->status != Pgcstop)
			runtime·throw("stoptheworld: not stopped");
	}
}

static void
mhelpgc(void)
{
	m->helpgc = -1;
}

void
runtime·starttheworld(void)
{
	P *p, *p1;
	M *mp;
	G *gp;
	bool add;

	m->locks++;  // disable preemption because it can be holding p in a local var
	gp = runtime·netpoll(false);  // non-blocking
	injectglist(gp);
	add = needaddgcproc();
	runtime·lock(&runtime·sched);
	if(newprocs) {
		procresize(newprocs);
		newprocs = 0;
	} else
		procresize(runtime·gomaxprocs);
	runtime·sched.gcwaiting = 0;

	p1 = nil;
	while(p = pidleget()) {
		// procresize() puts p's with work at the beginning of the list.
		// Once we reach a p without a run queue, the rest don't have one either.
		if(p->runqhead == p->runqtail) {
			pidleput(p);
			break;
		}
		p->m = mget();
		p->link = p1;
		p1 = p;
	}
	if(runtime·sched.sysmonwait) {
		runtime·sched.sysmonwait = false;
		runtime·notewakeup(&runtime·sched.sysmonnote);
	}
	runtime·unlock(&runtime·sched);

	while(p1) {
		p = p1;
		p1 = p1->link;
		if(p->m) {
			mp = p->m;
			p->m = nil;
			if(mp->nextp)
				runtime·throw("starttheworld: inconsistent mp->nextp");
			mp->nextp = p;
			runtime·notewakeup(&mp->park);
		} else {
			// Start M to run P.  Do not start another M below.
			newm(nil, p);
			add = false;
		}
	}

	if(add) {
		// If GC could have used another helper proc, start one now,
		// in the hope that it will be available next time.
		// It would have been even better to start it before the collection,
		// but doing so requires allocating memory, so it's tricky to
		// coordinate.  This lazy approach works out in practice:
		// we don't mind if the first couple gc rounds don't have quite
		// the maximum number of procs.
		newm(mhelpgc, nil);
	}
	m->locks--;
	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
		g->stackguard0 = StackPreempt;
}

// Called to start an M.
void
runtime·mstart(void)
{
#ifdef GOOS_windows
#ifdef GOARCH_386
	// It is used by windows-386 only. Unfortunately, seh needs
	// to be located on os stack, and mstart runs on os stack
	// for both m0 and m.
	SEH seh;
#endif
#endif

	if(g != m->g0)
		runtime·throw("bad runtime·mstart");

	// Record top of stack for use by mcall.
	// Once we call schedule we're never coming back,
	// so other calls can reuse this stack space.
	runtime·gosave(&m->g0->sched);
	m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
	m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
#ifdef GOOS_windows
#ifdef GOARCH_386
	m->seh = &seh;
#endif
#endif
	runtime·asminit();
	runtime·minit();

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if(m == &runtime·m0)
		runtime·initsig();
	
	if(m->mstartfn)
		m->mstartfn();

	if(m->helpgc) {
		m->helpgc = 0;
		stopm();
	} else if(m != &runtime·m0) {
		acquirep(m->nextp);
		m->nextp = nil;
	}
	schedule();

	// TODO(brainman): This point is never reached, because scheduler
	// does not release os threads at the moment. But once this path
	// is enabled, we must remove our seh here.
}

// When running with cgo, we call _cgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
void (*_cgo_thread_start)(void*);

typedef struct CgoThreadStart CgoThreadStart;
struct CgoThreadStart
{
	M *m;
	G *g;
	uintptr *tls;
	void (*fn)(void);
};

// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.
M*
runtime·allocm(P *p)
{
	M *mp;
	static Type *mtype;  // The Go type M

	m->locks++;  // disable GC because it can be called from sysmon
	if(m->p == nil)
		acquirep(p);  // temporarily borrow p for mallocs in this function
	if(mtype == nil) {
		Eface e;
		runtime·gc_m_ptr(&e);
		mtype = ((PtrType*)e.type)->elem;
	}

	mp = runtime·cnew(mtype);
	mcommoninit(mp);

	// In case of cgo or Solaris, pthread_create will make us a stack.
	// Windows will layout sched stack on OS stack.
	if(runtime·iscgo || Solaris || Windows)
		mp->g0 = runtime·malg(-1);
	else
		mp->g0 = runtime·malg(8192);

	if(p == m->p)
		releasep();
	m->locks--;
	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
		g->stackguard0 = StackPreempt;

	return mp;
}

static M* lockextra(bool nilokay);
static void unlockextra(M*);

// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via casp) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
#pragma textflag NOSPLIT
void
runtime·needm(byte x)
{
	M *mp;

	if(runtime·needextram) {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can not throw, because scheduler is not initialized yet.
		runtime·write(2, "fatal error: cgo callback before cgo call\n",
			sizeof("fatal error: cgo callback before cgo call\n")-1);
		runtime·exit(1);
	}

	// Lock extra list, take head, unlock popped list.
	// nilokay=false is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp = lockextra(false);

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp->needextram = mp->schedlink == nil;
	unlockextra(mp->schedlink);

	// Install m and g (= m->g0) and set the stack bounds
	// to match the current stack. We don't actually know
	// how big the stack is, like we don't know how big any
	// scheduling stack is, but we assume there's at least 32 kB,
	// which is more than enough for us.
	runtime·setmg(mp, mp->g0);
	g->stackbase = (uintptr)(&x + 1024);
	g->stackguard = (uintptr)(&x - 32*1024);
	g->stackguard0 = g->stackguard;

#ifdef GOOS_windows
#ifdef GOARCH_386
	// On windows/386, we need to put an SEH frame (two words)
	// somewhere on the current stack. We are called from cgocallback_gofunc
	// and we know that it will leave two unused words below m->curg->sched.sp.
	// Use those.
	m->seh = (SEH*)((uintptr*)&x + 1);
#endif
#endif

	// Initialize this thread to use the m.
	runtime·asminit();
	runtime·minit();
}

// newextram allocates an m and puts it on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
void
runtime·newextram(void)
{
	M *mp, *mnext;
	G *gp;

	// Create extra goroutine locked to extra m.
	// The goroutine is the context in which the cgo callback will run.
	// The sched.pc will never be returned to, but setting it to
	// runtime.goexit makes clear to the traceback routines where
	// the goroutine stack ends.
	mp = runtime·allocm(nil);
	gp = runtime·malg(4096);
	gp->sched.pc = (uintptr)runtime·goexit;
	gp->sched.sp = gp->stackbase;
	gp->sched.lr = 0;
	gp->sched.g = gp;
	gp->syscallpc = gp->sched.pc;
	gp->syscallsp = gp->sched.sp;
	gp->syscallstack = gp->stackbase;
	gp->syscallguard = gp->stackguard;
	gp->status = Gsyscall;
	mp->curg = gp;
	mp->locked = LockInternal;
	mp->lockedg = gp;
	gp->lockedm = mp;
	gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
	if(raceenabled)
		gp->racectx = runtime·racegostart(runtime·newextram);
	// put on allg for garbage collector
	allgadd(gp);

	// Add m to the extra list.
	mnext = lockextra(true);
	mp->schedlink = mnext;
	unlockextra(mp);
}

// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
// It puts the current m back onto the extra list.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
// variable using pthread_key_create. Unlike the pthread keys we already use
// on OS X, this dummy key would never be read by Go code. It would exist
// only so that we could register at thread-exit-time destructor.
// That destructor would put the m back onto the extra list.
// This is purely a performance optimization. The current version,
// in which dropm happens on each cgo call, is still correct too.
// We may have to keep the current version on systems with cgo
// but without pthreads, like Windows.
void
runtime·dropm(void)
{
	M *mp, *mnext;

	// Undo whatever initialization minit did during needm.
	runtime·unminit();

#ifdef GOOS_windows
#ifdef GOARCH_386
	m->seh = nil;  // reset dangling typed pointer
#endif
#endif

	// Clear m and g, and return m to the extra list.
	// After the call to setmg we can only call nosplit functions.
	mp = m;
	runtime·setmg(nil, nil);

	mnext = lockextra(true);
	mp->schedlink = mnext;
	unlockextra(mp);
}

#define MLOCKED ((M*)1)

// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to runtime.extram. If nilokay is true, then lockextra will
// return a nil list head if that's what it finds. If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.
#pragma textflag NOSPLIT
static M*
lockextra(bool nilokay)
{
	M *mp;
	void (*yield)(void);

	for(;;) {
		mp = runtime·atomicloadp(&runtime·extram);
		if(mp == MLOCKED) {
			yield = runtime·osyield;
			yield();
			continue;
		}
		if(mp == nil && !nilokay) {
			runtime·usleep(1);
			continue;
		}
		if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
			yield = runtime·osyield;
			yield();
			continue;
		}
		break;
	}
	return mp;
}

#pragma textflag NOSPLIT
static void
unlockextra(M *mp)
{
	runtime·atomicstorep(&runtime·extram, mp);
}


// Create a new m.  It will start off with a call to fn, or else the scheduler.
static void
newm(void(*fn)(void), P *p)
{
	M *mp;

	mp = runtime·allocm(p);
	mp->nextp = p;
	mp->mstartfn = fn;

	if(runtime·iscgo) {
		CgoThreadStart ts;

		if(_cgo_thread_start == nil)
			runtime·throw("_cgo_thread_start missing");
		ts.m = mp;
		ts.g = mp->g0;
		ts.tls = mp->tls;
		ts.fn = runtime·mstart;
		runtime·asmcgocall(_cgo_thread_start, &ts);
		return;
	}
	runtime·newosproc(mp, (byte*)mp->g0->stackbase);
}

// Stops execution of the current m until new work is available.
// Returns with acquired P.
static void
stopm(void)
{
	if(m->locks)
		runtime·throw("stopm holding locks");
	if(m->p)
		runtime·throw("stopm holding p");
	if(m->spinning) {
		m->spinning = false;
		runtime·xadd(&runtime·sched.nmspinning, -1);
	}

retry:
	runtime·lock(&runtime·sched);
	mput(m);
	runtime·unlock(&runtime·sched);
	runtime·notesleep(&m->park);
	runtime·noteclear(&m->park);
	if(m->helpgc) {
		runtime·gchelper();
		m->helpgc = 0;
		m->mcache = nil;
		goto retry;
	}
	acquirep(m->nextp);
	m->nextp = nil;
}

static void
mspinning(void)
{
	m->spinning = true;
}

// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.
static void
startm(P *p, bool spinning)
{
	M *mp;
	void (*fn)(void);

	runtime·lock(&runtime·sched);
	if(p == nil) {
		p = pidleget();
		if(p == nil) {
			runtime·unlock(&runtime·sched);
			if(spinning)
				runtime·xadd(&runtime·sched.nmspinning, -1);
			return;
		}
	}
	mp = mget();
	runtime·unlock(&runtime·sched);
	if(mp == nil) {
		fn = nil;
		if(spinning)
			fn = mspinning;
		newm(fn, p);
		return;
	}
	if(mp->spinning)
		runtime·throw("startm: m is spinning");
	if(mp->nextp)
		runtime·throw("startm: m has p");
	mp->spinning = spinning;
	mp->nextp = p;
	runtime·notewakeup(&mp->park);
}

// Hands off P from syscall or locked M.
static void
handoffp(P *p)
{
	// if it has local work, start it straight away
	if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
		startm(p, false);
		return;
	}
	// no local work, check that there are no spinning/idle M's,
	// otherwise our help is not required
	if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
		runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
		startm(p, true);
		return;
	}
	runtime·lock(&runtime·sched);
	if(runtime·sched.gcwaiting) {
		p->status = Pgcstop;
		if(--runtime·sched.stopwait == 0)
			runtime·notewakeup(&runtime·sched.stopnote);
		runtime·unlock(&runtime·sched);
		return;
	}
	if(runtime·sched.runqsize) {
		runtime·unlock(&runtime·sched);
		startm(p, false);
		return;
	}
	// If this is the last running P and nobody is polling network,
	// need to wakeup another M to poll network.
	if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
		runtime·unlock(&runtime·sched);
		startm(p, false);
		return;
	}
	pidleput(p);
	runtime·unlock(&runtime·sched);
}

// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
static void
wakep(void)
{
	// be conservative about spinning threads
	if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
		return;
	startm(nil, true);
}

// Stops execution of the current m that is locked to a g until the g is runnable again.
// Returns with acquired P.
static void
stoplockedm(void)
{
	P *p;

	if(m->lockedg == nil || m->lockedg->lockedm != m)
		runtime·throw("stoplockedm: inconsistent locking");
	if(m->p) {
		// Schedule another M to run this p.
		p = releasep();
		handoffp(p);
	}
	incidlelocked(1);
	// Wait until another thread schedules lockedg again.
	runtime·notesleep(&m->park);
	runtime·noteclear(&m->park);
	if(m->lockedg->status != Grunnable)
		runtime·throw("stoplockedm: not runnable");
	acquirep(m->nextp);
	m->nextp = nil;
}

// Schedules the locked m to run the locked gp.
static void
startlockedm(G *gp)
{
	M *mp;
	P *p;

	mp = gp->lockedm;
	if(mp == m)
		runtime·throw("startlockedm: locked to me");
	if(mp->nextp)
		runtime·throw("startlockedm: m has p");
	// directly handoff current P to the locked m
	incidlelocked(-1);
	p = releasep();
	mp->nextp = p;
	runtime·notewakeup(&mp->park);
	stopm();
}

// Stops the current m for stoptheworld.
// Returns when the world is restarted.
static void
gcstopm(void)
{
	P *p;

	if(!runtime·sched.gcwaiting)
		runtime·throw("gcstopm: not waiting for gc");
	if(m->spinning) {
		m->spinning = false;
		runtime·xadd(&runtime·sched.nmspinning, -1);
	}
	p = releasep();
	runtime·lock(&runtime·sched);
	p->status = Pgcstop;
	if(--runtime·sched.stopwait == 0)
		runtime·notewakeup(&runtime·sched.stopnote);
	runtime·unlock(&runtime·sched);
	stopm();
}

// Schedules gp to run on the current M.
// Never returns.
static void
execute(G *gp)
{
	int32 hz;

	if(gp->status != Grunnable) {
		runtime·printf("execute: bad g status %d\n", gp->status);
		runtime·throw("execute: bad g status");
	}
	gp->status = Grunning;
	gp->waitsince = 0;
	gp->preempt = false;
	gp->stackguard0 = gp->stackguard;
	m->p->schedtick++;
	m->curg = gp;
	gp->m = m;

	// Check whether the profiler needs to be turned on or off.
	hz = runtime·sched.profilehz;
	if(m->profilehz != hz)
		runtime·resetcpuprofiler(hz);

	runtime·gogo(&gp->sched);
}

// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from global queue, poll network.
static G*
findrunnable(void)
{
	G *gp;
	P *p;
	int32 i;

top:
	if(runtime·sched.gcwaiting) {
		gcstopm();
		goto top;
	}
	// local runq
	gp = runqget(m->p);
	if(gp)
		return gp;
	// global runq
	if(runtime·sched.runqsize) {
		runtime·lock(&runtime·sched);
		gp = globrunqget(m->p, 0);
		runtime·unlock(&runtime·sched);
		if(gp)
			return gp;
	}
	// poll network
	gp = runtime·netpoll(false);  // non-blocking
	if(gp) {
		injectglist(gp->schedlink);
		gp->status = Grunnable;
		return gp;
	}
	// If number of spinning M's >= number of busy P's, block.
	// This is necessary to prevent excessive CPU consumption
	// when GOMAXPROCS>>1 but the program parallelism is low.
	if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
		goto stop;
	if(!m->spinning) {
		m->spinning = true;
		runtime·xadd(&runtime·sched.nmspinning, 1);
	}
	// random steal from other P's
	for(i = 0; i < 2*runtime·gomaxprocs; i++) {
		if(runtime·sched.gcwaiting)
			goto top;
		p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
		if(p == m->p)
			gp = runqget(p);
		else
			gp = runqsteal(m->p, p);
		if(gp)
			return gp;
	}
stop:
	// return P and block
	runtime·lock(&runtime·sched);
	if(runtime·sched.gcwaiting) {
		runtime·unlock(&runtime·sched);
		goto top;
	}
	if(runtime·sched.runqsize) {
		gp = globrunqget(m->p, 0);
		runtime·unlock(&runtime·sched);
		return gp;
	}
	p = releasep();
	pidleput(p);
	runtime·unlock(&runtime·sched);
	if(m->spinning) {
		m->spinning = false;
		runtime·xadd(&runtime·sched.nmspinning, -1);
	}
	// check all runqueues once again
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p && p->runqhead != p->runqtail) {
			runtime·lock(&runtime·sched);
			p = pidleget();
			runtime·unlock(&runtime·sched);
			if(p) {
				acquirep(p);
				goto top;
			}
			break;
		}
	}
	// poll network
	if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
		if(m->p)
			runtime·throw("findrunnable: netpoll with p");
		if(m->spinning)
			runtime·throw("findrunnable: netpoll with spinning");
		gp = runtime·netpoll(true);  // block until new work is available
		runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
		if(gp) {
			runtime·lock(&runtime·sched);
			p = pidleget();
			runtime·unlock(&runtime·sched);
			if(p) {
				acquirep(p);
				injectglist(gp->schedlink);
				gp->status = Grunnable;
				return gp;
			}
			injectglist(gp);
		}
	}
	stopm();
	goto top;
}

static void
resetspinning(void)
{
	int32 nmspinning;

	if(m->spinning) {
		m->spinning = false;
		nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
		if(nmspinning < 0)
			runtime·throw("findrunnable: negative nmspinning");
	} else
		nmspinning = runtime·atomicload(&runtime·sched.nmspinning);

	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
	// so see if we need to wakeup another P here.
	if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
		wakep();
}

// Injects the list of runnable G's into the scheduler.
// Can run concurrently with GC.
static void
injectglist(G *glist)
{
	int32 n;
	G *gp;

	if(glist == nil)
		return;
	runtime·lock(&runtime·sched);
	for(n = 0; glist; n++) {
		gp = glist;
		glist = gp->schedlink;
		gp->status = Grunnable;
		globrunqput(gp);
	}
	runtime·unlock(&runtime·sched);

	for(; n && runtime·sched.npidle; n--)
		startm(nil, false);
}

// One round of scheduler: find a runnable goroutine and execute it.
// Never returns.
static void
schedule(void)
{
	G *gp;
	uint32 tick;

	if(m->locks)
		runtime·throw("schedule: holding locks");

top:
	if(runtime·sched.gcwaiting) {
		gcstopm();
		goto top;
	}

	gp = nil;
	// Check the global runnable queue once in a while to ensure fairness.
	// Otherwise two goroutines can completely occupy the local runqueue
	// by constantly respawning each other.
	tick = m->p->schedtick;
	// This is a fancy way to say tick%61==0,
	// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
	if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
		runtime·lock(&runtime·sched);
		gp = globrunqget(m->p, 1);
		runtime·unlock(&runtime·sched);
		if(gp)
			resetspinning();
	}
	if(gp == nil) {
		gp = runqget(m->p);
		if(gp && m->spinning)
			runtime·throw("schedule: spinning with local work");
	}
	if(gp == nil) {
		gp = findrunnable();  // blocks until work is available
		resetspinning();
	}

	if(gp->lockedm) {
		// Hands off own p to the locked m,
		// then blocks waiting for a new p.
		startlockedm(gp);
		goto top;
	}

	execute(gp);
}

// Puts the current goroutine into a waiting state and calls unlockf.
// If unlockf returns false, the goroutine is resumed.
void
runtime·park(bool(*unlockf)(G*, void*), void *lock, int8 *reason)
{
	m->waitlock = lock;
	m->waitunlockf = unlockf;
	g->waitreason = reason;
	runtime·mcall(park0);
}

static bool
parkunlock(G *gp, void *lock)
{
	USED(gp);
	runtime·unlock(lock);
	return true;
}

// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling runtime·ready(gp).
void
runtime·parkunlock(Lock *lock, int8 *reason)
{
	runtime·park(parkunlock, lock, reason);
}

// runtime·park continuation on g0.
static void
park0(G *gp)
{
	bool ok;

	gp->status = Gwaiting;
	gp->m = nil;
	m->curg = nil;
	if(m->waitunlockf) {
		ok = m->waitunlockf(gp, m->waitlock);
		m->waitunlockf = nil;
		m->waitlock = nil;
		if(!ok) {
			gp->status = Grunnable;
			execute(gp);  // Schedule it back, never returns.
		}
	}
	if(m->lockedg) {
		stoplockedm();
		execute(gp);  // Never returns.
	}
	schedule();
}

// Scheduler yield.
void
runtime·gosched(void)
{
	runtime·mcall(runtime·gosched0);
}

// runtime·gosched continuation on g0.
void
runtime·gosched0(G *gp)
{
	gp->status = Grunnable;
	gp->m = nil;
	m->curg = nil;
	runtime·lock(&runtime·sched);
	globrunqput(gp);
	runtime·unlock(&runtime·sched);
	if(m->lockedg) {
		stoplockedm();
		execute(gp);  // Never returns.
	}
	schedule();
}

// Finishes execution of the current goroutine.
// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
// Since it does not return it does not matter.  But if it is preempted
// at the split stack check, GC will complain about inconsistent sp.
#pragma textflag NOSPLIT
void
runtime·goexit(void)
{
	if(raceenabled)
		runtime·racegoend();
	runtime·mcall(goexit0);
}

// runtime·goexit continuation on g0.
static void
goexit0(G *gp)
{
	gp->status = Gdead;
	gp->m = nil;
	gp->lockedm = nil;
	m->curg = nil;
	m->lockedg = nil;
	if(m->locked & ~LockExternal) {
		runtime·printf("invalid m->locked = %d\n", m->locked);
		runtime·throw("internal lockOSThread error");
	}	
	m->locked = 0;
	runtime·unwindstack(gp, nil);
	gfput(m->p, gp);
	schedule();
}

#pragma textflag NOSPLIT
static void
save(void *pc, uintptr sp)
{
	g->sched.pc = (uintptr)pc;
	g->sched.sp = sp;
	g->sched.lr = 0;
	g->sched.ret = 0;
	g->sched.ctxt = 0;
	g->sched.g = g;
}

// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the runtime·gosave must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
#pragma textflag NOSPLIT
void
·entersyscall(int32 dummy)
{
	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	m->locks++;

	// Leave SP around for GC and traceback.
	save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
	g->syscallsp = g->sched.sp;
	g->syscallpc = g->sched.pc;
	g->syscallstack = g->stackbase;
	g->syscallguard = g->stackguard;
	g->status = Gsyscall;
	if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
		// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
		//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
		runtime·throw("entersyscall");
	}

	if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
		runtime·lock(&runtime·sched);
		if(runtime·atomicload(&runtime·sched.sysmonwait)) {
			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
			runtime·notewakeup(&runtime·sched.sysmonnote);
		}
		runtime·unlock(&runtime·sched);
		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
	}

	m->mcache = nil;
	m->p->m = nil;
	runtime·atomicstore(&m->p->status, Psyscall);
	if(runtime·sched.gcwaiting) {
		runtime·lock(&runtime·sched);
		if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
			if(--runtime·sched.stopwait == 0)
				runtime·notewakeup(&runtime·sched.stopnote);
		}
		runtime·unlock(&runtime·sched);
		save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
	}

	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
	// We set stackguard to StackPreempt so that first split stack check calls morestack.
	// Morestack detects this case and throws.
	g->stackguard0 = StackPreempt;
	m->locks--;
}

// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
#pragma textflag NOSPLIT
void
·entersyscallblock(int32 dummy)
{
	P *p;

	m->locks++;  // see comment in entersyscall

	// Leave SP around for GC and traceback.
	save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
	g->syscallsp = g->sched.sp;
	g->syscallpc = g->sched.pc;
	g->syscallstack = g->stackbase;
	g->syscallguard = g->stackguard;
	g->status = Gsyscall;
	if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
		// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
		//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
		runtime·throw("entersyscallblock");
	}

	p = releasep();
	handoffp(p);
	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
		incidlelocked(1);

	// Resave for traceback during blocked call.
	save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));

	g->stackguard0 = StackPreempt;  // see comment in entersyscall
	m->locks--;
}

// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
#pragma textflag NOSPLIT
void
runtime·exitsyscall(void)
{
	m->locks++;  // see comment in entersyscall

	if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
		incidlelocked(-1);

	g->waitsince = 0;
	if(exitsyscallfast()) {
		// There's a cpu for us, so we can run.
		m->p->syscalltick++;
		g->status = Grunning;
		// Garbage collector isn't running (since we are),
		// so okay to clear gcstack and gcsp.
		g->syscallstack = (uintptr)nil;
		g->syscallsp = (uintptr)nil;
		m->locks--;
		if(g->preempt) {
			// restore the preemption request in case we've cleared it in newstack
			g->stackguard0 = StackPreempt;
		} else {
			// otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock
			g->stackguard0 = g->stackguard;
		}
		return;
	}

	m->locks--;

	// Call the scheduler.
	runtime·mcall(exitsyscall0);

	// Scheduler returned, so we're allowed to run now.
	// Delete the gcstack information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	g->syscallstack = (uintptr)nil;
	g->syscallsp = (uintptr)nil;
	m->p->syscalltick++;
}

#pragma textflag NOSPLIT
static bool
exitsyscallfast(void)
{
	P *p;

	// Freezetheworld sets stopwait but does not retake P's.
	if(runtime·sched.stopwait) {
		m->p = nil;
		return false;
	}

	// Try to re-acquire the last P.
	if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
		// There's a cpu for us, so we can run.
		m->mcache = m->p->mcache;
		m->p->m = m;
		return true;
	}
	// Try to get any other idle P.
	m->p = nil;
	if(runtime·sched.pidle) {
		runtime·lock(&runtime·sched);
		p = pidleget();
		if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
			runtime·atomicstore(&runtime·sched.sysmonwait, 0);
			runtime·notewakeup(&runtime·sched.sysmonnote);
		}
		runtime·unlock(&runtime·sched);
		if(p) {
			acquirep(p);
			return true;
		}
	}
	return false;
}

// runtime·exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
static void
exitsyscall0(G *gp)
{
	P *p;

	gp->status = Grunnable;
	gp->m = nil;
	m->curg = nil;
	runtime·lock(&runtime·sched);
	p = pidleget();
	if(p == nil)
		globrunqput(gp);
	else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
		runtime·atomicstore(&runtime·sched.sysmonwait, 0);
		runtime·notewakeup(&runtime·sched.sysmonnote);
	}
	runtime·unlock(&runtime·sched);
	if(p) {
		acquirep(p);
		execute(gp);  // Never returns.
	}
	if(m->lockedg) {
		// Wait until another thread schedules gp and so m again.
		stoplockedm();
		execute(gp);  // Never returns.
	}
	stopm();
	schedule();  // Never returns.
}

// Called from syscall package before fork.
void
syscall·runtime_BeforeFork(void)
{
	// Fork can hang if preempted with signals frequently enough (see issue 5517).
	// Ensure that we stay on the same M where we disable profiling.
	m->locks++;
	if(m->profilehz != 0)
		runtime·resetcpuprofiler(0);
}

// Called from syscall package after fork in parent.
void
syscall·runtime_AfterFork(void)
{
	int32 hz;

	hz = runtime·sched.profilehz;
	if(hz != 0)
		runtime·resetcpuprofiler(hz);
	m->locks--;
}

// Hook used by runtime·malg to call runtime·stackalloc on the
// scheduler stack.  This exists because runtime·stackalloc insists
// on being called on the scheduler stack, to avoid trying to grow
// the stack while allocating a new stack segment.
static void
mstackalloc(G *gp)
{
	gp->param = runtime·stackalloc((uintptr)gp->param);
	runtime·gogo(&gp->sched);
}

// Allocate a new g, with a stack big enough for stacksize bytes.
G*
runtime·malg(int32 stacksize)
{
	G *newg;
	byte *stk;

	if(StackTop < sizeof(Stktop)) {
		runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
		runtime·throw("runtime: bad stack.h");
	}

	newg = runtime·malloc(sizeof(G));
	if(stacksize >= 0) {
		if(g == m->g0) {
			// running on scheduler stack already.
			stk = runtime·stackalloc(StackSystem + stacksize);
		} else {
			// have to call stackalloc on scheduler stack.
			g->param = (void*)(StackSystem + stacksize);
			runtime·mcall(mstackalloc);
			stk = g->param;
			g->param = nil;
		}
		newg->stacksize = StackSystem + stacksize;
		newg->stack0 = (uintptr)stk;
		newg->stackguard = (uintptr)stk + StackGuard;
		newg->stackguard0 = newg->stackguard;
		newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
		runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
	}
	return newg;
}

// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
// Cannot split the stack because it assumes that the arguments
// are available sequentially after &fn; they would not be
// copied if a stack split occurred.  It's OK for this to call
// functions that split the stack.
#pragma textflag NOSPLIT
void
runtime·newproc(int32 siz, FuncVal* fn, ...)
{
	byte *argp;

	if(thechar == '5')
		argp = (byte*)(&fn+2);  // skip caller's saved LR
	else
		argp = (byte*)(&fn+1);
	runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
}

// Create a new g running fn with narg bytes of arguments starting
// at argp and returning nret bytes of results.  callerpc is the
// address of the go statement that created this.  The new g is put
// on the queue of g's waiting to run.
G*
runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
{
	byte *sp;
	G *newg;
	P *p;
	int32 siz;

//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
	m->locks++;  // disable preemption because it can be holding p in a local var
	siz = narg + nret;
	siz = (siz+7) & ~7;

	// We could instead create a secondary stack frame
	// and make it look like goexit was on the original but
	// the call to the actual goroutine function was split.
	// Not worth it: this is almost always an error.
	if(siz > StackMin - 1024)
		runtime·throw("runtime.newproc: function arguments too large for new goroutine");

	p = m->p;
	if((newg = gfget(p)) != nil) {
		if(newg->stackguard - StackGuard != newg->stack0)
			runtime·throw("invalid stack in newg");
	} else {
		newg = runtime·malg(StackMin);
		allgadd(newg);
	}

	sp = (byte*)newg->stackbase;
	sp -= siz;
	runtime·memmove(sp, argp, narg);
	if(thechar == '5') {
		// caller's LR
		sp -= sizeof(void*);
		*(void**)sp = nil;
	}

	runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
	newg->sched.sp = (uintptr)sp;
	newg->sched.pc = (uintptr)runtime·goexit;
	newg->sched.g = newg;
	runtime·gostartcallfn(&newg->sched, fn);
	newg->gopc = (uintptr)callerpc;
	newg->status = Grunnable;
	if(p->goidcache == p->goidcacheend) {
		p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
		p->goidcacheend = p->goidcache + GoidCacheBatch;
	}
	newg->goid = p->goidcache++;
	newg->panicwrap = 0;
	if(raceenabled)
		newg->racectx = runtime·racegostart((void*)callerpc);
	runqput(p, newg);

	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
		wakep();
	m->locks--;
	if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
		g->stackguard0 = StackPreempt;
	return newg;
}

static void
allgadd(G *gp)
{
	G **new;
	uintptr cap;

	runtime·lock(&allglock);
	if(runtime·allglen >= allgcap) {
		cap = 4096/sizeof(new[0]);
		if(cap < 2*allgcap)
			cap = 2*allgcap;
		new = runtime·malloc(cap*sizeof(new[0]));
		if(new == nil)
			runtime·throw("runtime: cannot allocate memory");
		if(runtime·allg != nil) {
			runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
			runtime·free(runtime·allg);
		}
		runtime·allg = new;
		allgcap = cap;
	}
	runtime·allg[runtime·allglen++] = gp;
	runtime·unlock(&allglock);
}

// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
static void
gfput(P *p, G *gp)
{
	if(gp->stackguard - StackGuard != gp->stack0)
		runtime·throw("invalid stack in gfput");
	gp->schedlink = p->gfree;
	p->gfree = gp;
	p->gfreecnt++;
	if(p->gfreecnt >= 64) {
		runtime·lock(&runtime·sched.gflock);
		while(p->gfreecnt >= 32) {
			p->gfreecnt--;
			gp = p->gfree;
			p->gfree = gp->schedlink;
			gp->schedlink = runtime·sched.gfree;
			runtime·sched.gfree = gp;
		}
		runtime·unlock(&runtime·sched.gflock);
	}
}

// Get from gfree list.
// If local list is empty, grab a batch from global list.
static G*
gfget(P *p)
{
	G *gp;

retry:
	gp = p->gfree;
	if(gp == nil && runtime·sched.gfree) {
		runtime·lock(&runtime·sched.gflock);
		while(p->gfreecnt < 32 && runtime·sched.gfree) {
			p->gfreecnt++;
			gp = runtime·sched.gfree;
			runtime·sched.gfree = gp->schedlink;
			gp->schedlink = p->gfree;
			p->gfree = gp;
		}
		runtime·unlock(&runtime·sched.gflock);
		goto retry;
	}
	if(gp) {
		p->gfree = gp->schedlink;
		p->gfreecnt--;
	}
	return gp;
}

// Purge all cached G's from gfree list to the global list.
static void
gfpurge(P *p)
{
	G *gp;

	runtime·lock(&runtime·sched.gflock);
	while(p->gfreecnt) {
		p->gfreecnt--;
		gp = p->gfree;
		p->gfree = gp->schedlink;
		gp->schedlink = runtime·sched.gfree;
		runtime·sched.gfree = gp;
	}
	runtime·unlock(&runtime·sched.gflock);
}

void
runtime·Breakpoint(void)
{
	runtime·breakpoint();
}

void
runtime·Gosched(void)
{
	runtime·gosched();
}

// Implementation of runtime.GOMAXPROCS.
// delete when scheduler is even stronger
int32
runtime·gomaxprocsfunc(int32 n)
{
	int32 ret;

	if(n > MaxGomaxprocs)
		n = MaxGomaxprocs;
	runtime·lock(&runtime·sched);
	ret = runtime·gomaxprocs;
	if(n <= 0 || n == ret) {
		runtime·unlock(&runtime·sched);
		return ret;
	}
	runtime·unlock(&runtime·sched);

	runtime·semacquire(&runtime·worldsema, false);
	m->gcing = 1;
	runtime·stoptheworld();
	newprocs = n;
	m->gcing = 0;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();

	return ret;
}

// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
// after they modify m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
#pragma textflag NOSPLIT
static void
lockOSThread(void)
{
	m->lockedg = g;
	g->lockedm = m;
}

void
runtime·LockOSThread(void)
{
	m->locked |= LockExternal;
	lockOSThread();
}

void
runtime·lockOSThread(void)
{
	m->locked += LockInternal;
	lockOSThread();
}


// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
// after they update m->locked. Do not allow preemption during this call,
// or else the m might be in different in this function than in the caller.
#pragma textflag NOSPLIT
static void
unlockOSThread(void)
{
	if(m->locked != 0)
		return;
	m->lockedg = nil;
	g->lockedm = nil;
}

void
runtime·UnlockOSThread(void)
{
	m->locked &= ~LockExternal;
	unlockOSThread();
}

void
runtime·unlockOSThread(void)
{
	if(m->locked < LockInternal)
		runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
	m->locked -= LockInternal;
	unlockOSThread();
}

bool
runtime·lockedOSThread(void)
{
	return g->lockedm != nil && m->lockedg != nil;
}

// for testing of callbacks
void
runtime·golockedOSThread(bool ret)
{
	ret = runtime·lockedOSThread();
	FLUSH(&ret);
}

void
runtime·NumGoroutine(intgo ret)
{
	ret = runtime·gcount();
	FLUSH(&ret);
}

int32
runtime·gcount(void)
{
	G *gp;
	int32 n, s;
	uintptr i;

	n = 0;
	runtime·lock(&allglock);
	// TODO(dvyukov): runtime.NumGoroutine() is O(N).
	// We do not want to increment/decrement centralized counter in newproc/goexit,
	// just to make runtime.NumGoroutine() faster.
	// Compromise solution is to introduce per-P counters of active goroutines.
	for(i = 0; i < runtime·allglen; i++) {
		gp = runtime·allg[i];
		s = gp->status;
		if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
			n++;
	}
	runtime·unlock(&allglock);
	return n;
}

int32
runtime·mcount(void)
{
	return runtime·sched.mcount;
}

void
runtime·badmcall(void (*fn)(G*))  // called from assembly
{
	USED(fn); // TODO: print fn?
	runtime·throw("runtime: mcall called on m->g0 stack");
}

void
runtime·badmcall2(void (*fn)(G*))  // called from assembly
{
	USED(fn);
	runtime·throw("runtime: mcall function returned");
}

void
runtime·badreflectcall(void) // called from assembly
{
	runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
}

static struct {
	Lock;
	void (*fn)(uintptr*, int32);
	int32 hz;
	uintptr pcbuf[100];
} prof;

static void
System(void)
{
}

// Called if we receive a SIGPROF signal.
void
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
{
	int32 n;
	bool traceback;
	MCache *mcache;
	// Do not use global m in this function, use mp instead.
	// On windows one m is sending reports about all the g's, so m means a wrong thing.
	byte m;

	m = 0;
	USED(m);

	if(prof.fn == nil || prof.hz == 0)
		return;

	// Profiling runs concurrently with GC, so it must not allocate.
	mcache = mp->mcache;
	mp->mcache = nil;

	// Define that a "user g" is a user-created goroutine, and a "system g"
	// is one that is m->g0 or m->gsignal. We've only made sure that we
	// can unwind user g's, so exclude the system g's.
	//
	// It is not quite as easy as testing gp == m->curg (the current user g)
	// because we might be interrupted for profiling halfway through a
	// goroutine switch. The switch involves updating three (or four) values:
	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
	// because once it gets updated the new g is running.
	//
	// When switching from a user g to a system g, LR is not considered live,
	// so the update only affects g, SP, and PC. Since PC must be last, there
	// the possible partial transitions in ordinary execution are (1) g alone is updated,
	// (2) both g and SP are updated, and (3) SP alone is updated.
	// If g is updated, we'll see a system g and not look closer.
	// If SP alone is updated, we can detect the partial transition by checking
	// whether the SP is within g's stack bounds. (We could also require that SP
	// be changed only after g, but the stack bounds check is needed by other
	// cases, so there is no need to impose an additional requirement.)
	//
	// There is one exceptional transition to a system g, not in ordinary execution.
	// When a signal arrives, the operating system starts the signal handler running
	// with an updated PC and SP. The g is updated last, at the beginning of the
	// handler. There are two reasons this is okay. First, until g is updated the
	// g and SP do not match, so the stack bounds check detects the partial transition.
	// Second, signal handlers currently run with signals disabled, so a profiling
	// signal cannot arrive during the handler.
	//
	// When switching from a system g to a user g, there are three possibilities.
	//
	// First, it may be that the g switch has no PC update, because the SP
	// either corresponds to a user g throughout (as in runtime.asmcgocall)
	// or because it has been arranged to look like a user g frame
	// (as in runtime.cgocallback_gofunc). In this case, since the entire
	// transition is a g+SP update, a partial transition updating just one of 
	// those will be detected by the stack bounds check.
	//
	// Second, when returning from a signal handler, the PC and SP updates
	// are performed by the operating system in an atomic update, so the g
	// update must be done before them. The stack bounds check detects
	// the partial transition here, and (again) signal handlers run with signals
	// disabled, so a profiling signal cannot arrive then anyway.
	//
	// Third, the common case: it may be that the switch updates g, SP, and PC
	// separately, as in runtime.gogo.
	//
	// Because runtime.gogo is the only instance, we check whether the PC lies
	// within that function, and if so, not ask for a traceback. This approach
	// requires knowing the size of the runtime.gogo function, which we
	// record in arch_*.h and check in runtime_test.go.
	//
	// There is another apparently viable approach, recorded here in case
	// the "PC within runtime.gogo" check turns out not to be usable.
	// It would be possible to delay the update of either g or SP until immediately
	// before the PC update instruction. Then, because of the stack bounds check,
	// the only problematic interrupt point is just before that PC update instruction,
	// and the sigprof handler can detect that instruction and simulate stepping past
	// it in order to reach a consistent state. On ARM, the update of g must be made
	// in two places (in R10 and also in a TLS slot), so the delayed update would
	// need to be the SP update. The sigprof handler must read the instruction at
	// the current PC and if it was the known instruction (for example, JMP BX or 
	// MOV R2, PC), use that other register in place of the PC value.
	// The biggest drawback to this solution is that it requires that we can tell
	// whether it's safe to read from the memory pointed at by PC.
	// In a correct program, we can test PC == nil and otherwise read,
	// but if a profiling signal happens at the instant that a program executes
	// a bad jump (before the program manages to handle the resulting fault)
	// the profiling handler could fault trying to read nonexistent memory.
	//
	// To recap, there are no constraints on the assembly being used for the
	// transition. We simply require that g and SP match and that the PC is not
	// in runtime.gogo.
	traceback = true;
	if(gp == nil || gp != mp->curg ||
	   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
	   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
		traceback = false;

	// Race detector calls asmcgocall w/o entersyscall/exitsyscall,
	// we can not currently unwind through asmcgocall.
	if(mp != nil && mp->racecall)
		traceback = false;

	runtime·lock(&prof);
	if(prof.fn == nil) {
		runtime·unlock(&prof);
		mp->mcache = mcache;
		return;
	}
	n = 0;
	if(traceback)
		n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
	if(!traceback || n <= 0) {
		n = 2;
		prof.pcbuf[0] = (uintptr)pc;
		prof.pcbuf[1] = (uintptr)System + 1;
	}
	prof.fn(prof.pcbuf, n);
	runtime·unlock(&prof);
	mp->mcache = mcache;
}

// Arrange to call fn with a traceback hz times a second.
void
runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
{
	// Force sane arguments.
	if(hz < 0)
		hz = 0;
	if(hz == 0)
		fn = nil;
	if(fn == nil)
		hz = 0;

	// Disable preemption, otherwise we can be rescheduled to another thread
	// that has profiling enabled.
	m->locks++;

	// Stop profiler on this thread so that it is safe to lock prof.
	// if a profiling signal came in while we had prof locked,
	// it would deadlock.
	runtime·resetcpuprofiler(0);

	runtime·lock(&prof);
	prof.fn = fn;
	prof.hz = hz;
	runtime·unlock(&prof);
	runtime·lock(&runtime·sched);
	runtime·sched.profilehz = hz;
	runtime·unlock(&runtime·sched);

	if(hz != 0)
		runtime·resetcpuprofiler(hz);

	m->locks--;
}

// Change number of processors.  The world is stopped, sched is locked.
static void
procresize(int32 new)
{
	int32 i, old;
	bool empty;
	G *gp;
	P *p;

	old = runtime·gomaxprocs;
	if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs)
		runtime·throw("procresize: invalid arg");
	// initialize new P's
	for(i = 0; i < new; i++) {
		p = runtime·allp[i];
		if(p == nil) {
			p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
			p->id = i;
			p->status = Pgcstop;
			runtime·atomicstorep(&runtime·allp[i], p);
		}
		if(p->mcache == nil) {
			if(old==0 && i==0)
				p->mcache = m->mcache;  // bootstrap
			else
				p->mcache = runtime·allocmcache();
		}
	}

	// redistribute runnable G's evenly
	// collect all runnable goroutines in global queue preserving FIFO order
	// FIFO order is required to ensure fairness even during frequent GCs
	// see http://golang.org/issue/7126
	empty = false;
	while(!empty) {
		empty = true;
		for(i = 0; i < old; i++) {
			p = runtime·allp[i];
			if(p->runqhead == p->runqtail)
				continue;
			empty = false;
			// pop from tail of local queue
			p->runqtail--;
			gp = p->runq[p->runqtail%nelem(p->runq)];
			// push onto head of global queue
			gp->schedlink = runtime·sched.runqhead;
			runtime·sched.runqhead = gp;
			if(runtime·sched.runqtail == nil)
				runtime·sched.runqtail = gp;
			runtime·sched.runqsize++;
		}
	}
	// fill local queues with at most nelem(p->runq)/2 goroutines
	// start at 1 because current M already executes some G and will acquire allp[0] below,
	// so if we have a spare G we want to put it into allp[1].
	for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
		gp = runtime·sched.runqhead;
		runtime·sched.runqhead = gp->schedlink;
		if(runtime·sched.runqhead == nil)
			runtime·sched.runqtail = nil;
		runtime·sched.runqsize--;
		runqput(runtime·allp[i%new], gp);
	}

	// free unused P's
	for(i = new; i < old; i++) {
		p = runtime·allp[i];
		runtime·freemcache(p->mcache);
		p->mcache = nil;
		gfpurge(p);
		p->status = Pdead;
		// can't free P itself because it can be referenced by an M in syscall
	}

	if(m->p)
		m->p->m = nil;
	m->p = nil;
	m->mcache = nil;
	p = runtime·allp[0];
	p->m = nil;
	p->status = Pidle;
	acquirep(p);
	for(i = new-1; i > 0; i--) {
		p = runtime·allp[i];
		p->status = Pidle;
		pidleput(p);
	}
	runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
}

// Associate p and the current m.
static void
acquirep(P *p)
{
	if(m->p || m->mcache)
		runtime·throw("acquirep: already in go");
	if(p->m || p->status != Pidle) {
		runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
		runtime·throw("acquirep: invalid p state");
	}
	m->mcache = p->mcache;
	m->p = p;
	p->m = m;
	p->status = Prunning;
}

// Disassociate p and the current m.
static P*
releasep(void)
{
	P *p;

	if(m->p == nil || m->mcache == nil)
		runtime·throw("releasep: invalid arg");
	p = m->p;
	if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
		runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
			m, m->p, p->m, m->mcache, p->mcache, p->status);
		runtime·throw("releasep: invalid p state");
	}
	m->p = nil;
	m->mcache = nil;
	p->m = nil;
	p->status = Pidle;
	return p;
}

static void
incidlelocked(int32 v)
{
	runtime·lock(&runtime·sched);
	runtime·sched.nmidlelocked += v;
	if(v > 0)
		checkdead();
	runtime·unlock(&runtime·sched);
}

// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
static void
checkdead(void)
{
	G *gp;
	int32 run, grunning, s;
	uintptr i;

	// -1 for sysmon
	run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
	if(run > 0)
		return;
	if(run < 0) {
		runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
			runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
		runtime·throw("checkdead: inconsistent counts");
	}
	grunning = 0;
	runtime·lock(&allglock);
	for(i = 0; i < runtime·allglen; i++) {
		gp = runtime·allg[i];
		if(gp->isbackground)
			continue;
		s = gp->status;
		if(s == Gwaiting)
			grunning++;
		else if(s == Grunnable || s == Grunning || s == Gsyscall) {
			runtime·unlock(&allglock);
			runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
			runtime·throw("checkdead: runnable g");
		}
	}
	runtime·unlock(&allglock);
	if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
		runtime·exit(0);
	m->throwing = -1;  // do not dump full stacks
	runtime·throw("all goroutines are asleep - deadlock!");
}

static void
sysmon(void)
{
	uint32 idle, delay;
	int64 now, lastpoll, lasttrace;
	G *gp;

	lasttrace = 0;
	idle = 0;  // how many cycles in succession we had not wokeup somebody
	delay = 0;
	for(;;) {
		if(idle == 0)  // start with 20us sleep...
			delay = 20;
		else if(idle > 50)  // start doubling the sleep after 1ms...
			delay *= 2;
		if(delay > 10*1000)  // up to 10ms
			delay = 10*1000;
		runtime·usleep(delay);
		if(runtime·debug.schedtrace <= 0 &&
			(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
			runtime·lock(&runtime·sched);
			if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
				runtime·atomicstore(&runtime·sched.sysmonwait, 1);
				runtime·unlock(&runtime·sched);
				runtime·notesleep(&runtime·sched.sysmonnote);
				runtime·noteclear(&runtime·sched.sysmonnote);
				idle = 0;
				delay = 20;
			} else
				runtime·unlock(&runtime·sched);
		}
		// poll network if not polled for more than 10ms
		lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
		now = runtime·nanotime();
		if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
			runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
			gp = runtime·netpoll(false);  // non-blocking
			if(gp) {
				// Need to decrement number of idle locked M's
				// (pretending that one more is running) before injectglist.
				// Otherwise it can lead to the following situation:
				// injectglist grabs all P's but before it starts M's to run the P's,
				// another M returns from syscall, finishes running its G,
				// observes that there is no work to do and no other running M's
				// and reports deadlock.
				incidlelocked(-1);
				injectglist(gp);
				incidlelocked(1);
			}
		}
		// retake P's blocked in syscalls
		// and preempt long running G's
		if(retake(now))
			idle = 0;
		else
			idle++;

		if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
			lasttrace = now;
			runtime·schedtrace(runtime·debug.scheddetail);
		}
	}
}

typedef struct Pdesc Pdesc;
struct Pdesc
{
	uint32	schedtick;
	int64	schedwhen;
	uint32	syscalltick;
	int64	syscallwhen;
};
static Pdesc pdesc[MaxGomaxprocs];

static uint32
retake(int64 now)
{
	uint32 i, s, n;
	int64 t;
	P *p;
	Pdesc *pd;

	n = 0;
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p==nil)
			continue;
		pd = &pdesc[i];
		s = p->status;
		if(s == Psyscall) {
			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
			t = p->syscalltick;
			if(pd->syscalltick != t) {
				pd->syscalltick = t;
				pd->syscallwhen = now;
				continue;
			}
			// On the one hand we don't want to retake Ps if there is no other work to do,
			// but on the other hand we want to retake them eventually
			// because they can prevent the sysmon thread from deep sleep.
			if(p->runqhead == p->runqtail &&
				runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
				pd->syscallwhen + 10*1000*1000 > now)
				continue;
			// Need to decrement number of idle locked M's
			// (pretending that one more is running) before the CAS.
			// Otherwise the M from which we retake can exit the syscall,
			// increment nmidle and report deadlock.
			incidlelocked(-1);
			if(runtime·cas(&p->status, s, Pidle)) {
				n++;
				handoffp(p);
			}
			incidlelocked(1);
		} else if(s == Prunning) {
			// Preempt G if it's running for more than 10ms.
			t = p->schedtick;
			if(pd->schedtick != t) {
				pd->schedtick = t;
				pd->schedwhen = now;
				continue;
			}
			if(pd->schedwhen + 10*1000*1000 > now)
				continue;
			preemptone(p);
		}
	}
	return n;
}

// Tell all goroutines that they have been preempted and they should stop.
// This function is purely best-effort.  It can fail to inform a goroutine if a
// processor just started running it.
// No locks need to be held.
// Returns true if preemption request was issued to at least one goroutine.
static bool
preemptall(void)
{
	P *p;
	int32 i;
	bool res;

	res = false;
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p == nil || p->status != Prunning)
			continue;
		res |= preemptone(p);
	}
	return res;
}

// Tell the goroutine running on processor P to stop.
// This function is purely best-effort.  It can incorrectly fail to inform the
// goroutine.  It can send inform the wrong goroutine.  Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing runtime·newstack.
// No lock needs to be held.
// Returns true if preemption request was issued.
static bool
preemptone(P *p)
{
	M *mp;
	G *gp;

	mp = p->m;
	if(mp == nil || mp == m)
		return false;
	gp = mp->curg;
	if(gp == nil || gp == mp->g0)
		return false;
	gp->preempt = true;
	gp->stackguard0 = StackPreempt;
	return true;
}

void
runtime·schedtrace(bool detailed)
{
	static int64 starttime;
	int64 now;
	int64 id1, id2, id3;
	int32 i, t, h;
	uintptr gi;
	int8 *fmt;
	M *mp, *lockedm;
	G *gp, *lockedg;
	P *p;

	now = runtime·nanotime();
	if(starttime == 0)
		starttime = now;

	runtime·lock(&runtime·sched);
	runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
		(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
		runtime·sched.nmidle, runtime·sched.runqsize);
	if(detailed) {
		runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
			runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
			runtime·sched.stopwait, runtime·sched.sysmonwait);
	}
	// We must be careful while reading data from P's, M's and G's.
	// Even if we hold schedlock, most data can be changed concurrently.
	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p == nil)
			continue;
		mp = p->m;
		h = runtime·atomicload(&p->runqhead);
		t = runtime·atomicload(&p->runqtail);
		if(detailed)
			runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
				i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
		else {
			// In non-detailed mode format lengths of per-P run queues as:
			// [len1 len2 len3 len4]
			fmt = " %d";
			if(runtime·gomaxprocs == 1)
				fmt = " [%d]\n";
			else if(i == 0)
				fmt = " [%d";
			else if(i == runtime·gomaxprocs-1)
				fmt = " %d]\n";
			runtime·printf(fmt, t-h);
		}
	}
	if(!detailed) {
		runtime·unlock(&runtime·sched);
		return;
	}
	for(mp = runtime·allm; mp; mp = mp->alllink) {
		p = mp->p;
		gp = mp->curg;
		lockedg = mp->lockedg;
		id1 = -1;
		if(p)
			id1 = p->id;
		id2 = -1;
		if(gp)
			id2 = gp->goid;
		id3 = -1;
		if(lockedg)
			id3 = lockedg->goid;
		runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
			" locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
			mp->id, id1, id2,
			mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
			mp->spinning, id3);
	}
	runtime·lock(&allglock);
	for(gi = 0; gi < runtime·allglen; gi++) {
		gp = runtime·allg[gi];
		mp = gp->m;
		lockedm = gp->lockedm;
		runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",
			gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
			lockedm ? lockedm->id : -1);
	}
	runtime·unlock(&allglock);
	runtime·unlock(&runtime·sched);
}

// Put mp on midle list.
// Sched must be locked.
static void
mput(M *mp)
{
	mp->schedlink = runtime·sched.midle;
	runtime·sched.midle = mp;
	runtime·sched.nmidle++;
	checkdead();
}

// Try to get an m from midle list.
// Sched must be locked.
static M*
mget(void)
{
	M *mp;

	if((mp = runtime·sched.midle) != nil){
		runtime·sched.midle = mp->schedlink;
		runtime·sched.nmidle--;
	}
	return mp;
}

// Put gp on the global runnable queue.
// Sched must be locked.
static void
globrunqput(G *gp)
{
	gp->schedlink = nil;
	if(runtime·sched.runqtail)
		runtime·sched.runqtail->schedlink = gp;
	else
		runtime·sched.runqhead = gp;
	runtime·sched.runqtail = gp;
	runtime·sched.runqsize++;
}

// Put a batch of runnable goroutines on the global runnable queue.
// Sched must be locked.
static void
globrunqputbatch(G *ghead, G *gtail, int32 n)
{
	gtail->schedlink = nil;
	if(runtime·sched.runqtail)
		runtime·sched.runqtail->schedlink = ghead;
	else
		runtime·sched.runqhead = ghead;
	runtime·sched.runqtail = gtail;
	runtime·sched.runqsize += n;
}

// Try get a batch of G's from the global runnable queue.
// Sched must be locked.
static G*
globrunqget(P *p, int32 max)
{
	G *gp, *gp1;
	int32 n;

	if(runtime·sched.runqsize == 0)
		return nil;
	n = runtime·sched.runqsize/runtime·gomaxprocs+1;
	if(n > runtime·sched.runqsize)
		n = runtime·sched.runqsize;
	if(max > 0 && n > max)
		n = max;
	if(n > nelem(p->runq)/2)
		n = nelem(p->runq)/2;
	runtime·sched.runqsize -= n;
	if(runtime·sched.runqsize == 0)
		runtime·sched.runqtail = nil;
	gp = runtime·sched.runqhead;
	runtime·sched.runqhead = gp->schedlink;
	n--;
	while(n--) {
		gp1 = runtime·sched.runqhead;
		runtime·sched.runqhead = gp1->schedlink;
		runqput(p, gp1);
	}
	return gp;
}

// Put p to on pidle list.
// Sched must be locked.
static void
pidleput(P *p)
{
	p->link = runtime·sched.pidle;
	runtime·sched.pidle = p;
	runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
}

// Try get a p from pidle list.
// Sched must be locked.
static P*
pidleget(void)
{
	P *p;

	p = runtime·sched.pidle;
	if(p) {
		runtime·sched.pidle = p->link;
		runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
	}
	return p;
}

// Try to put g on local runnable queue.
// If it's full, put onto global queue.
// Executed only by the owner P.
static void
runqput(P *p, G *gp)
{
	uint32 h, t;

retry:
	h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
	t = p->runqtail;
	if(t - h < nelem(p->runq)) {
		p->runq[t%nelem(p->runq)] = gp;
		runtime·atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption
		return;
	}
	if(runqputslow(p, gp, h, t))
		return;
	// the queue is not full, now the put above must suceed
	goto retry;
}

// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
static bool
runqputslow(P *p, G *gp, uint32 h, uint32 t)
{
	G *batch[nelem(p->runq)/2+1];
	uint32 n, i;

	// First, grab a batch from local queue.
	n = t-h;
	n = n/2;
	if(n != nelem(p->runq)/2)
		runtime·throw("runqputslow: queue is not full");
	for(i=0; i<n; i++)
		batch[i] = p->runq[(h+i)%nelem(p->runq)];
	if(!runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
		return false;
	batch[n] = gp;
	// Link the goroutines.
	for(i=0; i<n; i++)
		batch[i]->schedlink = batch[i+1];
	// Now put the batch on global queue.
	runtime·lock(&runtime·sched);
	globrunqputbatch(batch[0], batch[n], n+1);
	runtime·unlock(&runtime·sched);
	return true;
}

// Get g from local runnable queue.
// Executed only by the owner P.
static G*
runqget(P *p)
{
	G *gp;
	uint32 t, h;

	for(;;) {
		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
		t = p->runqtail;
		if(t == h)
			return nil;
		gp = p->runq[h%nelem(p->runq)];
		if(runtime·cas(&p->runqhead, h, h+1))  // cas-release, commits consume
			return gp;
	}
}

// Grabs a batch of goroutines from local runnable queue.
// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
// Can be executed by any P.
static uint32
runqgrab(P *p, G **batch)
{
	uint32 t, h, n, i;

	for(;;) {
		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
		t = runtime·atomicload(&p->runqtail);  // load-acquire, synchronize with the producer
		n = t-h;
		n = n - n/2;
		if(n == 0)
			break;
		if(n > nelem(p->runq)/2)  // read inconsistent h and t
			continue;
		for(i=0; i<n; i++)
			batch[i] = p->runq[(h+i)%nelem(p->runq)];
		if(runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
			break;
	}
	return n;
}

// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
static G*
runqsteal(P *p, P *p2)
{
	G *gp;
	G *batch[nelem(p->runq)/2];
	uint32 t, h, n, i;

	n = runqgrab(p2, batch);
	if(n == 0)
		return nil;
	n--;
	gp = batch[n];
	if(n == 0)
		return gp;
	h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
	t = p->runqtail;
	if(t - h + n >= nelem(p->runq))
		runtime·throw("runqsteal: runq overflow");
	for(i=0; i<n; i++, t++)
		p->runq[t%nelem(p->runq)] = batch[i];
	runtime·atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption
	return gp;
}

void
runtime·testSchedLocalQueue(void)
{
	P p;
	G gs[nelem(p.runq)];
	int32 i, j;

	runtime·memclr((byte*)&p, sizeof(p));

	for(i = 0; i < nelem(gs); i++) {
		if(runqget(&p) != nil)
			runtime·throw("runq is not empty initially");
		for(j = 0; j < i; j++)
			runqput(&p, &gs[i]);
		for(j = 0; j < i; j++) {
			if(runqget(&p) != &gs[i]) {
				runtime·printf("bad element at iter %d/%d\n", i, j);
				runtime·throw("bad element");
			}
		}
		if(runqget(&p) != nil)
			runtime·throw("runq is not empty afterwards");
	}
}

void
runtime·testSchedLocalQueueSteal(void)
{
	P p1, p2;
	G gs[nelem(p1.runq)], *gp;
	int32 i, j, s;

	runtime·memclr((byte*)&p1, sizeof(p1));
	runtime·memclr((byte*)&p2, sizeof(p2));

	for(i = 0; i < nelem(gs); i++) {
		for(j = 0; j < i; j++) {
			gs[j].sig = 0;
			runqput(&p1, &gs[j]);
		}
		gp = runqsteal(&p2, &p1);
		s = 0;
		if(gp) {
			s++;
			gp->sig++;
		}
		while(gp = runqget(&p2)) {
			s++;
			gp->sig++;
		}
		while(gp = runqget(&p1))
			gp->sig++;
		for(j = 0; j < i; j++) {
			if(gs[j].sig != 1) {
				runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
				runtime·throw("bad element");
			}
		}
		if(s != i/2 && s != i/2+1) {
			runtime·printf("bad steal %d, want %d or %d, iter %d\n",
				s, i/2, i/2+1, i);
			runtime·throw("bad steal");
		}
	}
}

extern void runtime·morestack(void);

// Does f mark the top of a goroutine stack?
bool
runtime·topofstack(Func *f)
{
	return f->entry == (uintptr)runtime·goexit ||
		f->entry == (uintptr)runtime·mstart ||
		f->entry == (uintptr)runtime·mcall ||
		f->entry == (uintptr)runtime·morestack ||
		f->entry == (uintptr)runtime·lessstack ||
		f->entry == (uintptr)_rt0_go;
}

void
runtime∕debug·setMaxThreads(intgo in, intgo out)
{
	runtime·lock(&runtime·sched);
	out = runtime·sched.maxmcount;
	runtime·sched.maxmcount = in;
	checkmcount();
	runtime·unlock(&runtime·sched);
	FLUSH(&out);
}

static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h

static bool
haveexperiment(int8 *name)
{
	int32 i, j;
	
	for(i=0; i<sizeof(experiment); i++) {
		if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
			for(j=0; name[j]; j++)
				if(experiment[i+j] != name[j])
					goto nomatch;
			if(experiment[i+j] != '\0' && experiment[i+j] != ',')
				goto nomatch;
			return 1;
		}
	nomatch:;
	}
	return 0;
}

// func runtime_procPin() int
void
sync·runtime_procPin(intgo p)
{
	M *mp;

	mp = m;
	// Disable preemption.
	mp->locks++;
	p = mp->p->id;
	FLUSH(&p);
}

// func runtime_procUnpin()
void
sync·runtime_procUnpin(void)
{
	m->locks--;
}
-												synch chan

SVN=127057

											
										
										
											2008-07-14 15:34:27 -06:00
+								// Copyright 2009 The Go Authors. All rights reserved.
 								// Use of this source code is governed by a BSD-style
 								// license that can be found in the LICENSE file.
 								#include "runtime.h"
-												runtime: make more build-friendly

Collapse the arch,os-specific directories into the main directory
by renaming xxx/foo.c to foo_xxx.c, and so on.

There are no substantial edits here, except to the Makefile.
The assumption is that the Go tool will #define GOOS_darwin
and GOARCH_amd64 and will make any file named something
like signals_darwin.h available as signals_GOOS.h during the
build.  This replaces what used to be done with -I$(GOOS).

There is still work to be done to make runtime build with
standard tools, but this is a big step.  After this we will have
to write a script to generate all the generated files so they
can be checked in (instead of generated during the build).

R=r, iant, r, lucio.dere
CC=golang-dev
https://golang.org/cl/5490053

											
										
										
											2011-12-16 13:33:58 -07:00
+								#include "arch_GOARCH.h"
-												build: disable precise collection of stack frames

The code for call site-specific pointer bitmaps was not ready in time,
but the zeroing required without it is too expensive to use by default.
We will have to wait for precise collection of stack frames until Go 1.3.

The precise collection can be re-enabled by

        GOEXPERIMENT=precisestack ./all.bash

but that will not be the default for a Go 1.2 build.

Fixes #6087.

R=golang-dev, jeremyjackins, dan.kortschak, r
CC=golang-dev
https://golang.org/cl/13677045

											
										
										
											2013-09-16 18:26:10 -06:00
+								#include "zaexperiment.h"
-												gc #0.  mark and sweep collector.

R=r,gri
DELTA=472  (423 added, 2 deleted, 47 changed)
OCL=23522
CL=23541

											
										
										
											2009-01-26 18:37:05 -07:00
+								#include "malloc.h"
-												ld: detect stack overflow due to NOSPLIT

Fix problems found.

On amd64, various library routines had bigger
stack frames than expected, because large function
calls had been added.

runtime.assertI2T: nosplit stack overflow
        120	assumed on entry to runtime.assertI2T
        8	after runtime.assertI2T uses 112
        0	on entry to runtime.newTypeAssertionError
        -8	on entry to runtime.morestack01

runtime.assertE2E: nosplit stack overflow
        120	assumed on entry to runtime.assertE2E
        16	after runtime.assertE2E uses 104
        8	on entry to runtime.panic
        0	on entry to runtime.morestack16
        -8	after runtime.morestack16 uses 8

runtime.assertE2T: nosplit stack overflow
        120	assumed on entry to runtime.assertE2T
        16	after runtime.assertE2T uses 104
        8	on entry to runtime.panic
        0	on entry to runtime.morestack16
        -8	after runtime.morestack16 uses 8

runtime.newselect: nosplit stack overflow
        120	assumed on entry to runtime.newselect
        56	after runtime.newselect uses 64
        48	on entry to runtime.printf
        8	after runtime.printf uses 40
        0	on entry to vprintf
        -8	on entry to runtime.morestack16

runtime.selectdefault: nosplit stack overflow
        120	assumed on entry to runtime.selectdefault
        56	after runtime.selectdefault uses 64
        48	on entry to runtime.printf
        8	after runtime.printf uses 40
        0	on entry to vprintf
        -8	on entry to runtime.morestack16

runtime.selectgo: nosplit stack overflow
        120	assumed on entry to runtime.selectgo
        0	after runtime.selectgo uses 120
        -8	on entry to runtime.gosched

On arm, 5c was tagging functions NOSPLIT that should
not have been, like the recursive function printpanics:

printpanics: nosplit stack overflow
        124	assumed on entry to printpanics
        112	after printpanics uses 12
        108	on entry to printpanics
        96	after printpanics uses 12
        92	on entry to printpanics
        80	after printpanics uses 12
        76	on entry to printpanics
        64	after printpanics uses 12
        60	on entry to printpanics
        48	after printpanics uses 12
        44	on entry to printpanics
        32	after printpanics uses 12
        28	on entry to printpanics
        16	after printpanics uses 12
        12	on entry to printpanics
        0	after printpanics uses 12
        -4	on entry to printpanics

R=r, r2
CC=golang-dev
https://golang.org/cl/4188061

											
										
										
											2011-02-22 15:40:40 -07:00
+								#include "stack.h"
-												race: runtime changes
This is a part of a bigger change that adds data race detection feature:
https://golang.org/cl/6456044

R=rsc
CC=gobot, golang-dev
https://golang.org/cl/6535050

											
										
										
											2012-10-07 12:05:32 -06:00
+								#include "race.h"
-												runtime: store types of allocated objects

R=rsc
CC=golang-dev
https://golang.org/cl/6569057

											
										
										
											2012-10-21 15:41:32 -06:00
+								#include "type.h"
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#include "../../cmd/ld/textflag.h"
-												synch chan

SVN=127057

											
										
										
											2008-07-14 15:34:27 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Goroutine scheduler
 								// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								//
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// The main concepts are:
 								// G - goroutine.
 								// M - worker thread, or machine.
 								// P - processor, a resource that is required to execute Go code.
 								//     M must have an associated P to execute Go code, however it can be
 								//     blocked or in a syscall w/o an associated P.
-												runtime: add link to design doc for new scheduler

R=golang-dev, remyoudompheng, bradfitz
CC=golang-dev
https://golang.org/cl/7419049

											
										
										
											2013-03-04 08:36:45 -07:00
+								//
 								// Design doc at http://golang.org/s/go11sched.
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								typedef struct Sched Sched;
-												first cut at multithreading.  works on Linux.

* kick off new os procs (machs) as needed
* add sys·sleep for testing
* add Lock, Rendez
* properly lock mal, sys·newproc, scheduler
* linux syscall arg #4 is in R10, not CX
* chans are not multithread-safe yet
* multithreading disabled by default;
  set $gomaxprocs=2 (or 1000) to turn it on

This should build on OS X but may not.
Rob and I will fix soon after submitting.

TBR=r
OCL=13784
CL=13842

											
										
										
											2008-08-04 17:43:49 -06:00
+								struct Sched {
 									Lock;
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									uint64	goidgen;
-												kill trailing white space.
(apparently my first attempt didn't work.)

R=r
OCL=13888
CL=13888

											
										
										
											2008-08-05 15:21:42 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									M*	midle;	 // idle m's waiting for work
 									int32	nmidle;	 // number of idle m's waiting for work
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+									int32	nmidlelocked; // number of locked m's waiting for work
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									int32	mcount;	 // number of m's that have been created
-												runtime: impose thread count limit

Actually working to stay within the limit could cause subtle deadlocks.
Crashing avoids the subtlety.

Fixes #4056.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/13037043

											
										
										
											2013-08-16 20:25:26 -06:00
+									int32	maxmcount;	// maximum number of m's allowed (or die)
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
 									P*	pidle;  // idle P's
 									uint32	npidle;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									uint32	nmspinning;
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
 									// Global runnable queue.
 									G*	runqhead;
 									G*	runqtail;
 									int32	runqsize;
 									// Global cache of dead G's.
 									Lock	gflock;
 									G*	gfree;
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									uint32	gcwaiting;	// gc is waiting to run
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									int32	stopwait;
 									Note	stopnote;
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									uint32	sysmonwait;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									Note	sysmonnote;
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									uint64	lastpoll;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									int32	profilehz;	// cpu profiling rate
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
+								};
-												runtime: allocate goroutine ids in batches
Helps reduce contention on sched.goidgen.

benchmark                               old ns/op    new ns/op    delta
BenchmarkCreateGoroutines-16                  259          237   -8.49%
BenchmarkCreateGoroutinesParallel-16          127           43  -66.06%

R=golang-codereviews, dave, bradfitz, khr
CC=golang-codereviews, rsc
https://golang.org/cl/46970043

											
										
										
											2014-01-21 23:34:36 -07:00
+								enum
 								{
 									// The max value of GOMAXPROCS.
 									// There are no fundamental restrictions on the value.
 									MaxGomaxprocs = 1<<8,
 									// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
 									// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
 									GoidCacheBatch = 16,
 								};
-												first cut at multithreading.  works on Linux.

* kick off new os procs (machs) as needed
* add sys·sleep for testing
* add Lock, Rendez
* properly lock mal, sys·newproc, scheduler
* linux syscall arg #4 is in R10, not CX
* chans are not multithread-safe yet
* multithreading disabled by default;
  set $gomaxprocs=2 (or 1000) to turn it on

This should build on OS X but may not.
Rob and I will fix soon after submitting.

TBR=r
OCL=13784
CL=13842

											
										
										
											2008-08-04 17:43:49 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								Sched	runtime·sched;
 								int32	runtime·gomaxprocs;
-												runtime: detect deadlocks in programs using cgo
When cgo is used, runtime creates an additional M to handle callbacks on threads not created by Go.
This effectively disabled deadlock detection, which is a right thing, because Go program can be blocked
and only serve callbacks on external threads.
This also disables deadlock detection under race detector, because it happens to use cgo.
With this change the additional M is created lazily on first cgo call. So deadlock detector
works for programs that import "C", "net" or "net/http/pprof" but do not use them in fact.
Also fixes deadlock detector under race detector.
It should be fine to create the M later, because C code can not call into Go before first cgo call,
because C code does not know when Go initialization has completed. So a Go program need to call into C
first either to create an external thread, or notify a thread created in global ctor that Go
initialization has completed.
Fixes #4973.
Fixes #5475.

R=golang-dev, minux.ma, iant
CC=golang-dev
https://golang.org/cl/9303046

											
										
										
											2013-05-22 12:57:47 -06:00
+								uint32	runtime·needextram;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								bool	runtime·iscgo;
 								M	runtime·m0;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+								G	runtime·g0;	// idle goroutine for m0
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								G*	runtime·lastg;
 								M*	runtime·allm;
 								M*	runtime·extram;
 								int8*	runtime·goos;
 								int32	runtime·ncpu;
 								static int32	newprocs;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+								static	Lock allglock;	// the following vars are protected by this lock or by stoptheworld
 								G**	runtime·allg;
 								uintptr runtime·allglen;
 								static	uintptr allgcap;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								void runtime·mstart(void);
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								static void runqput(P*, G*);
 								static G* runqget(P*);
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+								static bool runqputslow(P*, G*, uint32, uint32);
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								static G* runqsteal(P*, P*);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static void mput(M*);
 								static M* mget(void);
 								static void mcommoninit(M*);
 								static void schedule(void);
 								static void procresize(int32);
 								static void acquirep(P*);
 								static P* releasep(void);
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+								static void newm(void(*)(void), P*);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static void stopm(void);
 								static void startm(P*, bool);
 								static void handoffp(P*);
 								static void wakep(void);
 								static void stoplockedm(void);
 								static void startlockedm(G*);
 								static void sysmon(void);
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+								static uint32 retake(int64);
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+								static void incidlelocked(int32);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static void checkdead(void);
 								static void exitsyscall0(G*);
 								static void park0(G*);
 								static void goexit0(G*);
 								static void gfput(P*, G*);
 								static G* gfget(P*);
 								static void gfpurge(P*);
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								static void globrunqput(G*);
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+								static void globrunqputbatch(G*, G*, int32);
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+								static G* globrunqget(P*, int32);
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								static P* pidleget(void);
 								static void pidleput(P*);
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+								static void injectglist(G*);
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+								static bool preemptall(void);
 								static bool preemptone(P*);
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+								static bool exitsyscallfast(void);
-												build: disable precise collection of stack frames

The code for call site-specific pointer bitmaps was not ready in time,
but the zeroing required without it is too expensive to use by default.
We will have to wait for precise collection of stack frames until Go 1.3.

The precise collection can be re-enabled by

        GOEXPERIMENT=precisestack ./all.bash

but that will not be the default for a Go 1.2 build.

Fixes #6087.

R=golang-dev, jeremyjackins, dan.kortschak, r
CC=golang-dev
https://golang.org/cl/13677045

											
										
										
											2013-09-16 18:26:10 -06:00
+								static bool haveexperiment(int8*);
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+								static void allgadd(G*);
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
-												proper handling of signals.
do not run init on g0.

R=r
DELTA=161  (124 added, 23 deleted, 14 changed)
OCL=15490
CL=15497

											
										
										
											2008-09-18 16:56:46 -06:00
+								// The bootstrap sequence is:
 								//
 								//	call osinit
 								//	call schedinit
 								//	make & queue new G
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								//	call runtime·mstart
-												proper handling of signals.
do not run init on g0.

R=r
DELTA=161  (124 added, 23 deleted, 14 changed)
OCL=15490
CL=15497

											
										
										
											2008-09-18 16:56:46 -06:00
+								//
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+								// The new G calls runtime·main.
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·schedinit(void)
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								{
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									int32 n, procs;
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+									byte *p;
-												runtime: avoid allocation of internal panic values

If a fault happens in malloc, inevitably the next thing that happens
is a deadlock trying to allocate the panic value that says the fault
happened. Stop doing that, two ways.

First, reject panic in malloc just as we reject panic in garbage collection.

Second, runtime.panicstring was using an error implementation
backed by a Go string, so the interface held an allocated *string.
Since the actual errors are C strings, define a new error
implementation backed by a C char*, which needs no indirection
and therefore no allocation.

This second fix will avoid allocation for errors like nil panic derefs
or division by zero, so it is worth doing even though the first fix
should take care of faults during malloc.

Update #6419

R=golang-dev, dvyukov, dave
CC=golang-dev
https://golang.org/cl/13774043

											
										
										
											2013-09-20 13:15:25 -06:00
+									Eface i;
-												stack overflow debugging and fix.

  * in 6l, -K already meant check for stack underflow.
    add -KK to mean double-check stack overflows
    even in nosplit functions.

  * comment out print locks; they deadlock too easily
     but are still useful to put back for special occasions.

  * let runcgo assembly switch to scheduler stack
    without involving scheduler directly.  because runcgo
    gets called from matchmg, it is too hard to keep it
    from being called on other stacks.

R=r
DELTA=94  (65 added, 18 deleted, 11 changed)
OCL=35591
CL=35604

											
										
										
											2009-10-12 11:26:38 -06:00
-												runtime: impose thread count limit

Actually working to stay within the limit could cause subtle deadlocks.
Crashing avoids the subtlety.

Fixes #4056.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/13037043

											
										
										
											2013-08-16 20:25:26 -06:00
+									runtime·sched.maxmcount = 10000;
-												build: disable precise collection of stack frames

The code for call site-specific pointer bitmaps was not ready in time,
but the zeroing required without it is too expensive to use by default.
We will have to wait for precise collection of stack frames until Go 1.3.

The precise collection can be re-enabled by

        GOEXPERIMENT=precisestack ./all.bash

but that will not be the default for a Go 1.2 build.

Fixes #6087.

R=golang-dev, jeremyjackins, dan.kortschak, r
CC=golang-dev
https://golang.org/cl/13677045

											
										
										
											2013-09-16 18:26:10 -06:00
+									runtime·precisestack = haveexperiment("precisestack");
-												runtime: impose thread count limit

Actually working to stay within the limit could cause subtle deadlocks.
Crashing avoids the subtlety.

Fixes #4056.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/13037043

											
										
										
											2013-08-16 20:25:26 -06:00
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+									runtime·mallocinit();
-												runtime: eliminate contention during stack allocation
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1045.00       949.00   -9.19%
BenchmarkStackGrowth-2                             3450.00       800.00  -76.81%
BenchmarkStackGrowth-4                             5076.00       513.00  -89.89%
BenchmarkStackGrowth-8                             7805.00       471.00  -93.97%
BenchmarkStackGrowth-16                           11751.00       321.00  -97.27%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091

											
										
										
											2011-07-12 10:24:32 -06:00
+									mcommoninit(m);
-												runtime: avoid allocation of internal panic values

If a fault happens in malloc, inevitably the next thing that happens
is a deadlock trying to allocate the panic value that says the fault
happened. Stop doing that, two ways.

First, reject panic in malloc just as we reject panic in garbage collection.

Second, runtime.panicstring was using an error implementation
backed by a Go string, so the interface held an allocated *string.
Since the actual errors are C strings, define a new error
implementation backed by a C char*, which needs no indirection
and therefore no allocation.

This second fix will avoid allocation for errors like nil panic derefs
or division by zero, so it is worth doing even though the first fix
should take care of faults during malloc.

Update #6419

R=golang-dev, dvyukov, dave
CC=golang-dev
https://golang.org/cl/13774043

											
										
										
											2013-09-20 13:15:25 -06:00
 									// Initialize the itable value for newErrorCString,
 									// so that the next time it gets called, possibly
 									// in a fault during a garbage collection, it will not
 									// need to allocated memory.
 									runtime·newErrorCString(0, &i);
-												runtime: eliminate contention during stack allocation
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1045.00       949.00   -9.19%
BenchmarkStackGrowth-2                             3450.00       800.00  -76.81%
BenchmarkStackGrowth-4                             5076.00       513.00  -89.89%
BenchmarkStackGrowth-8                             7805.00       471.00  -93.97%
BenchmarkStackGrowth-16                           11751.00       321.00  -97.27%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091

											
										
										
											2011-07-12 10:24:32 -06:00
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+									runtime·goargs();
-												runtime: revert 6974:1f3c3696babb

I missed that environment is used during runtime setup,
well before go init() functions run. Implemented os-dependent
runtime.goenvs functions to allow for different unix, plan9 and
windows versions of environment discovery.

R=rsc, paulzhol
CC=golang-dev
https://golang.org/cl/3787046

											
										
										
											2011-01-11 17:48:15 -07:00
+									runtime·goenvs();
-												runtime: introduce GODEBUG env var
Currently it replaces GOGCTRACE env var (GODEBUG=gctrace=1).
The plan is to extend it with other type of debug tracing,
e.g. GODEBUG=gctrace=1,schedtrace=100.

R=rsc
CC=bradfitz, daniel.morsing, gobot, golang-dev
https://golang.org/cl/10026045

											
										
										
											2013-06-28 08:37:06 -06:00
+									runtime·parsedebugvars();
-												casify, cleanup sys

R=r
OCL=22978
CL=22984

											
										
										
											2009-01-16 15:58:14 -07:00
-												runtime: allocate internal symbol table eagerly
we need it for GC anyway.

R=golang-dev, khr, dave, khr
CC=golang-dev
https://golang.org/cl/9728044

											
										
										
											2013-05-28 11:10:10 -06:00
+									// Allocate internal symbol table representation now, we need it for GC anyway.
 									runtime·symtabinit();
-												malloc in runtime (not used by default)

R=r
DELTA=1551  (1550 added, 0 deleted, 1 changed)
OCL=21404
CL=21538

											
										
										
											2008-12-18 16:42:28 -07:00
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									runtime·sched.lastpoll = runtime·nanotime();
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									procs = 1;
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+									p = runtime·getenv("GOMAXPROCS");
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(p != nil && (n = runtime·atoi(p)) > 0) {
 										if(n > MaxGomaxprocs)
 											n = MaxGomaxprocs;
 										procs = n;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
+									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·allp = runtime·malloc((MaxGomaxprocs+1)*sizeof(runtime·allp[0]));
 									procresize(procs);
-												runtime: include bootstrap m in mcpu accounting

R=rsc, dvyukov
CC=golang-dev
https://golang.org/cl/5307052

											
										
										
											2011-10-25 01:35:20 -06:00
-												runtime: run goroutines during init

Fixes #583.
Fixes #1776.
Fixes #2001.
Fixes #2112.

R=golang-dev, bradfitz, r, gri
CC=golang-dev
https://golang.org/cl/5265044

											
										
										
											2011-10-13 13:54:23 -06:00
+									mstats.enablegc = 1;
-												race: runtime changes
This is a part of a bigger change that adds data race detection feature:
https://golang.org/cl/6456044

R=rsc
CC=gobot, golang-dev
https://golang.org/cl/6535050

											
										
										
											2012-10-07 12:05:32 -06:00
 									if(raceenabled)
-												runtime/race: switch to explicit race context instead of goroutine id's
Removes limit on maximum number of goroutines ever existed.
code.google.com/p/goexecutor tests now pass successfully.
Also slightly improves performance.
Before: $ time ./flate.test -test.short
real	0m9.314s
After:  $ time ./flate.test -test.short
real	0m8.958s
Fixes #4286.
The runtime is built from llvm rev 174312.

R=rsc
CC=golang-dev
https://golang.org/cl/7218044

											
										
										
											2013-02-06 00:40:54 -07:00
+										g->racectx = runtime·raceinit();
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								}
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+								extern void main·init(void);
 								extern void main·main(void);
-												cmd/gc, reflect, runtime: switch to indirect func value representation

Step 1 of http://golang.org/s/go11func.

R=golang-dev, r, daniel.morsing, remyoudompheng
CC=golang-dev
https://golang.org/cl/7393045

											
										
										
											2013-02-21 15:01:13 -07:00
+								static FuncVal scavenger = {runtime·MHeap_Scavenger};
-												runtime: handle runtime.Goexit during init

Fixes #5963.

R=golang-dev, dsymonds, dvyukov
CC=golang-dev
https://golang.org/cl/11879045

											
										
										
											2013-07-26 11:54:44 -06:00
+								static FuncVal initDone = { runtime·unlockOSThread };
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+								// The main goroutine.
 								void
 								runtime·main(void)
 								{
-												runtime: handle runtime.Goexit during init

Fixes #5963.

R=golang-dev, dsymonds, dvyukov
CC=golang-dev
https://golang.org/cl/11879045

											
										
										
											2013-07-26 11:54:44 -06:00
+									Defer d;
-												runtime: impose stack size limit

The goal is to stop only those programs that would keep
going and run the machine out of memory, but before they do that.
1 GB on 64-bit, 250 MB on 32-bit.
That seems implausibly large, and it can be adjusted.

Fixes #2556.
Fixes #4494.
Fixes #5173.

R=khr, r, dvyukov
CC=golang-dev
https://golang.org/cl/12541052

											
										
										
											2013-08-15 20:34:06 -06:00
 									// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
 									// Using decimal instead of binary GB and MB because
 									// they look nicer in the stack overflow failure message.
 									if(sizeof(void*) == 8)
 										runtime·maxstacksize = 1000000000;
 									else
 										runtime·maxstacksize = 250000000;
-												runtime: handle runtime.Goexit during init

Fixes #5963.

R=golang-dev, dsymonds, dvyukov
CC=golang-dev
https://golang.org/cl/11879045

											
										
										
											2013-07-26 11:54:44 -06:00
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+									newm(sysmon, nil);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+									// Lock the main goroutine onto this, the main OS thread,
 									// during initialization.  Most programs won't care, but a few
 									// do require certain calls to be made by the main thread.
 									// Those can arrange for main.main to run in the main thread
 									// by calling runtime.LockOSThread during initialization
 									// to preserve the lock.
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+									runtime·lockOSThread();
-												runtime: handle runtime.Goexit during init

Fixes #5963.

R=golang-dev, dsymonds, dvyukov
CC=golang-dev
https://golang.org/cl/11879045

											
										
										
											2013-07-26 11:54:44 -06:00
 									// Defer unlock so that runtime.Goexit during init does the unlock too.
 									d.fn = &initDone;
 									d.siz = 0;
 									d.link = g->defer;
 									d.argp = (void*)-1;
 									d.special = true;
 									g->defer = &d;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									if(m != &runtime·m0)
 										runtime·throw("runtime·main not on m0");
-												runtime: fix deadlock detector false negative
The issue was that scvg is assigned *after* the scavenger goroutine is started,
so when the scavenger calls entersyscall() the g==scvg check can fail.
Fixes #5025.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7629045

											
										
										
											2013-03-12 07:21:44 -06:00
+									runtime·newproc1(&scavenger, nil, 0, 0, runtime·main);
-												runtime: fix deadlock detector false negative
Fixes #4819.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7322086

											
										
										
											2013-02-20 01:15:02 -07:00
+									main·init();
-												runtime: handle runtime.Goexit during init

Fixes #5963.

R=golang-dev, dsymonds, dvyukov
CC=golang-dev
https://golang.org/cl/11879045

											
										
										
											2013-07-26 11:54:44 -06:00
 									if(g->defer != &d || d.fn != &initDone)
 										runtime·throw("runtime: bad defer entry after init");
 									g->defer = d.link;
-												runtime: fix deadlock detector false negative
Fixes #4819.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7322086

											
										
										
											2013-02-20 01:15:02 -07:00
+									runtime·unlockOSThread();
-												runtime: work around false negative in deadlock detection

Not a complete fix for issue 3342, but fixes the trivial case.
There may still be a race in the instants before and after
a scavenger-induced garbage collection.

Intended to be "obviously safe": a call to runtime·gosched
before main.main is no different than a call to runtime.Gosched
at the beginning of main.main, and it is (or had better be)
safe to call runtime.Gosched at any point during main.

Update #3342.

R=iant
CC=golang-dev
https://golang.org/cl/5919052

											
										
										
											2012-03-27 10:22:19 -06:00
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+									main·main();
-												race: runtime changes
This is a part of a bigger change that adds data race detection feature:
https://golang.org/cl/6456044

R=rsc
CC=gobot, golang-dev
https://golang.org/cl/6535050

											
										
										
											2012-10-07 12:05:32 -06:00
+									if(raceenabled)
 										runtime·racefini();
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
-												runtime: make return from main wait for active panic

Arguably if this happens the program is buggy anyway,
but letting the panic continue looks better than interrupting it.
Otherwise things like this are possible, and confusing:

$ go run x.go
panic: $ echo $?
0
$

Fixes #3934.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/7322083

											
										
										
											2013-02-15 12:48:35 -07:00
+									// Make racy client program work: if panicking on
 									// another goroutine at the same time as main returns,
 									// let the other goroutine finish printing the panic trace.
 									// Once it does, it will exit. See issue 3934.
 									if(runtime·panicking)
 										runtime·park(nil, nil, "panicwait");
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+									runtime·exit(0);
 									for(;;)
 										*(int32*)runtime·main = 0;
 								}
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
+								void
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+								runtime·goroutineheader(G *gp)
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
+								{
 									int8 *status;
-												runtime: output how long goroutines are blocked
Example of output:

goroutine 4 [sleep for 3 min]:
time.Sleep(0x34630b8a000)
        src/pkg/runtime/time.goc:31 +0x31
main.func·002()
        block.go:16 +0x2c
created by main.main
        block.go:17 +0x33

Full program and output are here:
http://play.golang.org/p/NEZdADI3Td

Fixes #6809.

R=golang-codereviews, khr, kamil.kisiel, bradfitz, rsc
CC=golang-codereviews
https://golang.org/cl/50420043

											
										
										
											2014-01-16 01:54:46 -07:00
+									int64 waitfor;
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									switch(gp->status) {
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
+									case Gidle:
 										status = "idle";
 										break;
 									case Grunnable:
 										status = "runnable";
 										break;
 									case Grunning:
 										status = "running";
 										break;
 									case Gsyscall:
 										status = "syscall";
 										break;
 									case Gwaiting:
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+										if(gp->waitreason)
 											status = gp->waitreason;
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
+										else
 											status = "waiting";
 										break;
 									default:
 										status = "???";
 										break;
 									}
-												runtime: output how long goroutines are blocked
Example of output:

goroutine 4 [sleep for 3 min]:
time.Sleep(0x34630b8a000)
        src/pkg/runtime/time.goc:31 +0x31
main.func·002()
        block.go:16 +0x2c
created by main.main
        block.go:17 +0x33

Full program and output are here:
http://play.golang.org/p/NEZdADI3Td

Fixes #6809.

R=golang-codereviews, khr, kamil.kisiel, bradfitz, rsc
CC=golang-codereviews
https://golang.org/cl/50420043

											
										
										
											2014-01-16 01:54:46 -07:00
 									// approx time the G is blocked, in minutes
 									waitfor = 0;
 									if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
 										waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
 									if(waitfor < 1)
 										runtime·printf("goroutine %D [%s]:\n", gp->goid, status);
 									else
 										runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
+								}
-												print tracebacks for all goroutines, not just the crashing one

R=ken
OCL=13518
CL=13518

											
										
										
											2008-07-28 12:29:41 -06:00
+								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·tracebackothers(G *me)
-												print tracebacks for all goroutines, not just the crashing one

R=ken
OCL=13518
CL=13518

											
										
										
											2008-07-28 12:29:41 -06:00
+								{
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									G *gp;
-												runtime: dump the full stack of a throwing goroutine
Useful for debugging of runtime bugs.
+ Do not print "stack segment boundary" unless GOTRACEBACK>1.
+ Do not traceback system goroutines unless GOTRACEBACK>1.

R=rsc, minux.ma
CC=golang-dev
https://golang.org/cl/7098050

											
										
										
											2013-01-29 03:57:11 -07:00
+									int32 traceback;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									uintptr i;
-												print tracebacks for all goroutines, not just the crashing one

R=ken
OCL=13518
CL=13518

											
										
										
											2008-07-28 12:29:41 -06:00
-												runtime: accept GOTRACEBACK=crash to mean 'crash after panic'

This provides a way to generate core dumps when people need them.
The settings are:

        GOTRACEBACK=0  no traceback on panic, just exit
        GOTRACEBACK=1  default - traceback on panic, then exit
        GOTRACEBACK=2  traceback including runtime frames on panic, then exit
        GOTRACEBACK=crash traceback including runtime frames on panic, then crash

Fixes #3257.

R=golang-dev, devon.odell, r, daniel.morsing, ality
CC=golang-dev
https://golang.org/cl/7666044

											
										
										
											2013-03-14 23:11:03 -06:00
+									traceback = runtime·gotraceback(nil);
-												runtime: record proper goroutine state during stack split

Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048

											
										
										
											2013-06-27 09:32:01 -06:00
 									// Show the current goroutine first, if we haven't already.
 									if((gp = m->curg) != nil && gp != me) {
 										runtime·printf("\n");
 										runtime·goroutineheader(gp);
-												runtime: fix crash in runtime.GoroutineProfile

This is a possible Go 1.2.1 candidate.

Fixes #6946.

R=iant, r
CC=golang-dev
https://golang.org/cl/41640043

											
										
										
											2013-12-13 13:44:57 -07:00
+										runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
-												runtime: record proper goroutine state during stack split

Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048

											
										
										
											2013-06-27 09:32:01 -06:00
+									}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·lock(&allglock);
 									for(i = 0; i < runtime·allglen; i++) {
 										gp = runtime·allg[i];
-												runtime: record proper goroutine state during stack split

Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048

											
										
										
											2013-06-27 09:32:01 -06:00
+										if(gp == me || gp == m->curg || gp->status == Gdead)
-												print tracebacks for all goroutines, not just the crashing one

R=ken
OCL=13518
CL=13518

											
										
										
											2008-07-28 12:29:41 -06:00
+											continue;
-												runtime: dump the full stack of a throwing goroutine
Useful for debugging of runtime bugs.
+ Do not print "stack segment boundary" unless GOTRACEBACK>1.
+ Do not traceback system goroutines unless GOTRACEBACK>1.

R=rsc, minux.ma
CC=golang-dev
https://golang.org/cl/7098050

											
										
										
											2013-01-29 03:57:11 -07:00
+										if(gp->issystem && traceback < 2)
 											continue;
-												runtime: simplify stack traces

Make the stack traces more readable for new
Go programmers while preserving their utility for old hands.

- Change status number [4] to string.
- Elide frames in runtime package (internal details).
- Swap file:line and arguments.
- Drop 'created by' for main goroutine.
- Show goroutines in order of allocation:
  implies main goroutine first if nothing else.

There is no option to get the extra frames back.
Uncomment 'return 1' at the bottom of symtab.c.

$ 6.out
throw: all goroutines are asleep - deadlock!

goroutine 1 [chan send]:
main.main()
       /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a

goroutine 2 [select (no cases)]:
main.sel()
       /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23

goroutine 3 [chan receive]:
main.recv(0xf8400010a0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50

goroutine 4 [chan receive (nil chan)]:
main.recv(0x0, 0x0)
       /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e
created by main.main
       /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66
$

$ 6.out index
panic: runtime error: index out of range

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9
$

$ 6.out nil
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x0 pc=0x22ca]

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211
$

$ 6.out panic
panic: panic

goroutine 1 [running]:
main.main()
        /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101
$

R=golang-dev, qyzhai, n13m3y3r, r
CC=golang-dev
https://golang.org/cl/4907048

											
										
										
											2011-08-22 21:26:39 -06:00
+										runtime·printf("\n");
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+										runtime·goroutineheader(gp);
-												runtime: print "created by" for running goroutines in traceback
This allows to at least determine goroutine "identity".
Now it looks like:
goroutine 12 [running]:
        goroutine running on other thread; stack unavailable
created by testing.RunTests
        src/pkg/testing/testing.go:440 +0x88e

R=golang-dev, r, rsc
CC=golang-dev
https://golang.org/cl/12248043

											
										
										
											2013-08-01 09:28:38 -06:00
+										if(gp->status == Grunning) {
-												runtime: record proper goroutine state during stack split

Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048

											
										
										
											2013-06-27 09:32:01 -06:00
+											runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
-												runtime: print "created by" for running goroutines in traceback
This allows to at least determine goroutine "identity".
Now it looks like:
goroutine 12 [running]:
        goroutine running on other thread; stack unavailable
created by testing.RunTests
        src/pkg/testing/testing.go:440 +0x88e

R=golang-dev, r, rsc
CC=golang-dev
https://golang.org/cl/12248043

											
										
										
											2013-08-01 09:28:38 -06:00
+											runtime·printcreatedby(gp);
 										} else
-												runtime: fix crash in runtime.GoroutineProfile

This is a possible Go 1.2.1 candidate.

Fixes #6946.

R=iant, r
CC=golang-dev
https://golang.org/cl/41640043

											
										
										
											2013-12-13 13:44:57 -07:00
+											runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
-												print tracebacks for all goroutines, not just the crashing one

R=ken
OCL=13518
CL=13518

											
										
										
											2008-07-28 12:29:41 -06:00
+									}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·unlock(&allglock);
-												print tracebacks for all goroutines, not just the crashing one

R=ken
OCL=13518
CL=13518

											
										
										
											2008-07-28 12:29:41 -06:00
+								}
-												runtime: impose thread count limit

Actually working to stay within the limit could cause subtle deadlocks.
Crashing avoids the subtlety.

Fixes #4056.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/13037043

											
										
										
											2013-08-16 20:25:26 -06:00
+								static void
 								checkmcount(void)
 								{
 									// sched lock is held
 									if(runtime·sched.mcount > runtime·sched.maxmcount) {
 										runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
 										runtime·throw("thread exhaustion");
 									}
 								}
-												runtime: eliminate contention during stack allocation
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1045.00       949.00   -9.19%
BenchmarkStackGrowth-2                             3450.00       800.00  -76.81%
BenchmarkStackGrowth-4                             5076.00       513.00  -89.89%
BenchmarkStackGrowth-8                             7805.00       471.00  -93.97%
BenchmarkStackGrowth-16                           11751.00       321.00  -97.27%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091

											
										
										
											2011-07-12 10:24:32 -06:00
+								static void
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+								mcommoninit(M *mp)
-												runtime: eliminate contention during stack allocation
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1045.00       949.00   -9.19%
BenchmarkStackGrowth-2                             3450.00       800.00  -76.81%
BenchmarkStackGrowth-4                             5076.00       513.00  -89.89%
BenchmarkStackGrowth-8                             7805.00       471.00  -93.97%
BenchmarkStackGrowth-16                           11751.00       321.00  -97.27%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091

											
										
										
											2011-07-12 10:24:32 -06:00
+								{
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// If there is no mcache runtime·callers() will crash,
 									// and we are most likely in sysmon thread so the stack is senseless anyway.
 									if(m->mcache)
 										runtime·callers(1, mp->createstack, nelem(mp->createstack));
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
-												runtime, pprof: add profiling of thread creation

Same idea as heap profile: how did each thread get created?
Low memory (256 bytes per OS thread), high reward for
programs that suddenly have many threads running.

Fixes #1477.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/5639059

											
										
										
											2012-02-08 08:33:54 -07:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·lock(&runtime·sched);
 									mp->id = runtime·sched.mcount++;
-												runtime: impose thread count limit

Actually working to stay within the limit could cause subtle deadlocks.
Crashing avoids the subtlety.

Fixes #4056.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/13037043

											
										
										
											2013-08-16 20:25:26 -06:00
+									checkmcount();
-												runtime: split minit() to mpreinit() and minit()
mpreinit() is called on the parent thread and with mcache (can allocate memory),
minit() is called on the child thread and can not allocate memory.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7389043

											
										
										
											2013-02-21 05:24:38 -07:00
+									runtime·mpreinit(mp);
-												runtime, pprof: add profiling of thread creation

Same idea as heap profile: how did each thread get created?
Low memory (256 bytes per OS thread), high reward for
programs that suddenly have many threads running.

Fixes #1477.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/5639059

											
										
										
											2012-02-08 08:33:54 -07:00
+									// Add to runtime·allm so garbage collector doesn't free m
 									// when it is just in a register or thread-local storage.
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									mp->alllink = runtime·allm;
-												runtime: rename Cgocalls and Goroutines to NumCgoCall and NumGoroutine, respectively.
Update some other docs too.

Update #2955.

R=rsc
CC=golang-dev
https://golang.org/cl/5676060

											
										
										
											2012-02-16 14:49:41 -07:00
+									// runtime·NumCgoCall() iterates over allm w/o schedlock,
-												runtime, pprof: add profiling of thread creation

Same idea as heap profile: how did each thread get created?
Low memory (256 bytes per OS thread), high reward for
programs that suddenly have many threads running.

Fixes #1477.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/5639059

											
										
										
											2012-02-08 08:33:54 -07:00
+									// so we need to publish it safely.
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									runtime·atomicstorep(&runtime·allm, mp);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·unlock(&runtime·sched);
-												runtime: eliminate contention during stack allocation
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1045.00       949.00   -9.19%
BenchmarkStackGrowth-2                             3450.00       800.00  -76.81%
BenchmarkStackGrowth-4                             5076.00       513.00  -89.89%
BenchmarkStackGrowth-8                             7805.00       471.00  -93.97%
BenchmarkStackGrowth-16                           11751.00       321.00  -97.27%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091

											
										
										
											2011-07-12 10:24:32 -06:00
+								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Mark gp ready to run.
-												first cut at multithreading.  works on Linux.

* kick off new os procs (machs) as needed
* add sys·sleep for testing
* add Lock, Rendez
* properly lock mal, sys·newproc, scheduler
* linux syscall arg #4 is in R10, not CX
* chans are not multithread-safe yet
* multithreading disabled by default;
  set $gomaxprocs=2 (or 1000) to turn it on

This should build on OS X but may not.
Rob and I will fix soon after submitting.

TBR=r
OCL=13784
CL=13842

											
										
										
											2008-08-04 17:43:49 -06:00
+								void
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+								runtime·ready(G *gp)
-												first cut at multithreading.  works on Linux.

* kick off new os procs (machs) as needed
* add sys·sleep for testing
* add Lock, Rendez
* properly lock mal, sys·newproc, scheduler
* linux syscall arg #4 is in R10, not CX
* chans are not multithread-safe yet
* multithreading disabled by default;
  set $gomaxprocs=2 (or 1000) to turn it on

This should build on OS X but may not.
Rob and I will fix soon after submitting.

TBR=r
OCL=13784
CL=13842

											
										
										
											2008-08-04 17:43:49 -06:00
+								{
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+									// Mark runnable.
-												runtime: disable preemption in several scheduler functions
Required for preemptive scheduler, see the comments for details.

R=golang-dev, khr, iant, khr
CC=golang-dev
https://golang.org/cl/9740051

											
										
										
											2013-06-03 04:40:38 -06:00
+									m->locks++;  // disable preemption because it can be holding p in a local var
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(gp->status != Gwaiting) {
-												runtime: switch to 64-bit goroutine ids
Fixes #4275.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6759053

											
										
										
											2012-10-26 00:13:06 -06:00
+										runtime·printf("goroutine %D has status %d\n", gp->goid, gp->status);
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+										runtime·throw("bad g->status in ready");
-												runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045

											
										
										
											2011-02-23 13:51:20 -07:00
+									}
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									gp->status = Grunnable;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runqput(m->p, gp);
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										wakep();
-												runtime: disable preemption in several scheduler functions
Required for preemptive scheduler, see the comments for details.

R=golang-dev, khr, iant, khr
CC=golang-dev
https://golang.org/cl/9740051

											
										
										
											2013-06-03 04:40:38 -06:00
+									m->locks--;
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+									if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
 										g->stackguard0 = StackPreempt;
-												synch chan

SVN=127057

											
										
										
											2008-07-14 15:34:27 -06:00
+								}
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+								int32
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+								runtime·gcprocs(void)
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+								{
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+									int32 n;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+									// Figure out how many CPUs to use during GC.
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+									// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									runtime·lock(&runtime·sched);
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+									n = runtime·gomaxprocs;
 									if(n > runtime·ncpu)
 										n = runtime·ncpu;
 									if(n > MaxGcproc)
 										n = MaxGcproc;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(n > runtime·sched.nmidle+1) // one M is currently running
 										n = runtime·sched.nmidle+1;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									runtime·unlock(&runtime·sched);
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+									return n;
 								}
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+								static bool
 								needaddgcproc(void)
 								{
 									int32 n;
 									runtime·lock(&runtime·sched);
 									n = runtime·gomaxprocs;
 									if(n > runtime·ncpu)
 										n = runtime·ncpu;
 									if(n > MaxGcproc)
 										n = MaxGcproc;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									n -= runtime·sched.nmidle+1; // one M is currently running
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									runtime·unlock(&runtime·sched);
 									return n > 0;
 								}
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+								void
 								runtime·helpgc(int32 nproc)
 								{
 									M *mp;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									int32 n, pos;
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
 									runtime·lock(&runtime·sched);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									pos = 0;
 									for(n = 1; n < nproc; n++) {  // one M is currently running
 										if(runtime·allp[pos]->mcache == m->mcache)
 											pos++;
 										mp = mget();
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+										if(mp == nil)
 											runtime·throw("runtime·gcprocs inconsistency");
-												runtime: faster parallel GC
Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase.

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkTree2-8               97100768     71417553  -26.45%
garbage.BenchmarkTree2LastPause-8     970931485    714103692  -26.45%
garbage.BenchmarkTree2Pause-8         469127802    345029253  -26.45%
garbage.BenchmarkParser-8            2880950854   2715456901   -5.74%
garbage.BenchmarkParserLastPause-8    137047399    103336476  -24.60%
garbage.BenchmarkParserPause-8         80686028     58922680  -26.97%

R=golang-dev, 0xe2.0x9a.0x9b, dave, adg, rsc, iant
CC=golang-dev
https://golang.org/cl/7816044

											
										
										
											2013-03-21 02:48:02 -06:00
+										mp->helpgc = n;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										mp->mcache = runtime·allp[pos]->mcache;
 										pos++;
 										runtime·notewakeup(&mp->park);
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+									}
 									runtime·unlock(&runtime·sched);
 								}
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+								// Similar to stoptheworld but best-effort and can be called several times.
 								// There is no reverse operation, used during crashing.
 								// This function must not lock any mutexes.
 								void
 								runtime·freezetheworld(void)
 								{
 									int32 i;
 									if(runtime·gomaxprocs == 1)
 										return;
 									// stopwait and preemption requests can be lost
 									// due to races with concurrently executing threads,
 									// so try several times
 									for(i = 0; i < 5; i++) {
 										// this should tell the scheduler to not start any new goroutines
 										runtime·sched.stopwait = 0x7fffffff;
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+										runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+										// this should stop running goroutines
 										if(!preemptall())
 											break;  // no running goroutines
 										runtime·usleep(1000);
 									}
 									// to be sure
 									runtime·usleep(1000);
 									preemptall();
 									runtime·usleep(1000);
 								}
-												add support for ref counts to memory allocator.

mark and sweep, stop the world garbage collector
(intermediate step in the way to ref counting).
can run pretty with an explicit gc after each file.

R=r
DELTA=502  (346 added, 143 deleted, 13 changed)
OCL=20630
CL=20635

											
										
										
											2008-12-05 16:24:18 -07:00
+								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·stoptheworld(void)
-												add support for ref counts to memory allocator.

mark and sweep, stop the world garbage collector
(intermediate step in the way to ref counting).
can run pretty with an explicit gc after each file.

R=r
DELTA=502  (346 added, 143 deleted, 13 changed)
OCL=20630
CL=20635

											
										
										
											2008-12-05 16:24:18 -07:00
+								{
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									int32 i;
 									uint32 s;
 									P *p;
 									bool wait;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·lock(&runtime·sched);
 									runtime·sched.stopwait = runtime·gomaxprocs;
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
-												runtime: preempt goroutines for GC
The last patch for preemptive scheduler,
with this change stoptheworld issues preemption
requests every 100us.
Update #543.

R=golang-dev, daniel.morsing, rsc
CC=golang-dev
https://golang.org/cl/10264044

											
										
										
											2013-06-28 07:52:17 -06:00
+									preemptall();
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// stop current P
 									m->p->status = Pgcstop;
 									runtime·sched.stopwait--;
 									// try to retake all P's in Psyscall status
 									for(i = 0; i < runtime·gomaxprocs; i++) {
 										p = runtime·allp[i];
 										s = p->status;
 										if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
 											runtime·sched.stopwait--;
 									}
 									// stop idle P's
 									while(p = pidleget()) {
 										p->status = Pgcstop;
 										runtime·sched.stopwait--;
 									}
 									wait = runtime·sched.stopwait > 0;
 									runtime·unlock(&runtime·sched);
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: preempt goroutines for GC
The last patch for preemptive scheduler,
with this change stoptheworld issues preemption
requests every 100us.
Update #543.

R=golang-dev, daniel.morsing, rsc
CC=golang-dev
https://golang.org/cl/10264044

											
										
										
											2013-06-28 07:52:17 -06:00
+									// wait for remaining P's to stop voluntarily
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(wait) {
-												runtime: preempt goroutines for GC
The last patch for preemptive scheduler,
with this change stoptheworld issues preemption
requests every 100us.
Update #543.

R=golang-dev, daniel.morsing, rsc
CC=golang-dev
https://golang.org/cl/10264044

											
										
										
											2013-06-28 07:52:17 -06:00
+										for(;;) {
 											// wait for 100us, then try to re-preempt in case of any races
 											if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
 												runtime·noteclear(&runtime·sched.stopnote);
 												break;
 											}
 											preemptall();
 										}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									}
 									if(runtime·sched.stopwait)
 										runtime·throw("stoptheworld: not stopped");
 									for(i = 0; i < runtime·gomaxprocs; i++) {
 										p = runtime·allp[i];
 										if(p->status != Pgcstop)
 											runtime·throw("stoptheworld: not stopped");
-												add support for ref counts to memory allocator.

mark and sweep, stop the world garbage collector
(intermediate step in the way to ref counting).
can run pretty with an explicit gc after each file.

R=r
DELTA=502  (346 added, 143 deleted, 13 changed)
OCL=20630
CL=20635

											
										
										
											2008-12-05 16:24:18 -07:00
+									}
 								}
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+								static void
 								mhelpgc(void)
 								{
-												runtime: faster parallel GC
Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase.

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkTree2-8               97100768     71417553  -26.45%
garbage.BenchmarkTree2LastPause-8     970931485    714103692  -26.45%
garbage.BenchmarkTree2Pause-8         469127802    345029253  -26.45%
garbage.BenchmarkParser-8            2880950854   2715456901   -5.74%
garbage.BenchmarkParserLastPause-8    137047399    103336476  -24.60%
garbage.BenchmarkParserPause-8         80686028     58922680  -26.97%

R=golang-dev, 0xe2.0x9a.0x9b, dave, adg, rsc, iant
CC=golang-dev
https://golang.org/cl/7816044

											
										
										
											2013-03-21 02:48:02 -06:00
+									m->helpgc = -1;
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+								}
-												add support for ref counts to memory allocator.

mark and sweep, stop the world garbage collector
(intermediate step in the way to ref counting).
can run pretty with an explicit gc after each file.

R=r
DELTA=502  (346 added, 143 deleted, 13 changed)
OCL=20630
CL=20635

											
										
										
											2008-12-05 16:24:18 -07:00
+								void
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+								runtime·starttheworld(void)
-												add support for ref counts to memory allocator.

mark and sweep, stop the world garbage collector
(intermediate step in the way to ref counting).
can run pretty with an explicit gc after each file.

R=r
DELTA=502  (346 added, 143 deleted, 13 changed)
OCL=20630
CL=20635

											
										
										
											2008-12-05 16:24:18 -07:00
+								{
-												runtime: fix deadlock
The deadlock episodically occurs on misc/cgo/test/TestCthread.
The problem is that starttheworld() leaves some P's with local work
without M's. Then all active M's enter into syscalls, but reject to
wake another M's due to the following check (both in entersyscallblock() and in retake()):
if(p->runqhead == p->runqtail &&
        runtime·atomicload(&runtime·sched.nmspinning) +
        runtime·atomicload(&runtime·sched.npidle) > 0)
        continue;

R=rsc
CC=golang-dev
https://golang.org/cl/7424054

											
										
										
											2013-03-07 10:39:59 -07:00
+									P *p, *p1;
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									M *mp;
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									G *gp;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									bool add;
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
-												runtime: disable preemption in several scheduler functions
Required for preemptive scheduler, see the comments for details.

R=golang-dev, khr, iant, khr
CC=golang-dev
https://golang.org/cl/9740051

											
										
										
											2013-06-03 04:40:38 -06:00
+									m->locks++;  // disable preemption because it can be holding p in a local var
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									gp = runtime·netpoll(false);  // non-blocking
 									injectglist(gp);
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									add = needaddgcproc();
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·lock(&runtime·sched);
 									if(newprocs) {
 										procresize(newprocs);
 										newprocs = 0;
 									} else
 										procresize(runtime·gomaxprocs);
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									runtime·sched.gcwaiting = 0;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
-												runtime: fix deadlock
The deadlock episodically occurs on misc/cgo/test/TestCthread.
The problem is that starttheworld() leaves some P's with local work
without M's. Then all active M's enter into syscalls, but reject to
wake another M's due to the following check (both in entersyscallblock() and in retake()):
if(p->runqhead == p->runqtail &&
        runtime·atomicload(&runtime·sched.nmspinning) +
        runtime·atomicload(&runtime·sched.npidle) > 0)
        continue;

R=rsc
CC=golang-dev
https://golang.org/cl/7424054

											
										
										
											2013-03-07 10:39:59 -07:00
+									p1 = nil;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									while(p = pidleget()) {
 										// procresize() puts p's with work at the beginning of the list.
 										// Once we reach a p without a run queue, the rest don't have one either.
 										if(p->runqhead == p->runqtail) {
 											pidleput(p);
 											break;
 										}
-												runtime: fix scheduler race condition
In starttheworld() we assume that P's with local work
are situated in the beginning of idle P list.
However, once we start the first M, it can execute all local G's
and steal G's from other P's.
That breaks the assumption above. Thus starttheworld() will fail
to start some P's with local work.
It seems that it can not lead to very bad things, but still
it's wrong and breaks other assumtions
(e.g. we can have a spinning M with local work).
The fix is to collect all P's with local work first,
and only then start them.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10051045

											
										
										
											2013-06-12 08:46:35 -06:00
+										p->m = mget();
 										p->link = p1;
 										p1 = p;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									}
 									if(runtime·sched.sysmonwait) {
 										runtime·sched.sysmonwait = false;
 										runtime·notewakeup(&runtime·sched.sysmonnote);
 									}
 									runtime·unlock(&runtime·sched);
-												runtime: fix deadlock
The deadlock episodically occurs on misc/cgo/test/TestCthread.
The problem is that starttheworld() leaves some P's with local work
without M's. Then all active M's enter into syscalls, but reject to
wake another M's due to the following check (both in entersyscallblock() and in retake()):
if(p->runqhead == p->runqtail &&
        runtime·atomicload(&runtime·sched.nmspinning) +
        runtime·atomicload(&runtime·sched.npidle) > 0)
        continue;

R=rsc
CC=golang-dev
https://golang.org/cl/7424054

											
										
										
											2013-03-07 10:39:59 -07:00
+									while(p1) {
 										p = p1;
 										p1 = p1->link;
-												runtime: fix scheduler race condition
In starttheworld() we assume that P's with local work
are situated in the beginning of idle P list.
However, once we start the first M, it can execute all local G's
and steal G's from other P's.
That breaks the assumption above. Thus starttheworld() will fail
to start some P's with local work.
It seems that it can not lead to very bad things, but still
it's wrong and breaks other assumtions
(e.g. we can have a spinning M with local work).
The fix is to collect all P's with local work first,
and only then start them.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10051045

											
										
										
											2013-06-12 08:46:35 -06:00
+										if(p->m) {
 											mp = p->m;
 											p->m = nil;
 											if(mp->nextp)
 												runtime·throw("starttheworld: inconsistent mp->nextp");
 											mp->nextp = p;
 											runtime·notewakeup(&mp->park);
 										} else {
 											// Start M to run P.  Do not start another M below.
 											newm(nil, p);
 											add = false;
 										}
-												runtime: fix deadlock
The deadlock episodically occurs on misc/cgo/test/TestCthread.
The problem is that starttheworld() leaves some P's with local work
without M's. Then all active M's enter into syscalls, but reject to
wake another M's due to the following check (both in entersyscallblock() and in retake()):
if(p->runqhead == p->runqtail &&
        runtime·atomicload(&runtime·sched.nmspinning) +
        runtime·atomicload(&runtime·sched.npidle) > 0)
        continue;

R=rsc
CC=golang-dev
https://golang.org/cl/7424054

											
										
										
											2013-03-07 10:39:59 -07:00
+									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(add) {
-												runtime: refactor helpgc functionality in preparation for parallel GC
Parallel GC needs to know in advance how many helper threads will be there.
Hopefully it's the last patch before I can tackle parallel sweep phase.
The benchmarks are unaffected.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/6200064

											
										
										
											2012-05-15 09:10:16 -06:00
+										// If GC could have used another helper proc, start one now,
 										// in the hope that it will be available next time.
 										// It would have been even better to start it before the collection,
 										// but doing so requires allocating memory, so it's tricky to
 										// coordinate.  This lazy approach works out in practice:
 										// we don't mind if the first couple gc rounds don't have quite
 										// the maximum number of procs.
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+										newm(mhelpgc, nil);
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+									}
-												runtime: disable preemption in several scheduler functions
Required for preemptive scheduler, see the comments for details.

R=golang-dev, khr, iant, khr
CC=golang-dev
https://golang.org/cl/9740051

											
										
										
											2013-06-03 04:40:38 -06:00
+									m->locks--;
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+									if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
 										g->stackguard0 = StackPreempt;
-												add support for ref counts to memory allocator.

mark and sweep, stop the world garbage collector
(intermediate step in the way to ref counting).
can run pretty with an explicit gc after each file.

R=r
DELTA=502  (346 added, 143 deleted, 13 changed)
OCL=20630
CL=20635

											
										
										
											2008-12-05 16:24:18 -07:00
+								}
-												proper handling of signals.
do not run init on g0.

R=r
DELTA=161  (124 added, 23 deleted, 14 changed)
OCL=15490
CL=15497

											
										
										
											2008-09-18 16:56:46 -06:00
+								// Called to start an M.
 								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·mstart(void)
-												proper handling of signals.
do not run init on g0.

R=r
DELTA=161  (124 added, 23 deleted, 14 changed)
OCL=15490
CL=15497

											
										
										
											2008-09-18 16:56:46 -06:00
+								{
-												runtime: only define SEH when we need it.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/11769043

											
										
										
											2013-07-24 10:59:47 -06:00
+								#ifdef GOOS_windows
 								#ifdef GOARCH_386
-												runtime: handle windows exceptions, even in cgo programs

Fixes #3543.

R=golang-dev, kardianos, rsc
CC=golang-dev, hectorchu, vcc.163
https://golang.org/cl/6245063

											
										
										
											2012-05-29 23:10:54 -06:00
+									// It is used by windows-386 only. Unfortunately, seh needs
 									// to be located on os stack, and mstart runs on os stack
 									// for both m0 and m.
 									SEH seh;
-												runtime: only define SEH when we need it.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/11769043

											
										
										
											2013-07-24 10:59:47 -06:00
+								#endif
 								#endif
-												runtime: handle windows exceptions, even in cgo programs

Fixes #3543.

R=golang-dev, kardianos, rsc
CC=golang-dev, hectorchu, vcc.163
https://golang.org/cl/6245063

											
										
										
											2012-05-29 23:10:54 -06:00
-												runtime: close TODO now that 8c bug is fixed

R=r
CC=golang-dev
https://golang.org/cl/183138

											
										
										
											2010-01-06 20:24:11 -07:00
+									if(g != m->g0)
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+										runtime·throw("bad runtime·mstart");
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
 									// Record top of stack for use by mcall.
 									// Once we call schedule we're never coming back,
 									// so other calls can reuse this stack space.
 									runtime·gosave(&m->g0->sched);
-												runtime: adjust traceback / garbage collector boundary

The garbage collection routine addframeroots is duplicating
logic in the traceback routine that calls it, sometimes correctly,
sometimes incorrectly, sometimes incompletely.
Pass necessary information to addframeroots instead of
deriving it anew.

Should make addframeroots significantly more robust.
It's certainly smaller.

Also try to standardize on uintptr for saved pc, sp values.

Will make CL 10036044 trivial.

R=golang-dev, dave, dvyukov
CC=golang-dev
https://golang.org/cl/10169045

											
										
										
											2013-06-12 06:49:38 -06:00
+									m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
-												runtime: add stackguard0 to G
This is part of preemptive scheduler.
stackguard0 is checked in split stack checks and can be set to StackPreempt.
stackguard is not set to StackPreempt (holds the original value).

R=golang-dev, daniel.morsing, iant
CC=golang-dev
https://golang.org/cl/9875043

											
										
										
											2013-06-03 02:28:24 -06:00
+									m->g0->stackguard = m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
-												runtime: more cgocallback_gofunc work

Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.

Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).

Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.

R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/11551044

											
										
										
											2013-07-24 07:01:57 -06:00
+								#ifdef GOOS_windows
 								#ifdef GOARCH_386
-												runtime: handle windows exceptions, even in cgo programs

Fixes #3543.

R=golang-dev, kardianos, rsc
CC=golang-dev, hectorchu, vcc.163
https://golang.org/cl/6245063

											
										
										
											2012-05-29 23:10:54 -06:00
+									m->seh = &seh;
-												runtime: more cgocallback_gofunc work

Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.

Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).

Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.

R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/11551044

											
										
										
											2013-07-24 07:01:57 -06:00
+								#endif
 								#endif
-												runtime: on 386, fix FP control word on all threads, not just initial thread

It is possible that Linux and Windows copy the FP control word
from the parent thread when creating a new thread.  Empirically,
Darwin does not.  Reset the FP control world in all cases.

Enable the floating-point strconv test.

Fixes #2917 (again).

R=golang-dev, r, iant
CC=golang-dev
https://golang.org/cl/5660047

											
										
										
											2012-02-13 23:23:15 -07:00
+									runtime·asminit();
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+									runtime·minit();
-												runtime: do not handle signals before configuring handler

There was a small window during program initialization
where a signal could come in before the handling mechanisms
were set up to handle it.  Delay the signal-handler installation
until we're ready for the signals.

Fixes #3314.

R=golang-dev, dsymonds, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/5833049

											
										
										
											2012-03-15 20:17:54 -06:00
 									// Install signal handlers; after minit so that minit can
 									// prepare the thread to be able to handle the signals.
-												runtime: detect deadlocks in programs using cgo
When cgo is used, runtime creates an additional M to handle callbacks on threads not created by Go.
This effectively disabled deadlock detection, which is a right thing, because Go program can be blocked
and only serve callbacks on external threads.
This also disables deadlock detection under race detector, because it happens to use cgo.
With this change the additional M is created lazily on first cgo call. So deadlock detector
works for programs that import "C", "net" or "net/http/pprof" but do not use them in fact.
Also fixes deadlock detector under race detector.
It should be fine to create the M later, because C code can not call into Go before first cgo call,
because C code does not know when Go initialization has completed. So a Go program need to call into C
first either to create an external thread, or notify a thread created in global ctor that Go
initialization has completed.
Fixes #4973.
Fixes #5475.

R=golang-dev, minux.ma, iant
CC=golang-dev
https://golang.org/cl/9303046

											
										
										
											2013-05-22 12:57:47 -06:00
+									if(m == &runtime·m0)
-												runtime: do not handle signals before configuring handler

There was a small window during program initialization
where a signal could come in before the handling mechanisms
were set up to handle it.  Delay the signal-handler installation
until we're ready for the signals.

Fixes #3314.

R=golang-dev, dsymonds, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/5833049

											
										
										
											2012-03-15 20:17:54 -06:00
+										runtime·initsig();
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
 									if(m->mstartfn)
 										m->mstartfn();
-												runtime: do not handle signals before configuring handler

There was a small window during program initialization
where a signal could come in before the handling mechanisms
were set up to handle it.  Delay the signal-handler installation
until we're ready for the signals.

Fixes #3314.

R=golang-dev, dsymonds, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/5833049

											
										
										
											2012-03-15 20:17:54 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(m->helpgc) {
-												runtime: faster parallel GC
Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase.

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkTree2-8               97100768     71417553  -26.45%
garbage.BenchmarkTree2LastPause-8     970931485    714103692  -26.45%
garbage.BenchmarkTree2Pause-8         469127802    345029253  -26.45%
garbage.BenchmarkParser-8            2880950854   2715456901   -5.74%
garbage.BenchmarkParserLastPause-8    137047399    103336476  -24.60%
garbage.BenchmarkParserPause-8         80686028     58922680  -26.97%

R=golang-dev, 0xe2.0x9a.0x9b, dave, adg, rsc, iant
CC=golang-dev
https://golang.org/cl/7816044

											
										
										
											2013-03-21 02:48:02 -06:00
+										m->helpgc = 0;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										stopm();
 									} else if(m != &runtime·m0) {
 										acquirep(m->nextp);
 										m->nextp = nil;
 									}
 									schedule();
-												runtime: handle windows exceptions, even in cgo programs

Fixes #3543.

R=golang-dev, kardianos, rsc
CC=golang-dev, hectorchu, vcc.163
https://golang.org/cl/6245063

											
										
										
											2012-05-29 23:10:54 -06:00
 									// TODO(brainman): This point is never reached, because scheduler
 									// does not release os threads at the moment. But once this path
 									// is enabled, we must remove our seh here.
-												proper handling of signals.
do not run init on g0.

R=r
DELTA=161  (124 added, 23 deleted, 14 changed)
OCL=15490
CL=15497

											
										
										
											2008-09-18 16:56:46 -06:00
+								}
-												runtime/cgo: make symbol naming consistent

The naming in this package is a disaster.
Make it all consistent.

Remove some 'static' from functions that will
be referred to from other files soon.

This CL is purely renames using global search and replace.

Submitting separately so that real changes will not
be drowned out by these renames in future CLs.

TBR=iant
CC=golang-dev
https://golang.org/cl/7416046

											
										
										
											2013-02-28 14:24:38 -07:00
+								// When running with cgo, we call _cgo_thread_start
-c, 8l dynamic loading support.
better mach binaries.
cgo working on darwin+linux amd64+386.
eliminated context switches - pi is 30x faster.
add libcgo to build.

on snow leopard:
  - non-cgo binaries work; all tests pass.
  - cgo binaries work on amd64 but not 386.

R=r
DELTA=2031  (1316 added, 626 deleted, 89 changed)
OCL=35264
CL=35304

											
										
										
											2009-10-03 11:37:12 -06:00
+								// to start threads for us so that we can play nicely with
 								// foreign code.
-												runtime/cgo: make symbol naming consistent

The naming in this package is a disaster.
Make it all consistent.

Remove some 'static' from functions that will
be referred to from other files soon.

This CL is purely renames using global search and replace.

Submitting separately so that real changes will not
be drowned out by these renames in future CLs.

TBR=iant
CC=golang-dev
https://golang.org/cl/7416046

											
										
										
											2013-02-28 14:24:38 -07:00
+								void (*_cgo_thread_start)(void*);
-c, 8l dynamic loading support.
better mach binaries.
cgo working on darwin+linux amd64+386.
eliminated context switches - pi is 30x faster.
add libcgo to build.

on snow leopard:
  - non-cgo binaries work; all tests pass.
  - cgo binaries work on amd64 but not 386.

R=r
DELTA=2031  (1316 added, 626 deleted, 89 changed)
OCL=35264
CL=35304

											
										
										
											2009-10-03 11:37:12 -06:00
 								typedef struct CgoThreadStart CgoThreadStart;
 								struct CgoThreadStart
 								{
 									M *m;
 									G *g;
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+									uintptr *tls;
-c, 8l dynamic loading support.
better mach binaries.
cgo working on darwin+linux amd64+386.
eliminated context switches - pi is 30x faster.
add libcgo to build.

on snow leopard:
  - non-cgo binaries work; all tests pass.
  - cgo binaries work on amd64 but not 386.

R=r
DELTA=2031  (1316 added, 626 deleted, 89 changed)
OCL=35264
CL=35304

											
										
										
											2009-10-03 11:37:12 -06:00
+									void (*fn)(void);
 								};
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+								// Allocate a new m unassociated with any thread.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Can use p for allocation context if needed.
-												runtime: add timer support, use for package time

This looks like it is just moving some code from
time to runtime (and translating it to C), but the
runtime can do a better job managing the goroutines,
and it needs this functionality for its own maintenance
(for example, for the garbage collector to hand back
unused memory to the OS on a time delay).
Might as well have just one copy of the timer logic,
and runtime can't depend on time, so vice versa.

It also unifies Sleep, NewTicker, and NewTimer behind
one mechanism, so that there are no claims that one
is more efficient than another.  (For example, today
people recommend using time.After instead of time.Sleep
to avoid blocking an OS thread.)

Fixes #1644.
Fixes #1731.
Fixes #2190.

R=golang-dev, r, hectorchu, iant, iant, jsing, alex.brainman, dvyukov
CC=golang-dev
https://golang.org/cl/5334051

											
										
										
											2011-11-09 13:17:05 -07:00
+								M*
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								runtime·allocm(P *p)
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+								{
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									M *mp;
-												runtime: store types of allocated objects

R=rsc
CC=golang-dev
https://golang.org/cl/6569057

											
										
										
											2012-10-21 15:41:32 -06:00
+									static Type *mtype;  // The Go type M
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									m->locks++;  // disable GC because it can be called from sysmon
 									if(m->p == nil)
 										acquirep(p);  // temporarily borrow p for mallocs in this function
-												runtime: store types of allocated objects

R=rsc
CC=golang-dev
https://golang.org/cl/6569057

											
										
										
											2012-10-21 15:41:32 -06:00
+									if(mtype == nil) {
 										Eface e;
 										runtime·gc_m_ptr(&e);
 										mtype = ((PtrType*)e.type)->elem;
 									}
 									mp = runtime·cnew(mtype);
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									mcommoninit(mp);
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
-												runtime: add support for GOOS=solaris

R=alex.brainman, dave, jsing, gobot, rsc
CC=golang-codereviews
https://golang.org/cl/35990043

											
										
										
											2014-01-16 21:58:10 -07:00
+									// In case of cgo or Solaris, pthread_create will make us a stack.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// Windows will layout sched stack on OS stack.
-												runtime: add support for GOOS=solaris

R=alex.brainman, dave, jsing, gobot, rsc
CC=golang-codereviews
https://golang.org/cl/35990043

											
										
										
											2014-01-16 21:58:10 -07:00
+									if(runtime·iscgo || Solaris || Windows)
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+										mp->g0 = runtime·malg(-1);
 									else
 										mp->g0 = runtime·malg(8192);
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(p == m->p)
 										releasep();
 									m->locks--;
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+									if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
 										g->stackguard0 = StackPreempt;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									return mp;
 								}
 								static M* lockextra(bool nilokay);
 								static void unlockextra(M*);
 								// needm is called when a cgo callback happens on a
 								// thread without an m (a thread not created by Go).
 								// In this case, needm is expected to find an m to use
 								// and return with m, g initialized correctly.
 								// Since m and g are not set now (likely nil, but see below)
 								// needm is limited in what routines it can call. In particular
 								// it can only call nosplit functions (textflag 7) and cannot
 								// do any scheduling that requires an m.
 								//
 								// In order to avoid needing heavy lifting here, we adopt
 								// the following strategy: there is a stack of available m's
 								// that can be stolen. Using compare-and-swap
 								// to pop from the stack has ABA races, so we simulate
 								// a lock by doing an exchange (via casp) to steal the stack
 								// head and replace the top pointer with MLOCKED (1).
 								// This serves as a simple spin lock that we can use even
 								// without an m. The thread that locks the stack in this way
 								// unlocks the stack by storing a valid stack head pointer.
 								//
 								// In order to make sure that there is always an m structure
 								// available to be stolen, we maintain the invariant that there
 								// is always one more than needed. At the beginning of the
 								// program (if cgo is in use) the list is seeded with a single m.
 								// If needm finds that it has taken the last m off the list, its job
 								// is - once it has installed its own m so that it can do things like
 								// allocate memory - to create a spare m and put it on the list.
 								//
 								// Each of these extra m's also has a g0 and a curg that are
 								// pressed into service as the scheduling stack and current
 								// goroutine for the duration of the cgo callback.
 								//
 								// When the callback is done with the m, it calls dropm to
 								// put the m back on the list.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+								void
 								runtime·needm(byte x)
 								{
 									M *mp;
-												runtime: detect deadlocks in programs using cgo
When cgo is used, runtime creates an additional M to handle callbacks on threads not created by Go.
This effectively disabled deadlock detection, which is a right thing, because Go program can be blocked
and only serve callbacks on external threads.
This also disables deadlock detection under race detector, because it happens to use cgo.
With this change the additional M is created lazily on first cgo call. So deadlock detector
works for programs that import "C", "net" or "net/http/pprof" but do not use them in fact.
Also fixes deadlock detector under race detector.
It should be fine to create the M later, because C code can not call into Go before first cgo call,
because C code does not know when Go initialization has completed. So a Go program need to call into C
first either to create an external thread, or notify a thread created in global ctor that Go
initialization has completed.
Fixes #4973.
Fixes #5475.

R=golang-dev, minux.ma, iant
CC=golang-dev
https://golang.org/cl/9303046

											
										
										
											2013-05-22 12:57:47 -06:00
+									if(runtime·needextram) {
 										// Can happen if C/C++ code calls Go from a global ctor.
 										// Can not throw, because scheduler is not initialized yet.
 										runtime·write(2, "fatal error: cgo callback before cgo call\n",
 											sizeof("fatal error: cgo callback before cgo call\n")-1);
 										runtime·exit(1);
 									}
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									// Lock extra list, take head, unlock popped list.
 									// nilokay=false is safe here because of the invariant above,
 									// that the extra list always contains or will soon contain
 									// at least one m.
 									mp = lockextra(false);
 									// Set needextram when we've just emptied the list,
 									// so that the eventual call into cgocallbackg will
 									// allocate a new m for the extra list. We delay the
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									// allocation until then so that it can be done
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									// after exitsyscall makes sure it is okay to be
 									// running at all (that is, there's no garbage collection
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									// running right now).
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									mp->needextram = mp->schedlink == nil;
 									unlockextra(mp->schedlink);
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									// Install m and g (= m->g0) and set the stack bounds
 									// to match the current stack. We don't actually know
 									// how big the stack is, like we don't know how big any
 									// scheduling stack is, but we assume there's at least 32 kB,
 									// which is more than enough for us.
 									runtime·setmg(mp, mp->g0);
 									g->stackbase = (uintptr)(&x + 1024);
 									g->stackguard = (uintptr)(&x - 32*1024);
-												runtime: add stackguard0 to G
This is part of preemptive scheduler.
stackguard0 is checked in split stack checks and can be set to StackPreempt.
stackguard is not set to StackPreempt (holds the original value).

R=golang-dev, daniel.morsing, iant
CC=golang-dev
https://golang.org/cl/9875043

											
										
										
											2013-06-03 02:28:24 -06:00
+									g->stackguard0 = g->stackguard;
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
-												runtime: more cgocallback_gofunc work

Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.

Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).

Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.

R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/11551044

											
										
										
											2013-07-24 07:01:57 -06:00
+								#ifdef GOOS_windows
 								#ifdef GOARCH_386
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									// On windows/386, we need to put an SEH frame (two words)
-												runtime: reduce frame size for runtime.cgocallback_gofunc

Tying preemption to stack splits means that we have to able to
complete the call to exitsyscall (inside cgocallbackg at least for now)
without any stack split checks, meaning that the whole sequence
has to work within 128 bytes of stack, unless we increase the size
of the red zone. This CL frees up 24 bytes along that critical path
on amd64. (The 32-bit systems have plenty of space because all
their words are smaller.)

R=dvyukov
CC=golang-dev
https://golang.org/cl/11676043

											
										
										
											2013-07-23 16:40:02 -06:00
+									// somewhere on the current stack. We are called from cgocallback_gofunc
 									// and we know that it will leave two unused words below m->curg->sched.sp.
 									// Use those.
-												runtime: more cgocallback_gofunc work

Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.

Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).

Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.

R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/11551044

											
										
										
											2013-07-24 07:01:57 -06:00
+									m->seh = (SEH*)((uintptr*)&x + 1);
 								#endif
 								#endif
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
 									// Initialize this thread to use the m.
 									runtime·asminit();
 									runtime·minit();
 								}
 								// newextram allocates an m and puts it on the extra list.
 								// It is called with a working local m, so that it can do things
 								// like call schedlock and allocate.
 								void
 								runtime·newextram(void)
 								{
 									M *mp, *mnext;
 									G *gp;
 									// Create extra goroutine locked to extra m.
 									// The goroutine is the context in which the cgo callback will run.
 									// The sched.pc will never be returned to, but setting it to
 									// runtime.goexit makes clear to the traceback routines where
 									// the goroutine stack ends.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									mp = runtime·allocm(nil);
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									gp = runtime·malg(4096);
-												runtime: adjust traceback / garbage collector boundary

The garbage collection routine addframeroots is duplicating
logic in the traceback routine that calls it, sometimes correctly,
sometimes incorrectly, sometimes incompletely.
Pass necessary information to addframeroots instead of
deriving it anew.

Should make addframeroots significantly more robust.
It's certainly smaller.

Also try to standardize on uintptr for saved pc, sp values.

Will make CL 10036044 trivial.

R=golang-dev, dave, dvyukov
CC=golang-dev
https://golang.org/cl/10169045

											
										
										
											2013-06-12 06:49:38 -06:00
+									gp->sched.pc = (uintptr)runtime·goexit;
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									gp->sched.sp = gp->stackbase;
-												runtime: record proper goroutine state during stack split

Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048

											
										
										
											2013-06-27 09:32:01 -06:00
+									gp->sched.lr = 0;
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									gp->sched.g = gp;
-												runtime: fix traceback in cgo programs
Fixes #6061.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12609043

											
										
										
											2013-08-07 14:31:52 -06:00
+									gp->syscallpc = gp->sched.pc;
 									gp->syscallsp = gp->sched.sp;
 									gp->syscallstack = gp->stackbase;
 									gp->syscallguard = gp->stackguard;
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									gp->status = Gsyscall;
 									mp->curg = gp;
 									mp->locked = LockInternal;
 									mp->lockedg = gp;
 									gp->lockedm = mp;
-												runtime: fix traceback in cgo programs
Fixes #6061.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12609043

											
										
										
											2013-08-07 14:31:52 -06:00
+									gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
 									if(raceenabled)
 										gp->racectx = runtime·racegostart(runtime·newextram);
-												runtime: fix heap corruption

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397049

											
										
										
											2013-02-21 10:59:46 -07:00
+									// put on allg for garbage collector
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									allgadd(gp);
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
 									// Add m to the extra list.
 									mnext = lockextra(true);
 									mp->schedlink = mnext;
 									unlockextra(mp);
 								}
 								// dropm is called when a cgo callback has called needm but is now
 								// done with the callback and returning back into the non-Go thread.
 								// It puts the current m back onto the extra list.
 								//
 								// The main expense here is the call to signalstack to release the
 								// m's signal stack, and then the call to needm on the next callback
 								// from this thread. It is tempting to try to save the m for next time,
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+								// which would eliminate both these costs, but there might not be
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+								// a next time: the current thread (which Go does not control) might exit.
 								// If we saved the m for that thread, there would be an m leak each time
 								// such a thread exited. Instead, we acquire and release an m on each
 								// call. These should typically not be scheduling operations, just a few
 								// atomics, so the cost should be small.
 								//
 								// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
 								// variable using pthread_key_create. Unlike the pthread keys we already use
 								// on OS X, this dummy key would never be read by Go code. It would exist
 								// only so that we could register at thread-exit-time destructor.
 								// That destructor would put the m back onto the extra list.
 								// This is purely a performance optimization. The current version,
 								// in which dropm happens on each cgo call, is still correct too.
 								// We may have to keep the current version on systems with cgo
 								// but without pthreads, like Windows.
 								void
 								runtime·dropm(void)
 								{
 									M *mp, *mnext;
 									// Undo whatever initialization minit did during needm.
 									runtime·unminit();
-												runtime: more cgocallback_gofunc work

Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.

Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).

Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.

R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/11551044

											
										
										
											2013-07-24 07:01:57 -06:00
 								#ifdef GOOS_windows
 								#ifdef GOARCH_386
-												runtime: reset typed dangling pointer
If for whatever reason seh points into Go heap region,
the dangling pointer will cause memory corruption during GC.
Update #5193.

R=golang-dev, alex.brainman, iant
CC=golang-dev
https://golang.org/cl/8402045

											
										
										
											2013-04-06 21:00:45 -06:00
+									m->seh = nil;  // reset dangling typed pointer
-												runtime: more cgocallback_gofunc work

Debugging the Windows breakage I noticed that SEH
only exists on 386, so we can balance the two stacks
a little more on amd64 and reclaim another word.

Now we're down to just one word consumed by
cgocallback_gofunc, having reclaimed 25% of the
overall budget (4 words out of 16).

Separately, fix windows/386 - the SEH must be on the
m0 stack, as must the saved SP, so we are forced to have
a three-word frame for 386. It matters much less for
386, because there 128 bytes gives 32 words to use.

R=dvyukov, alex.brainman
CC=golang-dev
https://golang.org/cl/11551044

											
										
										
											2013-07-24 07:01:57 -06:00
+								#endif
 								#endif
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
 									// Clear m and g, and return m to the extra list.
 									// After the call to setmg we can only call nosplit functions.
 									mp = m;
 									runtime·setmg(nil, nil);
 									mnext = lockextra(true);
 									mp->schedlink = mnext;
 									unlockextra(mp);
 								}
 								#define MLOCKED ((M*)1)
 								// lockextra locks the extra list and returns the list head.
 								// The caller must unlock the list by storing a new list head
 								// to runtime.extram. If nilokay is true, then lockextra will
 								// return a nil list head if that's what it finds. If nilokay is false,
 								// lockextra will keep waiting until the list head is no longer nil.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+								static M*
 								lockextra(bool nilokay)
 								{
 									M *mp;
 									void (*yield)(void);
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+									for(;;) {
 										mp = runtime·atomicloadp(&runtime·extram);
 										if(mp == MLOCKED) {
 											yield = runtime·osyield;
 											yield();
 											continue;
 										}
 										if(mp == nil && !nilokay) {
 											runtime·usleep(1);
 											continue;
 										}
 										if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
 											yield = runtime·osyield;
 											yield();
 											continue;
 										}
 										break;
 									}
 									return mp;
 								}
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+								static void
 								unlockextra(M *mp)
 								{
 									runtime·atomicstorep(&runtime·extram, mp);
 								}
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+								// Create a new m.  It will start off with a call to fn, or else the scheduler.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static void
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+								newm(void(*fn)(void), P *p)
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
+								{
 									M *mp;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									mp = runtime·allocm(p);
 									mp->nextp = p;
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+									mp->mstartfn = fn;
-												runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104

											
										
										
											2013-02-20 15:48:23 -07:00
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
+									if(runtime·iscgo) {
 										CgoThreadStart ts;
-												runtime/cgo: make symbol naming consistent

The naming in this package is a disaster.
Make it all consistent.

Remove some 'static' from functions that will
be referred to from other files soon.

This CL is purely renames using global search and replace.

Submitting separately so that real changes will not
be drowned out by these renames in future CLs.

TBR=iant
CC=golang-dev
https://golang.org/cl/7416046

											
										
										
											2013-02-28 14:24:38 -07:00
+										if(_cgo_thread_start == nil)
 											runtime·throw("_cgo_thread_start missing");
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+										ts.m = mp;
 										ts.g = mp->g0;
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+										ts.tls = mp->tls;
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+										ts.fn = runtime·mstart;
-												runtime/cgo: make symbol naming consistent

The naming in this package is a disaster.
Make it all consistent.

Remove some 'static' from functions that will
be referred to from other files soon.

This CL is purely renames using global search and replace.

Submitting separately so that real changes will not
be drowned out by these renames in future CLs.

TBR=iant
CC=golang-dev
https://golang.org/cl/7416046

											
										
										
											2013-02-28 14:24:38 -07:00
+										runtime·asmcgocall(_cgo_thread_start, &ts);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										return;
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+									}
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+									runtime·newosproc(mp, (byte*)mp->g0->stackbase);
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Stops execution of the current m until new work is available.
 								// Returns with acquired P.
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								static void
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								stopm(void)
 								{
 									if(m->locks)
 										runtime·throw("stopm holding locks");
 									if(m->p)
 										runtime·throw("stopm holding p");
 									if(m->spinning) {
 										m->spinning = false;
 										runtime·xadd(&runtime·sched.nmspinning, -1);
 									}
 								retry:
 									runtime·lock(&runtime·sched);
 									mput(m);
 									runtime·unlock(&runtime·sched);
 									runtime·notesleep(&m->park);
 									runtime·noteclear(&m->park);
 									if(m->helpgc) {
 										runtime·gchelper();
-												runtime: faster parallel GC
Use per-thread work buffers instead of global mutex-protected pool. This eliminates contention from parallel scan phase.

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkTree2-8               97100768     71417553  -26.45%
garbage.BenchmarkTree2LastPause-8     970931485    714103692  -26.45%
garbage.BenchmarkTree2Pause-8         469127802    345029253  -26.45%
garbage.BenchmarkParser-8            2880950854   2715456901   -5.74%
garbage.BenchmarkParserLastPause-8    137047399    103336476  -24.60%
garbage.BenchmarkParserPause-8         80686028     58922680  -26.97%

R=golang-dev, 0xe2.0x9a.0x9b, dave, adg, rsc, iant
CC=golang-dev
https://golang.org/cl/7816044

											
										
										
											2013-03-21 02:48:02 -06:00
+										m->helpgc = 0;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										m->mcache = nil;
 										goto retry;
-												first cut at multithreading.  works on Linux.

* kick off new os procs (machs) as needed
* add sys·sleep for testing
* add Lock, Rendez
* properly lock mal, sys·newproc, scheduler
* linux syscall arg #4 is in R10, not CX
* chans are not multithread-safe yet
* multithreading disabled by default;
  set $gomaxprocs=2 (or 1000) to turn it on

This should build on OS X but may not.
Rob and I will fix soon after submitting.

TBR=r
OCL=13784
CL=13842

											
										
										
											2008-08-04 17:43:49 -06:00
+									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									acquirep(m->nextp);
 									m->nextp = nil;
 								}
-												first cut at multithreading.  works on Linux.

* kick off new os procs (machs) as needed
* add sys·sleep for testing
* add Lock, Rendez
* properly lock mal, sys·newproc, scheduler
* linux syscall arg #4 is in R10, not CX
* chans are not multithread-safe yet
* multithreading disabled by default;
  set $gomaxprocs=2 (or 1000) to turn it on

This should build on OS X but may not.
Rob and I will fix soon after submitting.

TBR=r
OCL=13784
CL=13842

											
										
										
											2008-08-04 17:43:49 -06:00
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+								static void
 								mspinning(void)
 								{
 									m->spinning = true;
 								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Schedules some M to run the p (creates an M if necessary).
-												runtime: fix comment
Void function can not return false.

R=golang-codereviews, bradfitz
CC=golang-codereviews
https://golang.org/cl/52000043

											
										
										
											2014-01-14 01:58:13 -07:00
+								// If p==nil, tries to get an idle P, if no idle P's does nothing.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static void
 								startm(P *p, bool spinning)
 								{
 									M *mp;
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+									void (*fn)(void);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
 									runtime·lock(&runtime·sched);
 									if(p == nil) {
 										p = pidleget();
 										if(p == nil) {
 											runtime·unlock(&runtime·sched);
 											if(spinning)
 												runtime·xadd(&runtime·sched.nmspinning, -1);
 											return;
 										}
 									}
 									mp = mget();
 									runtime·unlock(&runtime·sched);
 									if(mp == nil) {
-												runtime: start all threads with runtime.mstart

Putting the M initialization in multiple places will not scale.
Various code assumes mstart is the start already. Make it so.

R=golang-dev, devon.odell
CC=golang-dev
https://golang.org/cl/7420048

											
										
										
											2013-03-01 09:44:43 -07:00
+										fn = nil;
 										if(spinning)
 											fn = mspinning;
 										newm(fn, p);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										return;
 									}
 									if(mp->spinning)
 										runtime·throw("startm: m is spinning");
 									if(mp->nextp)
 										runtime·throw("startm: m has p");
 									mp->spinning = spinning;
 									mp->nextp = p;
 									runtime·notewakeup(&mp->park);
 								}
 								// Hands off P from syscall or locked M.
 								static void
 								handoffp(P *p)
 								{
 									// if it has local work, start it straight away
 									if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
 										startm(p, false);
 										return;
 									}
 									// no local work, check that there are no spinning/idle M's,
 									// otherwise our help is not required
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·cas(&runtime·sched.nmspinning, 0, 1)) {
 										startm(p, true);
 										return;
 									}
 									runtime·lock(&runtime·sched);
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									if(runtime·sched.gcwaiting) {
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										p->status = Pgcstop;
 										if(--runtime·sched.stopwait == 0)
 											runtime·notewakeup(&runtime·sched.stopnote);
 										runtime·unlock(&runtime·sched);
 										return;
 									}
 									if(runtime·sched.runqsize) {
 										runtime·unlock(&runtime·sched);
 										startm(p, false);
 										return;
 									}
-												runtime: fix deadlock in network poller
The invariant is that there must be at least one running P or a thread polling network.
It was broken.
Fixes #5216.

R=golang-dev, bradfitz, r
CC=golang-dev
https://golang.org/cl/8459043

											
										
										
											2013-04-06 23:27:54 -06:00
+									// If this is the last running P and nobody is polling network,
 									// need to wakeup another M to poll network.
 									if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
 										runtime·unlock(&runtime·sched);
 										startm(p, false);
 										return;
 									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									pidleput(p);
 									runtime·unlock(&runtime·sched);
 								}
 								// Tries to add one more P to execute G's.
 								// Called when a G is made runnable (newproc, ready).
 								static void
 								wakep(void)
 								{
 									// be conservative about spinning threads
 									if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
 										return;
 									startm(nil, true);
 								}
 								// Stops execution of the current m that is locked to a g until the g is runnable again.
 								// Returns with acquired P.
 								static void
 								stoplockedm(void)
 								{
 									P *p;
 									if(m->lockedg == nil || m->lockedg->lockedm != m)
 										runtime·throw("stoplockedm: inconsistent locking");
 									if(m->p) {
 										// Schedule another M to run this p.
 										p = releasep();
 										handoffp(p);
 									}
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+									incidlelocked(1);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// Wait until another thread schedules lockedg again.
 									runtime·notesleep(&m->park);
 									runtime·noteclear(&m->park);
 									if(m->lockedg->status != Grunnable)
 										runtime·throw("stoplockedm: not runnable");
 									acquirep(m->nextp);
 									m->nextp = nil;
 								}
 								// Schedules the locked m to run the locked gp.
 								static void
 								startlockedm(G *gp)
 								{
 									M *mp;
 									P *p;
 									mp = gp->lockedm;
 									if(mp == m)
 										runtime·throw("startlockedm: locked to me");
 									if(mp->nextp)
 										runtime·throw("startlockedm: m has p");
 									// directly handoff current P to the locked m
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+									incidlelocked(-1);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									p = releasep();
 									mp->nextp = p;
 									runtime·notewakeup(&mp->park);
 									stopm();
 								}
 								// Stops the current m for stoptheworld.
 								// Returns when the world is restarted.
 								static void
 								gcstopm(void)
 								{
 									P *p;
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									if(!runtime·sched.gcwaiting)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·throw("gcstopm: not waiting for gc");
 									if(m->spinning) {
 										m->spinning = false;
 										runtime·xadd(&runtime·sched.nmspinning, -1);
 									}
 									p = releasep();
 									runtime·lock(&runtime·sched);
 									p->status = Pgcstop;
 									if(--runtime·sched.stopwait == 0)
 										runtime·notewakeup(&runtime·sched.stopnote);
 									runtime·unlock(&runtime·sched);
 									stopm();
 								}
 								// Schedules gp to run on the current M.
 								// Never returns.
 								static void
 								execute(G *gp)
 								{
 									int32 hz;
 									if(gp->status != Grunnable) {
 										runtime·printf("execute: bad g status %d\n", gp->status);
 										runtime·throw("execute: bad g status");
 									}
 									gp->status = Grunning;
-												runtime: output how long goroutines are blocked
Example of output:

goroutine 4 [sleep for 3 min]:
time.Sleep(0x34630b8a000)
        src/pkg/runtime/time.goc:31 +0x31
main.func·002()
        block.go:16 +0x2c
created by main.main
        block.go:17 +0x33

Full program and output are here:
http://play.golang.org/p/NEZdADI3Td

Fixes #6809.

R=golang-codereviews, khr, kamil.kisiel, bradfitz, rsc
CC=golang-codereviews
https://golang.org/cl/50420043

											
										
										
											2014-01-16 01:54:46 -07:00
+									gp->waitsince = 0;
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+									gp->preempt = false;
-												runtime: add stackguard0 to G
This is part of preemptive scheduler.
stackguard0 is checked in split stack checks and can be set to StackPreempt.
stackguard is not set to StackPreempt (holds the original value).

R=golang-dev, daniel.morsing, iant
CC=golang-dev
https://golang.org/cl/9875043

											
										
										
											2013-06-03 02:28:24 -06:00
+									gp->stackguard0 = gp->stackguard;
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+									m->p->schedtick++;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									m->curg = gp;
 									gp->m = m;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+									// Check whether the profiler needs to be turned on or off.
 									hz = runtime·sched.profilehz;
 									if(m->profilehz != hz)
 										runtime·resetcpuprofiler(hz);
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+									runtime·gogo(&gp->sched);
-												synch chan

SVN=127057

											
										
										
											2008-07-14 15:34:27 -06:00
+								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Finds a runnable goroutine to execute.
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+								// Tries to steal from other P's, get g from global queue, poll network.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static G*
-												undo CL 9776044 / 1e280889f997

Failure on bot:
http://build.golang.org/log/f4c648906e1289ec2237c1d0880fb1a8b1852a08

««« original CL description
runtime: fix CPU underutilization
runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.

R=rsc
TBR=rsc
CC=gobot, golang-dev
https://golang.org/cl/9776044
»»»

R=golang-dev
CC=golang-dev
https://golang.org/cl/10692043

											
										
										
											2013-06-27 11:03:35 -06:00
+								findrunnable(void)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								{
 									G *gp;
 									P *p;
 									int32 i;
 								top:
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									if(runtime·sched.gcwaiting) {
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										gcstopm();
 										goto top;
 									}
 									// local runq
 									gp = runqget(m->p);
 									if(gp)
 										return gp;
 									// global runq
 									if(runtime·sched.runqsize) {
 										runtime·lock(&runtime·sched);
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+										gp = globrunqget(m->p, 0);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·unlock(&runtime·sched);
 										if(gp)
 											return gp;
 									}
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									// poll network
 									gp = runtime·netpoll(false);  // non-blocking
 									if(gp) {
 										injectglist(gp->schedlink);
 										gp->status = Grunnable;
 										return gp;
 									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// If number of spinning M's >= number of busy P's, block.
 									// This is necessary to prevent excessive CPU consumption
 									// when GOMAXPROCS>>1 but the program parallelism is low.
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									if(!m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										goto stop;
 									if(!m->spinning) {
 										m->spinning = true;
 										runtime·xadd(&runtime·sched.nmspinning, 1);
 									}
 									// random steal from other P's
 									for(i = 0; i < 2*runtime·gomaxprocs; i++) {
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+										if(runtime·sched.gcwaiting)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											goto top;
 										p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
 										if(p == m->p)
 											gp = runqget(p);
 										else
 											gp = runqsteal(m->p, p);
 										if(gp)
 											return gp;
 									}
 								stop:
 									// return P and block
 									runtime·lock(&runtime·sched);
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									if(runtime·sched.gcwaiting) {
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·unlock(&runtime·sched);
 										goto top;
 									}
 									if(runtime·sched.runqsize) {
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+										gp = globrunqget(m->p, 0);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·unlock(&runtime·sched);
 										return gp;
 									}
 									p = releasep();
 									pidleput(p);
 									runtime·unlock(&runtime·sched);
 									if(m->spinning) {
 										m->spinning = false;
 										runtime·xadd(&runtime·sched.nmspinning, -1);
 									}
 									// check all runqueues once again
 									for(i = 0; i < runtime·gomaxprocs; i++) {
 										p = runtime·allp[i];
 										if(p && p->runqhead != p->runqtail) {
 											runtime·lock(&runtime·sched);
 											p = pidleget();
 											runtime·unlock(&runtime·sched);
 											if(p) {
 												acquirep(p);
 												goto top;
 											}
 											break;
 										}
 									}
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									// poll network
 									if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
 										if(m->p)
 											runtime·throw("findrunnable: netpoll with p");
 										if(m->spinning)
 											runtime·throw("findrunnable: netpoll with spinning");
 										gp = runtime·netpoll(true);  // block until new work is available
 										runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
 										if(gp) {
 											runtime·lock(&runtime·sched);
 											p = pidleget();
 											runtime·unlock(&runtime·sched);
 											if(p) {
 												acquirep(p);
 												injectglist(gp->schedlink);
 												gp->status = Grunnable;
 												return gp;
 											}
 											injectglist(gp);
 										}
 									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									stopm();
 									goto top;
 								}
-												runtime: fix CPU underutilization
runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.
This is reincarnation of cl/9776044 with the bug fixed.
The bug was due to code added after cl/9776044 was created:
if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
        runtime·lock(&runtime·sched);
        gp = globrunqget(m->p, 1);
        runtime·unlock(&runtime·sched);
}
If M gets gp from global runq here, it does not reset m->spinning.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10743044

											
										
										
											2013-07-11 13:57:36 -06:00
+								static void
 								resetspinning(void)
 								{
 									int32 nmspinning;
 									if(m->spinning) {
 										m->spinning = false;
 										nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
 										if(nmspinning < 0)
 											runtime·throw("findrunnable: negative nmspinning");
 									} else
 										nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
 									// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
 									// so see if we need to wakeup another P here.
 									if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
 										wakep();
 								}
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+								// Injects the list of runnable G's into the scheduler.
 								// Can run concurrently with GC.
 								static void
 								injectglist(G *glist)
 								{
 									int32 n;
 									G *gp;
 									if(glist == nil)
 										return;
 									runtime·lock(&runtime·sched);
 									for(n = 0; glist; n++) {
 										gp = glist;
 										glist = gp->schedlink;
 										gp->status = Grunnable;
 										globrunqput(gp);
 									}
 									runtime·unlock(&runtime·sched);
 									for(; n && runtime·sched.npidle; n--)
 										startm(nil, false);
 								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// One round of scheduler: find a runnable goroutine and execute it.
 								// Never returns.
 								static void
 								schedule(void)
-												slightly gratuitous reorg of scheduler code
	* rename select (very loaded word) nextgoroutine
	* separate out "enter scheduler" (sys·gosched)
	  from the scheduler itself (scheduler)

R=r
APPROVED=r
DELTA=36  (17 added, 15 deleted, 4 changed)
OCL=13772
CL=13774

											
										
										
											2008-08-02 23:34:04 -06:00
+								{
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									G *gp;
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+									uint32 tick;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
 									if(m->locks)
 										runtime·throw("schedule: holding locks");
 								top:
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									if(runtime·sched.gcwaiting) {
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										gcstopm();
 										goto top;
 									}
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+									gp = nil;
 									// Check the global runnable queue once in a while to ensure fairness.
 									// Otherwise two goroutines can completely occupy the local runqueue
 									// by constantly respawning each other.
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+									tick = m->p->schedtick;
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+									// This is a fancy way to say tick%61==0,
 									// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
 									if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
 										runtime·lock(&runtime·sched);
 										gp = globrunqget(m->p, 1);
 										runtime·unlock(&runtime·sched);
-												runtime: fix CPU underutilization
runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.
This is reincarnation of cl/9776044 with the bug fixed.
The bug was due to code added after cl/9776044 was created:
if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
        runtime·lock(&runtime·sched);
        gp = globrunqget(m->p, 1);
        runtime·unlock(&runtime·sched);
}
If M gets gp from global runq here, it does not reset m->spinning.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10743044

											
										
										
											2013-07-11 13:57:36 -06:00
+										if(gp)
 											resetspinning();
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+									}
 									if(gp == nil) {
 										gp = runqget(m->p);
 										if(gp && m->spinning)
 											runtime·throw("schedule: spinning with local work");
 									}
-												runtime: fix CPU underutilization
runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.
This is reincarnation of cl/9776044 with the bug fixed.
The bug was due to code added after cl/9776044 was created:
if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
        runtime·lock(&runtime·sched);
        gp = globrunqget(m->p, 1);
        runtime·unlock(&runtime·sched);
}
If M gets gp from global runq here, it does not reset m->spinning.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10743044

											
										
										
											2013-07-11 13:57:36 -06:00
+									if(gp == nil) {
 										gp = findrunnable();  // blocks until work is available
 										resetspinning();
-												undo CL 9776044 / 1e280889f997

Failure on bot:
http://build.golang.org/log/f4c648906e1289ec2237c1d0880fb1a8b1852a08

««« original CL description
runtime: fix CPU underutilization
runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.

R=rsc
TBR=rsc
CC=gobot, golang-dev
https://golang.org/cl/9776044
»»»

R=golang-dev
CC=golang-dev
https://golang.org/cl/10692043

											
										
										
											2013-06-27 11:03:35 -06:00
+									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(gp->lockedm) {
-												runtime: fix CPU underutilization
runtime.newproc/ready are deliberately sloppy about waking new M's,
they only ensure that there is at least 1 spinning M.
Currently to compensate for that, schedule() checks if the current P
has local work and there are no spinning M's, it wakes up another one.
It does not work if goroutines do not call schedule.
With this change a spinning M wakes up another M when it finds work to do.
It's also not ideal, but it fixes the underutilization.
A proper check would require to know the exact number of runnable G's,
but it's too expensive to maintain.
Fixes #5586.
This is reincarnation of cl/9776044 with the bug fixed.
The bug was due to code added after cl/9776044 was created:
if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
        runtime·lock(&runtime·sched);
        gp = globrunqget(m->p, 1);
        runtime·unlock(&runtime·sched);
}
If M gets gp from global runq here, it does not reset m->spinning.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10743044

											
										
										
											2013-07-11 13:57:36 -06:00
+										// Hands off own p to the locked m,
 										// then blocks waiting for a new p.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										startlockedm(gp);
 										goto top;
 									}
 									execute(gp);
-												slightly gratuitous reorg of scheduler code
	* rename select (very loaded word) nextgoroutine
	* separate out "enter scheduler" (sys·gosched)
	  from the scheduler itself (scheduler)

R=r
APPROVED=r
DELTA=36  (17 added, 15 deleted, 4 changed)
OCL=13772
CL=13774

											
										
										
											2008-08-02 23:34:04 -06:00
+								}
-												runtime: remove locks from netpoll hotpaths
Introduces two-phase goroutine parking mechanism -- prepare to park, commit park.
This mechanism does not require backing mutex to protect wait predicate.
Use it in netpoll. See comment in netpoll.goc for details.
This slightly reduces contention between reader, writer and read/write io notifications;
and just eliminates a bunch of mutex operations from hotpaths, thus making then faster.

benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4ConcurrentReadWrite           2109         1945   -7.78%
BenchmarkTCP4ConcurrentReadWrite-2         1162         1113   -4.22%
BenchmarkTCP4ConcurrentReadWrite-4          798          755   -5.39%
BenchmarkTCP4ConcurrentReadWrite-8          803          748   -6.85%
BenchmarkTCP4Persistent                    9411         9240   -1.82%
BenchmarkTCP4Persistent-2                  5888         5813   -1.27%
BenchmarkTCP4Persistent-4                  4016         3968   -1.20%
BenchmarkTCP4Persistent-8                  3943         3857   -2.18%

R=golang-codereviews, mikioh.mikioh, gobot, iant, rsc
CC=golang-codereviews, khr
https://golang.org/cl/45700043

											
										
										
											2014-01-22 00:27:16 -07:00
+								// Puts the current goroutine into a waiting state and calls unlockf.
 								// If unlockf returns false, the goroutine is resumed.
-												runtime: refactor goroutine blocking
The change is a preparation for the new scheduler.
It introduces runtime.park() function,
that will atomically unlock the mutex and park the goroutine.
It will allow to remove the racy readyonstop flag
that is difficult to implement w/o the global scheduler mutex.

R=rsc, remyoudompheng, dave
CC=golang-dev
https://golang.org/cl/6501077

											
										
										
											2012-09-18 11:15:46 -06:00
+								void
-												runtime: remove locks from netpoll hotpaths
Introduces two-phase goroutine parking mechanism -- prepare to park, commit park.
This mechanism does not require backing mutex to protect wait predicate.
Use it in netpoll. See comment in netpoll.goc for details.
This slightly reduces contention between reader, writer and read/write io notifications;
and just eliminates a bunch of mutex operations from hotpaths, thus making then faster.

benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4ConcurrentReadWrite           2109         1945   -7.78%
BenchmarkTCP4ConcurrentReadWrite-2         1162         1113   -4.22%
BenchmarkTCP4ConcurrentReadWrite-4          798          755   -5.39%
BenchmarkTCP4ConcurrentReadWrite-8          803          748   -6.85%
BenchmarkTCP4Persistent                    9411         9240   -1.82%
BenchmarkTCP4Persistent-2                  5888         5813   -1.27%
BenchmarkTCP4Persistent-4                  4016         3968   -1.20%
BenchmarkTCP4Persistent-8                  3943         3857   -2.18%

R=golang-codereviews, mikioh.mikioh, gobot, iant, rsc
CC=golang-codereviews, khr
https://golang.org/cl/45700043

											
										
										
											2014-01-22 00:27:16 -07:00
+								runtime·park(bool(*unlockf)(G*, void*), void *lock, int8 *reason)
-												runtime: refactor goroutine blocking
The change is a preparation for the new scheduler.
It introduces runtime.park() function,
that will atomically unlock the mutex and park the goroutine.
It will allow to remove the racy readyonstop flag
that is difficult to implement w/o the global scheduler mutex.

R=rsc, remyoudompheng, dave
CC=golang-dev
https://golang.org/cl/6501077

											
										
										
											2012-09-18 11:15:46 -06:00
+								{
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									m->waitlock = lock;
 									m->waitunlockf = unlockf;
-												runtime: refactor goroutine blocking
The change is a preparation for the new scheduler.
It introduces runtime.park() function,
that will atomically unlock the mutex and park the goroutine.
It will allow to remove the racy readyonstop flag
that is difficult to implement w/o the global scheduler mutex.

R=rsc, remyoudompheng, dave
CC=golang-dev
https://golang.org/cl/6501077

											
										
										
											2012-09-18 11:15:46 -06:00
+									g->waitreason = reason;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·mcall(park0);
 								}
-												runtime: remove locks from netpoll hotpaths
Introduces two-phase goroutine parking mechanism -- prepare to park, commit park.
This mechanism does not require backing mutex to protect wait predicate.
Use it in netpoll. See comment in netpoll.goc for details.
This slightly reduces contention between reader, writer and read/write io notifications;
and just eliminates a bunch of mutex operations from hotpaths, thus making then faster.

benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4ConcurrentReadWrite           2109         1945   -7.78%
BenchmarkTCP4ConcurrentReadWrite-2         1162         1113   -4.22%
BenchmarkTCP4ConcurrentReadWrite-4          798          755   -5.39%
BenchmarkTCP4ConcurrentReadWrite-8          803          748   -6.85%
BenchmarkTCP4Persistent                    9411         9240   -1.82%
BenchmarkTCP4Persistent-2                  5888         5813   -1.27%
BenchmarkTCP4Persistent-4                  4016         3968   -1.20%
BenchmarkTCP4Persistent-8                  3943         3857   -2.18%

R=golang-codereviews, mikioh.mikioh, gobot, iant, rsc
CC=golang-codereviews, khr
https://golang.org/cl/45700043

											
										
										
											2014-01-22 00:27:16 -07:00
+								static bool
 								parkunlock(G *gp, void *lock)
 								{
 									USED(gp);
 									runtime·unlock(lock);
 									return true;
 								}
 								// Puts the current goroutine into a waiting state and unlocks the lock.
 								// The goroutine can be made runnable again by calling runtime·ready(gp).
 								void
 								runtime·parkunlock(Lock *lock, int8 *reason)
 								{
 									runtime·park(parkunlock, lock, reason);
 								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// runtime·park continuation on g0.
 								static void
 								park0(G *gp)
 								{
-												runtime: remove locks from netpoll hotpaths
Introduces two-phase goroutine parking mechanism -- prepare to park, commit park.
This mechanism does not require backing mutex to protect wait predicate.
Use it in netpoll. See comment in netpoll.goc for details.
This slightly reduces contention between reader, writer and read/write io notifications;
and just eliminates a bunch of mutex operations from hotpaths, thus making then faster.

benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4ConcurrentReadWrite           2109         1945   -7.78%
BenchmarkTCP4ConcurrentReadWrite-2         1162         1113   -4.22%
BenchmarkTCP4ConcurrentReadWrite-4          798          755   -5.39%
BenchmarkTCP4ConcurrentReadWrite-8          803          748   -6.85%
BenchmarkTCP4Persistent                    9411         9240   -1.82%
BenchmarkTCP4Persistent-2                  5888         5813   -1.27%
BenchmarkTCP4Persistent-4                  4016         3968   -1.20%
BenchmarkTCP4Persistent-8                  3943         3857   -2.18%

R=golang-codereviews, mikioh.mikioh, gobot, iant, rsc
CC=golang-codereviews, khr
https://golang.org/cl/45700043

											
										
										
											2014-01-22 00:27:16 -07:00
+									bool ok;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									gp->status = Gwaiting;
 									gp->m = nil;
 									m->curg = nil;
 									if(m->waitunlockf) {
-												runtime: remove locks from netpoll hotpaths
Introduces two-phase goroutine parking mechanism -- prepare to park, commit park.
This mechanism does not require backing mutex to protect wait predicate.
Use it in netpoll. See comment in netpoll.goc for details.
This slightly reduces contention between reader, writer and read/write io notifications;
and just eliminates a bunch of mutex operations from hotpaths, thus making then faster.

benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4ConcurrentReadWrite           2109         1945   -7.78%
BenchmarkTCP4ConcurrentReadWrite-2         1162         1113   -4.22%
BenchmarkTCP4ConcurrentReadWrite-4          798          755   -5.39%
BenchmarkTCP4ConcurrentReadWrite-8          803          748   -6.85%
BenchmarkTCP4Persistent                    9411         9240   -1.82%
BenchmarkTCP4Persistent-2                  5888         5813   -1.27%
BenchmarkTCP4Persistent-4                  4016         3968   -1.20%
BenchmarkTCP4Persistent-8                  3943         3857   -2.18%

R=golang-codereviews, mikioh.mikioh, gobot, iant, rsc
CC=golang-codereviews, khr
https://golang.org/cl/45700043

											
										
										
											2014-01-22 00:27:16 -07:00
+										ok = m->waitunlockf(gp, m->waitlock);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										m->waitunlockf = nil;
-												runtime: reset dangling typed pointer
+untype it because it can point to different types
Update #5193.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/8454043

											
										
										
											2013-04-06 21:01:28 -06:00
+										m->waitlock = nil;
-												runtime: remove locks from netpoll hotpaths
Introduces two-phase goroutine parking mechanism -- prepare to park, commit park.
This mechanism does not require backing mutex to protect wait predicate.
Use it in netpoll. See comment in netpoll.goc for details.
This slightly reduces contention between reader, writer and read/write io notifications;
and just eliminates a bunch of mutex operations from hotpaths, thus making then faster.

benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4ConcurrentReadWrite           2109         1945   -7.78%
BenchmarkTCP4ConcurrentReadWrite-2         1162         1113   -4.22%
BenchmarkTCP4ConcurrentReadWrite-4          798          755   -5.39%
BenchmarkTCP4ConcurrentReadWrite-8          803          748   -6.85%
BenchmarkTCP4Persistent                    9411         9240   -1.82%
BenchmarkTCP4Persistent-2                  5888         5813   -1.27%
BenchmarkTCP4Persistent-4                  4016         3968   -1.20%
BenchmarkTCP4Persistent-8                  3943         3857   -2.18%

R=golang-codereviews, mikioh.mikioh, gobot, iant, rsc
CC=golang-codereviews, khr
https://golang.org/cl/45700043

											
										
										
											2014-01-22 00:27:16 -07:00
+										if(!ok) {
 											gp->status = Grunnable;
 											execute(gp);  // Schedule it back, never returns.
 										}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									}
 									if(m->lockedg) {
 										stoplockedm();
 										execute(gp);  // Never returns.
 									}
 									schedule();
 								}
 								// Scheduler yield.
 								void
 								runtime·gosched(void)
 								{
-												runtime: preempt goroutines for GC
The last patch for preemptive scheduler,
with this change stoptheworld issues preemption
requests every 100us.
Update #543.

R=golang-dev, daniel.morsing, rsc
CC=golang-dev
https://golang.org/cl/10264044

											
										
										
											2013-06-28 07:52:17 -06:00
+									runtime·mcall(runtime·gosched0);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								}
 								// runtime·gosched continuation on g0.
-												runtime: preempt goroutines for GC
The last patch for preemptive scheduler,
with this change stoptheworld issues preemption
requests every 100us.
Update #543.

R=golang-dev, daniel.morsing, rsc
CC=golang-dev
https://golang.org/cl/10264044

											
										
										
											2013-06-28 07:52:17 -06:00
+								void
 								runtime·gosched0(G *gp)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								{
 									gp->status = Grunnable;
 									gp->m = nil;
 									m->curg = nil;
 									runtime·lock(&runtime·sched);
 									globrunqput(gp);
 									runtime·unlock(&runtime·sched);
 									if(m->lockedg) {
 										stoplockedm();
 										execute(gp);  // Never returns.
 									}
 									schedule();
 								}
 								// Finishes execution of the current goroutine.
-												runtime: mark runtime.goexit as nosplit
Required for preemptive scheduler, see the comment.

R=golang-dev, daniel.morsing
CC=golang-dev
https://golang.org/cl/9841047

											
										
										
											2013-05-30 04:11:49 -06:00
+								// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
 								// Since it does not return it does not matter.  But if it is preempted
 								// at the split stack check, GC will complain about inconsistent sp.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								void
 								runtime·goexit(void)
 								{
 									if(raceenabled)
 										runtime·racegoend();
 									runtime·mcall(goexit0);
 								}
 								// runtime·goexit continuation on g0.
 								static void
 								goexit0(G *gp)
 								{
 									gp->status = Gdead;
 									gp->m = nil;
 									gp->lockedm = nil;
 									m->curg = nil;
 									m->lockedg = nil;
-												runtime: clear locked bit when goroutine exits

Otherwise the next goroutine run on the m
can get inadvertently locked if it executes a cgo call
that turns on the internal lock.

While we're here, fix the cgo panic unwind to
decrement m->ncgo like the non-panic unwind does.

Fixes #4971.

R=golang-dev, iant, dvyukov
CC=golang-dev
https://golang.org/cl/7627043

											
										
										
											2013-03-08 09:26:00 -07:00
+									if(m->locked & ~LockExternal) {
-												runtime: add a missing newline in a debug printf.

Trivial, but annoying while debugging this code.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/11656043

											
										
										
											2013-07-21 20:42:42 -06:00
+										runtime·printf("invalid m->locked = %d\n", m->locked);
-												runtime: clear locked bit when goroutine exits

Otherwise the next goroutine run on the m
can get inadvertently locked if it executes a cgo call
that turns on the internal lock.

While we're here, fix the cgo panic unwind to
decrement m->ncgo like the non-panic unwind does.

Fixes #4971.

R=golang-dev, iant, dvyukov
CC=golang-dev
https://golang.org/cl/7627043

											
										
										
											2013-03-08 09:26:00 -07:00
+										runtime·throw("internal lockOSThread error");
 									}
 									m->locked = 0;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·unwindstack(gp, nil);
 									gfput(m->p, gp);
 									schedule();
-												runtime: refactor goroutine blocking
The change is a preparation for the new scheduler.
It introduces runtime.park() function,
that will atomically unlock the mutex and park the goroutine.
It will allow to remove the racy readyonstop flag
that is difficult to implement w/o the global scheduler mutex.

R=rsc, remyoudompheng, dave
CC=golang-dev
https://golang.org/cl/6501077

											
										
										
											2012-09-18 11:15:46 -06:00
+								}
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+								static void
 								save(void *pc, uintptr sp)
 								{
 									g->sched.pc = (uintptr)pc;
 									g->sched.sp = sp;
 									g->sched.lr = 0;
 									g->sched.ret = 0;
 									g->sched.ctxt = 0;
 									g->sched.g = g;
 								}
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+								// The goroutine g is about to enter a system call.
 								// Record that it's not using the cpu anymore.
-												Library support for cgo export.

These functions are used to call from a C function back to a
Go function.  This only includes 386 support.

R=rsc
CC=golang-dev
https://golang.org/cl/834045

											
										
										
											2010-04-09 14:30:35 -06:00
+								// This is called only from the go syscall library and cgocall,
 								// not from the low-level system calls used by the runtime.
-												runtime: stack split + garbage collection bug

The g->sched.sp saved stack pointer and the
g->stackbase and g->stackguard stack bounds
can change even while "the world is stopped",
because a goroutine has to call functions (and
therefore might split its stack) when exiting a
system call to check whether the world is stopped
(and if so, wait until the world continues).

That means the garbage collector cannot access
those values safely (without a race) for goroutines
executing system calls.  Instead, save a consistent
triple in g->gcsp, g->gcstack, g->gcguard during
entersyscall and have the garbage collector refer
to those.

The old code was occasionally seeing (because of
the race) an sp and stk that did not correspond to
each other, so that stk - sp was not the number of
stack bytes following sp.  In that case, if sp < stk
then the call scanblock(sp, stk - sp) scanned too
many bytes (anything between the two pointers,
which pointed into different allocation blocks).
If sp > stk then stk - sp wrapped around.
On 32-bit, stk - sp is a uintptr (uint32) converted
to int64 in the call to scanblock, so a large (~4G)
but positive number.  Scanblock would try to scan
that many bytes and eventually fault accessing
unmapped memory.  On 64-bit, stk - sp is a uintptr (uint64)
promoted to int64 in the call to scanblock, so a negative
number.  Scanblock would not scan anything, possibly
causing in-use blocks to be freed.

In short, 32-bit platforms would have seen either
ineffective garbage collection or crashes during garbage
collection, while 64-bit platforms would have seen
either ineffective or incorrect garbage collection.
You can see the invalid arguments to scanblock in the
stack traces in issue 1620.

Fixes #1620.
Fixes #1746.

R=iant, r
CC=golang-dev
https://golang.org/cl/4437075

											
										
										
											2011-04-27 21:21:12 -06:00
+								//
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								// Entersyscall cannot split the stack: the runtime·gosave must
-												runtime: stack split + garbage collection bug

The g->sched.sp saved stack pointer and the
g->stackbase and g->stackguard stack bounds
can change even while "the world is stopped",
because a goroutine has to call functions (and
therefore might split its stack) when exiting a
system call to check whether the world is stopped
(and if so, wait until the world continues).

That means the garbage collector cannot access
those values safely (without a race) for goroutines
executing system calls.  Instead, save a consistent
triple in g->gcsp, g->gcstack, g->gcguard during
entersyscall and have the garbage collector refer
to those.

The old code was occasionally seeing (because of
the race) an sp and stk that did not correspond to
each other, so that stk - sp was not the number of
stack bytes following sp.  In that case, if sp < stk
then the call scanblock(sp, stk - sp) scanned too
many bytes (anything between the two pointers,
which pointed into different allocation blocks).
If sp > stk then stk - sp wrapped around.
On 32-bit, stk - sp is a uintptr (uint32) converted
to int64 in the call to scanblock, so a large (~4G)
but positive number.  Scanblock would try to scan
that many bytes and eventually fault accessing
unmapped memory.  On 64-bit, stk - sp is a uintptr (uint64)
promoted to int64 in the call to scanblock, so a negative
number.  Scanblock would not scan anything, possibly
causing in-use blocks to be freed.

In short, 32-bit platforms would have seen either
ineffective garbage collection or crashes during garbage
collection, while 64-bit platforms would have seen
either ineffective or incorrect garbage collection.
You can see the invalid arguments to scanblock in the
stack traces in issue 1620.

Fixes #1620.
Fixes #1746.

R=iant, r
CC=golang-dev
https://golang.org/cl/4437075

											
										
										
											2011-04-27 21:21:12 -06:00
+								// make g->sched refer to the caller's stack segment, because
 								// entersyscall is going to return immediately after.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+								void
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								·entersyscall(int32 dummy)
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								{
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									// Disable preemption because during this function g is in Gsyscall status,
 									// but can have inconsistent g->sched, do not let GC observe it.
 									m->locks++;
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									// Leave SP around for GC and traceback.
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+									save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									g->syscallsp = g->sched.sp;
 									g->syscallpc = g->sched.pc;
 									g->syscallstack = g->stackbase;
 									g->syscallguard = g->stackguard;
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+									g->status = Gsyscall;
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+										// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+										//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+										runtime·throw("entersyscall");
 									}
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·lock(&runtime·sched);
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+										if(runtime·atomicload(&runtime·sched.sysmonwait)) {
 											runtime·atomicstore(&runtime·sched.sysmonwait, 0);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											runtime·notewakeup(&runtime·sched.sysmonnote);
 										}
 										runtime·unlock(&runtime·sched);
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+										save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									m->mcache = nil;
 									m->p->m = nil;
 									runtime·atomicstore(&m->p->status, Psyscall);
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+									if(runtime·sched.gcwaiting) {
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·lock(&runtime·sched);
 										if (runtime·sched.stopwait > 0 && runtime·cas(&m->p->status, Psyscall, Pgcstop)) {
 											if(--runtime·sched.stopwait == 0)
 												runtime·notewakeup(&runtime·sched.stopnote);
 										}
 										runtime·unlock(&runtime·sched);
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+										save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									}
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
 									// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
 									// We set stackguard to StackPreempt so that first split stack check calls morestack.
 									// Morestack detects this case and throws.
 									g->stackguard0 = StackPreempt;
 									m->locks--;
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+								}
 								// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+								void
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								·entersyscallblock(int32 dummy)
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
+								{
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									P *p;
-												runtime: introduce entersyscallblock()
In preparation for the new scheduler.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7386044

											
										
										
											2013-02-20 09:21:45 -07:00
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									m->locks++;  // see comment in entersyscall
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									// Leave SP around for GC and traceback.
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+									save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									g->syscallsp = g->sched.sp;
 									g->syscallpc = g->sched.pc;
 									g->syscallstack = g->stackbase;
 									g->syscallguard = g->stackguard;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
+									g->status = Gsyscall;
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
 										// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
 										//	g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·throw("entersyscallblock");
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
+									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									p = releasep();
 									handoffp(p);
-												runtime: fix deadlock detector false negative
The issue was that scvg is assigned *after* the scavenger goroutine is started,
so when the scavenger calls entersyscall() the g==scvg check can fail.
Fixes #5025.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7629045

											
										
										
											2013-03-12 07:21:44 -06:00
+									if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+										incidlelocked(1);
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
 									// Resave for traceback during blocked call.
 									save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
 									g->stackguard0 = StackPreempt;  // see comment in entersyscall
 									m->locks--;
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+								}
 								// The goroutine g exited its system call.
 								// Arrange for it to run on a cpu again.
 								// This is called only from the go syscall library, not
 								// from the low-level system calls used by the runtime.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·exitsyscall(void)
-												change meaning of $GOMAXPROCS to number of cpus to use,
not number of threads.  can still starve all the other threads,
but only by looping, not by waiting in a system call.

fix darwin syscall.Syscall6 bug.

fix chanclient bug.

delete $GOMAXPROCS from network tests.

add stripped down printf, sys.printhex to runtime.

R=r
DELTA=355  (217 added, 36 deleted, 102 changed)
OCL=20017
CL=20019

											
										
										
											2008-11-25 17:48:10 -07:00
+								{
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									m->locks++;  // see comment in entersyscall
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+										incidlelocked(-1);
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
-												runtime: output how long goroutines are blocked
Example of output:

goroutine 4 [sleep for 3 min]:
time.Sleep(0x34630b8a000)
        src/pkg/runtime/time.goc:31 +0x31
main.func·002()
        block.go:16 +0x2c
created by main.main
        block.go:17 +0x33

Full program and output are here:
http://play.golang.org/p/NEZdADI3Td

Fixes #6809.

R=golang-codereviews, khr, kamil.kisiel, bradfitz, rsc
CC=golang-codereviews
https://golang.org/cl/50420043

											
										
										
											2014-01-16 01:54:46 -07:00
+									g->waitsince = 0;
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									if(exitsyscallfast()) {
-												runtime: faster entersyscall/exitsyscall

Replace cas with xadd in scheduler.
Suggested by Dmitriy in last code review.
Verified with Promela model.

When there's actual contention for the atomic word,
this avoids the looping that compare-and-swap requires.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               32           26  -17.08%
runtime_test.BenchmarkSyscall-2            155           59  -61.81%
runtime_test.BenchmarkSyscall-3            112           52  -52.95%
runtime_test.BenchmarkSyscall-4             94           48  -48.57%
runtime_test.BenchmarkSyscallWork          871          872   +0.11%
runtime_test.BenchmarkSyscallWork-2        481          477   -0.83%
runtime_test.BenchmarkSyscallWork-3        338          335   -0.89%
runtime_test.BenchmarkSyscallWork-4        263          256   -2.66%

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/4800047

											
										
										
											2011-07-23 10:22:55 -06:00
+										// There's a cpu for us, so we can run.
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+										m->p->syscalltick++;
-												runtime: faster entersyscall/exitsyscall

Replace cas with xadd in scheduler.
Suggested by Dmitriy in last code review.
Verified with Promela model.

When there's actual contention for the atomic word,
this avoids the looping that compare-and-swap requires.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               32           26  -17.08%
runtime_test.BenchmarkSyscall-2            155           59  -61.81%
runtime_test.BenchmarkSyscall-3            112           52  -52.95%
runtime_test.BenchmarkSyscall-4             94           48  -48.57%
runtime_test.BenchmarkSyscallWork          871          872   +0.11%
runtime_test.BenchmarkSyscallWork-2        481          477   -0.83%
runtime_test.BenchmarkSyscallWork-3        338          335   -0.89%
runtime_test.BenchmarkSyscallWork-4        263          256   -2.66%

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/4800047

											
										
										
											2011-07-23 10:22:55 -06:00
+										g->status = Grunning;
 										// Garbage collector isn't running (since we are),
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										// so okay to clear gcstack and gcsp.
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+										g->syscallstack = (uintptr)nil;
 										g->syscallsp = (uintptr)nil;
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+										m->locks--;
 										if(g->preempt) {
 											// restore the preemption request in case we've cleared it in newstack
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+											g->stackguard0 = StackPreempt;
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+										} else {
 											// otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock
 											g->stackguard0 = g->stackguard;
 										}
-												runtime: faster entersyscall/exitsyscall

Replace cas with xadd in scheduler.
Suggested by Dmitriy in last code review.
Verified with Promela model.

When there's actual contention for the atomic word,
this avoids the looping that compare-and-swap requires.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               32           26  -17.08%
runtime_test.BenchmarkSyscall-2            155           59  -61.81%
runtime_test.BenchmarkSyscall-3            112           52  -52.95%
runtime_test.BenchmarkSyscall-4             94           48  -48.57%
runtime_test.BenchmarkSyscallWork          871          872   +0.11%
runtime_test.BenchmarkSyscallWork-2        481          477   -0.83%
runtime_test.BenchmarkSyscallWork-3        338          335   -0.89%
runtime_test.BenchmarkSyscallWork-4        263          256   -2.66%

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/4800047

											
										
										
											2011-07-23 10:22:55 -06:00
+										return;
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+									}
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									m->locks--;
-												kill trailing white space.
(apparently my first attempt didn't work.)

R=r
OCL=13888
CL=13888

											
										
										
											2008-08-05 15:21:42 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// Call the scheduler.
 									runtime·mcall(exitsyscall0);
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// Scheduler returned, so we're allowed to run now.
-												runtime: stack split + garbage collection bug

The g->sched.sp saved stack pointer and the
g->stackbase and g->stackguard stack bounds
can change even while "the world is stopped",
because a goroutine has to call functions (and
therefore might split its stack) when exiting a
system call to check whether the world is stopped
(and if so, wait until the world continues).

That means the garbage collector cannot access
those values safely (without a race) for goroutines
executing system calls.  Instead, save a consistent
triple in g->gcsp, g->gcstack, g->gcguard during
entersyscall and have the garbage collector refer
to those.

The old code was occasionally seeing (because of
the race) an sp and stk that did not correspond to
each other, so that stk - sp was not the number of
stack bytes following sp.  In that case, if sp < stk
then the call scanblock(sp, stk - sp) scanned too
many bytes (anything between the two pointers,
which pointed into different allocation blocks).
If sp > stk then stk - sp wrapped around.
On 32-bit, stk - sp is a uintptr (uint32) converted
to int64 in the call to scanblock, so a large (~4G)
but positive number.  Scanblock would try to scan
that many bytes and eventually fault accessing
unmapped memory.  On 64-bit, stk - sp is a uintptr (uint64)
promoted to int64 in the call to scanblock, so a negative
number.  Scanblock would not scan anything, possibly
causing in-use blocks to be freed.

In short, 32-bit platforms would have seen either
ineffective garbage collection or crashes during garbage
collection, while 64-bit platforms would have seen
either ineffective or incorrect garbage collection.
You can see the invalid arguments to scanblock in the
stack traces in issue 1620.

Fixes #1620.
Fixes #1746.

R=iant, r
CC=golang-dev
https://golang.org/cl/4437075

											
										
										
											2011-04-27 21:21:12 -06:00
+									// Delete the gcstack information that we left for
 									// the garbage collector during the system call.
 									// Must wait until now because until gosched returns
 									// we don't know for sure that the garbage collector
 									// is not running.
-												runtime: use gcpc/gcsp during traceback of goroutines in syscalls
gcpc/gcsp are used by GC in similar situation.
gcpc/gcsp are also more stable than gp->sched,
because gp->sched is mutated by entersyscall/exitsyscall
in morestack and mcall. So it has higher chances of being inconsistent.
Also, rename gcpc/gcsp to syscallpc/syscallsp.

This is the same as reverted change 12250043
with save marked as textflag 7.
The problem was that if save calls morestack,
then subsequent lessstack spoils g->sched.pc/sp.
And that bad values were remembered in g->syscallpc/sp.
Entersyscallblock had the same problem,
but it was never triggered to date.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12478043

											
										
										
											2013-08-06 03:38:44 -06:00
+									g->syscallstack = (uintptr)nil;
 									g->syscallsp = (uintptr)nil;
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+									m->p->syscalltick++;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								}
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+								static bool
 								exitsyscallfast(void)
 								{
 									P *p;
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+									// Freezetheworld sets stopwait but does not retake P's.
 									if(runtime·sched.stopwait) {
 										m->p = nil;
 										return false;
 									}
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+									// Try to re-acquire the last P.
 									if(m->p && m->p->status == Psyscall && runtime·cas(&m->p->status, Psyscall, Prunning)) {
 										// There's a cpu for us, so we can run.
 										m->mcache = m->p->mcache;
 										m->p->m = m;
 										return true;
 									}
 									// Try to get any other idle P.
 									m->p = nil;
 									if(runtime·sched.pidle) {
 										runtime·lock(&runtime·sched);
 										p = pidleget();
-												runtime: do not park sysmon thread if any goroutines are running
Sysmon thread parks if no goroutines are running (runtime.sched.npidle ==
runtime.gomaxprocs).
Currently it's unparked when a goroutine enters syscall, it was enough
to retake P's from blocking syscalls.
But it's not enough for reliable goroutine preemption. We need to ensure that
sysmon runs if any goroutines are running.

R=rsc
CC=golang-dev
https://golang.org/cl/12176043

											
										
										
											2013-07-31 10:09:03 -06:00
+										if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
 											runtime·atomicstore(&runtime·sched.sysmonwait, 0);
 											runtime·notewakeup(&runtime·sched.sysmonnote);
 										}
-												runtime: do not split stacks in syscall status
Split stack checks (morestack) corrupt g->sched,
but g->sched must be preserved consistent for GC/traceback.
The change implements runtime.notetsleepg function,
which does entersyscall/exitsyscall and is carefully arranged
to not call any split functions in between.

R=rsc
CC=golang-dev
https://golang.org/cl/11575044

											
										
										
											2013-07-29 12:22:34 -06:00
+										runtime·unlock(&runtime·sched);
 										if(p) {
 											acquirep(p);
 											return true;
 										}
 									}
 									return false;
 								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// runtime·exitsyscall slow path on g0.
 								// Failed to acquire P, enqueue gp as runnable.
 								static void
 								exitsyscall0(G *gp)
 								{
 									P *p;
 									gp->status = Grunnable;
 									gp->m = nil;
 									m->curg = nil;
 									runtime·lock(&runtime·sched);
 									p = pidleget();
 									if(p == nil)
 										globrunqput(gp);
-												runtime: do not park sysmon thread if any goroutines are running
Sysmon thread parks if no goroutines are running (runtime.sched.npidle ==
runtime.gomaxprocs).
Currently it's unparked when a goroutine enters syscall, it was enough
to retake P's from blocking syscalls.
But it's not enough for reliable goroutine preemption. We need to ensure that
sysmon runs if any goroutines are running.

R=rsc
CC=golang-dev
https://golang.org/cl/12176043

											
										
										
											2013-07-31 10:09:03 -06:00
+									else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
 										runtime·atomicstore(&runtime·sched.sysmonwait, 0);
 										runtime·notewakeup(&runtime·sched.sysmonnote);
 									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·unlock(&runtime·sched);
 									if(p) {
 										acquirep(p);
 										execute(gp);  // Never returns.
 									}
 									if(m->lockedg) {
 										// Wait until another thread schedules gp and so m again.
 										stoplockedm();
 										execute(gp);  // Never returns.
 									}
 									stopm();
 									schedule();  // Never returns.
-												* comment, clean up scheduler
* rewrite lock implementation to be correct
  (tip: never assume that an algorithm you found
  in a linux man page is correct.)
* delete unneeded void* arg from clone fn
* replace Rendez with Note
* comment mal better
* use 6c -w, fix warnings
* mark all assembly functions 7

R=r
DELTA=828  (338 added, 221 deleted, 269 changed)
OCL=13884
CL=13886

											
										
										
											2008-08-05 15:18:47 -06:00
+								}
-												syscall: disable cpu profiling around fork
Fixes #5517.
Fixes #5659.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12183044

											
										
										
											2013-08-13 03:01:30 -06:00
+								// Called from syscall package before fork.
 								void
 								syscall·runtime_BeforeFork(void)
 								{
 									// Fork can hang if preempted with signals frequently enough (see issue 5517).
 									// Ensure that we stay on the same M where we disable profiling.
 									m->locks++;
 									if(m->profilehz != 0)
 										runtime·resetcpuprofiler(0);
 								}
 								// Called from syscall package after fork in parent.
 								void
 								syscall·runtime_AfterFork(void)
 								{
 									int32 hz;
 									hz = runtime·sched.profilehz;
 									if(hz != 0)
 										runtime·resetcpuprofiler(hz);
 									m->locks--;
 								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Hook used by runtime·malg to call runtime·stackalloc on the
 								// scheduler stack.  This exists because runtime·stackalloc insists
 								// on being called on the scheduler stack, to avoid trying to grow
 								// the stack while allocating a new stack segment.
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+								static void
 								mstackalloc(G *gp)
 								{
 									gp->param = runtime·stackalloc((uintptr)gp->param);
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+									runtime·gogo(&gp->sched);
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Allocate a new g, with a stack big enough for stacksize bytes.
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+								G*
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·malg(int32 stacksize)
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+								{
-												runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045

											
										
										
											2011-02-23 13:51:20 -07:00
+									G *newg;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+									byte *stk;
-												runtime: do not handle signals before configuring handler

There was a small window during program initialization
where a signal could come in before the handling mechanisms
were set up to handle it.  Delay the signal-handler installation
until we're ready for the signals.

Fixes #3314.

R=golang-dev, dsymonds, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/5833049

											
										
										
											2012-03-15 20:17:54 -06:00
-l, 6l, 8l: fix stack split logic for stacks near default segment size

Fixes #3310.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5823051

											
										
										
											2012-03-15 13:22:30 -06:00
+									if(StackTop < sizeof(Stktop)) {
 										runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
 										runtime·throw("runtime: bad stack.h");
 									}
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
-												runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045

											
										
										
											2011-02-23 13:51:20 -07:00
+									newg = runtime·malloc(sizeof(G));
-												runtime: garbage collection + malloc performance
  * add bit tracking finalizer status, avoiding getfinalizer lookup
  * add ability to allocate uncleared memory

R=iant
CC=golang-dev
https://golang.org/cl/207044

											
										
										
											2010-02-10 01:00:12 -07:00
+									if(stacksize >= 0) {
-												runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045

											
										
										
											2011-02-23 13:51:20 -07:00
+										if(g == m->g0) {
 											// running on scheduler stack already.
 											stk = runtime·stackalloc(StackSystem + stacksize);
 										} else {
 											// have to call stackalloc on scheduler stack.
 											g->param = (void*)(StackSystem + stacksize);
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+											runtime·mcall(mstackalloc);
-												runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045

											
										
										
											2011-02-23 13:51:20 -07:00
+											stk = g->param;
 											g->param = nil;
 										}
-												runtime: fix goroutine stack accounting
Fixes #6166.
Fixes #6168.

R=golang-dev, bradfitz, remyoudompheng
CC=golang-dev
https://golang.org/cl/12927045

											
										
										
											2013-08-16 11:04:05 -06:00
+										newg->stacksize = StackSystem + stacksize;
-												runtime: update field types in preparation for GC changes

R=rsc, remyoudompheng, minux.ma, ality
CC=golang-dev
https://golang.org/cl/6242061

											
										
										
											2012-05-30 11:07:52 -06:00
+										newg->stack0 = (uintptr)stk;
 										newg->stackguard = (uintptr)stk + StackGuard;
-												runtime: add stackguard0 to G
This is part of preemptive scheduler.
stackguard0 is checked in split stack checks and can be set to StackPreempt.
stackguard is not set to StackPreempt (holds the original value).

R=golang-dev, daniel.morsing, iant
CC=golang-dev
https://golang.org/cl/9875043

											
										
										
											2013-06-03 02:28:24 -06:00
+										newg->stackguard0 = newg->stackguard;
-												runtime: update field types in preparation for GC changes

R=rsc, remyoudompheng, minux.ma, ality
CC=golang-dev
https://golang.org/cl/6242061

											
										
										
											2012-05-30 11:07:52 -06:00
+										newg->stackbase = (uintptr)stk + StackSystem + stacksize - sizeof(Stktop);
 										runtime·memclr((byte*)newg->stackbase, sizeof(Stktop));
-												runtime: garbage collection + malloc performance
  * add bit tracking finalizer status, avoiding getfinalizer lookup
  * add ability to allocate uncleared memory

R=iant
CC=golang-dev
https://golang.org/cl/207044

											
										
										
											2010-02-10 01:00:12 -07:00
+									}
-												runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045

											
										
										
											2011-02-23 13:51:20 -07:00
+									return newg;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Create a new g running fn with siz bytes of arguments.
 								// Put it on the queue of g's waiting to run.
 								// The compiler turns a go statement into a call to this.
 								// Cannot split the stack because it assumes that the arguments
 								// are available sequentially after &fn; they would not be
 								// copied if a stack split occurred.  It's OK for this to call
 								// functions that split the stack.
-												runtime: change textflags from numbers to symbols

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12798043

											
										
										
											2013-08-12 14:47:18 -06:00
+								#pragma textflag NOSPLIT
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+								void
-												cmd/gc, reflect, runtime: switch to indirect func value representation

Step 1 of http://golang.org/s/go11func.

R=golang-dev, r, daniel.morsing, remyoudompheng
CC=golang-dev
https://golang.org/cl/7393045

											
										
										
											2013-02-21 15:01:13 -07:00
+								runtime·newproc(int32 siz, FuncVal* fn, ...)
-												runtime: allow arbitrary return type in SetFinalizer.
	finalize chan, to free OS X semaphore inside Lock.
os: finalize File, to close fd.

Fixes #503.

R=ken2
CC=golang-dev
https://golang.org/cl/204065

											
										
										
											2010-02-08 22:41:54 -07:00
+								{
-												runtime: fix arm reflect.call boundary case

The fault was lucky: when it wasn't faulting it was silently
copying a word from some other block and later putting
that same word back.  If some other goroutine had changed
that word of memory in the interim, too bad.

The ARM code was inconsistent about whether the
"argument frame" included the saved LR.  Including it made
some things more regular but mostly just caused confusion
in the places where the regularity broke.  Now the rule
reflects reality: argp is always a pointer to arguments,
never a saved link register.

Renamed struct fields to make meaning clearer.

Running ARM in QEMU, package time's gotest:
  * before: 27/58 failed
  * after: 0/50

R=r, r2
CC=golang-dev
https://golang.org/cl/3993041

											
										
										
											2011-01-14 12:05:20 -07:00
+									byte *argp;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: fix arm reflect.call boundary case

The fault was lucky: when it wasn't faulting it was silently
copying a word from some other block and later putting
that same word back.  If some other goroutine had changed
that word of memory in the interim, too bad.

The ARM code was inconsistent about whether the
"argument frame" included the saved LR.  Including it made
some things more regular but mostly just caused confusion
in the places where the regularity broke.  Now the rule
reflects reality: argp is always a pointer to arguments,
never a saved link register.

Renamed struct fields to make meaning clearer.

Running ARM in QEMU, package time's gotest:
  * before: 27/58 failed
  * after: 0/50

R=r, r2
CC=golang-dev
https://golang.org/cl/3993041

											
										
										
											2011-01-14 12:05:20 -07:00
+									if(thechar == '5')
 										argp = (byte*)(&fn+2);  // skip caller's saved LR
 									else
 										argp = (byte*)(&fn+1);
-												runtime: record goroutine creation pc and display in traceback

package main

func main() {
        go func() { *(*int)(nil) = 0 }()
        select{}
}

panic: runtime error: invalid memory address or nil pointer dereference

[signal 0xb code=0x1 addr=0x0 pc=0x1c96]

runtime.panic+0xac /Users/rsc/g/go/src/pkg/runtime/proc.c:1083
        runtime.panic(0x11bf0, 0xf8400011f0)
runtime.panicstring+0xa3 /Users/rsc/g/go/src/pkg/runtime/runtime.c:116
        runtime.panicstring(0x29a57, 0x0)
runtime.sigpanic+0x144 /Users/rsc/g/go/src/pkg/runtime/darwin/thread.c:470
        runtime.sigpanic()
main._func_001+0x16 /Users/rsc/g/go/src/pkg/runtime/x.go:188
        main._func_001()
runtime.goexit /Users/rsc/g/go/src/pkg/runtime/proc.c:150
        runtime.goexit()
----- goroutine created by -----
main.main+0x3d /Users/rsc/g/go/src/pkg/runtime/x.go:4

goroutine 1 [4]:
runtime.gosched+0x77 /Users/rsc/g/go/src/pkg/runtime/proc.c:598
        runtime.gosched()
runtime.block+0x27 /Users/rsc/g/go/src/pkg/runtime/chan.c:680
        runtime.block()
main.main+0x44 /Users/rsc/g/go/src/pkg/runtime/x.go:5
        main.main()
runtime.mainstart+0xf /Users/rsc/g/go/src/pkg/runtime/amd64/asm.s:77
        runtime.mainstart()
runtime.goexit /Users/rsc/g/go/src/pkg/runtime/proc.c:150
        runtime.goexit()
----- goroutine created by -----
_rt0_amd64+0x8e /Users/rsc/g/go/src/pkg/runtime/amd64/asm.s:64

Fixes #1563.

R=r
CC=golang-dev
https://golang.org/cl/4243046

											
										
										
											2011-03-02 11:42:02 -07:00
+									runtime·newproc1(fn, argp, siz, 0, runtime·getcallerpc(&siz));
-												runtime: allow arbitrary return type in SetFinalizer.
	finalize chan, to free OS X semaphore inside Lock.
os: finalize File, to close fd.

Fixes #503.

R=ken2
CC=golang-dev
https://golang.org/cl/204065

											
										
										
											2010-02-08 22:41:54 -07:00
+								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Create a new g running fn with narg bytes of arguments starting
 								// at argp and returning nret bytes of results.  callerpc is the
 								// address of the go statement that created this.  The new g is put
 								// on the queue of g's waiting to run.
-												runtime: run all finalizers in a single goroutine.
eliminate second pass of mark+sweep
by scanning finalizer table specially.

R=r
CC=golang-dev
https://golang.org/cl/782041

											
										
										
											2010-03-26 15:15:30 -06:00
+								G*
-												cmd/gc, reflect, runtime: switch to indirect func value representation

Step 1 of http://golang.org/s/go11func.

R=golang-dev, r, daniel.morsing, remyoudompheng
CC=golang-dev
https://golang.org/cl/7393045

											
										
										
											2013-02-21 15:01:13 -07:00
+								runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+								{
-												runtime: garbage collection + malloc performance
  * add bit tracking finalizer status, avoiding getfinalizer lookup
  * add ability to allocate uncleared memory

R=iant
CC=golang-dev
https://golang.org/cl/207044

											
										
										
											2010-02-10 01:00:12 -07:00
+									byte *sp;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+									G *newg;
-												runtime: allocate goroutine ids in batches
Helps reduce contention on sched.goidgen.

benchmark                               old ns/op    new ns/op    delta
BenchmarkCreateGoroutines-16                  259          237   -8.49%
BenchmarkCreateGoroutinesParallel-16          127           43  -66.06%

R=golang-codereviews, dave, bradfitz, khr
CC=golang-codereviews, rsc
https://golang.org/cl/46970043

											
										
										
											2014-01-21 23:34:36 -07:00
+									P *p;
-												runtime: allow arbitrary return type in SetFinalizer.
	finalize chan, to free OS X semaphore inside Lock.
os: finalize File, to close fd.

Fixes #503.

R=ken2
CC=golang-dev
https://golang.org/cl/204065

											
										
										
											2010-02-08 22:41:54 -07:00
+									int32 siz;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
-												runtime: fix newproc debugging print

R=golang-dev, remyoudompheng, r
CC=golang-dev
https://golang.org/cl/9249044

											
										
										
											2013-05-18 16:47:15 -06:00
+								//runtime·printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
-												runtime: disable preemption in several scheduler functions
Required for preemptive scheduler, see the comments for details.

R=golang-dev, khr, iant, khr
CC=golang-dev
https://golang.org/cl/9740051

											
										
										
											2013-06-03 04:40:38 -06:00
+									m->locks++;  // disable preemption because it can be holding p in a local var
-												runtime: allow arbitrary return type in SetFinalizer.
	finalize chan, to free OS X semaphore inside Lock.
os: finalize File, to close fd.

Fixes #503.

R=ken2
CC=golang-dev
https://golang.org/cl/204065

											
										
										
											2010-02-08 22:41:54 -07:00
+									siz = narg + nret;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+									siz = (siz+7) & ~7;
-												runtime: parallelize garbage collector mark + sweep

Running test/garbage/parser.out.

On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 	 1 cpu, no atomics
32.27u 0.58s 32.86r 	 1 cpu, atomic instructions
33.04u 0.83s 27.47r 	 2 cpu

On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 	 1 cpu, no atomics
34.87u 1.12s 29.60r 	 2 cpu
36.00u 1.87s 28.43r 	 3 cpu
36.46u 2.34s 27.10r 	 4 cpu
38.28u 3.85s 26.92r 	 5 cpu
37.72u 5.25s 26.73r	 6 cpu
39.63u 7.11s 26.95r	 7 cpu
39.67u 8.10s 26.68r	 8 cpu

On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 	 1 cpu, no atomics
43.98u 2.95s 38.69r 	 2 cpu

On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 	 1 cpu, no atomics
57.15u 4.72s 51.54r 	 2 cpu

The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.

R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082

											
										
										
											2011-09-30 07:40:01 -06:00
-												runtime: higher goroutine arg limit, clearer error

Fixes #591.

R=ken2
CC=golang-dev
https://golang.org/cl/4803054

											
										
										
											2011-07-27 10:41:46 -06:00
+									// We could instead create a secondary stack frame
 									// and make it look like goexit was on the original but
 									// the call to the actual goroutine function was split.
 									// Not worth it: this is almost always an error.
 									if(siz > StackMin - 1024)
 										runtime·throw("runtime.newproc: function arguments too large for new goroutine");
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
-												runtime: allocate goroutine ids in batches
Helps reduce contention on sched.goidgen.

benchmark                               old ns/op    new ns/op    delta
BenchmarkCreateGoroutines-16                  259          237   -8.49%
BenchmarkCreateGoroutinesParallel-16          127           43  -66.06%

R=golang-codereviews, dave, bradfitz, khr
CC=golang-codereviews, rsc
https://golang.org/cl/46970043

											
										
										
											2014-01-21 23:34:36 -07:00
+									p = m->p;
 									if((newg = gfget(p)) != nil) {
-												runtime: make StackSystem part of StackGuard

Fixes #1779

R=rsc
CC=golang-dev
https://golang.org/cl/4543052

											
										
										
											2011-05-16 14:57:49 -06:00
+										if(newg->stackguard - StackGuard != newg->stack0)
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+											runtime·throw("invalid stack in newg");
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+									} else {
-												ld: detect stack overflow due to NOSPLIT

Fix problems found.

On amd64, various library routines had bigger
stack frames than expected, because large function
calls had been added.

runtime.assertI2T: nosplit stack overflow
        120	assumed on entry to runtime.assertI2T
        8	after runtime.assertI2T uses 112
        0	on entry to runtime.newTypeAssertionError
        -8	on entry to runtime.morestack01

runtime.assertE2E: nosplit stack overflow
        120	assumed on entry to runtime.assertE2E
        16	after runtime.assertE2E uses 104
        8	on entry to runtime.panic
        0	on entry to runtime.morestack16
        -8	after runtime.morestack16 uses 8

runtime.assertE2T: nosplit stack overflow
        120	assumed on entry to runtime.assertE2T
        16	after runtime.assertE2T uses 104
        8	on entry to runtime.panic
        0	on entry to runtime.morestack16
        -8	after runtime.morestack16 uses 8

runtime.newselect: nosplit stack overflow
        120	assumed on entry to runtime.newselect
        56	after runtime.newselect uses 64
        48	on entry to runtime.printf
        8	after runtime.printf uses 40
        0	on entry to vprintf
        -8	on entry to runtime.morestack16

runtime.selectdefault: nosplit stack overflow
        120	assumed on entry to runtime.selectdefault
        56	after runtime.selectdefault uses 64
        48	on entry to runtime.printf
        8	after runtime.printf uses 40
        0	on entry to vprintf
        -8	on entry to runtime.morestack16

runtime.selectgo: nosplit stack overflow
        120	assumed on entry to runtime.selectgo
        0	after runtime.selectgo uses 120
        -8	on entry to runtime.gosched

On arm, 5c was tagging functions NOSPLIT that should
not have been, like the recursive function printpanics:

printpanics: nosplit stack overflow
        124	assumed on entry to printpanics
        112	after printpanics uses 12
        108	on entry to printpanics
        96	after printpanics uses 12
        92	on entry to printpanics
        80	after printpanics uses 12
        76	on entry to printpanics
        64	after printpanics uses 12
        60	on entry to printpanics
        48	after printpanics uses 12
        44	on entry to printpanics
        32	after printpanics uses 12
        28	on entry to printpanics
        16	after printpanics uses 12
        12	on entry to printpanics
        0	after printpanics uses 12
        -4	on entry to printpanics

R=r, r2
CC=golang-dev
https://golang.org/cl/4188061

											
										
										
											2011-02-22 15:40:40 -07:00
+										newg = runtime·malg(StackMin);
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+										allgadd(newg);
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+									}
-												runtime: update field types in preparation for GC changes

R=rsc, remyoudompheng, minux.ma, ality
CC=golang-dev
https://golang.org/cl/6242061

											
										
										
											2012-05-30 11:07:52 -06:00
+									sp = (byte*)newg->stackbase;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+									sp -= siz;
-												runtime: replace runtime.mcpy with runtime.memmove
faster string operations, and more

tested on linux/386

runtime_test.BenchmarkSliceToString                    642          532  -17.13%
runtime_test.BenchmarkStringToSlice                    636          528  -16.98%
runtime_test.BenchmarkConcatString                    1109          897  -19.12%

R=r, iant, rsc
CC=golang-dev
https://golang.org/cl/4674042

											
										
										
											2011-07-12 18:30:40 -06:00
+									runtime·memmove(sp, argp, narg);
-												runtime: fix arm reflect.call boundary case

The fault was lucky: when it wasn't faulting it was silently
copying a word from some other block and later putting
that same word back.  If some other goroutine had changed
that word of memory in the interim, too bad.

The ARM code was inconsistent about whether the
"argument frame" included the saved LR.  Including it made
some things more regular but mostly just caused confusion
in the places where the regularity broke.  Now the rule
reflects reality: argp is always a pointer to arguments,
never a saved link register.

Renamed struct fields to make meaning clearer.

Running ARM in QEMU, package time's gotest:
  * before: 27/58 failed
  * after: 0/50

R=r, r2
CC=golang-dev
https://golang.org/cl/3993041

											
										
										
											2011-01-14 12:05:20 -07:00
+									if(thechar == '5') {
 										// caller's LR
 										sp -= sizeof(void*);
 										*(void**)sp = nil;
 									}
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+									runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
-												runtime: update field types in preparation for GC changes

R=rsc, remyoudompheng, minux.ma, ality
CC=golang-dev
https://golang.org/cl/6242061

											
										
										
											2012-05-30 11:07:52 -06:00
+									newg->sched.sp = (uintptr)sp;
-												runtime: adjust traceback / garbage collector boundary

The garbage collection routine addframeroots is duplicating
logic in the traceback routine that calls it, sometimes correctly,
sometimes incorrectly, sometimes incompletely.
Pass necessary information to addframeroots instead of
deriving it anew.

Should make addframeroots significantly more robust.
It's certainly smaller.

Also try to standardize on uintptr for saved pc, sp values.

Will make CL 10036044 trivial.

R=golang-dev, dave, dvyukov
CC=golang-dev
https://golang.org/cl/10169045

											
										
										
											2013-06-12 06:49:38 -06:00
+									newg->sched.pc = (uintptr)runtime·goexit;
-												runtime: stack growth adjustments, cleanup
	* keep coherent SP/PC in gobuf
	  (i.e., SP that would be in use at that PC)
	* gogocall replaces setspgoto,
	  should work better in presence of link registers
	* delete unused system calls

only amd64; 386 is now broken

R=r
DELTA=548  (183 added, 183 deleted, 182 changed)
OCL=30381
CL=30442

											
										
										
											2009-06-17 16:12:16 -06:00
+									newg->sched.g = newg;
-												runtime: add lr, ctxt, ret to Gobuf

Add gostartcall and gostartcallfn.
The old gogocall = gostartcall + gogo.
The old gogocallfn = gostartcallfn + gogo.

R=dvyukov, minux.ma
CC=golang-dev
https://golang.org/cl/10036044

											
										
										
											2013-06-12 13:22:26 -06:00
+									runtime·gostartcallfn(&newg->sched, fn);
-												runtime: record goroutine creation pc and display in traceback

package main

func main() {
        go func() { *(*int)(nil) = 0 }()
        select{}
}

panic: runtime error: invalid memory address or nil pointer dereference

[signal 0xb code=0x1 addr=0x0 pc=0x1c96]

runtime.panic+0xac /Users/rsc/g/go/src/pkg/runtime/proc.c:1083
        runtime.panic(0x11bf0, 0xf8400011f0)
runtime.panicstring+0xa3 /Users/rsc/g/go/src/pkg/runtime/runtime.c:116
        runtime.panicstring(0x29a57, 0x0)
runtime.sigpanic+0x144 /Users/rsc/g/go/src/pkg/runtime/darwin/thread.c:470
        runtime.sigpanic()
main._func_001+0x16 /Users/rsc/g/go/src/pkg/runtime/x.go:188
        main._func_001()
runtime.goexit /Users/rsc/g/go/src/pkg/runtime/proc.c:150
        runtime.goexit()
----- goroutine created by -----
main.main+0x3d /Users/rsc/g/go/src/pkg/runtime/x.go:4

goroutine 1 [4]:
runtime.gosched+0x77 /Users/rsc/g/go/src/pkg/runtime/proc.c:598
        runtime.gosched()
runtime.block+0x27 /Users/rsc/g/go/src/pkg/runtime/chan.c:680
        runtime.block()
main.main+0x44 /Users/rsc/g/go/src/pkg/runtime/x.go:5
        main.main()
runtime.mainstart+0xf /Users/rsc/g/go/src/pkg/runtime/amd64/asm.s:77
        runtime.mainstart()
runtime.goexit /Users/rsc/g/go/src/pkg/runtime/proc.c:150
        runtime.goexit()
----- goroutine created by -----
_rt0_amd64+0x8e /Users/rsc/g/go/src/pkg/runtime/amd64/asm.s:64

Fixes #1563.

R=r
CC=golang-dev
https://golang.org/cl/4243046

											
										
										
											2011-03-02 11:42:02 -07:00
+									newg->gopc = (uintptr)callerpc;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									newg->status = Grunnable;
-												runtime: allocate goroutine ids in batches
Helps reduce contention on sched.goidgen.

benchmark                               old ns/op    new ns/op    delta
BenchmarkCreateGoroutines-16                  259          237   -8.49%
BenchmarkCreateGoroutinesParallel-16          127           43  -66.06%

R=golang-codereviews, dave, bradfitz, khr
CC=golang-codereviews, rsc
https://golang.org/cl/46970043

											
										
										
											2014-01-21 23:34:36 -07:00
+									if(p->goidcache == p->goidcacheend) {
 										p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
 										p->goidcacheend = p->goidcache + GoidCacheBatch;
 									}
 									newg->goid = p->goidcache++;
-												runtime, cmd/gc, cmd/ld: ignore method wrappers in recover

Bug #1:

Issue 5406 identified an interesting case:
        defer iface.M()
may end up calling a wrapper that copies an indirect receiver
from the iface value and then calls the real M method. That's
two calls down, not just one, and so recover() == nil always
in the real M method, even during a panic.

[For the purposes of this entire discussion, a wrapper's
implementation is a function containing an ordinary call, not
the optimized tail call form that is somtimes possible. The
tail call does not create a second frame, so it is already
handled correctly.]

Fix this bug by introducing g->panicwrap, which counts the
number of bytes on current stack segment that are due to
wrapper calls that should not count against the recover
check. All wrapper functions must now adjust g->panicwrap up
on entry and back down on exit. This adds slightly to their
expense; on the x86 it is a single instruction at entry and
exit; on the ARM it is three. However, the alternative is to
make a call to recover depend on being able to walk the stack,
which I very much want to avoid. We have enough problems
walking the stack for garbage collection and profiling.
Also, if performance is critical in a specific case, it is already
faster to use a pointer receiver and avoid this kind of wrapper
entirely.

Bug #2:

The old code, which did not consider the possibility of two
calls, already contained a check to see if the call had split
its stack and so the panic-created segment was one behind the
current segment. In the wrapper case, both of the two calls
might split their stacks, so the panic-created segment can be
two behind the current segment.

Fix this by propagating the Stktop.panic flag forward during
stack splits instead of looking backward during recover.

Fixes #5406.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/13367052

											
										
										
											2013-09-12 12:00:16 -06:00
+									newg->panicwrap = 0;
-												runtime/race: switch to explicit race context instead of goroutine id's
Removes limit on maximum number of goroutines ever existed.
code.google.com/p/goexecutor tests now pass successfully.
Also slightly improves performance.
Before: $ time ./flate.test -test.short
real	0m9.314s
After:  $ time ./flate.test -test.short
real	0m8.958s
Fixes #4286.
The runtime is built from llvm rev 174312.

R=rsc
CC=golang-dev
https://golang.org/cl/7218044

											
										
										
											2013-02-06 00:40:54 -07:00
+									if(raceenabled)
-												runtime: adjust traceback / garbage collector boundary

The garbage collection routine addframeroots is duplicating
logic in the traceback routine that calls it, sometimes correctly,
sometimes incorrectly, sometimes incompletely.
Pass necessary information to addframeroots instead of
deriving it anew.

Should make addframeroots significantly more robust.
It's certainly smaller.

Also try to standardize on uintptr for saved pc, sp values.

Will make CL 10036044 trivial.

R=golang-dev, dave, dvyukov
CC=golang-dev
https://golang.org/cl/10169045

											
										
										
											2013-06-12 06:49:38 -06:00
+										newg->racectx = runtime·racegostart((void*)callerpc);
-												runtime: allocate goroutine ids in batches
Helps reduce contention on sched.goidgen.

benchmark                               old ns/op    new ns/op    delta
BenchmarkCreateGoroutines-16                  259          237   -8.49%
BenchmarkCreateGoroutinesParallel-16          127           43  -66.06%

R=golang-codereviews, dave, bradfitz, khr
CC=golang-codereviews, rsc
https://golang.org/cl/46970043

											
										
										
											2014-01-21 23:34:36 -07:00
+									runqput(p, newg);
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										wakep();
-												runtime: disable preemption in several scheduler functions
Required for preemptive scheduler, see the comments for details.

R=golang-dev, khr, iant, khr
CC=golang-dev
https://golang.org/cl/9740051

											
										
										
											2013-06-03 04:40:38 -06:00
+									m->locks--;
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+									if(m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
 										g->stackguard0 = StackPreempt;
-												runtime: run all finalizers in a single goroutine.
eliminate second pass of mark+sweep
by scanning finalizer table specially.

R=r
CC=golang-dev
https://golang.org/cl/782041

											
										
										
											2010-03-26 15:15:30 -06:00
+									return newg;
-												fix runtime stack overflow bug that gri ran into:
160 - 75 was just barely not enough for deferproc + morestack.

added enum names and bumped to 256 - 128.
added explanation.

changed a few mal() (garbage-collected) to
malloc()/free() (manually collected).

R=ken
OCL=26981
CL=26981

											
										
										
											2009-04-01 01:26:00 -06:00
+								}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+								static void
 								allgadd(G *gp)
 								{
 									G **new;
 									uintptr cap;
 									runtime·lock(&allglock);
 									if(runtime·allglen >= allgcap) {
 										cap = 4096/sizeof(new[0]);
 										if(cap < 2*allgcap)
 											cap = 2*allgcap;
 										new = runtime·malloc(cap*sizeof(new[0]));
 										if(new == nil)
 											runtime·throw("runtime: cannot allocate memory");
 										if(runtime·allg != nil) {
 											runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
 											runtime·free(runtime·allg);
 										}
 										runtime·allg = new;
 										allgcap = cap;
 									}
 									runtime·allg[runtime·allglen++] = gp;
 									runtime·unlock(&allglock);
 								}
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								// Put on gfree list.
 								// If local list is too long, transfer a batch to the global list.
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
+								static void
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								gfput(P *p, G *gp)
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
+								{
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									if(gp->stackguard - StackGuard != gp->stack0)
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+										runtime·throw("invalid stack in gfput");
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+									gp->schedlink = p->gfree;
 									p->gfree = gp;
 									p->gfreecnt++;
 									if(p->gfreecnt >= 64) {
 										runtime·lock(&runtime·sched.gflock);
 										while(p->gfreecnt >= 32) {
 											p->gfreecnt--;
 											gp = p->gfree;
 											p->gfree = gp->schedlink;
 											gp->schedlink = runtime·sched.gfree;
 											runtime·sched.gfree = gp;
 										}
 										runtime·unlock(&runtime·sched.gflock);
 									}
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
+								}
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								// Get from gfree list.
 								// If local list is empty, grab a batch from global list.
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
+								static G*
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								gfget(P *p)
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
+								{
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									G *gp;
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								retry:
 									gp = p->gfree;
 									if(gp == nil && runtime·sched.gfree) {
 										runtime·lock(&runtime·sched.gflock);
 										while(p->gfreecnt < 32 && runtime·sched.gfree) {
 											p->gfreecnt++;
 											gp = runtime·sched.gfree;
 											runtime·sched.gfree = gp->schedlink;
 											gp->schedlink = p->gfree;
 											p->gfree = gp;
 										}
 										runtime·unlock(&runtime·sched.gflock);
 										goto retry;
 									}
 									if(gp) {
 										p->gfree = gp->schedlink;
 										p->gfreecnt--;
 									}
-												runtime: refactor proc.c
1. Rename 'g' and 'm' local vars to 'gp' and 'mp' (convention already used in some functions)
'g' and 'm' are global vars that mean current goroutine and current machine,
when they are shadowed by local vars, it's confusing, no ability to debug log both, etc.
2. White-space shuffling.
No semantic changes.
In preparation to bigger changes.

R=golang-dev, dave
CC=golang-dev
https://golang.org/cl/6355061

											
										
										
											2012-07-03 02:54:13 -06:00
+									return gp;
-												runtime: run deferred calls at Goexit

baby step toward panic+recover.

Fixes #349.

R=r
CC=golang-dev
https://golang.org/cl/825043

											
										
										
											2010-03-29 22:48:22 -06:00
+								}
-												add stub routines stackalloc() and stackfree().
run oldstack on g0's stack, just like newstack does,
so that oldstack can free the old stack.

R=r
DELTA=53  (44 added, 0 deleted, 9 changed)
OCL=20404
CL=20433

											
										
										
											2008-12-04 09:30:54 -07:00
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								// Purge all cached G's from gfree list to the global list.
 								static void
 								gfpurge(P *p)
 								{
 									G *gp;
 									runtime·lock(&runtime·sched.gflock);
 									while(p->gfreecnt) {
 										p->gfreecnt--;
 										gp = p->gfree;
 										p->gfree = gp->schedlink;
 										gp->schedlink = runtime·sched.gfree;
 										runtime·sched.gfree = gp;
 									}
 									runtime·unlock(&runtime·sched.gflock);
 								}
-												move things out of sys into os and runtime

R=r
OCL=28569
CL=28573

											
										
										
											2009-05-08 16:21:41 -06:00
+								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·Breakpoint(void)
-												move things out of sys into os and runtime

R=r
OCL=28569
CL=28573

											
										
										
											2009-05-08 16:21:41 -06:00
+								{
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+									runtime·breakpoint();
-												move things out of sys into os and runtime

R=r
OCL=28569
CL=28573

											
										
										
											2009-05-08 16:21:41 -06:00
+								}
 								void
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·Gosched(void)
-												move things out of sys into os and runtime

R=r
OCL=28569
CL=28573

											
										
										
											2009-05-08 16:21:41 -06:00
+								{
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+									runtime·gosched();
-												move things out of sys into os and runtime

R=r
OCL=28569
CL=28573

											
										
										
											2009-05-08 16:21:41 -06:00
+								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Implementation of runtime.GOMAXPROCS.
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// delete when scheduler is even stronger
-												runtime.GOMAXPROCS: hack it to have it return the old value.

R=rsc
CC=golang-dev
https://golang.org/cl/1140041

											
										
										
											2010-05-06 12:50:47 -06:00
+								int32
-												runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost

Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries.  The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.

The symbols left alone are:

	** known to cgo **
	_cgo_free
	_cgo_malloc
	libcgo_thread_start
	initcgo
	ncgocall

	** known to linker **
	_rt0_$GOARCH
	_rt0_$GOARCH_$GOOS
	text
	etext
	data
	end
	pclntab
	epclntab
	symtab
	esymtab

	** known to C compiler **
	_divv
	_modv
	_div64by32
	etc (arch specific)

Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.

Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.

R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041

											
										
										
											2010-11-04 12:00:19 -06:00
+								runtime·gomaxprocsfunc(int32 n)
-												add runtime.GOMAXPROCS, allowing a program to, in effect, set $GOMAXPROCS

R=rsc
DELTA=29  (28 added, 1 deleted, 0 changed)
OCL=32829
CL=32837

											
										
										
											2009-08-06 14:07:05 -06:00
+								{
-												runtime.GOMAXPROCS: hack it to have it return the old value.

R=rsc
CC=golang-dev
https://golang.org/cl/1140041

											
										
										
											2010-05-06 12:50:47 -06:00
+									int32 ret;
-												add runtime.GOMAXPROCS, allowing a program to, in effect, set $GOMAXPROCS

R=rsc
DELTA=29  (28 added, 1 deleted, 0 changed)
OCL=32829
CL=32837

											
										
										
											2009-08-06 14:07:05 -06:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(n > MaxGomaxprocs)
 										n = MaxGomaxprocs;
 									runtime·lock(&runtime·sched);
-												runtime: minor cleanup

implement runtime.casp on amd64.
keep simultaneous panic messages separate.

R=r
CC=golang-dev
https://golang.org/cl/4188053

											
										
										
											2011-02-16 11:21:13 -07:00
+									ret = runtime·gomaxprocs;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(n <= 0 || n == ret) {
 										runtime·unlock(&runtime·sched);
-												runtime: fix GOMAXPROCS vs garbage collection bug

Fixes #1715.

R=golang-dev, rsc1, rsc
CC=golang-dev
https://golang.org/cl/4434053

											
										
										
											2011-04-21 10:09:25 -06:00
+										return ret;
 									}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									runtime·unlock(&runtime·sched);
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												net: add special netFD mutex
The mutex, fdMutex, handles locking and lifetime of sysfd,
and serializes Read and Write methods.
This allows to strip 2 sync.Mutex.Lock calls,
2 sync.Mutex.Unlock calls, 1 defer and some amount
of misc overhead from every network operation.

On linux/amd64, Intel E5-2690:
benchmark                             old ns/op    new ns/op    delta
BenchmarkTCP4Persistent                    9595         9454   -1.47%
BenchmarkTCP4Persistent-2                  8978         8772   -2.29%
BenchmarkTCP4ConcurrentReadWrite           4900         4625   -5.61%
BenchmarkTCP4ConcurrentReadWrite-2         2603         2500   -3.96%

In general it strips 70-500 ns from every network operation depending
on processor model. On my relatively new E5-2690 it accounts to ~5%
of network op cost.

Fixes #6074.

R=golang-dev, bradfitz, alex.brainman, iant, mikioh.mikioh
CC=golang-dev
https://golang.org/cl/12418043

											
										
										
											2013-08-09 11:43:00 -06:00
+									runtime·semacquire(&runtime·worldsema, false);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									m->gcing = 1;
 									runtime·stoptheworld();
 									newprocs = n;
 									m->gcing = 0;
 									runtime·semrelease(&runtime·worldsema);
 									runtime·starttheworld();
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime.GOMAXPROCS: hack it to have it return the old value.

R=rsc
CC=golang-dev
https://golang.org/cl/1140041

											
										
										
											2010-05-06 12:50:47 -06:00
+									return ret;
-												add runtime.GOMAXPROCS, allowing a program to, in effect, set $GOMAXPROCS

R=rsc
DELTA=29  (28 added, 1 deleted, 0 changed)
OCL=32829
CL=32837

											
										
										
											2009-08-06 14:07:05 -06:00
+								}
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+								// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
 								// after they modify m->locked. Do not allow preemption during this call,
 								// or else the m might be different in this function than in the caller.
 								#pragma textflag NOSPLIT
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+								static void
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+								lockOSThread(void)
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+								{
 									m->lockedg = g;
 									g->lockedm = m;
 								}
-												add LockOSThread and UnlockOSThread to
runtime package for use by debugger,
which needs to make sure that all ptrace calls
about a given pid come from the same thread.

R=r
DELTA=175  (90 added, 63 deleted, 22 changed)
OCL=31546
CL=31558

											
										
										
											2009-07-13 18:28:39 -06:00
+								void
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+								runtime·LockOSThread(void)
 								{
 									m->locked |= LockExternal;
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+									lockOSThread();
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+								}
 								void
 								runtime·lockOSThread(void)
 								{
 									m->locked += LockInternal;
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+									lockOSThread();
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+								}
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
 								// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
 								// after they update m->locked. Do not allow preemption during this call,
 								// or else the m might be in different in this function than in the caller.
 								#pragma textflag NOSPLIT
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+								static void
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+								unlockOSThread(void)
-												add LockOSThread and UnlockOSThread to
runtime package for use by debugger,
which needs to make sure that all ptrace calls
about a given pid come from the same thread.

R=r
DELTA=175  (90 added, 63 deleted, 22 changed)
OCL=31546
CL=31558

											
										
										
											2009-07-13 18:28:39 -06:00
+								{
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+									if(m->locked != 0)
-												runtime: lock the main goroutine to the main OS thread during init

We only guarantee that the main goroutine runs on the
main OS thread for initialization.  Programs that wish to
preserve that property for main.main can call runtime.LockOSThread.
This is what programs used to do before we unleashed
goroutines during init, so it is both a simple fix and keeps
existing programs working.

R=iant, r, dave, dvyukov
CC=golang-dev
https://golang.org/cl/5309070

											
										
										
											2011-10-27 19:04:12 -06:00
+										return;
-												add LockOSThread and UnlockOSThread to
runtime package for use by debugger,
which needs to make sure that all ptrace calls
about a given pid come from the same thread.

R=r
DELTA=175  (90 added, 63 deleted, 22 changed)
OCL=31546
CL=31558

											
										
										
											2009-07-13 18:28:39 -06:00
+									m->lockedg = nil;
 									g->lockedm = nil;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+								}
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
 								void
 								runtime·UnlockOSThread(void)
 								{
 									m->locked &= ~LockExternal;
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+									unlockOSThread();
-												runtime: cgo-related fixes

* Separate internal and external LockOSThread, for cgo safety.
* Show goroutine that made faulting cgo call.
* Never start a panic due to a signal caused by a cgo call.

Fixes #3774.
Fixes #3775.
Fixes #3797.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7228081

											
										
										
											2013-02-01 09:34:41 -07:00
+								}
 								void
 								runtime·unlockOSThread(void)
 								{
 									if(m->locked < LockInternal)
 										runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
 									m->locked -= LockInternal;
-												runtime: fix LockOSThread
Fixes #6100.

R=golang-dev, dave, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/12703045

											
										
										
											2013-08-13 12:37:04 -06:00
+									unlockOSThread();
-												add LockOSThread and UnlockOSThread to
runtime package for use by debugger,
which needs to make sure that all ptrace calls
about a given pid come from the same thread.

R=r
DELTA=175  (90 added, 63 deleted, 22 changed)
OCL=31546
CL=31558

											
										
										
											2009-07-13 18:28:39 -06:00
+								}
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+								bool
 								runtime·lockedOSThread(void)
 								{
 									return g->lockedm != nil && m->lockedg != nil;
 								}
-												runtime: add windows callback tests

Just a copy of cgo callback tests from misc/cgo/test.

R=rsc
CC=golang-dev, hectorchu
https://golang.org/cl/5331062

											
										
										
											2011-11-07 22:53:31 -07:00
+								// for testing of callbacks
 								void
 								runtime·golockedOSThread(bool ret)
 								{
 									ret = runtime·lockedOSThread();
 									FLUSH(&ret);
 								}
-												runtime: add Goroutines

R=rsc
CC=golang-dev
https://golang.org/cl/3508041

											
										
										
											2010-12-07 16:06:31 -07:00
+								void
-												runtime: prepare for 64-bit ints

This CL makes the runtime understand that the type of
the len or cap of a map, slice, or string is 'int', not 'int32',
and it is also careful to distinguish between function arguments
and results of type 'int' vs type 'int32'.

In the runtime, the new typedefs 'intgo' and 'uintgo' refer
to Go int and uint. The C types int and uint continue to be
unavailable (cause intentional compile errors).

This CL does not change the meaning of int, but it should make
the eventual change of the meaning of int on amd64 a bit
smoother.

Update #2188.

R=iant, r, dave, remyoudompheng
CC=golang-dev
https://golang.org/cl/6551067

											
										
										
											2012-09-24 12:58:34 -06:00
+								runtime·NumGoroutine(intgo ret)
-												runtime: add Goroutines

R=rsc
CC=golang-dev
https://golang.org/cl/3508041

											
										
										
											2010-12-07 16:06:31 -07:00
+								{
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									ret = runtime·gcount();
-												runtime: add Goroutines

R=rsc
CC=golang-dev
https://golang.org/cl/3508041

											
										
										
											2010-12-07 16:06:31 -07:00
+									FLUSH(&ret);
 								}
-												runtime: simpler heap map, memory allocation

The old heap maps used a multilevel table, but that
was overkill: there are only 1M entries on a 32-bit
machine and we can arrange to use a dense address
range on a 64-bit machine.

The heap map is in bss.  The assumption is that if
we don't touch the pages they won't be mapped in.

Also moved some duplicated memory allocation
code out of the OS-specific files.

R=r
CC=golang-dev
https://golang.org/cl/4118042

											
										
										
											2011-01-28 13:03:26 -07:00
-												runtime: goroutine profile, stack dumps

R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/5687076

											
										
										
											2012-02-22 19:45:01 -07:00
+								int32
 								runtime·gcount(void)
 								{
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									G *gp;
 									int32 n, s;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									uintptr i;
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
 									n = 0;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·lock(&allglock);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// TODO(dvyukov): runtime.NumGoroutine() is O(N).
 									// We do not want to increment/decrement centralized counter in newproc/goexit,
 									// just to make runtime.NumGoroutine() faster.
 									// Compromise solution is to introduce per-P counters of active goroutines.
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									for(i = 0; i < runtime·allglen; i++) {
 										gp = runtime·allg[i];
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+										s = gp->status;
 										if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
 											n++;
 									}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·unlock(&allglock);
-												runtime: minor changes
to minimize diffs of new scheduler

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7381048

											
										
										
											2013-02-22 21:39:31 -07:00
+									return n;
-												runtime: goroutine profile, stack dumps

R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/5687076

											
										
										
											2012-02-22 19:45:01 -07:00
+								}
-												runtime: simpler heap map, memory allocation

The old heap maps used a multilevel table, but that
was overkill: there are only 1M entries on a 32-bit
machine and we can arrange to use a dense address
range on a 64-bit machine.

The heap map is in bss.  The assumption is that if
we don't touch the pages they won't be mapped in.

Also moved some duplicated memory allocation
code out of the OS-specific files.

R=r
CC=golang-dev
https://golang.org/cl/4118042

											
										
										
											2011-01-28 13:03:26 -07:00
+								int32
 								runtime·mcount(void)
 								{
 									return runtime·sched.mcount;
 								}
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
 								void
-												runtime: jump to badmcall instead of calling it.
This replaces the mcall frame with the badmcall frame instead of
leaving the mcall frame on the stack and adding the badmcall frame.
Because mcall is no longer on the stack, traceback will now report what
called mcall, which is what we would like to see in this situation.

R=golang-dev, cshapiro
CC=golang-dev
https://golang.org/cl/13012044

											
										
										
											2013-08-29 16:53:34 -06:00
+								runtime·badmcall(void (*fn)(G*))  // called from assembly
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+								{
-												runtime: jump to badmcall instead of calling it.
This replaces the mcall frame with the badmcall frame instead of
leaving the mcall frame on the stack and adding the badmcall frame.
Because mcall is no longer on the stack, traceback will now report what
called mcall, which is what we would like to see in this situation.

R=golang-dev, cshapiro
CC=golang-dev
https://golang.org/cl/13012044

											
										
										
											2013-08-29 16:53:34 -06:00
+									USED(fn); // TODO: print fn?
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+									runtime·throw("runtime: mcall called on m->g0 stack");
 								}
 								void
-												runtime: jump to badmcall instead of calling it.
This replaces the mcall frame with the badmcall frame instead of
leaving the mcall frame on the stack and adding the badmcall frame.
Because mcall is no longer on the stack, traceback will now report what
called mcall, which is what we would like to see in this situation.

R=golang-dev, cshapiro
CC=golang-dev
https://golang.org/cl/13012044

											
										
										
											2013-08-29 16:53:34 -06:00
+								runtime·badmcall2(void (*fn)(G*))  // called from assembly
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+								{
-												runtime: jump to badmcall instead of calling it.
This replaces the mcall frame with the badmcall frame instead of
leaving the mcall frame on the stack and adding the badmcall frame.
Because mcall is no longer on the stack, traceback will now report what
called mcall, which is what we would like to see in this situation.

R=golang-dev, cshapiro
CC=golang-dev
https://golang.org/cl/13012044

											
										
										
											2013-08-29 16:53:34 -06:00
+									USED(fn);
-												runtime: scheduler, cgo reorganization

* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).

Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).

The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack.  Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler.  If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless.  Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.

* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.

Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.

The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).

The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc.  Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.

Fixes #1560.

R=iant
CC=golang-dev
https://golang.org/cl/4253054

											
										
										
											2011-03-07 08:37:42 -07:00
+									runtime·throw("runtime: mcall function returned");
 								}
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
-												runtime: reimplement reflect.call to not use stack splitting.

R=golang-dev, r, khr, rsc
CC=golang-dev
https://golang.org/cl/12053043

											
										
										
											2013-08-02 14:03:14 -06:00
+								void
 								runtime·badreflectcall(void) // called from assembly
 								{
 									runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
 								}
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+								static struct {
 									Lock;
 									void (*fn)(uintptr*, int32);
 									int32 hz;
 									uintptr pcbuf[100];
 								} prof;
-												runtime: do no lose CPU profiling signals
Currently we lose lots of profiling signals.
Most notably, GC is not accounted at all.
But stack splits, scheduler, syscalls, etc are lost as well.
This creates seriously misleading profile.
With this change all profiling signals are accounted.
Now I see these additional entries that were previously absent:
161  29.7%  29.7%      164  30.3% syscall.Syscall
 12   2.2%  50.9%       12   2.2% scanblock
 11   2.0%  55.0%       11   2.0% markonly
 10   1.8%  58.9%       10   1.8% sweepspan
  2   0.4%  85.8%        2   0.4% runtime.newstack
It is still impossible to understand what causes stack splits,
but at least it's clear how many time is spent on them.
Update #2197.
Update #5659.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12179043

											
										
										
											2013-08-13 12:12:02 -06:00
+								static void
 								System(void)
 								{
 								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Called if we receive a SIGPROF signal.
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+								void
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+								runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+								{
 									int32 n;
-												runtime: do no lose CPU profiling signals
Currently we lose lots of profiling signals.
Most notably, GC is not accounted at all.
But stack splits, scheduler, syscalls, etc are lost as well.
This creates seriously misleading profile.
With this change all profiling signals are accounted.
Now I see these additional entries that were previously absent:
161  29.7%  29.7%      164  30.3% syscall.Syscall
 12   2.2%  50.9%       12   2.2% scanblock
 11   2.0%  55.0%       11   2.0% markonly
 10   1.8%  58.9%       10   1.8% sweepspan
  2   0.4%  85.8%        2   0.4% runtime.newstack
It is still impossible to understand what causes stack splits,
but at least it's clear how many time is spent on them.
Update #2197.
Update #5659.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12179043

											
										
										
											2013-08-13 12:12:02 -06:00
+									bool traceback;
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+									MCache *mcache;
 									// Do not use global m in this function, use mp instead.
 									// On windows one m is sending reports about all the g's, so m means a wrong thing.
 									byte m;
 									m = 0;
 									USED(m);
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												undo CL 12167043 / 475e11851fc1

Submitted with some unrelated changes that were not intended to go in.

««« original CL description
runtime: do not park sysmon thread if any goroutines are running
Sysmon thread parks if no goroutines are running (runtime.sched.npidle == runtime.gomaxprocs).
Currently it's unparked when a goroutine enters syscall, it was enough
to retake P's from blocking syscalls.
But it's not enough for reliable goroutine preemption. We need to ensure that
sysmon runs if any goroutines are running.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12167043
»»»

R=rsc
CC=golang-dev
https://golang.org/cl/12171044

											
										
										
											2013-07-31 10:03:05 -06:00
+									if(prof.fn == nil || prof.hz == 0)
 										return;
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
 									// Profiling runs concurrently with GC, so it must not allocate.
 									mcache = mp->mcache;
 									mp->mcache = nil;
-												runtime: avoid inconsistent goroutine state in profiler

Because profiling signals can arrive at any time, we must
handle the case where a profiling signal arrives halfway
through a goroutine switch. Luckily, although there is much
to think through, very little needs to change.

Fixes #6000.
Fixes #6015.

R=golang-dev, dvyukov
CC=golang-dev
https://golang.org/cl/13421048

											
										
										
											2013-09-13 12:19:23 -06:00
+									// Define that a "user g" is a user-created goroutine, and a "system g"
 									// is one that is m->g0 or m->gsignal. We've only made sure that we
 									// can unwind user g's, so exclude the system g's.
 									//
 									// It is not quite as easy as testing gp == m->curg (the current user g)
 									// because we might be interrupted for profiling halfway through a
 									// goroutine switch. The switch involves updating three (or four) values:
 									// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
 									// because once it gets updated the new g is running.
 									//
 									// When switching from a user g to a system g, LR is not considered live,
 									// so the update only affects g, SP, and PC. Since PC must be last, there
 									// the possible partial transitions in ordinary execution are (1) g alone is updated,
 									// (2) both g and SP are updated, and (3) SP alone is updated.
 									// If g is updated, we'll see a system g and not look closer.
 									// If SP alone is updated, we can detect the partial transition by checking
 									// whether the SP is within g's stack bounds. (We could also require that SP
 									// be changed only after g, but the stack bounds check is needed by other
 									// cases, so there is no need to impose an additional requirement.)
 									//
 									// There is one exceptional transition to a system g, not in ordinary execution.
 									// When a signal arrives, the operating system starts the signal handler running
 									// with an updated PC and SP. The g is updated last, at the beginning of the
 									// handler. There are two reasons this is okay. First, until g is updated the
 									// g and SP do not match, so the stack bounds check detects the partial transition.
 									// Second, signal handlers currently run with signals disabled, so a profiling
 									// signal cannot arrive during the handler.
 									//
 									// When switching from a system g to a user g, there are three possibilities.
 									//
 									// First, it may be that the g switch has no PC update, because the SP
 									// either corresponds to a user g throughout (as in runtime.asmcgocall)
 									// or because it has been arranged to look like a user g frame
 									// (as in runtime.cgocallback_gofunc). In this case, since the entire
 									// transition is a g+SP update, a partial transition updating just one of
 									// those will be detected by the stack bounds check.
 									//
 									// Second, when returning from a signal handler, the PC and SP updates
 									// are performed by the operating system in an atomic update, so the g
 									// update must be done before them. The stack bounds check detects
 									// the partial transition here, and (again) signal handlers run with signals
 									// disabled, so a profiling signal cannot arrive then anyway.
 									//
 									// Third, the common case: it may be that the switch updates g, SP, and PC
 									// separately, as in runtime.gogo.
 									//
 									// Because runtime.gogo is the only instance, we check whether the PC lies
 									// within that function, and if so, not ask for a traceback. This approach
 									// requires knowing the size of the runtime.gogo function, which we
 									// record in arch_*.h and check in runtime_test.go.
 									//
 									// There is another apparently viable approach, recorded here in case
 									// the "PC within runtime.gogo" check turns out not to be usable.
 									// It would be possible to delay the update of either g or SP until immediately
 									// before the PC update instruction. Then, because of the stack bounds check,
 									// the only problematic interrupt point is just before that PC update instruction,
 									// and the sigprof handler can detect that instruction and simulate stepping past
 									// it in order to reach a consistent state. On ARM, the update of g must be made
 									// in two places (in R10 and also in a TLS slot), so the delayed update would
 									// need to be the SP update. The sigprof handler must read the instruction at
 									// the current PC and if it was the known instruction (for example, JMP BX or
 									// MOV R2, PC), use that other register in place of the PC value.
 									// The biggest drawback to this solution is that it requires that we can tell
 									// whether it's safe to read from the memory pointed at by PC.
 									// In a correct program, we can test PC == nil and otherwise read,
 									// but if a profiling signal happens at the instant that a program executes
 									// a bad jump (before the program manages to handle the resulting fault)
 									// the profiling handler could fault trying to read nonexistent memory.
 									//
 									// To recap, there are no constraints on the assembly being used for the
 									// transition. We simply require that g and SP match and that the PC is not
 									// in runtime.gogo.
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+									traceback = true;
 									if(gp == nil || gp != mp->curg ||
-												runtime: fix CPU profiling on Windows

The test 'gp == m->curg' is not valid on Windows,
because the goroutine being profiled is not from the
current m.

TBR=golang-dev
CC=golang-dev
https://golang.org/cl/13718043

											
										
										
											2013-09-15 10:05:24 -06:00
+									   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
-												runtime: avoid inconsistent goroutine state in profiler

Because profiling signals can arrive at any time, we must
handle the case where a profiling signal arrives halfway
through a goroutine switch. Luckily, although there is much
to think through, very little needs to change.

Fixes #6000.
Fixes #6015.

R=golang-dev, dvyukov
CC=golang-dev
https://golang.org/cl/13421048

											
										
										
											2013-09-13 12:19:23 -06:00
+									   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
-												runtime: do no lose CPU profiling signals
Currently we lose lots of profiling signals.
Most notably, GC is not accounted at all.
But stack splits, scheduler, syscalls, etc are lost as well.
This creates seriously misleading profile.
With this change all profiling signals are accounted.
Now I see these additional entries that were previously absent:
161  29.7%  29.7%      164  30.3% syscall.Syscall
 12   2.2%  50.9%       12   2.2% scanblock
 11   2.0%  55.0%       11   2.0% markonly
 10   1.8%  58.9%       10   1.8% sweepspan
  2   0.4%  85.8%        2   0.4% runtime.newstack
It is still impossible to understand what causes stack splits,
but at least it's clear how many time is spent on them.
Update #2197.
Update #5659.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12179043

											
										
										
											2013-08-13 12:12:02 -06:00
+										traceback = false;
-												runtime: avoid inconsistent goroutine state in profiler

Because profiling signals can arrive at any time, we must
handle the case where a profiling signal arrives halfway
through a goroutine switch. Luckily, although there is much
to think through, very little needs to change.

Fixes #6000.
Fixes #6015.

R=golang-dev, dvyukov
CC=golang-dev
https://golang.org/cl/13421048

											
										
										
											2013-09-13 12:19:23 -06:00
-												runtime: do no lose CPU profiling signals
Currently we lose lots of profiling signals.
Most notably, GC is not accounted at all.
But stack splits, scheduler, syscalls, etc are lost as well.
This creates seriously misleading profile.
With this change all profiling signals are accounted.
Now I see these additional entries that were previously absent:
161  29.7%  29.7%      164  30.3% syscall.Syscall
 12   2.2%  50.9%       12   2.2% scanblock
 11   2.0%  55.0%       11   2.0% markonly
 10   1.8%  58.9%       10   1.8% sweepspan
  2   0.4%  85.8%        2   0.4% runtime.newstack
It is still impossible to understand what causes stack splits,
but at least it's clear how many time is spent on them.
Update #2197.
Update #5659.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12179043

											
										
										
											2013-08-13 12:12:02 -06:00
+									// Race detector calls asmcgocall w/o entersyscall/exitsyscall,
 									// we can not currently unwind through asmcgocall.
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+									if(mp != nil && mp->racecall)
-												runtime: do no lose CPU profiling signals
Currently we lose lots of profiling signals.
Most notably, GC is not accounted at all.
But stack splits, scheduler, syscalls, etc are lost as well.
This creates seriously misleading profile.
With this change all profiling signals are accounted.
Now I see these additional entries that were previously absent:
161  29.7%  29.7%      164  30.3% syscall.Syscall
 12   2.2%  50.9%       12   2.2% scanblock
 11   2.0%  55.0%       11   2.0% markonly
 10   1.8%  58.9%       10   1.8% sweepspan
  2   0.4%  85.8%        2   0.4% runtime.newstack
It is still impossible to understand what causes stack splits,
but at least it's clear how many time is spent on them.
Update #2197.
Update #5659.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12179043

											
										
										
											2013-08-13 12:12:02 -06:00
+										traceback = false;
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+									runtime·lock(&prof);
 									if(prof.fn == nil) {
 										runtime·unlock(&prof);
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+										mp->mcache = mcache;
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+										return;
 									}
-												runtime: do no lose CPU profiling signals
Currently we lose lots of profiling signals.
Most notably, GC is not accounted at all.
But stack splits, scheduler, syscalls, etc are lost as well.
This creates seriously misleading profile.
With this change all profiling signals are accounted.
Now I see these additional entries that were previously absent:
161  29.7%  29.7%      164  30.3% syscall.Syscall
 12   2.2%  50.9%       12   2.2% scanblock
 11   2.0%  55.0%       11   2.0% markonly
 10   1.8%  58.9%       10   1.8% sweepspan
  2   0.4%  85.8%        2   0.4% runtime.newstack
It is still impossible to understand what causes stack splits,
but at least it's clear how many time is spent on them.
Update #2197.
Update #5659.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12179043

											
										
										
											2013-08-13 12:12:02 -06:00
+									n = 0;
 									if(traceback)
 										n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf), nil, nil, false);
 									if(!traceback || n <= 0) {
 										n = 2;
 										prof.pcbuf[0] = (uintptr)pc;
 										prof.pcbuf[1] = (uintptr)System + 1;
 									}
 									prof.fn(prof.pcbuf, n);
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+									runtime·unlock(&prof);
-												runtime: fix and improve CPU profiling

- do not lose profiling signals when we have no mcache (possible for syscalls/cgo)
- do not lose any profiling signals on windows
- fix profiling of cgo programs on windows (they had no m->thread setup)
- properly setup tls in cgo programs on windows
- check _beginthread return value

Fixes #6417.
Fixes #6986.

R=alex.brainman, rsc
CC=golang-codereviews
https://golang.org/cl/44820047

											
										
										
											2014-01-21 23:30:10 -07:00
+									mp->mcache = mcache;
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+								}
-												runtime: add comments for various functions in proc.c

R=rsc
CC=golang-dev
https://golang.org/cl/5357047

											
										
										
											2011-11-08 19:16:25 -07:00
+								// Arrange to call fn with a traceback hz times a second.
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+								void
 								runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
 								{
 									// Force sane arguments.
 									if(hz < 0)
 										hz = 0;
 									if(hz == 0)
 										fn = nil;
 									if(fn == nil)
 										hz = 0;
-												syscall: disable cpu profiling around fork
Fixes #5517.
Fixes #5659.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12183044

											
										
										
											2013-08-13 03:01:30 -06:00
+									// Disable preemption, otherwise we can be rescheduled to another thread
 									// that has profiling enabled.
 									m->locks++;
 									// Stop profiler on this thread so that it is safe to lock prof.
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+									// if a profiling signal came in while we had prof locked,
 									// it would deadlock.
 									runtime·resetcpuprofiler(0);
 									runtime·lock(&prof);
 									prof.fn = fn;
 									prof.hz = hz;
 									runtime·unlock(&prof);
 									runtime·lock(&runtime·sched);
 									runtime·sched.profilehz = hz;
 									runtime·unlock(&runtime·sched);
-												runtime: faster entersyscall, exitsyscall

Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042

											
										
										
											2011-07-19 09:01:17 -06:00
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+									if(hz != 0)
 										runtime·resetcpuprofiler(hz);
-												syscall: disable cpu profiling around fork
Fixes #5517.
Fixes #5659.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/12183044

											
										
										
											2013-08-13 03:01:30 -06:00
 									m->locks--;
-												runtime: cpu profiling support

R=r
CC=golang-dev
https://golang.org/cl/4306043

											
										
										
											2011-03-23 09:43:37 -06:00
+								}
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Change number of processors.  The world is stopped, sched is locked.
 								static void
 								procresize(int32 new)
 								{
 									int32 i, old;
-												runtime: ensure fair scheduling during frequent GCs
What was happenning is as follows:
Each writer goroutine always triggers GC during its scheduling quntum.
After GC goroutines are shuffled so that the timer goroutine is always second in the queue.
This repeats infinitely, causing timer goroutine starvation.
Fixes #7126.

R=golang-codereviews, shanemhansen, khr, khr
CC=golang-codereviews
https://golang.org/cl/53080043

											
										
										
											2014-01-20 23:24:42 -07:00
+									bool empty;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									G *gp;
 									P *p;
 									old = runtime·gomaxprocs;
 									if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs)
 										runtime·throw("procresize: invalid arg");
 									// initialize new P's
 									for(i = 0; i < new; i++) {
 										p = runtime·allp[i];
 										if(p == nil) {
-												runtime: refactor mallocgc
Make it accept type, combine flags.
Several reasons for the change:
1. mallocgc and settype must be atomic wrt GC
2. settype is called from only one place now
3. it will help performance (eventually settype
functionality must be combined with markallocated)
4. flags are easier to read now (no mallocgc(sz, 0, 1, 0) anymore)

R=golang-dev, iant, nightlyone, rsc, dave, khr, bradfitz, r
CC=golang-dev
https://golang.org/cl/10136043

											
										
										
											2013-07-26 11:17:24 -06:00
+											p = (P*)runtime·mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+											p->id = i;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											p->status = Pgcstop;
 											runtime·atomicstorep(&runtime·allp[i], p);
 										}
 										if(p->mcache == nil) {
 											if(old==0 && i==0)
 												p->mcache = m->mcache;  // bootstrap
 											else
 												p->mcache = runtime·allocmcache();
 										}
 									}
 									// redistribute runnable G's evenly
-												runtime: ensure fair scheduling during frequent GCs
What was happenning is as follows:
Each writer goroutine always triggers GC during its scheduling quntum.
After GC goroutines are shuffled so that the timer goroutine is always second in the queue.
This repeats infinitely, causing timer goroutine starvation.
Fixes #7126.

R=golang-codereviews, shanemhansen, khr, khr
CC=golang-codereviews
https://golang.org/cl/53080043

											
										
										
											2014-01-20 23:24:42 -07:00
+									// collect all runnable goroutines in global queue preserving FIFO order
 									// FIFO order is required to ensure fairness even during frequent GCs
 									// see http://golang.org/issue/7126
 									empty = false;
 									while(!empty) {
 										empty = true;
 										for(i = 0; i < old; i++) {
 											p = runtime·allp[i];
 											if(p->runqhead == p->runqtail)
 												continue;
 											empty = false;
 											// pop from tail of local queue
 											p->runqtail--;
 											gp = p->runq[p->runqtail%nelem(p->runq)];
 											// push onto head of global queue
 											gp->schedlink = runtime·sched.runqhead;
 											runtime·sched.runqhead = gp;
 											if(runtime·sched.runqtail == nil)
 												runtime·sched.runqtail = gp;
 											runtime·sched.runqsize++;
 										}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									}
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									// fill local queues with at most nelem(p->runq)/2 goroutines
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									// start at 1 because current M already executes some G and will acquire allp[0] below,
 									// so if we have a spare G we want to put it into allp[1].
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										gp = runtime·sched.runqhead;
 										runtime·sched.runqhead = gp->schedlink;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+										if(runtime·sched.runqhead == nil)
 											runtime·sched.runqtail = nil;
 										runtime·sched.runqsize--;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runqput(runtime·allp[i%new], gp);
 									}
 									// free unused P's
 									for(i = new; i < old; i++) {
 										p = runtime·allp[i];
 										runtime·freemcache(p->mcache);
 										p->mcache = nil;
 										gfpurge(p);
 										p->status = Pdead;
 										// can't free P itself because it can be referenced by an M in syscall
 									}
 									if(m->p)
 										m->p->m = nil;
 									m->p = nil;
 									m->mcache = nil;
 									p = runtime·allp[0];
 									p->m = nil;
 									p->status = Pidle;
 									acquirep(p);
 									for(i = new-1; i > 0; i--) {
 										p = runtime·allp[i];
 										p->status = Pidle;
 										pidleput(p);
 									}
 									runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
 								}
 								// Associate p and the current m.
 								static void
 								acquirep(P *p)
 								{
 									if(m->p || m->mcache)
 										runtime·throw("acquirep: already in go");
 									if(p->m || p->status != Pidle) {
 										runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
 										runtime·throw("acquirep: invalid p state");
 									}
 									m->mcache = p->mcache;
 									m->p = p;
 									p->m = m;
 									p->status = Prunning;
 								}
 								// Disassociate p and the current m.
 								static P*
 								releasep(void)
 								{
 									P *p;
 									if(m->p == nil || m->mcache == nil)
 										runtime·throw("releasep: invalid arg");
 									p = m->p;
 									if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
 										runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
 											m, m->p, p->m, m->mcache, p->mcache, p->status);
 										runtime·throw("releasep: invalid p state");
 									}
 									m->p = nil;
 									m->mcache = nil;
 									p->m = nil;
 									p->status = Pidle;
 									return p;
 								}
 								static void
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+								incidlelocked(int32 v)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								{
 									runtime·lock(&runtime·sched);
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+									runtime·sched.nmidlelocked += v;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(v > 0)
 										checkdead();
 									runtime·unlock(&runtime·sched);
 								}
 								// Check for deadlock situation.
 								// The check is based on number of running M's, if 0 -> deadlock.
 								static void
 								checkdead(void)
 								{
 									G *gp;
 									int32 run, grunning, s;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									uintptr i;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
 									// -1 for sysmon
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+									run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(run > 0)
 										return;
 									if(run < 0) {
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+										runtime·printf("checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
 											runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										runtime·throw("checkdead: inconsistent counts");
 									}
 									grunning = 0;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·lock(&allglock);
 									for(i = 0; i < runtime·allglen; i++) {
 										gp = runtime·allg[i];
-												runtime: fix deadlock detector false negative
The issue was that scvg is assigned *after* the scavenger goroutine is started,
so when the scavenger calls entersyscall() the g==scvg check can fail.
Fixes #5025.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7629045

											
										
										
											2013-03-12 07:21:44 -06:00
+										if(gp->isbackground)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											continue;
 										s = gp->status;
 										if(s == Gwaiting)
 											grunning++;
 										else if(s == Grunnable || s == Grunning || s == Gsyscall) {
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+											runtime·unlock(&allglock);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											runtime·printf("checkdead: find g %D in status %d\n", gp->goid, s);
 											runtime·throw("checkdead: runnable g");
 										}
 									}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·unlock(&allglock);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
 										runtime·exit(0);
 									m->throwing = -1;  // do not dump full stacks
 									runtime·throw("all goroutines are asleep - deadlock!");
 								}
 								static void
 								sysmon(void)
 								{
 									uint32 idle, delay;
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+									int64 now, lastpoll, lasttrace;
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+									G *gp;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+									lasttrace = 0;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									idle = 0;  // how many cycles in succession we had not wokeup somebody
 									delay = 0;
 									for(;;) {
 										if(idle == 0)  // start with 20us sleep...
 											delay = 20;
 										else if(idle > 50)  // start doubling the sleep after 1ms...
 											delay *= 2;
 										if(delay > 10*1000)  // up to 10ms
 											delay = 10*1000;
 										runtime·usleep(delay);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+										if(runtime·debug.schedtrace <= 0 &&
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+											(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											runtime·lock(&runtime·sched);
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+											if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+												runtime·atomicstore(&runtime·sched.sysmonwait, 1);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+												runtime·unlock(&runtime·sched);
 												runtime·notesleep(&runtime·sched.sysmonnote);
 												runtime·noteclear(&runtime·sched.sysmonnote);
 												idle = 0;
 												delay = 20;
 											} else
 												runtime·unlock(&runtime·sched);
 										}
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+										// poll network if not polled for more than 10ms
 										lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
 										now = runtime·nanotime();
-												runtime: correct test for when to poll network

Fixes #6610.

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/14793043

											
										
										
											2013-10-17 12:57:48 -06:00
+										if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
-												runtime: prevent sysmon from polling network excessivly
If the network is not polled for 10ms, sysmon starts polling network
on every iteration (every 20us) until another thread blocks in netpoll.
Fixes #5922.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/11569043

											
										
										
											2013-07-19 07:45:34 -06:00
+											runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+											gp = runtime·netpoll(false);  // non-blocking
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+											if(gp) {
 												// Need to decrement number of idle locked M's
 												// (pretending that one more is running) before injectglist.
 												// Otherwise it can lead to the following situation:
 												// injectglist grabs all P's but before it starts M's to run the P's,
 												// another M returns from syscall, finishes running its G,
 												// observes that there is no work to do and no other running M's
 												// and reports deadlock.
 												incidlelocked(-1);
 												injectglist(gp);
 												incidlelocked(1);
 											}
-												runtime: add network polling support into scheduler
This is a part of the bigger change that moves network poller into runtime:
https://golang.org/cl/7326051/

R=golang-dev, bradfitz, mikioh.mikioh, rsc
CC=golang-dev
https://golang.org/cl/7448048

											
										
										
											2013-03-12 11:14:26 -06:00
+										}
 										// retake P's blocked in syscalls
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+										// and preempt long running G's
 										if(retake(now))
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+											idle = 0;
 										else
 											idle++;
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
 										if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
 											lasttrace = now;
 											runtime·schedtrace(runtime·debug.scheddetail);
 										}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+									}
 								}
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+								typedef struct Pdesc Pdesc;
 								struct Pdesc
 								{
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+									uint32	schedtick;
 									int64	schedwhen;
 									uint32	syscalltick;
 									int64	syscallwhen;
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+								};
 								static Pdesc pdesc[MaxGomaxprocs];
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								static uint32
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+								retake(int64 now)
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								{
 									uint32 i, s, n;
 									int64 t;
 									P *p;
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+									Pdesc *pd;
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
 									n = 0;
 									for(i = 0; i < runtime·gomaxprocs; i++) {
 										p = runtime·allp[i];
 										if(p==nil)
 											continue;
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+										pd = &pdesc[i];
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										s = p->status;
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+										if(s == Psyscall) {
-												runtime: tune P retake logic
When GOMAXPROCS>1 the last P in syscall is never retaken
(because there are already idle P's -- npidle>0).
This prevents sysmon thread from sleeping.
On a darwin machine the program from issue 6673 constantly
consumes ~0.2% CPU. With this change it stably consumes 0.0% CPU.
Fixes #6673.

R=golang-codereviews, r
CC=bradfitz, golang-codereviews, iant, khr
https://golang.org/cl/56990045

											
										
										
											2014-01-27 12:17:46 -07:00
+											// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+											t = p->syscalltick;
 											if(pd->syscalltick != t) {
 												pd->syscalltick = t;
 												pd->syscallwhen = now;
 												continue;
 											}
-												runtime: tune P retake logic
When GOMAXPROCS>1 the last P in syscall is never retaken
(because there are already idle P's -- npidle>0).
This prevents sysmon thread from sleeping.
On a darwin machine the program from issue 6673 constantly
consumes ~0.2% CPU. With this change it stably consumes 0.0% CPU.
Fixes #6673.

R=golang-codereviews, r
CC=bradfitz, golang-codereviews, iant, khr
https://golang.org/cl/56990045

											
										
										
											2014-01-27 12:17:46 -07:00
+											// On the one hand we don't want to retake Ps if there is no other work to do,
 											// but on the other hand we want to retake them eventually
 											// because they can prevent the sysmon thread from deep sleep.
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+											if(p->runqhead == p->runqtail &&
-												runtime: tune P retake logic
When GOMAXPROCS>1 the last P in syscall is never retaken
(because there are already idle P's -- npidle>0).
This prevents sysmon thread from sleeping.
On a darwin machine the program from issue 6673 constantly
consumes ~0.2% CPU. With this change it stably consumes 0.0% CPU.
Fixes #6673.

R=golang-codereviews, r
CC=bradfitz, golang-codereviews, iant, khr
https://golang.org/cl/56990045

											
										
										
											2014-01-27 12:17:46 -07:00
+												runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
 												pd->syscallwhen + 10*1000*1000 > now)
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+												continue;
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+											// Need to decrement number of idle locked M's
 											// (pretending that one more is running) before the CAS.
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+											// Otherwise the M from which we retake can exit the syscall,
 											// increment nmidle and report deadlock.
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+											incidlelocked(-1);
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+											if(runtime·cas(&p->status, s, Pidle)) {
 												n++;
 												handoffp(p);
 											}
-												runtime: fix false deadlock crash
Fixes #6070.
Update #6055.

R=golang-dev, nightlyone, rsc
CC=golang-dev
https://golang.org/cl/12602043

											
										
										
											2013-08-13 12:07:42 -06:00
+											incidlelocked(1);
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+										} else if(s == Prunning) {
 											// Preempt G if it's running for more than 10ms.
-												runtime: more reliable preemption
Currently it's possible that a goroutine
that periodically executes non-blocking
cgo/syscalls is never preempted.
This change splits scheduler and syscall
ticks to prevent such situation.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12658045

											
										
										
											2013-08-13 12:14:04 -06:00
+											t = p->schedtick;
 											if(pd->schedtick != t) {
 												pd->schedtick = t;
 												pd->schedwhen = now;
 												continue;
 											}
 											if(pd->schedwhen + 10*1000*1000 > now)
-												runtime: preempt long-running goroutines
If a goroutine runs for more than 10ms, preempt it.
Update #543.

R=rsc
CC=golang-dev
https://golang.org/cl/10796043

											
										
										
											2013-07-18 15:22:26 -06:00
+												continue;
 											preemptone(p);
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+										}
 									}
 									return n;
 								}
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+								// Tell all goroutines that they have been preempted and they should stop.
 								// This function is purely best-effort.  It can fail to inform a goroutine if a
 								// processor just started running it.
 								// No locks need to be held.
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+								// Returns true if preemption request was issued to at least one goroutine.
 								static bool
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+								preemptall(void)
 								{
 									P *p;
 									int32 i;
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+									bool res;
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+									res = false;
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+									for(i = 0; i < runtime·gomaxprocs; i++) {
 										p = runtime·allp[i];
 										if(p == nil || p->status != Prunning)
 											continue;
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+										res |= preemptone(p);
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+									}
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+									return res;
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+								}
 								// Tell the goroutine running on processor P to stop.
 								// This function is purely best-effort.  It can incorrectly fail to inform the
 								// goroutine.  It can send inform the wrong goroutine.  Even if it informs the
 								// correct goroutine, that goroutine might ignore the request if it is
 								// simultaneously executing runtime·newstack.
 								// No lock needs to be held.
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+								// Returns true if preemption request was issued.
 								static bool
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+								preemptone(P *p)
 								{
 									M *mp;
 									G *gp;
 									mp = p->m;
 									if(mp == nil || mp == m)
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+										return false;
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+									gp = mp->curg;
 									if(gp == nil || gp == mp->g0)
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+										return false;
-												runtime: more reliable preemption
Currently preemption signal g->stackguard0==StackPreempt
can be lost if it is received when preemption is disabled
(e.g. m->lock!=0). This change duplicates the preemption
signal in g->preempt and restores g->stackguard0
when preemption is enabled.
Update #543.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/10792043

											
										
										
											2013-07-17 10:52:37 -06:00
+									gp->preempt = true;
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+									gp->stackguard0 = StackPreempt;
-												runtime: traceback running goroutines
Introduce freezetheworld function that is a best-effort attempt to stop any concurrently running goroutines. Call it during crash.
Fixes #5873.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/12054044

											
										
										
											2013-08-09 02:53:35 -06:00
+									return true;
-												runtime: introduce preemption function (not used for now)
This is part of preemptive scheduler.

R=golang-dev, cshapiro, iant
CC=golang-dev
https://golang.org/cl/9843046

											
										
										
											2013-06-03 03:20:17 -06:00
+								}
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+								void
 								runtime·schedtrace(bool detailed)
 								{
 									static int64 starttime;
 									int64 now;
-												runtime: fix build on arm

Do not use ? :
I cannot say this enough.

TBR=dvyukov
CC=golang-dev
https://golang.org/cl/12903043

											
										
										
											2013-08-13 17:37:54 -06:00
+									int64 id1, id2, id3;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									int32 i, t, h;
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									uintptr gi;
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+									int8 *fmt;
 									M *mp, *lockedm;
 									G *gp, *lockedg;
 									P *p;
 									now = runtime·nanotime();
 									if(starttime == 0)
 										starttime = now;
 									runtime·lock(&runtime·sched);
 									runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
 										(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
 										runtime·sched.nmidle, runtime·sched.runqsize);
 									if(detailed) {
 										runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
-												runtime: remove old preemption checks
runtime.gcwaiting checks are not needed anymore

R=golang-dev, khr
CC=golang-dev
https://golang.org/cl/12934043

											
										
										
											2013-08-15 04:32:10 -06:00
+											runtime·sched.gcwaiting, runtime·sched.nmidlelocked, runtime·sched.nmspinning,
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+											runtime·sched.stopwait, runtime·sched.sysmonwait);
 									}
 									// We must be careful while reading data from P's, M's and G's.
 									// Even if we hold schedlock, most data can be changed concurrently.
 									// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
 									for(i = 0; i < runtime·gomaxprocs; i++) {
 										p = runtime·allp[i];
 										if(p == nil)
 											continue;
 										mp = p->m;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+										h = runtime·atomicload(&p->runqhead);
 										t = runtime·atomicload(&p->runqtail);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+										if(detailed)
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+											runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
 												i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+										else {
 											// In non-detailed mode format lengths of per-P run queues as:
 											// [len1 len2 len3 len4]
 											fmt = " %d";
 											if(runtime·gomaxprocs == 1)
 												fmt = " [%d]\n";
 											else if(i == 0)
 												fmt = " [%d";
 											else if(i == runtime·gomaxprocs-1)
 												fmt = " %d]\n";
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+											runtime·printf(fmt, t-h);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+										}
 									}
 									if(!detailed) {
 										runtime·unlock(&runtime·sched);
 										return;
 									}
 									for(mp = runtime·allm; mp; mp = mp->alllink) {
 										p = mp->p;
 										gp = mp->curg;
 										lockedg = mp->lockedg;
-												runtime: fix build on arm

Do not use ? :
I cannot say this enough.

TBR=dvyukov
CC=golang-dev
https://golang.org/cl/12903043

											
										
										
											2013-08-13 17:37:54 -06:00
+										id1 = -1;
 										if(p)
 											id1 = p->id;
 										id2 = -1;
 										if(gp)
 											id2 = gp->goid;
 										id3 = -1;
 										if(lockedg)
 											id3 = lockedg->goid;
 										runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+											" locks=%d dying=%d helpgc=%d spinning=%d lockedg=%D\n",
-												runtime: fix build on arm

Do not use ? :
I cannot say this enough.

TBR=dvyukov
CC=golang-dev
https://golang.org/cl/12903043

											
										
										
											2013-08-13 17:37:54 -06:00
+											mp->id, id1, id2,
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+											mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
-												runtime: fix build on arm

Do not use ? :
I cannot say this enough.

TBR=dvyukov
CC=golang-dev
https://golang.org/cl/12903043

											
										
										
											2013-08-13 17:37:54 -06:00
+											mp->spinning, id3);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+									}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·lock(&allglock);
 									for(gi = 0; gi < runtime·allglen; gi++) {
 										gp = runtime·allg[gi];
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+										mp = gp->m;
 										lockedm = gp->lockedm;
 										runtime·printf("  G%D: status=%d(%s) m=%d lockedm=%d\n",
 											gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
 											lockedm ? lockedm->id : -1);
 									}
-												runtime: do not collect GC roots explicitly
Currently we collect (add) all roots into a global array in a single-threaded GC phase.
This hinders parallelism.
With this change we just kick off parallel for for number_of_goroutines+5 iterations.
Then parallel for callback decides whether it needs to scan stack of a goroutine
scan data segment, scan finalizers, etc. This eliminates the single-threaded phase entirely.
This requires to store all goroutines in an array instead of a linked list
(to allow direct indexing).
This CL also removes DebugScan functionality. It is broken because it uses
unbounded stack, so it can not run on g0. When it was working, I've found
it helpless for debugging issues because the two algorithms are too different now.
This change would require updating the DebugScan, so it's simpler to just delete it.

With 8 threads this change reduces GC pause by ~6%, while keeping cputime roughly the same.

garbage-8
allocated                 2987886      2989221      +0.04%
allocs                      62885        62887      +0.00%
cputime                  21286000     21272000      -0.07%
gc-pause-one             26633247     24885421      -6.56%
gc-pause-total             873570       811264      -7.13%
rss                     242089984    242515968      +0.18%
sys-gc                   13934336     13869056      -0.47%
sys-heap                205062144    205062144      +0.00%
sys-other                12628288     12628288      +0.00%
sys-stack                11534336     11927552      +3.41%
sys-total               243159104    243487040      +0.13%
time                      2809477      2740795      -2.44%

R=golang-codereviews, rsc
CC=cshapiro, golang-codereviews, khr
https://golang.org/cl/46860043

											
										
										
											2014-01-21 02:06:57 -07:00
+									runtime·unlock(&allglock);
-												runtime: dump scheduler state if GODEBUG=schedtrace is set
The schedtrace value sets dump period in milliseconds.
In default mode the trace looks as follows:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 [1 0 0 0]
SCHED 1001ms: gomaxprocs=4 idleprocs=3 threads=6 idlethreads=3 runqueue=0 [0 0 0 0]
SCHED 2008ms: gomaxprocs=4 idleprocs=1 threads=6 idlethreads=1 runqueue=0 [0 1 0 0]
If GODEBUG=scheddetail=1 is set as well, then the detailed trace is printed:
SCHED 0ms: gomaxprocs=4 idleprocs=0 threads=3 idlethreads=0 runqueue=0 singleproc=0 gcwaiting=1 mlocked=0 nmspinning=0 stopwait=0 sysmonwait=0
  P0: status=3 tick=1 m=0 runqsize=1/128 gfreecnt=0
  P1: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P2: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  P3: status=3 tick=0 m=-1 runqsize=0/128 gfreecnt=0
  M2: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M1: p=-1 curg=-1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=-1
  M0: p=0 curg=1 mallocing=0 throwing=0 gcing=0 locks=1 dying=0 helpgc=0 spinning=0 lockedg=1
  G1: status=2() m=0 lockedm=0
  G2: status=1() m=-1 lockedm=-1

R=golang-dev, raggi, rsc
CC=golang-dev
https://golang.org/cl/11435044

											
										
										
											2013-08-13 14:30:55 -06:00
+									runtime·unlock(&runtime·sched);
 								}
-												runtime: improved scheduler
Distribute runnable queues, memory cache
and cache of dead G's per processor.
Faster non-blocking syscall enter/exit.
More conservative worker thread blocking/unblocking.

R=dave, bradfitz, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7314062

											
										
										
											2013-03-01 04:49:16 -07:00
+								// Put mp on midle list.
 								// Sched must be locked.
 								static void
 								mput(M *mp)
 								{
 									mp->schedlink = runtime·sched.midle;
 									runtime·sched.midle = mp;
 									runtime·sched.nmidle++;
 									checkdead();
 								}
 								// Try to get an m from midle list.
 								// Sched must be locked.
 								static M*
 								mget(void)
 								{
 									M *mp;
 									if((mp = runtime·sched.midle) != nil){
 										runtime·sched.midle = mp->schedlink;
 										runtime·sched.nmidle--;
 									}
 									return mp;
 								}
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								// Put gp on the global runnable queue.
 								// Sched must be locked.
 								static void
 								globrunqput(G *gp)
 								{
 									gp->schedlink = nil;
 									if(runtime·sched.runqtail)
 										runtime·sched.runqtail->schedlink = gp;
 									else
 										runtime·sched.runqhead = gp;
 									runtime·sched.runqtail = gp;
 									runtime·sched.runqsize++;
 								}
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+								// Put a batch of runnable goroutines on the global runnable queue.
 								// Sched must be locked.
 								static void
 								globrunqputbatch(G *ghead, G *gtail, int32 n)
 								{
 									gtail->schedlink = nil;
 									if(runtime·sched.runqtail)
 										runtime·sched.runqtail->schedlink = ghead;
 									else
 										runtime·sched.runqhead = ghead;
 									runtime·sched.runqtail = gtail;
 									runtime·sched.runqsize += n;
 								}
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								// Try get a batch of G's from the global runnable queue.
 								// Sched must be locked.
 								static G*
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+								globrunqget(P *p, int32 max)
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								{
 									G *gp, *gp1;
 									int32 n;
 									if(runtime·sched.runqsize == 0)
 										return nil;
 									n = runtime·sched.runqsize/runtime·gomaxprocs+1;
 									if(n > runtime·sched.runqsize)
 										n = runtime·sched.runqsize;
-												runtime: improve scheduler fairness
Currently global runqueue is starved if a group of goroutines
constantly respawn each other (local runqueue never becomes empty).
Fixes #5639.

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/10042044

											
										
										
											2013-06-15 06:06:28 -06:00
+									if(max > 0 && n > max)
 										n = max;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									if(n > nelem(p->runq)/2)
 										n = nelem(p->runq)/2;
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+									runtime·sched.runqsize -= n;
 									if(runtime·sched.runqsize == 0)
 										runtime·sched.runqtail = nil;
 									gp = runtime·sched.runqhead;
 									runtime·sched.runqhead = gp->schedlink;
 									n--;
 									while(n--) {
 										gp1 = runtime·sched.runqhead;
 										runtime·sched.runqhead = gp1->schedlink;
 										runqput(p, gp1);
 									}
 									return gp;
 								}
 								// Put p to on pidle list.
 								// Sched must be locked.
 								static void
 								pidleput(P *p)
 								{
 									p->link = runtime·sched.pidle;
 									runtime·sched.pidle = p;
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+									runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+								}
 								// Try get a p from pidle list.
 								// Sched must be locked.
 								static P*
 								pidleget(void)
 								{
 									P *p;
 									p = runtime·sched.pidle;
 									if(p) {
 										runtime·sched.pidle = p->link;
-												runtime: add atomics to fix arm

R=golang-dev, minux.ma
CC=golang-dev
https://golang.org/cl/7429046

											
										
										
											2013-03-01 12:57:05 -07:00
+										runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
-												runtime: more changes in preparation to the new scheduler
add per-P cache of dead G's
add global runnable queue (not used for now)
add list of idle P's (not used for now)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7397061

											
										
										
											2013-02-27 12:17:53 -07:00
+									}
 									return p;
 								}
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+								// Try to put g on local runnable queue.
 								// If it's full, put onto global queue.
 								// Executed only by the owner P.
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								static void
 								runqput(P *p, G *gp)
 								{
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									uint32 h, t;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
 								retry:
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									t = p->runqtail;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									if(t - h < nelem(p->runq)) {
 										p->runq[t%nelem(p->runq)] = gp;
 										runtime·atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption
 										return;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									}
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									if(runqputslow(p, gp, h, t))
 										return;
 									// the queue is not full, now the put above must suceed
 									goto retry;
 								}
 								// Put g and a batch of work from local runnable queue on global queue.
 								// Executed only by the owner P.
 								static bool
 								runqputslow(P *p, G *gp, uint32 h, uint32 t)
 								{
 									G *batch[nelem(p->runq)/2+1];
 									uint32 n, i;
 									// First, grab a batch from local queue.
 									n = t-h;
 									n = n/2;
 									if(n != nelem(p->runq)/2)
 										runtime·throw("runqputslow: queue is not full");
 									for(i=0; i<n; i++)
 										batch[i] = p->runq[(h+i)%nelem(p->runq)];
 									if(!runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
 										return false;
 									batch[n] = gp;
 									// Link the goroutines.
 									for(i=0; i<n; i++)
 										batch[i]->schedlink = batch[i+1];
 									// Now put the batch on global queue.
 									runtime·lock(&runtime·sched);
 									globrunqputbatch(batch[0], batch[n], n+1);
 									runtime·unlock(&runtime·sched);
 									return true;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								}
 								// Get g from local runnable queue.
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+								// Executed only by the owner P.
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								static G*
 								runqget(P *p)
 								{
 									G *gp;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									uint32 t, h;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									for(;;) {
 										h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
 										t = p->runqtail;
 										if(t == h)
 											return nil;
 										gp = p->runq[h%nelem(p->runq)];
 										if(runtime·cas(&p->runqhead, h, h+1))  // cas-release, commits consume
 											return gp;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									}
 								}
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+								// Grabs a batch of goroutines from local runnable queue.
 								// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
 								// Can be executed by any P.
 								static uint32
 								runqgrab(P *p, G **batch)
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								{
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									uint32 t, h, n, i;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									for(;;) {
 										h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
 										t = runtime·atomicload(&p->runqtail);  // load-acquire, synchronize with the producer
 										n = t-h;
 										n = n - n/2;
 										if(n == 0)
 											break;
 										if(n > nelem(p->runq)/2)  // read inconsistent h and t
 											continue;
 										for(i=0; i<n; i++)
 											batch[i] = p->runq[(h+i)%nelem(p->runq)];
 										if(runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
 											break;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									}
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									return n;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+								}
 								// Steal half of elements from local runnable queue of p2
 								// and put onto local runnable queue of p.
 								// Returns one of the stolen elements (or nil if failed).
 								static G*
 								runqsteal(P *p, P *p2)
 								{
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									G *gp;
 									G *batch[nelem(p->runq)/2];
 									uint32 t, h, n, i;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									n = runqgrab(p2, batch);
 									if(n == 0)
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+										return nil;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									n--;
 									gp = batch[n];
 									if(n == 0)
 										return gp;
 									h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									t = p->runqtail;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									if(t - h + n >= nelem(p->runq))
 										runtime·throw("runqsteal: runq overflow");
 									for(i=0; i<n; i++, t++)
 										p->runq[t%nelem(p->runq)] = batch[i];
 									runtime·atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									return gp;
 								}
 								void
 								runtime·testSchedLocalQueue(void)
 								{
 									P p;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									G gs[nelem(p.runq)];
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									int32 i, j;
 									runtime·memclr((byte*)&p, sizeof(p));
 									for(i = 0; i < nelem(gs); i++) {
 										if(runqget(&p) != nil)
 											runtime·throw("runq is not empty initially");
 										for(j = 0; j < i; j++)
 											runqput(&p, &gs[i]);
 										for(j = 0; j < i; j++) {
 											if(runqget(&p) != &gs[i]) {
 												runtime·printf("bad element at iter %d/%d\n", i, j);
 												runtime·throw("bad element");
 											}
 										}
 										if(runqget(&p) != nil)
 											runtime·throw("runq is not empty afterwards");
 									}
 								}
 								void
 								runtime·testSchedLocalQueueSteal(void)
 								{
 									P p1, p2;
-												runtime: use lock-free ring for work queues
Use lock-free fixed-size ring for work queues
instead of an unbounded mutex-protected array.
The ring has single producer and multiple consumers.
If the ring overflows, work is put onto global queue.

benchmark              old ns/op    new ns/op    delta
BenchmarkMatmult               7            5  -18.12%
BenchmarkMatmult-4             2            2  -18.98%
BenchmarkMatmult-16            1            0  -12.84%

BenchmarkCreateGoroutines                     105           88  -16.10%
BenchmarkCreateGoroutines-4                   376          219  -41.76%
BenchmarkCreateGoroutines-16                  241          174  -27.80%
BenchmarkCreateGoroutinesParallel             103           87  -14.66%
BenchmarkCreateGoroutinesParallel-4           169          143  -15.38%
BenchmarkCreateGoroutinesParallel-16          158          151   -4.43%

R=golang-codereviews, rsc
CC=ddetlefs, devon.odell, golang-codereviews
https://golang.org/cl/46170044

											
										
										
											2014-01-16 01:17:00 -07:00
+									G gs[nelem(p1.runq)], *gp;
-												runtime: implement local work queues (in preparation for new scheduler)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7402047

											
										
										
											2013-02-22 21:48:02 -07:00
+									int32 i, j, s;
 									runtime·memclr((byte*)&p1, sizeof(p1));
 									runtime·memclr((byte*)&p2, sizeof(p2));
 									for(i = 0; i < nelem(gs); i++) {
 										for(j = 0; j < i; j++) {
 											gs[j].sig = 0;
 											runqput(&p1, &gs[j]);
 										}
 										gp = runqsteal(&p2, &p1);
 										s = 0;
 										if(gp) {
 											s++;
 											gp->sig++;
 										}
 										while(gp = runqget(&p2)) {
 											s++;
 											gp->sig++;
 										}
 										while(gp = runqget(&p1))
 											gp->sig++;
 										for(j = 0; j < i; j++) {
 											if(gs[j].sig != 1) {
 												runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
 												runtime·throw("bad element");
 											}
 										}
 										if(s != i/2 && s != i/2+1) {
 											runtime·printf("bad steal %d, want %d or %d, iter %d\n",
 												s, i/2, i/2+1, i);
 											runtime·throw("bad steal");
 										}
 									}
-												runtime/cgo: make symbol naming consistent

The naming in this package is a disaster.
Make it all consistent.

Remove some 'static' from functions that will
be referred to from other files soon.

This CL is purely renames using global search and replace.

Submitting separately so that real changes will not
be drowned out by these renames in future CLs.

TBR=iant
CC=golang-dev
https://golang.org/cl/7416046

											
										
										
											2013-02-28 14:24:38 -07:00
+								}
-												runtime: record proper goroutine state during stack split

Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.

For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:

        TEXT myfunc
                check for split
                jmpcond nosplit
                call morestack
        nosplit:
                sub $xxx, sp

Now an extra instruction is inserted after the call:

        TEXT myfunc
        start:
                check for split
                jmpcond nosplit
                call morestack
                jmp start
        nosplit:
                sub $xxx, sp

The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.

The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.

The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.

Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)

runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
        morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
        sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow

goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
        /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
        /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
        /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
        /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
        /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
        /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
        /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
        /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8

And here is a seg fault during oldstack:

SIGSEGV: segmentation violation
PC=0x1b2a6

runtime.oldstack()
        /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
        /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22

goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
        /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
        /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
        /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
        /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
        /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
        strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8

goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
        /Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
        /Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
        /Users/rsc/g/go/src/pkg/runtime/proc.c:166

rax     0x23ccc0
rbx     0x23ccc0
rcx     0x0
rdx     0x38
rdi     0x2102c0170
rsi     0x221032cfe0
rbp     0x221032cfa0
rsp     0x7fff5fbff5b0
r8      0x2102c0120
r9      0x221032cfa0
r10     0x221032c000
r11     0x104ce8
r12     0xe5c80
r13     0x1be82baac718
r14     0x13091135f7d69200
r15     0x0
rip     0x1b2a6
rflags  0x10246
cs      0x2b
fs      0x0
gs      0x0

Fixes #5723.

R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048

											
										
										
											2013-06-27 09:32:01 -06:00
+								extern void runtime·morestack(void);
-												runtime: use new frame argument size information

With this CL, I believe the runtime always knows
the frame size during the gc walk. There is no fallback
to "assume entire stack frame of caller" anymore.

R=golang-dev, khr, cshapiro, dvyukov
CC=golang-dev
https://golang.org/cl/11374044

											
										
										
											2013-07-17 10:47:18 -06:00
+								// Does f mark the top of a goroutine stack?
 								bool
 								runtime·topofstack(Func *f)
 								{
 									return f->entry == (uintptr)runtime·goexit ||
 										f->entry == (uintptr)runtime·mstart ||
 										f->entry == (uintptr)runtime·mcall ||
-												runtime: handle morestack/lessstack in stack trace

If we start a garbage collection on g0 during a
stack split or unsplit, we'll see morestack or lessstack
at the top of the stack. Record an argument frame size
for those, and record that they terminate the stack.

R=golang-dev, dvyukov
CC=golang-dev
https://golang.org/cl/11533043

											
										
										
											2013-07-18 14:53:45 -06:00
+										f->entry == (uintptr)runtime·morestack ||
 										f->entry == (uintptr)runtime·lessstack ||
-												runtime: use new frame argument size information

With this CL, I believe the runtime always knows
the frame size during the gc walk. There is no fallback
to "assume entire stack frame of caller" anymore.

R=golang-dev, khr, cshapiro, dvyukov
CC=golang-dev
https://golang.org/cl/11374044

											
										
										
											2013-07-17 10:47:18 -06:00
+										f->entry == (uintptr)_rt0_go;
 								}
-												runtime: impose thread count limit

Actually working to stay within the limit could cause subtle deadlocks.
Crashing avoids the subtlety.

Fixes #4056.

R=golang-dev, r, dvyukov
CC=golang-dev
https://golang.org/cl/13037043

											
										
										
											2013-08-16 20:25:26 -06:00
 								void
 								runtime∕debug·setMaxThreads(intgo in, intgo out)
 								{
 									runtime·lock(&runtime·sched);
 									out = runtime·sched.maxmcount;
 									runtime·sched.maxmcount = in;
 									checkmcount();
 									runtime·unlock(&runtime·sched);
 									FLUSH(&out);
 								}
-												build: disable precise collection of stack frames

The code for call site-specific pointer bitmaps was not ready in time,
but the zeroing required without it is too expensive to use by default.
We will have to wait for precise collection of stack frames until Go 1.3.

The precise collection can be re-enabled by

        GOEXPERIMENT=precisestack ./all.bash

but that will not be the default for a Go 1.2 build.

Fixes #6087.

R=golang-dev, jeremyjackins, dan.kortschak, r
CC=golang-dev
https://golang.org/cl/13677045

											
										
										
											2013-09-16 18:26:10 -06:00
 								static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
 								static bool
 								haveexperiment(int8 *name)
 								{
 									int32 i, j;
 									for(i=0; i<sizeof(experiment); i++) {
 										if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
 											for(j=0; name[j]; j++)
 												if(experiment[i+j] != name[j])
 													goto nomatch;
 											if(experiment[i+j] != '\0' && experiment[i+j] != ',')
 												goto nomatch;
 											return 1;
 										}
 									nomatch:;
 									}
 									return 0;
 								}
-												sync: scalable Pool
Introduce fixed-size P-local caches.
When local caches overflow/underflow a batch of items
is transferred to/from global mutex-protected cache.

benchmark                    old ns/op    new ns/op    delta
BenchmarkPool                    50554        22423  -55.65%
BenchmarkPool-4                 400359         5904  -98.53%
BenchmarkPool-16                403311         1598  -99.60%
BenchmarkPool-32                367310         1526  -99.58%

BenchmarkPoolOverlflow            5214         3633  -30.32%
BenchmarkPoolOverlflow-4         42663         9539  -77.64%
BenchmarkPoolOverlflow-8         46919        11385  -75.73%
BenchmarkPoolOverlflow-16        39454        13048  -66.93%

BenchmarkSprintfEmpty                    84           63  -25.68%
BenchmarkSprintfEmpty-2                 371           32  -91.13%
BenchmarkSprintfEmpty-4                 465           22  -95.25%
BenchmarkSprintfEmpty-8                 565           12  -97.77%
BenchmarkSprintfEmpty-16                498            5  -98.87%
BenchmarkSprintfEmpty-32                492            4  -99.04%

BenchmarkSprintfString                  259          229  -11.58%
BenchmarkSprintfString-2                574          144  -74.91%
BenchmarkSprintfString-4                651           77  -88.05%
BenchmarkSprintfString-8                868           47  -94.48%
BenchmarkSprintfString-16               825           33  -95.96%
BenchmarkSprintfString-32               825           30  -96.28%

BenchmarkSprintfInt                     213          188  -11.74%
BenchmarkSprintfInt-2                   448          138  -69.20%
BenchmarkSprintfInt-4                   624           52  -91.63%
BenchmarkSprintfInt-8                   691           31  -95.43%
BenchmarkSprintfInt-16                  724           18  -97.46%
BenchmarkSprintfInt-32                  718           16  -97.70%

BenchmarkSprintfIntInt                  311          282   -9.32%
BenchmarkSprintfIntInt-2                333          145  -56.46%
BenchmarkSprintfIntInt-4                642          110  -82.87%
BenchmarkSprintfIntInt-8                832           42  -94.90%
BenchmarkSprintfIntInt-16               817           24  -97.00%
BenchmarkSprintfIntInt-32               805           22  -97.17%

BenchmarkSprintfPrefixedInt             309          269  -12.94%
BenchmarkSprintfPrefixedInt-2           245          168  -31.43%
BenchmarkSprintfPrefixedInt-4           598           99  -83.36%
BenchmarkSprintfPrefixedInt-8           770           67  -91.23%
BenchmarkSprintfPrefixedInt-16          829           54  -93.49%
BenchmarkSprintfPrefixedInt-32          824           50  -93.83%

BenchmarkSprintfFloat                   418          398   -4.78%
BenchmarkSprintfFloat-2                 295          203  -31.19%
BenchmarkSprintfFloat-4                 585          128  -78.12%
BenchmarkSprintfFloat-8                 873           60  -93.13%
BenchmarkSprintfFloat-16                884           33  -96.24%
BenchmarkSprintfFloat-32                881           29  -96.62%

BenchmarkManyArgs                      1097         1069   -2.55%
BenchmarkManyArgs-2                     705          567  -19.57%
BenchmarkManyArgs-4                     792          319  -59.72%
BenchmarkManyArgs-8                     963          172  -82.14%
BenchmarkManyArgs-16                   1115          103  -90.76%
BenchmarkManyArgs-32                   1133           90  -92.03%

LGTM=rsc
R=golang-codereviews, bradfitz, minux.ma, gobot, rsc
CC=golang-codereviews
https://golang.org/cl/46010043

											
										
										
											2014-01-24 11:29:53 -07:00
 								// func runtime_procPin() int
 								void
 								sync·runtime_procPin(intgo p)
 								{
 									M *mp;
 									mp = m;
 									// Disable preemption.
 									mp->locks++;
 									p = mp->p->id;
 									FLUSH(&p);
 								}
 								// func runtime_procUnpin()
 								void
 								sync·runtime_procUnpin(void)
 								{
 									m->locks--;
 								}