2008-07-14 15:34:27 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
#include "runtime.h"
|
2011-12-16 13:33:58 -07:00
|
|
|
#include "arch_GOARCH.h"
|
2013-09-16 18:26:10 -06:00
|
|
|
#include "zaexperiment.h"
|
2009-01-26 18:37:05 -07:00
|
|
|
#include "malloc.h"
|
2011-02-22 15:40:40 -07:00
|
|
|
#include "stack.h"
|
2012-10-07 12:05:32 -06:00
|
|
|
#include "race.h"
|
2012-10-21 15:41:32 -06:00
|
|
|
#include "type.h"
|
2014-07-29 01:01:02 -06:00
|
|
|
#include "mgc0.h"
|
2013-08-12 14:47:18 -06:00
|
|
|
#include "../../cmd/ld/textflag.h"
|
2008-07-14 15:34:27 -06:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Goroutine scheduler
|
|
|
|
// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
|
2008-08-05 15:18:47 -06:00
|
|
|
//
|
2013-03-01 04:49:16 -07:00
|
|
|
// The main concepts are:
|
|
|
|
// G - goroutine.
|
|
|
|
// M - worker thread, or machine.
|
|
|
|
// P - processor, a resource that is required to execute Go code.
|
|
|
|
// M must have an associated P to execute Go code, however it can be
|
|
|
|
// blocked or in a syscall w/o an associated P.
|
2013-03-04 08:36:45 -07:00
|
|
|
//
|
|
|
|
// Design doc at http://golang.org/s/go11sched.
|
2008-08-05 15:18:47 -06:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
typedef struct Sched Sched;
// Sched is the global scheduler state, a single instance (runtime·sched).
// All fields are protected by lock unless noted otherwise.
struct Sched {
	Mutex	lock;

	// Global goroutine-id generator; per-P caches grab GoidCacheBatch ids at a time.
	uint64	goidgen;

	M*	midle;	 // idle m's waiting for work
	int32	nmidle;	 // number of idle m's waiting for work
	int32	nmidlelocked; // number of locked m's waiting for work
	int32	mcount;	 // number of m's that have been created
	int32	maxmcount;	// maximum number of m's allowed (or die)

	P*	pidle;  // idle P's
	uint32	npidle;	// number of idle P's
	uint32	nmspinning;	// number of m's spinning looking for work

	// Global runnable queue.
	G*	runqhead;
	G*	runqtail;
	int32	runqsize;

	// Global cache of dead G's.
	Mutex	gflock;
	G*	gfree;
	int32	ngfree;

	// Stop-the-world coordination.
	uint32	gcwaiting;	// gc is waiting to run
	int32	stopwait;	// number of P's still to stop
	Note	stopnote;	// signaled when stopwait reaches zero
	uint32	sysmonwait;	// sysmon is parked during stop-the-world
	Note	sysmonnote;	// signaled to wake sysmon after the world restarts
	uint64	lastpoll;	// nanotime of last network poll; used by sysmon

	int32	profilehz;	// cpu profiling rate
};
|
|
|
|
|
2014-01-21 23:34:36 -07:00
|
|
|
enum
{
	// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
	GoidCacheBatch = 16,
};
|
2008-08-04 17:43:49 -06:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Global scheduler state and runtime-wide variables.
Sched	runtime·sched;
int32	runtime·gomaxprocs;	// current GOMAXPROCS value
// Set when cgo is in use; the extra M for callbacks from non-Go threads
// is created lazily on first cgo call so deadlock detection still works
// for programs that link cgo but never call into C.
uint32	runtime·needextram;
bool	runtime·iscgo;	// whether the program links against cgo
M	runtime·m0;	// the bootstrap (main) thread
G	runtime·g0;	// idle goroutine for m0
G*	runtime·lastg;
M*	runtime·allm;	// head of the linked list of all m's
M*	runtime·extram;	// list of extra m's for cgo callbacks
P*	runtime·allp[MaxGomaxprocs+1];	// all P's, indexed by P id
int8*	runtime·goos;
int32	runtime·ncpu;	// number of CPUs detected at startup
static int32	newprocs;	// pending GOMAXPROCS change, applied at next stop-the-world

static	Mutex allglock;	// the following vars are protected by this lock or by stoptheworld
G**	runtime·allg;	// backing array of all G's ever created
Slice	runtime·allgs;	// same storage exposed as a Go slice
uintptr runtime·allglen;	// number of valid entries in runtime·allg
static	uintptr allgcap;	// capacity of the runtime·allg array
ForceGCState	runtime·forcegc;	// state for the periodic forced-GC helper
|
2014-01-21 02:06:57 -07:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Forward declarations for this file's scheduler internals.
void runtime·mstart(void);
static void runqput(P*, G*);	// put g on p's local run queue
static G* runqget(P*);	// take g from p's local run queue
static bool runqputslow(P*, G*, uint32, uint32);	// overflow half of local queue to global queue
static G* runqsteal(P*, P*);	// steal half of another P's run queue
static void mput(M*);	// add m to the idle list
static M* mget(void);	// take m from the idle list
static void mcommoninit(M*);
static void schedule(void);	// one round of the scheduler: find and run a goroutine
static void procresize(int32);	// change the number of P's
static void acquirep(P*);	// associate the current m with p
static P* releasep(void);	// dissociate the current m from its p
static void newm(void(*)(void), P*);	// create a new m running the given function
static void stopm(void);	// park the current m until new work arrives
static void startm(P*, bool);	// schedule some m to run p
static void handoffp(P*);	// hand p off from a syscall-blocked m
static void wakep(void);	// wake an m to run work if there may be idle P's
static void stoplockedm(void);
static void startlockedm(G*);
static void sysmon(void);	// background monitor: retaking, netpoll, forcegc
static uint32 retake(int64);	// retake P's blocked in syscalls / preempt long-running G's
static void incidlelocked(int32);
static void checkdead(void);	// detect "all goroutines are asleep" deadlock
static void exitsyscall0(G*);
void runtime·park_m(G*);
static void goexit0(G*);
static void gfput(P*, G*);	// free-G cache: put
static G* gfget(P*);	// free-G cache: get
static void gfpurge(P*);	// free-G cache: release to global list
static void globrunqput(G*);	// global run queue: put one
static void globrunqputbatch(G*, G*, int32);	// global run queue: put a batch
static G* globrunqget(P*, int32);	// global run queue: get a batch for p
static P* pidleget(void);	// idle-P list: get
static void pidleput(P*);	// idle-P list: put
static void injectglist(G*);	// add a list of runnable G's to the scheduler
static bool preemptall(void);	// request preemption of all running G's
static bool preemptone(P*);	// request preemption of the G running on p
static bool exitsyscallfast(void);
static bool haveexperiment(int8*);	// check GOEXPERIMENT flags (see zaexperiment.h)
static void allgadd(G*);	// record a new G in runtime·allg

extern String runtime·buildVersion;
|
|
|
|
|
2008-09-18 16:56:46 -06:00
|
|
|
// The bootstrap sequence is:
//
//	call osinit
//	call schedinit
//	make & queue new G
//	call runtime·mstart
//
// The new G calls runtime·main.
void
runtime·schedinit(void)
{
	int32 n, procs;
	byte *p;
	Eface i;

	// raceinit must be the first call to race detector.
	// In particular, it must be done before mallocinit below calls racemapshadow.
	if(raceenabled)
		g->racectx = runtime·raceinit();

	runtime·sched.maxmcount = 10000;
	runtime·precisestack = true; // haveexperiment("precisestack");

	// Order matters: symbol tables, then the stack allocator, then the heap,
	// before the first M can be fully initialized.
	runtime·symtabinit();
	runtime·stackinit();
	runtime·mallocinit();
	mcommoninit(g->m);

	// Initialize the itable value for newErrorCString,
	// so that the next time it gets called, possibly
	// in a fault during a garbage collection, it will not
	// need to allocate memory.
	runtime·newErrorCString(0, &i);

	runtime·goargs();
	runtime·goenvs();
	runtime·parsedebugvars();
	runtime·gcinit();

	runtime·sched.lastpoll = runtime·nanotime();
	// GOMAXPROCS defaults to 1; honor the environment variable, clamped
	// to the static MaxGomaxprocs limit (runtime·allp has fixed size).
	procs = 1;
	p = runtime·getenv("GOMAXPROCS");
	if(p != nil && (n = runtime·atoi(p)) > 0) {
		if(n > MaxGomaxprocs)
			n = MaxGomaxprocs;
		procs = n;
	}
	procresize(procs);

	// Stack copying piggybacks on precise stacks; GOCOPYSTACK=0 disables it.
	runtime·copystack = runtime·precisestack;
	p = runtime·getenv("GOCOPYSTACK");
	if(p != nil && !runtime·strcmp(p, (byte*)"0"))
		runtime·copystack = false;

	mstats.enablegc = 1;

	if(runtime·buildVersion.str == nil) {
		// Condition should never trigger.  This code just serves
		// to ensure runtime·buildVersion is kept in the resulting binary.
		runtime·buildVersion.str = (uint8*)"unknown";
		runtime·buildVersion.len = 7;
	}
}
|
|
|
|
|
2011-10-27 19:04:12 -06:00
|
|
|
// Entry points generated by the compiler/linker for package initialization
// and the user's main function.
extern void main·init(void);
extern void runtime·init(void);
extern void main·main(void);

// Deferred by runtime·main around package init so that a runtime.Goexit
// during init still releases the main OS thread lock.
static FuncVal initDone = { runtime·unlockOSThread };
|
|
|
|
|
2011-10-27 19:04:12 -06:00
|
|
|
// The main goroutine.
// Note: C frames in general are not copyable during stack growth, for two reasons:
//   1) We don't know where in a frame to find pointers to other stack locations.
//   2) There's no guarantee that globals or heap values do not point into the frame.
//
// The C frame for runtime.main is copyable, because:
//   1) There are no pointers to other stack locations in the frame
//      (d.fn points at a global, d.link is nil, d.argp is -1).
//   2) The only pointer into this frame is from the defer chain,
//      which is explicitly handled during stack copying.
void
runtime·main(void)
{
	Defer d;

	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
	if(sizeof(void*) == 8)
		runtime·maxstacksize = 1000000000;
	else
		runtime·maxstacksize = 250000000;

	// Start the background monitor thread before running any user code.
	newm(sysmon, nil);

	// Lock the main goroutine onto this, the main OS thread,
	// during initialization.  Most programs won't care, but a few
	// do require certain calls to be made by the main thread.
	// Those can arrange for main.main to run in the main thread
	// by calling runtime.LockOSThread during initialization
	// to preserve the lock.
	runtime·lockOSThread();

	// Defer unlock so that runtime.Goexit during init does the unlock too.
	// The defer record lives on this C stack frame (see copyability note above).
	d.fn = &initDone;
	d.siz = 0;
	d.link = g->defer;
	d.argp = NoArgs;
	d.special = true;
	g->defer = &d;

	if(g->m != &runtime·m0)
		runtime·throw("runtime·main not on m0");
	runtime·init();
	main·init();

	// Sanity-check that init left our hand-built defer record intact,
	// then pop it and release the main-thread lock ourselves.
	if(g->defer != &d || d.fn != &initDone)
		runtime·throw("runtime: bad defer entry after init");
	g->defer = d.link;
	runtime·unlockOSThread();

	main·main();
	if(raceenabled)
		runtime·racefini();

	// Make racy client program work: if panicking on
	// another goroutine at the same time as main returns,
	// let the other goroutine finish printing the panic trace.
	// Once it does, it will exit. See issue 3934.
	if(runtime·panicking)
		runtime·park(nil, nil, runtime·gostringnocopy((byte*)"panicwait"));

	runtime·exit(0);
	// Unreachable: crash loudly if exit somehow returns.
	for(;;)
		*(int32*)runtime·main = 0;
}
|
|
|
|
|
2011-08-22 21:26:39 -06:00
|
|
|
// Print the one-line header for gp in a traceback:
// "goroutine N [status, M minutes, locked to thread]:".
void
runtime·goroutineheader(G *gp)
{
	String status;
	int64 waitfor;
	uint32 gpstatus;

	// Map the atomic status word to a human-readable label.
	// Scan variants are transient GC states layered over the base status.
	gpstatus = runtime·readgstatus(gp);
	switch(gpstatus) {
	case Gidle:
		status = runtime·gostringnocopy((byte*)"idle");
		break;
	case Grunnable:
		status = runtime·gostringnocopy((byte*)"runnable");
		break;
	case Grunning:
		status = runtime·gostringnocopy((byte*)"running");
		break;
	case Gsyscall:
		status = runtime·gostringnocopy((byte*)"syscall");
		break;
	case Gwaiting:
		// Prefer the specific wait reason (e.g. "chan receive") when set.
		if(gp->waitreason.str != nil)
			status = gp->waitreason;
		else
			status = runtime·gostringnocopy((byte*)"waiting");
		break;
	case Gscan:
		status = runtime·gostringnocopy((byte*)"scan");
		break;
	case Gscanrunnable:
		status = runtime·gostringnocopy((byte*)"scanrunnable");
		break;
	case Gscanrunning:
		status = runtime·gostringnocopy((byte*)"scanrunning");
		break;
	case Gscansyscall:
		status = runtime·gostringnocopy((byte*)"scansyscall");
		break;
	case Gscanenqueue:
		status = runtime·gostringnocopy((byte*)"scanenqueue");
		break;
	case Gscanwaiting:
		if(gp->waitreason.str != nil)
			status = gp->waitreason;
		else
			status = runtime·gostringnocopy((byte*)"scanwaiting");
		break;
	case Gcopystack:
		status = runtime·gostringnocopy((byte*)"copystack");
		break;
	default:
		status = runtime·gostringnocopy((byte*)"???");
		break;
	}

	// approx time the G is blocked, in minutes
	waitfor = 0;
	gpstatus = gpstatus&~Gscan; // drop the scan bit
	if((gpstatus == Gwaiting || gpstatus == Gsyscall) && gp->waitsince != 0)
		waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000*1000);

	runtime·printf("goroutine %D [%S", gp->goid, status);
	if(waitfor >= 1)
		runtime·printf(", %D minutes", waitfor);
	if(gp->lockedm != nil)
		runtime·printf(", locked to thread");
	runtime·printf("]:\n");
}
|
|
|
|
|
2014-08-27 09:15:47 -06:00
|
|
|
// Debug helper: print gp's identity and raw atomic status word.
static void
dumpgstatus(G* gp)
{
	runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
}
|
|
|
|
|
2008-07-28 12:29:41 -06:00
|
|
|
void
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·tracebackothers(G *me)
|
2008-07-28 12:29:41 -06:00
|
|
|
{
|
2012-07-03 02:54:13 -06:00
|
|
|
G *gp;
|
2013-01-29 03:57:11 -07:00
|
|
|
int32 traceback;
|
2014-01-21 02:06:57 -07:00
|
|
|
uintptr i;
|
2014-08-27 09:15:47 -06:00
|
|
|
uint32 status;
|
2008-07-28 12:29:41 -06:00
|
|
|
|
2013-03-14 23:11:03 -06:00
|
|
|
traceback = runtime·gotraceback(nil);
|
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.
For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:
TEXT myfunc
check for split
jmpcond nosplit
call morestack
nosplit:
sub $xxx, sp
Now an extra instruction is inserted after the call:
TEXT myfunc
start:
check for split
jmpcond nosplit
call morestack
jmp start
nosplit:
sub $xxx, sp
The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.
The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.
The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.
Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)
runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow
goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
/Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
/Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
/Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
/Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
/Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
/Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8
And here is a seg fault during oldstack:
SIGSEGV: segmentation violation
PC=0x1b2a6
runtime.oldstack()
/Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
/Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22
goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
/Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
/Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
/Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8
goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
/Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
/Users/rsc/g/go/src/pkg/runtime/proc.c:166
rax 0x23ccc0
rbx 0x23ccc0
rcx 0x0
rdx 0x38
rdi 0x2102c0170
rsi 0x221032cfe0
rbp 0x221032cfa0
rsp 0x7fff5fbff5b0
r8 0x2102c0120
r9 0x221032cfa0
r10 0x221032c000
r11 0x104ce8
r12 0xe5c80
r13 0x1be82baac718
r14 0x13091135f7d69200
r15 0x0
rip 0x1b2a6
rflags 0x10246
cs 0x2b
fs 0x0
gs 0x0
Fixes #5723.
R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
|
|
|
|
|
|
|
// Show the current goroutine first, if we haven't already.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if((gp = g->m->curg) != nil && gp != me) {
|
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.
For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:
TEXT myfunc
check for split
jmpcond nosplit
call morestack
nosplit:
sub $xxx, sp
Now an extra instruction is inserted after the call:
TEXT myfunc
start:
check for split
jmpcond nosplit
call morestack
jmp start
nosplit:
sub $xxx, sp
The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.
The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.
The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.
Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)
runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow
goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
/Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
/Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
/Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
/Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
/Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
/Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8
And here is a seg fault during oldstack:
SIGSEGV: segmentation violation
PC=0x1b2a6
runtime.oldstack()
/Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
/Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22
goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
/Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
/Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
/Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8
goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
/Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
/Users/rsc/g/go/src/pkg/runtime/proc.c:166
rax 0x23ccc0
rbx 0x23ccc0
rcx 0x0
rdx 0x38
rdi 0x2102c0170
rsi 0x221032cfe0
rbp 0x221032cfa0
rsp 0x7fff5fbff5b0
r8 0x2102c0120
r9 0x221032cfa0
r10 0x221032c000
r11 0x104ce8
r12 0xe5c80
r13 0x1be82baac718
r14 0x13091135f7d69200
r15 0x0
rip 0x1b2a6
rflags 0x10246
cs 0x2b
fs 0x0
gs 0x0
Fixes #5723.
R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
|
|
|
runtime·printf("\n");
|
|
|
|
runtime·goroutineheader(gp);
|
2013-12-13 13:44:57 -07:00
|
|
|
runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
|
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.
For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:
TEXT myfunc
check for split
jmpcond nosplit
call morestack
nosplit:
sub $xxx, sp
Now an extra instruction is inserted after the call:
TEXT myfunc
start:
check for split
jmpcond nosplit
call morestack
jmp start
nosplit:
sub $xxx, sp
The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.
The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.
The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.
Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)
runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow
goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
/Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
/Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
/Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
/Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
/Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
/Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8
And here is a seg fault during oldstack:
SIGSEGV: segmentation violation
PC=0x1b2a6
runtime.oldstack()
/Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
/Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22
goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
/Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
/Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
/Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8
goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
/Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
/Users/rsc/g/go/src/pkg/runtime/proc.c:166
rax 0x23ccc0
rbx 0x23ccc0
rcx 0x0
rdx 0x38
rdi 0x2102c0170
rsi 0x221032cfe0
rbp 0x221032cfa0
rsp 0x7fff5fbff5b0
r8 0x2102c0120
r9 0x221032cfa0
r10 0x221032c000
r11 0x104ce8
r12 0xe5c80
r13 0x1be82baac718
r14 0x13091135f7d69200
r15 0x0
rip 0x1b2a6
rflags 0x10246
cs 0x2b
fs 0x0
gs 0x0
Fixes #5723.
R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
|
|
|
}
|
|
|
|
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·lock(&allglock);
|
|
|
|
for(i = 0; i < runtime·allglen; i++) {
|
|
|
|
gp = runtime·allg[i];
|
2014-08-27 09:15:47 -06:00
|
|
|
if(gp == me || gp == g->m->curg || runtime·readgstatus(gp) == Gdead)
|
2008-07-28 12:29:41 -06:00
|
|
|
continue;
|
2013-01-29 03:57:11 -07:00
|
|
|
if(gp->issystem && traceback < 2)
|
|
|
|
continue;
|
2011-08-22 21:26:39 -06:00
|
|
|
runtime·printf("\n");
|
2012-07-03 02:54:13 -06:00
|
|
|
runtime·goroutineheader(gp);
|
2014-08-27 09:15:47 -06:00
|
|
|
status = runtime·readgstatus(gp);
|
|
|
|
if((status&~Gscan) == Grunning){
|
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.
For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:
TEXT myfunc
check for split
jmpcond nosplit
call morestack
nosplit:
sub $xxx, sp
Now an extra instruction is inserted after the call:
TEXT myfunc
start:
check for split
jmpcond nosplit
call morestack
jmp start
nosplit:
sub $xxx, sp
The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.
The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.
The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.
Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)
runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow
goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
/Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
/Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
/Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
/Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
/Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
/Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8
And here is a seg fault during oldstack:
SIGSEGV: segmentation violation
PC=0x1b2a6
runtime.oldstack()
/Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
/Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22
goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
/Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
/Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
/Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8
goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
/Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
/Users/rsc/g/go/src/pkg/runtime/proc.c:166
rax 0x23ccc0
rbx 0x23ccc0
rcx 0x0
rdx 0x38
rdi 0x2102c0170
rsi 0x221032cfe0
rbp 0x221032cfa0
rsp 0x7fff5fbff5b0
r8 0x2102c0120
r9 0x221032cfa0
r10 0x221032c000
r11 0x104ce8
r12 0xe5c80
r13 0x1be82baac718
r14 0x13091135f7d69200
r15 0x0
rip 0x1b2a6
rflags 0x10246
cs 0x2b
fs 0x0
gs 0x0
Fixes #5723.
R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
|
|
|
runtime·printf("\tgoroutine running on other thread; stack unavailable\n");
|
2013-08-01 09:28:38 -06:00
|
|
|
runtime·printcreatedby(gp);
|
|
|
|
} else
|
2013-12-13 13:44:57 -07:00
|
|
|
runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp);
|
2008-07-28 12:29:41 -06:00
|
|
|
}
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·unlock(&allglock);
|
2008-07-28 12:29:41 -06:00
|
|
|
}
|
|
|
|
|
2013-08-16 20:25:26 -06:00
|
|
|
static void
|
|
|
|
checkmcount(void)
|
|
|
|
{
|
|
|
|
// sched lock is held
|
2014-08-27 09:15:47 -06:00
|
|
|
if(runtime·sched.mcount > runtime·sched.maxmcount){
|
2013-08-16 20:25:26 -06:00
|
|
|
runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
|
|
|
|
runtime·throw("thread exhaustion");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-07-12 10:24:32 -06:00
|
|
|
static void
|
2012-07-03 02:54:13 -06:00
|
|
|
mcommoninit(M *mp)
|
2011-07-12 10:24:32 -06:00
|
|
|
{
|
runtime: fix unexpected return pc for runtime.newstackcall
With cl/112640043 TestCgoDeadlockCrash episodically print:
unexpected return pc for runtime.newstackcall
After adding debug output I see the following trace:
runtime: unexpected return pc for runtime.newstackcall called from 0xc208011b00
runtime.throw(0x414da86)
src/pkg/runtime/panic.c:523 +0x77
runtime.gentraceback(0x40165fc, 0xba440c28, 0x0, 0xc208d15200, 0xc200000000, 0xc208ddfd20, 0x20, 0x0, 0x0, 0x300)
src/pkg/runtime/traceback_x86.c:185 +0xca4
runtime.callers(0x1, 0xc208ddfd20, 0x20)
src/pkg/runtime/traceback_x86.c:438 +0x98
mcommoninit(0xc208ddfc00)
src/pkg/runtime/proc.c:369 +0x5c
runtime.allocm(0xc208052000)
src/pkg/runtime/proc.c:686 +0xa6
newm(0x4017850, 0xc208052000)
src/pkg/runtime/proc.c:933 +0x27
startm(0xc208052000, 0x100000001)
src/pkg/runtime/proc.c:1011 +0xba
wakep()
src/pkg/runtime/proc.c:1071 +0x57
resetspinning()
src/pkg/runtime/proc.c:1297 +0xa1
schedule()
src/pkg/runtime/proc.c:1366 +0x14b
runtime.gosched0(0xc20808e240)
src/pkg/runtime/proc.c:1465 +0x5b
runtime.newstack()
src/pkg/runtime/stack.c:891 +0x44d
runtime: unexpected return pc for runtime.newstackcall called from 0xc208011b00
runtime.newstackcall(0x4000cbd, 0x4000b80)
src/pkg/runtime/asm_amd64.s:278 +0x6f
I suspect that it can happen on any stack split.
So don't unwind g0 stack.
Also, that comment is lying -- we can traceback w/o mcache,
CPU profiler does that.
LGTM=rsc
R=golang-codereviews
CC=golang-codereviews, khr, rsc
https://golang.org/cl/120040043
2014-07-23 08:51:34 -06:00
|
|
|
// g0 stack won't make sense for user (and is not necessary unwindable).
|
|
|
|
if(g != g->m->g0)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·callers(1, mp->createstack, nelem(mp->createstack));
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
|
2012-02-08 08:33:54 -07:00
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
mp->id = runtime·sched.mcount++;
|
2013-08-16 20:25:26 -06:00
|
|
|
checkmcount();
|
2013-02-21 05:24:38 -07:00
|
|
|
runtime·mpreinit(mp);
|
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
// Add to runtime·allm so garbage collector doesn't free g->m
|
2012-02-08 08:33:54 -07:00
|
|
|
// when it is just in a register or thread-local storage.
|
2012-07-03 02:54:13 -06:00
|
|
|
mp->alllink = runtime·allm;
|
2012-02-16 14:49:41 -07:00
|
|
|
// runtime·NumCgoCall() iterates over allm w/o schedlock,
|
2012-02-08 08:33:54 -07:00
|
|
|
// so we need to publish it safely.
|
2012-07-03 02:54:13 -06:00
|
|
|
runtime·atomicstorep(&runtime·allm, mp);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2011-07-12 10:24:32 -06:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Mark gp ready to run.
|
2008-08-04 17:43:49 -06:00
|
|
|
void
|
2012-07-03 02:54:13 -06:00
|
|
|
runtime·ready(G *gp)
|
2008-08-04 17:43:49 -06:00
|
|
|
{
|
2014-08-27 09:15:47 -06:00
|
|
|
uint32 status;
|
|
|
|
|
|
|
|
status = runtime·readgstatus(gp);
|
2008-08-05 15:18:47 -06:00
|
|
|
// Mark runnable.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++; // disable preemption because it can be holding p in a local var
|
2014-08-27 09:15:47 -06:00
|
|
|
if((status&~Gscan) != Gwaiting){
|
|
|
|
dumpgstatus(gp);
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·throw("bad g->status in ready");
|
runtime: always run stackalloc on scheduler stack
Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.
The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.
runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...
R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
|
|
|
}
|
2014-08-27 09:15:47 -06:00
|
|
|
// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
|
|
|
|
runtime·casgstatus(gp, Gwaiting, Grunnable);
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
runqput(g->m->p, gp);
|
2013-03-01 12:57:05 -07:00
|
|
|
if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0) // TODO: fast atomic
|
2013-03-01 04:49:16 -07:00
|
|
|
wakep();
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
|
|
|
if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
|
2013-07-17 10:52:37 -06:00
|
|
|
g->stackguard0 = StackPreempt;
|
2008-07-14 15:34:27 -06:00
|
|
|
}
|
|
|
|
|
2014-08-21 10:41:09 -06:00
|
|
|
void
|
|
|
|
runtime·ready_m(void)
|
|
|
|
{
|
|
|
|
G *gp;
|
|
|
|
|
|
|
|
gp = g->m->ptrarg[0];
|
|
|
|
g->m->ptrarg[0] = nil;
|
|
|
|
runtime·ready(gp);
|
|
|
|
}
|
|
|
|
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
int32
|
2012-05-15 09:10:16 -06:00
|
|
|
runtime·gcprocs(void)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
{
|
2012-05-15 09:10:16 -06:00
|
|
|
int32 n;
|
2013-02-22 21:39:31 -07:00
|
|
|
|
2012-05-15 09:10:16 -06:00
|
|
|
// Figure out how many CPUs to use during GC.
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2012-05-15 09:10:16 -06:00
|
|
|
n = runtime·gomaxprocs;
|
|
|
|
if(n > runtime·ncpu)
|
|
|
|
n = runtime·ncpu;
|
|
|
|
if(n > MaxGcproc)
|
|
|
|
n = MaxGcproc;
|
2013-03-01 04:49:16 -07:00
|
|
|
if(n > runtime·sched.nmidle+1) // one M is currently running
|
|
|
|
n = runtime·sched.nmidle+1;
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2012-05-15 09:10:16 -06:00
|
|
|
return n;
|
|
|
|
}
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2013-02-22 21:39:31 -07:00
|
|
|
static bool
|
|
|
|
needaddgcproc(void)
|
|
|
|
{
|
|
|
|
int32 n;
|
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-02-22 21:39:31 -07:00
|
|
|
n = runtime·gomaxprocs;
|
|
|
|
if(n > runtime·ncpu)
|
|
|
|
n = runtime·ncpu;
|
|
|
|
if(n > MaxGcproc)
|
|
|
|
n = MaxGcproc;
|
2013-03-01 04:49:16 -07:00
|
|
|
n -= runtime·sched.nmidle+1; // one M is currently running
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-02-22 21:39:31 -07:00
|
|
|
return n > 0;
|
|
|
|
}
|
|
|
|
|
2012-05-15 09:10:16 -06:00
|
|
|
void
|
|
|
|
runtime·helpgc(int32 nproc)
|
|
|
|
{
|
|
|
|
M *mp;
|
2013-03-01 04:49:16 -07:00
|
|
|
int32 n, pos;
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
pos = 0;
|
|
|
|
for(n = 1; n < nproc; n++) { // one M is currently running
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(runtime·allp[pos]->mcache == g->m->mcache)
|
2013-03-01 04:49:16 -07:00
|
|
|
pos++;
|
|
|
|
mp = mget();
|
2012-05-15 09:10:16 -06:00
|
|
|
if(mp == nil)
|
|
|
|
runtime·throw("runtime·gcprocs inconsistency");
|
2013-03-21 02:48:02 -06:00
|
|
|
mp->helpgc = n;
|
2013-03-01 04:49:16 -07:00
|
|
|
mp->mcache = runtime·allp[pos]->mcache;
|
|
|
|
pos++;
|
|
|
|
runtime·notewakeup(&mp->park);
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
}
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to do something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
}
|
|
|
|
|
2013-08-09 02:53:35 -06:00
|
|
|
// Similar to stoptheworld but best-effort and can be called several times.
|
|
|
|
// There is no reverse operation, used during crashing.
|
|
|
|
// This function must not lock any mutexes.
|
|
|
|
void
|
|
|
|
runtime·freezetheworld(void)
|
|
|
|
{
|
|
|
|
int32 i;
|
|
|
|
|
|
|
|
if(runtime·gomaxprocs == 1)
|
|
|
|
return;
|
|
|
|
// stopwait and preemption requests can be lost
|
|
|
|
// due to races with concurrently executing threads,
|
|
|
|
// so try several times
|
|
|
|
for(i = 0; i < 5; i++) {
|
|
|
|
// this should tell the scheduler to not start any new goroutines
|
|
|
|
runtime·sched.stopwait = 0x7fffffff;
|
2013-08-15 04:32:10 -06:00
|
|
|
runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
|
2013-08-09 02:53:35 -06:00
|
|
|
// this should stop running goroutines
|
|
|
|
if(!preemptall())
|
|
|
|
break; // no running goroutines
|
|
|
|
runtime·usleep(1000);
|
|
|
|
}
|
|
|
|
// to be sure
|
|
|
|
runtime·usleep(1000);
|
|
|
|
preemptall();
|
|
|
|
runtime·usleep(1000);
|
|
|
|
}
|
|
|
|
|
2014-08-27 09:15:47 -06:00
|
|
|
static bool
|
|
|
|
isscanstatus(uint32 status)
|
|
|
|
{
|
|
|
|
if(status == Gscan)
|
|
|
|
runtime·throw("isscanstatus: Bad status Gscan");
|
|
|
|
return (status&Gscan) == Gscan;
|
|
|
|
}
|
|
|
|
|
|
|
|
// All reads and writes of g's status go through readgstatus, casgstatus
|
|
|
|
// castogscanstatus, casfromgscanstatus.
|
|
|
|
uint32
|
|
|
|
runtime·readgstatus(G *gp)
|
|
|
|
{
|
|
|
|
return runtime·atomicload(&gp->atomicstatus);
|
|
|
|
}
|
|
|
|
|
|
|
|
// The Gscanstatuses are acting like locks and this releases them.
|
|
|
|
// If it proves to be a performance hit we should be able to make these
|
|
|
|
// simple atomic stores but for now we are going to throw if
|
|
|
|
// we see an inconsistent state.
|
|
|
|
void
|
|
|
|
runtime·casfromgscanstatus(G *gp, uint32 oldval, uint32 newval)
|
|
|
|
{
|
|
|
|
bool success = false;
|
|
|
|
|
|
|
|
// Check that transition is valid.
|
|
|
|
switch(oldval) {
|
|
|
|
case Gscanrunnable:
|
|
|
|
case Gscanwaiting:
|
|
|
|
case Gscanrunning:
|
|
|
|
case Gscansyscall:
|
|
|
|
if(newval == (oldval&~Gscan))
|
|
|
|
success = runtime·cas(&gp->atomicstatus, oldval, newval);
|
|
|
|
break;
|
|
|
|
case Gscanenqueue:
|
|
|
|
if(newval == Gwaiting)
|
|
|
|
success = runtime·cas(&gp->atomicstatus, oldval, newval);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if(!success){
|
|
|
|
runtime·printf("runtime: casfromgscanstatus failed gp=%p, oldval=%d, newval=%d\n",
|
|
|
|
gp, oldval, newval);
|
|
|
|
dumpgstatus(gp);
|
|
|
|
runtime·throw("casfromgscanstatus: gp->status is not in scan state");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// This will return false if the gp is not in the expected status and the cas fails.
|
|
|
|
// This acts like a lock acquire while the casfromgstatus acts like a lock release.
|
|
|
|
bool
|
|
|
|
runtime·castogscanstatus(G *gp, uint32 oldval, uint32 newval)
|
|
|
|
{
|
|
|
|
switch(oldval) {
|
|
|
|
case Grunnable:
|
|
|
|
case Gwaiting:
|
|
|
|
case Gsyscall:
|
|
|
|
if(newval == (oldval|Gscan))
|
|
|
|
return runtime·cas(&gp->atomicstatus, oldval, newval);
|
|
|
|
break;
|
|
|
|
case Grunning:
|
|
|
|
if(newval == Gscanrunning || newval == Gscanenqueue)
|
|
|
|
return runtime·cas(&gp->atomicstatus, oldval, newval);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
runtime·printf("runtime: castogscanstatus oldval=%d newval=%d\n", oldval, newval);
|
|
|
|
runtime·throw("castogscanstatus");
|
|
|
|
return false; // not reached
|
|
|
|
}
|
|
|
|
|
|
|
|
// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
|
|
|
|
// and casfromgscanstatus instead.
|
|
|
|
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
|
|
|
|
// put it in the Gscan state is finished.
|
|
|
|
void
|
|
|
|
runtime·casgstatus(G *gp, uint32 oldval, uint32 newval)
|
|
|
|
{
|
|
|
|
if(isscanstatus(oldval) || isscanstatus(newval) || oldval == newval) {
|
|
|
|
runtime·printf("casgstatus: oldval=%d, newval=%d\n", oldval, newval);
|
|
|
|
runtime·throw("casgstatus: bad incoming values");
|
|
|
|
}
|
|
|
|
|
|
|
|
while(!runtime·cas(&gp->atomicstatus, oldval, newval)) {
|
|
|
|
// loop if gp->atomicstatus is in a scan state giving
|
|
|
|
// GC time to finish and change the state to oldval.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// This is used by the GC as well as the routines that do stack dumps. In the case
|
|
|
|
// of GC all the routines can be reliably stopped. This is not always the case
|
|
|
|
// when the system is in panic or being exited.
|
2008-12-05 16:24:18 -07:00
|
|
|
void
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·stoptheworld(void)
|
2008-12-05 16:24:18 -07:00
|
|
|
{
|
2013-03-01 04:49:16 -07:00
|
|
|
int32 i;
|
|
|
|
uint32 s;
|
|
|
|
P *p;
|
|
|
|
bool wait;
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2014-08-21 01:46:53 -06:00
|
|
|
// If we hold a lock, then we won't be able to stop another M
|
|
|
|
// that is blocked trying to acquire the lock.
|
|
|
|
if(g->m->locks > 0)
|
|
|
|
runtime·throw("stoptheworld: holding locks");
|
|
|
|
// There is no evidence that stoptheworld on g0 does not work,
|
|
|
|
// we just don't do it today.
|
|
|
|
if(g == g->m->g0)
|
|
|
|
runtime·throw("stoptheworld: on g0");
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·sched.stopwait = runtime·gomaxprocs;
|
2013-08-15 04:32:10 -06:00
|
|
|
runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
|
2013-06-28 07:52:17 -06:00
|
|
|
preemptall();
|
2013-03-01 04:49:16 -07:00
|
|
|
// stop current P
|
2014-08-27 09:15:47 -06:00
|
|
|
g->m->p->status = Pgcstop; // Pgcstop is only diagnostic.
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·sched.stopwait--;
|
|
|
|
// try to retake all P's in Psyscall status
|
|
|
|
for(i = 0; i < runtime·gomaxprocs; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
s = p->status;
|
|
|
|
if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
|
|
|
|
runtime·sched.stopwait--;
|
|
|
|
}
|
|
|
|
// stop idle P's
|
|
|
|
while(p = pidleget()) {
|
|
|
|
p->status = Pgcstop;
|
|
|
|
runtime·sched.stopwait--;
|
|
|
|
}
|
|
|
|
wait = runtime·sched.stopwait > 0;
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2013-06-28 07:52:17 -06:00
|
|
|
// wait for remaining P's to stop voluntarily
|
2013-03-01 04:49:16 -07:00
|
|
|
if(wait) {
|
2013-06-28 07:52:17 -06:00
|
|
|
for(;;) {
|
|
|
|
// wait for 100us, then try to re-preempt in case of any races
|
|
|
|
if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
|
|
|
|
runtime·noteclear(&runtime·sched.stopnote);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
preemptall();
|
|
|
|
}
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
if(runtime·sched.stopwait)
|
|
|
|
runtime·throw("stoptheworld: not stopped");
|
|
|
|
for(i = 0; i < runtime·gomaxprocs; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
if(p->status != Pgcstop)
|
|
|
|
runtime·throw("stoptheworld: not stopped");
|
2008-12-05 16:24:18 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-03-01 09:44:43 -07:00
|
|
|
static void
|
|
|
|
mhelpgc(void)
|
|
|
|
{
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->helpgc = -1;
|
2013-03-01 09:44:43 -07:00
|
|
|
}
|
|
|
|
|
2008-12-05 16:24:18 -07:00
|
|
|
// Undo a previous stoptheworld: re-acquire idle P's, hand them to
// waiting or freshly created M's, and wake sysmon. Runs on the M that
// called stoptheworld (typically after GC). Preemption is disabled for
// the duration because the function holds P pointers in locals.
void
runtime·starttheworld(void)
{
	P *p, *p1;
	M *mp;
	G *gp;
	bool add;

	g->m->locks++;  // disable preemption because it can be holding p in a local var
	gp = runtime·netpoll(false);  // non-blocking
	injectglist(gp);
	// Decide before taking sched.lock whether GC wanted one more helper proc;
	// needaddgcproc() examines GC state that is stable while the world is stopped.
	add = needaddgcproc();
	runtime·lock(&runtime·sched.lock);
	// If GOMAXPROCS was changed while the world was stopped, apply it now;
	// otherwise re-establish the current proc count.
	if(newprocs) {
		procresize(newprocs);
		newprocs = 0;
	} else
		procresize(runtime·gomaxprocs);
	runtime·sched.gcwaiting = 0;

	// Pull every idle P that has local work off the idle list,
	// chaining them through p->link so they can be started after
	// sched.lock is released.
	p1 = nil;
	while(p = pidleget()) {
		// procresize() puts p's with work at the beginning of the list.
		// Once we reach a p without a run queue, the rest don't have one either.
		if(p->runqhead == p->runqtail) {
			pidleput(p);
			break;
		}
		// Pair the P with an idle M now (may be nil if none are parked).
		p->m = mget();
		p->link = p1;
		p1 = p;
	}
	// Sysmon parks itself while the world is stopped; wake it back up.
	if(runtime·sched.sysmonwait) {
		runtime·sched.sysmonwait = false;
		runtime·notewakeup(&runtime·sched.sysmonnote);
	}
	runtime·unlock(&runtime·sched.lock);

	// Start an M for each P collected above: either wake the parked M
	// we paired with it, or create a brand new one.
	while(p1) {
		p = p1;
		p1 = p1->link;
		if(p->m) {
			mp = p->m;
			p->m = nil;
			if(mp->nextp)
				runtime·throw("starttheworld: inconsistent mp->nextp");
			// Hand the P to the M via nextp; the M picks it up in mstart/stopm.
			mp->nextp = p;
			runtime·notewakeup(&mp->park);
		} else {
			// Start M to run P.  Do not start another M below.
			newm(nil, p);
			add = false;
		}
	}

	if(add) {
		// If GC could have used another helper proc, start one now,
		// in the hope that it will be available next time.
		// It would have been even better to start it before the collection,
		// but doing so requires allocating memory, so it's tricky to
		// coordinate.  This lazy approach works out in practice:
		// we don't mind if the first couple gc rounds don't have quite
		// the maximum number of procs.
		newm(mhelpgc, nil);
	}
	g->m->locks--;
	if(g->m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
		g->stackguard0 = StackPreempt;
}
|
|
|
|
|
2008-09-18 16:56:46 -06:00
|
|
|
// Called to start an M.
// Entry point for every worker thread, running on the M's g0 stack.
// Performs per-thread initialization (asminit/minit, signal handlers on m0),
// runs the M's optional start function, then either parks as a GC helper
// or acquires its handed-off P and enters the scheduler. Never returns:
// schedule() does not come back to this frame.
void
runtime·mstart(void)
{
	if(g != g->m->g0)
		runtime·throw("bad runtime·mstart");

	// Record top of stack for use by mcall.
	// Once we call schedule we're never coming back,
	// so other calls can reuse this stack space.
	runtime·gosave(&g->m->g0->sched);
	g->m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
	g->m->g0->stackguard = g->m->g0->stackguard0;  // cgo sets only stackguard0, copy it to stackguard
	runtime·asminit();
	runtime·minit();

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if(g->m == &runtime·m0)
		runtime·initsig();

	// Run the M's start function, if one was supplied to newm
	// (e.g. mhelpgc or sysmon).
	if(g->m->mstartfn)
		g->m->mstartfn();

	if(g->m->helpgc) {
		// This M was created to help the garbage collector: clear the
		// flag and park until GC needs it.
		g->m->helpgc = 0;
		stopm();
	} else if(g->m != &runtime·m0) {
		// Every M except m0 is handed its initial P via m->nextp
		// (set by starttheworld/newm); take ownership of it now.
		acquirep(g->m->nextp);
		g->m->nextp = nil;
	}
	schedule();

	// TODO(brainman): This point is never reached, because scheduler
	// does not release os threads at the moment. But once this path
	// is enabled, we must remove our seh here.
}
|
|
|
|
|
2013-02-28 14:24:38 -07:00
|
|
|
// When running with cgo, we call _cgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
// The pointer is filled in by the cgo-generated startup code when the
// program links against cgo; it stays nil in pure-Go binaries.
void (*_cgo_thread_start)(void*);

// CgoThreadStart is the argument record passed to _cgo_thread_start:
// the new thread's g0, the TLS slot to install it in, and the function
// the thread should run (typically runtime·mstart).
typedef struct CgoThreadStart CgoThreadStart;
struct CgoThreadStart
{
	G *g;
	uintptr *tls;
	void (*fn)(void);
};
|
|
|
|
|
2013-02-20 15:48:23 -07:00
|
|
|
// Allocate a new m unassociated with any thread.
|
2013-03-01 04:49:16 -07:00
|
|
|
// Can use p for allocation context if needed.
|
runtime: add timer support, use for package time
This looks like it is just moving some code from
time to runtime (and translating it to C), but the
runtime can do a better job managing the goroutines,
and it needs this functionality for its own maintenance
(for example, for the garbage collector to hand back
unused memory to the OS on a time delay).
Might as well have just one copy of the timer logic,
and runtime can't depend on time, so vice versa.
It also unifies Sleep, NewTicker, and NewTimer behind
one mechanism, so that there are no claims that one
is more efficient than another. (For example, today
people recommend using time.After instead of time.Sleep
to avoid blocking an OS thread.)
Fixes #1644.
Fixes #1731.
Fixes #2190.
R=golang-dev, r, hectorchu, iant, iant, jsing, alex.brainman, dvyukov
CC=golang-dev
https://golang.org/cl/5334051
2011-11-09 13:17:05 -07:00
|
|
|
M*
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·allocm(P *p)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
{
|
2012-07-03 02:54:13 -06:00
|
|
|
M *mp;
|
2012-10-21 15:41:32 -06:00
|
|
|
static Type *mtype; // The Go type M
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++; // disable GC because it can be called from sysmon
|
|
|
|
if(g->m->p == nil)
|
2013-03-01 04:49:16 -07:00
|
|
|
acquirep(p); // temporarily borrow p for mallocs in this function
|
2012-10-21 15:41:32 -06:00
|
|
|
if(mtype == nil) {
|
|
|
|
Eface e;
|
|
|
|
runtime·gc_m_ptr(&e);
|
|
|
|
mtype = ((PtrType*)e.type)->elem;
|
|
|
|
}
|
|
|
|
|
|
|
|
mp = runtime·cnew(mtype);
|
2012-07-03 02:54:13 -06:00
|
|
|
mcommoninit(mp);
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2014-01-16 21:58:10 -07:00
|
|
|
// In case of cgo or Solaris, pthread_create will make us a stack.
|
2013-03-01 04:49:16 -07:00
|
|
|
// Windows will layout sched stack on OS stack.
|
2014-01-16 21:58:10 -07:00
|
|
|
if(runtime·iscgo || Solaris || Windows)
|
2013-02-20 15:48:23 -07:00
|
|
|
mp->g0 = runtime·malg(-1);
|
|
|
|
else
|
|
|
|
mp->g0 = runtime·malg(8192);
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previously reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
mp->g0->m = mp;
|
2013-02-22 21:39:31 -07:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(p == g->m->p)
|
2013-03-01 04:49:16 -07:00
|
|
|
releasep();
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
|
|
|
if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
|
2013-07-17 10:52:37 -06:00
|
|
|
g->stackguard0 = StackPreempt;
|
2013-03-01 04:49:16 -07:00
|
|
|
|
2013-02-20 15:48:23 -07:00
|
|
|
return mp;
|
|
|
|
}
|
|
|
|
|
2014-04-28 10:47:09 -06:00
|
|
|
static G*
|
|
|
|
allocg(void)
|
|
|
|
{
|
|
|
|
G *gp;
|
|
|
|
static Type *gtype;
|
|
|
|
|
|
|
|
if(gtype == nil) {
|
|
|
|
Eface e;
|
|
|
|
runtime·gc_g_ptr(&e);
|
|
|
|
gtype = ((PtrType*)e.type)->elem;
|
|
|
|
}
|
|
|
|
gp = runtime·cnew(gtype);
|
|
|
|
return gp;
|
|
|
|
}
|
|
|
|
|
2013-02-20 15:48:23 -07:00
|
|
|
static M* lockextra(bool nilokay);
|
|
|
|
static void unlockextra(M*);
|
|
|
|
|
|
|
|
// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via casp) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
//
// The parameter x is used only for its address: it lives on the
// foreign thread's stack, so &x locates that stack and lets us
// install approximate stack bounds below.
#pragma textflag NOSPLIT
void
runtime·needm(byte x)
{
	M *mp;

	if(runtime·needextram) {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can not throw, because scheduler is not initialized yet.
		runtime·write(2, "fatal error: cgo callback before cgo call\n",
			sizeof("fatal error: cgo callback before cgo call\n")-1);
		runtime·exit(1);
	}

	// Lock extra list, take head, unlock popped list.
	// nilokay=false is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp = lockextra(false);

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp->needextram = mp->schedlink == nil;
	// Storing the remainder of the list releases the spin lock.
	unlockextra(mp->schedlink);

	// Install g (= m->g0) and set the stack bounds
	// to match the current stack. We don't actually know
	// how big the stack is, like we don't know how big any
	// scheduling stack is, but we assume there's at least 32 kB,
	// which is more than enough for us.
	runtime·setg(mp->g0);
	g->stackbase = (uintptr)(&x + 1024);
	g->stackguard = (uintptr)(&x - 32*1024);
	g->stackguard0 = g->stackguard;

	// Initialize this thread to use the m.
	runtime·asminit();
	runtime·minit();
}
|
|
|
|
|
|
|
|
// newextram allocates an m and puts it on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
void
runtime·newextram(void)
{
	M *mp, *mnext;
	G *gp;

	// Create extra goroutine locked to extra m.
	// The goroutine is the context in which the cgo callback will run.
	// The sched.pc will never be returned to, but setting it to
	// runtime.goexit makes clear to the traceback routines where
	// the goroutine stack ends.
	mp = runtime·allocm(nil);
	gp = runtime·malg(4096);
	gp->sched.pc = (uintptr)runtime·goexit;
	gp->sched.sp = gp->stackbase;
	gp->sched.lr = 0;
	gp->sched.g = gp;
	// Mirror the saved context into the syscall fields; the goroutine
	// is put into Gsyscall state below, matching a thread that is
	// blocked outside Go.
	gp->syscallpc = gp->sched.pc;
	gp->syscallsp = gp->sched.sp;
	gp->syscallstack = gp->stackbase;
	gp->syscallguard = gp->stackguard;
	// malg returns status as Gidle, change to Gsyscall before adding to allg
	// where GC will see it.
	runtime·casgstatus(gp, Gidle, Gsyscall);
	// Lock the goroutine to this m so the callback always runs here.
	gp->m = mp;
	mp->curg = gp;
	mp->locked = LockInternal;
	mp->lockedg = gp;
	gp->lockedm = mp;
	gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
	if(raceenabled)
		gp->racectx = runtime·racegostart(runtime·newextram);
	// put on allg for garbage collector
	allgadd(gp);

	// Add m to the extra list.
	mnext = lockextra(true);
	mp->schedlink = mnext;
	unlockextra(mp);
}
|
|
|
|
|
|
|
|
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
// It puts the current m back onto the extra list.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
// variable using pthread_key_create. Unlike the pthread keys we already use
// on OS X, this dummy key would never be read by Go code. It would exist
// only so that we could register at thread-exit-time destructor.
// That destructor would put the m back onto the extra list.
// This is purely a performance optimization. The current version,
// in which dropm happens on each cgo call, is still correct too.
// We may have to keep the current version on systems with cgo
// but without pthreads, like Windows.
void
runtime·dropm(void)
{
	M *mp, *mnext;

	// Undo whatever initialization minit did during needm.
	runtime·unminit();

	// Clear m and g, and return m to the extra list.
	// After the call to setg we can only call nosplit functions.
	mp = g->m;
	runtime·setg(nil);

	mnext = lockextra(true);
	mp->schedlink = mnext;
	unlockextra(mp);
}
|
|
|
|
|
|
|
|
#define MLOCKED ((M*)1)
|
|
|
|
|
|
|
|
// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to runtime.extram. If nilokay is true, then lockextra will
// return a nil list head if that's what it finds. If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.
#pragma textflag NOSPLIT
static M*
lockextra(bool nilokay)
{
	M *mp;
	void (*yield)(void);

	for(;;) {
		mp = runtime·atomicloadp(&runtime·extram);
		if(mp == MLOCKED) {
			// Another thread holds the simulated spin lock; back off.
			// NOTE(review): osyield is invoked through a local
			// function pointer here and below — presumably
			// deliberate in this nosplit, possibly m-less context;
			// confirm before simplifying to a direct call.
			yield = runtime·osyield;
			yield();
			continue;
		}
		if(mp == nil && !nilokay) {
			// List is momentarily empty; wait for it to be
			// replenished rather than returning nil.
			runtime·usleep(1);
			continue;
		}
		if(!runtime·casp(&runtime·extram, mp, MLOCKED)) {
			// Lost the race to take the lock; retry.
			yield = runtime·osyield;
			yield();
			continue;
		}
		break;
	}
	return mp;
}
|
|
|
|
|
2013-08-12 14:47:18 -06:00
|
|
|
// unlockextra releases the extra-m list lock taken by lockextra:
// publishing a valid (possibly nil) head pointer replaces the
// MLOCKED sentinel and lets other threads proceed.
#pragma textflag NOSPLIT
static void
unlockextra(M *mp)
{
	runtime·atomicstorep(&runtime·extram, mp);
}
|
|
|
|
|
|
|
|
|
2013-03-01 09:44:43 -07:00
|
|
|
// Create a new m. It will start off with a call to fn, or else the scheduler.
|
2013-03-01 04:49:16 -07:00
|
|
|
static void
|
2013-03-01 09:44:43 -07:00
|
|
|
newm(void(*fn)(void), P *p)
|
2013-02-20 15:48:23 -07:00
|
|
|
{
|
|
|
|
M *mp;
|
2013-02-22 21:39:31 -07:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
mp = runtime·allocm(p);
|
|
|
|
mp->nextp = p;
|
2013-03-01 09:44:43 -07:00
|
|
|
mp->mstartfn = fn;
|
2013-02-20 15:48:23 -07:00
|
|
|
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
if(runtime·iscgo) {
|
|
|
|
CgoThreadStart ts;
|
|
|
|
|
2013-02-28 14:24:38 -07:00
|
|
|
if(_cgo_thread_start == nil)
|
|
|
|
runtime·throw("_cgo_thread_start missing");
|
2012-07-03 02:54:13 -06:00
|
|
|
ts.g = mp->g0;
|
2014-01-21 23:30:10 -07:00
|
|
|
ts.tls = mp->tls;
|
2013-03-01 09:44:43 -07:00
|
|
|
ts.fn = runtime·mstart;
|
2013-02-28 14:24:38 -07:00
|
|
|
runtime·asmcgocall(_cgo_thread_start, &ts);
|
2013-03-01 04:49:16 -07:00
|
|
|
return;
|
2008-11-25 17:48:10 -07:00
|
|
|
}
|
2013-03-01 09:44:43 -07:00
|
|
|
runtime·newosproc(mp, (byte*)mp->g0->stackbase);
|
2008-11-25 17:48:10 -07:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Stops execution of the current m until new work is available.
// Returns with acquired P.
// The m must not hold locks, must not already own a P, and its
// spinning state is cleared (with the global nmspinning count
// adjusted) before it parks.
static void
stopm(void)
{
	if(g->m->locks)
		runtime·throw("stopm holding locks");
	if(g->m->p)
		runtime·throw("stopm holding p");
	if(g->m->spinning) {
		// This m is going to sleep; it no longer counts as spinning.
		g->m->spinning = false;
		runtime·xadd(&runtime·sched.nmspinning, -1);
	}

retry:
	// Publish this m on the idle list under the scheduler lock,
	// then block on the per-m park note until someone wakes us
	// (startm/startlockedm do so via runtime·notewakeup).
	runtime·lock(&runtime·sched.lock);
	mput(g->m);
	runtime·unlock(&runtime·sched.lock);
	runtime·notesleep(&g->m->park);
	runtime·noteclear(&g->m->park);
	if(g->m->helpgc) {
		// Woken only to assist the garbage collector: run the
		// helper, drop the GC-loaned mcache, and go back to sleep.
		runtime·gchelper();
		g->m->helpgc = 0;
		g->m->mcache = nil;
		goto retry;
	}
	// Woken for real work: the waker stashed a P in m->nextp for us.
	acquirep(g->m->nextp);
	g->m->nextp = nil;
}
|
2008-08-04 17:43:49 -06:00
|
|
|
|
2013-03-01 09:44:43 -07:00
|
|
|
static void
|
|
|
|
mspinning(void)
|
|
|
|
{
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->spinning = true;
|
2013-03-01 09:44:43 -07:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Schedules some M to run the p (creates an M if necessary).
|
2014-01-14 01:58:13 -07:00
|
|
|
// If p==nil, tries to get an idle P, if no idle P's does nothing.
|
2013-03-01 04:49:16 -07:00
|
|
|
static void
|
|
|
|
startm(P *p, bool spinning)
|
|
|
|
{
|
|
|
|
M *mp;
|
2013-03-01 09:44:43 -07:00
|
|
|
void (*fn)(void);
|
2013-03-01 04:49:16 -07:00
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
if(p == nil) {
|
|
|
|
p = pidleget();
|
|
|
|
if(p == nil) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
if(spinning)
|
|
|
|
runtime·xadd(&runtime·sched.nmspinning, -1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mp = mget();
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
if(mp == nil) {
|
2013-03-01 09:44:43 -07:00
|
|
|
fn = nil;
|
|
|
|
if(spinning)
|
|
|
|
fn = mspinning;
|
|
|
|
newm(fn, p);
|
2013-03-01 04:49:16 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
if(mp->spinning)
|
|
|
|
runtime·throw("startm: m is spinning");
|
|
|
|
if(mp->nextp)
|
|
|
|
runtime·throw("startm: m has p");
|
|
|
|
mp->spinning = spinning;
|
|
|
|
mp->nextp = p;
|
|
|
|
runtime·notewakeup(&mp->park);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Hands off P from syscall or locked M.
|
|
|
|
static void
|
|
|
|
handoffp(P *p)
|
|
|
|
{
|
|
|
|
// if it has local work, start it straight away
|
|
|
|
if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
|
|
|
|
startm(p, false);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// no local work, check that there are no spinning/idle M's,
|
|
|
|
// otherwise our help is not required
|
2013-03-01 12:57:05 -07:00
|
|
|
if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 && // TODO: fast atomic
|
2014-08-27 09:15:47 -06:00
|
|
|
runtime·cas(&runtime·sched.nmspinning, 0, 1)){
|
2013-03-01 04:49:16 -07:00
|
|
|
startm(p, true);
|
|
|
|
return;
|
|
|
|
}
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-08-15 04:32:10 -06:00
|
|
|
if(runtime·sched.gcwaiting) {
|
2013-03-01 04:49:16 -07:00
|
|
|
p->status = Pgcstop;
|
|
|
|
if(--runtime·sched.stopwait == 0)
|
|
|
|
runtime·notewakeup(&runtime·sched.stopnote);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
if(runtime·sched.runqsize) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
startm(p, false);
|
|
|
|
return;
|
|
|
|
}
|
2013-04-06 23:27:54 -06:00
|
|
|
// If this is the last running P and nobody is polling network,
|
|
|
|
// need to wakeup another M to poll network.
|
|
|
|
if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-04-06 23:27:54 -06:00
|
|
|
startm(p, false);
|
|
|
|
return;
|
|
|
|
}
|
2013-03-01 04:49:16 -07:00
|
|
|
pidleput(p);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Tries to add one more P to execute G's.
|
|
|
|
// Called when a G is made runnable (newproc, ready).
|
|
|
|
static void
|
|
|
|
wakep(void)
|
|
|
|
{
|
|
|
|
// be conservative about spinning threads
|
|
|
|
if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
|
|
|
|
return;
|
|
|
|
startm(nil, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stops execution of the current m that is locked to a g until the g is runnable again.
|
|
|
|
// Returns with acquired P.
|
|
|
|
static void
|
|
|
|
stoplockedm(void)
|
|
|
|
{
|
|
|
|
P *p;
|
2014-08-27 09:15:47 -06:00
|
|
|
uint32 status;
|
2013-03-01 04:49:16 -07:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->lockedg == nil || g->m->lockedg->lockedm != g->m)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("stoplockedm: inconsistent locking");
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->p) {
|
2013-03-01 04:49:16 -07:00
|
|
|
// Schedule another M to run this p.
|
|
|
|
p = releasep();
|
|
|
|
handoffp(p);
|
|
|
|
}
|
2013-08-13 12:07:42 -06:00
|
|
|
incidlelocked(1);
|
2013-03-01 04:49:16 -07:00
|
|
|
// Wait until another thread schedules lockedg again.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
runtime·notesleep(&g->m->park);
|
|
|
|
runtime·noteclear(&g->m->park);
|
2014-08-27 09:15:47 -06:00
|
|
|
status = runtime·readgstatus(g->m->lockedg);
|
|
|
|
if((status&~Gscan) != Grunnable){
|
|
|
|
runtime·printf("runtime:stoplockedm: g is not Grunnable or Gscanrunnable");
|
|
|
|
dumpgstatus(g);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("stoplockedm: not runnable");
|
2014-08-27 09:15:47 -06:00
|
|
|
}
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
acquirep(g->m->nextp);
|
|
|
|
g->m->nextp = nil;
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Schedules the locked m to run the locked gp.
|
|
|
|
static void
|
|
|
|
startlockedm(G *gp)
|
|
|
|
{
|
|
|
|
M *mp;
|
|
|
|
P *p;
|
|
|
|
|
|
|
|
mp = gp->lockedm;
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(mp == g->m)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("startlockedm: locked to me");
|
|
|
|
if(mp->nextp)
|
|
|
|
runtime·throw("startlockedm: m has p");
|
|
|
|
// directly handoff current P to the locked m
|
2013-08-13 12:07:42 -06:00
|
|
|
incidlelocked(-1);
|
2013-03-01 04:49:16 -07:00
|
|
|
p = releasep();
|
|
|
|
mp->nextp = p;
|
|
|
|
runtime·notewakeup(&mp->park);
|
|
|
|
stopm();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stops the current m for stoptheworld.
|
|
|
|
// Returns when the world is restarted.
|
|
|
|
static void
|
|
|
|
gcstopm(void)
|
|
|
|
{
|
|
|
|
P *p;
|
|
|
|
|
2013-08-15 04:32:10 -06:00
|
|
|
if(!runtime·sched.gcwaiting)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("gcstopm: not waiting for gc");
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->spinning) {
|
|
|
|
g->m->spinning = false;
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·xadd(&runtime·sched.nmspinning, -1);
|
|
|
|
}
|
|
|
|
p = releasep();
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
p->status = Pgcstop;
|
|
|
|
if(--runtime·sched.stopwait == 0)
|
|
|
|
runtime·notewakeup(&runtime·sched.stopnote);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
stopm();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Schedules gp to run on the current M.
|
|
|
|
// Never returns.
|
|
|
|
static void
|
|
|
|
execute(G *gp)
|
|
|
|
{
|
|
|
|
int32 hz;
|
2014-08-27 09:15:47 -06:00
|
|
|
|
|
|
|
runtime·casgstatus(gp, Grunnable, Grunning);
|
2014-01-16 01:54:46 -07:00
|
|
|
gp->waitsince = 0;
|
2013-07-17 10:52:37 -06:00
|
|
|
gp->preempt = false;
|
2013-06-03 02:28:24 -06:00
|
|
|
gp->stackguard0 = gp->stackguard;
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->p->schedtick++;
|
|
|
|
g->m->curg = gp;
|
|
|
|
gp->m = g->m;
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2011-03-23 09:43:37 -06:00
|
|
|
// Check whether the profiler needs to be turned on or off.
|
|
|
|
hz = runtime·sched.profilehz;
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->profilehz != hz)
|
2011-03-23 09:43:37 -06:00
|
|
|
runtime·resetcpuprofiler(hz);
|
|
|
|
|
2013-06-12 13:22:26 -06:00
|
|
|
runtime·gogo(&gp->sched);
|
2008-07-14 15:34:27 -06:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from global queue, poll network.
// Returns only with a runnable G (callers rely on a non-nil result);
// if nothing is found the M releases its P and blocks in stopm, then retries.
static G*
findrunnable(void)
{
	G *gp;
	P *p;
	int32 i;

top:
	// Cooperate with stop-the-world: park this M until GC is done.
	if(runtime·sched.gcwaiting) {
		gcstopm();
		goto top;
	}
	// Wake the finalizer goroutine if it has pending work.
	if(runtime·fingwait && runtime·fingwake && (gp = runtime·wakefing()) != nil)
		runtime·ready(gp);
	// local runq
	gp = runqget(g->m->p);
	if(gp)
		return gp;
	// global runq
	if(runtime·sched.runqsize) {
		runtime·lock(&runtime·sched.lock);
		gp = globrunqget(g->m->p, 0);
		runtime·unlock(&runtime·sched.lock);
		if(gp)
			return gp;
	}
	// poll network
	gp = runtime·netpoll(false);  // non-blocking
	if(gp) {
		// netpoll returns a list; keep the head for ourselves and
		// inject the rest into the global queue.
		injectglist(gp->schedlink);
		runtime·casgstatus(gp, Gwaiting, Grunnable);
		return gp;
	}
	// If number of spinning M's >= number of busy P's, block.
	// This is necessary to prevent excessive CPU consumption
	// when GOMAXPROCS>>1 but the program parallelism is low.
	if(!g->m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
		goto stop;
	if(!g->m->spinning) {
		g->m->spinning = true;
		runtime·xadd(&runtime·sched.nmspinning, 1);
	}
	// random steal from other P's
	for(i = 0; i < 2*runtime·gomaxprocs; i++) {
		if(runtime·sched.gcwaiting)
			goto top;
		p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
		if(p == g->m->p)
			gp = runqget(p);
		else
			gp = runqsteal(g->m->p, p);
		if(gp)
			return gp;
	}
stop:
	// return P and block
	runtime·lock(&runtime·sched.lock);
	if(runtime·sched.gcwaiting) {
		runtime·unlock(&runtime·sched.lock);
		goto top;
	}
	// Recheck the global queue under the lock before giving up the P.
	if(runtime·sched.runqsize) {
		gp = globrunqget(g->m->p, 0);
		runtime·unlock(&runtime·sched.lock);
		return gp;
	}
	p = releasep();
	pidleput(p);
	runtime·unlock(&runtime·sched.lock);
	// Drop out of spinning state before blocking so the nmspinning
	// accounting stays balanced with the xadd above.
	if(g->m->spinning) {
		g->m->spinning = false;
		runtime·xadd(&runtime·sched.nmspinning, -1);
	}
	// check all runqueues once again
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p && p->runqhead != p->runqtail) {
			// Some P has work; try to reacquire an idle P and retry.
			runtime·lock(&runtime·sched.lock);
			p = pidleget();
			runtime·unlock(&runtime·sched.lock);
			if(p) {
				acquirep(p);
				goto top;
			}
			break;
		}
	}
	// poll network
	// xchg64(lastpoll, 0) elects at most one M as the blocking poller.
	if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
		if(g->m->p)
			runtime·throw("findrunnable: netpoll with p");
		if(g->m->spinning)
			runtime·throw("findrunnable: netpoll with spinning");
		gp = runtime·netpoll(true);  // block until new work is available
		runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
		if(gp) {
			runtime·lock(&runtime·sched.lock);
			p = pidleget();
			runtime·unlock(&runtime·sched.lock);
			if(p) {
				// Got a P back: run the head G here, share the rest.
				acquirep(p);
				injectglist(gp->schedlink);
				runtime·casgstatus(gp, Gwaiting, Grunnable);
				return gp;
			}
			// No idle P: hand the whole list to the global queue.
			injectglist(gp);
		}
	}
	stopm();
	goto top;
}
|
|
|
|
|
2013-07-11 13:57:36 -06:00
|
|
|
// Clears the current M's spinning state (if set) and updates the global
// spinning-M count, then possibly wakes another P so newly found work can
// be picked up in parallel.
static void
resetspinning(void)
{
	int32 nmspinning;

	if(g->m->spinning) {
		g->m->spinning = false;
		// xadd returns the new value, so we can sanity-check it here.
		nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
		if(nmspinning < 0)
			runtime·throw("findrunnable: negative nmspinning");
	} else
		nmspinning = runtime·atomicload(&runtime·sched.nmspinning);

	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
	// so see if we need to wakeup another P here.
	if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
		wakep();
}
|
|
|
|
|
2013-03-12 11:14:26 -06:00
|
|
|
// Injects the list of runnable G's into the scheduler.
// Can run concurrently with GC.
// Each G on the list (linked via schedlink) is moved from Gwaiting to
// Grunnable and put on the global run queue; idle P's are then started,
// one per injected G, to pick up the new work.
static void
injectglist(G *glist)
{
	int32 n;
	G *gp;

	if(glist == nil)
		return;
	runtime·lock(&runtime·sched.lock);
	for(n = 0; glist; n++) {
		gp = glist;
		// Advance before globrunqput, which may reuse gp->schedlink.
		glist = gp->schedlink;
		runtime·casgstatus(gp, Gwaiting, Grunnable);
		globrunqput(gp);
	}
	runtime·unlock(&runtime·sched.lock);
	// Kick at most one idle M/P per injected G (startm is called
	// after dropping sched.lock).
	for(; n && runtime·sched.npidle; n--)
		startm(nil, false);
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// One round of scheduler: find a runnable goroutine and execute it.
|
|
|
|
// Never returns.
|
|
|
|
static void
|
|
|
|
schedule(void)
|
2008-08-02 23:34:04 -06:00
|
|
|
{
|
2013-03-01 04:49:16 -07:00
|
|
|
G *gp;
|
2013-06-15 06:06:28 -06:00
|
|
|
uint32 tick;
|
2013-03-01 04:49:16 -07:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->locks)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("schedule: holding locks");
|
|
|
|
|
runtime: refactor routines for stopping, running goroutine from m
This CL adds 'dropg', which is called to drop the association
between m and its current goroutine, and it makes schedule
handle locked goroutines correctly, instead of requiring all
callers of schedule to do that.
The effect is that if you want to take over an m for, say,
garbage collection work while still allowing the current g
to run on some other m, you can do an mcall to a function
that is:
// dissociate gp
dropg();
gp->status = Gwaiting; // for ready
// put gp on run queue for others to find
runtime·ready(gp);
/* ... do other work here ... */
// done with m, let it run goroutines again
schedule();
Before this CL, the dropg() body had to be written explicitly,
and the check for lockedg before schedule had to be
written explicitly too, both of which make the code a bit
more fragile than it needs to be.
LGTM=iant
R=dvyukov, iant
CC=golang-codereviews, rlh
https://golang.org/cl/113110043
2014-07-14 18:56:37 -06:00
|
|
|
if(g->m->lockedg) {
|
|
|
|
stoplockedm();
|
|
|
|
execute(g->m->lockedg); // Never returns.
|
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
top:
|
2013-08-15 04:32:10 -06:00
|
|
|
if(runtime·sched.gcwaiting) {
|
2013-03-01 04:49:16 -07:00
|
|
|
gcstopm();
|
|
|
|
goto top;
|
|
|
|
}
|
|
|
|
|
2013-06-15 06:06:28 -06:00
|
|
|
gp = nil;
|
|
|
|
// Check the global runnable queue once in a while to ensure fairness.
|
|
|
|
// Otherwise two goroutines can completely occupy the local runqueue
|
|
|
|
// by constantly respawning each other.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
tick = g->m->p->schedtick;
|
2013-06-15 06:06:28 -06:00
|
|
|
// This is a fancy way to say tick%61==0,
|
|
|
|
// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
|
|
|
|
if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
gp = globrunqget(g->m->p, 1);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-07-11 13:57:36 -06:00
|
|
|
if(gp)
|
|
|
|
resetspinning();
|
2013-06-15 06:06:28 -06:00
|
|
|
}
|
|
|
|
if(gp == nil) {
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
gp = runqget(g->m->p);
|
|
|
|
if(gp && g->m->spinning)
|
2013-06-15 06:06:28 -06:00
|
|
|
runtime·throw("schedule: spinning with local work");
|
|
|
|
}
|
2013-07-11 13:57:36 -06:00
|
|
|
if(gp == nil) {
|
|
|
|
gp = findrunnable(); // blocks until work is available
|
|
|
|
resetspinning();
|
2013-06-27 11:03:35 -06:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
if(gp->lockedm) {
|
2013-07-11 13:57:36 -06:00
|
|
|
// Hands off own p to the locked m,
|
|
|
|
// then blocks waiting for a new p.
|
2013-03-01 04:49:16 -07:00
|
|
|
startlockedm(gp);
|
|
|
|
goto top;
|
|
|
|
}
|
|
|
|
|
|
|
|
execute(gp);
|
2008-08-02 23:34:04 -06:00
|
|
|
}
|
|
|
|
|
runtime: refactor routines for stopping, running goroutine from m
This CL adds 'dropg', which is called to drop the association
between m and its current goroutine, and it makes schedule
handle locked goroutines correctly, instead of requiring all
callers of schedule to do that.
The effect is that if you want to take over an m for, say,
garbage collection work while still allowing the current g
to run on some other m, you can do an mcall to a function
that is:
// dissociate gp
dropg();
gp->status = Gwaiting; // for ready
// put gp on run queue for others to find
runtime·ready(gp);
/* ... do other work here ... */
// done with m, let it run goroutines again
schedule();
Before this CL, the dropg() body had to be written explicitly,
and the check for lockedg before schedule had to be
written explicitly too, both of which make the code a bit
more fragile than it needs to be.
LGTM=iant
R=dvyukov, iant
CC=golang-codereviews, rlh
https://golang.org/cl/113110043
2014-07-14 18:56:37 -06:00
|
|
|
// dropg removes the association between m and the current goroutine m->curg (gp for short).
// Typically a caller sets gp's status away from Grunning and then
// immediately calls dropg to finish the job. The caller is also responsible
// for arranging that gp will be restarted using runtime·ready at an
// appropriate time. After calling dropg and arranging for gp to be
// readied later, the caller can do other work but eventually should
// call schedule to restart the scheduling of goroutines on this m.
static void
dropg(void)
{
	// If the goroutine is locked to this m (lockedg != nil), the
	// association must be kept so the locked goroutine resumes on
	// this same thread; only unlocked goroutines are detached.
	if(g->m->lockedg == nil) {
		g->m->curg->m = nil;	// sever g -> m link
		g->m->curg = nil;	// sever m -> g link
	}
}
|
|
|
|
|
2014-01-22 00:27:16 -07:00
|
|
|
// Puts the current goroutine into a waiting state and calls unlockf.
// If unlockf returns false, the goroutine is resumed.
//
// unlockf runs on g0 (from runtime·park_m) after the goroutine's status
// has been changed to Gwaiting; lock is passed through to it unmodified.
// reason is recorded on the goroutine for debugging/traceback output.
void
runtime·park(bool(*unlockf)(G*, void*), void *lock, String reason)
{
	// Stash the callback and its argument on the current m; park_m
	// picks them up once it is running on the g0 stack.
	g->m->waitlock = lock;
	g->m->waitunlockf = unlockf;
	g->waitreason = reason;
	// Switch to g0 and finish parking there; does not return here
	// until the goroutine is made runnable and rescheduled.
	runtime·mcall(runtime·park_m);
}
|
|
|
|
|
2014-08-21 10:41:09 -06:00
|
|
|
// Unlock callback used by runtime·parkunlock: releases the lock and
// always returns true so the park proceeds (the goroutine stays parked).
bool
runtime·parkunlock_c(G *gp, void *lock)
{
	USED(gp);	// gp is required by the unlockf signature but unused here.
	runtime·unlock(lock);
	return true;
}
|
|
|
|
|
|
|
|
// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling runtime·ready(gp).
//
// Convenience wrapper around runtime·park with parkunlock_c as the
// unlock callback; since parkunlock_c always returns true, the
// goroutine always stays parked after the lock is released.
void
runtime·parkunlock(Mutex *lock, String reason)
{
	runtime·park(runtime·parkunlock_c, lock, reason);
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// runtime·park continuation on g0.
//
// Transitions gp from Grunning to Gwaiting, detaches it from this m,
// and invokes the waitunlockf callback stashed by runtime·park (if any).
// If the callback vetoes the park (returns false), gp is flipped back
// to Grunnable and immediately re-executed on this m; otherwise this m
// goes back into the scheduler loop.
void
runtime·park_m(G *gp)
{
	bool ok;

	// Mark gp as waiting before dropping it, so the GC never observes
	// a detached goroutine still claiming to be running.
	runtime·casgstatus(gp, Grunning, Gwaiting);
	dropg();

	if(g->m->waitunlockf) {
		ok = g->m->waitunlockf(gp, g->m->waitlock);
		// Clear the stashed callback/arg so stale values cannot be
		// re-invoked on the next park.
		g->m->waitunlockf = nil;
		g->m->waitlock = nil;
		if(!ok) {
			// Callback refused the park: resume gp right away.
			runtime·casgstatus(gp, Gwaiting, Grunnable);
			execute(gp);  // Schedule it back, never returns.
		}
	}

	schedule();
}
|
|
|
|
|
|
|
|
// Scheduler yield.
//
// Voluntarily gives up the processor: switches to g0 via mcall and
// continues in runtime·gosched_m, which requeues the current goroutine
// on the global run queue.
void
runtime·gosched(void)
{
	runtime·mcall(runtime·gosched_m);
}
|
|
|
|
|
|
|
|
// runtime·gosched continuation on g0.
//
// Moves gp from Grunning to Grunnable, detaches it from this m, puts it
// on the global run queue (so any P may pick it up), and re-enters the
// scheduler loop on this m.
void
runtime·gosched_m(G *gp)
{
	uint32 status;

	// Sanity check: only a running goroutine may yield. The Gscan bit
	// is masked off because the GC may be scanning gp concurrently.
	status = runtime·readgstatus(gp);
	if ((status&~Gscan) != Grunning){
		dumpgstatus(gp);
		runtime·throw("bad g status");
	}
	runtime·casgstatus(gp, Grunning, Grunnable);
	dropg();
	// Global run queue is protected by the scheduler lock.
	runtime·lock(&runtime·sched.lock);
	globrunqput(gp);
	runtime·unlock(&runtime·sched.lock);

	schedule();
}
|
|
|
|
|
|
|
|
// Finishes execution of the current goroutine.
// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime·lessstack).
// Since it does not return it does not matter. But if it is preempted
// at the split stack check, GC will complain about inconsistent sp.
#pragma textflag NOSPLIT
void
runtime·goexit(void)
{
	// Notify the race detector before tearing the goroutine down.
	if(raceenabled)
		runtime·racegoend();
	// Switch to g0; goexit0 performs the actual cleanup and never
	// returns to this goroutine.
	runtime·mcall(goexit0);
}
|
|
|
|
|
|
|
|
// runtime·goexit continuation on g0.
//
// Final teardown of a finished goroutine gp: marks it dead, severs all
// m/lock associations, clears per-goroutine state so the G can be
// recycled, returns it to the P's free list, and re-enters the
// scheduler on this m. Never returns.
static void
goexit0(G *gp)
{
	runtime·casgstatus(gp, Grunning, Gdead);
	// Break the g <-> m and lockedm/lockedg links in both directions.
	gp->m = nil;
	gp->lockedm = nil;
	g->m->lockedg = nil;
	// Reset per-goroutine state so a recycled G starts clean.
	gp->paniconfault = 0;
	gp->defer = nil; // should be true already but just in case.
	gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
	gp->writebuf.array = nil;
	gp->writebuf.len = 0;
	gp->writebuf.cap = 0;
	gp->waitreason.str = nil;
	gp->waitreason.len = 0;
	gp->param = nil;

	dropg();

	// A goroutine that locked the OS thread via the runtime-internal
	// mechanism must not exit while still locked; only the external
	// (LockOSThread) bit may remain.
	if(g->m->locked & ~LockExternal) {
		runtime·printf("invalid m->locked = %d\n", g->m->locked);
		runtime·throw("internal lockOSThread error");
	}
	g->m->locked = 0;
	runtime·unwindstack(gp, nil);
	// Cache the dead G on this P's free list for reuse by newproc.
	gfput(g->m->p, gp);
	schedule();
}
|
|
|
|
|
2013-08-12 14:47:18 -06:00
|
|
|
#pragma textflag NOSPLIT
|
2013-06-12 13:22:26 -06:00
|
|
|
static void
|
|
|
|
save(void *pc, uintptr sp)
|
|
|
|
{
|
|
|
|
g->sched.pc = (uintptr)pc;
|
|
|
|
g->sched.sp = sp;
|
|
|
|
g->sched.lr = 0;
|
|
|
|
g->sched.ret = 0;
|
|
|
|
g->sched.ctxt = 0;
|
|
|
|
g->sched.g = g;
|
|
|
|
}
|
|
|
|
|
2008-11-25 17:48:10 -07:00
|
|
|
// The goroutine g is about to enter a system call.
|
|
|
|
// Record that it's not using the cpu anymore.
|
2010-04-09 14:30:35 -06:00
|
|
|
// This is called only from the go syscall library and cgocall,
|
|
|
|
// not from the low-level system calls used by the runtime.
|
runtime: stack split + garbage collection bug
The g->sched.sp saved stack pointer and the
g->stackbase and g->stackguard stack bounds
can change even while "the world is stopped",
because a goroutine has to call functions (and
therefore might split its stack) when exiting a
system call to check whether the world is stopped
(and if so, wait until the world continues).
That means the garbage collector cannot access
those values safely (without a race) for goroutines
executing system calls. Instead, save a consistent
triple in g->gcsp, g->gcstack, g->gcguard during
entersyscall and have the garbage collector refer
to those.
The old code was occasionally seeing (because of
the race) an sp and stk that did not correspond to
each other, so that stk - sp was not the number of
stack bytes following sp. In that case, if sp < stk
then the call scanblock(sp, stk - sp) scanned too
many bytes (anything between the two pointers,
which pointed into different allocation blocks).
If sp > stk then stk - sp wrapped around.
On 32-bit, stk - sp is a uintptr (uint32) converted
to int64 in the call to scanblock, so a large (~4G)
but positive number. Scanblock would try to scan
that many bytes and eventually fault accessing
unmapped memory. On 64-bit, stk - sp is a uintptr (uint64)
promoted to int64 in the call to scanblock, so a negative
number. Scanblock would not scan anything, possibly
causing in-use blocks to be freed.
In short, 32-bit platforms would have seen either
ineffective garbage collection or crashes during garbage
collection, while 64-bit platforms would have seen
either ineffective or incorrect garbage collection.
You can see the invalid arguments to scanblock in the
stack traces in issue 1620.
Fixes #1620.
Fixes #1746.
R=iant, r
CC=golang-dev
https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
|
|
|
//
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
// Entersyscall cannot split the stack: the runtime·gosave must
|
runtime: stack split + garbage collection bug
The g->sched.sp saved stack pointer and the
g->stackbase and g->stackguard stack bounds
can change even while "the world is stopped",
because a goroutine has to call functions (and
therefore might split its stack) when exiting a
system call to check whether the world is stopped
(and if so, wait until the world continues).
That means the garbage collector cannot access
those values safely (without a race) for goroutines
executing system calls. Instead, save a consistent
triple in g->gcsp, g->gcstack, g->gcguard during
entersyscall and have the garbage collector refer
to those.
The old code was occasionally seeing (because of
the race) an sp and stk that did not correspond to
each other, so that stk - sp was not the number of
stack bytes following sp. In that case, if sp < stk
then the call scanblock(sp, stk - sp) scanned too
many bytes (anything between the two pointers,
which pointed into different allocation blocks).
If sp > stk then stk - sp wrapped around.
On 32-bit, stk - sp is a uintptr (uint32) converted
to int64 in the call to scanblock, so a large (~4G)
but positive number. Scanblock would try to scan
that many bytes and eventually fault accessing
unmapped memory. On 64-bit, stk - sp is a uintptr (uint64)
promoted to int64 in the call to scanblock, so a negative
number. Scanblock would not scan anything, possibly
causing in-use blocks to be freed.
In short, 32-bit platforms would have seen either
ineffective garbage collection or crashes during garbage
collection, while 64-bit platforms would have seen
either ineffective or incorrect garbage collection.
You can see the invalid arguments to scanblock in the
stack traces in issue 1620.
Fixes #1620.
Fixes #1746.
R=iant, r
CC=golang-dev
https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
|
|
|
// make g->sched refer to the caller's stack segment, because
|
|
|
|
// entersyscall is going to return immediately after.
|
2013-08-12 14:47:18 -06:00
|
|
|
#pragma textflag NOSPLIT
|
2008-11-25 17:48:10 -07:00
|
|
|
void
|
2013-03-01 04:49:16 -07:00
|
|
|
·entersyscall(int32 dummy)
|
2008-08-05 15:18:47 -06:00
|
|
|
{
|
2013-07-29 12:22:34 -06:00
|
|
|
// Disable preemption because during this function g is in Gsyscall status,
|
|
|
|
// but can have inconsistent g->sched, do not let GC observe it.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++;
|
2013-07-29 12:22:34 -06:00
|
|
|
|
2013-08-06 03:38:44 -06:00
|
|
|
// Leave SP around for GC and traceback.
|
2013-06-12 13:22:26 -06:00
|
|
|
save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
|
2013-08-06 03:38:44 -06:00
|
|
|
g->syscallsp = g->sched.sp;
|
|
|
|
g->syscallpc = g->sched.pc;
|
|
|
|
g->syscallstack = g->stackbase;
|
|
|
|
g->syscallguard = g->stackguard;
|
2014-08-27 09:15:47 -06:00
|
|
|
runtime·casgstatus(g, Grunning, Gsyscall);
|
2013-08-06 03:38:44 -06:00
|
|
|
if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
|
2013-02-20 09:21:45 -07:00
|
|
|
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
|
2013-08-06 03:38:44 -06:00
|
|
|
// g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
|
2013-02-20 09:21:45 -07:00
|
|
|
runtime·throw("entersyscall");
|
|
|
|
}
|
|
|
|
|
2013-03-01 12:57:05 -07:00
|
|
|
if(runtime·atomicload(&runtime·sched.sysmonwait)) { // TODO: fast atomic
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-03-01 12:57:05 -07:00
|
|
|
if(runtime·atomicload(&runtime·sched.sysmonwait)) {
|
|
|
|
runtime·atomicstore(&runtime·sched.sysmonwait, 0);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·notewakeup(&runtime·sched.sysmonnote);
|
|
|
|
}
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-06-12 13:22:26 -06:00
|
|
|
save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
|
2013-02-20 09:21:45 -07:00
|
|
|
}
|
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->mcache = nil;
|
|
|
|
g->m->p->m = nil;
|
|
|
|
runtime·atomicstore(&g->m->p->status, Psyscall);
|
2013-08-15 04:32:10 -06:00
|
|
|
if(runtime·sched.gcwaiting) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if (runtime·sched.stopwait > 0 && runtime·cas(&g->m->p->status, Psyscall, Pgcstop)) {
|
2013-03-01 04:49:16 -07:00
|
|
|
if(--runtime·sched.stopwait == 0)
|
|
|
|
runtime·notewakeup(&runtime·sched.stopnote);
|
|
|
|
}
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-06-12 13:22:26 -06:00
|
|
|
save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
2013-07-29 12:22:34 -06:00
|
|
|
|
|
|
|
// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
|
|
|
|
// We set stackguard to StackPreempt so that first split stack check calls morestack.
|
|
|
|
// Morestack detects this case and throws.
|
|
|
|
g->stackguard0 = StackPreempt;
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
2013-02-20 09:21:45 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
|
2013-08-12 14:47:18 -06:00
|
|
|
#pragma textflag NOSPLIT
|
2013-02-20 09:21:45 -07:00
|
|
|
void
|
2013-03-01 04:49:16 -07:00
|
|
|
·entersyscallblock(int32 dummy)
|
2013-02-20 09:21:45 -07:00
|
|
|
{
|
2013-03-01 04:49:16 -07:00
|
|
|
P *p;
|
2013-02-20 09:21:45 -07:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++; // see comment in entersyscall
|
2013-07-29 12:22:34 -06:00
|
|
|
|
2013-08-06 03:38:44 -06:00
|
|
|
// Leave SP around for GC and traceback.
|
2013-06-12 13:22:26 -06:00
|
|
|
save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
|
2013-08-06 03:38:44 -06:00
|
|
|
g->syscallsp = g->sched.sp;
|
|
|
|
g->syscallpc = g->sched.pc;
|
|
|
|
g->syscallstack = g->stackbase;
|
|
|
|
g->syscallguard = g->stackguard;
|
2014-08-27 09:15:47 -06:00
|
|
|
runtime·casgstatus(g, Grunning, Gsyscall);
|
2013-08-06 03:38:44 -06:00
|
|
|
if(g->syscallsp < g->syscallguard-StackGuard || g->syscallstack < g->syscallsp) {
|
|
|
|
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
|
|
|
|
// g->syscallsp, g->syscallguard-StackGuard, g->syscallstack);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("entersyscallblock");
|
2011-07-19 09:01:17 -06:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
p = releasep();
|
|
|
|
handoffp(p);
|
2013-06-12 13:22:26 -06:00
|
|
|
|
|
|
|
// Resave for traceback during blocked call.
|
|
|
|
save(runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
|
2013-07-29 12:22:34 -06:00
|
|
|
|
|
|
|
g->stackguard0 = StackPreempt; // see comment in entersyscall
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
2008-11-25 17:48:10 -07:00
|
|
|
}
|
|
|
|
|
2014-08-22 12:13:01 -06:00
|
|
|
// The same as runtime·entersyscallblock(), but called on g0 stack.
|
|
|
|
void
|
|
|
|
runtime·entersyscallblock_m(void)
|
|
|
|
{
|
|
|
|
G *gp;
|
|
|
|
|
|
|
|
gp = g->m->curg;
|
|
|
|
// sched.{g,pc,sp,lr} are already set by mcall.
|
|
|
|
gp->stackguard0 = StackPreempt; // we are on g0, the goroutine must not touch its stack until exitsyscall
|
|
|
|
gp->sched.ret = 0;
|
|
|
|
gp->sched.ctxt = 0;
|
|
|
|
gp->syscallsp = gp->sched.sp;
|
|
|
|
gp->syscallpc = gp->sched.pc;
|
|
|
|
gp->syscallstack = gp->stackbase;
|
|
|
|
gp->syscallguard = gp->stackguard;
|
2014-08-27 09:15:47 -06:00
|
|
|
runtime·casgstatus(gp, Grunning, Gsyscall);
|
2014-08-22 12:13:01 -06:00
|
|
|
if(gp->syscallsp < gp->syscallguard-StackGuard || gp->syscallstack < gp->syscallsp) {
|
|
|
|
// runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
|
|
|
|
// gp->syscallsp, gp->syscallguard-StackGuard, gp->syscallstack);
|
|
|
|
runtime·throw("entersyscall_m");
|
|
|
|
}
|
|
|
|
|
|
|
|
handoffp(releasep());
|
|
|
|
}
|
|
|
|
|
2008-11-25 17:48:10 -07:00
|
|
|
// The goroutine g exited its system call.
|
|
|
|
// Arrange for it to run on a cpu again.
|
|
|
|
// This is called only from the go syscall library, not
|
|
|
|
// from the low-level system calls used by the runtime.
|
2013-08-12 14:47:18 -06:00
|
|
|
#pragma textflag NOSPLIT
|
2008-11-25 17:48:10 -07:00
|
|
|
void
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·exitsyscall(void)
|
2008-11-25 17:48:10 -07:00
|
|
|
{
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++; // see comment in entersyscall
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2014-01-16 01:54:46 -07:00
|
|
|
g->waitsince = 0;
|
2013-07-29 12:22:34 -06:00
|
|
|
if(exitsyscallfast()) {
|
2011-07-23 10:22:55 -06:00
|
|
|
// There's a cpu for us, so we can run.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->p->syscalltick++;
|
2014-08-27 09:15:47 -06:00
|
|
|
// We need to cas the status and scan before resuming...
|
|
|
|
runtime·casgstatus(g, Gsyscall, Grunning);
|
|
|
|
|
2011-07-23 10:22:55 -06:00
|
|
|
// Garbage collector isn't running (since we are),
|
2013-03-01 04:49:16 -07:00
|
|
|
// so okay to clear gcstack and gcsp.
|
2013-08-06 03:38:44 -06:00
|
|
|
g->syscallstack = (uintptr)nil;
|
|
|
|
g->syscallsp = (uintptr)nil;
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
2013-07-29 12:22:34 -06:00
|
|
|
if(g->preempt) {
|
|
|
|
// restore the preemption request in case we've cleared it in newstack
|
2013-07-17 10:52:37 -06:00
|
|
|
g->stackguard0 = StackPreempt;
|
2013-07-29 12:22:34 -06:00
|
|
|
} else {
|
|
|
|
// otherwise restore the real stackguard, we've spoiled it in entersyscall/entersyscallblock
|
|
|
|
g->stackguard0 = g->stackguard;
|
|
|
|
}
|
2011-07-23 10:22:55 -06:00
|
|
|
return;
|
2008-08-05 15:18:47 -06:00
|
|
|
}
|
2011-07-19 09:01:17 -06:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
2008-08-05 15:21:42 -06:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Call the scheduler.
|
|
|
|
runtime·mcall(exitsyscall0);
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Scheduler returned, so we're allowed to run now.
|
runtime: stack split + garbage collection bug
The g->sched.sp saved stack pointer and the
g->stackbase and g->stackguard stack bounds
can change even while "the world is stopped",
because a goroutine has to call functions (and
therefore might split its stack) when exiting a
system call to check whether the world is stopped
(and if so, wait until the world continues).
That means the garbage collector cannot access
those values safely (without a race) for goroutines
executing system calls. Instead, save a consistent
triple in g->gcsp, g->gcstack, g->gcguard during
entersyscall and have the garbage collector refer
to those.
The old code was occasionally seeing (because of
the race) an sp and stk that did not correspond to
each other, so that stk - sp was not the number of
stack bytes following sp. In that case, if sp < stk
then the call scanblock(sp, stk - sp) scanned too
many bytes (anything between the two pointers,
which pointed into different allocation blocks).
If sp > stk then stk - sp wrapped around.
On 32-bit, stk - sp is a uintptr (uint32) converted
to int64 in the call to scanblock, so a large (~4G)
but positive number. Scanblock would try to scan
that many bytes and eventually fault accessing
unmapped memory. On 64-bit, stk - sp is a uintptr (uint64)
promoted to int64 in the call to scanblock, so a negative
number. Scanblock would not scan anything, possibly
causing in-use blocks to be freed.
In short, 32-bit platforms would have seen either
ineffective garbage collection or crashes during garbage
collection, while 64-bit platforms would have seen
either ineffective or incorrect garbage collection.
You can see the invalid arguments to scanblock in the
stack traces in issue 1620.
Fixes #1620.
Fixes #1746.
R=iant, r
CC=golang-dev
https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
|
|
|
// Delete the gcstack information that we left for
|
|
|
|
// the garbage collector during the system call.
|
|
|
|
// Must wait until now because until gosched returns
|
|
|
|
// we don't know for sure that the garbage collector
|
|
|
|
// is not running.
|
2013-08-06 03:38:44 -06:00
|
|
|
g->syscallstack = (uintptr)nil;
|
|
|
|
g->syscallsp = (uintptr)nil;
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->p->syscalltick++;
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
|
2013-08-12 14:47:18 -06:00
|
|
|
#pragma textflag NOSPLIT
|
2013-07-29 12:22:34 -06:00
|
|
|
static bool
|
|
|
|
exitsyscallfast(void)
|
|
|
|
{
|
|
|
|
P *p;
|
|
|
|
|
2013-08-09 02:53:35 -06:00
|
|
|
// Freezetheworld sets stopwait but does not retake P's.
|
|
|
|
if(runtime·sched.stopwait) {
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->p = nil;
|
2013-08-09 02:53:35 -06:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-07-29 12:22:34 -06:00
|
|
|
// Try to re-acquire the last P.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->p && g->m->p->status == Psyscall && runtime·cas(&g->m->p->status, Psyscall, Prunning)) {
|
2013-07-29 12:22:34 -06:00
|
|
|
// There's a cpu for us, so we can run.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->mcache = g->m->p->mcache;
|
|
|
|
g->m->p->m = g->m;
|
2013-07-29 12:22:34 -06:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// Try to get any other idle P.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->p = nil;
|
2013-07-29 12:22:34 -06:00
|
|
|
if(runtime·sched.pidle) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-07-29 12:22:34 -06:00
|
|
|
p = pidleget();
|
2013-07-31 10:09:03 -06:00
|
|
|
if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
|
|
|
|
runtime·atomicstore(&runtime·sched.sysmonwait, 0);
|
|
|
|
runtime·notewakeup(&runtime·sched.sysmonnote);
|
|
|
|
}
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-07-29 12:22:34 -06:00
|
|
|
if(p) {
|
|
|
|
acquirep(p);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// runtime·exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
static void
exitsyscall0(G *gp)
{
	P *p;

	// gp is done with its system call; make it Grunnable before it
	// becomes visible to other schedulers via the run queues.
	runtime·casgstatus(gp, Gsyscall, Grunnable);
	// Disassociate gp from this m so another m may pick it up.
	dropg();
	runtime·lock(&runtime·sched.lock);
	// Prefer to grab an idle P and keep running gp on this m.
	p = pidleget();
	if(p == nil)
		globrunqput(gp);	// no idle P: hand gp to the global run queue
	else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
		// sysmon parks itself when idle; wake it now that there is work.
		runtime·atomicstore(&runtime·sched.sysmonwait, 0);
		runtime·notewakeup(&runtime·sched.sysmonnote);
	}
	runtime·unlock(&runtime·sched.lock);
	if(p) {
		// Got a P: resume gp directly on this m.
		acquirep(p);
		execute(gp);  // Never returns.
	}
	if(g->m->lockedg) {
		// Wait until another thread schedules gp and so m again.
		stoplockedm();
		execute(gp);  // Never returns.
	}
	// Nothing for this m to do: park it and re-enter the scheduler.
	stopm();
	schedule();  // Never returns.
}
|
|
|
|
|
2013-08-13 03:01:30 -06:00
|
|
|
// Called from syscall package before fork.
// NOSPLIT: a stack split here would later be unsplit with the spoiled
// g->stackguard set below, tripping the runtime's stack checks
// (see issue 7567).
#pragma textflag NOSPLIT
void
syscall·runtime_BeforeFork(void)
{
	// Fork can hang if preempted with signals frequently enough (see issue 5517).
	// Ensure that we stay on the same M where we disable profiling.
	g->m->locks++;
	if(g->m->profilehz != 0)
		runtime·resetcpuprofiler(0);

	// This function is called before fork in syscall package.
	// Code between fork and exec must not allocate memory nor even try to grow stack.
	// Here we spoil g->stackguard to reliably detect any attempts to grow stack.
	// runtime_AfterFork will undo this in parent process, but not in child.
	g->m->forkstackguard = g->stackguard;
	g->stackguard0 = StackPreempt-1;
	g->stackguard = StackPreempt-1;
}
|
|
|
|
|
|
|
|
// Called from syscall package after fork in parent.
// NOSPLIT so the spoiled stack guard is restored before any stack
// growth can be attempted.
#pragma textflag NOSPLIT
void
syscall·runtime_AfterFork(void)
{
	int32 hz;

	// See the comment in runtime_BeforeFork.
	// Restore the real stack guard saved there (parent process only;
	// the child does not run this).
	g->stackguard0 = g->m->forkstackguard;
	g->stackguard = g->m->forkstackguard;
	g->m->forkstackguard = 0;

	// Re-enable CPU profiling if it was active before the fork.
	hz = runtime·sched.profilehz;
	if(hz != 0)
		runtime·resetcpuprofiler(hz);
	g->m->locks--;	// matches g->m->locks++ in runtime_BeforeFork
}
|
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Hook used by runtime·malg to call runtime·stackalloc on the
// scheduler stack. This exists because runtime·stackalloc insists
// on being called on the scheduler stack, to avoid trying to grow
// the stack while allocating a new stack segment.
static void
mstackalloc(G *gp)
{
	G *newg;
	uintptr size;

	// The caller passed the new G through gp->param and the desired
	// stack size through newg->stacksize.
	newg = (G*)gp->param;
	size = newg->stacksize;
	newg->stacksize = 0;	// request consumed; clear it
	// Hand the freshly allocated stack back through gp->param.
	gp->param = runtime·stackalloc(newg, size);
	// Switch back to gp, resuming it where it called in.
	runtime·gogo(&gp->sched);
}
|
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Allocate a new g, with a stack big enough for stacksize bytes.
|
2009-04-01 01:26:00 -06:00
|
|
|
G*
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·malg(int32 stacksize)
|
2009-04-01 01:26:00 -06:00
|
|
|
{
|
runtime: always run stackalloc on scheduler stack
Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.
The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.
runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...
R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
|
|
|
G *newg;
|
2009-04-01 01:26:00 -06:00
|
|
|
byte *stk;
|
2012-03-15 20:17:54 -06:00
|
|
|
|
2012-03-15 13:22:30 -06:00
|
|
|
if(StackTop < sizeof(Stktop)) {
|
|
|
|
runtime·printf("runtime: SizeofStktop=%d, should be >=%d\n", (int32)StackTop, (int32)sizeof(Stktop));
|
|
|
|
runtime·throw("runtime: bad stack.h");
|
|
|
|
}
|
2009-04-01 01:26:00 -06:00
|
|
|
|
2014-04-28 10:47:09 -06:00
|
|
|
newg = allocg();
|
2010-02-10 01:00:12 -07:00
|
|
|
if(stacksize >= 0) {
|
2014-03-07 13:11:16 -07:00
|
|
|
stacksize = runtime·round2(StackSystem + stacksize);
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g == g->m->g0) {
|
runtime: always run stackalloc on scheduler stack
Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.
The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.
runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...
R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
|
|
|
// running on scheduler stack already.
|
2014-03-07 13:11:16 -07:00
|
|
|
stk = runtime·stackalloc(newg, stacksize);
|
runtime: always run stackalloc on scheduler stack
Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.
The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.
runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...
R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
|
|
|
} else {
|
|
|
|
// have to call stackalloc on scheduler stack.
|
2014-03-07 13:11:16 -07:00
|
|
|
newg->stacksize = stacksize;
|
2014-03-07 09:52:29 -07:00
|
|
|
g->param = newg;
|
runtime: scheduler, cgo reorganization
* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).
Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).
The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler. If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless. Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.
* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.
Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.
The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).
The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc. Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.
Fixes #1560.
R=iant
CC=golang-dev
https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
|
|
|
runtime·mcall(mstackalloc);
|
runtime: always run stackalloc on scheduler stack
Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.
The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.
runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...
R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
|
|
|
stk = g->param;
|
|
|
|
g->param = nil;
|
|
|
|
}
|
2012-05-30 11:07:52 -06:00
|
|
|
newg->stack0 = (uintptr)stk;
|
|
|
|
newg->stackguard = (uintptr)stk + StackGuard;
|
2013-06-03 02:28:24 -06:00
|
|
|
newg->stackguard0 = newg->stackguard;
|
2014-03-07 13:11:16 -07:00
|
|
|
newg->stackbase = (uintptr)stk + stacksize - sizeof(Stktop);
|
2010-02-10 01:00:12 -07:00
|
|
|
}
|
runtime: always run stackalloc on scheduler stack
Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.
The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.
runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...
R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
2011-02-23 13:51:20 -07:00
|
|
|
return newg;
|
2009-04-01 01:26:00 -06:00
|
|
|
}
|
|
|
|
|
2014-08-23 08:42:30 -06:00
|
|
|
static void
|
|
|
|
newproc_m(void)
|
|
|
|
{
|
|
|
|
byte *argp;
|
|
|
|
void *callerpc;
|
|
|
|
FuncVal *fn;
|
|
|
|
int32 siz;
|
|
|
|
|
|
|
|
siz = g->m->scalararg[0];
|
|
|
|
callerpc = (void*)g->m->scalararg[1];
|
|
|
|
argp = g->m->ptrarg[0];
|
|
|
|
fn = (FuncVal*)g->m->ptrarg[1];
|
|
|
|
|
|
|
|
runtime·newproc1(fn, argp, siz, 0, callerpc);
|
|
|
|
g->m->ptrarg[0] = nil;
|
|
|
|
g->m->ptrarg[1] = nil;
|
|
|
|
}
|
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Create a new g running fn with siz bytes of arguments.
|
|
|
|
// Put it on the queue of g's waiting to run.
|
|
|
|
// The compiler turns a go statement into a call to this.
|
|
|
|
// Cannot split the stack because it assumes that the arguments
|
|
|
|
// are available sequentially after &fn; they would not be
|
2014-08-23 08:42:30 -06:00
|
|
|
// copied if a stack split occurred.
|
2013-08-12 14:47:18 -06:00
|
|
|
#pragma textflag NOSPLIT
|
2009-04-01 01:26:00 -06:00
|
|
|
void
|
2013-02-21 15:01:13 -07:00
|
|
|
runtime·newproc(int32 siz, FuncVal* fn, ...)
|
2010-02-08 22:41:54 -07:00
|
|
|
{
|
2011-01-14 12:05:20 -07:00
|
|
|
byte *argp;
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2011-01-14 12:05:20 -07:00
|
|
|
if(thechar == '5')
|
|
|
|
argp = (byte*)(&fn+2); // skip caller's saved LR
|
|
|
|
else
|
|
|
|
argp = (byte*)(&fn+1);
|
2014-08-23 08:42:30 -06:00
|
|
|
|
|
|
|
g->m->locks++;
|
|
|
|
g->m->scalararg[0] = siz;
|
|
|
|
g->m->scalararg[1] = (uintptr)runtime·getcallerpc(&siz);
|
|
|
|
g->m->ptrarg[0] = argp;
|
|
|
|
g->m->ptrarg[1] = fn;
|
|
|
|
runtime·onM(newproc_m);
|
|
|
|
g->m->locks--;
|
2010-02-08 22:41:54 -07:00
|
|
|
}
|
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Create a new g running fn with narg bytes of arguments starting
|
|
|
|
// at argp and returning nret bytes of results. callerpc is the
|
|
|
|
// address of the go statement that created this. The new g is put
|
|
|
|
// on the queue of g's waiting to run.
|
2010-03-26 15:15:30 -06:00
|
|
|
G*
|
2013-02-21 15:01:13 -07:00
|
|
|
runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
|
2009-04-01 01:26:00 -06:00
|
|
|
{
|
2010-02-10 01:00:12 -07:00
|
|
|
byte *sp;
|
2009-04-01 01:26:00 -06:00
|
|
|
G *newg;
|
2014-01-21 23:34:36 -07:00
|
|
|
P *p;
|
2010-02-08 22:41:54 -07:00
|
|
|
int32 siz;
|
2009-04-01 01:26:00 -06:00
|
|
|
|
2014-05-27 22:00:01 -06:00
|
|
|
if(fn == nil) {
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->throwing = -1; // do not dump full stacks
|
2014-05-27 22:00:01 -06:00
|
|
|
runtime·throw("go of nil func value");
|
|
|
|
}
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++; // disable preemption because it can be holding p in a local var
|
2010-02-08 22:41:54 -07:00
|
|
|
siz = narg + nret;
|
2009-04-01 01:26:00 -06:00
|
|
|
siz = (siz+7) & ~7;
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2011-07-27 10:41:46 -06:00
|
|
|
// We could instead create a secondary stack frame
|
|
|
|
// and make it look like goexit was on the original but
|
|
|
|
// the call to the actual goroutine function was split.
|
|
|
|
// Not worth it: this is almost always an error.
|
|
|
|
if(siz > StackMin - 1024)
|
|
|
|
runtime·throw("runtime.newproc: function arguments too large for new goroutine");
|
2009-04-01 01:26:00 -06:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
p = g->m->p;
|
2014-01-21 23:34:36 -07:00
|
|
|
if((newg = gfget(p)) != nil) {
|
2011-05-16 14:57:49 -06:00
|
|
|
if(newg->stackguard - StackGuard != newg->stack0)
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·throw("invalid stack in newg");
|
2009-04-01 01:26:00 -06:00
|
|
|
} else {
|
2011-02-22 15:40:40 -07:00
|
|
|
newg = runtime·malg(StackMin);
|
2014-08-27 09:15:47 -06:00
|
|
|
runtime·casgstatus(newg, Gidle, Gdead);
|
|
|
|
allgadd(newg); // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
|
2009-04-01 01:26:00 -06:00
|
|
|
}
|
|
|
|
|
2014-08-27 09:15:47 -06:00
|
|
|
if(runtime·readgstatus(newg) != Gdead)
|
|
|
|
runtime·throw("newproc1: new g is not Gdead");
|
|
|
|
|
2012-05-30 11:07:52 -06:00
|
|
|
sp = (byte*)newg->stackbase;
|
2009-04-01 01:26:00 -06:00
|
|
|
sp -= siz;
|
2011-07-12 18:30:40 -06:00
|
|
|
runtime·memmove(sp, argp, narg);
|
2011-01-14 12:05:20 -07:00
|
|
|
if(thechar == '5') {
|
|
|
|
// caller's LR
|
|
|
|
sp -= sizeof(void*);
|
|
|
|
*(void**)sp = nil;
|
|
|
|
}
|
2009-04-01 01:26:00 -06:00
|
|
|
|
2013-06-12 13:22:26 -06:00
|
|
|
runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
|
2012-05-30 11:07:52 -06:00
|
|
|
newg->sched.sp = (uintptr)sp;
|
2013-06-12 06:49:38 -06:00
|
|
|
newg->sched.pc = (uintptr)runtime·goexit;
|
2009-06-17 16:12:16 -06:00
|
|
|
newg->sched.g = newg;
|
2013-06-12 13:22:26 -06:00
|
|
|
runtime·gostartcallfn(&newg->sched, fn);
|
2011-03-02 11:42:02 -07:00
|
|
|
newg->gopc = (uintptr)callerpc;
|
2014-08-27 09:15:47 -06:00
|
|
|
runtime·casgstatus(newg, Gdead, Grunnable);
|
|
|
|
|
2014-01-21 23:34:36 -07:00
|
|
|
if(p->goidcache == p->goidcacheend) {
|
2014-07-16 02:19:33 -06:00
|
|
|
// Sched.goidgen is the last allocated id,
|
|
|
|
// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
|
|
|
|
// At startup sched.goidgen=0, so main goroutine receives goid=1.
|
2014-01-21 23:34:36 -07:00
|
|
|
p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
|
2014-07-16 02:19:33 -06:00
|
|
|
p->goidcache -= GoidCacheBatch - 1;
|
2014-01-21 23:34:36 -07:00
|
|
|
p->goidcacheend = p->goidcache + GoidCacheBatch;
|
|
|
|
}
|
|
|
|
newg->goid = p->goidcache++;
|
runtime, cmd/gc, cmd/ld: ignore method wrappers in recover
Bug #1:
Issue 5406 identified an interesting case:
defer iface.M()
may end up calling a wrapper that copies an indirect receiver
from the iface value and then calls the real M method. That's
two calls down, not just one, and so recover() == nil always
in the real M method, even during a panic.
[For the purposes of this entire discussion, a wrapper's
implementation is a function containing an ordinary call, not
the optimized tail call form that is somtimes possible. The
tail call does not create a second frame, so it is already
handled correctly.]
Fix this bug by introducing g->panicwrap, which counts the
number of bytes on current stack segment that are due to
wrapper calls that should not count against the recover
check. All wrapper functions must now adjust g->panicwrap up
on entry and back down on exit. This adds slightly to their
expense; on the x86 it is a single instruction at entry and
exit; on the ARM it is three. However, the alternative is to
make a call to recover depend on being able to walk the stack,
which I very much want to avoid. We have enough problems
walking the stack for garbage collection and profiling.
Also, if performance is critical in a specific case, it is already
faster to use a pointer receiver and avoid this kind of wrapper
entirely.
Bug #2:
The old code, which did not consider the possibility of two
calls, already contained a check to see if the call had split
its stack and so the panic-created segment was one behind the
current segment. In the wrapper case, both of the two calls
might split their stacks, so the panic-created segment can be
two behind the current segment.
Fix this by propagating the Stktop.panic flag forward during
stack splits instead of looking backward during recover.
Fixes #5406.
R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/13367052
2013-09-12 12:00:16 -06:00
|
|
|
newg->panicwrap = 0;
|
2013-02-06 00:40:54 -07:00
|
|
|
if(raceenabled)
|
2013-06-12 06:49:38 -06:00
|
|
|
newg->racectx = runtime·racegostart((void*)callerpc);
|
2014-01-21 23:34:36 -07:00
|
|
|
runqput(p, newg);
|
2009-04-01 01:26:00 -06:00
|
|
|
|
2013-03-01 12:57:05 -07:00
|
|
|
if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main) // TODO: fast atomic
|
2013-03-01 04:49:16 -07:00
|
|
|
wakep();
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
|
|
|
if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack
|
2013-07-17 10:52:37 -06:00
|
|
|
g->stackguard0 = StackPreempt;
|
2010-03-26 15:15:30 -06:00
|
|
|
return newg;
|
2009-04-01 01:26:00 -06:00
|
|
|
}
|
|
|
|
|
2014-01-21 02:06:57 -07:00
|
|
|
static void
|
|
|
|
allgadd(G *gp)
|
|
|
|
{
|
|
|
|
G **new;
|
|
|
|
uintptr cap;
|
|
|
|
|
2014-08-27 09:15:47 -06:00
|
|
|
if (runtime·readgstatus(gp) == Gidle)
|
|
|
|
runtime·throw("allgadd: bad status Gidle");
|
|
|
|
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·lock(&allglock);
|
|
|
|
if(runtime·allglen >= allgcap) {
|
|
|
|
cap = 4096/sizeof(new[0]);
|
|
|
|
if(cap < 2*allgcap)
|
|
|
|
cap = 2*allgcap;
|
2014-08-07 03:04:04 -06:00
|
|
|
new = runtime·mallocgc(cap*sizeof(new[0]), nil, 0);
|
2014-01-21 02:06:57 -07:00
|
|
|
if(new == nil)
|
|
|
|
runtime·throw("runtime: cannot allocate memory");
|
2014-07-31 02:55:40 -06:00
|
|
|
if(runtime·allg != nil)
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·memmove(new, runtime·allg, runtime·allglen*sizeof(new[0]));
|
|
|
|
runtime·allg = new;
|
2014-09-01 16:51:12 -06:00
|
|
|
runtime·allgs.array = (void*)runtime·allg;
|
2014-01-21 02:06:57 -07:00
|
|
|
allgcap = cap;
|
2014-09-01 16:51:12 -06:00
|
|
|
runtime·allgs.cap = allgcap;
|
2014-01-21 02:06:57 -07:00
|
|
|
}
|
|
|
|
runtime·allg[runtime·allglen++] = gp;
|
2014-09-01 16:51:12 -06:00
|
|
|
runtime·allgs.len = runtime·allglen;
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·unlock(&allglock);
|
|
|
|
}
|
|
|
|
|
2013-02-27 12:17:53 -07:00
|
|
|
// Put on gfree list.
|
|
|
|
// If local list is too long, transfer a batch to the global list.
|
2010-03-29 22:48:22 -06:00
|
|
|
static void
|
2013-02-27 12:17:53 -07:00
|
|
|
gfput(P *p, G *gp)
|
2010-03-29 22:48:22 -06:00
|
|
|
{
|
runtime: grow stack by copying
On stack overflow, if all frames on the stack are
copyable, we copy the frames to a new stack twice
as large as the old one. During GC, if a G is using
less than 1/4 of its stack, copy the stack to a stack
half its size.
TODO
- Do something about C frames. When a C frame is in the
stack segment, it isn't copyable. We allocate a new segment
in this case.
- For idempotent C code, we can abort it, copy the stack,
then retry. I'm working on a separate CL for this.
- For other C code, we can raise the stackguard
to the lowest Go frame so the next call that Go frame
makes triggers a copy, which will then succeed.
- Pick a starting stack size?
The plan is that eventually we reach a point where the
stack contains only copyable frames.
LGTM=rsc
R=dvyukov, rsc
CC=golang-codereviews
https://golang.org/cl/54650044
2014-02-27 00:28:44 -07:00
|
|
|
uintptr stksize;
|
2014-03-07 09:52:29 -07:00
|
|
|
Stktop *top;
|
runtime: grow stack by copying
On stack overflow, if all frames on the stack are
copyable, we copy the frames to a new stack twice
as large as the old one. During GC, if a G is using
less than 1/4 of its stack, copy the stack to a stack
half its size.
TODO
- Do something about C frames. When a C frame is in the
stack segment, it isn't copyable. We allocate a new segment
in this case.
- For idempotent C code, we can abort it, copy the stack,
then retry. I'm working on a separate CL for this.
- For other C code, we can raise the stackguard
to the lowest Go frame so the next call that Go frame
makes triggers a copy, which will then succeed.
- Pick a starting stack size?
The plan is that eventually we reach a point where the
stack contains only copyable frames.
LGTM=rsc
R=dvyukov, rsc
CC=golang-codereviews
https://golang.org/cl/54650044
2014-02-27 00:28:44 -07:00
|
|
|
|
2014-08-27 09:15:47 -06:00
|
|
|
if (runtime·readgstatus(gp) != Gdead)
|
|
|
|
runtime·throw("gfput: bad status (not Gdead)");
|
|
|
|
|
2012-07-03 02:54:13 -06:00
|
|
|
if(gp->stackguard - StackGuard != gp->stack0)
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
runtime·throw("invalid stack in gfput");
|
runtime: grow stack by copying
On stack overflow, if all frames on the stack are
copyable, we copy the frames to a new stack twice
as large as the old one. During GC, if a G is using
less than 1/4 of its stack, copy the stack to a stack
half its size.
TODO
- Do something about C frames. When a C frame is in the
stack segment, it isn't copyable. We allocate a new segment
in this case.
- For idempotent C code, we can abort it, copy the stack,
then retry. I'm working on a separate CL for this.
- For other C code, we can raise the stackguard
to the lowest Go frame so the next call that Go frame
makes triggers a copy, which will then succeed.
- Pick a starting stack size?
The plan is that eventually we reach a point where the
stack contains only copyable frames.
LGTM=rsc
R=dvyukov, rsc
CC=golang-codereviews
https://golang.org/cl/54650044
2014-02-27 00:28:44 -07:00
|
|
|
stksize = gp->stackbase + sizeof(Stktop) - gp->stack0;
|
2014-03-07 09:52:29 -07:00
|
|
|
if(stksize != gp->stacksize) {
|
|
|
|
runtime·printf("runtime: bad stacksize, goroutine %D, remain=%d, last=%d\n",
|
|
|
|
gp->goid, (int32)gp->stacksize, (int32)stksize);
|
|
|
|
runtime·throw("gfput: bad stacksize");
|
|
|
|
}
|
|
|
|
top = (Stktop*)gp->stackbase;
|
undo CL 101570044 / 2c57aaea79c4
redo stack allocation. This is mostly the same as
the original CL with a few bug fixes.
1. add racemalloc() for stack allocations
2. fix poolalloc/poolfree to terminate free lists correctly.
3. adjust span ref count correctly.
4. don't use cache for sizes >= StackCacheSize.
Should fix bugs and memory leaks in original changelist.
««« original CL description
undo CL 104200047 / 318b04f28372
Breaks windows and race detector.
TBR=rsc
««« original CL description
runtime: stack allocator, separate from mallocgc
In order to move malloc to Go, we need to have a
separate stack allocator. If we run out of stack
during malloc, malloc will not be available
to allocate a new stack.
Stacks are the last remaining FlagNoGC objects in the
GC heap. Once they are out, we can get rid of the
distinction between the allocated/blockboundary bits.
(This will be in a separate change.)
Fixes #7468
Fixes #7424
LGTM=rsc, dvyukov
R=golang-codereviews, dvyukov, khr, dave, rsc
CC=golang-codereviews
https://golang.org/cl/104200047
»»»
TBR=rsc
CC=golang-codereviews
https://golang.org/cl/101570044
»»»
LGTM=dvyukov
R=dvyukov, dave, khr, alex.brainman
CC=golang-codereviews
https://golang.org/cl/112240044
2014-07-17 15:41:46 -06:00
|
|
|
if(stksize != FixedStack) {
|
runtime: grow stack by copying
On stack overflow, if all frames on the stack are
copyable, we copy the frames to a new stack twice
as large as the old one. During GC, if a G is using
less than 1/4 of its stack, copy the stack to a stack
half its size.
TODO
- Do something about C frames. When a C frame is in the
stack segment, it isn't copyable. We allocate a new segment
in this case.
- For idempotent C code, we can abort it, copy the stack,
then retry. I'm working on a separate CL for this.
- For other C code, we can raise the stackguard
to the lowest Go frame so the next call that Go frame
makes triggers a copy, which will then succeed.
- Pick a starting stack size?
The plan is that eventually we reach a point where the
stack contains only copyable frames.
LGTM=rsc
R=dvyukov, rsc
CC=golang-codereviews
https://golang.org/cl/54650044
2014-02-27 00:28:44 -07:00
|
|
|
// non-standard stack size - free it.
|
2014-03-07 09:52:29 -07:00
|
|
|
runtime·stackfree(gp, (void*)gp->stack0, top);
|
runtime: grow stack by copying
On stack overflow, if all frames on the stack are
copyable, we copy the frames to a new stack twice
as large as the old one. During GC, if a G is using
less than 1/4 of its stack, copy the stack to a stack
half its size.
TODO
- Do something about C frames. When a C frame is in the
stack segment, it isn't copyable. We allocate a new segment
in this case.
- For idempotent C code, we can abort it, copy the stack,
then retry. I'm working on a separate CL for this.
- For other C code, we can raise the stackguard
to the lowest Go frame so the next call that Go frame
makes triggers a copy, which will then succeed.
- Pick a starting stack size?
The plan is that eventually we reach a point where the
stack contains only copyable frames.
LGTM=rsc
R=dvyukov, rsc
CC=golang-codereviews
https://golang.org/cl/54650044
2014-02-27 00:28:44 -07:00
|
|
|
gp->stack0 = 0;
|
|
|
|
gp->stackguard = 0;
|
|
|
|
gp->stackguard0 = 0;
|
|
|
|
gp->stackbase = 0;
|
|
|
|
}
|
2013-02-27 12:17:53 -07:00
|
|
|
gp->schedlink = p->gfree;
|
|
|
|
p->gfree = gp;
|
|
|
|
p->gfreecnt++;
|
|
|
|
if(p->gfreecnt >= 64) {
|
|
|
|
runtime·lock(&runtime·sched.gflock);
|
|
|
|
while(p->gfreecnt >= 32) {
|
|
|
|
p->gfreecnt--;
|
|
|
|
gp = p->gfree;
|
|
|
|
p->gfree = gp->schedlink;
|
|
|
|
gp->schedlink = runtime·sched.gfree;
|
|
|
|
runtime·sched.gfree = gp;
|
2014-07-17 11:51:03 -06:00
|
|
|
runtime·sched.ngfree++;
|
2013-02-27 12:17:53 -07:00
|
|
|
}
|
|
|
|
runtime·unlock(&runtime·sched.gflock);
|
|
|
|
}
|
2010-03-29 22:48:22 -06:00
|
|
|
}
|
|
|
|
|
2013-02-27 12:17:53 -07:00
|
|
|
// Get from gfree list.
// If local list is empty, grab a batch from global list.
// Returns nil if no cached G is available (caller then allocates a new one).
static G*
gfget(P *p)
{
	G *gp;
	byte *stk;

retry:
	gp = p->gfree;
	if(gp == nil && runtime·sched.gfree) {
		// Local list empty but global list non-empty: refill the
		// local cache (up to 32 G's) under gflock, then retry.
		runtime·lock(&runtime·sched.gflock);
		while(p->gfreecnt < 32 && runtime·sched.gfree != nil) {
			p->gfreecnt++;
			gp = runtime·sched.gfree;
			runtime·sched.gfree = gp->schedlink;
			runtime·sched.ngfree--;
			gp->schedlink = p->gfree;
			p->gfree = gp;
		}
		runtime·unlock(&runtime·sched.gflock);
		goto retry;
	}
	if(gp) {
		// Pop the head of the local free list.
		p->gfree = gp->schedlink;
		p->gfreecnt--;

		if(gp->stack0 == 0) {
			// Stack was deallocated in gfput.  Allocate a new one.
			if(g == g->m->g0) {
				// Already on the scheduler stack; allocate directly.
				stk = runtime·stackalloc(gp, FixedStack);
			} else {
				// Must allocate from the g0 stack: hand gp over
				// via g->param and switch with mcall; mstackalloc
				// returns the new stack back through g->param.
				gp->stacksize = FixedStack;
				g->param = gp;
				runtime·mcall(mstackalloc);
				stk = g->param;
				g->param = nil;
			}
			gp->stack0 = (uintptr)stk;
			gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop);
			gp->stackguard = (uintptr)stk + StackGuard;
			gp->stackguard0 = gp->stackguard;
		} else {
			// Reusing the retained stack; inform the race detector
			// so it treats the region as freshly allocated.
			if(raceenabled)
				runtime·racemalloc((void*)gp->stack0, gp->stackbase + sizeof(Stktop) - gp->stack0);
		}
	}
	return gp;
}
|
2008-12-04 09:30:54 -07:00
|
|
|
|
2013-02-27 12:17:53 -07:00
|
|
|
// Purge all cached G's from gfree list to the global list.
// Moves every G on p's local free list onto runtime·sched.gfree,
// keeping gfreecnt/ngfree counters in sync, all under gflock.
static void
gfpurge(P *p)
{
	G *gp;

	runtime·lock(&runtime·sched.gflock);
	while(p->gfreecnt != 0) {
		p->gfreecnt--;
		gp = p->gfree;
		p->gfree = gp->schedlink;
		gp->schedlink = runtime·sched.gfree;
		runtime·sched.gfree = gp;
		runtime·sched.ngfree++;
	}
	runtime·unlock(&runtime·sched.gflock);
}
|
|
|
|
|
2009-05-08 16:21:41 -06:00
|
|
|
// runtime.Breakpoint: exported wrapper around the low-level,
// arch-specific runtime·breakpoint trap instruction.
void
runtime·Breakpoint(void)
{
	runtime·breakpoint();
}
|
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Implementation of runtime.GOMAXPROCS.
// delete when scheduler is even stronger
// Returns the previous setting.  If n <= 0 or equals the current value,
// this is a query only and nothing changes.  Otherwise the world is
// stopped and the requested count is recorded in newprocs
// (presumably applied when the world restarts — confirm in starttheworld).
int32
runtime·gomaxprocsfunc(int32 n)
{
	int32 ret;

	// Clamp to the scheduler's compile-time maximum.
	if(n > MaxGomaxprocs)
		n = MaxGomaxprocs;
	runtime·lock(&runtime·sched.lock);
	ret = runtime·gomaxprocs;
	if(n <= 0 || n == ret) {
		// No change requested: just report the current value.
		runtime·unlock(&runtime·sched.lock);
		return ret;
	}
	runtime·unlock(&runtime·sched.lock);

	// Changing the value requires exclusive access to the world:
	// acquire worldsema, stop everything, stash the new count,
	// then restart.
	runtime·semacquire(&runtime·worldsema, false);
	g->m->gcing = 1;
	runtime·stoptheworld();
	newprocs = n;
	g->m->gcing = 0;
	runtime·semrelease(&runtime·worldsema);
	runtime·starttheworld();

	return ret;
}
|
|
|
|
|
2013-08-13 12:37:04 -06:00
|
|
|
// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
// after they modify m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
#pragma textflag NOSPLIT
static void
lockOSThread(void)
{
	// Wire the current goroutine and its m to each other.
	g->m->lockedg = g;
	g->lockedm = g->m;
}
|
|
|
|
|
2009-07-13 18:28:39 -06:00
|
|
|
// Exported entry point for runtime.LockOSThread: record the external
// lock reason in m->locked, then wire g and m together.
void
runtime·LockOSThread(void)
{
	g->m->locked |= LockExternal;
	lockOSThread();
}
|
|
|
|
|
|
|
|
// Runtime-internal entry point.  LockInternal is added (not OR'ed):
// it acts as a counter so nested internal locks are tracked.
void
runtime·lockOSThread(void)
{
	g->m->locked += LockInternal;
	lockOSThread();
}
|
|
|
|
|
2013-08-13 12:37:04 -06:00
|
|
|
|
|
|
|
// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
#pragma textflag NOSPLIT
static void
unlockOSThread(void)
{
	// Only unwire g and m once every lock reason (external bit and
	// internal count) has been cleared.
	if(g->m->locked != 0)
		return;
	g->m->lockedg = nil;
	g->lockedm = nil;
}
|
2013-02-01 09:34:41 -07:00
|
|
|
|
|
|
|
// Exported entry point for runtime.UnlockOSThread: drop the external
// lock bit, then unwire g and m if no lock reasons remain.
void
runtime·UnlockOSThread(void)
{
	g->m->locked &= ~LockExternal;
	unlockOSThread();
}
|
|
|
|
|
|
|
|
// Runtime-internal entry point: decrement the internal lock count.
// An unlock without a matching internal lock is a runtime bug.
void
runtime·unlockOSThread(void)
{
	if(g->m->locked < LockInternal)
		runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
	g->m->locked -= LockInternal;
	unlockOSThread();
}
|
|
|
|
|
runtime: scheduler, cgo reorganization
* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).
Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).
The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler. If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless. Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.
* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simplify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.
Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.
The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).
The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc. Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.
Fixes #1560.
R=iant
CC=golang-dev
https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
|
|
|
bool
|
|
|
|
runtime·lockedOSThread(void)
|
|
|
|
{
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
return g->lockedm != nil && g->m->lockedg != nil;
|
runtime: scheduler, cgo reorganization
* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).
Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).
The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler. If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless. Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.
* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.
Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.
The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).
The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc. Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.
Fixes #1560.
R=iant
CC=golang-dev
https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
|
|
|
}
|
|
|
|
|
2012-02-22 19:45:01 -07:00
|
|
|
int32
|
|
|
|
runtime·gcount(void)
|
|
|
|
{
|
2014-07-17 11:51:03 -06:00
|
|
|
P *p, **pp;
|
|
|
|
int32 n;
|
2013-02-22 21:39:31 -07:00
|
|
|
|
2014-07-17 11:51:03 -06:00
|
|
|
n = runtime·allglen - runtime·sched.ngfree;
|
|
|
|
for(pp=runtime·allp; p=*pp; pp++)
|
|
|
|
n -= p->gfreecnt;
|
|
|
|
// All these variables can be changed concurrently, so the result can be inconsistent.
|
|
|
|
// But at least the current goroutine is running.
|
|
|
|
if(n < 1)
|
|
|
|
n = 1;
|
2013-02-22 21:39:31 -07:00
|
|
|
return n;
|
2012-02-22 19:45:01 -07:00
|
|
|
}
|
|
|
|
|
2011-01-28 13:03:26 -07:00
|
|
|
// runtime·mcount returns the number of m's (OS threads) that have
// been created. Read without holding the scheduler lock, so the
// value may be slightly stale by the time the caller uses it.
int32
runtime·mcount(void)
{
	return runtime·sched.mcount;
}
|
runtime: scheduler, cgo reorganization
* Change use of m->g0 stack (aka scheduler stack).
* Provide runtime.mcall(f) to invoke f() on m->g0 stack.
* Replace scheduler loop entry with runtime.mcall(schedule).
Runtime.mcall eliminates the need for fake scheduler states that
exist just to run a bit of code on the m->g0 stack
(Grecovery, Gstackalloc).
The elimination of the scheduler as a loop that stops and
starts using gosave and gogo fixes a bad interaction with the
way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled)
C functions on that stack, and then when calling back into Go,
it sets m->g0->sched.sp below the added call frames, so that
other uses of m->g0's stack will not interfere with those frames.
Unfortunately, gogo (longjmp) back to the scheduler loop at
this point would end up running scheduler with the lower
sp, which no longer points at a valid stack frame for
a call to scheduler. If scheduler then wrote any function call
arguments or local variables to where it expected the stack
frame to be, it would overwrite other data on the stack.
I realized this possibility while debugging a problem with
calling complex Go code in a Go -> C -> Go cgo callback.
This wasn't the bug I was looking for, it turns out, but I believe
it is a real bug nonetheless. Switching to runtime.mcall, which
only adds new frames to the stack and never jumps into
functions running in existing ones, fixes this bug.
* Move cgo-related code out of proc.c into cgocall.c.
* Add very large comment describing cgo call sequences.
* Simpilify, regularize cgo function implementations and names.
* Add test suite as misc/cgo/test.
Now the Go -> C path calls cgocall, which calls asmcgocall,
and the C -> Go path calls cgocallback, which calls cgocallbackg.
The shuffling, which affects mainly the callback case, moves
most of the callback implementation to cgocallback running
on the m->curg stack (not the m->g0 scheduler stack) and
only while accounted for with $GOMAXPROCS (between calls
to exitsyscall and entersyscall).
The previous callback code did not block in startcgocallback's
approximation to exitsyscall, so if, say, the garbage collector
were running, it would still barge in and start doing things
like call malloc. Similarly endcgocallback's approximation of
entersyscall did not call matchmg to kick off new OS threads
when necessary, which caused the bug in issue 1560.
Fixes #1560.
R=iant
CC=golang-dev
https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
|
|
|
|
|
|
|
// Called from assembly when runtime·mcall is invoked while already
// running on the m->g0 (scheduler) stack, which is not allowed:
// mcall exists precisely to switch onto that stack.
void
runtime·badmcall(void (*fn)(G*)) // called from assembly
{
	USED(fn); // TODO: print fn?
	runtime·throw("runtime: mcall called on m->g0 stack");
}
|
|
|
|
|
|
|
|
// Called from assembly if the function passed to runtime·mcall
// returns; mcall callbacks must never return, so this is fatal.
void
runtime·badmcall2(void (*fn)(G*)) // called from assembly
{
	USED(fn);
	runtime·throw("runtime: mcall function returned");
}
|
2011-03-23 09:43:37 -06:00
|
|
|
|
2013-08-02 14:03:14 -06:00
|
|
|
// Called from assembly when reflect.call is asked to make a call
// whose argument frame exceeds the supported maximum size.
void
runtime·badreflectcall(void) // called from assembly
{
	runtime·panicstring("runtime: arg size to reflect.call more than 1GB");
}
|
|
|
|
|
2011-03-23 09:43:37 -06:00
|
|
|
// Global CPU-profiling state, shared between runtime·setcpuprofilerate
// and the SIGPROF handler (runtime·sigprof). lock serializes reads of
// fn in the handler against updates in setcpuprofilerate.
static struct {
	Mutex lock;
	void (*fn)(uintptr*, int32);	// profiling callback: receives (stack, depth); nil when profiling is off
	int32 hz;			// sampling rate in Hz; 0 when profiling is off
} prof;
|
|
|
|
|
2014-02-12 11:31:36 -07:00
|
|
|
// Empty functions whose addresses (plus PCQuantum) serve as pseudo
// program counters in profiles when a real traceback is unavailable:
// System for unattributable runtime work, ExternalCode for PCs beyond
// the text segment, GC while the collector is running (see sigprof).
static void System(void) {}
static void ExternalCode(void) {}
static void GC(void) {}

// End of the program's text segment, provided by the linker; sigprof
// treats any PC above this as external (non-Go) code.
extern byte runtime·etext[];
|
2013-08-13 12:12:02 -06:00
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Called if we receive a SIGPROF signal.
// Collects a stack trace for the interrupted goroutine gp (when it is
// safe to unwind it) and hands the trace to the registered profiling
// callback prof.fn. Runs in signal context, so it must not allocate;
// it guards against that by bumping mp->mallocing for its duration.
void
runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
{
	int32 n;
	bool traceback;
	// Do not use global m in this function, use mp instead.
	// On windows one m is sending reports about all the g's, so m means a wrong thing.
	// Shadow the global with a dummy local so accidental uses are caught.
	byte m;
	uintptr stk[100];

	m = 0;
	USED(m);

	if(prof.fn == nil || prof.hz == 0)
		return;

	// Profiling runs concurrently with GC, so it must not allocate.
	mp->mallocing++;

	// Define that a "user g" is a user-created goroutine, and a "system g"
	// is one that is m->g0 or m->gsignal. We've only made sure that we
	// can unwind user g's, so exclude the system g's.
	//
	// It is not quite as easy as testing gp == m->curg (the current user g)
	// because we might be interrupted for profiling halfway through a
	// goroutine switch. The switch involves updating three (or four) values:
	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
	// because once it gets updated the new g is running.
	//
	// When switching from a user g to a system g, LR is not considered live,
	// so the update only affects g, SP, and PC. Since PC must be last,
	// the possible partial transitions in ordinary execution are (1) g alone is updated,
	// (2) both g and SP are updated, and (3) SP alone is updated.
	// If g is updated, we'll see a system g and not look closer.
	// If SP alone is updated, we can detect the partial transition by checking
	// whether the SP is within g's stack bounds. (We could also require that SP
	// be changed only after g, but the stack bounds check is needed by other
	// cases, so there is no need to impose an additional requirement.)
	//
	// There is one exceptional transition to a system g, not in ordinary execution.
	// When a signal arrives, the operating system starts the signal handler running
	// with an updated PC and SP. The g is updated last, at the beginning of the
	// handler. There are two reasons this is okay. First, until g is updated the
	// g and SP do not match, so the stack bounds check detects the partial transition.
	// Second, signal handlers currently run with signals disabled, so a profiling
	// signal cannot arrive during the handler.
	//
	// When switching from a system g to a user g, there are three possibilities.
	//
	// First, it may be that the g switch has no PC update, because the SP
	// either corresponds to a user g throughout (as in runtime.asmcgocall)
	// or because it has been arranged to look like a user g frame
	// (as in runtime.cgocallback_gofunc). In this case, since the entire
	// transition is a g+SP update, a partial transition updating just one of
	// those will be detected by the stack bounds check.
	//
	// Second, when returning from a signal handler, the PC and SP updates
	// are performed by the operating system in an atomic update, so the g
	// update must be done before them. The stack bounds check detects
	// the partial transition here, and (again) signal handlers run with signals
	// disabled, so a profiling signal cannot arrive then anyway.
	//
	// Third, the common case: it may be that the switch updates g, SP, and PC
	// separately, as in runtime.gogo.
	//
	// Because runtime.gogo is the only instance, we check whether the PC lies
	// within that function, and if so, not ask for a traceback. This approach
	// requires knowing the size of the runtime.gogo function, which we
	// record in arch_*.h and check in runtime_test.go.
	//
	// There is another apparently viable approach, recorded here in case
	// the "PC within runtime.gogo" check turns out not to be usable.
	// It would be possible to delay the update of either g or SP until immediately
	// before the PC update instruction. Then, because of the stack bounds check,
	// the only problematic interrupt point is just before that PC update instruction,
	// and the sigprof handler can detect that instruction and simulate stepping past
	// it in order to reach a consistent state. On ARM, the update of g must be made
	// in two places (in R10 and also in a TLS slot), so the delayed update would
	// need to be the SP update. The sigprof handler must read the instruction at
	// the current PC and if it was the known instruction (for example, JMP BX or
	// MOV R2, PC), use that other register in place of the PC value.
	// The biggest drawback to this solution is that it requires that we can tell
	// whether it's safe to read from the memory pointed at by PC.
	// In a correct program, we can test PC == nil and otherwise read,
	// but if a profiling signal happens at the instant that a program executes
	// a bad jump (before the program manages to handle the resulting fault)
	// the profiling handler could fault trying to read nonexistent memory.
	//
	// To recap, there are no constraints on the assembly being used for the
	// transition. We simply require that g and SP match and that the PC is not
	// in runtime.gogo.
	traceback = true;
	if(gp == nil || gp != mp->curg ||
	   (uintptr)sp < gp->stackguard - StackGuard || gp->stackbase < (uintptr)sp ||
	   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
		traceback = false;

	n = 0;
	if(traceback)
		n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, stk, nelem(stk), nil, nil, false);
	if(!traceback || n <= 0) {
		// Normal traceback is impossible or has failed.
		// See if it falls into several common cases.
		n = 0;
		if(mp->ncgo > 0 && mp->curg != nil &&
			mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
			// Cgo, we can't unwind and symbolize arbitrary C code,
			// so instead collect Go stack that leads to the cgo call.
			// This is especially important on windows, since all syscalls are cgo calls.
			n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, stk, nelem(stk), nil, nil, false);
		}
#ifdef GOOS_windows
		if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
			// Libcall, i.e. runtime syscall on windows.
			// Collect Go stack that leads to the call.
			n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, stk, nelem(stk), nil, nil, false);
		}
#endif
		if(n == 0) {
			// If all of the above has failed, account it against abstract "System" or "GC".
			n = 2;
			// "ExternalCode" is better than "etext".
			if((uintptr)pc > (uintptr)runtime·etext)
				pc = (byte*)ExternalCode + PCQuantum;
			stk[0] = (uintptr)pc;
			if(mp->gcing || mp->helpgc)
				stk[1] = (uintptr)GC + PCQuantum;
			else
				stk[1] = (uintptr)System + PCQuantum;
		}
	}

	// Re-check fn under prof.lock: setcpuprofilerate may have cleared it
	// since the unlocked check at the top of this function.
	if(prof.fn != nil) {
		runtime·lock(&prof.lock);
		if(prof.fn != nil)
			prof.fn(stk, n);
		runtime·unlock(&prof.lock);
	}
	mp->mallocing--;
}
|
|
|
|
|
2011-11-08 19:16:25 -07:00
|
|
|
// Arrange to call fn with a traceback hz times a second.
|
2011-03-23 09:43:37 -06:00
|
|
|
void
|
|
|
|
runtime·setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
|
|
|
|
{
|
|
|
|
// Force sane arguments.
|
|
|
|
if(hz < 0)
|
|
|
|
hz = 0;
|
|
|
|
if(hz == 0)
|
|
|
|
fn = nil;
|
|
|
|
if(fn == nil)
|
|
|
|
hz = 0;
|
|
|
|
|
2013-08-13 03:01:30 -06:00
|
|
|
// Disable preemption, otherwise we can be rescheduled to another thread
|
|
|
|
// that has profiling enabled.
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks++;
|
2013-08-13 03:01:30 -06:00
|
|
|
|
|
|
|
// Stop profiler on this thread so that it is safe to lock prof.
|
2011-03-23 09:43:37 -06:00
|
|
|
// if a profiling signal came in while we had prof locked,
|
|
|
|
// it would deadlock.
|
|
|
|
runtime·resetcpuprofiler(0);
|
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&prof.lock);
|
2011-03-23 09:43:37 -06:00
|
|
|
prof.fn = fn;
|
|
|
|
prof.hz = hz;
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&prof.lock);
|
|
|
|
runtime·lock(&runtime·sched.lock);
|
2011-03-23 09:43:37 -06:00
|
|
|
runtime·sched.profilehz = hz;
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2011-07-19 09:01:17 -06:00
|
|
|
|
2011-03-23 09:43:37 -06:00
|
|
|
if(hz != 0)
|
|
|
|
runtime·resetcpuprofiler(hz);
|
2013-08-13 03:01:30 -06:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->locks--;
|
2011-03-23 09:43:37 -06:00
|
|
|
}
|
2013-02-22 21:48:02 -07:00
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Change number of processors. The world is stopped, sched is locked.
|
|
|
|
static void
|
|
|
|
procresize(int32 new)
|
|
|
|
{
|
|
|
|
int32 i, old;
|
2014-01-20 23:24:42 -07:00
|
|
|
bool empty;
|
2013-03-01 04:49:16 -07:00
|
|
|
G *gp;
|
|
|
|
P *p;
|
|
|
|
|
|
|
|
old = runtime·gomaxprocs;
|
|
|
|
if(old < 0 || old > MaxGomaxprocs || new <= 0 || new >MaxGomaxprocs)
|
|
|
|
runtime·throw("procresize: invalid arg");
|
|
|
|
// initialize new P's
|
|
|
|
for(i = 0; i < new; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
if(p == nil) {
|
2014-08-07 03:04:04 -06:00
|
|
|
p = (P*)runtime·mallocgc(sizeof(*p), 0, 0);
|
2013-08-13 14:30:55 -06:00
|
|
|
p->id = i;
|
2013-03-01 04:49:16 -07:00
|
|
|
p->status = Pgcstop;
|
|
|
|
runtime·atomicstorep(&runtime·allp[i], p);
|
|
|
|
}
|
|
|
|
if(p->mcache == nil) {
|
|
|
|
if(old==0 && i==0)
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
p->mcache = g->m->mcache; // bootstrap
|
2013-03-01 04:49:16 -07:00
|
|
|
else
|
|
|
|
p->mcache = runtime·allocmcache();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// redistribute runnable G's evenly
|
2014-01-20 23:24:42 -07:00
|
|
|
// collect all runnable goroutines in global queue preserving FIFO order
|
|
|
|
// FIFO order is required to ensure fairness even during frequent GCs
|
|
|
|
// see http://golang.org/issue/7126
|
|
|
|
empty = false;
|
|
|
|
while(!empty) {
|
|
|
|
empty = true;
|
|
|
|
for(i = 0; i < old; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
if(p->runqhead == p->runqtail)
|
|
|
|
continue;
|
|
|
|
empty = false;
|
|
|
|
// pop from tail of local queue
|
|
|
|
p->runqtail--;
|
|
|
|
gp = p->runq[p->runqtail%nelem(p->runq)];
|
|
|
|
// push onto head of global queue
|
|
|
|
gp->schedlink = runtime·sched.runqhead;
|
|
|
|
runtime·sched.runqhead = gp;
|
|
|
|
if(runtime·sched.runqtail == nil)
|
|
|
|
runtime·sched.runqtail = gp;
|
|
|
|
runtime·sched.runqsize++;
|
|
|
|
}
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
2014-01-16 01:17:00 -07:00
|
|
|
// fill local queues with at most nelem(p->runq)/2 goroutines
|
2013-03-01 04:49:16 -07:00
|
|
|
// start at 1 because current M already executes some G and will acquire allp[0] below,
|
|
|
|
// so if we have a spare G we want to put it into allp[1].
|
2014-01-16 01:17:00 -07:00
|
|
|
for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
|
2013-03-01 04:49:16 -07:00
|
|
|
gp = runtime·sched.runqhead;
|
|
|
|
runtime·sched.runqhead = gp->schedlink;
|
2014-01-16 01:17:00 -07:00
|
|
|
if(runtime·sched.runqhead == nil)
|
|
|
|
runtime·sched.runqtail = nil;
|
|
|
|
runtime·sched.runqsize--;
|
2013-03-01 04:49:16 -07:00
|
|
|
runqput(runtime·allp[i%new], gp);
|
|
|
|
}
|
|
|
|
|
|
|
|
// free unused P's
|
|
|
|
for(i = new; i < old; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
runtime·freemcache(p->mcache);
|
|
|
|
p->mcache = nil;
|
|
|
|
gfpurge(p);
|
|
|
|
p->status = Pdead;
|
|
|
|
// can't free P itself because it can be referenced by an M in syscall
|
|
|
|
}
|
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->p)
|
|
|
|
g->m->p->m = nil;
|
|
|
|
g->m->p = nil;
|
|
|
|
g->m->mcache = nil;
|
2013-03-01 04:49:16 -07:00
|
|
|
p = runtime·allp[0];
|
|
|
|
p->m = nil;
|
|
|
|
p->status = Pidle;
|
|
|
|
acquirep(p);
|
|
|
|
for(i = new-1; i > 0; i--) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
p->status = Pidle;
|
|
|
|
pidleput(p);
|
|
|
|
}
|
|
|
|
runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Associate p and the current m.
|
|
|
|
static void
|
|
|
|
acquirep(P *p)
|
|
|
|
{
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->p || g->m->mcache)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("acquirep: already in go");
|
|
|
|
if(p->m || p->status != Pidle) {
|
|
|
|
runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
|
|
|
|
runtime·throw("acquirep: invalid p state");
|
|
|
|
}
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->mcache = p->mcache;
|
|
|
|
g->m->p = p;
|
|
|
|
p->m = g->m;
|
2013-03-01 04:49:16 -07:00
|
|
|
p->status = Prunning;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Disassociate p and the current m.
|
|
|
|
static P*
|
|
|
|
releasep(void)
|
|
|
|
{
|
|
|
|
P *p;
|
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
if(g->m->p == nil || g->m->mcache == nil)
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("releasep: invalid arg");
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
p = g->m->p;
|
|
|
|
if(p->m != g->m || p->mcache != g->m->mcache || p->status != Prunning) {
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m, g->m->p, p->m, g->m->mcache, p->mcache, p->status);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("releasep: invalid p state");
|
|
|
|
}
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->p = nil;
|
|
|
|
g->m->mcache = nil;
|
2013-03-01 04:49:16 -07:00
|
|
|
p->m = nil;
|
|
|
|
p->status = Pidle;
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2013-08-13 12:07:42 -06:00
|
|
|
incidlelocked(int32 v)
|
2013-03-01 04:49:16 -07:00
|
|
|
{
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-08-13 12:07:42 -06:00
|
|
|
runtime·sched.nmidlelocked += v;
|
2013-03-01 04:49:16 -07:00
|
|
|
if(v > 0)
|
|
|
|
checkdead();
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check for deadlock situation.
|
|
|
|
// The check is based on number of running M's, if 0 -> deadlock.
|
|
|
|
static void
|
|
|
|
checkdead(void)
|
|
|
|
{
|
|
|
|
G *gp;
|
|
|
|
int32 run, grunning, s;
|
2014-01-21 02:06:57 -07:00
|
|
|
uintptr i;
|
2013-03-01 04:49:16 -07:00
|
|
|
|
|
|
|
// -1 for sysmon
|
2013-08-13 12:07:42 -06:00
|
|
|
run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
|
2013-03-01 04:49:16 -07:00
|
|
|
if(run > 0)
|
|
|
|
return;
|
2014-02-14 02:24:48 -07:00
|
|
|
// If we are dying because of a signal caught on an already idle thread,
|
|
|
|
// freezetheworld will cause all running threads to block.
|
|
|
|
// And runtime will essentially enter into deadlock state,
|
|
|
|
// except that there is a thread that will call runtime·exit soon.
|
|
|
|
if(runtime·panicking > 0)
|
|
|
|
return;
|
2013-03-01 04:49:16 -07:00
|
|
|
if(run < 0) {
|
2014-02-14 02:24:48 -07:00
|
|
|
runtime·printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
|
2013-08-13 12:07:42 -06:00
|
|
|
runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("checkdead: inconsistent counts");
|
|
|
|
}
|
|
|
|
grunning = 0;
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·lock(&allglock);
|
|
|
|
for(i = 0; i < runtime·allglen; i++) {
|
|
|
|
gp = runtime·allg[i];
|
2014-08-25 10:59:52 -06:00
|
|
|
if(gp->issystem)
|
2013-03-01 04:49:16 -07:00
|
|
|
continue;
|
2014-08-27 09:15:47 -06:00
|
|
|
s = runtime·readgstatus(gp);
|
|
|
|
switch(s&~Gscan) {
|
|
|
|
case Gwaiting:
|
2013-03-01 04:49:16 -07:00
|
|
|
grunning++;
|
2014-08-27 09:15:47 -06:00
|
|
|
break;
|
|
|
|
case Grunnable:
|
|
|
|
case Grunning:
|
|
|
|
case Gsyscall:
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·unlock(&allglock);
|
2014-02-14 02:24:48 -07:00
|
|
|
runtime·printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("checkdead: runnable g");
|
2014-08-27 09:15:47 -06:00
|
|
|
break;
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
}
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·unlock(&allglock);
|
2013-03-01 04:49:16 -07:00
|
|
|
if(grunning == 0) // possible if main goroutine calls runtime·Goexit()
|
runtime: crash when func main calls Goexit and all other goroutines exit
This has typically crashed in the past, although usually with
an 'all goroutines are asleep - deadlock!' message that shows
no goroutines (because there aren't any).
Previous discussion at:
https://groups.google.com/d/msg/golang-nuts/uCT_7WxxopQ/BoSBlLFzUTkJ
https://groups.google.com/d/msg/golang-dev/KUojayEr20I/u4fp_Ej5PdUJ
http://golang.org/issue/7711
There is general agreement that runtime.Goexit terminates the
main goroutine, so that main cannot return, so the program does
not exit.
The interpretation that all other goroutines exiting causes an
exit(0) is relatively new and was not part of those discussions.
That is what this CL changes.
Thankfully, even though the exit(0) has been there for a while,
some other accounting bugs made it very difficult to trigger,
so it is reasonable to replace. In particular, see golang.org/issue/7711#c10
for an examination of the behavior across past releases.
Fixes #7711.
LGTM=iant, r
R=golang-codereviews, iant, dvyukov, r
CC=golang-codereviews
https://golang.org/cl/88210044
2014-04-16 11:12:18 -06:00
|
|
|
runtime·throw("no goroutines (main called runtime.Goexit) - deadlock!");
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
g->m->throwing = -1; // do not dump full stacks
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·throw("all goroutines are asleep - deadlock!");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
sysmon(void)
|
|
|
|
{
|
2014-08-25 10:59:52 -06:00
|
|
|
uint32 idle, delay, nscavenge;
|
2014-08-29 01:08:10 -06:00
|
|
|
int64 now, unixnow, lastpoll, lasttrace, lastgc;
|
2014-08-25 10:59:52 -06:00
|
|
|
int64 forcegcperiod, scavengelimit, lastscavenge, maxsleep;
|
2013-03-12 11:14:26 -06:00
|
|
|
G *gp;
|
2013-03-01 04:49:16 -07:00
|
|
|
|
2014-08-25 10:59:52 -06:00
|
|
|
// If we go two minutes without a garbage collection, force one to run.
|
2014-08-25 13:30:39 -06:00
|
|
|
forcegcperiod = 2*60*1e9;
|
2014-08-25 10:59:52 -06:00
|
|
|
// If a heap span goes unused for 5 minutes after a garbage collection,
|
|
|
|
// we hand it back to the operating system.
|
2014-08-25 13:30:39 -06:00
|
|
|
scavengelimit = 5*60*1e9;
|
|
|
|
if(runtime·debug.scavenge > 0) {
|
|
|
|
// Scavenge-a-lot for testing.
|
|
|
|
forcegcperiod = 10*1e6;
|
|
|
|
scavengelimit = 20*1e6;
|
|
|
|
}
|
2014-08-25 10:59:52 -06:00
|
|
|
lastscavenge = runtime·nanotime();
|
|
|
|
nscavenge = 0;
|
|
|
|
// Make wake-up period small enough for the sampling to be correct.
|
|
|
|
maxsleep = forcegcperiod/2;
|
|
|
|
if(scavengelimit < forcegcperiod)
|
|
|
|
maxsleep = scavengelimit/2;
|
|
|
|
|
2013-08-13 14:30:55 -06:00
|
|
|
lasttrace = 0;
|
2013-03-01 04:49:16 -07:00
|
|
|
idle = 0; // how many cycles in succession we had not wokeup somebody
|
|
|
|
delay = 0;
|
|
|
|
for(;;) {
|
|
|
|
if(idle == 0) // start with 20us sleep...
|
|
|
|
delay = 20;
|
|
|
|
else if(idle > 50) // start doubling the sleep after 1ms...
|
|
|
|
delay *= 2;
|
|
|
|
if(delay > 10*1000) // up to 10ms
|
|
|
|
delay = 10*1000;
|
|
|
|
runtime·usleep(delay);
|
2013-08-13 14:30:55 -06:00
|
|
|
if(runtime·debug.schedtrace <= 0 &&
|
2013-08-15 04:32:10 -06:00
|
|
|
(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) { // TODO: fast atomic
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-08-15 04:32:10 -06:00
|
|
|
if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
|
2013-03-01 12:57:05 -07:00
|
|
|
runtime·atomicstore(&runtime·sched.sysmonwait, 1);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2014-08-25 10:59:52 -06:00
|
|
|
runtime·notetsleep(&runtime·sched.sysmonnote, maxsleep);
|
|
|
|
runtime·lock(&runtime·sched.lock);
|
|
|
|
runtime·atomicstore(&runtime·sched.sysmonwait, 0);
|
2013-03-01 04:49:16 -07:00
|
|
|
runtime·noteclear(&runtime·sched.sysmonnote);
|
|
|
|
idle = 0;
|
|
|
|
delay = 20;
|
2014-08-25 10:59:52 -06:00
|
|
|
}
|
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
2013-03-12 11:14:26 -06:00
|
|
|
// poll network if not polled for more than 10ms
|
|
|
|
lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
|
|
|
|
now = runtime·nanotime();
|
2014-08-25 10:59:52 -06:00
|
|
|
unixnow = runtime·unixnanotime();
|
2013-10-17 12:57:48 -06:00
|
|
|
if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
|
2013-07-19 07:45:34 -06:00
|
|
|
runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
|
2013-03-12 11:14:26 -06:00
|
|
|
gp = runtime·netpoll(false); // non-blocking
|
2013-08-13 12:07:42 -06:00
|
|
|
if(gp) {
|
|
|
|
// Need to decrement number of idle locked M's
|
|
|
|
// (pretending that one more is running) before injectglist.
|
|
|
|
// Otherwise it can lead to the following situation:
|
|
|
|
// injectglist grabs all P's but before it starts M's to run the P's,
|
|
|
|
// another M returns from syscall, finishes running its G,
|
|
|
|
// observes that there is no work to do and no other running M's
|
|
|
|
// and reports deadlock.
|
|
|
|
incidlelocked(-1);
|
|
|
|
injectglist(gp);
|
|
|
|
incidlelocked(1);
|
|
|
|
}
|
2013-03-12 11:14:26 -06:00
|
|
|
}
|
|
|
|
// retake P's blocked in syscalls
|
2013-07-18 15:22:26 -06:00
|
|
|
// and preempt long running G's
|
|
|
|
if(retake(now))
|
2013-03-01 04:49:16 -07:00
|
|
|
idle = 0;
|
|
|
|
else
|
|
|
|
idle++;
|
2013-08-13 14:30:55 -06:00
|
|
|
|
2014-08-25 10:59:52 -06:00
|
|
|
// check if we need to force a GC
|
2014-08-29 01:08:10 -06:00
|
|
|
lastgc = runtime·atomicload64(&mstats.last_gc);
|
|
|
|
if(lastgc != 0 && unixnow - lastgc > forcegcperiod && runtime·atomicload(&runtime·forcegc.idle)) {
|
|
|
|
runtime·lock(&runtime·forcegc.lock);
|
|
|
|
runtime·forcegc.idle = 0;
|
|
|
|
runtime·forcegc.g->schedlink = nil;
|
|
|
|
injectglist(runtime·forcegc.g);
|
|
|
|
runtime·unlock(&runtime·forcegc.lock);
|
2014-08-25 10:59:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// scavenge heap once in a while
|
|
|
|
if(lastscavenge + scavengelimit/2 < now) {
|
|
|
|
runtime·MHeap_Scavenge(nscavenge, now, scavengelimit);
|
|
|
|
lastscavenge = now;
|
|
|
|
nscavenge++;
|
|
|
|
}
|
|
|
|
|
2013-08-13 14:30:55 -06:00
|
|
|
if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
|
|
|
|
lasttrace = now;
|
|
|
|
runtime·schedtrace(runtime·debug.scheddetail);
|
|
|
|
}
|
2013-03-01 04:49:16 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-07-18 15:22:26 -06:00
|
|
|
typedef struct Pdesc Pdesc;
|
|
|
|
struct Pdesc
|
|
|
|
{
|
2013-08-13 12:14:04 -06:00
|
|
|
uint32 schedtick;
|
|
|
|
int64 schedwhen;
|
|
|
|
uint32 syscalltick;
|
|
|
|
int64 syscallwhen;
|
2013-07-18 15:22:26 -06:00
|
|
|
};
|
2014-05-31 17:21:17 -06:00
|
|
|
#pragma dataflag NOPTR
|
2013-07-18 15:22:26 -06:00
|
|
|
static Pdesc pdesc[MaxGomaxprocs];
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// retake is called by sysmon to reclaim P's that are stuck:
//  - P's sitting in a syscall for more than one sysmon tick are handed off
//    to other M's so their run queues make progress;
//  - P's running the same G for more than 10ms get a preemption request.
// now is the current time from runtime·nanotime().
// Returns the number of P's retaken from syscalls.
static uint32
retake(int64 now)
{
	uint32 i, s, n;
	int64 t;
	P *p;
	Pdesc *pd;

	n = 0;
	for(i = 0; i < runtime·gomaxprocs; i++) {
		p = runtime·allp[i];
		if(p==nil)
			continue;
		pd = &pdesc[i];
		s = p->status;
		if(s == Psyscall) {
			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
			t = p->syscalltick;
			if(pd->syscalltick != t) {
				// Tick changed since we last looked: a new syscall started.
				// Record it and give it a full tick before retaking.
				pd->syscalltick = t;
				pd->syscallwhen = now;
				continue;
			}
			// On the one hand we don't want to retake Ps if there is no other work to do,
			// but on the other hand we want to retake them eventually
			// because they can prevent the sysmon thread from deep sleep.
			if(p->runqhead == p->runqtail &&
				runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
				pd->syscallwhen + 10*1000*1000 > now)
				continue;
			// Need to decrement number of idle locked M's
			// (pretending that one more is running) before the CAS.
			// Otherwise the M from which we retake can exit the syscall,
			// increment nmidle and report deadlock.
			incidlelocked(-1);
			if(runtime·cas(&p->status, s, Pidle)) {
				n++;
				// Ownership of p taken; pass it to another M.
				handoffp(p);
			}
			incidlelocked(1);
		} else if(s == Prunning) {
			// Preempt G if it's running for more than 10ms.
			t = p->schedtick;
			if(pd->schedtick != t) {
				// A new G was scheduled since last observation; restart the clock.
				pd->schedtick = t;
				pd->schedwhen = now;
				continue;
			}
			if(pd->schedwhen + 10*1000*1000 > now)
				continue;
			preemptone(p);
		}
	}
	return n;
}
|
|
|
|
|
2013-06-03 03:20:17 -06:00
|
|
|
// Tell all goroutines that they have been preempted and they should stop.
|
|
|
|
// This function is purely best-effort. It can fail to inform a goroutine if a
|
|
|
|
// processor just started running it.
|
|
|
|
// No locks need to be held.
|
2013-08-09 02:53:35 -06:00
|
|
|
// Returns true if preemption request was issued to at least one goroutine.
|
|
|
|
static bool
|
2013-06-03 03:20:17 -06:00
|
|
|
preemptall(void)
|
|
|
|
{
|
|
|
|
P *p;
|
|
|
|
int32 i;
|
2013-08-09 02:53:35 -06:00
|
|
|
bool res;
|
2013-06-03 03:20:17 -06:00
|
|
|
|
2013-08-09 02:53:35 -06:00
|
|
|
res = false;
|
2013-06-03 03:20:17 -06:00
|
|
|
for(i = 0; i < runtime·gomaxprocs; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
if(p == nil || p->status != Prunning)
|
|
|
|
continue;
|
2013-08-09 02:53:35 -06:00
|
|
|
res |= preemptone(p);
|
2013-06-03 03:20:17 -06:00
|
|
|
}
|
2013-08-09 02:53:35 -06:00
|
|
|
return res;
|
2013-06-03 03:20:17 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Tell the goroutine running on processor P to stop.
// This function is purely best-effort. It can incorrectly fail to inform the
// goroutine. It can inform the wrong goroutine. Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing runtime·newstack.
// No lock needs to be held.
// Returns true if preemption request was issued.
// The actual preemption will happen at some point in the future
// and will be indicated by the gp->status no longer being
// Grunning
static bool
preemptone(P *p)
{
	M *mp;
	G *gp;

	// Don't try to preempt the M we are currently running on.
	mp = p->m;
	if(mp == nil || mp == g->m)
		return false;
	// Only preempt a user goroutine, never the scheduler g0.
	gp = mp->curg;
	if(gp == nil || gp == mp->g0)
		return false;
	gp->preempt = true;
	// Every call in a go routine checks for stack overflow by
	// comparing the current stack pointer to gp->stackguard0.
	// Setting gp->stackguard0 to StackPreempt folds
	// preemption into the normal stack overflow check.
	gp->stackguard0 = StackPreempt;
	return true;
}
|
|
|
|
|
2013-08-13 14:30:55 -06:00
|
|
|
void
|
|
|
|
runtime·schedtrace(bool detailed)
|
|
|
|
{
|
|
|
|
static int64 starttime;
|
|
|
|
int64 now;
|
2013-08-13 17:37:54 -06:00
|
|
|
int64 id1, id2, id3;
|
2014-01-16 01:17:00 -07:00
|
|
|
int32 i, t, h;
|
2014-01-21 02:06:57 -07:00
|
|
|
uintptr gi;
|
2013-08-13 14:30:55 -06:00
|
|
|
int8 *fmt;
|
|
|
|
M *mp, *lockedm;
|
|
|
|
G *gp, *lockedg;
|
|
|
|
P *p;
|
|
|
|
|
|
|
|
now = runtime·nanotime();
|
|
|
|
if(starttime == 0)
|
|
|
|
starttime = now;
|
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2014-06-26 18:16:43 -06:00
|
|
|
runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d spinningthreads=%d idlethreads=%d runqueue=%d",
|
2013-08-13 14:30:55 -06:00
|
|
|
(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
|
2014-06-26 18:16:43 -06:00
|
|
|
runtime·sched.nmspinning, runtime·sched.nmidle, runtime·sched.runqsize);
|
2013-08-13 14:30:55 -06:00
|
|
|
if(detailed) {
|
2014-06-26 18:16:43 -06:00
|
|
|
runtime·printf(" gcwaiting=%d nmidlelocked=%d stopwait=%d sysmonwait=%d\n",
|
|
|
|
runtime·sched.gcwaiting, runtime·sched.nmidlelocked,
|
2013-08-13 14:30:55 -06:00
|
|
|
runtime·sched.stopwait, runtime·sched.sysmonwait);
|
|
|
|
}
|
|
|
|
// We must be careful while reading data from P's, M's and G's.
|
|
|
|
// Even if we hold schedlock, most data can be changed concurrently.
|
|
|
|
// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
|
|
|
|
for(i = 0; i < runtime·gomaxprocs; i++) {
|
|
|
|
p = runtime·allp[i];
|
|
|
|
if(p == nil)
|
|
|
|
continue;
|
|
|
|
mp = p->m;
|
2014-01-16 01:17:00 -07:00
|
|
|
h = runtime·atomicload(&p->runqhead);
|
|
|
|
t = runtime·atomicload(&p->runqtail);
|
2013-08-13 14:30:55 -06:00
|
|
|
if(detailed)
|
2014-01-16 01:17:00 -07:00
|
|
|
runtime·printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
|
|
|
|
i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
|
2013-08-13 14:30:55 -06:00
|
|
|
else {
|
|
|
|
// In non-detailed mode format lengths of per-P run queues as:
|
|
|
|
// [len1 len2 len3 len4]
|
|
|
|
fmt = " %d";
|
|
|
|
if(runtime·gomaxprocs == 1)
|
|
|
|
fmt = " [%d]\n";
|
|
|
|
else if(i == 0)
|
|
|
|
fmt = " [%d";
|
|
|
|
else if(i == runtime·gomaxprocs-1)
|
|
|
|
fmt = " %d]\n";
|
2014-01-16 01:17:00 -07:00
|
|
|
runtime·printf(fmt, t-h);
|
2013-08-13 14:30:55 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if(!detailed) {
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-08-13 14:30:55 -06:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
for(mp = runtime·allm; mp; mp = mp->alllink) {
|
|
|
|
p = mp->p;
|
|
|
|
gp = mp->curg;
|
|
|
|
lockedg = mp->lockedg;
|
2013-08-13 17:37:54 -06:00
|
|
|
id1 = -1;
|
|
|
|
if(p)
|
|
|
|
id1 = p->id;
|
|
|
|
id2 = -1;
|
|
|
|
if(gp)
|
|
|
|
id2 = gp->goid;
|
|
|
|
id3 = -1;
|
|
|
|
if(lockedg)
|
|
|
|
id3 = lockedg->goid;
|
|
|
|
runtime·printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
|
2014-02-10 04:40:55 -07:00
|
|
|
" locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
|
2013-08-13 17:37:54 -06:00
|
|
|
mp->id, id1, id2,
|
2013-08-13 14:30:55 -06:00
|
|
|
mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
mp->spinning, g->m->blocked, id3);
|
2013-08-13 14:30:55 -06:00
|
|
|
}
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·lock(&allglock);
|
|
|
|
for(gi = 0; gi < runtime·allglen; gi++) {
|
|
|
|
gp = runtime·allg[gi];
|
2013-08-13 14:30:55 -06:00
|
|
|
mp = gp->m;
|
|
|
|
lockedm = gp->lockedm;
|
2014-08-21 10:41:09 -06:00
|
|
|
runtime·printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
|
2014-08-27 09:15:47 -06:00
|
|
|
gp->goid, runtime·readgstatus(gp), gp->waitreason, mp ? mp->id : -1,
|
2013-08-13 14:30:55 -06:00
|
|
|
lockedm ? lockedm->id : -1);
|
|
|
|
}
|
2014-01-21 02:06:57 -07:00
|
|
|
runtime·unlock(&allglock);
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2013-08-13 14:30:55 -06:00
|
|
|
}
|
|
|
|
|
2013-03-01 04:49:16 -07:00
|
|
|
// Put mp on midle list.
// Sched must be locked.
static void
mput(M *mp)
{
	// Push onto the singly-linked idle-M stack.
	mp->schedlink = runtime·sched.midle;
	runtime·sched.midle = mp;
	runtime·sched.nmidle++;
	// One more M just went idle; re-run the deadlock check.
	checkdead();
}
|
|
|
|
|
|
|
|
// Try to get an m from midle list.
|
|
|
|
// Sched must be locked.
|
|
|
|
static M*
|
|
|
|
mget(void)
|
|
|
|
{
|
|
|
|
M *mp;
|
|
|
|
|
|
|
|
if((mp = runtime·sched.midle) != nil){
|
|
|
|
runtime·sched.midle = mp->schedlink;
|
|
|
|
runtime·sched.nmidle--;
|
|
|
|
}
|
|
|
|
return mp;
|
|
|
|
}
|
|
|
|
|
2013-02-27 12:17:53 -07:00
|
|
|
// Put gp on the global runnable queue.
// Sched must be locked.
static void
globrunqput(G *gp)
{
	// Append to the tail of the singly-linked global run queue.
	gp->schedlink = nil;
	if(runtime·sched.runqtail)
		runtime·sched.runqtail->schedlink = gp;
	else
		runtime·sched.runqhead = gp;
	runtime·sched.runqtail = gp;
	runtime·sched.runqsize++;
}
|
|
|
|
|
2014-01-16 01:17:00 -07:00
|
|
|
// Put a batch of runnable goroutines on the global runnable queue.
// ghead..gtail must already be linked via schedlink; n is the batch length.
// Sched must be locked.
static void
globrunqputbatch(G *ghead, G *gtail, int32 n)
{
	// Splice the pre-linked chain onto the tail of the global run queue.
	gtail->schedlink = nil;
	if(runtime·sched.runqtail)
		runtime·sched.runqtail->schedlink = ghead;
	else
		runtime·sched.runqhead = ghead;
	runtime·sched.runqtail = gtail;
	runtime·sched.runqsize += n;
}
|
|
|
|
|
2013-02-27 12:17:53 -07:00
|
|
|
// Try get a batch of G's from the global runnable queue.
// Returns one G to run directly and moves the rest of the batch onto
// p's local run queue. max limits the batch size (0 means no limit).
// Sched must be locked.
static G*
globrunqget(P *p, int32 max)
{
	G *gp, *gp1;
	int32 n;

	if(runtime·sched.runqsize == 0)
		return nil;
	// Take a fair share: 1/gomaxprocs of the queue, rounded up...
	n = runtime·sched.runqsize/runtime·gomaxprocs+1;
	if(n > runtime·sched.runqsize)
		n = runtime·sched.runqsize;
	// ...clamped by the caller's limit...
	if(max > 0 && n > max)
		n = max;
	// ...and by half of p's local queue capacity.
	if(n > nelem(p->runq)/2)
		n = nelem(p->runq)/2;
	runtime·sched.runqsize -= n;
	if(runtime·sched.runqsize == 0)
		runtime·sched.runqtail = nil;
	// First G is returned to the caller; the rest go to p's local queue.
	gp = runtime·sched.runqhead;
	runtime·sched.runqhead = gp->schedlink;
	n--;
	while(n--) {
		gp1 = runtime·sched.runqhead;
		runtime·sched.runqhead = gp1->schedlink;
		runqput(p, gp1);
	}
	return gp;
}
|
|
|
|
|
|
|
|
// Put p on pidle list.
// Sched must be locked.
static void
pidleput(P *p)
{
	// Push onto the singly-linked idle-P stack.
	p->link = runtime·sched.pidle;
	runtime·sched.pidle = p;
	// npidle is read without the lock elsewhere, so update it atomically.
	runtime·xadd(&runtime·sched.npidle, 1); // TODO: fast atomic
}
|
|
|
|
|
|
|
|
// Try get a p from pidle list.
|
|
|
|
// Sched must be locked.
|
|
|
|
static P*
|
|
|
|
pidleget(void)
|
|
|
|
{
|
|
|
|
P *p;
|
|
|
|
|
|
|
|
p = runtime·sched.pidle;
|
|
|
|
if(p) {
|
|
|
|
runtime·sched.pidle = p->link;
|
2013-03-01 12:57:05 -07:00
|
|
|
runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic
|
2013-02-27 12:17:53 -07:00
|
|
|
}
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:17:00 -07:00
|
|
|
// Try to put g on local runnable queue.
// If it's full, put onto global queue.
// Executed only by the owner P.
static void
runqput(P *p, G *gp)
{
	uint32 h, t;

retry:
	h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with consumers
	t = p->runqtail;
	// head/tail are free-running counters; t-h is the queue length.
	if(t - h < nelem(p->runq)) {
		p->runq[t%nelem(p->runq)] = gp;
		runtime·atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
		return;
	}
	// Queue looked full: move half of it (plus gp) to the global queue.
	if(runqputslow(p, gp, h, t))
		return;
	// runqputslow lost the race with a stealer, so the queue is no longer
	// full; now the put above must succeed.
	goto retry;
}
|
|
|
|
|
|
|
|
// Put g and a batch of work from local runnable queue on global queue.
// h and t are the head/tail values the caller observed; returns false if
// the CAS on runqhead fails (a stealer consumed items concurrently),
// in which case the caller should retry the fast path.
// Executed only by the owner P.
static bool
runqputslow(P *p, G *gp, uint32 h, uint32 t)
{
	G *batch[nelem(p->runq)/2+1];
	uint32 n, i;

	// First, grab a batch from local queue.
	n = t-h;
	n = n/2;
	if(n != nelem(p->runq)/2)
		runtime·throw("runqputslow: queue is not full");
	for(i=0; i<n; i++)
		batch[i] = p->runq[(h+i)%nelem(p->runq)];
	if(!runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume
		return false;
	batch[n] = gp;
	// Link the goroutines.
	for(i=0; i<n; i++)
		batch[i]->schedlink = batch[i+1];
	// Now put the batch on global queue.
	runtime·lock(&runtime·sched.lock);
	globrunqputbatch(batch[0], batch[n], n+1);
	runtime·unlock(&runtime·sched.lock);
	return true;
}
|
|
|
|
|
|
|
|
// Get g from local runnable queue.
// Executed only by the owner P.
// Returns nil if the queue is empty.
static G*
runqget(P *p)
{
	G *gp;
	uint32 t, h;

	// Loop until we either observe an empty queue or win the CAS on
	// runqhead against concurrent stealers.
	for(;;) {
		h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
		t = p->runqtail;
		if(t == h)
			return nil;
		gp = p->runq[h%nelem(p->runq)];
		if(runtime·cas(&p->runqhead, h, h+1)) // cas-release, commits consume
			return gp;
	}
}
|
|
|
|
|
2014-01-16 01:17:00 -07:00
|
|
|
// Grabs a batch of goroutines from local runnable queue.
// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
// Can be executed by any P.
static uint32
runqgrab(P *p, G **batch)
{
	uint32 t, h, n, i;

	for(;;) {
		h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
		t = runtime·atomicload(&p->runqtail); // load-acquire, synchronize with the producer
		n = t-h;
		// Take the larger half (round up) of what we saw.
		n = n - n/2;
		if(n == 0)
			break;
		if(n > nelem(p->runq)/2) // read inconsistent h and t
			continue;
		// Copy out the candidates before trying to commit the consume.
		for(i=0; i<n; i++)
			batch[i] = p->runq[(h+i)%nelem(p->runq)];
		if(runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume
			break;
	}
	return n;
}
|
|
|
|
|
|
|
|
// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
static G*
runqsteal(P *p, P *p2)
{
	G *gp;
	G *batch[nelem(p->runq)/2];
	uint32 t, h, n, i;

	n = runqgrab(p2, batch);
	if(n == 0)
		return nil;
	// Return the last stolen G directly to the caller.
	n--;
	gp = batch[n];
	if(n == 0)
		return gp;
	// Append the remaining n elements to p's local queue.
	// p is owned by the caller, so only stealers race on runqhead here.
	h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with consumers
	t = p->runqtail;
	if(t - h + n >= nelem(p->runq))
		runtime·throw("runqsteal: runq overflow");
	for(i=0; i<n; i++, t++)
		p->runq[t%nelem(p->runq)] = batch[i];
	runtime·atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
	return gp;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
runtime·testSchedLocalQueue(void)
|
|
|
|
{
|
|
|
|
P p;
|
2014-01-16 01:17:00 -07:00
|
|
|
G gs[nelem(p.runq)];
|
2013-02-22 21:48:02 -07:00
|
|
|
int32 i, j;
|
|
|
|
|
|
|
|
runtime·memclr((byte*)&p, sizeof(p));
|
|
|
|
|
|
|
|
for(i = 0; i < nelem(gs); i++) {
|
|
|
|
if(runqget(&p) != nil)
|
|
|
|
runtime·throw("runq is not empty initially");
|
|
|
|
for(j = 0; j < i; j++)
|
|
|
|
runqput(&p, &gs[i]);
|
|
|
|
for(j = 0; j < i; j++) {
|
|
|
|
if(runqget(&p) != &gs[i]) {
|
|
|
|
runtime·printf("bad element at iter %d/%d\n", i, j);
|
|
|
|
runtime·throw("bad element");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(runqget(&p) != nil)
|
|
|
|
runtime·throw("runq is not empty afterwards");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
runtime·testSchedLocalQueueSteal(void)
|
|
|
|
{
|
|
|
|
P p1, p2;
|
2014-01-16 01:17:00 -07:00
|
|
|
G gs[nelem(p1.runq)], *gp;
|
2013-02-22 21:48:02 -07:00
|
|
|
int32 i, j, s;
|
|
|
|
|
|
|
|
runtime·memclr((byte*)&p1, sizeof(p1));
|
|
|
|
runtime·memclr((byte*)&p2, sizeof(p2));
|
|
|
|
|
|
|
|
for(i = 0; i < nelem(gs); i++) {
|
|
|
|
for(j = 0; j < i; j++) {
|
|
|
|
gs[j].sig = 0;
|
|
|
|
runqput(&p1, &gs[j]);
|
|
|
|
}
|
|
|
|
gp = runqsteal(&p2, &p1);
|
|
|
|
s = 0;
|
|
|
|
if(gp) {
|
|
|
|
s++;
|
|
|
|
gp->sig++;
|
|
|
|
}
|
|
|
|
while(gp = runqget(&p2)) {
|
|
|
|
s++;
|
|
|
|
gp->sig++;
|
|
|
|
}
|
|
|
|
while(gp = runqget(&p1))
|
|
|
|
gp->sig++;
|
|
|
|
for(j = 0; j < i; j++) {
|
|
|
|
if(gs[j].sig != 1) {
|
|
|
|
runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
|
|
|
|
runtime·throw("bad element");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(s != i/2 && s != i/2+1) {
|
|
|
|
runtime·printf("bad steal %d, want %d or %d, iter %d\n",
|
|
|
|
s, i/2, i/2+1, i);
|
|
|
|
runtime·throw("bad steal");
|
|
|
|
}
|
|
|
|
}
|
2013-02-28 14:24:38 -07:00
|
|
|
}
|
|
|
|
|
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.
For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:
TEXT myfunc
check for split
jmpcond nosplit
call morestack
nosplit:
sub $xxx, sp
Now an extra instruction is inserted after the call:
TEXT myfunc
start:
check for split
jmpcond nosplit
call morestack
jmp start
nosplit:
sub $xxx, sp
The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.
The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.
The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.
Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)
runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow
goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
/Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
/Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
/Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
/Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
/Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
/Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8
And here is a seg fault during oldstack:
SIGSEGV: segmentation violation
PC=0x1b2a6
runtime.oldstack()
/Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
/Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22
goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
/Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
/Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
/Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8
goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
/Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
/Users/rsc/g/go/src/pkg/runtime/proc.c:166
rax 0x23ccc0
rbx 0x23ccc0
rcx 0x0
rdx 0x38
rdi 0x2102c0170
rsi 0x221032cfe0
rbp 0x221032cfa0
rsp 0x7fff5fbff5b0
r8 0x2102c0120
r9 0x221032cfa0
r10 0x221032c000
r11 0x104ce8
r12 0xe5c80
r13 0x1be82baac718
r14 0x13091135f7d69200
r15 0x0
rip 0x1b2a6
rflags 0x10246
cs 0x2b
fs 0x0
gs 0x0
Fixes #5723.
R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
|
|
|
extern void runtime·morestack(void);
|
2014-02-13 22:20:51 -07:00
|
|
|
uintptr runtime·externalthreadhandlerp;
|
runtime: record proper goroutine state during stack split
Until now, the goroutine state has been scattered during the
execution of newstack and oldstack. It's all there, and those routines
know how to get back to a working goroutine, but other pieces of
the system, like stack traces, do not. If something does interrupt
the newstack or oldstack execution, the rest of the system can't
understand the goroutine. For example, if newstack decides there
is an overflow and calls throw, the stack tracer wouldn't dump the
goroutine correctly.
For newstack to save a useful state snapshot, it needs to be able
to rewind the PC in the function that triggered the split back to
the beginning of the function. (The PC is a few instructions in, just
after the call to morestack.) To make that possible, we change the
prologues to insert a jmp back to the beginning of the function
after the call to morestack. That is, the prologue used to be roughly:
TEXT myfunc
check for split
jmpcond nosplit
call morestack
nosplit:
sub $xxx, sp
Now an extra instruction is inserted after the call:
TEXT myfunc
start:
check for split
jmpcond nosplit
call morestack
jmp start
nosplit:
sub $xxx, sp
The jmp is not executed directly. It is decoded and simulated by
runtime.rewindmorestack to discover the beginning of the function,
and then the call to morestack returns directly to the start label
instead of to the jump instruction. So logically the jmp is still
executed, just not by the cpu.
The prologue thus repeats in the case of a function that needs a
stack split, but against the cost of the split itself, the extra few
instructions are noise. The repeated prologue has the nice effect of
making a stack split double-check that the new stack is big enough:
if morestack happens to return on a too-small stack, we'll now notice
before corruption happens.
The ability for newstack to rewind to the beginning of the function
should help preemption too. If newstack decides that it was called
for preemption instead of a stack split, it now has the goroutine state
correctly paused if rescheduling is needed, and when the goroutine
can run again, it can return to the start label on its original stack
and re-execute the split check.
Here is an example of a split stack overflow showing the full
trace, without any special cases in the stack printer.
(This one was triggered by making the split check incorrect.)
runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0]
morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0}
sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700}
runtime: split stack overflow: 0x6aebd0 < 0x6b0000
fatal error: runtime: split stack overflow
goroutine 1 [stack split]:
runtime.mallocgc(0x290, 0x100000000, 0x1)
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8
runtime.new()
/Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08
go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...)
/Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0
main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8
main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...)
/Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98
main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0)
/Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80
----- stack segment boundary -----
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...)
/Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0
main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...)
/Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658
main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...)
/Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68
----- stack segment boundary -----
main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2)
/Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0
main.main()
/Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8
And here is a seg fault during oldstack:
SIGSEGV: segmentation violation
PC=0x1b2a6
runtime.oldstack()
/Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76
runtime.lessstack()
/Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22
goroutine 1 [stack unsplit]:
fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8
fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0
fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...)
/Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40
flag.(*stringValue).String(0x2102c9210, 0x1, 0x0)
/Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0
flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0
flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8
flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38
flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...)
/Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80
testing.init()
/Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0
strings_test.init()
/Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70
main.init()
strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78
runtime.main()
/Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8
goroutine 2 [runnable]:
runtime.MHeap_Scavenger()
/Users/rsc/g/go/src/pkg/runtime/mheap.c:438
runtime.goexit()
/Users/rsc/g/go/src/pkg/runtime/proc.c:1269
created by runtime.main
/Users/rsc/g/go/src/pkg/runtime/proc.c:166
rax 0x23ccc0
rbx 0x23ccc0
rcx 0x0
rdx 0x38
rdi 0x2102c0170
rsi 0x221032cfe0
rbp 0x221032cfa0
rsp 0x7fff5fbff5b0
r8 0x2102c0120
r9 0x221032cfa0
r10 0x221032c000
r11 0x104ce8
r12 0xe5c80
r13 0x1be82baac718
r14 0x13091135f7d69200
r15 0x0
rip 0x1b2a6
rflags 0x10246
cs 0x2b
fs 0x0
gs 0x0
Fixes #5723.
R=r, dvyukov, go.peter.90, dave, iant
CC=golang-dev
https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
|
|
|
|
2013-07-17 10:47:18 -06:00
|
|
|
// Does f mark the top of a goroutine stack?
|
|
|
|
bool
|
|
|
|
runtime·topofstack(Func *f)
|
|
|
|
{
|
|
|
|
return f->entry == (uintptr)runtime·goexit ||
|
|
|
|
f->entry == (uintptr)runtime·mstart ||
|
|
|
|
f->entry == (uintptr)runtime·mcall ||
|
2014-07-30 10:01:52 -06:00
|
|
|
f->entry == (uintptr)runtime·onM ||
|
2013-07-18 14:53:45 -06:00
|
|
|
f->entry == (uintptr)runtime·morestack ||
|
|
|
|
f->entry == (uintptr)runtime·lessstack ||
|
2014-02-13 22:20:51 -07:00
|
|
|
f->entry == (uintptr)_rt0_go ||
|
|
|
|
(runtime·externalthreadhandlerp != 0 && f->entry == runtime·externalthreadhandlerp);
|
2013-07-17 10:47:18 -06:00
|
|
|
}
|
2013-08-16 20:25:26 -06:00
|
|
|
|
2014-08-24 21:27:00 -06:00
|
|
|
void
|
|
|
|
runtime·setmaxthreads_m(void)
|
2013-08-16 20:25:26 -06:00
|
|
|
{
|
2014-08-24 21:27:00 -06:00
|
|
|
int32 in;
|
runtime: use goc2c as much as possible
Package runtime's C functions written to be called from Go
started out written in C using carefully constructed argument
lists and the FLUSH macro to write a result back to memory.
For some functions, the appropriate parameter list ended up
being architecture-dependent due to differences in alignment,
so we added 'goc2c', which takes a .goc file containing Go func
declarations but C bodies, rewrites the Go func declaration to
equivalent C declarations for the target architecture, adds the
needed FLUSH statements, and writes out an equivalent C file.
That C file is compiled as part of package runtime.
Native Client's x86-64 support introduces the most complex
alignment rules yet, breaking many functions that could until
now be portably written in C. Using goc2c for those avoids the
breakage.
Separately, Keith's work on emitting stack information from
the C compiler would require the hand-written functions
to add #pragmas specifying how many arguments are result
parameters. Using goc2c for those avoids maintaining #pragmas.
For both reasons, use goc2c for as many Go-called C functions
as possible.
This CL is a replay of the bulk of CL 15400047 and CL 15790043,
both of which were reviewed as part of the NaCl port and are
checked in to the NaCl branch. This CL is part of bringing the
NaCl code into the main tree.
No new code here, just reformatting and occasional movement
into .h files.
LGTM=r
R=dave, alex.brainman, r
CC=golang-codereviews
https://golang.org/cl/65220044
2014-02-20 13:58:47 -07:00
|
|
|
int32 out;
|
|
|
|
|
2014-08-24 21:27:00 -06:00
|
|
|
in = g->m->scalararg[0];
|
|
|
|
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·lock(&runtime·sched.lock);
|
2013-08-16 20:25:26 -06:00
|
|
|
out = runtime·sched.maxmcount;
|
|
|
|
runtime·sched.maxmcount = in;
|
|
|
|
checkmcount();
|
2014-08-07 07:00:02 -06:00
|
|
|
runtime·unlock(&runtime·sched.lock);
|
2014-08-24 21:27:00 -06:00
|
|
|
|
|
|
|
g->m->scalararg[0] = out;
|
2013-08-16 20:25:26 -06:00
|
|
|
}
|
2013-09-16 18:26:10 -06:00
|
|
|
|
|
|
|
static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
|
|
|
|
|
|
|
|
static bool
|
|
|
|
haveexperiment(int8 *name)
|
|
|
|
{
|
|
|
|
int32 i, j;
|
|
|
|
|
|
|
|
for(i=0; i<sizeof(experiment); i++) {
|
|
|
|
if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
|
|
|
|
for(j=0; name[j]; j++)
|
|
|
|
if(experiment[i+j] != name[j])
|
|
|
|
goto nomatch;
|
|
|
|
if(experiment[i+j] != '\0' && experiment[i+j] != ',')
|
|
|
|
goto nomatch;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
nomatch:;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2014-08-25 23:01:52 -06:00
|
|
|
|
|
|
|
#pragma textflag NOSPLIT
|
|
|
|
void
|
|
|
|
sync·runtime_procPin(intptr p)
|
|
|
|
{
|
|
|
|
M *mp;
|
|
|
|
|
|
|
|
mp = g->m;
|
|
|
|
// Disable preemption.
|
|
|
|
mp->locks++;
|
|
|
|
p = mp->p->id;
|
|
|
|
FLUSH(&p);
|
|
|
|
}
|
|
|
|
|
|
|
|
#pragma textflag NOSPLIT
|
|
|
|
void
|
|
|
|
sync·runtime_procUnpin()
|
|
|
|
{
|
|
|
|
g->m->locks--;
|
|
|
|
}
|