1
0
mirror of https://github.com/golang/go synced 2024-10-04 22:31:22 -06:00
go/src/pkg/runtime/runtime.h

1083 lines
29 KiB
C
Raw Normal View History

2008-06-05 20:38:39 -06:00
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
* basic types
*/
typedef signed char int8;
typedef unsigned char uint8;
typedef signed short int16;
typedef unsigned short uint16;
typedef signed int int32;
typedef unsigned int uint32;
typedef signed long long int int64;
typedef unsigned long long int uint64;
typedef float float32;
typedef double float64;
#ifdef _64BIT
typedef uint64 uintptr;
typedef int64 intptr;
typedef int64 intgo; // Go's int
typedef uint64 uintgo; // Go's uint
#else
typedef uint32 uintptr;
typedef int32 intptr;
typedef int32 intgo; // Go's int
typedef uint32 uintgo; // Go's uint
#endif
2008-06-05 20:38:39 -06:00
/*
* get rid of C types
* the / / / forces a syntax error immediately,
* which will show "last name: XXunsigned".
2008-06-05 20:38:39 -06:00
*/
#define unsigned XXunsigned / / /
#define signed XXsigned / / /
#define char XXchar / / /
#define short XXshort / / /
#define int XXint / / /
#define long XXlong / / /
#define float XXfloat / / /
#define double XXdouble / / /
2008-06-05 20:38:39 -06:00
/*
* defined types
*/
typedef uint8 bool;
typedef uint8 byte;
typedef struct Func Func;
2008-07-13 15:29:46 -06:00
typedef struct G G;
typedef struct Gobuf Gobuf;
typedef struct Lock Lock;
typedef struct M M;
typedef struct P P;
typedef struct Note Note;
typedef struct Slice Slice;
typedef struct Stktop Stktop;
typedef struct String String;
typedef struct FuncVal FuncVal;
typedef struct SigTab SigTab;
typedef struct MCache MCache;
typedef struct FixAlloc FixAlloc;
typedef struct Iface Iface;
typedef struct Itab Itab;
typedef struct InterfaceType InterfaceType;
typedef struct Eface Eface;
typedef struct Type Type;
typedef struct PtrType PtrType;
typedef struct ChanType ChanType;
typedef struct MapType MapType;
2009-01-27 13:03:53 -07:00
typedef struct Defer Defer;
typedef struct Panic Panic;
typedef struct Hmap Hmap;
typedef struct Hchan Hchan;
typedef struct Complex64 Complex64;
typedef struct Complex128 Complex128;
typedef struct LibCall LibCall;
typedef struct SEH SEH;
typedef struct WinCallbackContext WinCallbackContext;
typedef struct Timers Timers;
typedef struct Timer Timer;
typedef struct GCStats GCStats;
typedef struct LFNode LFNode;
typedef struct ParFor ParFor;
typedef struct ParForThread ParForThread;
typedef struct CgoMal CgoMal;
typedef struct PollDesc PollDesc;
typedef struct DebugVars DebugVars;
2008-07-13 15:29:46 -06:00
/*
* Per-CPU declaration.
*
* "extern register" is a special storage class implemented by 6c, 8c, etc.
* On the ARM, it is an actual register; elsewhere it is a slot in thread-
* local storage indexed by a segment register. See zasmhdr in
* src/cmd/dist/buildruntime.c for details, and be aware that the linker may
* make further OS-specific changes to the compiler's output. For example,
* 6l/linux rewrites 0(GS) as -16(FS).
*
* Every C file linked into a Go program must include runtime.h so that the
* C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated
* registers. The Go compiler (6g, 8g, etc.) knows to avoid them.
2008-07-13 15:29:46 -06:00
*/
extern register G* g;
extern register M* m;
2008-07-13 15:29:46 -06:00
/*
* defined constants
*/
enum
{
// G status
//
// If you add to this list, add to the list
// of "okay during garbage collection" status
// in mgc0.c too.
2008-07-13 15:29:46 -06:00
Gidle,
Grunnable,
Grunning,
Gsyscall,
2008-07-14 15:33:39 -06:00
Gwaiting,
Gmoribund_unused, // currently unused, but hardcoded in gdb scripts
2008-07-13 15:29:46 -06:00
Gdead,
};
enum
{
// P status
Pidle,
Prunning,
Psyscall,
Pgcstop,
Pdead,
};
enum
2008-07-13 15:29:46 -06:00
{
true = 1,
false = 0,
};
enum
{
PtrSize = sizeof(void*),
};
enum
{
// Per-M stack segment cache size.
StackCacheSize = 32,
// Global <-> per-M stack segment cache transfer batch size.
StackCacheBatch = 16,
};
2008-07-13 15:29:46 -06:00
/*
* structures
*/
struct Lock
{
// Futex-based impl treats it as uint32 key,
// while sema-based impl as M* waitm.
// Used to be a union, but unions break precise GC.
uintptr key;
};
struct Note
{
// Futex-based impl treats it as uint32 key,
// while sema-based impl as M* waitm.
// Used to be a union, but unions break precise GC.
uintptr key;
};
2008-07-13 15:29:46 -06:00
struct String
2008-06-05 20:38:39 -06:00
{
byte* str;
intgo len;
2008-07-13 15:29:46 -06:00
};
struct FuncVal
{
void (*fn)(void);
// variable-size, fn-specific data here
};
struct Iface
{
Itab* tab;
void* data;
};
struct Eface
{
Type* type;
void* data;
};
struct Complex64
{
float32 real;
float32 imag;
};
struct Complex128
{
float64 real;
float64 imag;
};
2008-08-27 18:28:30 -06:00
struct Slice
2008-08-27 18:28:30 -06:00
{ // must not move anything
byte* array; // actual data
uintgo len; // number of elements
uintgo cap; // allocated number of elements
2008-08-27 18:28:30 -06:00
};
struct Gobuf
{
// The offsets of sp, pc, and g are known to (hard-coded in) libmach.
uintptr sp;
uintptr pc;
G* g;
uintptr ret;
void* ctxt;
uintptr lr;
};
runtime: make GC stats per-M This is factored out part of: https://golang.org/cl/5279048/ (Parallel GC) benchmark old ns/op new ns/op delta garbage.BenchmarkParser 3999106750 3975026500 -0.60% garbage.BenchmarkParser-2 3720553750 3719196500 -0.04% garbage.BenchmarkParser-4 3502857000 3474980500 -0.80% garbage.BenchmarkParser-8 3375448000 3341310500 -1.01% garbage.BenchmarkParserLastPause 329401000 324097000 -1.61% garbage.BenchmarkParserLastPause-2 208953000 214222000 +2.52% garbage.BenchmarkParserLastPause-4 110933000 111656000 +0.65% garbage.BenchmarkParserLastPause-8 71969000 78230000 +8.70% garbage.BenchmarkParserPause 230808842 197237400 -14.55% garbage.BenchmarkParserPause-2 123674365 125197595 +1.23% garbage.BenchmarkParserPause-4 80518525 85710333 +6.45% garbage.BenchmarkParserPause-8 58310243 56940512 -2.35% garbage.BenchmarkTree2 31471700 31289400 -0.58% garbage.BenchmarkTree2-2 21536800 21086300 -2.09% garbage.BenchmarkTree2-4 11074700 10880000 -1.76% garbage.BenchmarkTree2-8 7568600 7351400 -2.87% garbage.BenchmarkTree2LastPause 314664000 312840000 -0.58% garbage.BenchmarkTree2LastPause-2 215319000 210815000 -2.09% garbage.BenchmarkTree2LastPause-4 110698000 108751000 -1.76% garbage.BenchmarkTree2LastPause-8 75635000 73463000 -2.87% garbage.BenchmarkTree2Pause 174280857 173147571 -0.65% garbage.BenchmarkTree2Pause-2 131332714 129665761 -1.27% garbage.BenchmarkTree2Pause-4 93803095 93422904 -0.41% garbage.BenchmarkTree2Pause-8 86242333 85146761 -1.27% R=rsc CC=golang-dev https://golang.org/cl/5987045
2012-04-05 10:48:28 -06:00
struct GCStats
{
// the struct must consist of only uint64's,
// because it is casted to uint64[].
uint64 nhandoff;
uint64 nhandoffcnt;
uint64 nprocyield;
uint64 nosyield;
uint64 nsleep;
};
struct LibCall
{
void (*fn)(void*);
uintptr n; // number of parameters
void* args; // parameters
uintptr r1; // return values
uintptr r2;
uintptr err; // error number
};
struct SEH
{
void* prev;
void* handler;
};
// describes how to handle callback
struct WinCallbackContext
{
void* gobody; // Go function to call
uintptr argsize; // callback arguments size (in bytes)
uintptr restorestack; // adjust stack on return by (in bytes) (386 only)
bool cleanstack;
};
struct G
{
// stackguard0 can be set to StackPreempt as opposed to stackguard
uintptr stackguard0; // cannot move - also known to linker, libmach, runtime/cgo
uintptr stackbase; // cannot move - also known to libmach, runtime/cgo
runtime, cmd/gc, cmd/ld: ignore method wrappers in recover Bug #1: Issue 5406 identified an interesting case: defer iface.M() may end up calling a wrapper that copies an indirect receiver from the iface value and then calls the real M method. That's two calls down, not just one, and so recover() == nil always in the real M method, even during a panic. [For the purposes of this entire discussion, a wrapper's implementation is a function containing an ordinary call, not the optimized tail call form that is somtimes possible. The tail call does not create a second frame, so it is already handled correctly.] Fix this bug by introducing g->panicwrap, which counts the number of bytes on current stack segment that are due to wrapper calls that should not count against the recover check. All wrapper functions must now adjust g->panicwrap up on entry and back down on exit. This adds slightly to their expense; on the x86 it is a single instruction at entry and exit; on the ARM it is three. However, the alternative is to make a call to recover depend on being able to walk the stack, which I very much want to avoid. We have enough problems walking the stack for garbage collection and profiling. Also, if performance is critical in a specific case, it is already faster to use a pointer receiver and avoid this kind of wrapper entirely. Bug #2: The old code, which did not consider the possibility of two calls, already contained a check to see if the call had split its stack and so the panic-created segment was one behind the current segment. In the wrapper case, both of the two calls might split their stacks, so the panic-created segment can be two behind the current segment. Fix this by propagating the Stktop.panic flag forward during stack splits instead of looking backward during recover. Fixes #5406. R=golang-dev, iant CC=golang-dev https://golang.org/cl/13367052
2013-09-12 12:00:16 -06:00
uint32 panicwrap; // cannot move - also known to linker
uint32 selgen; // valid sudog pointer
Defer* defer;
Panic* panic;
Gobuf sched;
uintptr syscallstack; // if status==Gsyscall, syscallstack = stackbase to use during gc
uintptr syscallsp; // if status==Gsyscall, syscallsp = sched.sp to use during gc
uintptr syscallpc; // if status==Gsyscall, syscallpc = sched.pc to use during gc
uintptr syscallguard; // if status==Gsyscall, syscallguard = stackguard to use during gc
uintptr stackguard; // same as stackguard0, but not set to StackPreempt
uintptr stack0;
uintptr stacksize;
void* param; // passed parameter on wakeup
int16 status;
int64 goid;
int64 waitsince; // approx time when the G become blocked
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
int8* waitreason; // if status==Gwaiting
G* schedlink;
bool ispanic;
bool issystem; // do not output in stack dump
bool isbackground; // ignore in deadlock detector
bool preempt; // preemption signal, duplicates stackguard0 = StackPreempt
int8 raceignore; // ignore race detection events
M* m; // for debuggers, but offset not hard-coded
M* lockedm;
int32 sig;
int32 writenbuf;
byte* writebuf;
uintptr sigcode0;
uintptr sigcode1;
uintptr sigpc;
uintptr gopc; // pc of go statement that created this goroutine
uintptr racectx;
uintptr end[];
};
struct M
{
G* g0; // goroutine with scheduling stack
void* moreargp; // argument pointer for more stack
Gobuf morebuf; // gobuf arg to morestack
// Fields not known to debuggers.
uint32 moreframesize; // size arguments to morestack
uint32 moreargsize;
uintptr cret; // return value from C
uint64 procid; // for debuggers, but offset not hard-coded
G* gsignal; // signal-handling G
uintptr tls[4]; // thread-local storage (for x86 extern register)
void (*mstartfn)(void);
G* curg; // current running goroutine
runtime: record proper goroutine state during stack split Until now, the goroutine state has been scattered during the execution of newstack and oldstack. It's all there, and those routines know how to get back to a working goroutine, but other pieces of the system, like stack traces, do not. If something does interrupt the newstack or oldstack execution, the rest of the system can't understand the goroutine. For example, if newstack decides there is an overflow and calls throw, the stack tracer wouldn't dump the goroutine correctly. For newstack to save a useful state snapshot, it needs to be able to rewind the PC in the function that triggered the split back to the beginning of the function. (The PC is a few instructions in, just after the call to morestack.) To make that possible, we change the prologues to insert a jmp back to the beginning of the function after the call to morestack. That is, the prologue used to be roughly: TEXT myfunc check for split jmpcond nosplit call morestack nosplit: sub $xxx, sp Now an extra instruction is inserted after the call: TEXT myfunc start: check for split jmpcond nosplit call morestack jmp start nosplit: sub $xxx, sp The jmp is not executed directly. It is decoded and simulated by runtime.rewindmorestack to discover the beginning of the function, and then the call to morestack returns directly to the start label instead of to the jump instruction. So logically the jmp is still executed, just not by the cpu. The prologue thus repeats in the case of a function that needs a stack split, but against the cost of the split itself, the extra few instructions are noise. The repeated prologue has the nice effect of making a stack split double-check that the new stack is big enough: if morestack happens to return on a too-small stack, we'll now notice before corruption happens. The ability for newstack to rewind to the beginning of the function should help preemption too. If newstack decides that it was called for preemption instead of a stack split, it now has the goroutine state correctly paused if rescheduling is needed, and when the goroutine can run again, it can return to the start label on its original stack and re-execute the split check. Here is an example of a split stack overflow showing the full trace, without any special cases in the stack printer. (This one was triggered by making the split check incorrect.) runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0] morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0} sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700} runtime: split stack overflow: 0x6aebd0 < 0x6b0000 fatal error: runtime: split stack overflow goroutine 1 [stack split]: runtime.mallocgc(0x290, 0x100000000, 0x1) /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8 runtime.new() /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08 go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...) /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0 main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8 main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98 main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0) /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80 ----- stack segment boundary ----- main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...) /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0 main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...) /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658 main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...) /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68 ----- stack segment boundary ----- main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2) /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0 main.main() /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8 And here is a seg fault during oldstack: SIGSEGV: segmentation violation PC=0x1b2a6 runtime.oldstack() /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76 runtime.lessstack() /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22 goroutine 1 [stack unsplit]: fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8 fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0 fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40 flag.(*stringValue).String(0x2102c9210, 0x1, 0x0) /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0 flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0 flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8 flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38 flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80 testing.init() /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0 strings_test.init() /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70 main.init() strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8 goroutine 2 [runnable]: runtime.MHeap_Scavenger() /Users/rsc/g/go/src/pkg/runtime/mheap.c:438 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 created by runtime.main /Users/rsc/g/go/src/pkg/runtime/proc.c:166 rax 0x23ccc0 rbx 0x23ccc0 rcx 0x0 rdx 0x38 rdi 0x2102c0170 rsi 0x221032cfe0 rbp 0x221032cfa0 rsp 0x7fff5fbff5b0 r8 0x2102c0120 r9 0x221032cfa0 r10 0x221032c000 r11 0x104ce8 r12 0xe5c80 r13 0x1be82baac718 r14 0x13091135f7d69200 r15 0x0 rip 0x1b2a6 rflags 0x10246 cs 0x2b fs 0x0 gs 0x0 Fixes #5723. R=r, dvyukov, go.peter.90, dave, iant CC=golang-dev https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
G* caughtsig; // goroutine running during fatal signal
P* p; // attached P for executing Go code (nil if not executing Go code)
P* nextp;
int32 id;
int32 mallocing;
int32 throwing;
int32 gcing;
int32 locks;
int32 dying;
int32 profilehz;
int32 helpgc;
bool spinning;
uint32 fastrand;
uint64 ncgocall; // number of cgo calls in total
int32 ncgo; // number of cgo calls currently in progress
CgoMal* cgomal;
Note park;
M* alllink; // on allm
M* schedlink;
uint32 machport; // Return address for Mach IPC (OS X)
MCache* mcache;
int32 stackinuse;
uint32 stackcachepos;
uint32 stackcachecnt;
void* stackcache[StackCacheSize];
G* lockedg;
uintptr createstack[32];// Stack that created this thread.
uint32 freglo[16]; // D[i] lsb and F[i]
uint32 freghi[16]; // D[i] msb and F[i+16]
uint32 fflag; // floating point compare flags
uint32 locked; // tracking for LockOSThread
M* nextwaitm; // next M waiting for lock
uintptr waitsema; // semaphore for parking on locks
uint32 waitsemacount;
uint32 waitsemalock;
runtime: make GC stats per-M This is factored out part of: https://golang.org/cl/5279048/ (Parallel GC) benchmark old ns/op new ns/op delta garbage.BenchmarkParser 3999106750 3975026500 -0.60% garbage.BenchmarkParser-2 3720553750 3719196500 -0.04% garbage.BenchmarkParser-4 3502857000 3474980500 -0.80% garbage.BenchmarkParser-8 3375448000 3341310500 -1.01% garbage.BenchmarkParserLastPause 329401000 324097000 -1.61% garbage.BenchmarkParserLastPause-2 208953000 214222000 +2.52% garbage.BenchmarkParserLastPause-4 110933000 111656000 +0.65% garbage.BenchmarkParserLastPause-8 71969000 78230000 +8.70% garbage.BenchmarkParserPause 230808842 197237400 -14.55% garbage.BenchmarkParserPause-2 123674365 125197595 +1.23% garbage.BenchmarkParserPause-4 80518525 85710333 +6.45% garbage.BenchmarkParserPause-8 58310243 56940512 -2.35% garbage.BenchmarkTree2 31471700 31289400 -0.58% garbage.BenchmarkTree2-2 21536800 21086300 -2.09% garbage.BenchmarkTree2-4 11074700 10880000 -1.76% garbage.BenchmarkTree2-8 7568600 7351400 -2.87% garbage.BenchmarkTree2LastPause 314664000 312840000 -0.58% garbage.BenchmarkTree2LastPause-2 215319000 210815000 -2.09% garbage.BenchmarkTree2LastPause-4 110698000 108751000 -1.76% garbage.BenchmarkTree2LastPause-8 75635000 73463000 -2.87% garbage.BenchmarkTree2Pause 174280857 173147571 -0.65% garbage.BenchmarkTree2Pause-2 131332714 129665761 -1.27% garbage.BenchmarkTree2Pause-4 93803095 93422904 -0.41% garbage.BenchmarkTree2Pause-8 86242333 85146761 -1.27% R=rsc CC=golang-dev https://golang.org/cl/5987045
2012-04-05 10:48:28 -06:00
GCStats gcstats;
bool racecall;
bool needextram;
void (*waitunlockf)(Lock*);
void* waitlock;
uintptr settype_buf[1024];
uintptr settype_bufsize;
#ifdef GOOS_windows
void* thread; // thread handle
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
LibCall libcall;
#endif
#ifdef GOOS_solaris
int32* perrno; // pointer to TLS errno
// these are here because they are too large to be on the stack
// of low-level NOSPLIT functions.
LibCall libcall;
struct {
int64 tv_sec;
int64 tv_nsec;
} ts;
struct {
uintptr v[6];
} scratch;
#endif
#ifdef GOOS_plan9
int8* notesig;
byte* errstr;
#endif
SEH* seh;
uintptr end[];
};
runtime: stack split + garbage collection bug The g->sched.sp saved stack pointer and the g->stackbase and g->stackguard stack bounds can change even while "the world is stopped", because a goroutine has to call functions (and therefore might split its stack) when exiting a system call to check whether the world is stopped (and if so, wait until the world continues). That means the garbage collector cannot access those values safely (without a race) for goroutines executing system calls. Instead, save a consistent triple in g->gcsp, g->gcstack, g->gcguard during entersyscall and have the garbage collector refer to those. The old code was occasionally seeing (because of the race) an sp and stk that did not correspond to each other, so that stk - sp was not the number of stack bytes following sp. In that case, if sp < stk then the call scanblock(sp, stk - sp) scanned too many bytes (anything between the two pointers, which pointed into different allocation blocks). If sp > stk then stk - sp wrapped around. On 32-bit, stk - sp is a uintptr (uint32) converted to int64 in the call to scanblock, so a large (~4G) but positive number. Scanblock would try to scan that many bytes and eventually fault accessing unmapped memory. On 64-bit, stk - sp is a uintptr (uint64) promoted to int64 in the call to scanblock, so a negative number. Scanblock would not scan anything, possibly causing in-use blocks to be freed. In short, 32-bit platforms would have seen either ineffective garbage collection or crashes during garbage collection, while 64-bit platforms would have seen either ineffective or incorrect garbage collection. You can see the invalid arguments to scanblock in the stack traces in issue 1620. Fixes #1620. Fixes #1746. R=iant, r CC=golang-dev https://golang.org/cl/4437075
2011-04-27 21:21:12 -06:00
struct P
{
Lock;
int32 id;
uint32 status; // one of Pidle/Prunning/...
P* link;
uint32 schedtick; // incremented on every scheduler call
uint32 syscalltick; // incremented on every system call
M* m; // back-link to associated M (nil if idle)
MCache* mcache;
Defer* deferpool[5]; // pool of available Defer structs of different sizes (see panic.c)
// Queue of runnable goroutines.
uint32 runqhead;
uint32 runqtail;
G* runq[256];
// Available G's (status == Gdead)
G* gfree;
int32 gfreecnt;
byte pad[64];
};
// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
// External locks are not recursive; a second lock is silently ignored.
// The upper bits of m->lockedcount record the nesting depth of calls to lockOSThread
// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
// goroutine is holding the lock during the initialization phase.
enum
{
LockExternal = 1,
LockInternal = 2,
};
2008-07-14 15:33:39 -06:00
struct Stktop
2008-07-13 15:29:46 -06:00
{
// The offsets of these fields are known to (hard-coded in) libmach.
uintptr stackguard;
uintptr stackbase;
Gobuf gobuf;
uint32 argsize;
runtime, cmd/gc, cmd/ld: ignore method wrappers in recover Bug #1: Issue 5406 identified an interesting case: defer iface.M() may end up calling a wrapper that copies an indirect receiver from the iface value and then calls the real M method. That's two calls down, not just one, and so recover() == nil always in the real M method, even during a panic. [For the purposes of this entire discussion, a wrapper's implementation is a function containing an ordinary call, not the optimized tail call form that is somtimes possible. The tail call does not create a second frame, so it is already handled correctly.] Fix this bug by introducing g->panicwrap, which counts the number of bytes on current stack segment that are due to wrapper calls that should not count against the recover check. All wrapper functions must now adjust g->panicwrap up on entry and back down on exit. This adds slightly to their expense; on the x86 it is a single instruction at entry and exit; on the ARM it is three. However, the alternative is to make a call to recover depend on being able to walk the stack, which I very much want to avoid. We have enough problems walking the stack for garbage collection and profiling. Also, if performance is critical in a specific case, it is already faster to use a pointer receiver and avoid this kind of wrapper entirely. Bug #2: The old code, which did not consider the possibility of two calls, already contained a check to see if the call had split its stack and so the panic-created segment was one behind the current segment. In the wrapper case, both of the two calls might split their stacks, so the panic-created segment can be two behind the current segment. Fix this by propagating the Stktop.panic flag forward during stack splits instead of looking backward during recover. Fixes #5406. R=golang-dev, iant CC=golang-dev https://golang.org/cl/13367052
2013-09-12 12:00:16 -06:00
uint32 panicwrap;
uint8* argp; // pointer to arguments in old frame
uintptr free; // if free>0, call stackfree using free as size
bool panic; // is this frame the top of a panic?
};
2008-07-13 15:29:46 -06:00
struct SigTab
2008-06-05 20:38:39 -06:00
{
int32 flags;
2008-07-13 15:29:46 -06:00
int8 *name;
2008-06-05 20:38:39 -06:00
};
enum
{
SigNotify = 1<<0, // let signal.Notify have signal, even if from kernel
SigKill = 1<<1, // if signal.Notify doesn't take it, exit quietly
SigThrow = 1<<2, // if signal.Notify doesn't take it, exit loudly
SigPanic = 1<<3, // if the signal is from the kernel, panic
SigDefault = 1<<4, // if the signal isn't explicitly requested, don't monitor it
SigHandling = 1<<5, // our signal handler is registered
SigIgnored = 1<<6, // the signal was ignored before we registered for it
};
2008-06-05 20:38:39 -06:00
// Layout of in-memory per-function information prepared by linker
// See http://golang.org/s/go12symtab.
// Keep in sync with linker and with ../../libmach/sym.c
// and with package debug/gosym.
struct Func
{
uintptr entry; // start pc
int32 nameoff;// function name
int32 args; // in/out args size
int32 frame; // legacy frame size; use pcsp if possible
int32 pcsp;
int32 pcfile;
int32 pcln;
int32 npcdata;
int32 nfuncdata;
};
// layout of Itab known to compilers
// allocated in non-garbage-collected memory
struct Itab
{
InterfaceType* inter;
Type* type;
Itab* link;
int32 bad;
int32 unused;
void (*fun[])(void);
};
#ifdef GOOS_windows
enum {
Windows = 1
};
#else
enum {
Windows = 0
};
#endif
#ifdef GOOS_solaris
enum {
Solaris = 1
};
#else
enum {
Solaris = 0
};
#endif
struct Timers
{
Lock;
G *timerproc;
bool sleeping;
bool rescheduling;
Note waitnote;
Timer **t;
int32 len;
int32 cap;
};
// Package time knows the layout of this structure.
// If this struct changes, adjust ../time/sleep.go:/runtimeTimer.
struct Timer
{
int32 i; // heap index
// Timer wakes up at when, and then at when+period, ... (period > 0 only)
// each time calling f(now, arg) in the timer goroutine, so f must be
// a well-behaved function and not block.
int64 when;
int64 period;
FuncVal *fv;
Eface arg;
};
// Lock-free stack node.
struct LFNode
{
LFNode *next;
uintptr pushcnt;
};
// Parallel for descriptor.
struct ParFor
{
void (*body)(ParFor*, uint32); // executed for each element
uint32 done; // number of idle threads
uint32 nthr; // total number of threads
uint32 nthrmax; // maximum number of threads
uint32 thrseq; // thread id sequencer
uint32 cnt; // iteration space [0, cnt)
void *ctx; // arbitrary user context
bool wait; // if true, wait while all threads finish processing,
// otherwise parfor may return while other threads are still working
ParForThread *thr; // array of thread descriptors
uint32 pad; // to align ParForThread.pos for 64-bit atomic operations
// stats
uint64 nsteal;
uint64 nstealcnt;
uint64 nprocyield;
uint64 nosyield;
uint64 nsleep;
};
// Track memory allocated by code not written in Go during a cgo call,
// so that the garbage collector can see them.
struct CgoMal
{
CgoMal *next;
void *alloc;
};
// Holds variables parsed from GODEBUG env var.
struct DebugVars
{
int32 allocfreetrace;
int32 efence;
int32 gctrace;
int32 scheddetail;
int32 schedtrace;
};
extern bool runtime·precisestack;
2008-06-05 20:38:39 -06:00
/*
* defined macros
* you need super-gopher-guru privilege
2008-06-05 20:38:39 -06:00
* to add this list.
*/
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
#define nil ((void*)0)
#define offsetof(s,m) (uint32)(&(((s*)0)->m))
#define ROUND(x, n) (((x)+(n)-1)&~((n)-1)) /* all-caps to mark as macro: it evaluates n twice */
2008-06-05 20:38:39 -06:00
/*
* known to compiler
*/
enum {
Structrnd = sizeof(uintptr)
};
/*
* type algorithms - known to compiler
*/
enum
{
AMEM,
AMEM0,
AMEM8,
AMEM16,
AMEM32,
AMEM64,
AMEM128,
ANOEQ,
ANOEQ0,
ANOEQ8,
ANOEQ16,
ANOEQ32,
ANOEQ64,
ANOEQ128,
ASTRING,
AINTER,
ANILINTER,
ASLICE,
AFLOAT32,
AFLOAT64,
ACPLX64,
ACPLX128,
Amax
};
typedef struct Alg Alg;
struct Alg
{
void (*hash)(uintptr*, uintptr, void*);
void (*equal)(bool*, uintptr, void*, void*);
void (*print)(uintptr, void*);
void (*copy)(uintptr, void*, void*);
};
extern Alg runtime·algarray[Amax];
byte* runtime·startup_random_data;
uint32 runtime·startup_random_data_len;
void runtime·get_random_data(byte**, int32*);
enum {
// hashinit wants this many random bytes
HashRandomBytes = 32
};
void runtime·hashinit(void);
void runtime·memhash(uintptr*, uintptr, void*);
void runtime·nohash(uintptr*, uintptr, void*);
void runtime·strhash(uintptr*, uintptr, void*);
void runtime·interhash(uintptr*, uintptr, void*);
void runtime·nilinterhash(uintptr*, uintptr, void*);
void runtime·aeshash(uintptr*, uintptr, void*);
void runtime·aeshash32(uintptr*, uintptr, void*);
void runtime·aeshash64(uintptr*, uintptr, void*);
void runtime·aeshashstr(uintptr*, uintptr, void*);
void runtime·memequal(bool*, uintptr, void*, void*);
void runtime·noequal(bool*, uintptr, void*, void*);
void runtime·strequal(bool*, uintptr, void*, void*);
void runtime·interequal(bool*, uintptr, void*, void*);
void runtime·nilinterequal(bool*, uintptr, void*, void*);
bool runtime·memeq(void*, void*, uintptr);
void runtime·memprint(uintptr, void*);
void runtime·strprint(uintptr, void*);
void runtime·interprint(uintptr, void*);
void runtime·nilinterprint(uintptr, void*);
void runtime·memcopy(uintptr, void*, void*);
void runtime·memcopy8(uintptr, void*, void*);
void runtime·memcopy16(uintptr, void*, void*);
void runtime·memcopy32(uintptr, void*, void*);
void runtime·memcopy64(uintptr, void*, void*);
void runtime·memcopy128(uintptr, void*, void*);
void runtime·strcopy(uintptr, void*, void*);
void runtime·algslicecopy(uintptr, void*, void*);
void runtime·intercopy(uintptr, void*, void*);
void runtime·nilintercopy(uintptr, void*, void*);
2009-01-27 13:03:53 -07:00
/*
2009-01-27 14:23:28 -07:00
* deferred subroutine calls
2009-01-27 13:03:53 -07:00
*/
struct Defer
{
int32 siz;
bool special; // not part of defer frame
byte* argp; // where args were copied from
byte* pc;
FuncVal* fn;
2009-01-27 13:03:53 -07:00
Defer* link;
void* args[1]; // padded to actual size
2009-01-27 13:03:53 -07:00
};
/*
* panics
*/
struct Panic
{
Eface arg; // argument to panic
uintptr stackbase; // g->stackbase in panic
Panic* link; // link to earlier panic
bool recovered; // whether this panic is over
};
/*
* stack traces
*/
typedef struct Stkframe Stkframe;
struct Stkframe
{
Func* fn; // function being run
uintptr pc; // program counter within fn
uintptr lr; // program counter at caller aka link register
uintptr sp; // stack pointer at pc
uintptr fp; // stack pointer at caller aka frame pointer
byte* varp; // top of local variables
byte* argp; // pointer to function arguments
uintptr arglen; // number of bytes at argp
};
runtime: record proper goroutine state during stack split Until now, the goroutine state has been scattered during the execution of newstack and oldstack. It's all there, and those routines know how to get back to a working goroutine, but other pieces of the system, like stack traces, do not. If something does interrupt the newstack or oldstack execution, the rest of the system can't understand the goroutine. For example, if newstack decides there is an overflow and calls throw, the stack tracer wouldn't dump the goroutine correctly. For newstack to save a useful state snapshot, it needs to be able to rewind the PC in the function that triggered the split back to the beginning of the function. (The PC is a few instructions in, just after the call to morestack.) To make that possible, we change the prologues to insert a jmp back to the beginning of the function after the call to morestack. That is, the prologue used to be roughly: TEXT myfunc check for split jmpcond nosplit call morestack nosplit: sub $xxx, sp Now an extra instruction is inserted after the call: TEXT myfunc start: check for split jmpcond nosplit call morestack jmp start nosplit: sub $xxx, sp The jmp is not executed directly. It is decoded and simulated by runtime.rewindmorestack to discover the beginning of the function, and then the call to morestack returns directly to the start label instead of to the jump instruction. So logically the jmp is still executed, just not by the cpu. The prologue thus repeats in the case of a function that needs a stack split, but against the cost of the split itself, the extra few instructions are noise. The repeated prologue has the nice effect of making a stack split double-check that the new stack is big enough: if morestack happens to return on a too-small stack, we'll now notice before corruption happens. The ability for newstack to rewind to the beginning of the function should help preemption too. If newstack decides that it was called for preemption instead of a stack split, it now has the goroutine state correctly paused if rescheduling is needed, and when the goroutine can run again, it can return to the start label on its original stack and re-execute the split check. Here is an example of a split stack overflow showing the full trace, without any special cases in the stack printer. (This one was triggered by making the split check incorrect.) runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0] morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0} sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700} runtime: split stack overflow: 0x6aebd0 < 0x6b0000 fatal error: runtime: split stack overflow goroutine 1 [stack split]: runtime.mallocgc(0x290, 0x100000000, 0x1) /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8 runtime.new() /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08 go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...) /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0 main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8 main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98 main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0) /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80 ----- stack segment boundary ----- main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...) /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0 main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...) /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658 main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...) /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68 ----- stack segment boundary ----- main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2) /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0 main.main() /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8 And here is a seg fault during oldstack: SIGSEGV: segmentation violation PC=0x1b2a6 runtime.oldstack() /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76 runtime.lessstack() /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22 goroutine 1 [stack unsplit]: fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8 fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0 fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40 flag.(*stringValue).String(0x2102c9210, 0x1, 0x0) /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0 flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0 flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8 flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38 flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80 testing.init() /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0 strings_test.init() /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70 main.init() strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8 goroutine 2 [runnable]: runtime.MHeap_Scavenger() /Users/rsc/g/go/src/pkg/runtime/mheap.c:438 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 created by runtime.main /Users/rsc/g/go/src/pkg/runtime/proc.c:166 rax 0x23ccc0 rbx 0x23ccc0 rcx 0x0 rdx 0x38 rdi 0x2102c0170 rsi 0x221032cfe0 rbp 0x221032cfa0 rsp 0x7fff5fbff5b0 r8 0x2102c0120 r9 0x221032cfa0 r10 0x221032c000 r11 0x104ce8 r12 0xe5c80 r13 0x1be82baac718 r14 0x13091135f7d69200 r15 0x0 rip 0x1b2a6 rflags 0x10246 cs 0x2b fs 0x0 gs 0x0 Fixes #5723. R=r, dvyukov, go.peter.90, dave, iant CC=golang-dev https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
int32 runtime·gentraceback(uintptr, uintptr, uintptr, G*, int32, uintptr*, int32, void(*)(Stkframe*, void*), void*, bool);
void runtime·traceback(uintptr pc, uintptr sp, uintptr lr, G* gp);
void runtime·tracebackothers(G*);
bool runtime·haszeroargs(uintptr pc);
bool runtime·topofstack(Func*);
2008-07-13 15:29:46 -06:00
/*
* external data
*/
extern String runtime·emptystring;
extern uintptr runtime·zerobase;
extern G** runtime·allg;
extern uintptr runtime·allglen;
extern G* runtime·lastg;
extern M* runtime·allm;
extern P** runtime·allp;
extern int32 runtime·gomaxprocs;
extern uint32 runtime·needextram;
extern uint32 runtime·panicking;
extern int8* runtime·goos;
extern int32 runtime·ncpu;
extern bool runtime·iscgo;
extern void (*runtime·sysargs)(int32, uint8**);
extern uintptr runtime·maxstring;
extern uint32 runtime·Hchansize;
extern uint32 runtime·cpuid_ecx;
extern uint32 runtime·cpuid_edx;
extern DebugVars runtime·debug;
extern uintptr runtime·maxstacksize;
2008-07-13 15:29:46 -06:00
/*
* common functions and data
*/
int32 runtime·strcmp(byte*, byte*);
runtime: improve Linux mutex The implementation is hybrid active/passive spin/blocking mutex. The design minimizes amount of context switches and futex calls. The idea is that all critical sections in runtime are intentially small, so pure blocking mutex behaves badly causing a lot of context switches, thread parking/unparking and kernel calls. Note that some synthetic benchmarks become somewhat slower, that's due to increased contention on other data structures, it should not affect programs that do any real work. On 2 x Intel E5620, 8 HT cores, 2.4GHz benchmark old ns/op new ns/op delta BenchmarkSelectContended 521.00 503.00 -3.45% BenchmarkSelectContended-2 661.00 320.00 -51.59% BenchmarkSelectContended-4 1139.00 629.00 -44.78% BenchmarkSelectContended-8 2870.00 878.00 -69.41% BenchmarkSelectContended-16 5276.00 818.00 -84.50% BenchmarkChanContended 112.00 103.00 -8.04% BenchmarkChanContended-2 631.00 174.00 -72.42% BenchmarkChanContended-4 682.00 272.00 -60.12% BenchmarkChanContended-8 1601.00 520.00 -67.52% BenchmarkChanContended-16 3100.00 372.00 -88.00% BenchmarkChanSync 253.00 239.00 -5.53% BenchmarkChanSync-2 5030.00 4648.00 -7.59% BenchmarkChanSync-4 4826.00 4694.00 -2.74% BenchmarkChanSync-8 4778.00 4713.00 -1.36% BenchmarkChanSync-16 5289.00 4710.00 -10.95% BenchmarkChanProdCons0 273.00 254.00 -6.96% BenchmarkChanProdCons0-2 599.00 400.00 -33.22% BenchmarkChanProdCons0-4 1168.00 659.00 -43.58% BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66% BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29% BenchmarkChanProdCons10 150.00 140.00 -6.67% BenchmarkChanProdCons10-2 607.00 268.00 -55.85% BenchmarkChanProdCons10-4 1137.00 404.00 -64.47% BenchmarkChanProdCons10-8 2115.00 828.00 -60.85% BenchmarkChanProdCons10-16 4283.00 855.00 -80.04% BenchmarkChanProdCons100 117.00 110.00 -5.98% BenchmarkChanProdCons100-2 558.00 218.00 -60.93% BenchmarkChanProdCons100-4 722.00 287.00 -60.25% BenchmarkChanProdCons100-8 1840.00 431.00 -76.58% BenchmarkChanProdCons100-16 3394.00 448.00 -86.80% BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89% BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63% BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06% BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53% BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29% BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47% BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01% BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93% BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12% BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98% BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70% BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21% BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14% BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82% BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75% BenchmarkSyscall 34.40 33.30 -3.20% BenchmarkSyscall-2 160.00 121.00 -24.38% BenchmarkSyscall-4 131.00 136.00 +3.82% BenchmarkSyscall-8 139.00 131.00 -5.76% BenchmarkSyscall-16 161.00 168.00 +4.35% BenchmarkSyscallWork 950.00 950.00 +0.00% BenchmarkSyscallWork-2 481.00 480.00 -0.21% BenchmarkSyscallWork-4 268.00 270.00 +0.75% BenchmarkSyscallWork-8 156.00 169.00 +8.33% BenchmarkSyscallWork-16 188.00 184.00 -2.13% BenchmarkSemaSyntNonblock 36.40 35.60 -2.20% BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59% BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29% BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00% BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82% BenchmarkSemaSyntBlock 35.30 35.30 +0.00% BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08% BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86% BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90% BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36% BenchmarkSemaWorkNonblock 810.00 811.00 +0.12% BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03% BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20% BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00% BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85% BenchmarkSemaWorkBlock 810.00 811.00 +0.12% BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64% BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82% BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52% BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82% BenchmarkContendedSemaphore 123.00 102.00 -17.07% BenchmarkContendedSemaphore-2 34.80 34.90 +0.29% BenchmarkContendedSemaphore-4 34.70 34.80 +0.29% BenchmarkContendedSemaphore-8 34.70 34.70 +0.00% BenchmarkContendedSemaphore-16 34.80 34.70 -0.29% BenchmarkMutex 26.80 26.00 -2.99% BenchmarkMutex-2 108.00 45.20 -58.15% BenchmarkMutex-4 103.00 127.00 +23.30% BenchmarkMutex-8 109.00 147.00 +34.86% BenchmarkMutex-16 102.00 152.00 +49.02% BenchmarkMutexSlack 27.00 26.90 -0.37% BenchmarkMutexSlack-2 149.00 165.00 +10.74% BenchmarkMutexSlack-4 121.00 209.00 +72.73% BenchmarkMutexSlack-8 101.00 158.00 +56.44% BenchmarkMutexSlack-16 97.00 129.00 +32.99% BenchmarkMutexWork 792.00 794.00 +0.25% BenchmarkMutexWork-2 407.00 409.00 +0.49% BenchmarkMutexWork-4 220.00 209.00 -5.00% BenchmarkMutexWork-8 267.00 160.00 -40.07% BenchmarkMutexWork-16 315.00 300.00 -4.76% BenchmarkMutexWorkSlack 792.00 793.00 +0.13% BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49% BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78% BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25% BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00% BenchmarkRWMutexWrite100 27.10 27.00 -0.37% BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26% BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04% BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92% BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35% BenchmarkRWMutexWrite10 29.60 29.40 -0.68% BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68% BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96% BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31% BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20% BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38% BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19% BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34% BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25% BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08% BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00% BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29% BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31% BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84% BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48% R=rsc, iant, msolo, fw, iant CC=golang-dev https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
byte* runtime·strstr(byte*, byte*);
intgo runtime·findnull(byte*);
intgo runtime·findnullw(uint16*);
void runtime·dump(byte*, int32);
int32 runtime·runetochar(byte*, int32);
int32 runtime·charntorune(int32*, uint8*, int32);
2008-06-05 20:38:39 -06:00
/*
2008-06-16 23:34:50 -06:00
* very low level c-called
2008-06-05 20:38:39 -06:00
*/
#define FLUSH(x) USED(x)
void runtime·gogo(Gobuf*);
void runtime·gostartcall(Gobuf*, void(*)(void), void*);
void runtime·gostartcallfn(Gobuf*, FuncVal*);
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
void runtime·gosave(Gobuf*);
void runtime·lessstack(void);
void runtime·goargs(void);
void runtime·goenvs(void);
void runtime·goenvs_unix(void);
void* runtime·getu(void);
void runtime·throw(int8*);
void runtime·panicstring(int8*);
void runtime·prints(int8*);
void runtime·printf(int8*, ...);
byte* runtime·mchr(byte*, byte, byte*);
int32 runtime·mcmp(byte*, byte*, uintptr);
void runtime·memmove(void*, void*, uintptr);
void* runtime·mal(uintptr);
String runtime·catstring(String, String);
String runtime·gostring(byte*);
String runtime·gostringn(byte*, intgo);
Slice runtime·gobytes(byte*, intgo);
String runtime·gostringnocopy(byte*);
String runtime·gostringw(uint16*);
void runtime·initsig(void);
void runtime·sigenable(uint32 sig);
void runtime·sigdisable(uint32 sig);
int32 runtime·gotraceback(bool *crash);
runtime: simplify stack traces Make the stack traces more readable for new Go programmers while preserving their utility for old hands. - Change status number [4] to string. - Elide frames in runtime package (internal details). - Swap file:line and arguments. - Drop 'created by' for main goroutine. - Show goroutines in order of allocation: implies main goroutine first if nothing else. There is no option to get the extra frames back. Uncomment 'return 1' at the bottom of symtab.c. $ 6.out throw: all goroutines are asleep - deadlock! goroutine 1 [chan send]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:22 +0x8a goroutine 2 [select (no cases)]: main.sel() /Users/rsc/g/go/src/pkg/runtime/x.go:11 +0x18 created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:19 +0x23 goroutine 3 [chan receive]: main.recv(0xf8400010a0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:20 +0x50 goroutine 4 [chan receive (nil chan)]: main.recv(0x0, 0x0) /Users/rsc/g/go/src/pkg/runtime/x.go:15 +0x2e created by main.main /Users/rsc/g/go/src/pkg/runtime/x.go:21 +0x66 $ $ 6.out index panic: runtime error: index out of range goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:25 +0xb9 $ $ 6.out nil panic: runtime error: invalid memory address or nil pointer dereference [signal 0xb code=0x1 addr=0x0 pc=0x22ca] goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:28 +0x211 $ $ 6.out panic panic: panic goroutine 1 [running]: main.main() /Users/rsc/g/go/src/pkg/runtime/x.go:30 +0x101 $ R=golang-dev, qyzhai, n13m3y3r, r CC=golang-dev https://golang.org/cl/4907048
2011-08-22 21:26:39 -06:00
void runtime·goroutineheader(G*);
int32 runtime·open(int8*, int32, int32);
int32 runtime·read(int32, void*, int32);
int32 runtime·write(int32, void*, int32);
int32 runtime·close(int32);
int32 runtime·mincore(void*, uintptr, byte*);
bool runtime·cas(uint32*, uint32, uint32);
bool runtime·cas64(uint64*, uint64, uint64);
bool runtime·casp(void**, void*, void*);
runtime: replace Semacquire/Semrelease implementation 1. The implementation uses distributed hash table of waitlists instead of a centralized one. It significantly improves scalability for uncontended semaphores. 2. The implementation provides wait-free fast-path for signalers. 3. The implementation uses less locks (1 lock/unlock instead of 5 for Semacquire). 4. runtime·ready() call is moved out of critical section. 5. Semacquire() does not call semwake(). Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows: benchmark old ns/op new ns/op delta runtime_test.BenchmarkSemaUncontended 58.20 36.30 -37.63% runtime_test.BenchmarkSemaUncontended-2 199.00 18.30 -90.80% runtime_test.BenchmarkSemaUncontended-4 327.00 9.20 -97.19% runtime_test.BenchmarkSemaUncontended-8 491.00 5.32 -98.92% runtime_test.BenchmarkSemaUncontended-16 946.00 4.18 -99.56% runtime_test.BenchmarkSemaSyntNonblock 59.00 36.80 -37.63% runtime_test.BenchmarkSemaSyntNonblock-2 167.00 138.00 -17.37% runtime_test.BenchmarkSemaSyntNonblock-4 333.00 129.00 -61.26% runtime_test.BenchmarkSemaSyntNonblock-8 464.00 130.00 -71.98% runtime_test.BenchmarkSemaSyntNonblock-16 1015.00 136.00 -86.60% runtime_test.BenchmarkSemaSyntBlock 58.80 36.70 -37.59% runtime_test.BenchmarkSemaSyntBlock-2 294.00 149.00 -49.32% runtime_test.BenchmarkSemaSyntBlock-4 333.00 177.00 -46.85% runtime_test.BenchmarkSemaSyntBlock-8 471.00 221.00 -53.08% runtime_test.BenchmarkSemaSyntBlock-16 990.00 227.00 -77.07% runtime_test.BenchmarkSemaWorkNonblock 829.00 832.00 +0.36% runtime_test.BenchmarkSemaWorkNonblock-2 425.00 419.00 -1.41% runtime_test.BenchmarkSemaWorkNonblock-4 308.00 220.00 -28.57% runtime_test.BenchmarkSemaWorkNonblock-8 394.00 147.00 -62.69% runtime_test.BenchmarkSemaWorkNonblock-16 1510.00 149.00 -90.13% runtime_test.BenchmarkSemaWorkBlock 828.00 813.00 -1.81% runtime_test.BenchmarkSemaWorkBlock-2 428.00 436.00 +1.87% runtime_test.BenchmarkSemaWorkBlock-4 232.00 219.00 -5.60% runtime_test.BenchmarkSemaWorkBlock-8 392.00 251.00 -35.97% runtime_test.BenchmarkSemaWorkBlock-16 1524.00 298.00 -80.45% sync_test.BenchmarkMutexUncontended 24.10 24.00 -0.41% sync_test.BenchmarkMutexUncontended-2 12.00 12.00 +0.00% sync_test.BenchmarkMutexUncontended-4 6.25 6.17 -1.28% sync_test.BenchmarkMutexUncontended-8 3.43 3.34 -2.62% sync_test.BenchmarkMutexUncontended-16 2.34 2.32 -0.85% sync_test.BenchmarkMutex 24.70 24.70 +0.00% sync_test.BenchmarkMutex-2 208.00 99.50 -52.16% sync_test.BenchmarkMutex-4 2744.00 256.00 -90.67% sync_test.BenchmarkMutex-8 5137.00 556.00 -89.18% sync_test.BenchmarkMutex-16 5368.00 1284.00 -76.08% sync_test.BenchmarkMutexSlack 24.70 25.00 +1.21% sync_test.BenchmarkMutexSlack-2 1094.00 186.00 -83.00% sync_test.BenchmarkMutexSlack-4 3430.00 402.00 -88.28% sync_test.BenchmarkMutexSlack-8 5051.00 1066.00 -78.90% sync_test.BenchmarkMutexSlack-16 6806.00 1363.00 -79.97% sync_test.BenchmarkMutexWork 793.00 792.00 -0.13% sync_test.BenchmarkMutexWork-2 398.00 398.00 +0.00% sync_test.BenchmarkMutexWork-4 1441.00 308.00 -78.63% sync_test.BenchmarkMutexWork-8 8532.00 847.00 -90.07% sync_test.BenchmarkMutexWork-16 8225.00 2760.00 -66.44% sync_test.BenchmarkMutexWorkSlack 793.00 793.00 +0.00% sync_test.BenchmarkMutexWorkSlack-2 418.00 414.00 -0.96% sync_test.BenchmarkMutexWorkSlack-4 4481.00 480.00 -89.29% sync_test.BenchmarkMutexWorkSlack-8 6317.00 1598.00 -74.70% sync_test.BenchmarkMutexWorkSlack-16 9111.00 3038.00 -66.66% R=rsc CC=golang-dev https://golang.org/cl/4631059
2011-06-28 13:09:53 -06:00
// Don't confuse with XADD x86 instruction,
// this one is actually 'addx', that is, add-and-fetch.
uint32 runtime·xadd(uint32 volatile*, int32);
uint64 runtime·xadd64(uint64 volatile*, int64);
runtime: improve Linux mutex The implementation is hybrid active/passive spin/blocking mutex. The design minimizes amount of context switches and futex calls. The idea is that all critical sections in runtime are intentially small, so pure blocking mutex behaves badly causing a lot of context switches, thread parking/unparking and kernel calls. Note that some synthetic benchmarks become somewhat slower, that's due to increased contention on other data structures, it should not affect programs that do any real work. On 2 x Intel E5620, 8 HT cores, 2.4GHz benchmark old ns/op new ns/op delta BenchmarkSelectContended 521.00 503.00 -3.45% BenchmarkSelectContended-2 661.00 320.00 -51.59% BenchmarkSelectContended-4 1139.00 629.00 -44.78% BenchmarkSelectContended-8 2870.00 878.00 -69.41% BenchmarkSelectContended-16 5276.00 818.00 -84.50% BenchmarkChanContended 112.00 103.00 -8.04% BenchmarkChanContended-2 631.00 174.00 -72.42% BenchmarkChanContended-4 682.00 272.00 -60.12% BenchmarkChanContended-8 1601.00 520.00 -67.52% BenchmarkChanContended-16 3100.00 372.00 -88.00% BenchmarkChanSync 253.00 239.00 -5.53% BenchmarkChanSync-2 5030.00 4648.00 -7.59% BenchmarkChanSync-4 4826.00 4694.00 -2.74% BenchmarkChanSync-8 4778.00 4713.00 -1.36% BenchmarkChanSync-16 5289.00 4710.00 -10.95% BenchmarkChanProdCons0 273.00 254.00 -6.96% BenchmarkChanProdCons0-2 599.00 400.00 -33.22% BenchmarkChanProdCons0-4 1168.00 659.00 -43.58% BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66% BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29% BenchmarkChanProdCons10 150.00 140.00 -6.67% BenchmarkChanProdCons10-2 607.00 268.00 -55.85% BenchmarkChanProdCons10-4 1137.00 404.00 -64.47% BenchmarkChanProdCons10-8 2115.00 828.00 -60.85% BenchmarkChanProdCons10-16 4283.00 855.00 -80.04% BenchmarkChanProdCons100 117.00 110.00 -5.98% BenchmarkChanProdCons100-2 558.00 218.00 -60.93% BenchmarkChanProdCons100-4 722.00 287.00 -60.25% BenchmarkChanProdCons100-8 1840.00 431.00 -76.58% BenchmarkChanProdCons100-16 3394.00 448.00 -86.80% BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89% BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63% BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06% BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53% BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29% BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47% BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01% BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93% BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12% BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98% BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70% BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21% BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14% BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82% BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75% BenchmarkSyscall 34.40 33.30 -3.20% BenchmarkSyscall-2 160.00 121.00 -24.38% BenchmarkSyscall-4 131.00 136.00 +3.82% BenchmarkSyscall-8 139.00 131.00 -5.76% BenchmarkSyscall-16 161.00 168.00 +4.35% BenchmarkSyscallWork 950.00 950.00 +0.00% BenchmarkSyscallWork-2 481.00 480.00 -0.21% BenchmarkSyscallWork-4 268.00 270.00 +0.75% BenchmarkSyscallWork-8 156.00 169.00 +8.33% BenchmarkSyscallWork-16 188.00 184.00 -2.13% BenchmarkSemaSyntNonblock 36.40 35.60 -2.20% BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59% BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29% BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00% BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82% BenchmarkSemaSyntBlock 35.30 35.30 +0.00% BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08% BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86% BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90% BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36% BenchmarkSemaWorkNonblock 810.00 811.00 +0.12% BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03% BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20% BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00% BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85% BenchmarkSemaWorkBlock 810.00 811.00 +0.12% BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64% BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82% BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52% BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82% BenchmarkContendedSemaphore 123.00 102.00 -17.07% BenchmarkContendedSemaphore-2 34.80 34.90 +0.29% BenchmarkContendedSemaphore-4 34.70 34.80 +0.29% BenchmarkContendedSemaphore-8 34.70 34.70 +0.00% BenchmarkContendedSemaphore-16 34.80 34.70 -0.29% BenchmarkMutex 26.80 26.00 -2.99% BenchmarkMutex-2 108.00 45.20 -58.15% BenchmarkMutex-4 103.00 127.00 +23.30% BenchmarkMutex-8 109.00 147.00 +34.86% BenchmarkMutex-16 102.00 152.00 +49.02% BenchmarkMutexSlack 27.00 26.90 -0.37% BenchmarkMutexSlack-2 149.00 165.00 +10.74% BenchmarkMutexSlack-4 121.00 209.00 +72.73% BenchmarkMutexSlack-8 101.00 158.00 +56.44% BenchmarkMutexSlack-16 97.00 129.00 +32.99% BenchmarkMutexWork 792.00 794.00 +0.25% BenchmarkMutexWork-2 407.00 409.00 +0.49% BenchmarkMutexWork-4 220.00 209.00 -5.00% BenchmarkMutexWork-8 267.00 160.00 -40.07% BenchmarkMutexWork-16 315.00 300.00 -4.76% BenchmarkMutexWorkSlack 792.00 793.00 +0.13% BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49% BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78% BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25% BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00% BenchmarkRWMutexWrite100 27.10 27.00 -0.37% BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26% BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04% BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92% BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35% BenchmarkRWMutexWrite10 29.60 29.40 -0.68% BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68% BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96% BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31% BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20% BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38% BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19% BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34% BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25% BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08% BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00% BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29% BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31% BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84% BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48% R=rsc, iant, msolo, fw, iant CC=golang-dev https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
uint32 runtime·xchg(uint32 volatile*, uint32);
uint64 runtime·xchg64(uint64 volatile*, uint64);
uint32 runtime·atomicload(uint32 volatile*);
void runtime·atomicstore(uint32 volatile*, uint32);
void runtime·atomicstore64(uint64 volatile*, uint64);
uint64 runtime·atomicload64(uint64 volatile*);
void* runtime·atomicloadp(void* volatile*);
void runtime·atomicstorep(void* volatile*, void*);
void runtime·jmpdefer(FuncVal*, void*);
void runtime·exit1(int32);
void runtime·ready(G*);
byte* runtime·getenv(int8*);
int32 runtime·atoi(byte*);
void runtime·newosproc(M *mp, void *stk);
void runtime·mstart(void);
G* runtime·malg(int32);
void runtime·asminit(void);
void runtime·mpreinit(M*);
void runtime·minit(void);
void runtime·unminit(void);
void runtime·signalstack(byte*, int32);
void runtime·symtabinit(void);
Func* runtime·findfunc(uintptr);
int32 runtime·funcline(Func*, uintptr, String*);
int32 runtime·funcarglen(Func*, uintptr);
int32 runtime·funcspdelta(Func*, uintptr);
int8* runtime·funcname(Func*);
int32 runtime·pcdatavalue(Func*, int32, uintptr);
void* runtime·stackalloc(uint32);
void runtime·stackfree(void*, uintptr);
MCache* runtime·allocmcache(void);
void runtime·freemcache(MCache*);
void runtime·mallocinit(void);
bool runtime·ifaceeq_c(Iface, Iface);
bool runtime·efaceeq_c(Eface, Eface);
uintptr runtime·ifacehash(Iface, uintptr);
uintptr runtime·efacehash(Eface, uintptr);
void* runtime·malloc(uintptr size);
void runtime·free(void *v);
void runtime·runpanic(Panic*);
uintptr runtime·getcallersp(void*);
int32 runtime·mcount(void);
int32 runtime·gcount(void);
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
void runtime·mcall(void(*)(G*));
uint32 runtime·fastrand1(void);
runtime: record proper goroutine state during stack split Until now, the goroutine state has been scattered during the execution of newstack and oldstack. It's all there, and those routines know how to get back to a working goroutine, but other pieces of the system, like stack traces, do not. If something does interrupt the newstack or oldstack execution, the rest of the system can't understand the goroutine. For example, if newstack decides there is an overflow and calls throw, the stack tracer wouldn't dump the goroutine correctly. For newstack to save a useful state snapshot, it needs to be able to rewind the PC in the function that triggered the split back to the beginning of the function. (The PC is a few instructions in, just after the call to morestack.) To make that possible, we change the prologues to insert a jmp back to the beginning of the function after the call to morestack. That is, the prologue used to be roughly: TEXT myfunc check for split jmpcond nosplit call morestack nosplit: sub $xxx, sp Now an extra instruction is inserted after the call: TEXT myfunc start: check for split jmpcond nosplit call morestack jmp start nosplit: sub $xxx, sp The jmp is not executed directly. It is decoded and simulated by runtime.rewindmorestack to discover the beginning of the function, and then the call to morestack returns directly to the start label instead of to the jump instruction. So logically the jmp is still executed, just not by the cpu. The prologue thus repeats in the case of a function that needs a stack split, but against the cost of the split itself, the extra few instructions are noise. The repeated prologue has the nice effect of making a stack split double-check that the new stack is big enough: if morestack happens to return on a too-small stack, we'll now notice before corruption happens. The ability for newstack to rewind to the beginning of the function should help preemption too. If newstack decides that it was called for preemption instead of a stack split, it now has the goroutine state correctly paused if rescheduling is needed, and when the goroutine can run again, it can return to the start label on its original stack and re-execute the split check. Here is an example of a split stack overflow showing the full trace, without any special cases in the stack printer. (This one was triggered by making the split check incorrect.) runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0] morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0} sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700} runtime: split stack overflow: 0x6aebd0 < 0x6b0000 fatal error: runtime: split stack overflow goroutine 1 [stack split]: runtime.mallocgc(0x290, 0x100000000, 0x1) /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8 runtime.new() /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08 go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...) /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0 main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8 main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98 main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0) /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80 ----- stack segment boundary ----- main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...) /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0 main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...) /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658 main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...) /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68 ----- stack segment boundary ----- main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2) /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0 main.main() /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8 And here is a seg fault during oldstack: SIGSEGV: segmentation violation PC=0x1b2a6 runtime.oldstack() /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76 runtime.lessstack() /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22 goroutine 1 [stack unsplit]: fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8 fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0 fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40 flag.(*stringValue).String(0x2102c9210, 0x1, 0x0) /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0 flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0 flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8 flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38 flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80 testing.init() /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0 strings_test.init() /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70 main.init() strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8 goroutine 2 [runnable]: runtime.MHeap_Scavenger() /Users/rsc/g/go/src/pkg/runtime/mheap.c:438 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 created by runtime.main /Users/rsc/g/go/src/pkg/runtime/proc.c:166 rax 0x23ccc0 rbx 0x23ccc0 rcx 0x0 rdx 0x38 rdi 0x2102c0170 rsi 0x221032cfe0 rbp 0x221032cfa0 rsp 0x7fff5fbff5b0 r8 0x2102c0120 r9 0x221032cfa0 r10 0x221032c000 r11 0x104ce8 r12 0xe5c80 r13 0x1be82baac718 r14 0x13091135f7d69200 r15 0x0 rip 0x1b2a6 rflags 0x10246 cs 0x2b fs 0x0 gs 0x0 Fixes #5723. R=r, dvyukov, go.peter.90, dave, iant CC=golang-dev https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
void runtime·rewindmorestack(Gobuf*);
int32 runtime·timediv(int64, int32, int32*);
void runtime·setmg(M*, G*);
void runtime·newextram(void);
void runtime·exit(int32);
void runtime·breakpoint(void);
void runtime·gosched(void);
void runtime·gosched0(G*);
void runtime·schedtrace(bool);
void runtime·park(void(*)(Lock*), Lock*, int8*);
void runtime·tsleep(int64, int8*);
M* runtime·newm(void);
void runtime·goexit(void);
runtime: scheduler, cgo reorganization * Change use of m->g0 stack (aka scheduler stack). * Provide runtime.mcall(f) to invoke f() on m->g0 stack. * Replace scheduler loop entry with runtime.mcall(schedule). Runtime.mcall eliminates the need for fake scheduler states that exist just to run a bit of code on the m->g0 stack (Grecovery, Gstackalloc). The elimination of the scheduler as a loop that stops and starts using gosave and gogo fixes a bad interaction with the way cgo uses the m->g0 stack. Cgo runs external (gcc-compiled) C functions on that stack, and then when calling back into Go, it sets m->g0->sched.sp below the added call frames, so that other uses of m->g0's stack will not interfere with those frames. Unfortunately, gogo (longjmp) back to the scheduler loop at this point would end up running scheduler with the lower sp, which no longer points at a valid stack frame for a call to scheduler. If scheduler then wrote any function call arguments or local variables to where it expected the stack frame to be, it would overwrite other data on the stack. I realized this possibility while debugging a problem with calling complex Go code in a Go -> C -> Go cgo callback. This wasn't the bug I was looking for, it turns out, but I believe it is a real bug nonetheless. Switching to runtime.mcall, which only adds new frames to the stack and never jumps into functions running in existing ones, fixes this bug. * Move cgo-related code out of proc.c into cgocall.c. * Add very large comment describing cgo call sequences. * Simpilify, regularize cgo function implementations and names. * Add test suite as misc/cgo/test. Now the Go -> C path calls cgocall, which calls asmcgocall, and the C -> Go path calls cgocallback, which calls cgocallbackg. The shuffling, which affects mainly the callback case, moves most of the callback implementation to cgocallback running on the m->curg stack (not the m->g0 scheduler stack) and only while accounted for with $GOMAXPROCS (between calls to exitsyscall and entersyscall). The previous callback code did not block in startcgocallback's approximation to exitsyscall, so if, say, the garbage collector were running, it would still barge in and start doing things like call malloc. Similarly endcgocallback's approximation of entersyscall did not call matchmg to kick off new OS threads when necessary, which caused the bug in issue 1560. Fixes #1560. R=iant CC=golang-dev https://golang.org/cl/4253054
2011-03-07 08:37:42 -07:00
void runtime·asmcgocall(void (*fn)(void*), void*);
void runtime·entersyscall(void);
void runtime·entersyscallblock(void);
void runtime·exitsyscall(void);
G* runtime·newproc1(FuncVal*, byte*, int32, int32, void*);
bool runtime·sigsend(int32 sig);
int32 runtime·callers(int32, uintptr*, int32);
int64 runtime·nanotime(void);
void runtime·dopanic(int32);
void runtime·startpanic(void);
void runtime·freezetheworld(void);
void runtime·unwindstack(G*, byte*);
void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp);
void runtime·resetcpuprofiler(int32);
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
void runtime·usleep(uint32);
int64 runtime·cputicks(void);
int64 runtime·tickspersecond(void);
void runtime·blockevent(int64, int32);
extern int64 runtime·blockprofilerate;
void runtime·addtimer(Timer*);
bool runtime·deltimer(Timer*);
G* runtime·netpoll(bool);
void runtime·netpollinit(void);
int32 runtime·netpollopen(uintptr, PollDesc*);
int32 runtime·netpollclose(uintptr);
void runtime·netpollready(G**, PollDesc*, int32);
uintptr runtime·netpollfd(PollDesc*);
void runtime·netpollarmread(uintptr fd);
void runtime·netpollarmwrite(uintptr fd);
void runtime·crash(void);
void runtime·parsedebugvars(void);
void _rt0_go(void);
void* runtime·funcdata(Func*, int32);
#pragma varargck argpos runtime·printf 1
#pragma varargck type "c" int32
#pragma varargck type "d" int32
#pragma varargck type "d" uint32
#pragma varargck type "D" int64
#pragma varargck type "D" uint64
#pragma varargck type "x" int32
#pragma varargck type "x" uint32
#pragma varargck type "X" int64
#pragma varargck type "X" uint64
#pragma varargck type "p" void*
#pragma varargck type "p" uintptr
#pragma varargck type "s" int8*
#pragma varargck type "s" uint8*
#pragma varargck type "S" String
void runtime·stoptheworld(void);
void runtime·starttheworld(void);
extern uint32 runtime·worldsema;
/*
* mutual exclusion locks. in the uncontended case,
* as fast as spin locks (just a few user-level instructions),
* but on the contention path they sleep in the kernel.
* a zeroed Lock is unlocked (no need to initialize each lock).
*/
void runtime·lock(Lock*);
void runtime·unlock(Lock*);
/*
* sleep and wakeup on one-time events.
* before any calls to notesleep or notewakeup,
* must call noteclear to initialize the Note.
* then, exactly one thread can call notesleep
* and exactly one thread can call notewakeup (once).
* once notewakeup has been called, the notesleep
* will return. future notesleep will return immediately.
* subsequent noteclear must be called only after
* previous notesleep has returned, e.g. it's disallowed
* to call noteclear straight after notewakeup.
*
* notetsleep is like notesleep but wakes up after
* a given number of nanoseconds even if the event
* has not yet happened. if a goroutine uses notetsleep to
* wake up early, it must wait to call noteclear until it
* can be sure that no other goroutine is calling
* notewakeup.
*
* notesleep/notetsleep are generally called on g0,
* notetsleepg is similar to notetsleep but is called on user g.
*/
void runtime·noteclear(Note*);
void runtime·notesleep(Note*);
void runtime·notewakeup(Note*);
bool runtime·notetsleep(Note*, int64); // false - timeout
bool runtime·notetsleepg(Note*, int64); // false - timeout
/*
* low-level synchronization for implementing the above
*/
uintptr runtime·semacreate(void);
int32 runtime·semasleep(int64);
void runtime·semawakeup(M*);
// or
void runtime·futexsleep(uint32*, uint32, int64);
void runtime·futexwakeup(uint32*, uint32);
/*
* Lock-free stack.
* Initialize uint64 head to 0, compare with 0 to test for emptiness.
* The stack does not keep pointers to nodes,
* so they can be garbage collected if there are no other pointers to nodes.
*/
void runtime·lfstackpush(uint64 *head, LFNode *node);
LFNode* runtime·lfstackpop(uint64 *head);
/*
* Parallel for over [0, n).
* body() is executed for each iteration.
* nthr - total number of worker threads.
* ctx - arbitrary user context.
* if wait=true, threads return from parfor() when all work is done;
* otherwise, threads can return while other threads are still finishing processing.
*/
ParFor* runtime·parforalloc(uint32 nthrmax);
void runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32));
void runtime·parfordo(ParFor *desc);
/*
* This is consistent across Linux and BSD.
* If a new OS is added that is different, move this to
* $GOOS/$GOARCH/defs.h.
*/
#define EACCES 13
2008-06-16 23:34:50 -06:00
/*
* low level C-called
2008-06-16 23:34:50 -06:00
*/
// for mmap, we only pass the lower 32 bits of file offset to the
// assembly routine; the higher bits (if required), should be provided
// by the assembly routine as 0.
uint8* runtime·mmap(byte*, uintptr, int32, int32, int32, uint32);
void runtime·munmap(byte*, uintptr);
void runtime·madvise(byte*, uintptr, int32);
void runtime·memclr(byte*, uintptr);
void runtime·setcallerpc(void*, void*);
void* runtime·getcallerpc(void*);
2008-06-05 20:38:39 -06:00
/*
2008-06-16 23:34:50 -06:00
* runtime go-called
2008-06-05 20:38:39 -06:00
*/
void runtime·printbool(bool);
void runtime·printbyte(int8);
void runtime·printfloat(float64);
void runtime·printint(int64);
void runtime·printiface(Iface);
void runtime·printeface(Eface);
void runtime·printstring(String);
void runtime·printpc(void*);
void runtime·printpointer(void*);
void runtime·printuint(uint64);
void runtime·printhex(uint64);
void runtime·printslice(Slice);
void runtime·printcomplex(Complex128);
void runtime·newstackcall(FuncVal*, byte*, uint32);
void reflect·call(FuncVal*, byte*, uint32);
void runtime·panic(Eface);
void runtime·panicindex(void);
void runtime·panicslice(void);
/*
* runtime c-called (but written in Go)
*/
void runtime·printany(Eface);
void runtime·newTypeAssertionError(String*, String*, String*, String*, Eface*);
void runtime·newErrorString(String, Eface*);
void runtime·newErrorCString(int8*, Eface*);
void runtime·fadd64c(uint64, uint64, uint64*);
void runtime·fsub64c(uint64, uint64, uint64*);
void runtime·fmul64c(uint64, uint64, uint64*);
void runtime·fdiv64c(uint64, uint64, uint64*);
void runtime·fneg64c(uint64, uint64*);
void runtime·f32to64c(uint32, uint64*);
void runtime·f64to32c(uint64, uint32*);
void runtime·fcmp64c(uint64, uint64, int32*, bool*);
void runtime·fintto64c(int64, uint64*);
void runtime·f64tointc(uint64, int64*, bool*);
/*
* wrapped for go users
*/
float64 runtime·Inf(int32 sign);
float64 runtime·NaN(void);
float32 runtime·float32frombits(uint32 i);
uint32 runtime·float32tobits(float32 f);
float64 runtime·float64frombits(uint64 i);
uint64 runtime·float64tobits(float64 f);
float64 runtime·frexp(float64 d, int32 *ep);
bool runtime·isInf(float64 f, int32 sign);
bool runtime·isNaN(float64 f);
float64 runtime·ldexp(float64 d, int32 e);
float64 runtime·modf(float64 d, float64 *ip);
void runtime·semacquire(uint32*, bool);
void runtime·semrelease(uint32*);
int32 runtime·gomaxprocsfunc(int32 n);
runtime: improve Linux mutex The implementation is hybrid active/passive spin/blocking mutex. The design minimizes amount of context switches and futex calls. The idea is that all critical sections in runtime are intentially small, so pure blocking mutex behaves badly causing a lot of context switches, thread parking/unparking and kernel calls. Note that some synthetic benchmarks become somewhat slower, that's due to increased contention on other data structures, it should not affect programs that do any real work. On 2 x Intel E5620, 8 HT cores, 2.4GHz benchmark old ns/op new ns/op delta BenchmarkSelectContended 521.00 503.00 -3.45% BenchmarkSelectContended-2 661.00 320.00 -51.59% BenchmarkSelectContended-4 1139.00 629.00 -44.78% BenchmarkSelectContended-8 2870.00 878.00 -69.41% BenchmarkSelectContended-16 5276.00 818.00 -84.50% BenchmarkChanContended 112.00 103.00 -8.04% BenchmarkChanContended-2 631.00 174.00 -72.42% BenchmarkChanContended-4 682.00 272.00 -60.12% BenchmarkChanContended-8 1601.00 520.00 -67.52% BenchmarkChanContended-16 3100.00 372.00 -88.00% BenchmarkChanSync 253.00 239.00 -5.53% BenchmarkChanSync-2 5030.00 4648.00 -7.59% BenchmarkChanSync-4 4826.00 4694.00 -2.74% BenchmarkChanSync-8 4778.00 4713.00 -1.36% BenchmarkChanSync-16 5289.00 4710.00 -10.95% BenchmarkChanProdCons0 273.00 254.00 -6.96% BenchmarkChanProdCons0-2 599.00 400.00 -33.22% BenchmarkChanProdCons0-4 1168.00 659.00 -43.58% BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66% BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29% BenchmarkChanProdCons10 150.00 140.00 -6.67% BenchmarkChanProdCons10-2 607.00 268.00 -55.85% BenchmarkChanProdCons10-4 1137.00 404.00 -64.47% BenchmarkChanProdCons10-8 2115.00 828.00 -60.85% BenchmarkChanProdCons10-16 4283.00 855.00 -80.04% BenchmarkChanProdCons100 117.00 110.00 -5.98% BenchmarkChanProdCons100-2 558.00 218.00 -60.93% BenchmarkChanProdCons100-4 722.00 287.00 -60.25% BenchmarkChanProdCons100-8 1840.00 431.00 -76.58% BenchmarkChanProdCons100-16 3394.00 448.00 -86.80% BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89% BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63% BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06% BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53% BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29% BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47% BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01% BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93% BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12% BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98% BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70% BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21% BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14% BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82% BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75% BenchmarkSyscall 34.40 33.30 -3.20% BenchmarkSyscall-2 160.00 121.00 -24.38% BenchmarkSyscall-4 131.00 136.00 +3.82% BenchmarkSyscall-8 139.00 131.00 -5.76% BenchmarkSyscall-16 161.00 168.00 +4.35% BenchmarkSyscallWork 950.00 950.00 +0.00% BenchmarkSyscallWork-2 481.00 480.00 -0.21% BenchmarkSyscallWork-4 268.00 270.00 +0.75% BenchmarkSyscallWork-8 156.00 169.00 +8.33% BenchmarkSyscallWork-16 188.00 184.00 -2.13% BenchmarkSemaSyntNonblock 36.40 35.60 -2.20% BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59% BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29% BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00% BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82% BenchmarkSemaSyntBlock 35.30 35.30 +0.00% BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08% BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86% BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90% BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36% BenchmarkSemaWorkNonblock 810.00 811.00 +0.12% BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03% BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20% BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00% BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85% BenchmarkSemaWorkBlock 810.00 811.00 +0.12% BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64% BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82% BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52% BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82% BenchmarkContendedSemaphore 123.00 102.00 -17.07% BenchmarkContendedSemaphore-2 34.80 34.90 +0.29% BenchmarkContendedSemaphore-4 34.70 34.80 +0.29% BenchmarkContendedSemaphore-8 34.70 34.70 +0.00% BenchmarkContendedSemaphore-16 34.80 34.70 -0.29% BenchmarkMutex 26.80 26.00 -2.99% BenchmarkMutex-2 108.00 45.20 -58.15% BenchmarkMutex-4 103.00 127.00 +23.30% BenchmarkMutex-8 109.00 147.00 +34.86% BenchmarkMutex-16 102.00 152.00 +49.02% BenchmarkMutexSlack 27.00 26.90 -0.37% BenchmarkMutexSlack-2 149.00 165.00 +10.74% BenchmarkMutexSlack-4 121.00 209.00 +72.73% BenchmarkMutexSlack-8 101.00 158.00 +56.44% BenchmarkMutexSlack-16 97.00 129.00 +32.99% BenchmarkMutexWork 792.00 794.00 +0.25% BenchmarkMutexWork-2 407.00 409.00 +0.49% BenchmarkMutexWork-4 220.00 209.00 -5.00% BenchmarkMutexWork-8 267.00 160.00 -40.07% BenchmarkMutexWork-16 315.00 300.00 -4.76% BenchmarkMutexWorkSlack 792.00 793.00 +0.13% BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49% BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78% BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25% BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00% BenchmarkRWMutexWrite100 27.10 27.00 -0.37% BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26% BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04% BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92% BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35% BenchmarkRWMutexWrite10 29.60 29.40 -0.68% BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68% BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96% BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31% BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20% BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38% BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19% BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34% BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25% BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08% BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00% BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29% BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31% BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84% BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48% R=rsc, iant, msolo, fw, iant CC=golang-dev https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
void runtime·procyield(uint32);
void runtime·osyield(void);
void runtime·lockOSThread(void);
void runtime·unlockOSThread(void);
Hchan* runtime·makechan_c(ChanType*, int64);
void runtime·chansend(ChanType*, Hchan*, byte*, bool*, void*);
void runtime·chanrecv(ChanType*, Hchan*, byte*, bool*, bool*);
runtime: record proper goroutine state during stack split Until now, the goroutine state has been scattered during the execution of newstack and oldstack. It's all there, and those routines know how to get back to a working goroutine, but other pieces of the system, like stack traces, do not. If something does interrupt the newstack or oldstack execution, the rest of the system can't understand the goroutine. For example, if newstack decides there is an overflow and calls throw, the stack tracer wouldn't dump the goroutine correctly. For newstack to save a useful state snapshot, it needs to be able to rewind the PC in the function that triggered the split back to the beginning of the function. (The PC is a few instructions in, just after the call to morestack.) To make that possible, we change the prologues to insert a jmp back to the beginning of the function after the call to morestack. That is, the prologue used to be roughly: TEXT myfunc check for split jmpcond nosplit call morestack nosplit: sub $xxx, sp Now an extra instruction is inserted after the call: TEXT myfunc start: check for split jmpcond nosplit call morestack jmp start nosplit: sub $xxx, sp The jmp is not executed directly. It is decoded and simulated by runtime.rewindmorestack to discover the beginning of the function, and then the call to morestack returns directly to the start label instead of to the jump instruction. So logically the jmp is still executed, just not by the cpu. The prologue thus repeats in the case of a function that needs a stack split, but against the cost of the split itself, the extra few instructions are noise. The repeated prologue has the nice effect of making a stack split double-check that the new stack is big enough: if morestack happens to return on a too-small stack, we'll now notice before corruption happens. The ability for newstack to rewind to the beginning of the function should help preemption too. If newstack decides that it was called for preemption instead of a stack split, it now has the goroutine state correctly paused if rescheduling is needed, and when the goroutine can run again, it can return to the start label on its original stack and re-execute the split check. Here is an example of a split stack overflow showing the full trace, without any special cases in the stack printer. (This one was triggered by making the split check incorrect.) runtime: newstack framesize=0x0 argsize=0x18 sp=0x6aebd0 stack=[0x6b0000, 0x6b0fa0] morebuf={pc:0x69f5b sp:0x6aebd8 lr:0x0} sched={pc:0x68880 sp:0x6aebd0 lr:0x0 ctxt:0x34e700} runtime: split stack overflow: 0x6aebd0 < 0x6b0000 fatal error: runtime: split stack overflow goroutine 1 [stack split]: runtime.mallocgc(0x290, 0x100000000, 0x1) /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:21 fp=0x6aebd8 runtime.new() /Users/rsc/g/go/src/pkg/runtime/zmalloc_darwin_amd64.c:682 +0x5b fp=0x6aec08 go/build.(*Context).Import(0x5ae340, 0xc210030c71, 0xa, 0xc2100b4380, 0x1b, ...) /Users/rsc/g/go/src/pkg/go/build/build.go:424 +0x3a fp=0x6b00a0 main.loadImport(0xc210030c71, 0xa, 0xc2100b4380, 0x1b, 0xc2100b42c0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:249 +0x371 fp=0x6b01a8 main.(*Package).load(0xc21017c800, 0xc2100b42c0, 0xc2101828c0, 0x0, 0x0, ...) /Users/rsc/g/go/src/cmd/go/pkg.go:431 +0x2801 fp=0x6b0c98 main.loadPackage(0x369040, 0x7, 0xc2100b42c0, 0x0) /Users/rsc/g/go/src/cmd/go/pkg.go:709 +0x857 fp=0x6b0f80 ----- stack segment boundary ----- main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc2100e6c00, 0xc2100e5750, ...) /Users/rsc/g/go/src/cmd/go/build.go:539 +0x437 fp=0x6b14a0 main.(*builder).action(0xc2100902a0, 0x0, 0x0, 0xc21015b400, 0x2, ...) /Users/rsc/g/go/src/cmd/go/build.go:528 +0x1d2 fp=0x6b1658 main.(*builder).test(0xc2100902a0, 0xc210092000, 0x0, 0x0, 0xc21008ff60, ...) /Users/rsc/g/go/src/cmd/go/test.go:622 +0x1b53 fp=0x6b1f68 ----- stack segment boundary ----- main.runTest(0x5a6b20, 0xc21000a020, 0x2, 0x2) /Users/rsc/g/go/src/cmd/go/test.go:366 +0xd09 fp=0x6a5cf0 main.main() /Users/rsc/g/go/src/cmd/go/main.go:161 +0x4f9 fp=0x6a5f78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:183 +0x92 fp=0x6a5fa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1266 fp=0x6a5fa8 And here is a seg fault during oldstack: SIGSEGV: segmentation violation PC=0x1b2a6 runtime.oldstack() /Users/rsc/g/go/src/pkg/runtime/stack.c:159 +0x76 runtime.lessstack() /Users/rsc/g/go/src/pkg/runtime/asm_amd64.s:270 +0x22 goroutine 1 [stack unsplit]: fmt.(*pp).printArg(0x2102e64e0, 0xe5c80, 0x2102c9220, 0x73, 0x0, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:818 +0x3d3 fp=0x221031e6f8 fmt.(*pp).doPrintf(0x2102e64e0, 0x12fb20, 0x2, 0x221031eb98, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:1183 +0x15cb fp=0x221031eaf0 fmt.Sprintf(0x12fb20, 0x2, 0x221031eb98, 0x1, 0x1, ...) /Users/rsc/g/go/src/pkg/fmt/print.go:234 +0x67 fp=0x221031eb40 flag.(*stringValue).String(0x2102c9210, 0x1, 0x0) /Users/rsc/g/go/src/pkg/flag/flag.go:180 +0xb3 fp=0x221031ebb0 flag.(*FlagSet).Var(0x2102f6000, 0x293d38, 0x2102c9210, 0x143490, 0xa, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:633 +0x40 fp=0x221031eca0 flag.(*FlagSet).StringVar(0x2102f6000, 0x2102c9210, 0x143490, 0xa, 0x12fa60, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:550 +0x91 fp=0x221031ece8 flag.(*FlagSet).String(0x2102f6000, 0x143490, 0xa, 0x12fa60, 0x0, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:563 +0x87 fp=0x221031ed38 flag.String(0x143490, 0xa, 0x12fa60, 0x0, 0x161950, ...) /Users/rsc/g/go/src/pkg/flag/flag.go:570 +0x6b fp=0x221031ed80 testing.init() /Users/rsc/g/go/src/pkg/testing/testing.go:-531 +0xbb fp=0x221031edc0 strings_test.init() /Users/rsc/g/go/src/pkg/strings/strings_test.go:1115 +0x62 fp=0x221031ef70 main.init() strings/_test/_testmain.go:90 +0x3d fp=0x221031ef78 runtime.main() /Users/rsc/g/go/src/pkg/runtime/proc.c:180 +0x8a fp=0x221031efa0 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 fp=0x221031efa8 goroutine 2 [runnable]: runtime.MHeap_Scavenger() /Users/rsc/g/go/src/pkg/runtime/mheap.c:438 runtime.goexit() /Users/rsc/g/go/src/pkg/runtime/proc.c:1269 created by runtime.main /Users/rsc/g/go/src/pkg/runtime/proc.c:166 rax 0x23ccc0 rbx 0x23ccc0 rcx 0x0 rdx 0x38 rdi 0x2102c0170 rsi 0x221032cfe0 rbp 0x221032cfa0 rsp 0x7fff5fbff5b0 r8 0x2102c0120 r9 0x221032cfa0 r10 0x221032c000 r11 0x104ce8 r12 0xe5c80 r13 0x1be82baac718 r14 0x13091135f7d69200 r15 0x0 rip 0x1b2a6 rflags 0x10246 cs 0x2b fs 0x0 gs 0x0 Fixes #5723. R=r, dvyukov, go.peter.90, dave, iant CC=golang-dev https://golang.org/cl/10360048
2013-06-27 09:32:01 -06:00
bool runtime·showframe(Func*, G*);
void runtime·printcreatedby(G*);
void runtime·ifaceE2I(InterfaceType*, Eface, Iface*);
bool runtime·ifaceE2I2(InterfaceType*, Eface, Iface*);
uintptr runtime·memlimit(void);
// float.c
extern float64 runtime·nan;
extern float64 runtime·posinf;
extern float64 runtime·neginf;
extern uint64 ·nan;
extern uint64 ·posinf;
extern uint64 ·neginf;
#define ISNAN(f) ((f) != (f))
enum
{
UseSpanType = 1,
};