mirror of
https://github.com/golang/go
synced 2024-11-13 17:20:22 -07:00
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out. On a 4-core Lenovo X201s (Linux): 31.12u 0.60s 31.74r 1 cpu, no atomics 32.27u 0.58s 32.86r 1 cpu, atomic instructions 33.04u 0.83s 27.47r 2 cpu On a 16-core Xeon (Linux): 33.08u 0.65s 33.80r 1 cpu, no atomics 34.87u 1.12s 29.60r 2 cpu 36.00u 1.87s 28.43r 3 cpu 36.46u 2.34s 27.10r 4 cpu 38.28u 3.85s 26.92r 5 cpu 37.72u 5.25s 26.73r 6 cpu 39.63u 7.11s 26.95r 7 cpu 39.67u 8.10s 26.68r 8 cpu On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5): 39.43u 1.45s 41.27r 1 cpu, no atomics 43.98u 2.95s 38.69r 2 cpu On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1): 48.81u 2.12s 51.76r 1 cpu, no atomics 57.15u 4.72s 51.54r 2 cpu The handoff algorithm is really only good for two cores. Beyond that we will need to do something more sophisticated, like have each core hand off to the next one, around a circle. Even so, the code is a good checkpoint; for now we'll limit the number of gc procs to at most 2. R=dvyukov CC=golang-dev https://golang.org/cl/4641082
This commit is contained in:
parent
b0cddb98b9
commit
d324f2143b
@ -97,7 +97,7 @@ TEXT runtime·sigtramp(SB),7,$40
|
|||||||
// save g
|
// save g
|
||||||
MOVL g(CX), DI
|
MOVL g(CX), DI
|
||||||
MOVL DI, 20(SP)
|
MOVL DI, 20(SP)
|
||||||
|
|
||||||
// g = m->gsignal
|
// g = m->gsignal
|
||||||
MOVL m(CX), BP
|
MOVL m(CX), BP
|
||||||
MOVL m_gsignal(BP), BP
|
MOVL m_gsignal(BP), BP
|
||||||
@ -111,7 +111,7 @@ TEXT runtime·sigtramp(SB),7,$40
|
|||||||
MOVL context+16(FP), BX
|
MOVL context+16(FP), BX
|
||||||
MOVL BX, 8(SP)
|
MOVL BX, 8(SP)
|
||||||
MOVL DI, 12(SP)
|
MOVL DI, 12(SP)
|
||||||
|
|
||||||
MOVL handler+0(FP), BX
|
MOVL handler+0(FP), BX
|
||||||
CALL BX
|
CALL BX
|
||||||
|
|
||||||
@ -138,6 +138,26 @@ TEXT runtime·sigaltstack(SB),7,$0
|
|||||||
CALL runtime·notok(SB)
|
CALL runtime·notok(SB)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·usleep(SB),7,$32
|
||||||
|
MOVL $0, DX
|
||||||
|
MOVL usec+0(FP), AX
|
||||||
|
MOVL $1000000, CX
|
||||||
|
DIVL CX
|
||||||
|
MOVL AX, 24(SP) // sec
|
||||||
|
MOVL DX, 28(SP) // usec
|
||||||
|
|
||||||
|
// select(0, 0, 0, 0, &tv)
|
||||||
|
MOVL $0, 0(SP) // "return PC" - ignored
|
||||||
|
MOVL $0, 4(SP)
|
||||||
|
MOVL $0, 8(SP)
|
||||||
|
MOVL $0, 12(SP)
|
||||||
|
MOVL $0, 16(SP)
|
||||||
|
LEAL 24(SP), AX
|
||||||
|
MOVL AX, 20(SP)
|
||||||
|
MOVL $93, AX
|
||||||
|
INT $0x80
|
||||||
|
RET
|
||||||
|
|
||||||
// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
|
// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
|
||||||
// System call args are: func arg stack pthread flags.
|
// System call args are: func arg stack pthread flags.
|
||||||
TEXT runtime·bsdthread_create(SB),7,$32
|
TEXT runtime·bsdthread_create(SB),7,$32
|
||||||
@ -309,3 +329,12 @@ TEXT runtime·setldt(SB),7,$32
|
|||||||
XORL AX, AX
|
XORL AX, AX
|
||||||
MOVW GS, AX
|
MOVW GS, AX
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·sysctl(SB),7,$0
|
||||||
|
MOVL $202, AX
|
||||||
|
INT $0x80
|
||||||
|
JAE 3(PC)
|
||||||
|
NEGL AX
|
||||||
|
RET
|
||||||
|
MOVL $0, AX
|
||||||
|
RET
|
||||||
|
@ -81,11 +81,11 @@ TEXT runtime·sigaction(SB),7,$0
|
|||||||
|
|
||||||
TEXT runtime·sigtramp(SB),7,$64
|
TEXT runtime·sigtramp(SB),7,$64
|
||||||
get_tls(BX)
|
get_tls(BX)
|
||||||
|
|
||||||
// save g
|
// save g
|
||||||
MOVQ g(BX), R10
|
MOVQ g(BX), R10
|
||||||
MOVQ R10, 48(SP)
|
MOVQ R10, 48(SP)
|
||||||
|
|
||||||
// g = m->gsignal
|
// g = m->gsignal
|
||||||
MOVQ m(BX), BP
|
MOVQ m(BX), BP
|
||||||
MOVQ m_gsignal(BP), BP
|
MOVQ m_gsignal(BP), BP
|
||||||
@ -146,6 +146,24 @@ TEXT runtime·sigaltstack(SB),7,$0
|
|||||||
CALL runtime·notok(SB)
|
CALL runtime·notok(SB)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·usleep(SB),7,$16
|
||||||
|
MOVL $0, DX
|
||||||
|
MOVL usec+0(FP), AX
|
||||||
|
MOVL $1000000, CX
|
||||||
|
DIVL CX
|
||||||
|
MOVQ AX, 0(SP) // sec
|
||||||
|
MOVL DX, 8(SP) // usec
|
||||||
|
|
||||||
|
// select(0, 0, 0, 0, &tv)
|
||||||
|
MOVL $0, DI
|
||||||
|
MOVL $0, SI
|
||||||
|
MOVL $0, DX
|
||||||
|
MOVL $0, R10
|
||||||
|
MOVQ SP, R8
|
||||||
|
MOVL $(0x2000000+23), AX
|
||||||
|
SYSCALL
|
||||||
|
RET
|
||||||
|
|
||||||
// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
|
// void bsdthread_create(void *stk, M *m, G *g, void (*fn)(void))
|
||||||
TEXT runtime·bsdthread_create(SB),7,$0
|
TEXT runtime·bsdthread_create(SB),7,$0
|
||||||
// Set up arguments to bsdthread_create system call.
|
// Set up arguments to bsdthread_create system call.
|
||||||
@ -189,7 +207,7 @@ TEXT runtime·bsdthread_start(SB),7,$0
|
|||||||
POPQ SI
|
POPQ SI
|
||||||
POPQ CX
|
POPQ CX
|
||||||
POPQ DX
|
POPQ DX
|
||||||
|
|
||||||
get_tls(BX)
|
get_tls(BX)
|
||||||
MOVQ CX, m(BX)
|
MOVQ CX, m(BX)
|
||||||
MOVQ SI, m_procid(CX) // thread port is m->procid
|
MOVQ SI, m_procid(CX) // thread port is m->procid
|
||||||
@ -293,3 +311,18 @@ TEXT runtime·settls(SB),7,$32
|
|||||||
MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3
|
MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3
|
||||||
SYSCALL
|
SYSCALL
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·sysctl(SB),7,$0
|
||||||
|
MOVQ 8(SP), DI
|
||||||
|
MOVL 16(SP), SI
|
||||||
|
MOVQ 24(SP), DX
|
||||||
|
MOVQ 32(SP), R10
|
||||||
|
MOVQ 40(SP), R8
|
||||||
|
MOVQ 48(SP), R9
|
||||||
|
MOVL $(0x2000000+202), AX // syscall entry
|
||||||
|
SYSCALL
|
||||||
|
JCC 3(PC)
|
||||||
|
NEGL AX
|
||||||
|
RET
|
||||||
|
MOVL $0, AX
|
||||||
|
RET
|
||||||
|
@ -18,6 +18,7 @@ uint32 runtime·mach_task_self(void);
|
|||||||
uint32 runtime·mach_task_self(void);
|
uint32 runtime·mach_task_self(void);
|
||||||
uint32 runtime·mach_thread_self(void);
|
uint32 runtime·mach_thread_self(void);
|
||||||
uint32 runtime·mach_thread_self(void);
|
uint32 runtime·mach_thread_self(void);
|
||||||
|
int32 runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
|
||||||
|
|
||||||
struct Sigaction;
|
struct Sigaction;
|
||||||
void runtime·sigaction(uintptr, struct Sigaction*, struct Sigaction*);
|
void runtime·sigaction(uintptr, struct Sigaction*, struct Sigaction*);
|
||||||
|
@ -148,6 +148,20 @@ runtime·osinit(void)
|
|||||||
if(!runtime·iscgo)
|
if(!runtime·iscgo)
|
||||||
runtime·bsdthread_register();
|
runtime·bsdthread_register();
|
||||||
runtime·destroylock = destroylock;
|
runtime·destroylock = destroylock;
|
||||||
|
|
||||||
|
// Use sysctl to fetch hw.ncpu.
|
||||||
|
uint32 mib[2];
|
||||||
|
uint32 out;
|
||||||
|
int32 ret;
|
||||||
|
uintptr nout;
|
||||||
|
|
||||||
|
mib[0] = 6;
|
||||||
|
mib[1] = 3;
|
||||||
|
nout = sizeof out;
|
||||||
|
out = 0;
|
||||||
|
ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
|
||||||
|
if(ret >= 0)
|
||||||
|
runtime·ncpu = out;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -52,6 +52,25 @@ TEXT runtime·read(SB),7,$0
|
|||||||
CALL *runtime·_vdso(SB)
|
CALL *runtime·_vdso(SB)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·usleep(SB),7,$28
|
||||||
|
MOVL $0, DX
|
||||||
|
MOVL usec+0(FP), AX
|
||||||
|
MOVL $1000000, CX
|
||||||
|
DIVL CX
|
||||||
|
MOVL AX, 20(SP)
|
||||||
|
MOVL DX, 24(SP)
|
||||||
|
|
||||||
|
// select(0, 0, 0, 0, &tv)
|
||||||
|
MOVL $0, 0(SP)
|
||||||
|
MOVL $0, 4(SP)
|
||||||
|
MOVL $0, 8(SP)
|
||||||
|
MOVL $0, 12(SP)
|
||||||
|
LEAL 20(SP), AX
|
||||||
|
MOVL AX, 16(SP)
|
||||||
|
MOVL $82, AX
|
||||||
|
SYSCALL
|
||||||
|
RET
|
||||||
|
|
||||||
TEXT runtime·raisesigpipe(SB),7,$12
|
TEXT runtime·raisesigpipe(SB),7,$12
|
||||||
MOVL $224, AX // syscall - gettid
|
MOVL $224, AX // syscall - gettid
|
||||||
CALL *runtime·_vdso(SB)
|
CALL *runtime·_vdso(SB)
|
||||||
@ -105,16 +124,16 @@ TEXT runtime·rt_sigaction(SB),7,$0
|
|||||||
|
|
||||||
TEXT runtime·sigtramp(SB),7,$44
|
TEXT runtime·sigtramp(SB),7,$44
|
||||||
get_tls(CX)
|
get_tls(CX)
|
||||||
|
|
||||||
// save g
|
// save g
|
||||||
MOVL g(CX), DI
|
MOVL g(CX), DI
|
||||||
MOVL DI, 20(SP)
|
MOVL DI, 20(SP)
|
||||||
|
|
||||||
// g = m->gsignal
|
// g = m->gsignal
|
||||||
MOVL m(CX), BX
|
MOVL m(CX), BX
|
||||||
MOVL m_gsignal(BX), BX
|
MOVL m_gsignal(BX), BX
|
||||||
MOVL BX, g(CX)
|
MOVL BX, g(CX)
|
||||||
|
|
||||||
// copy arguments for call to sighandler
|
// copy arguments for call to sighandler
|
||||||
MOVL sig+0(FP), BX
|
MOVL sig+0(FP), BX
|
||||||
MOVL BX, 0(SP)
|
MOVL BX, 0(SP)
|
||||||
@ -125,12 +144,12 @@ TEXT runtime·sigtramp(SB),7,$44
|
|||||||
MOVL DI, 12(SP)
|
MOVL DI, 12(SP)
|
||||||
|
|
||||||
CALL runtime·sighandler(SB)
|
CALL runtime·sighandler(SB)
|
||||||
|
|
||||||
// restore g
|
// restore g
|
||||||
get_tls(CX)
|
get_tls(CX)
|
||||||
MOVL 20(SP), BX
|
MOVL 20(SP), BX
|
||||||
MOVL BX, g(CX)
|
MOVL BX, g(CX)
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT runtime·sigignore(SB),7,$0
|
TEXT runtime·sigignore(SB),7,$0
|
||||||
@ -202,7 +221,7 @@ TEXT runtime·clone(SB),7,$0
|
|||||||
MOVL $1234, 12(CX)
|
MOVL $1234, 12(CX)
|
||||||
|
|
||||||
// cannot use CALL *runtime·_vdso(SB) here, because
|
// cannot use CALL *runtime·_vdso(SB) here, because
|
||||||
// the stack changes during the system call (after
|
// the stack changes during the system call (after
|
||||||
// CALL *runtime·_vdso(SB), the child is still using
|
// CALL *runtime·_vdso(SB), the child is still using
|
||||||
// the parent's stack when executing its RET instruction).
|
// the parent's stack when executing its RET instruction).
|
||||||
INT $0x80
|
INT $0x80
|
||||||
|
@ -50,6 +50,24 @@ TEXT runtime·read(SB),7,$0-24
|
|||||||
SYSCALL
|
SYSCALL
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·usleep(SB),7,$16
|
||||||
|
MOVL $0, DX
|
||||||
|
MOVL usec+0(FP), AX
|
||||||
|
MOVL $1000000, CX
|
||||||
|
DIVL CX
|
||||||
|
MOVQ AX, 0(SP)
|
||||||
|
MOVQ DX, 8(SP)
|
||||||
|
|
||||||
|
// select(0, 0, 0, 0, &tv)
|
||||||
|
MOVL $0, DI
|
||||||
|
MOVL $0, SI
|
||||||
|
MOVL $0, DX
|
||||||
|
MOVL $0, R10
|
||||||
|
MOVQ SP, R8
|
||||||
|
MOVL $23, AX
|
||||||
|
SYSCALL
|
||||||
|
RET
|
||||||
|
|
||||||
TEXT runtime·raisesigpipe(SB),7,$12
|
TEXT runtime·raisesigpipe(SB),7,$12
|
||||||
MOVL $186, AX // syscall - gettid
|
MOVL $186, AX // syscall - gettid
|
||||||
SYSCALL
|
SYSCALL
|
||||||
@ -195,10 +213,10 @@ TEXT runtime·clone(SB),7,$0
|
|||||||
CMPQ AX, $0
|
CMPQ AX, $0
|
||||||
JEQ 2(PC)
|
JEQ 2(PC)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// In child, on new stack.
|
// In child, on new stack.
|
||||||
MOVQ SI, SP
|
MOVQ SI, SP
|
||||||
|
|
||||||
// Initialize m->procid to Linux tid
|
// Initialize m->procid to Linux tid
|
||||||
MOVL $186, AX // gettid
|
MOVL $186, AX // gettid
|
||||||
SYSCALL
|
SYSCALL
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
#define SYS_gettid (SYS_BASE + 224)
|
#define SYS_gettid (SYS_BASE + 224)
|
||||||
#define SYS_tkill (SYS_BASE + 238)
|
#define SYS_tkill (SYS_BASE + 238)
|
||||||
#define SYS_sched_yield (SYS_BASE + 158)
|
#define SYS_sched_yield (SYS_BASE + 158)
|
||||||
|
#define SYS_select (SYS_BASE + 82)
|
||||||
|
|
||||||
#define ARM_BASE (SYS_BASE + 0x0f0000)
|
#define ARM_BASE (SYS_BASE + 0x0f0000)
|
||||||
#define SYS_ARM_cacheflush (ARM_BASE + 2)
|
#define SYS_ARM_cacheflush (ARM_BASE + 2)
|
||||||
@ -254,7 +255,7 @@ TEXT runtime·sigtramp(SB),7,$24
|
|||||||
// save g
|
// save g
|
||||||
MOVW g, R3
|
MOVW g, R3
|
||||||
MOVW g, 20(R13)
|
MOVW g, 20(R13)
|
||||||
|
|
||||||
// g = m->gsignal
|
// g = m->gsignal
|
||||||
MOVW m_gsignal(m), g
|
MOVW m_gsignal(m), g
|
||||||
|
|
||||||
@ -265,7 +266,7 @@ TEXT runtime·sigtramp(SB),7,$24
|
|||||||
MOVW R3, 16(R13)
|
MOVW R3, 16(R13)
|
||||||
|
|
||||||
BL runtime·sighandler(SB)
|
BL runtime·sighandler(SB)
|
||||||
|
|
||||||
// restore g
|
// restore g
|
||||||
MOVW 20(R13), g
|
MOVW 20(R13), g
|
||||||
|
|
||||||
@ -285,6 +286,23 @@ TEXT runtime·sigreturn(SB),7,$0
|
|||||||
SWI $0
|
SWI $0
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
TEXT runtime·usleep(SB),7,$12
|
||||||
|
MOVW usec+0(FP), R0
|
||||||
|
MOVW R0, R1
|
||||||
|
MOVW $1000000, R2
|
||||||
|
DIV R1, R0
|
||||||
|
MOD R2, R0
|
||||||
|
MOVW R1, 4(SP)
|
||||||
|
MOVW R2, 8(SP)
|
||||||
|
MOVW $0, R0
|
||||||
|
MOVW $0, R1
|
||||||
|
MOVW $0, R2
|
||||||
|
MOVW $0, R3
|
||||||
|
MOVW $4(SP), R4
|
||||||
|
MOVW $SYS_select, R7
|
||||||
|
SWI $0
|
||||||
|
RET
|
||||||
|
|
||||||
// Use kernel version instead of native armcas in ../../arm.s.
|
// Use kernel version instead of native armcas in ../../arm.s.
|
||||||
// See ../../../sync/atomic/asm_linux_arm.s for details.
|
// See ../../../sync/atomic/asm_linux_arm.s for details.
|
||||||
TEXT cas<>(SB),7,$0
|
TEXT cas<>(SB),7,$0
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include "stack.h"
|
#include "stack.h"
|
||||||
|
|
||||||
extern SigTab runtime·sigtab[];
|
extern SigTab runtime·sigtab[];
|
||||||
static int32 proccount;
|
|
||||||
|
|
||||||
int32 runtime·open(uint8*, int32, int32);
|
int32 runtime·open(uint8*, int32, int32);
|
||||||
int32 runtime·close(int32);
|
int32 runtime·close(int32);
|
||||||
@ -136,13 +135,10 @@ futexlock(Lock *l)
|
|||||||
// its wakeup call.
|
// its wakeup call.
|
||||||
wait = v;
|
wait = v;
|
||||||
|
|
||||||
if(proccount == 0)
|
|
||||||
proccount = getproccount();
|
|
||||||
|
|
||||||
// On uniprocessor's, no point spinning.
|
// On uniprocessor's, no point spinning.
|
||||||
// On multiprocessors, spin for ACTIVE_SPIN attempts.
|
// On multiprocessors, spin for ACTIVE_SPIN attempts.
|
||||||
spin = 0;
|
spin = 0;
|
||||||
if(proccount > 1)
|
if(runtime·ncpu > 1)
|
||||||
spin = ACTIVE_SPIN;
|
spin = ACTIVE_SPIN;
|
||||||
|
|
||||||
for(;;) {
|
for(;;) {
|
||||||
@ -276,6 +272,7 @@ runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void))
|
|||||||
void
|
void
|
||||||
runtime·osinit(void)
|
runtime·osinit(void)
|
||||||
{
|
{
|
||||||
|
runtime·ncpu = getproccount();
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -120,6 +120,13 @@ enum
|
|||||||
#else
|
#else
|
||||||
MHeapMap_Bits = 20,
|
MHeapMap_Bits = 20,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Max number of threads to run garbage collection.
|
||||||
|
// 2, 3, and 4 are all plausible maximums depending
|
||||||
|
// on the hardware details of the machine. The second
|
||||||
|
// proc is the one that helps the most (after the first),
|
||||||
|
// so start with just 2 for now.
|
||||||
|
MaxGcproc = 2,
|
||||||
};
|
};
|
||||||
|
|
||||||
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
|
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
|
||||||
@ -192,7 +199,7 @@ struct MStats
|
|||||||
uint64 nlookup; // number of pointer lookups
|
uint64 nlookup; // number of pointer lookups
|
||||||
uint64 nmalloc; // number of mallocs
|
uint64 nmalloc; // number of mallocs
|
||||||
uint64 nfree; // number of frees
|
uint64 nfree; // number of frees
|
||||||
|
|
||||||
// Statistics about malloc heap.
|
// Statistics about malloc heap.
|
||||||
// protected by mheap.Lock
|
// protected by mheap.Lock
|
||||||
uint64 heap_alloc; // bytes allocated and still in use
|
uint64 heap_alloc; // bytes allocated and still in use
|
||||||
@ -210,7 +217,7 @@ struct MStats
|
|||||||
uint64 mcache_inuse; // MCache structures
|
uint64 mcache_inuse; // MCache structures
|
||||||
uint64 mcache_sys;
|
uint64 mcache_sys;
|
||||||
uint64 buckhash_sys; // profiling bucket hash table
|
uint64 buckhash_sys; // profiling bucket hash table
|
||||||
|
|
||||||
// Statistics about garbage collector.
|
// Statistics about garbage collector.
|
||||||
// Protected by stopping the world during GC.
|
// Protected by stopping the world during GC.
|
||||||
uint64 next_gc; // next GC (in heap_alloc time)
|
uint64 next_gc; // next GC (in heap_alloc time)
|
||||||
@ -219,7 +226,7 @@ struct MStats
|
|||||||
uint32 numgc;
|
uint32 numgc;
|
||||||
bool enablegc;
|
bool enablegc;
|
||||||
bool debuggc;
|
bool debuggc;
|
||||||
|
|
||||||
// Statistics about allocation size classes.
|
// Statistics about allocation size classes.
|
||||||
struct {
|
struct {
|
||||||
uint32 size;
|
uint32 size;
|
||||||
@ -240,7 +247,7 @@ extern MStats mstats;
|
|||||||
//
|
//
|
||||||
// class_to_size[i] = largest size in class i
|
// class_to_size[i] = largest size in class i
|
||||||
// class_to_allocnpages[i] = number of pages to allocate when
|
// class_to_allocnpages[i] = number of pages to allocate when
|
||||||
// making new objects in class i
|
// making new objects in class i
|
||||||
// class_to_transfercount[i] = number of objects to move when
|
// class_to_transfercount[i] = number of objects to move when
|
||||||
// taking a bunch of objects out of the central lists
|
// taking a bunch of objects out of the central lists
|
||||||
// and putting them in the thread free list.
|
// and putting them in the thread free list.
|
||||||
@ -279,7 +286,7 @@ struct MCache
|
|||||||
int64 nmalloc;
|
int64 nmalloc;
|
||||||
int64 nfree;
|
int64 nfree;
|
||||||
} local_by_size[NumSizeClasses];
|
} local_by_size[NumSizeClasses];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
|
void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
|
||||||
@ -352,7 +359,7 @@ struct MHeap
|
|||||||
byte *arena_start;
|
byte *arena_start;
|
||||||
byte *arena_used;
|
byte *arena_used;
|
||||||
byte *arena_end;
|
byte *arena_end;
|
||||||
|
|
||||||
// central free lists for small size classes.
|
// central free lists for small size classes.
|
||||||
// the union makes sure that the MCentrals are
|
// the union makes sure that the MCentrals are
|
||||||
// spaced 64 bytes apart, so that each MCentral.Lock
|
// spaced 64 bytes apart, so that each MCentral.Lock
|
||||||
@ -400,6 +407,8 @@ enum
|
|||||||
|
|
||||||
void runtime·MProf_Malloc(void*, uintptr);
|
void runtime·MProf_Malloc(void*, uintptr);
|
||||||
void runtime·MProf_Free(void*, uintptr);
|
void runtime·MProf_Free(void*, uintptr);
|
||||||
|
int32 runtime·helpgc(void);
|
||||||
|
void runtime·gchelper(void);
|
||||||
|
|
||||||
// Malloc profiling settings.
|
// Malloc profiling settings.
|
||||||
// Must match definition in extern.go.
|
// Must match definition in extern.go.
|
||||||
|
@ -10,9 +10,9 @@
|
|||||||
|
|
||||||
enum {
|
enum {
|
||||||
Debug = 0,
|
Debug = 0,
|
||||||
UseCas = 1,
|
|
||||||
PtrSize = sizeof(void*),
|
PtrSize = sizeof(void*),
|
||||||
|
DebugMark = 0, // run second pass to check mark
|
||||||
|
|
||||||
// Four bits per word (see #defines below).
|
// Four bits per word (see #defines below).
|
||||||
wordsPerBitmapWord = sizeof(void*)*8/4,
|
wordsPerBitmapWord = sizeof(void*)*8/4,
|
||||||
bitShift = sizeof(void*)*8/4,
|
bitShift = sizeof(void*)*8/4,
|
||||||
@ -51,17 +51,20 @@ enum {
|
|||||||
|
|
||||||
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
|
#define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
|
||||||
|
|
||||||
|
// TODO: Make these per-M.
|
||||||
static uint64 nlookup;
|
static uint64 nlookup;
|
||||||
static uint64 nsizelookup;
|
static uint64 nsizelookup;
|
||||||
static uint64 naddrlookup;
|
static uint64 naddrlookup;
|
||||||
|
static uint64 nhandoff;
|
||||||
|
|
||||||
static int32 gctrace;
|
static int32 gctrace;
|
||||||
|
|
||||||
typedef struct Workbuf Workbuf;
|
typedef struct Workbuf Workbuf;
|
||||||
struct Workbuf
|
struct Workbuf
|
||||||
{
|
{
|
||||||
Workbuf *next;
|
Workbuf *next;
|
||||||
uintptr nw;
|
uintptr nobj;
|
||||||
byte *w[2048-2];
|
byte *obj[512-2];
|
||||||
};
|
};
|
||||||
|
|
||||||
extern byte data[];
|
extern byte data[];
|
||||||
@ -75,6 +78,26 @@ static int32 fingwait;
|
|||||||
static void runfinq(void);
|
static void runfinq(void);
|
||||||
static Workbuf* getempty(Workbuf*);
|
static Workbuf* getempty(Workbuf*);
|
||||||
static Workbuf* getfull(Workbuf*);
|
static Workbuf* getfull(Workbuf*);
|
||||||
|
static void putempty(Workbuf*);
|
||||||
|
static Workbuf* handoff(Workbuf*);
|
||||||
|
|
||||||
|
static struct {
|
||||||
|
Lock fmu;
|
||||||
|
Workbuf *full;
|
||||||
|
Lock emu;
|
||||||
|
Workbuf *empty;
|
||||||
|
uint32 nproc;
|
||||||
|
volatile uint32 nwait;
|
||||||
|
volatile uint32 ndone;
|
||||||
|
Note alldone;
|
||||||
|
Lock markgate;
|
||||||
|
Lock sweepgate;
|
||||||
|
MSpan *spans;
|
||||||
|
|
||||||
|
Lock;
|
||||||
|
byte *chunk;
|
||||||
|
uintptr nchunk;
|
||||||
|
} work;
|
||||||
|
|
||||||
// scanblock scans a block of n bytes starting at pointer b for references
|
// scanblock scans a block of n bytes starting at pointer b for references
|
||||||
// to other objects, scanning any it finds recursively until there are no
|
// to other objects, scanning any it finds recursively until there are no
|
||||||
@ -85,13 +108,14 @@ static Workbuf* getfull(Workbuf*);
|
|||||||
static void
|
static void
|
||||||
scanblock(byte *b, int64 n)
|
scanblock(byte *b, int64 n)
|
||||||
{
|
{
|
||||||
byte *obj, *arena_start, *p;
|
byte *obj, *arena_start, *arena_used, *p;
|
||||||
void **vp;
|
void **vp;
|
||||||
uintptr size, *bitp, bits, shift, i, j, x, xbits, off;
|
uintptr size, *bitp, bits, shift, i, j, x, xbits, off, nobj, nproc;
|
||||||
MSpan *s;
|
MSpan *s;
|
||||||
PageID k;
|
PageID k;
|
||||||
void **bw, **w, **ew;
|
void **wp;
|
||||||
Workbuf *wbuf;
|
Workbuf *wbuf;
|
||||||
|
bool keepworking;
|
||||||
|
|
||||||
if((int64)(uintptr)n != n || n < 0) {
|
if((int64)(uintptr)n != n || n < 0) {
|
||||||
runtime·printf("scanblock %p %D\n", b, n);
|
runtime·printf("scanblock %p %D\n", b, n);
|
||||||
@ -100,11 +124,19 @@ scanblock(byte *b, int64 n)
|
|||||||
|
|
||||||
// Memory arena parameters.
|
// Memory arena parameters.
|
||||||
arena_start = runtime·mheap.arena_start;
|
arena_start = runtime·mheap.arena_start;
|
||||||
|
arena_used = runtime·mheap.arena_used;
|
||||||
|
nproc = work.nproc;
|
||||||
|
|
||||||
wbuf = nil; // current work buffer
|
wbuf = nil; // current work buffer
|
||||||
ew = nil; // end of work buffer
|
wp = nil; // storage for next queued pointer (write pointer)
|
||||||
bw = nil; // beginning of work buffer
|
nobj = 0; // number of queued objects
|
||||||
w = nil; // current pointer into work buffer
|
|
||||||
|
// Scanblock helpers pass b==nil.
|
||||||
|
// The main proc needs to return to make more
|
||||||
|
// calls to scanblock. But if work.nproc==1 then
|
||||||
|
// might as well process blocks as soon as we
|
||||||
|
// have them.
|
||||||
|
keepworking = b == nil || work.nproc == 1;
|
||||||
|
|
||||||
// Align b to a word boundary.
|
// Align b to a word boundary.
|
||||||
off = (uintptr)b & (PtrSize-1);
|
off = (uintptr)b & (PtrSize-1);
|
||||||
@ -120,17 +152,17 @@ scanblock(byte *b, int64 n)
|
|||||||
runtime·printf("scanblock %p %D\n", b, n);
|
runtime·printf("scanblock %p %D\n", b, n);
|
||||||
|
|
||||||
vp = (void**)b;
|
vp = (void**)b;
|
||||||
n /= PtrSize;
|
n >>= (2+PtrSize/8); /* n /= PtrSize (4 or 8) */
|
||||||
for(i=0; i<n; i++) {
|
for(i=0; i<n; i++) {
|
||||||
obj = (byte*)vp[i];
|
obj = (byte*)vp[i];
|
||||||
|
|
||||||
// Words outside the arena cannot be pointers.
|
// Words outside the arena cannot be pointers.
|
||||||
if((byte*)obj < arena_start || (byte*)obj >= runtime·mheap.arena_used)
|
if((byte*)obj < arena_start || (byte*)obj >= arena_used)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// obj may be a pointer to a live object.
|
// obj may be a pointer to a live object.
|
||||||
// Try to find the beginning of the object.
|
// Try to find the beginning of the object.
|
||||||
|
|
||||||
// Round down to word boundary.
|
// Round down to word boundary.
|
||||||
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
|
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
|
||||||
|
|
||||||
@ -188,47 +220,72 @@ scanblock(byte *b, int64 n)
|
|||||||
found:
|
found:
|
||||||
// Now we have bits, bitp, and shift correct for
|
// Now we have bits, bitp, and shift correct for
|
||||||
// obj pointing at the base of the object.
|
// obj pointing at the base of the object.
|
||||||
// If not allocated or already marked, done.
|
// Only care about allocated and not marked.
|
||||||
if((bits & bitAllocated) == 0 || (bits & bitMarked) != 0)
|
if((bits & (bitAllocated|bitMarked)) != bitAllocated)
|
||||||
continue;
|
continue;
|
||||||
*bitp |= bitMarked<<shift;
|
if(nproc == 1)
|
||||||
|
*bitp |= bitMarked<<shift;
|
||||||
|
else {
|
||||||
|
for(;;) {
|
||||||
|
x = *bitp;
|
||||||
|
if(x & (bitMarked<<shift))
|
||||||
|
goto continue_obj;
|
||||||
|
if(runtime·casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If object has no pointers, don't need to scan further.
|
// If object has no pointers, don't need to scan further.
|
||||||
if((bits & bitNoPointers) != 0)
|
if((bits & bitNoPointers) != 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// If buffer is full, get a new one.
|
// If another proc wants a pointer, give it some.
|
||||||
if(w >= ew) {
|
if(nobj > 4 && work.nwait > 0 && work.full == nil) {
|
||||||
wbuf = getempty(wbuf);
|
wbuf->nobj = nobj;
|
||||||
bw = wbuf->w;
|
wbuf = handoff(wbuf);
|
||||||
w = bw;
|
nobj = wbuf->nobj;
|
||||||
ew = bw + nelem(wbuf->w);
|
wp = wbuf->obj + nobj;
|
||||||
}
|
}
|
||||||
*w++ = obj;
|
|
||||||
|
// If buffer is full, get a new one.
|
||||||
|
if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
|
||||||
|
if(wbuf != nil)
|
||||||
|
wbuf->nobj = nobj;
|
||||||
|
wbuf = getempty(wbuf);
|
||||||
|
wp = wbuf->obj;
|
||||||
|
nobj = 0;
|
||||||
|
}
|
||||||
|
*wp++ = obj;
|
||||||
|
nobj++;
|
||||||
|
continue_obj:;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Done scanning [b, b+n). Prepare for the next iteration of
|
// Done scanning [b, b+n). Prepare for the next iteration of
|
||||||
// the loop by setting b and n to the parameters for the next block.
|
// the loop by setting b and n to the parameters for the next block.
|
||||||
|
|
||||||
// Fetch b from the work buffers.
|
// Fetch b from the work buffer.
|
||||||
if(w <= bw) {
|
if(nobj == 0) {
|
||||||
|
if(!keepworking) {
|
||||||
|
putempty(wbuf);
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Emptied our buffer: refill.
|
// Emptied our buffer: refill.
|
||||||
wbuf = getfull(wbuf);
|
wbuf = getfull(wbuf);
|
||||||
if(wbuf == nil)
|
if(wbuf == nil)
|
||||||
break;
|
return;
|
||||||
bw = wbuf->w;
|
nobj = wbuf->nobj;
|
||||||
ew = wbuf->w + nelem(wbuf->w);
|
wp = wbuf->obj + wbuf->nobj;
|
||||||
w = bw+wbuf->nw;
|
|
||||||
}
|
}
|
||||||
b = *--w;
|
b = *--wp;
|
||||||
|
nobj--;
|
||||||
|
|
||||||
// Figure out n = size of b. Start by loading bits for b.
|
// Figure out n = size of b. Start by loading bits for b.
|
||||||
off = (uintptr*)b - (uintptr*)arena_start;
|
off = (uintptr*)b - (uintptr*)arena_start;
|
||||||
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
|
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
|
||||||
shift = off % wordsPerBitmapWord;
|
shift = off % wordsPerBitmapWord;
|
||||||
xbits = *bitp;
|
xbits = *bitp;
|
||||||
bits = xbits >> shift;
|
bits = xbits >> shift;
|
||||||
|
|
||||||
// Might be small; look for nearby block boundary.
|
// Might be small; look for nearby block boundary.
|
||||||
// A block boundary is marked by either bitBlockBoundary
|
// A block boundary is marked by either bitBlockBoundary
|
||||||
// or bitAllocated being set (see notes near their definition).
|
// or bitAllocated being set (see notes near their definition).
|
||||||
@ -247,12 +304,12 @@ scanblock(byte *b, int64 n)
|
|||||||
// apply a mask to keep only the bits corresponding
|
// apply a mask to keep only the bits corresponding
|
||||||
// to shift+j < bitShift aka j < bitShift-shift.
|
// to shift+j < bitShift aka j < bitShift-shift.
|
||||||
bits &= (boundary<<(bitShift-shift)) - boundary;
|
bits &= (boundary<<(bitShift-shift)) - boundary;
|
||||||
|
|
||||||
// A block boundary j words before b is indicated by
|
// A block boundary j words before b is indicated by
|
||||||
// xbits>>(shift-j) & boundary
|
// xbits>>(shift-j) & boundary
|
||||||
// (assuming shift >= j). There is no cleverness here
|
// (assuming shift >= j). There is no cleverness here
|
||||||
// avoid the test, because when j gets too large the shift
|
// avoid the test, because when j gets too large the shift
|
||||||
// turns negative, which is undefined in C.
|
// turns negative, which is undefined in C.
|
||||||
|
|
||||||
for(j=1; j<bitShift; j++) {
|
for(j=1; j<bitShift; j++) {
|
||||||
if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
|
if(((bits>>j)&boundary) != 0 || shift>=j && ((xbits>>(shift-j))&boundary) != 0) {
|
||||||
@ -260,7 +317,7 @@ scanblock(byte *b, int64 n)
|
|||||||
goto scan;
|
goto scan;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fall back to asking span about size class.
|
// Fall back to asking span about size class.
|
||||||
// (Manually inlined copy of MHeap_Lookup.)
|
// (Manually inlined copy of MHeap_Lookup.)
|
||||||
nlookup++;
|
nlookup++;
|
||||||
@ -277,29 +334,123 @@ scanblock(byte *b, int64 n)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct {
|
// debug_scanblock is the debug copy of scanblock.
|
||||||
Workbuf *full;
|
// it is simpler, slower, single-threaded, recursive,
|
||||||
Workbuf *empty;
|
// and uses bitSpecial as the mark bit.
|
||||||
byte *chunk;
|
static void
|
||||||
uintptr nchunk;
|
debug_scanblock(byte *b, int64 n)
|
||||||
} work;
|
{
|
||||||
|
byte *obj, *p;
|
||||||
|
void **vp;
|
||||||
|
uintptr size, *bitp, bits, shift, i, xbits, off;
|
||||||
|
MSpan *s;
|
||||||
|
|
||||||
|
if(!DebugMark)
|
||||||
|
runtime·throw("debug_scanblock without DebugMark");
|
||||||
|
|
||||||
|
if((int64)(uintptr)n != n || n < 0) {
|
||||||
|
runtime·printf("debug_scanblock %p %D\n", b, n);
|
||||||
|
runtime·throw("debug_scanblock");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Align b to a word boundary.
|
||||||
|
off = (uintptr)b & (PtrSize-1);
|
||||||
|
if(off != 0) {
|
||||||
|
b += PtrSize - off;
|
||||||
|
n -= PtrSize - off;
|
||||||
|
}
|
||||||
|
|
||||||
|
vp = (void**)b;
|
||||||
|
n /= PtrSize;
|
||||||
|
for(i=0; i<n; i++) {
|
||||||
|
obj = (byte*)vp[i];
|
||||||
|
|
||||||
|
// Words outside the arena cannot be pointers.
|
||||||
|
if((byte*)obj < runtime·mheap.arena_start || (byte*)obj >= runtime·mheap.arena_used)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Round down to word boundary.
|
||||||
|
obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
|
||||||
|
|
||||||
|
// Consult span table to find beginning.
|
||||||
|
s = runtime·MHeap_LookupMaybe(&runtime·mheap, obj);
|
||||||
|
if(s == nil)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
|
||||||
|
p = (byte*)((uintptr)s->start<<PageShift);
|
||||||
|
if(s->sizeclass == 0) {
|
||||||
|
obj = p;
|
||||||
|
size = (uintptr)s->npages<<PageShift;
|
||||||
|
} else {
|
||||||
|
if((byte*)obj >= (byte*)s->limit)
|
||||||
|
continue;
|
||||||
|
size = runtime·class_to_size[s->sizeclass];
|
||||||
|
int32 i = ((byte*)obj - p)/size;
|
||||||
|
obj = p+i*size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that we know the object header, reload bits.
|
||||||
|
off = (uintptr*)obj - (uintptr*)runtime·mheap.arena_start;
|
||||||
|
bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||||
|
shift = off % wordsPerBitmapWord;
|
||||||
|
xbits = *bitp;
|
||||||
|
bits = xbits >> shift;
|
||||||
|
|
||||||
|
// Now we have bits, bitp, and shift correct for
|
||||||
|
// obj pointing at the base of the object.
|
||||||
|
// If not allocated or already marked, done.
|
||||||
|
if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
|
||||||
|
continue;
|
||||||
|
*bitp |= bitSpecial<<shift;
|
||||||
|
if(!(bits & bitMarked))
|
||||||
|
runtime·printf("found unmarked block %p in %p\n", obj, vp+i);
|
||||||
|
|
||||||
|
// If object has no pointers, don't need to scan further.
|
||||||
|
if((bits & bitNoPointers) != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
debug_scanblock(obj, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get an empty work buffer off the work.empty list,
|
// Get an empty work buffer off the work.empty list,
|
||||||
// allocating new buffers as needed.
|
// allocating new buffers as needed.
|
||||||
static Workbuf*
|
static Workbuf*
|
||||||
getempty(Workbuf *b)
|
getempty(Workbuf *b)
|
||||||
{
|
{
|
||||||
if(b != nil) {
|
if(work.nproc == 1) {
|
||||||
b->nw = nelem(b->w);
|
// Put b on full list.
|
||||||
b->next = work.full;
|
if(b != nil) {
|
||||||
work.full = b;
|
b->next = work.full;
|
||||||
|
work.full = b;
|
||||||
|
}
|
||||||
|
// Grab from empty list if possible.
|
||||||
|
b = work.empty;
|
||||||
|
if(b != nil) {
|
||||||
|
work.empty = b->next;
|
||||||
|
goto haveb;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Put b on full list.
|
||||||
|
if(b != nil) {
|
||||||
|
runtime·lock(&work.fmu);
|
||||||
|
b->next = work.full;
|
||||||
|
work.full = b;
|
||||||
|
runtime·unlock(&work.fmu);
|
||||||
|
}
|
||||||
|
// Grab from empty list if possible.
|
||||||
|
runtime·lock(&work.emu);
|
||||||
|
b = work.empty;
|
||||||
|
if(b != nil)
|
||||||
|
work.empty = b->next;
|
||||||
|
runtime·unlock(&work.emu);
|
||||||
|
if(b != nil)
|
||||||
|
goto haveb;
|
||||||
}
|
}
|
||||||
b = work.empty;
|
|
||||||
if(b != nil) {
|
// Need to allocate.
|
||||||
work.empty = b->next;
|
runtime·lock(&work);
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(work.nchunk < sizeof *b) {
|
if(work.nchunk < sizeof *b) {
|
||||||
work.nchunk = 1<<20;
|
work.nchunk = 1<<20;
|
||||||
work.chunk = runtime·SysAlloc(work.nchunk);
|
work.chunk = runtime·SysAlloc(work.nchunk);
|
||||||
@ -307,27 +458,122 @@ getempty(Workbuf *b)
|
|||||||
b = (Workbuf*)work.chunk;
|
b = (Workbuf*)work.chunk;
|
||||||
work.chunk += sizeof *b;
|
work.chunk += sizeof *b;
|
||||||
work.nchunk -= sizeof *b;
|
work.nchunk -= sizeof *b;
|
||||||
|
runtime·unlock(&work);
|
||||||
|
|
||||||
|
haveb:
|
||||||
|
b->nobj = 0;
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
putempty(Workbuf *b)
|
||||||
|
{
|
||||||
|
if(b == nil)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if(work.nproc == 1) {
|
||||||
|
b->next = work.empty;
|
||||||
|
work.empty = b;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
runtime·lock(&work.emu);
|
||||||
|
b->next = work.empty;
|
||||||
|
work.empty = b->next;
|
||||||
|
runtime·unlock(&work.emu);
|
||||||
|
}
|
||||||
|
|
||||||
// Get a full work buffer off the work.full list, or return nil.
|
// Get a full work buffer off the work.full list, or return nil.
|
||||||
static Workbuf*
|
static Workbuf*
|
||||||
getfull(Workbuf *b)
|
getfull(Workbuf *b)
|
||||||
{
|
{
|
||||||
if(b != nil) {
|
int32 i;
|
||||||
b->nw = 0;
|
Workbuf *b1;
|
||||||
b->next = work.empty;
|
|
||||||
work.empty = b;
|
if(work.nproc == 1) {
|
||||||
|
// Put b on empty list.
|
||||||
|
if(b != nil) {
|
||||||
|
b->next = work.empty;
|
||||||
|
work.empty = b;
|
||||||
|
}
|
||||||
|
// Grab from full list if possible.
|
||||||
|
// Since work.nproc==1, no one else is
|
||||||
|
// going to give us work.
|
||||||
|
b = work.full;
|
||||||
|
if(b != nil)
|
||||||
|
work.full = b->next;
|
||||||
|
return b;
|
||||||
}
|
}
|
||||||
b = work.full;
|
|
||||||
if(b != nil)
|
putempty(b);
|
||||||
work.full = b->next;
|
|
||||||
return b;
|
// Grab buffer from full list if possible.
|
||||||
|
for(;;) {
|
||||||
|
b1 = work.full;
|
||||||
|
if(b1 == nil)
|
||||||
|
break;
|
||||||
|
runtime·lock(&work.fmu);
|
||||||
|
if(work.full != nil) {
|
||||||
|
b1 = work.full;
|
||||||
|
work.full = b1->next;
|
||||||
|
runtime·unlock(&work.fmu);
|
||||||
|
return b1;
|
||||||
|
}
|
||||||
|
runtime·unlock(&work.fmu);
|
||||||
|
}
|
||||||
|
|
||||||
|
runtime·xadd(&work.nwait, +1);
|
||||||
|
for(i=0;; i++) {
|
||||||
|
b1 = work.full;
|
||||||
|
if(b1 != nil) {
|
||||||
|
runtime·lock(&work.fmu);
|
||||||
|
if(work.full != nil) {
|
||||||
|
runtime·xadd(&work.nwait, -1);
|
||||||
|
b1 = work.full;
|
||||||
|
work.full = b1->next;
|
||||||
|
runtime·unlock(&work.fmu);
|
||||||
|
return b1;
|
||||||
|
}
|
||||||
|
runtime·unlock(&work.fmu);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(work.nwait == work.nproc)
|
||||||
|
return nil;
|
||||||
|
if(i < 10)
|
||||||
|
runtime·procyield(20);
|
||||||
|
else if(i < 20)
|
||||||
|
runtime·osyield();
|
||||||
|
else
|
||||||
|
runtime·usleep(100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static Workbuf*
|
||||||
|
handoff(Workbuf *b)
|
||||||
|
{
|
||||||
|
int32 n;
|
||||||
|
Workbuf *b1;
|
||||||
|
|
||||||
|
// Make new buffer with half of b's pointers.
|
||||||
|
b1 = getempty(nil);
|
||||||
|
n = b->nobj/2;
|
||||||
|
b->nobj -= n;
|
||||||
|
b1->nobj = n;
|
||||||
|
runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
|
||||||
|
nhandoff += n;
|
||||||
|
|
||||||
|
// Put b on full list - let first half of b get stolen.
|
||||||
|
runtime·lock(&work.fmu);
|
||||||
|
b->next = work.full;
|
||||||
|
work.full = b;
|
||||||
|
runtime·unlock(&work.fmu);
|
||||||
|
|
||||||
|
return b1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scanstack calls scanblock on each of gp's stack segments.
|
// Scanstack calls scanblock on each of gp's stack segments.
|
||||||
static void
|
static void
|
||||||
scanstack(G *gp)
|
scanstack(void (*scanblock)(byte*, int64), G *gp)
|
||||||
{
|
{
|
||||||
int32 n;
|
int32 n;
|
||||||
Stktop *stk;
|
Stktop *stk;
|
||||||
@ -339,6 +585,9 @@ scanstack(G *gp)
|
|||||||
if(gp == g) {
|
if(gp == g) {
|
||||||
// Scanning our own stack: start at &gp.
|
// Scanning our own stack: start at &gp.
|
||||||
sp = (byte*)&gp;
|
sp = (byte*)&gp;
|
||||||
|
} else if(gp->m != nil && gp->m->helpgc) {
|
||||||
|
// Gc helper scans its own stack.
|
||||||
|
return;
|
||||||
} else {
|
} else {
|
||||||
// Scanning another goroutine's stack.
|
// Scanning another goroutine's stack.
|
||||||
// The goroutine is usually asleep (the world is stopped).
|
// The goroutine is usually asleep (the world is stopped).
|
||||||
@ -387,17 +636,27 @@ markfin(void *v)
|
|||||||
scanblock(v, size);
|
scanblock(v, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark
|
|
||||||
static void
|
static void
|
||||||
mark(void)
|
debug_markfin(void *v)
|
||||||
|
{
|
||||||
|
uintptr size;
|
||||||
|
|
||||||
|
if(!runtime·mlookup(v, &v, &size, nil))
|
||||||
|
runtime·throw("debug_mark - finalizer inconsistency");
|
||||||
|
debug_scanblock(v, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark
|
||||||
|
static void
|
||||||
|
mark(void (*scan)(byte*, int64))
|
||||||
{
|
{
|
||||||
G *gp;
|
G *gp;
|
||||||
|
|
||||||
// mark data+bss.
|
// mark data+bss.
|
||||||
// skip runtime·mheap itself, which has no interesting pointers
|
// skip runtime·mheap itself, which has no interesting pointers
|
||||||
// and is mostly zeroed and would not otherwise be paged in.
|
// and is mostly zeroed and would not otherwise be paged in.
|
||||||
scanblock(data, (byte*)&runtime·mheap - data);
|
scan(data, (byte*)&runtime·mheap - data);
|
||||||
scanblock((byte*)(&runtime·mheap+1), end - (byte*)(&runtime·mheap+1));
|
scan((byte*)(&runtime·mheap+1), end - (byte*)(&runtime·mheap+1));
|
||||||
|
|
||||||
// mark stacks
|
// mark stacks
|
||||||
for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
|
for(gp=runtime·allg; gp!=nil; gp=gp->alllink) {
|
||||||
@ -410,18 +669,24 @@ mark(void)
|
|||||||
case Grunning:
|
case Grunning:
|
||||||
if(gp != g)
|
if(gp != g)
|
||||||
runtime·throw("mark - world not stopped");
|
runtime·throw("mark - world not stopped");
|
||||||
scanstack(gp);
|
scanstack(scan, gp);
|
||||||
break;
|
break;
|
||||||
case Grunnable:
|
case Grunnable:
|
||||||
case Gsyscall:
|
case Gsyscall:
|
||||||
case Gwaiting:
|
case Gwaiting:
|
||||||
scanstack(gp);
|
scanstack(scan, gp);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// mark things pointed at by objects with finalizers
|
// mark things pointed at by objects with finalizers
|
||||||
runtime·walkfintab(markfin);
|
if(scan == debug_scanblock)
|
||||||
|
runtime·walkfintab(debug_markfin);
|
||||||
|
else
|
||||||
|
runtime·walkfintab(markfin);
|
||||||
|
|
||||||
|
// in multiproc mode, join in the queued work.
|
||||||
|
scan(nil, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sweep frees or calls finalizers for blocks not marked in the mark phase.
|
// Sweep frees or calls finalizers for blocks not marked in the mark phase.
|
||||||
@ -435,8 +700,17 @@ sweep(void)
|
|||||||
byte *p;
|
byte *p;
|
||||||
MCache *c;
|
MCache *c;
|
||||||
Finalizer *f;
|
Finalizer *f;
|
||||||
|
byte *arena_start;
|
||||||
|
|
||||||
|
arena_start = runtime·mheap.arena_start;
|
||||||
|
|
||||||
|
for(;;) {
|
||||||
|
s = work.spans;
|
||||||
|
if(s == nil)
|
||||||
|
break;
|
||||||
|
if(!runtime·casp(&work.spans, s, s->allnext))
|
||||||
|
continue;
|
||||||
|
|
||||||
for(s = runtime·mheap.allspans; s != nil; s = s->allnext) {
|
|
||||||
if(s->state != MSpanInUse)
|
if(s->state != MSpanInUse)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -451,13 +725,15 @@ sweep(void)
|
|||||||
npages = runtime·class_to_allocnpages[cl];
|
npages = runtime·class_to_allocnpages[cl];
|
||||||
n = (npages << PageShift) / size;
|
n = (npages << PageShift) / size;
|
||||||
}
|
}
|
||||||
|
|
||||||
// sweep through n objects of given size starting at p.
|
// Sweep through n objects of given size starting at p.
|
||||||
|
// This thread owns the span now, so it can manipulate
|
||||||
|
// the block bitmap without atomic operations.
|
||||||
for(; n > 0; n--, p += size) {
|
for(; n > 0; n--, p += size) {
|
||||||
uintptr off, *bitp, shift, bits;
|
uintptr off, *bitp, shift, bits;
|
||||||
|
|
||||||
off = (uintptr*)p - (uintptr*)runtime·mheap.arena_start;
|
off = (uintptr*)p - (uintptr*)arena_start;
|
||||||
bitp = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
|
||||||
shift = off % wordsPerBitmapWord;
|
shift = off % wordsPerBitmapWord;
|
||||||
bits = *bitp>>shift;
|
bits = *bitp>>shift;
|
||||||
|
|
||||||
@ -465,17 +741,27 @@ sweep(void)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
if((bits & bitMarked) != 0) {
|
if((bits & bitMarked) != 0) {
|
||||||
|
if(DebugMark) {
|
||||||
|
if(!(bits & bitSpecial))
|
||||||
|
runtime·printf("found spurious mark on %p\n", p);
|
||||||
|
*bitp &= ~(bitSpecial<<shift);
|
||||||
|
}
|
||||||
*bitp &= ~(bitMarked<<shift);
|
*bitp &= ~(bitMarked<<shift);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if((bits & bitSpecial) != 0) {
|
if(DebugMark || (bits & bitSpecial) != 0) {
|
||||||
// Special means it has a finalizer or is being profiled.
|
// Special means it has a finalizer or is being profiled.
|
||||||
|
// In DebugMark mode, the bit has been coopted so
|
||||||
|
// we have to assume all blocks are special.
|
||||||
f = runtime·getfinalizer(p, 1);
|
f = runtime·getfinalizer(p, 1);
|
||||||
if(f != nil) {
|
if(f != nil) {
|
||||||
f->arg = p;
|
f->arg = p;
|
||||||
f->next = finq;
|
for(;;) {
|
||||||
finq = f;
|
f->next = finq;
|
||||||
|
if(runtime·casp(&finq, f->next, f))
|
||||||
|
break;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
runtime·MProf_Free(p, size);
|
runtime·MProf_Free(p, size);
|
||||||
@ -503,6 +789,23 @@ sweep(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
runtime·gchelper(void)
|
||||||
|
{
|
||||||
|
// Wait until main proc is ready for mark help.
|
||||||
|
runtime·lock(&work.markgate);
|
||||||
|
runtime·unlock(&work.markgate);
|
||||||
|
scanblock(nil, 0);
|
||||||
|
|
||||||
|
// Wait until main proc is ready for sweep help.
|
||||||
|
runtime·lock(&work.sweepgate);
|
||||||
|
runtime·unlock(&work.sweepgate);
|
||||||
|
sweep();
|
||||||
|
|
||||||
|
if(runtime·xadd(&work.ndone, +1) == work.nproc-1)
|
||||||
|
runtime·notewakeup(&work.alldone);
|
||||||
|
}
|
||||||
|
|
||||||
// Semaphore, not Lock, so that the goroutine
|
// Semaphore, not Lock, so that the goroutine
|
||||||
// reschedules when there is contention rather
|
// reschedules when there is contention rather
|
||||||
// than spinning.
|
// than spinning.
|
||||||
@ -523,7 +826,7 @@ static void
|
|||||||
stealcache(void)
|
stealcache(void)
|
||||||
{
|
{
|
||||||
M *m;
|
M *m;
|
||||||
|
|
||||||
for(m=runtime·allm; m; m=m->alllink)
|
for(m=runtime·allm; m; m=m->alllink)
|
||||||
runtime·MCache_ReleaseAll(m->mcache);
|
runtime·MCache_ReleaseAll(m->mcache);
|
||||||
}
|
}
|
||||||
@ -562,6 +865,7 @@ runtime·gc(int32 force)
|
|||||||
uint64 heap0, heap1, obj0, obj1;
|
uint64 heap0, heap1, obj0, obj1;
|
||||||
byte *p;
|
byte *p;
|
||||||
Finalizer *fp;
|
Finalizer *fp;
|
||||||
|
bool extra;
|
||||||
|
|
||||||
// The gc is turned off (via enablegc) until
|
// The gc is turned off (via enablegc) until
|
||||||
// the bootstrap has completed.
|
// the bootstrap has completed.
|
||||||
@ -582,7 +886,7 @@ runtime·gc(int32 force)
|
|||||||
gcpercent = -1;
|
gcpercent = -1;
|
||||||
else
|
else
|
||||||
gcpercent = runtime·atoi(p);
|
gcpercent = runtime·atoi(p);
|
||||||
|
|
||||||
p = runtime·getenv("GOGCTRACE");
|
p = runtime·getenv("GOGCTRACE");
|
||||||
if(p != nil)
|
if(p != nil)
|
||||||
gctrace = runtime·atoi(p);
|
gctrace = runtime·atoi(p);
|
||||||
@ -600,6 +904,7 @@ runtime·gc(int32 force)
|
|||||||
nlookup = 0;
|
nlookup = 0;
|
||||||
nsizelookup = 0;
|
nsizelookup = 0;
|
||||||
naddrlookup = 0;
|
naddrlookup = 0;
|
||||||
|
nhandoff = 0;
|
||||||
|
|
||||||
m->gcing = 1;
|
m->gcing = 1;
|
||||||
runtime·stoptheworld();
|
runtime·stoptheworld();
|
||||||
@ -608,10 +913,30 @@ runtime·gc(int32 force)
|
|||||||
heap0 = mstats.heap_alloc;
|
heap0 = mstats.heap_alloc;
|
||||||
obj0 = mstats.nmalloc - mstats.nfree;
|
obj0 = mstats.nmalloc - mstats.nfree;
|
||||||
|
|
||||||
mark();
|
runtime·lock(&work.markgate);
|
||||||
|
runtime·lock(&work.sweepgate);
|
||||||
|
|
||||||
|
work.nproc = 1;
|
||||||
|
if(runtime·gomaxprocs > 1 && runtime·ncpu > 1) {
|
||||||
|
runtime·noteclear(&work.alldone);
|
||||||
|
work.nproc += runtime·helpgc();
|
||||||
|
}
|
||||||
|
work.nwait = 0;
|
||||||
|
work.ndone = 0;
|
||||||
|
|
||||||
|
runtime·unlock(&work.markgate); // let the helpers in
|
||||||
|
mark(scanblock);
|
||||||
|
if(DebugMark)
|
||||||
|
mark(debug_scanblock);
|
||||||
t1 = runtime·nanotime();
|
t1 = runtime·nanotime();
|
||||||
|
|
||||||
|
work.spans = runtime·mheap.allspans;
|
||||||
|
runtime·unlock(&work.sweepgate); // let the helpers in
|
||||||
sweep();
|
sweep();
|
||||||
|
if(work.nproc > 1)
|
||||||
|
runtime·notesleep(&work.alldone);
|
||||||
t2 = runtime·nanotime();
|
t2 = runtime·nanotime();
|
||||||
|
|
||||||
stealcache();
|
stealcache();
|
||||||
cachestats();
|
cachestats();
|
||||||
|
|
||||||
@ -641,22 +966,32 @@ runtime·gc(int32 force)
|
|||||||
mstats.numgc++;
|
mstats.numgc++;
|
||||||
if(mstats.debuggc)
|
if(mstats.debuggc)
|
||||||
runtime·printf("pause %D\n", t3-t0);
|
runtime·printf("pause %D\n", t3-t0);
|
||||||
|
|
||||||
if(gctrace) {
|
if(gctrace) {
|
||||||
runtime·printf("gc%d: %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr)\n",
|
runtime·printf("gc%d(%d): %D+%D+%D ms %D -> %D MB %D -> %D (%D-%D) objects %D pointer lookups (%D size, %D addr) %D handoff\n",
|
||||||
mstats.numgc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
|
mstats.numgc, work.nproc, (t1-t0)/1000000, (t2-t1)/1000000, (t3-t2)/1000000,
|
||||||
heap0>>20, heap1>>20, obj0, obj1,
|
heap0>>20, heap1>>20, obj0, obj1,
|
||||||
mstats.nmalloc, mstats.nfree,
|
mstats.nmalloc, mstats.nfree,
|
||||||
nlookup, nsizelookup, naddrlookup);
|
nlookup, nsizelookup, naddrlookup, nhandoff);
|
||||||
}
|
}
|
||||||
|
|
||||||
runtime·semrelease(&gcsema);
|
runtime·semrelease(&gcsema);
|
||||||
runtime·starttheworld();
|
|
||||||
|
// If we could have used another helper proc, start one now,
|
||||||
|
// in the hope that it will be available next time.
|
||||||
|
// It would have been even better to start it before the collection,
|
||||||
|
// but doing so requires allocating memory, so it's tricky to
|
||||||
|
// coordinate. This lazy approach works out in practice:
|
||||||
|
// we don't mind if the first couple gc rounds don't have quite
|
||||||
|
// the maximum number of procs.
|
||||||
|
extra = work.nproc < runtime·gomaxprocs && work.nproc < MaxGcproc;
|
||||||
|
|
||||||
|
runtime·starttheworld(extra);
|
||||||
|
|
||||||
// give the queued finalizers, if any, a chance to run
|
// give the queued finalizers, if any, a chance to run
|
||||||
if(fp != nil)
|
if(fp != nil)
|
||||||
runtime·gosched();
|
runtime·gosched();
|
||||||
|
|
||||||
if(gctrace > 1 && !force)
|
if(gctrace > 1 && !force)
|
||||||
runtime·gc(1);
|
runtime·gc(1);
|
||||||
}
|
}
|
||||||
@ -674,7 +1009,7 @@ runtime·UpdateMemStats(void)
|
|||||||
cachestats();
|
cachestats();
|
||||||
m->gcing = 0;
|
m->gcing = 0;
|
||||||
runtime·semrelease(&gcsema);
|
runtime·semrelease(&gcsema);
|
||||||
runtime·starttheworld();
|
runtime·starttheworld(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -858,6 +1193,9 @@ runtime·blockspecial(void *v)
|
|||||||
{
|
{
|
||||||
uintptr *b, off, shift;
|
uintptr *b, off, shift;
|
||||||
|
|
||||||
|
if(DebugMark)
|
||||||
|
return true;
|
||||||
|
|
||||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
|
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
|
||||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||||
shift = off % wordsPerBitmapWord;
|
shift = off % wordsPerBitmapWord;
|
||||||
@ -870,6 +1208,9 @@ runtime·setblockspecial(void *v)
|
|||||||
{
|
{
|
||||||
uintptr *b, off, shift, bits, obits;
|
uintptr *b, off, shift, bits, obits;
|
||||||
|
|
||||||
|
if(DebugMark)
|
||||||
|
return;
|
||||||
|
|
||||||
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
|
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;
|
||||||
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
|
||||||
shift = off % wordsPerBitmapWord;
|
shift = off % wordsPerBitmapWord;
|
||||||
@ -887,7 +1228,7 @@ runtime·setblockspecial(void *v)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
runtime·MHeap_MapBits(MHeap *h)
|
runtime·MHeap_MapBits(MHeap *h)
|
||||||
{
|
{
|
||||||
@ -898,7 +1239,7 @@ runtime·MHeap_MapBits(MHeap *h)
|
|||||||
bitmapChunk = 8192
|
bitmapChunk = 8192
|
||||||
};
|
};
|
||||||
uintptr n;
|
uintptr n;
|
||||||
|
|
||||||
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
|
n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
|
||||||
n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
|
n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
|
||||||
if(h->bitmap_mapped >= n)
|
if(h->bitmap_mapped >= n)
|
||||||
|
@ -51,7 +51,7 @@ vprintf(int8 *s, byte *base)
|
|||||||
uintptr arg, narg;
|
uintptr arg, narg;
|
||||||
byte *v;
|
byte *v;
|
||||||
|
|
||||||
// lock(&debuglock);
|
//runtime·lock(&debuglock);
|
||||||
|
|
||||||
lp = p = s;
|
lp = p = s;
|
||||||
arg = 0;
|
arg = 0;
|
||||||
@ -152,7 +152,7 @@ vprintf(int8 *s, byte *base)
|
|||||||
if(p > lp)
|
if(p > lp)
|
||||||
runtime·write(2, lp, p-lp);
|
runtime·write(2, lp, p-lp);
|
||||||
|
|
||||||
// unlock(&debuglock);
|
//runtime·unlock(&debuglock);
|
||||||
}
|
}
|
||||||
|
|
||||||
#pragma textflag 7
|
#pragma textflag 7
|
||||||
@ -348,4 +348,4 @@ runtime·typestring(Eface e, String s)
|
|||||||
s = *e.type->string;
|
s = *e.type->string;
|
||||||
FLUSH(&s);
|
FLUSH(&s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ static void unwindstack(G*, byte*);
|
|||||||
static void schedule(G*);
|
static void schedule(G*);
|
||||||
static void acquireproc(void);
|
static void acquireproc(void);
|
||||||
static void releaseproc(void);
|
static void releaseproc(void);
|
||||||
|
static M *startm(void);
|
||||||
|
|
||||||
typedef struct Sched Sched;
|
typedef struct Sched Sched;
|
||||||
|
|
||||||
@ -323,6 +324,9 @@ mcommoninit(M *m)
|
|||||||
m->fastrand = 0x49f6428aUL + m->id;
|
m->fastrand = 0x49f6428aUL + m->id;
|
||||||
m->stackalloc = runtime·malloc(sizeof(*m->stackalloc));
|
m->stackalloc = runtime·malloc(sizeof(*m->stackalloc));
|
||||||
runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil);
|
runtime·FixAlloc_Init(m->stackalloc, FixedStack, runtime·SysAlloc, nil, nil);
|
||||||
|
|
||||||
|
if(m->mcache == nil)
|
||||||
|
m->mcache = runtime·allocmcache();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to increment mcpu. Report whether succeeded.
|
// Try to increment mcpu. Report whether succeeded.
|
||||||
@ -422,7 +426,7 @@ mget(G *g)
|
|||||||
M *m;
|
M *m;
|
||||||
|
|
||||||
// if g has its own m, use it.
|
// if g has its own m, use it.
|
||||||
if((m = g->lockedm) != nil)
|
if(g && (m = g->lockedm) != nil)
|
||||||
return m;
|
return m;
|
||||||
|
|
||||||
// otherwise use general m pool.
|
// otherwise use general m pool.
|
||||||
@ -507,6 +511,7 @@ nextgandunlock(void)
|
|||||||
G *gp;
|
G *gp;
|
||||||
uint32 v;
|
uint32 v;
|
||||||
|
|
||||||
|
top:
|
||||||
if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs)
|
if(atomic_mcpu(runtime·sched.atomic) >= maxgomaxprocs)
|
||||||
runtime·throw("negative mcpu");
|
runtime·throw("negative mcpu");
|
||||||
|
|
||||||
@ -584,12 +589,49 @@ nextgandunlock(void)
|
|||||||
schedunlock();
|
schedunlock();
|
||||||
|
|
||||||
runtime·notesleep(&m->havenextg);
|
runtime·notesleep(&m->havenextg);
|
||||||
|
if(m->helpgc) {
|
||||||
|
runtime·gchelper();
|
||||||
|
m->helpgc = 0;
|
||||||
|
runtime·lock(&runtime·sched);
|
||||||
|
goto top;
|
||||||
|
}
|
||||||
if((gp = m->nextg) == nil)
|
if((gp = m->nextg) == nil)
|
||||||
runtime·throw("bad m->nextg in nextgoroutine");
|
runtime·throw("bad m->nextg in nextgoroutine");
|
||||||
m->nextg = nil;
|
m->nextg = nil;
|
||||||
return gp;
|
return gp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int32
|
||||||
|
runtime·helpgc(void)
|
||||||
|
{
|
||||||
|
M *m;
|
||||||
|
int32 n, max;
|
||||||
|
|
||||||
|
// Figure out how many CPUs to use.
|
||||||
|
// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
|
||||||
|
max = runtime·gomaxprocs;
|
||||||
|
if(max > runtime·ncpu)
|
||||||
|
max = runtime·ncpu;
|
||||||
|
if(max > MaxGcproc)
|
||||||
|
max = MaxGcproc;
|
||||||
|
|
||||||
|
|
||||||
|
// We're going to use one CPU no matter what.
|
||||||
|
// Figure out the max number of additional CPUs.
|
||||||
|
max--;
|
||||||
|
|
||||||
|
runtime·lock(&runtime·sched);
|
||||||
|
n = 0;
|
||||||
|
while(n < max && (m = mget(nil)) != nil) {
|
||||||
|
n++;
|
||||||
|
m->helpgc = 1;
|
||||||
|
m->waitnextg = 0;
|
||||||
|
runtime·notewakeup(&m->havenextg);
|
||||||
|
}
|
||||||
|
runtime·unlock(&runtime·sched);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
runtime·stoptheworld(void)
|
runtime·stoptheworld(void)
|
||||||
{
|
{
|
||||||
@ -626,15 +668,28 @@ runtime·stoptheworld(void)
|
|||||||
schedunlock();
|
schedunlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(rsc): Remove. This is only temporary,
|
|
||||||
// for the mark and sweep collector.
|
|
||||||
void
|
void
|
||||||
runtime·starttheworld(void)
|
runtime·starttheworld(bool extra)
|
||||||
{
|
{
|
||||||
|
M *m;
|
||||||
|
|
||||||
schedlock();
|
schedlock();
|
||||||
runtime·gcwaiting = 0;
|
runtime·gcwaiting = 0;
|
||||||
setmcpumax(runtime·gomaxprocs);
|
setmcpumax(runtime·gomaxprocs);
|
||||||
matchmg();
|
matchmg();
|
||||||
|
if(extra && canaddmcpu()) {
|
||||||
|
// Start a new m that will (we hope) be idle
|
||||||
|
// and so available to help when the next
|
||||||
|
// garbage collection happens.
|
||||||
|
// canaddmcpu above did mcpu++
|
||||||
|
// (necessary, because m will be doing various
|
||||||
|
// initialization work so is definitely running),
|
||||||
|
// but m is not running a specific goroutine,
|
||||||
|
// so set the helpgc flag as a signal to m's
|
||||||
|
// first schedule(nil) to mcpu--.
|
||||||
|
m = startm();
|
||||||
|
m->helpgc = 1;
|
||||||
|
}
|
||||||
schedunlock();
|
schedunlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -644,8 +699,6 @@ runtime·mstart(void)
|
|||||||
{
|
{
|
||||||
if(g != m->g0)
|
if(g != m->g0)
|
||||||
runtime·throw("bad runtime·mstart");
|
runtime·throw("bad runtime·mstart");
|
||||||
if(m->mcache == nil)
|
|
||||||
m->mcache = runtime·allocmcache();
|
|
||||||
|
|
||||||
// Record top of stack for use by mcall.
|
// Record top of stack for use by mcall.
|
||||||
// Once we call schedule we're never coming back,
|
// Once we call schedule we're never coming back,
|
||||||
@ -677,46 +730,55 @@ struct CgoThreadStart
|
|||||||
static void
|
static void
|
||||||
matchmg(void)
|
matchmg(void)
|
||||||
{
|
{
|
||||||
G *g;
|
G *gp;
|
||||||
|
M *mp;
|
||||||
|
|
||||||
if(m->mallocing || m->gcing)
|
if(m->mallocing || m->gcing)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
while(haveg() && canaddmcpu()) {
|
while(haveg() && canaddmcpu()) {
|
||||||
g = gget();
|
gp = gget();
|
||||||
if(g == nil)
|
if(gp == nil)
|
||||||
runtime·throw("gget inconsistency");
|
runtime·throw("gget inconsistency");
|
||||||
|
|
||||||
// Find the m that will run g.
|
// Find the m that will run gp.
|
||||||
M *m;
|
if((mp = mget(gp)) == nil)
|
||||||
if((m = mget(g)) == nil){
|
mp = startm();
|
||||||
m = runtime·malloc(sizeof(M));
|
mnextg(mp, gp);
|
||||||
mcommoninit(m);
|
|
||||||
|
|
||||||
if(runtime·iscgo) {
|
|
||||||
CgoThreadStart ts;
|
|
||||||
|
|
||||||
if(libcgo_thread_start == nil)
|
|
||||||
runtime·throw("libcgo_thread_start missing");
|
|
||||||
// pthread_create will make us a stack.
|
|
||||||
m->g0 = runtime·malg(-1);
|
|
||||||
ts.m = m;
|
|
||||||
ts.g = m->g0;
|
|
||||||
ts.fn = runtime·mstart;
|
|
||||||
runtime·asmcgocall(libcgo_thread_start, &ts);
|
|
||||||
} else {
|
|
||||||
if(Windows)
|
|
||||||
// windows will layout sched stack on os stack
|
|
||||||
m->g0 = runtime·malg(-1);
|
|
||||||
else
|
|
||||||
m->g0 = runtime·malg(8192);
|
|
||||||
runtime·newosproc(m, m->g0, m->g0->stackbase, runtime·mstart);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mnextg(m, g);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static M*
|
||||||
|
startm(void)
|
||||||
|
{
|
||||||
|
M *m;
|
||||||
|
|
||||||
|
m = runtime·malloc(sizeof(M));
|
||||||
|
mcommoninit(m);
|
||||||
|
|
||||||
|
if(runtime·iscgo) {
|
||||||
|
CgoThreadStart ts;
|
||||||
|
|
||||||
|
if(libcgo_thread_start == nil)
|
||||||
|
runtime·throw("libcgo_thread_start missing");
|
||||||
|
// pthread_create will make us a stack.
|
||||||
|
m->g0 = runtime·malg(-1);
|
||||||
|
ts.m = m;
|
||||||
|
ts.g = m->g0;
|
||||||
|
ts.fn = runtime·mstart;
|
||||||
|
runtime·asmcgocall(libcgo_thread_start, &ts);
|
||||||
|
} else {
|
||||||
|
if(Windows)
|
||||||
|
// windows will layout sched stack on os stack
|
||||||
|
m->g0 = runtime·malg(-1);
|
||||||
|
else
|
||||||
|
m->g0 = runtime·malg(8192);
|
||||||
|
runtime·newosproc(m, m->g0, m->g0->stackbase, runtime·mstart);
|
||||||
|
}
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
// One round of scheduler: find a goroutine and run it.
|
// One round of scheduler: find a goroutine and run it.
|
||||||
// The argument is the goroutine that was running before
|
// The argument is the goroutine that was running before
|
||||||
// schedule was called, or nil if this is the first call.
|
// schedule was called, or nil if this is the first call.
|
||||||
@ -767,6 +829,12 @@ schedule(G *gp)
|
|||||||
gp->readyonstop = 0;
|
gp->readyonstop = 0;
|
||||||
readylocked(gp);
|
readylocked(gp);
|
||||||
}
|
}
|
||||||
|
} else if(m->helpgc) {
|
||||||
|
// atomic { mcpu-- }
|
||||||
|
v = runtime·xadd(&runtime·sched.atomic, -1<<mcpuShift);
|
||||||
|
if(atomic_mcpu(v) > maxgomaxprocs)
|
||||||
|
runtime·throw("negative mcpu in scheduler");
|
||||||
|
m->helpgc = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find (or wait for) g to run. Unlocks runtime·sched.
|
// Find (or wait for) g to run. Unlocks runtime·sched.
|
||||||
@ -1097,7 +1165,7 @@ runtime·newproc1(byte *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
|
|||||||
//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
|
//printf("newproc1 %p %p narg=%d nret=%d\n", fn, argp, narg, nret);
|
||||||
siz = narg + nret;
|
siz = narg + nret;
|
||||||
siz = (siz+7) & ~7;
|
siz = (siz+7) & ~7;
|
||||||
|
|
||||||
// We could instead create a secondary stack frame
|
// We could instead create a secondary stack frame
|
||||||
// and make it look like goexit was on the original but
|
// and make it look like goexit was on the original but
|
||||||
// the call to the actual goroutine function was split.
|
// the call to the actual goroutine function was split.
|
||||||
|
@ -57,7 +57,7 @@ typedef struct String String;
|
|||||||
typedef struct Usema Usema;
|
typedef struct Usema Usema;
|
||||||
typedef struct SigTab SigTab;
|
typedef struct SigTab SigTab;
|
||||||
typedef struct MCache MCache;
|
typedef struct MCache MCache;
|
||||||
typedef struct FixAlloc FixAlloc;
|
typedef struct FixAlloc FixAlloc;
|
||||||
typedef struct Iface Iface;
|
typedef struct Iface Iface;
|
||||||
typedef struct Itab Itab;
|
typedef struct Itab Itab;
|
||||||
typedef struct Eface Eface;
|
typedef struct Eface Eface;
|
||||||
@ -238,6 +238,7 @@ struct M
|
|||||||
int32 waitnextg;
|
int32 waitnextg;
|
||||||
int32 dying;
|
int32 dying;
|
||||||
int32 profilehz;
|
int32 profilehz;
|
||||||
|
int32 helpgc;
|
||||||
uint32 fastrand;
|
uint32 fastrand;
|
||||||
uint64 ncgocall;
|
uint64 ncgocall;
|
||||||
Note havenextg;
|
Note havenextg;
|
||||||
@ -406,6 +407,7 @@ extern bool runtime·singleproc;
|
|||||||
extern uint32 runtime·panicking;
|
extern uint32 runtime·panicking;
|
||||||
extern int32 runtime·gcwaiting; // gc is waiting to run
|
extern int32 runtime·gcwaiting; // gc is waiting to run
|
||||||
int8* runtime·goos;
|
int8* runtime·goos;
|
||||||
|
int32 runtime·ncpu;
|
||||||
extern bool runtime·iscgo;
|
extern bool runtime·iscgo;
|
||||||
extern void (*runtime·destroylock)(Lock*);
|
extern void (*runtime·destroylock)(Lock*);
|
||||||
|
|
||||||
@ -515,6 +517,7 @@ void runtime·startpanic(void);
|
|||||||
void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp);
|
void runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp);
|
||||||
void runtime·resetcpuprofiler(int32);
|
void runtime·resetcpuprofiler(int32);
|
||||||
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
||||||
|
void runtime·usleep(uint32);
|
||||||
|
|
||||||
#pragma varargck argpos runtime·printf 1
|
#pragma varargck argpos runtime·printf 1
|
||||||
#pragma varargck type "d" int32
|
#pragma varargck type "d" int32
|
||||||
@ -534,7 +537,7 @@ void runtime·setcpuprofilerate(void(*)(uintptr*, int32), int32);
|
|||||||
// TODO(rsc): Remove. These are only temporary,
|
// TODO(rsc): Remove. These are only temporary,
|
||||||
// for the mark and sweep collector.
|
// for the mark and sweep collector.
|
||||||
void runtime·stoptheworld(void);
|
void runtime·stoptheworld(void);
|
||||||
void runtime·starttheworld(void);
|
void runtime·starttheworld(bool);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* mutual exclusion locks. in the uncontended case,
|
* mutual exclusion locks. in the uncontended case,
|
||||||
|
@ -18,7 +18,7 @@ all: $(addsuffix .out, $(ALL))
|
|||||||
$(LD) -o $@ $*.$O
|
$(LD) -o $@ $*.$O
|
||||||
|
|
||||||
%.bench: %.out
|
%.bench: %.out
|
||||||
./$*.out
|
time ./$*.out
|
||||||
|
|
||||||
bench: $(addsuffix .bench, $(ALL))
|
bench: $(addsuffix .bench, $(ALL))
|
||||||
|
|
||||||
|
@ -73,10 +73,6 @@ func parseDir(dirpath string) map[string]*ast.Package {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
runtime.GOMAXPROCS(4)
|
|
||||||
go func() {}()
|
|
||||||
go func() {}()
|
|
||||||
go func() {}()
|
|
||||||
st := &runtime.MemStats
|
st := &runtime.MemStats
|
||||||
packages = append(packages, packages...)
|
packages = append(packages, packages...)
|
||||||
packages = append(packages, packages...)
|
packages = append(packages, packages...)
|
||||||
@ -132,7 +128,6 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
var packages = []string{
|
var packages = []string{
|
||||||
"archive/tar",
|
"archive/tar",
|
||||||
"asn1",
|
"asn1",
|
||||||
@ -148,7 +143,6 @@ var packages = []string{
|
|||||||
"container/ring",
|
"container/ring",
|
||||||
"container/vector",
|
"container/vector",
|
||||||
"crypto/aes",
|
"crypto/aes",
|
||||||
"crypto/block",
|
|
||||||
"crypto/blowfish",
|
"crypto/blowfish",
|
||||||
"crypto/hmac",
|
"crypto/hmac",
|
||||||
"crypto/md4",
|
"crypto/md4",
|
||||||
@ -167,7 +161,6 @@ var packages = []string{
|
|||||||
"debug/macho",
|
"debug/macho",
|
||||||
"debug/elf",
|
"debug/elf",
|
||||||
"debug/gosym",
|
"debug/gosym",
|
||||||
"debug/proc",
|
|
||||||
"ebnf",
|
"ebnf",
|
||||||
"encoding/ascii85",
|
"encoding/ascii85",
|
||||||
"encoding/base64",
|
"encoding/base64",
|
||||||
@ -177,9 +170,6 @@ var packages = []string{
|
|||||||
"encoding/pem",
|
"encoding/pem",
|
||||||
"exec",
|
"exec",
|
||||||
"exp/datafmt",
|
"exp/datafmt",
|
||||||
"exp/draw",
|
|
||||||
"exp/eval",
|
|
||||||
"exp/iterable",
|
|
||||||
"expvar",
|
"expvar",
|
||||||
"flag",
|
"flag",
|
||||||
"fmt",
|
"fmt",
|
||||||
|
Loading…
Reference in New Issue
Block a user