mirror of
https://github.com/golang/go
synced 2024-10-04 14:21:21 -06:00
67c83db60d
Package runtime's C functions written to be called from Go started out written in C using carefully constructed argument lists and the FLUSH macro to write a result back to memory. For some functions, the appropriate parameter list ended up being architecture-dependent due to differences in alignment, so we added 'goc2c', which takes a .goc file containing Go func declarations but C bodies, rewrites the Go func declaration to equivalent C declarations for the target architecture, adds the needed FLUSH statements, and writes out an equivalent C file. That C file is compiled as part of package runtime. Native Client's x86-64 support introduces the most complex alignment rules yet, breaking many functions that could until now be portably written in C. Using goc2c for those avoids the breakage. Separately, Keith's work on emitting stack information from the C compiler would require the hand-written functions to add #pragmas specifying how many arguments are result parameters. Using goc2c for those avoids maintaining #pragmas. For both reasons, use goc2c for as many Go-called C functions as possible. This CL is a replay of the bulk of CL 15400047 and CL 15790043, both of which were reviewed as part of the NaCl port and are checked in to the NaCl branch. This CL is part of bringing the NaCl code into the main tree. No new code here, just reformatting and occasional movement into .h files. LGTM=r R=dave, alex.brainman, r CC=golang-codereviews https://golang.org/cl/65220044
200 lines
4.8 KiB
C
200 lines
4.8 KiB
C
// Copyright 2012 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Parallel for algorithm.
|
|
|
|
#include "runtime.h"
|
|
#include "arch_GOARCH.h"
|
|
|
|
struct ParForThread
|
|
{
|
|
// the thread's iteration space [32lsb, 32msb)
|
|
uint64 pos;
|
|
// stats
|
|
uint64 nsteal;
|
|
uint64 nstealcnt;
|
|
uint64 nprocyield;
|
|
uint64 nosyield;
|
|
uint64 nsleep;
|
|
byte pad[CacheLineSize];
|
|
};
|
|
|
|
ParFor*
|
|
runtime·parforalloc(uint32 nthrmax)
|
|
{
|
|
ParFor *desc;
|
|
|
|
// The ParFor object is followed by CacheLineSize padding
|
|
// and then nthrmax ParForThread.
|
|
desc = (ParFor*)runtime·malloc(sizeof(ParFor) + CacheLineSize + nthrmax * sizeof(ParForThread));
|
|
desc->thr = (ParForThread*)((byte*)(desc+1) + CacheLineSize);
|
|
desc->nthrmax = nthrmax;
|
|
return desc;
|
|
}
|
|
|
|
void
|
|
runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
|
|
{
|
|
uint32 i, begin, end;
|
|
uint64 *pos;
|
|
|
|
if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
|
|
runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
|
|
runtime·throw("parfor: invalid args");
|
|
}
|
|
|
|
desc->body = body;
|
|
desc->done = 0;
|
|
desc->nthr = nthr;
|
|
desc->thrseq = 0;
|
|
desc->cnt = n;
|
|
desc->ctx = ctx;
|
|
desc->wait = wait;
|
|
desc->nsteal = 0;
|
|
desc->nstealcnt = 0;
|
|
desc->nprocyield = 0;
|
|
desc->nosyield = 0;
|
|
desc->nsleep = 0;
|
|
for(i=0; i<nthr; i++) {
|
|
begin = (uint64)n*i / nthr;
|
|
end = (uint64)n*(i+1) / nthr;
|
|
pos = &desc->thr[i].pos;
|
|
if(((uintptr)pos & 7) != 0)
|
|
runtime·throw("parforsetup: pos is not aligned");
|
|
*pos = (uint64)begin | (((uint64)end)<<32);
|
|
}
|
|
}
|
|
|
|
void
|
|
runtime·parfordo(ParFor *desc)
|
|
{
|
|
ParForThread *me;
|
|
uint32 tid, begin, end, begin2, try, victim, i;
|
|
uint64 *mypos, *victimpos, pos, newpos;
|
|
void (*body)(ParFor*, uint32);
|
|
bool idle;
|
|
|
|
// Obtain 0-based thread index.
|
|
tid = runtime·xadd(&desc->thrseq, 1) - 1;
|
|
if(tid >= desc->nthr) {
|
|
runtime·printf("tid=%d nthr=%d\n", tid, desc->nthr);
|
|
runtime·throw("parfor: invalid tid");
|
|
}
|
|
|
|
// If single-threaded, just execute the for serially.
|
|
if(desc->nthr==1) {
|
|
for(i=0; i<desc->cnt; i++)
|
|
desc->body(desc, i);
|
|
return;
|
|
}
|
|
|
|
body = desc->body;
|
|
me = &desc->thr[tid];
|
|
mypos = &me->pos;
|
|
for(;;) {
|
|
for(;;) {
|
|
// While there is local work,
|
|
// bump low index and execute the iteration.
|
|
pos = runtime·xadd64(mypos, 1);
|
|
begin = (uint32)pos-1;
|
|
end = (uint32)(pos>>32);
|
|
if(begin < end) {
|
|
body(desc, begin);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Out of work, need to steal something.
|
|
idle = false;
|
|
for(try=0;; try++) {
|
|
// If we don't see any work for long enough,
|
|
// increment the done counter...
|
|
if(try > desc->nthr*4 && !idle) {
|
|
idle = true;
|
|
runtime·xadd(&desc->done, 1);
|
|
}
|
|
// ...if all threads have incremented the counter,
|
|
// we are done.
|
|
if(desc->done + !idle == desc->nthr) {
|
|
if(!idle)
|
|
runtime·xadd(&desc->done, 1);
|
|
goto exit;
|
|
}
|
|
// Choose a random victim for stealing.
|
|
victim = runtime·fastrand1() % (desc->nthr-1);
|
|
if(victim >= tid)
|
|
victim++;
|
|
victimpos = &desc->thr[victim].pos;
|
|
for(;;) {
|
|
// See if it has any work.
|
|
pos = runtime·atomicload64(victimpos);
|
|
begin = (uint32)pos;
|
|
end = (uint32)(pos>>32);
|
|
if(begin+1 >= end) {
|
|
begin = end = 0;
|
|
break;
|
|
}
|
|
if(idle) {
|
|
runtime·xadd(&desc->done, -1);
|
|
idle = false;
|
|
}
|
|
begin2 = begin + (end-begin)/2;
|
|
newpos = (uint64)begin | (uint64)begin2<<32;
|
|
if(runtime·cas64(victimpos, pos, newpos)) {
|
|
begin = begin2;
|
|
break;
|
|
}
|
|
}
|
|
if(begin < end) {
|
|
// Has successfully stolen some work.
|
|
if(idle)
|
|
runtime·throw("parfor: should not be idle");
|
|
runtime·atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
|
|
me->nsteal++;
|
|
me->nstealcnt += end-begin;
|
|
break;
|
|
}
|
|
// Backoff.
|
|
if(try < desc->nthr) {
|
|
// nothing
|
|
} else if (try < 4*desc->nthr) {
|
|
me->nprocyield++;
|
|
runtime·procyield(20);
|
|
// If a caller asked not to wait for the others, exit now
|
|
// (assume that most work is already done at this point).
|
|
} else if (!desc->wait) {
|
|
if(!idle)
|
|
runtime·xadd(&desc->done, 1);
|
|
goto exit;
|
|
} else if (try < 6*desc->nthr) {
|
|
me->nosyield++;
|
|
runtime·osyield();
|
|
} else {
|
|
me->nsleep++;
|
|
runtime·usleep(1);
|
|
}
|
|
}
|
|
}
|
|
exit:
|
|
runtime·xadd64(&desc->nsteal, me->nsteal);
|
|
runtime·xadd64(&desc->nstealcnt, me->nstealcnt);
|
|
runtime·xadd64(&desc->nprocyield, me->nprocyield);
|
|
runtime·xadd64(&desc->nosyield, me->nosyield);
|
|
runtime·xadd64(&desc->nsleep, me->nsleep);
|
|
me->nsteal = 0;
|
|
me->nstealcnt = 0;
|
|
me->nprocyield = 0;
|
|
me->nosyield = 0;
|
|
me->nsleep = 0;
|
|
}
|
|
|
|
// For testing from Go.
|
|
void
|
|
runtime·parforiters(ParFor *desc, uintptr tid, uintptr *start, uintptr *end)
|
|
{
|
|
*start = (uint32)desc->thr[tid].pos;
|
|
*end = (uint32)(desc->thr[tid].pos>>32);
|
|
}
|