mirror of https://github.com/golang/go
runtime: speed up cgo calls

Allocate Defer on stack during cgo calls, as suggested by dvyukov.
Also includes some comment corrections.

benchmark           old,ns/op   new,ns/op
BenchmarkCgoCall          669         330
(Intel Xeon CPU 1.80GHz * 4, Linux 386)

R=dvyukov, rsc
CC=golang-dev
https://golang.org/cl/4910041
commit 72e83483a7
parent 4cf630da0f
@@ -48,6 +48,10 @@ struct ibv_async_event {
 struct ibv_context {
 	xxpthread_mutex_t mutex;
 };
+
+int add(int x, int y) {
+	return x+y;
+};
 */
 import "C"
 import (
@@ -132,3 +136,11 @@ var (
 type Context struct {
 	ctx *C.struct_ibv_context
 }
+
+func benchCgoCall(b *testing.B) {
+	const x = C.int(2)
+	const y = C.int(3)
+	for i := 0; i < b.N; i++ {
+		C.add(x, y)
+	}
+}
@@ -26,3 +26,5 @@ func TestBlocking(t *testing.T) { testBlocking(t) }
 func Test1328(t *testing.T) { test1328(t) }
 func TestParallelSleep(t *testing.T) { testParallelSleep(t) }
 func TestSetEnv(t *testing.T) { testSetEnv(t) }
+
+func BenchmarkCgoCall(b *testing.B) { benchCgoCall(b) }
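Reassembled, the three hunks above amount to the pair of files sketched below (the file names and package name are illustrative; the page does not show them). The cgo preamble and the benchmark body live outside the _test.go file because test files cannot import "C"; the _test.go file only registers the benchmark:

// cgocall_bench.go — illustrative name for the non-test file holding the cgo bits.
package cgotest

/*
int add(int x, int y) {
	return x+y;
};
*/
import "C"

import "testing"

// benchCgoCall measures the round-trip cost of a trivial C call from Go.
func benchCgoCall(b *testing.B) {
	const x = C.int(2)
	const y = C.int(3)
	for i := 0; i < b.N; i++ {
		C.add(x, y)
	}
}

// cgocall_bench_test.go — illustrative name for the registration file.
package cgotest

import "testing"

func BenchmarkCgoCall(b *testing.B) { benchCgoCall(b) }

With a current toolchain this would be run with something like go test -run=NONE -bench=CgoCall; the CL predates the go tool, so the numbers in the commit message came from the build system of the time.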
@@ -432,17 +432,17 @@ TEXT runtime·cgocallback(SB),7,$12
 	PUSHL	(g_sched+gobuf_sp)(SI)
 	MOVL	SP, (g_sched+gobuf_sp)(SI)

-	// Switch to m->curg stack and call runtime.cgocallback
+	// Switch to m->curg stack and call runtime.cgocallbackg
 	// with the three arguments. Because we are taking over
 	// the execution of m->curg but *not* resuming what had
 	// been running, we need to save that information (m->curg->gobuf)
 	// so that we can restore it when we're done.
 	// We can restore m->curg->gobuf.sp easily, because calling
-	// runtime.cgocallback leaves SP unchanged upon return.
+	// runtime.cgocallbackg leaves SP unchanged upon return.
 	// To save m->curg->gobuf.pc, we push it onto the stack.
 	// This has the added benefit that it looks to the traceback
-	// routine like cgocallback is going to return to that
-	// PC (because we defined cgocallback to have
+	// routine like cgocallbackg is going to return to that
+	// PC (because we defined cgocallbackg to have
 	// a frame size of 12, the same amount that we use below),
 	// so that the traceback will seamlessly trace back into
 	// the earlier calls.
@@ -477,17 +477,17 @@ TEXT runtime·cgocallback(SB),7,$24
 	PUSHQ	(g_sched+gobuf_sp)(SI)
 	MOVQ	SP, (g_sched+gobuf_sp)(SI)

-	// Switch to m->curg stack and call runtime.cgocallback
+	// Switch to m->curg stack and call runtime.cgocallbackg
 	// with the three arguments. Because we are taking over
 	// the execution of m->curg but *not* resuming what had
 	// been running, we need to save that information (m->curg->gobuf)
 	// so that we can restore it when we're done.
 	// We can restore m->curg->gobuf.sp easily, because calling
-	// runtime.cgocallback leaves SP unchanged upon return.
+	// runtime.cgocallbackg leaves SP unchanged upon return.
 	// To save m->curg->gobuf.pc, we push it onto the stack.
 	// This has the added benefit that it looks to the traceback
-	// routine like cgocallback is going to return to that
-	// PC (because we defined cgocallback to have
+	// routine like cgocallbackg is going to return to that
+	// PC (because we defined cgocallbackg to have
 	// a frame size of 24, the same amount that we use below),
 	// so that the traceback will seamlessly trace back into
 	// the earlier calls.
@@ -68,7 +68,7 @@
 // stack (not an m->g0 stack). First it calls runtime.exitsyscall, which will
 // block until the $GOMAXPROCS limit allows running this goroutine.
 // Once exitsyscall has returned, it is safe to do things like call the memory
-// allocator or invoke the Go callback function p.GoF. runtime.cgocallback
+// allocator or invoke the Go callback function p.GoF. runtime.cgocallbackg
 // first defers a function to unwind m->g0.sched.sp, so that if p.GoF
 // panics, m->g0.sched.sp will be restored to its old value: the m->g0 stack
 // and the m->curg stack will be unwound in lock step.
@@ -92,7 +92,7 @@ static void unwindm(void);
 void
 runtime·cgocall(void (*fn)(void*), void *arg)
 {
-	Defer *d;
+	Defer d;

 	if(!runtime·iscgo)
 		runtime·throw("cgocall unavailable");
@@ -106,18 +106,18 @@ runtime·cgocall(void (*fn)(void*), void *arg)
 	 * Lock g to m to ensure we stay on the same stack if we do a
 	 * cgo callback.
 	 */
-	d = nil;
+	d.nofree = false;
 	if(m->lockedg == nil) {
 		m->lockedg = g;
 		g->lockedm = m;

 		// Add entry to defer stack in case of panic.
-		d = runtime·malloc(sizeof(*d));
-		d->fn = (byte*)unlockm;
-		d->siz = 0;
-		d->link = g->defer;
-		d->argp = (void*)-1;	// unused because unwindm never recovers
-		g->defer = d;
+		d.fn = (byte*)unlockm;
+		d.siz = 0;
+		d.link = g->defer;
+		d.argp = (void*)-1;	// unused because unlockm never recovers
+		d.nofree = true;
+		g->defer = &d;
 	}

 	/*
@@ -135,11 +135,10 @@ runtime·cgocall(void (*fn)(void*), void *arg)
 	runtime·asmcgocall(fn, arg);
 	runtime·exitsyscall();

-	if(d != nil) {
-		if(g->defer != d || d->fn != (byte*)unlockm)
+	if(d.nofree) {
+		if(g->defer != &d || d.fn != (byte*)unlockm)
 			runtime·throw("runtime: bad defer entry in cgocallback");
-		g->defer = d->link;
-		runtime·free(d);
+		g->defer = d.link;
 		unlockm();
 	}
 }
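This hunk (together with the matching cgocallbackg change below) is where the per-call heap allocation disappears: the Defer record now lives in cgocall's own stack frame, is marked nofree, and is simply unlinked on the way out instead of being handed to runtime·free. Not part of this CL, but one way to observe the effect with a modern toolchain would be a test like the sketch below, placed next to the benchmark files sketched earlier (testing.AllocsPerRun postdates this change, and the zero-allocations expectation is an assumption, not something the CL states):

// cgocall_alloc.go — illustrative; a _test.go file cannot import "C", so this
// small helper declares the add() defined in the benchmark sketch's preamble
// and wraps a single cgo call.
package cgotest

// int add(int x, int y);
import "C"

func cgoAddOnce() { C.add(C.int(2), C.int(3)) }

// cgocall_alloc_test.go — illustrative.
package cgotest

import "testing"

func TestCgoCallDoesNotAllocate(t *testing.T) {
	if n := testing.AllocsPerRun(100, cgoAddOnce); n != 0 {
		t.Errorf("cgo call allocated %v objects per call; want 0", n)
	}
}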
@@ -192,7 +191,7 @@ runtime·cfree(void *p)
 void
 runtime·cgocallbackg(void (*fn)(void), void *arg, uintptr argsize)
 {
-	Defer *d;
+	Defer d;

 	if(g != m->curg)
 		runtime·throw("runtime: bad g in cgocallback");
@@ -200,12 +199,12 @@ runtime·cgocallbackg(void (*fn)(void), void *arg, uintptr argsize)
 	runtime·exitsyscall();	// coming out of cgo call

 	// Add entry to defer stack in case of panic.
-	d = runtime·malloc(sizeof(*d));
-	d->fn = (byte*)unwindm;
-	d->siz = 0;
-	d->link = g->defer;
-	d->argp = (void*)-1;	// unused because unwindm never recovers
-	g->defer = d;
+	d.fn = (byte*)unwindm;
+	d.siz = 0;
+	d.link = g->defer;
+	d.argp = (void*)-1;	// unused because unwindm never recovers
+	d.nofree = true;
+	g->defer = &d;

 	// Invoke callback.
 	reflect·call((byte*)fn, arg, argsize);
@@ -213,10 +212,9 @@ runtime·cgocallbackg(void (*fn)(void), void *arg, uintptr argsize)
 	// Pop defer.
 	// Do not unwind m->g0->sched.sp.
 	// Our caller, cgocallback, will do that.
-	if(g->defer != d || d->fn != (byte*)unwindm)
+	if(g->defer != &d || d.fn != (byte*)unwindm)
 		runtime·throw("runtime: bad defer entry in cgocallback");
-	g->defer = d->link;
-	runtime·free(d);
+	g->defer = d.link;

 	runtime·entersyscall();	// going back to cgo call
 }
@@ -1153,7 +1153,8 @@ runtime·deferreturn(uintptr arg0)
 	runtime·memmove(argp, d->args, d->siz);
 	g->defer = d->link;
 	fn = d->fn;
-	runtime·free(d);
+	if(!d->nofree)
+		runtime·free(d);
 	runtime·jmpdefer(fn, argp);
 }

@@ -1165,7 +1166,8 @@ rundefer(void)
 	while((d = g->defer) != nil) {
 		g->defer = d->link;
 		reflect·call(d->fn, d->args, d->siz);
-		runtime·free(d);
+		if(!d->nofree)
+			runtime·free(d);
 	}
 }

@@ -1245,7 +1247,8 @@ runtime·panic(Eface e)
 			runtime·mcall(recovery);
 			runtime·throw("recovery failed"); // mcall should not return
 		}
-		runtime·free(d);
+		if(!d->nofree)
+			runtime·free(d);
 	}

 	// ran out of deferred calls - old-school panic now
@@ -1280,7 +1283,8 @@ recovery(G *gp)
 	else
 		gp->sched.sp = (byte*)d->argp - 2*sizeof(uintptr);
 	gp->sched.pc = d->pc;
-	runtime·free(d);
+	if(!d->nofree)
+		runtime·free(d);
 	runtime·gogo(&gp->sched, 1);
 }

@@ -359,6 +359,7 @@ enum {
 struct Defer
 {
 	int32	siz;
+	bool	nofree;
 	byte*	argp;	// where args were copied from
 	byte*	pc;
 	byte*	fn;
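The remaining hunks make every place that disposes of a popped Defer record (deferreturn, rundefer, panic, recovery) consult the new nofree flag first, so the stack-allocated records pushed by cgocall and cgocallbackg are never handed to runtime·free. As a toy illustration of that invariant only (ordinary Go, not runtime code):

// Toy model: a linked defer stack whose records may live on the caller's
// stack. Popping "frees" only heap-style records, mirroring the !d->nofree
// guard added before runtime·free(d) above.
package main

import "fmt"

type deferRec struct {
	fn     func()
	nofree bool      // set when the record was not heap-allocated
	link   *deferRec // next older record
}

var (
	deferHead *deferRec
	freed     int
)

func push(d *deferRec) {
	d.link = deferHead
	deferHead = d
}

func pop() {
	d := deferHead
	deferHead = d.link
	d.fn()
	if !d.nofree { // stack-allocated records are simply unlinked
		freed++ // stand-in for runtime·free(d)
	}
}

func main() {
	push(&deferRec{fn: func() { fmt.Println("heap-style record") }})
	push(&deferRec{fn: func() { fmt.Println("stack-style record") }, nofree: true})
	pop()
	pop()
	fmt.Println("records freed:", freed) // 1: only the heap-style record
}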