From b104a0ef32e1388dbd6af7174975857320fb8e80 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Fri, 23 Jun 2023 13:20:33 +0700 Subject: [PATCH] runtime/cgo: reduce runtime init done check using atomic Every call from C to Go does acquire a mutex to check whether Go runtime has been fully initialized. This often does not matter, because the lock is held only briefly. However, with code that does a lot of parallel calls from C to Go could cause heavy contention on the mutex. Since this is an initialization guard, we can double check with atomic operation to provide a fast path in case the initialization is done. With this CL, program in #60961 reduces from ~2.7s to ~1.8s. Fixes #60961 Change-Id: Iba4cabbee3c9bc646e70ef7eb074212ba63fdc04 Reviewed-on: https://go-review.googlesource.com/c/go/+/505455 Reviewed-by: Heschi Kreinick Auto-Submit: Cuong Manh Le Run-TryBot: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Ian Lance Taylor Run-TryBot: Cuong Manh Le Reviewed-by: Ian Lance Taylor TryBot-Result: Gopher Robot --- src/runtime/cgo/gcc_libinit.c | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/runtime/cgo/gcc_libinit.c b/src/runtime/cgo/gcc_libinit.c index 96765932117..0b2cc25277c 100644 --- a/src/runtime/cgo/gcc_libinit.c +++ b/src/runtime/cgo/gcc_libinit.c @@ -41,30 +41,37 @@ x_cgo_sys_thread_create(void* (*func)(void*), void* arg) { uintptr_t _cgo_wait_runtime_init_done(void) { void (*pfn)(struct context_arg*); + pfn = __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); - pthread_mutex_lock(&runtime_init_mu); - while (runtime_init_done == 0) { - pthread_cond_wait(&runtime_init_cond, &runtime_init_mu); + int done = 2; + if (__atomic_load_n(&runtime_init_done, __ATOMIC_CONSUME) != done) { + pthread_mutex_lock(&runtime_init_mu); + while (__atomic_load_n(&runtime_init_done, __ATOMIC_CONSUME) == 0) { + pthread_cond_wait(&runtime_init_cond, &runtime_init_mu); + } + + // The key and x_cgo_pthread_key_created are for the whole program, + // whereas the specific and destructor is per thread. + if (x_cgo_pthread_key_created == 0 && pthread_key_create(&pthread_g, pthread_key_destructor) == 0) { + x_cgo_pthread_key_created = 1; + } + + + // TODO(iant): For the case of a new C thread calling into Go, such + // as when using -buildmode=c-archive, we know that Go runtime + // initialization is complete but we do not know that all Go init + // functions have been run. We should not fetch cgo_context_function + // until they have been, because that is where a call to + // SetCgoTraceback is likely to occur. We are going to wait for Go + // initialization to be complete anyhow, later, by waiting for + // main_init_done to be closed in cgocallbackg1. We should wait here + // instead. See also issue #15943. + pfn = __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); + + __atomic_store_n(&runtime_init_done, done, __ATOMIC_RELEASE); + pthread_mutex_unlock(&runtime_init_mu); } - // The key and x_cgo_pthread_key_created are for the whole program, - // whereas the specific and destructor is per thread. - if (x_cgo_pthread_key_created == 0 && pthread_key_create(&pthread_g, pthread_key_destructor) == 0) { - x_cgo_pthread_key_created = 1; - } - - // TODO(iant): For the case of a new C thread calling into Go, such - // as when using -buildmode=c-archive, we know that Go runtime - // initialization is complete but we do not know that all Go init - // functions have been run. We should not fetch cgo_context_function - // until they have been, because that is where a call to - // SetCgoTraceback is likely to occur. We are going to wait for Go - // initialization to be complete anyhow, later, by waiting for - // main_init_done to be closed in cgocallbackg1. We should wait here - // instead. See also issue #15943. - pfn = cgo_context_function; - - pthread_mutex_unlock(&runtime_init_mu); if (pfn != nil) { struct context_arg arg; @@ -88,7 +95,7 @@ void x_cgo_bindm(void* g) { void x_cgo_notify_runtime_init_done(void* dummy __attribute__ ((unused))) { pthread_mutex_lock(&runtime_init_mu); - runtime_init_done = 1; + __atomic_store_n(&runtime_init_done, 1, __ATOMIC_RELEASE); pthread_cond_broadcast(&runtime_init_cond); pthread_mutex_unlock(&runtime_init_mu); } @@ -96,19 +103,12 @@ x_cgo_notify_runtime_init_done(void* dummy __attribute__ ((unused))) { // Sets the context function to call to record the traceback context // when calling a Go function from C code. Called from runtime.SetCgoTraceback. void x_cgo_set_context_function(void (*context)(struct context_arg*)) { - pthread_mutex_lock(&runtime_init_mu); - cgo_context_function = context; - pthread_mutex_unlock(&runtime_init_mu); + __atomic_store_n(&cgo_context_function, context, __ATOMIC_RELEASE); } // Gets the context function. void (*(_cgo_get_context_function(void)))(struct context_arg*) { - void (*ret)(struct context_arg*); - - pthread_mutex_lock(&runtime_init_mu); - ret = cgo_context_function; - pthread_mutex_unlock(&runtime_init_mu); - return ret; + return __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME); } // _cgo_try_pthread_create retries pthread_create if it fails with