mirror of
https://github.com/golang/go
synced 2024-11-25 09:37:56 -07:00
sync: faster Cond
The new version does not require any memory allocations and is 30-50% faster.
Also detect and panic if Cond is copied after first use.

benchmark          old ns/op  new ns/op  delta
BenchmarkCond1     317        195        -38.49%
BenchmarkCond1-2   875        607        -30.63%
BenchmarkCond1-4   1116       548        -50.90%
BenchmarkCond1-8   1013       613        -39.49%
BenchmarkCond1-16  983        450        -54.22%
BenchmarkCond2     559        352        -37.03%
BenchmarkCond2-2   1916       1378       -28.08%
BenchmarkCond2-4   1518       1322       -12.91%
BenchmarkCond2-8   2313       1291       -44.19%
BenchmarkCond2-16  1885       1078       -42.81%
BenchmarkCond4     1070       614        -42.62%
BenchmarkCond4-2   4899       3047       -37.80%
BenchmarkCond4-4   3813       3006       -21.16%
BenchmarkCond4-8   3605       3045       -15.53%
BenchmarkCond4-16  4148       2637       -36.43%
BenchmarkCond8     2086       1264       -39.41%
BenchmarkCond8-2   9961       6736       -32.38%
BenchmarkCond8-4   8135       7689       -5.48%
BenchmarkCond8-8   9623       7517       -21.89%
BenchmarkCond8-16  11661      8093       -30.60%

R=sougou, rsc, bradfitz, r
CC=golang-dev
https://golang.org/cl/11573043
This commit is contained in:
parent
d1b66439f9
commit
5a20b4a6a9
@ -21,22 +21,23 @@ package sync
|
|||||||
#include "runtime.h"
|
#include "runtime.h"
|
||||||
#include "arch_GOARCH.h"
|
#include "arch_GOARCH.h"
|
||||||
|
|
||||||
typedef struct Sema Sema;
|
typedef struct SemaWaiter SemaWaiter;
|
||||||
struct Sema
|
struct SemaWaiter
|
||||||
{
|
{
|
||||||
uint32 volatile* addr;
|
uint32 volatile* addr;
|
||||||
G* g;
|
G* g;
|
||||||
int64 releasetime;
|
int64 releasetime;
|
||||||
Sema* prev;
|
int32 nrelease; // -1 for acquire
|
||||||
Sema* next;
|
SemaWaiter* prev;
|
||||||
|
SemaWaiter* next;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct SemaRoot SemaRoot;
|
typedef struct SemaRoot SemaRoot;
|
||||||
struct SemaRoot
|
struct SemaRoot
|
||||||
{
|
{
|
||||||
Lock;
|
Lock;
|
||||||
Sema* head;
|
SemaWaiter* head;
|
||||||
Sema* tail;
|
SemaWaiter* tail;
|
||||||
// Number of waiters. Read w/o the lock.
|
// Number of waiters. Read w/o the lock.
|
||||||
uint32 volatile nwait;
|
uint32 volatile nwait;
|
||||||
};
|
};
|
||||||
@ -59,7 +60,7 @@ semroot(uint32 *addr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s)
|
semqueue(SemaRoot *root, uint32 volatile *addr, SemaWaiter *s)
|
||||||
{
|
{
|
||||||
s->g = g;
|
s->g = g;
|
||||||
s->addr = addr;
|
s->addr = addr;
|
||||||
@ -73,7 +74,7 @@ semqueue(SemaRoot *root, uint32 volatile *addr, Sema *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
semdequeue(SemaRoot *root, Sema *s)
|
semdequeue(SemaRoot *root, SemaWaiter *s)
|
||||||
{
|
{
|
||||||
if(s->next)
|
if(s->next)
|
||||||
s->next->prev = s->prev;
|
s->next->prev = s->prev;
|
||||||
@ -101,7 +102,7 @@ cansemacquire(uint32 *addr)
|
|||||||
void
|
void
|
||||||
runtime·semacquire(uint32 volatile *addr, bool profile)
|
runtime·semacquire(uint32 volatile *addr, bool profile)
|
||||||
{
|
{
|
||||||
Sema s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
|
SemaWaiter s; // Needs to be allocated on stack, otherwise garbage collector could deallocate it
|
||||||
SemaRoot *root;
|
SemaRoot *root;
|
||||||
int64 t0;
|
int64 t0;
|
||||||
|
|
||||||
@ -147,7 +148,7 @@ runtime·semacquire(uint32 volatile *addr, bool profile)
|
|||||||
void
|
void
|
||||||
runtime·semrelease(uint32 volatile *addr)
|
runtime·semrelease(uint32 volatile *addr)
|
||||||
{
|
{
|
||||||
Sema *s;
|
SemaWaiter *s;
|
||||||
SemaRoot *root;
|
SemaRoot *root;
|
||||||
|
|
||||||
root = semroot(addr);
|
root = semroot(addr);
|
||||||
@ -200,3 +201,93 @@ func runtime_Semacquire(addr *uint32) {
|
|||||||
func runtime_Semrelease(addr *uint32) {
|
func runtime_Semrelease(addr *uint32) {
|
||||||
runtime·semrelease(addr);
|
runtime·semrelease(addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct SyncSema SyncSema;
|
||||||
|
struct SyncSema
|
||||||
|
{
|
||||||
|
Lock;
|
||||||
|
SemaWaiter* head;
|
||||||
|
SemaWaiter* tail;
|
||||||
|
};
|
||||||
|
|
||||||
|
func runtime_Syncsemcheck(size uintptr) {
|
||||||
|
if(size != sizeof(SyncSema)) {
|
||||||
|
runtime·printf("bad SyncSema size: sync:%D runtime:%D\n", (int64)size, (int64)sizeof(SyncSema));
|
||||||
|
runtime·throw("bad SyncSema size");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Syncsemacquire waits for a pairing Syncsemrelease on the same semaphore s.
|
||||||
|
func runtime_Syncsemacquire(s *SyncSema) {
|
||||||
|
SemaWaiter w, *wake;
|
||||||
|
int64 t0;
|
||||||
|
|
||||||
|
w.g = g;
|
||||||
|
w.nrelease = -1;
|
||||||
|
w.next = nil;
|
||||||
|
w.releasetime = 0;
|
||||||
|
t0 = 0;
|
||||||
|
if(runtime·blockprofilerate > 0) {
|
||||||
|
t0 = runtime·cputicks();
|
||||||
|
w.releasetime = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
runtime·lock(s);
|
||||||
|
if(s->head && s->head->nrelease > 0) {
|
||||||
|
// have pending release, consume it
|
||||||
|
wake = nil;
|
||||||
|
s->head->nrelease--;
|
||||||
|
if(s->head->nrelease == 0) {
|
||||||
|
wake = s->head;
|
||||||
|
s->head = wake->next;
|
||||||
|
if(s->head == nil)
|
||||||
|
s->tail = nil;
|
||||||
|
}
|
||||||
|
runtime·unlock(s);
|
||||||
|
if(wake)
|
||||||
|
runtime·ready(wake->g);
|
||||||
|
} else {
|
||||||
|
// enqueue itself
|
||||||
|
if(s->tail == nil)
|
||||||
|
s->head = &w;
|
||||||
|
else
|
||||||
|
s->tail->next = &w;
|
||||||
|
s->tail = &w;
|
||||||
|
runtime·park(runtime·unlock, s, "semacquire");
|
||||||
|
if(t0)
|
||||||
|
runtime·blockevent(w.releasetime - t0, 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Syncsemrelease waits for n pairing Syncsemacquire on the same semaphore s.
|
||||||
|
func runtime_Syncsemrelease(s *SyncSema, n uint32) {
|
||||||
|
SemaWaiter w, *wake;
|
||||||
|
|
||||||
|
w.g = g;
|
||||||
|
w.nrelease = (int32)n;
|
||||||
|
w.next = nil;
|
||||||
|
w.releasetime = 0;
|
||||||
|
|
||||||
|
runtime·lock(s);
|
||||||
|
while(w.nrelease > 0 && s->head && s->head->nrelease < 0) {
|
||||||
|
// have pending acquire, satisfy it
|
||||||
|
wake = s->head;
|
||||||
|
s->head = wake->next;
|
||||||
|
if(s->head == nil)
|
||||||
|
s->tail = nil;
|
||||||
|
if(wake->releasetime)
|
||||||
|
wake->releasetime = runtime·cputicks();
|
||||||
|
runtime·ready(wake->g);
|
||||||
|
w.nrelease--;
|
||||||
|
}
|
||||||
|
if(w.nrelease > 0) {
|
||||||
|
// enqueue itself
|
||||||
|
if(s->tail == nil)
|
||||||
|
s->head = &w;
|
||||||
|
else
|
||||||
|
s->tail->next = &w;
|
||||||
|
s->tail = &w;
|
||||||
|
runtime·park(runtime·unlock, s, "semarelease");
|
||||||
|
} else
|
||||||
|
runtime·unlock(s);
|
||||||
|
}
|
||||||
|
@ -4,6 +4,11 @@
|
|||||||
|
|
||||||
package sync
|
package sync
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync/atomic"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
// Cond implements a condition variable, a rendezvous point
|
// Cond implements a condition variable, a rendezvous point
|
||||||
// for goroutines waiting for or announcing the occurrence
|
// for goroutines waiting for or announcing the occurrence
|
||||||
// of an event.
|
// of an event.
|
||||||
@ -11,27 +16,16 @@ package sync
|
|||||||
// Each Cond has an associated Locker L (often a *Mutex or *RWMutex),
|
// Each Cond has an associated Locker L (often a *Mutex or *RWMutex),
|
||||||
// which must be held when changing the condition and
|
// which must be held when changing the condition and
|
||||||
// when calling the Wait method.
|
// when calling the Wait method.
|
||||||
|
//
|
||||||
|
// A Cond can be created as part of other structures.
|
||||||
|
// A Cond must not be copied after first use.
|
||||||
type Cond struct {
|
type Cond struct {
|
||||||
L Locker // held while observing or changing the condition
|
// L is held while observing or changing the condition
|
||||||
m Mutex // held to avoid internal races
|
L Locker
|
||||||
|
|
||||||
// We must be careful to make sure that when Signal
|
sema syncSema
|
||||||
// releases a semaphore, the corresponding acquire is
|
waiters uint32 // number of waiters
|
||||||
// executed by a goroutine that was already waiting at
|
checker copyChecker
|
||||||
// the time of the call to Signal, not one that arrived later.
|
|
||||||
// To ensure this, we segment waiting goroutines into
|
|
||||||
// generations punctuated by calls to Signal. Each call to
|
|
||||||
// Signal begins another generation if there are no goroutines
|
|
||||||
// left in older generations for it to wake. Because of this
|
|
||||||
// optimization (only begin another generation if there
|
|
||||||
// are no older goroutines left), we only need to keep track
|
|
||||||
// of the two most recent generations, which we call old
|
|
||||||
// and new.
|
|
||||||
oldWaiters int // number of waiters in old generation...
|
|
||||||
oldSema *uint32 // ... waiting on this semaphore
|
|
||||||
|
|
||||||
newWaiters int // number of waiters in new generation...
|
|
||||||
newSema *uint32 // ... waiting on this semaphore
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCond returns a new Cond with Locker l.
|
// NewCond returns a new Cond with Locker l.
|
||||||
@ -56,22 +50,16 @@ func NewCond(l Locker) *Cond {
|
|||||||
// c.L.Unlock()
|
// c.L.Unlock()
|
||||||
//
|
//
|
||||||
func (c *Cond) Wait() {
|
func (c *Cond) Wait() {
|
||||||
|
c.checker.check()
|
||||||
if raceenabled {
|
if raceenabled {
|
||||||
_ = c.m.state
|
|
||||||
raceDisable()
|
raceDisable()
|
||||||
}
|
}
|
||||||
c.m.Lock()
|
atomic.AddUint32(&c.waiters, 1)
|
||||||
if c.newSema == nil {
|
|
||||||
c.newSema = new(uint32)
|
|
||||||
}
|
|
||||||
s := c.newSema
|
|
||||||
c.newWaiters++
|
|
||||||
c.m.Unlock()
|
|
||||||
if raceenabled {
|
if raceenabled {
|
||||||
raceEnable()
|
raceEnable()
|
||||||
}
|
}
|
||||||
c.L.Unlock()
|
c.L.Unlock()
|
||||||
runtime_Semacquire(s)
|
runtime_Syncsemacquire(&c.sema)
|
||||||
c.L.Lock()
|
c.L.Lock()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,26 +68,7 @@ func (c *Cond) Wait() {
|
|||||||
// It is allowed but not required for the caller to hold c.L
|
// It is allowed but not required for the caller to hold c.L
|
||||||
// during the call.
|
// during the call.
|
||||||
func (c *Cond) Signal() {
|
func (c *Cond) Signal() {
|
||||||
if raceenabled {
|
c.signalImpl(false)
|
||||||
_ = c.m.state
|
|
||||||
raceDisable()
|
|
||||||
}
|
|
||||||
c.m.Lock()
|
|
||||||
if c.oldWaiters == 0 && c.newWaiters > 0 {
|
|
||||||
// Retire old generation; rename new to old.
|
|
||||||
c.oldWaiters = c.newWaiters
|
|
||||||
c.oldSema = c.newSema
|
|
||||||
c.newWaiters = 0
|
|
||||||
c.newSema = nil
|
|
||||||
}
|
|
||||||
if c.oldWaiters > 0 {
|
|
||||||
c.oldWaiters--
|
|
||||||
runtime_Semrelease(c.oldSema)
|
|
||||||
}
|
|
||||||
c.m.Unlock()
|
|
||||||
if raceenabled {
|
|
||||||
raceEnable()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Broadcast wakes all goroutines waiting on c.
|
// Broadcast wakes all goroutines waiting on c.
|
||||||
@ -107,27 +76,43 @@ func (c *Cond) Signal() {
|
|||||||
// It is allowed but not required for the caller to hold c.L
|
// It is allowed but not required for the caller to hold c.L
|
||||||
// during the call.
|
// during the call.
|
||||||
func (c *Cond) Broadcast() {
|
func (c *Cond) Broadcast() {
|
||||||
|
c.signalImpl(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Cond) signalImpl(all bool) {
|
||||||
|
c.checker.check()
|
||||||
if raceenabled {
|
if raceenabled {
|
||||||
_ = c.m.state
|
|
||||||
raceDisable()
|
raceDisable()
|
||||||
}
|
}
|
||||||
c.m.Lock()
|
for {
|
||||||
// Wake both generations.
|
old := atomic.LoadUint32(&c.waiters)
|
||||||
if c.oldWaiters > 0 {
|
if old == 0 {
|
||||||
for i := 0; i < c.oldWaiters; i++ {
|
if raceenabled {
|
||||||
runtime_Semrelease(c.oldSema)
|
raceEnable()
|
||||||
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
c.oldWaiters = 0
|
new := old - 1
|
||||||
}
|
if all {
|
||||||
if c.newWaiters > 0 {
|
new = 0
|
||||||
for i := 0; i < c.newWaiters; i++ {
|
}
|
||||||
runtime_Semrelease(c.newSema)
|
if atomic.CompareAndSwapUint32(&c.waiters, old, new) {
|
||||||
|
if raceenabled {
|
||||||
|
raceEnable()
|
||||||
|
}
|
||||||
|
runtime_Syncsemrelease(&c.sema, old-new)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
c.newWaiters = 0
|
}
|
||||||
c.newSema = nil
|
}
|
||||||
}
|
|
||||||
c.m.Unlock()
|
// copyChecker holds a back pointer to itself to detect object copying.
|
||||||
if raceenabled {
|
type copyChecker uintptr
|
||||||
raceEnable()
|
|
||||||
|
func (c *copyChecker) check() {
|
||||||
|
if uintptr(*c) != uintptr(unsafe.Pointer(c)) &&
|
||||||
|
!atomic.CompareAndSwapUintptr((*uintptr)(c), 0, uintptr(unsafe.Pointer(c))) &&
|
||||||
|
uintptr(*c) != uintptr(unsafe.Pointer(c)) {
|
||||||
|
panic("sync.Cond is copied")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,8 @@ package sync_test
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
. "sync"
|
. "sync"
|
||||||
|
|
||||||
|
"runtime"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -124,3 +126,130 @@ func TestCondBroadcast(t *testing.T) {
|
|||||||
}
|
}
|
||||||
c.Broadcast()
|
c.Broadcast()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRace(t *testing.T) {
|
||||||
|
x := 0
|
||||||
|
c := NewCond(&Mutex{})
|
||||||
|
done := make(chan bool)
|
||||||
|
go func() {
|
||||||
|
c.L.Lock()
|
||||||
|
x = 1
|
||||||
|
c.Wait()
|
||||||
|
if x != 2 {
|
||||||
|
t.Fatal("want 2")
|
||||||
|
}
|
||||||
|
x = 3
|
||||||
|
c.Signal()
|
||||||
|
c.L.Unlock()
|
||||||
|
done <- true
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
c.L.Lock()
|
||||||
|
for {
|
||||||
|
if x == 1 {
|
||||||
|
x = 2
|
||||||
|
c.Signal()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.L.Unlock()
|
||||||
|
runtime.Gosched()
|
||||||
|
c.L.Lock()
|
||||||
|
}
|
||||||
|
c.L.Unlock()
|
||||||
|
done <- true
|
||||||
|
}()
|
||||||
|
go func() {
|
||||||
|
c.L.Lock()
|
||||||
|
for {
|
||||||
|
if x == 2 {
|
||||||
|
c.Wait()
|
||||||
|
if x != 3 {
|
||||||
|
t.Fatal("want 3")
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if x == 3 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c.L.Unlock()
|
||||||
|
runtime.Gosched()
|
||||||
|
c.L.Lock()
|
||||||
|
}
|
||||||
|
c.L.Unlock()
|
||||||
|
done <- true
|
||||||
|
}()
|
||||||
|
<-done
|
||||||
|
<-done
|
||||||
|
<-done
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCondCopy(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
err := recover()
|
||||||
|
if err == nil || err.(string) != "sync.Cond is copied" {
|
||||||
|
t.Fatalf("got %v, expect sync.Cond is copied", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
c := Cond{L: &Mutex{}}
|
||||||
|
c.Signal()
|
||||||
|
c2 := c
|
||||||
|
c2.Signal()
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCond1(b *testing.B) {
|
||||||
|
benchmarkCond(b, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCond2(b *testing.B) {
|
||||||
|
benchmarkCond(b, 2)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCond4(b *testing.B) {
|
||||||
|
benchmarkCond(b, 4)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCond8(b *testing.B) {
|
||||||
|
benchmarkCond(b, 8)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCond16(b *testing.B) {
|
||||||
|
benchmarkCond(b, 16)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkCond32(b *testing.B) {
|
||||||
|
benchmarkCond(b, 32)
|
||||||
|
}
|
||||||
|
|
||||||
|
func benchmarkCond(b *testing.B, waiters int) {
|
||||||
|
c := NewCond(&Mutex{})
|
||||||
|
done := make(chan bool)
|
||||||
|
id := 0
|
||||||
|
|
||||||
|
for routine := 0; routine < waiters+1; routine++ {
|
||||||
|
go func() {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
c.L.Lock()
|
||||||
|
if id == -1 {
|
||||||
|
c.L.Unlock()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
id++
|
||||||
|
if id == waiters+1 {
|
||||||
|
id = 0
|
||||||
|
c.Broadcast()
|
||||||
|
} else {
|
||||||
|
c.Wait()
|
||||||
|
}
|
||||||
|
c.L.Unlock()
|
||||||
|
}
|
||||||
|
c.L.Lock()
|
||||||
|
id = -1
|
||||||
|
c.Broadcast()
|
||||||
|
c.L.Unlock()
|
||||||
|
done <- true
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
for routine := 0; routine < waiters+1; routine++ {
|
||||||
|
<-done
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
package sync
|
package sync
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
// defined in package runtime
|
// defined in package runtime
|
||||||
|
|
||||||
// Semacquire waits until *s > 0 and then atomically decrements it.
|
// Semacquire waits until *s > 0 and then atomically decrements it.
|
||||||
@ -16,3 +18,19 @@ func runtime_Semacquire(s *uint32)
|
|||||||
// It is intended as a simple wakeup primitive for use by the synchronization
|
// It is intended as a simple wakeup primitive for use by the synchronization
|
||||||
// library and should not be used directly.
|
// library and should not be used directly.
|
||||||
func runtime_Semrelease(s *uint32)
|
func runtime_Semrelease(s *uint32)
|
||||||
|
|
||||||
|
// Opaque representation of SyncSema in runtime/sema.goc.
|
||||||
|
type syncSema [3]uintptr
|
||||||
|
|
||||||
|
// Syncsemacquire waits for a pairing Syncsemrelease on the same semaphore s.
|
||||||
|
func runtime_Syncsemacquire(s *syncSema)
|
||||||
|
|
||||||
|
// Syncsemrelease waits for n pairing Syncsemacquire on the same semaphore s.
|
||||||
|
func runtime_Syncsemrelease(s *syncSema, n uint32)
|
||||||
|
|
||||||
|
// Ensure that sync and runtime agree on size of syncSema.
|
||||||
|
func runtime_Syncsemcheck(size uintptr)
|
||||||
|
func init() {
|
||||||
|
var s syncSema
|
||||||
|
runtime_Syncsemcheck(unsafe.Sizeof(s))
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user