1
0
mirror of https://github.com/golang/go synced 2024-11-20 03:24:41 -07:00
go/src/sync/mutex.go

128 lines
3.1 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package sync provides basic synchronization primitives such as mutual
// exclusion locks. Other than the Once and WaitGroup types, most are intended
// for use by low-level library routines. Higher-level synchronization is
// better done via channels and communication.
//
// Values containing the types defined in this package should not be copied.
package sync
import (
"internal/race"
"sync/atomic"
"unsafe"
)
// A Mutex is a mutual exclusion lock.
// Mutexes can be created as part of other structures;
// the zero value for a Mutex is an unlocked mutex.
2009-01-20 15:40:40 -07:00
type Mutex struct {
	// state packs the whole lock state into one word that is updated
	// atomically: bit 0 (mutexLocked) is set while the mutex is held,
	// bit 1 (mutexWoken) is set while a woken or spinning goroutine is
	// already trying to acquire the lock, and the remaining bits
	// (state >> mutexWaiterShift) count the blocked waiters.
	state int32

	// sema is the semaphore that Lock parks on (via runtime_Semacquire)
	// when the mutex is contended.
	sema uint32
}
// A Locker represents an object that can be locked and unlocked.
type Locker interface {
	// Lock acquires the lock.
	Lock()
	// Unlock releases the lock.
	Unlock()
}
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
// Layout of Mutex.state: two flag bits in the low positions, with the
// count of waiting goroutines stored in the bits above them.
const (
	mutexLocked      = 1 << 0 // bit 0: the mutex is currently held
	mutexWoken       = 1 << 1 // bit 1: a waiter has been woken (or is spinning)
	mutexWaiterShift = 2      // waiter count lives in state >> mutexWaiterShift
)
// Lock locks m.
// If the lock is already in use, the calling goroutine
// blocks until the mutex is available.
func (m *Mutex) Lock() {
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
// Fast path: grab unlocked mutex.
if atomic.CompareAndSwapInt32(&m.state, 0, mutexLocked) {
if race.Enabled {
race.Acquire(unsafe.Pointer(m))
}
return
}
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
awoke := false
sync: add active spinning to Mutex Currently sync.Mutex is fully cooperative. That is, once contention is discovered, the goroutine calls into scheduler. This is suboptimal as the resource can become free soon after (especially if critical sections are short). Server software usually runs at ~~50% CPU utilization, that is, switching to other goroutines is not necessary profitable. This change adds limited active spinning to sync.Mutex if: 1. running on a multicore machine and 2. GOMAXPROCS>1 and 3. there is at least one other running P and 4. local runq is empty. As opposed to runtime mutex we don't do passive spinning, because there can be work on global runq on on other Ps. benchmark old ns/op new ns/op delta BenchmarkMutexNoSpin 1271 1272 +0.08% BenchmarkMutexNoSpin-2 702 683 -2.71% BenchmarkMutexNoSpin-4 377 372 -1.33% BenchmarkMutexNoSpin-8 197 190 -3.55% BenchmarkMutexNoSpin-16 131 122 -6.87% BenchmarkMutexNoSpin-32 170 164 -3.53% BenchmarkMutexSpin 4724 4728 +0.08% BenchmarkMutexSpin-2 2501 2491 -0.40% BenchmarkMutexSpin-4 1330 1325 -0.38% BenchmarkMutexSpin-8 684 684 +0.00% BenchmarkMutexSpin-16 414 372 -10.14% BenchmarkMutexSpin-32 559 469 -16.10% BenchmarkMutex 19.1 19.1 +0.00% BenchmarkMutex-2 81.6 54.3 -33.46% BenchmarkMutex-4 143 100 -30.07% BenchmarkMutex-8 154 156 +1.30% BenchmarkMutex-16 140 159 +13.57% BenchmarkMutex-32 141 163 +15.60% BenchmarkMutexSlack 33.3 31.2 -6.31% BenchmarkMutexSlack-2 122 97.7 -19.92% BenchmarkMutexSlack-4 168 158 -5.95% BenchmarkMutexSlack-8 152 158 +3.95% BenchmarkMutexSlack-16 140 159 +13.57% BenchmarkMutexSlack-32 146 162 +10.96% BenchmarkMutexWork 154 154 +0.00% BenchmarkMutexWork-2 89.2 89.9 +0.78% BenchmarkMutexWork-4 139 86.1 -38.06% BenchmarkMutexWork-8 177 162 -8.47% BenchmarkMutexWork-16 170 173 +1.76% BenchmarkMutexWork-32 176 176 +0.00% BenchmarkMutexWorkSlack 160 160 +0.00% BenchmarkMutexWorkSlack-2 103 99.1 -3.79% BenchmarkMutexWorkSlack-4 155 148 -4.52% BenchmarkMutexWorkSlack-8 176 170 -3.41% 
BenchmarkMutexWorkSlack-16 170 173 +1.76% BenchmarkMutexWorkSlack-32 175 176 +0.57% "No work" benchmarks are not very interesting (BenchmarkMutex and BenchmarkMutexSlack), as they are absolutely not realistic. Fixes #8889 Change-Id: I6f14f42af1fa48f73a776fdd11f0af6dd2bb428b Reviewed-on: https://go-review.googlesource.com/5430 Reviewed-by: Rick Hudson <rlh@golang.org> Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
2015-02-20 01:50:56 -07:00
iter := 0
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
for {
old := m.state
new := old | mutexLocked
if old&mutexLocked != 0 {
sync: add active spinning to Mutex Currently sync.Mutex is fully cooperative. That is, once contention is discovered, the goroutine calls into scheduler. This is suboptimal as the resource can become free soon after (especially if critical sections are short). Server software usually runs at ~~50% CPU utilization, that is, switching to other goroutines is not necessary profitable. This change adds limited active spinning to sync.Mutex if: 1. running on a multicore machine and 2. GOMAXPROCS>1 and 3. there is at least one other running P and 4. local runq is empty. As opposed to runtime mutex we don't do passive spinning, because there can be work on global runq on on other Ps. benchmark old ns/op new ns/op delta BenchmarkMutexNoSpin 1271 1272 +0.08% BenchmarkMutexNoSpin-2 702 683 -2.71% BenchmarkMutexNoSpin-4 377 372 -1.33% BenchmarkMutexNoSpin-8 197 190 -3.55% BenchmarkMutexNoSpin-16 131 122 -6.87% BenchmarkMutexNoSpin-32 170 164 -3.53% BenchmarkMutexSpin 4724 4728 +0.08% BenchmarkMutexSpin-2 2501 2491 -0.40% BenchmarkMutexSpin-4 1330 1325 -0.38% BenchmarkMutexSpin-8 684 684 +0.00% BenchmarkMutexSpin-16 414 372 -10.14% BenchmarkMutexSpin-32 559 469 -16.10% BenchmarkMutex 19.1 19.1 +0.00% BenchmarkMutex-2 81.6 54.3 -33.46% BenchmarkMutex-4 143 100 -30.07% BenchmarkMutex-8 154 156 +1.30% BenchmarkMutex-16 140 159 +13.57% BenchmarkMutex-32 141 163 +15.60% BenchmarkMutexSlack 33.3 31.2 -6.31% BenchmarkMutexSlack-2 122 97.7 -19.92% BenchmarkMutexSlack-4 168 158 -5.95% BenchmarkMutexSlack-8 152 158 +3.95% BenchmarkMutexSlack-16 140 159 +13.57% BenchmarkMutexSlack-32 146 162 +10.96% BenchmarkMutexWork 154 154 +0.00% BenchmarkMutexWork-2 89.2 89.9 +0.78% BenchmarkMutexWork-4 139 86.1 -38.06% BenchmarkMutexWork-8 177 162 -8.47% BenchmarkMutexWork-16 170 173 +1.76% BenchmarkMutexWork-32 176 176 +0.00% BenchmarkMutexWorkSlack 160 160 +0.00% BenchmarkMutexWorkSlack-2 103 99.1 -3.79% BenchmarkMutexWorkSlack-4 155 148 -4.52% BenchmarkMutexWorkSlack-8 176 170 -3.41% 
BenchmarkMutexWorkSlack-16 170 173 +1.76% BenchmarkMutexWorkSlack-32 175 176 +0.57% "No work" benchmarks are not very interesting (BenchmarkMutex and BenchmarkMutexSlack), as they are absolutely not realistic. Fixes #8889 Change-Id: I6f14f42af1fa48f73a776fdd11f0af6dd2bb428b Reviewed-on: https://go-review.googlesource.com/5430 Reviewed-by: Rick Hudson <rlh@golang.org> Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
2015-02-20 01:50:56 -07:00
if runtime_canSpin(iter) {
// Active spinning makes sense.
// Try to set mutexWoken flag to inform Unlock
// to not wake other blocked goroutines.
if !awoke && old&mutexWoken == 0 && old>>mutexWaiterShift != 0 &&
atomic.CompareAndSwapInt32(&m.state, old, old|mutexWoken) {
awoke = true
}
runtime_doSpin()
iter++
continue
}
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
new = old + 1<<mutexWaiterShift
}
if awoke {
// The goroutine has been woken from sleep,
// so we need to reset the flag in either case.
sync: add active spinning to Mutex Currently sync.Mutex is fully cooperative. That is, once contention is discovered, the goroutine calls into scheduler. This is suboptimal as the resource can become free soon after (especially if critical sections are short). Server software usually runs at ~~50% CPU utilization, that is, switching to other goroutines is not necessary profitable. This change adds limited active spinning to sync.Mutex if: 1. running on a multicore machine and 2. GOMAXPROCS>1 and 3. there is at least one other running P and 4. local runq is empty. As opposed to runtime mutex we don't do passive spinning, because there can be work on global runq on on other Ps. benchmark old ns/op new ns/op delta BenchmarkMutexNoSpin 1271 1272 +0.08% BenchmarkMutexNoSpin-2 702 683 -2.71% BenchmarkMutexNoSpin-4 377 372 -1.33% BenchmarkMutexNoSpin-8 197 190 -3.55% BenchmarkMutexNoSpin-16 131 122 -6.87% BenchmarkMutexNoSpin-32 170 164 -3.53% BenchmarkMutexSpin 4724 4728 +0.08% BenchmarkMutexSpin-2 2501 2491 -0.40% BenchmarkMutexSpin-4 1330 1325 -0.38% BenchmarkMutexSpin-8 684 684 +0.00% BenchmarkMutexSpin-16 414 372 -10.14% BenchmarkMutexSpin-32 559 469 -16.10% BenchmarkMutex 19.1 19.1 +0.00% BenchmarkMutex-2 81.6 54.3 -33.46% BenchmarkMutex-4 143 100 -30.07% BenchmarkMutex-8 154 156 +1.30% BenchmarkMutex-16 140 159 +13.57% BenchmarkMutex-32 141 163 +15.60% BenchmarkMutexSlack 33.3 31.2 -6.31% BenchmarkMutexSlack-2 122 97.7 -19.92% BenchmarkMutexSlack-4 168 158 -5.95% BenchmarkMutexSlack-8 152 158 +3.95% BenchmarkMutexSlack-16 140 159 +13.57% BenchmarkMutexSlack-32 146 162 +10.96% BenchmarkMutexWork 154 154 +0.00% BenchmarkMutexWork-2 89.2 89.9 +0.78% BenchmarkMutexWork-4 139 86.1 -38.06% BenchmarkMutexWork-8 177 162 -8.47% BenchmarkMutexWork-16 170 173 +1.76% BenchmarkMutexWork-32 176 176 +0.00% BenchmarkMutexWorkSlack 160 160 +0.00% BenchmarkMutexWorkSlack-2 103 99.1 -3.79% BenchmarkMutexWorkSlack-4 155 148 -4.52% BenchmarkMutexWorkSlack-8 176 170 -3.41% 
BenchmarkMutexWorkSlack-16 170 173 +1.76% BenchmarkMutexWorkSlack-32 175 176 +0.57% "No work" benchmarks are not very interesting (BenchmarkMutex and BenchmarkMutexSlack), as they are absolutely not realistic. Fixes #8889 Change-Id: I6f14f42af1fa48f73a776fdd11f0af6dd2bb428b Reviewed-on: https://go-review.googlesource.com/5430 Reviewed-by: Rick Hudson <rlh@golang.org> Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
2015-02-20 01:50:56 -07:00
if new&mutexWoken == 0 {
panic("sync: inconsistent mutex state")
}
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
new &^= mutexWoken
}
if atomic.CompareAndSwapInt32(&m.state, old, new) {
if old&mutexLocked == 0 {
break
}
runtime_Semacquire(&m.sema)
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
awoke = true
sync: add active spinning to Mutex Currently sync.Mutex is fully cooperative. That is, once contention is discovered, the goroutine calls into scheduler. This is suboptimal as the resource can become free soon after (especially if critical sections are short). Server software usually runs at ~~50% CPU utilization, that is, switching to other goroutines is not necessary profitable. This change adds limited active spinning to sync.Mutex if: 1. running on a multicore machine and 2. GOMAXPROCS>1 and 3. there is at least one other running P and 4. local runq is empty. As opposed to runtime mutex we don't do passive spinning, because there can be work on global runq on on other Ps. benchmark old ns/op new ns/op delta BenchmarkMutexNoSpin 1271 1272 +0.08% BenchmarkMutexNoSpin-2 702 683 -2.71% BenchmarkMutexNoSpin-4 377 372 -1.33% BenchmarkMutexNoSpin-8 197 190 -3.55% BenchmarkMutexNoSpin-16 131 122 -6.87% BenchmarkMutexNoSpin-32 170 164 -3.53% BenchmarkMutexSpin 4724 4728 +0.08% BenchmarkMutexSpin-2 2501 2491 -0.40% BenchmarkMutexSpin-4 1330 1325 -0.38% BenchmarkMutexSpin-8 684 684 +0.00% BenchmarkMutexSpin-16 414 372 -10.14% BenchmarkMutexSpin-32 559 469 -16.10% BenchmarkMutex 19.1 19.1 +0.00% BenchmarkMutex-2 81.6 54.3 -33.46% BenchmarkMutex-4 143 100 -30.07% BenchmarkMutex-8 154 156 +1.30% BenchmarkMutex-16 140 159 +13.57% BenchmarkMutex-32 141 163 +15.60% BenchmarkMutexSlack 33.3 31.2 -6.31% BenchmarkMutexSlack-2 122 97.7 -19.92% BenchmarkMutexSlack-4 168 158 -5.95% BenchmarkMutexSlack-8 152 158 +3.95% BenchmarkMutexSlack-16 140 159 +13.57% BenchmarkMutexSlack-32 146 162 +10.96% BenchmarkMutexWork 154 154 +0.00% BenchmarkMutexWork-2 89.2 89.9 +0.78% BenchmarkMutexWork-4 139 86.1 -38.06% BenchmarkMutexWork-8 177 162 -8.47% BenchmarkMutexWork-16 170 173 +1.76% BenchmarkMutexWork-32 176 176 +0.00% BenchmarkMutexWorkSlack 160 160 +0.00% BenchmarkMutexWorkSlack-2 103 99.1 -3.79% BenchmarkMutexWorkSlack-4 155 148 -4.52% BenchmarkMutexWorkSlack-8 176 170 -3.41% 
BenchmarkMutexWorkSlack-16 170 173 +1.76% BenchmarkMutexWorkSlack-32 175 176 +0.57% "No work" benchmarks are not very interesting (BenchmarkMutex and BenchmarkMutexSlack), as they are absolutely not realistic. Fixes #8889 Change-Id: I6f14f42af1fa48f73a776fdd11f0af6dd2bb428b Reviewed-on: https://go-review.googlesource.com/5430 Reviewed-by: Rick Hudson <rlh@golang.org> Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
2015-02-20 01:50:56 -07:00
iter = 0
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
}
}
if race.Enabled {
race.Acquire(unsafe.Pointer(m))
}
}
// Unlock unlocks m.
// It is a run-time error if m is not locked on entry to Unlock.
//
// A locked Mutex is not associated with a particular goroutine.
// It is allowed for one goroutine to lock a Mutex and then
// arrange for another goroutine to unlock it.
func (m *Mutex) Unlock() {
if race.Enabled {
_ = m.state
race.Release(unsafe.Pointer(m))
}
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
// Fast path: drop lock bit.
new := atomic.AddInt32(&m.state, -mutexLocked)
if (new+mutexLocked)&mutexLocked == 0 {
panic("sync: unlock of unlocked mutex")
}
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
old := new
for {
// If there are no waiters or a goroutine has already
// been woken or grabbed the lock, no need to wake anyone.
if old>>mutexWaiterShift == 0 || old&(mutexLocked|mutexWoken) != 0 {
return
}
// Grab the right to wake someone.
new = (old - 1<<mutexWaiterShift) | mutexWoken
if atomic.CompareAndSwapInt32(&m.state, old, new) {
runtime_Semrelease(&m.sema)
sync: improve Mutex to allow successive acquisitions This implementation allows a goroutine to do successive acquisitions of a mutex even if there are blocked goroutines. Moreover, it allows a newcomer goroutine to acquire a mutex ahead of blocked goroutines (that is, it does not enforce FIFO). On implementation level it's achieved by separating waiter count and locked flag. Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz) are as follows (with 4631059 "replace Semacquire/Semrelease implementation" patch applied): benchmark old ns/op new ns/op delta sync_test.BenchmarkMutexUncontended 24.10 25.40 +5.39% sync_test.BenchmarkMutexUncontended-2 12.00 13.00 +8.33% sync_test.BenchmarkMutexUncontended-4 6.06 6.83 +12.71% sync_test.BenchmarkMutexUncontended-8 3.63 3.60 -0.83% sync_test.BenchmarkMutexUncontended-16 2.38 2.49 +4.62% sync_test.BenchmarkMutex 25.00 26.40 +5.60% sync_test.BenchmarkMutex-2 231.00 49.00 -78.79% sync_test.BenchmarkMutex-4 259.00 114.00 -55.98% sync_test.BenchmarkMutex-8 641.00 110.00 -82.84% sync_test.BenchmarkMutex-16 1380.00 96.30 -93.02% sync_test.BenchmarkMutexSlack 24.80 26.20 +5.65% sync_test.BenchmarkMutexSlack-2 210.00 106.00 -49.52% sync_test.BenchmarkMutexSlack-4 453.00 119.00 -73.73% sync_test.BenchmarkMutexSlack-8 1024.00 105.00 -89.75% sync_test.BenchmarkMutexSlack-16 1291.00 91.90 -92.88% sync_test.BenchmarkMutexWork 796.00 796.00 +0.00% sync_test.BenchmarkMutexWork-2 399.00 401.00 +0.50% sync_test.BenchmarkMutexWork-4 216.00 212.00 -1.85% sync_test.BenchmarkMutexWork-8 1547.00 196.00 -87.33% sync_test.BenchmarkMutexWork-16 2754.00 287.00 -89.58% sync_test.BenchmarkMutexWorkSlack 792.00 800.00 +1.01% sync_test.BenchmarkMutexWorkSlack-2 430.00 420.00 -2.33% sync_test.BenchmarkMutexWorkSlack-4 467.00 230.00 -50.75% sync_test.BenchmarkMutexWorkSlack-8 1860.00 273.00 -85.32% sync_test.BenchmarkMutexWorkSlack-16 3029.00 294.00 -90.29% R=rsc CC=golang-dev https://golang.org/cl/4631075
2011-06-30 09:13:29 -06:00
return
}
old = m.state
}
}