mirror of
https://github.com/golang/go
synced 2024-11-26 01:07:57 -07:00
sync: allow inlining the Mutex.Lock fast path
name                    old time/op  new time/op  delta
MutexUncontended        18.9ns ± 0%  16.2ns ± 0%  -14.29%  (p=0.000 n=19+19)
MutexUncontended-4      4.75ns ± 1%  4.08ns ± 0%  -14.20%  (p=0.000 n=20+19)
MutexUncontended-16     2.05ns ± 0%  2.11ns ± 0%  +2.93%  (p=0.000 n=19+16)
Mutex                   19.3ns ± 1%  16.2ns ± 0%  -15.86%  (p=0.000 n=17+19)
Mutex-4                 52.4ns ± 4%  48.6ns ± 9%  -7.22%  (p=0.000 n=20+20)
Mutex-16                139ns ± 2%  140ns ± 3%  +1.03%  (p=0.011 n=16+20)
MutexSlack              18.9ns ± 1%  16.2ns ± 1%  -13.96%  (p=0.000 n=20+20)
MutexSlack-4            225ns ± 8%  211ns ±10%  -5.94%  (p=0.000 n=18+19)
MutexSlack-16           98.4ns ± 1%  90.9ns ± 1%  -7.60%  (p=0.000 n=17+18)
MutexWork               58.2ns ± 3%  55.4ns ± 0%  -4.82%  (p=0.000 n=20+17)
MutexWork-4             103ns ± 7%  95ns ±18%  -8.03%  (p=0.000 n=20+20)
MutexWork-16            163ns ± 2%  155ns ± 2%  -4.47%  (p=0.000 n=18+18)
MutexWorkSlack          57.7ns ± 1%  55.4ns ± 0%  -3.99%  (p=0.000 n=20+13)
MutexWorkSlack-4        276ns ±13%  260ns ±10%  -5.64%  (p=0.001 n=19+19)
MutexWorkSlack-16       147ns ± 0%  156ns ± 1%  +5.87%  (p=0.000 n=14+19)
MutexNoSpin             968ns ± 0%  900ns ± 1%  -6.98%  (p=0.000 n=20+18)
MutexNoSpin-4           270ns ± 2%  255ns ± 2%  -5.74%  (p=0.000 n=19+20)
MutexNoSpin-16          120ns ± 4%  112ns ± 0%  -6.99%  (p=0.000 n=19+14)
MutexSpin               3.13µs ± 1%  3.19µs ± 6%  ~  (p=0.401 n=20+20)
MutexSpin-4             832ns ± 2%  831ns ± 1%  -0.17%  (p=0.023 n=16+18)
MutexSpin-16            395ns ± 0%  399ns ± 0%  +0.94%  (p=0.000 n=17+19)
RWMutexUncontended      69.5ns ± 0%  68.4ns ± 0%  -1.59%  (p=0.000 n=20+20)
RWMutexUncontended-4    17.5ns ± 0%  16.7ns ± 0%  -4.30%  (p=0.000 n=18+17)
RWMutexUncontended-16   7.92ns ± 0%  7.87ns ± 0%  -0.61%  (p=0.000 n=18+17)
RWMutexWrite100         24.9ns ± 1%  25.0ns ± 1%  +0.32%  (p=0.000 n=20+20)
RWMutexWrite100-4       46.2ns ± 4%  46.2ns ± 5%  ~  (p=0.840 n=19+20)
RWMutexWrite100-16      69.9ns ± 5%  69.9ns ± 3%  ~  (p=0.545 n=20+19)
RWMutexWrite10          27.0ns ± 2%  26.8ns ± 2%  -0.98%  (p=0.001 n=20+20)
RWMutexWrite10-4        34.7ns ± 2%  35.0ns ± 4%  ~  (p=0.191 n=18+20)
RWMutexWrite10-16       37.2ns ± 4%  37.3ns ± 2%  ~  (p=0.438 n=20+19)
RWMutexWorkWrite100     164ns ± 0%  163ns ± 0%  -0.24%  (p=0.025 n=20+20)
RWMutexWorkWrite100-4   193ns ± 3%  191ns ± 2%  -1.06%  (p=0.027 n=20+20)
RWMutexWorkWrite100-16  210ns ± 3%  207ns ± 3%  -1.22%  (p=0.038 n=20+20)
RWMutexWorkWrite10      153ns ± 0%  153ns ± 0%  ~  (all equal)
RWMutexWorkWrite10-4    178ns ± 2%  179ns ± 2%  ~  (p=0.186 n=20+20)
RWMutexWorkWrite10-16   192ns ± 2%  192ns ± 2%  ~  (p=0.731 n=19+20)

linux/amd64 bin/go 14663387 (previous commit 14630572, +32815/+0.22%)

Change-Id: I98171006dce14069b1a62da07c3d165455a7906b
Reviewed-on: https://go-review.googlesource.com/c/go/+/148959
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
parent
83a33d3855
commit
41cb0aedff
@ -53,12 +53,12 @@ var semtable [semTabSize]struct {
|
||||
|
||||
//go:linkname sync_runtime_Semacquire sync.runtime_Semacquire
|
||||
func sync_runtime_Semacquire(addr *uint32) {
|
||||
semacquire1(addr, false, semaBlockProfile)
|
||||
semacquire1(addr, false, semaBlockProfile, 0)
|
||||
}
|
||||
|
||||
//go:linkname poll_runtime_Semacquire internal/poll.runtime_Semacquire
|
||||
func poll_runtime_Semacquire(addr *uint32) {
|
||||
semacquire1(addr, false, semaBlockProfile)
|
||||
semacquire1(addr, false, semaBlockProfile, 0)
|
||||
}
|
||||
|
||||
//go:linkname sync_runtime_Semrelease sync.runtime_Semrelease
|
||||
@ -67,8 +67,8 @@ func sync_runtime_Semrelease(addr *uint32, handoff bool, skipframes int) {
|
||||
}
|
||||
|
||||
//go:linkname sync_runtime_SemacquireMutex sync.runtime_SemacquireMutex
|
||||
func sync_runtime_SemacquireMutex(addr *uint32, lifo bool) {
|
||||
semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile)
|
||||
func sync_runtime_SemacquireMutex(addr *uint32, lifo bool, skipframes int) {
|
||||
semacquire1(addr, lifo, semaBlockProfile|semaMutexProfile, skipframes)
|
||||
}
|
||||
|
||||
//go:linkname poll_runtime_Semrelease internal/poll.runtime_Semrelease
|
||||
@ -92,10 +92,10 @@ const (
|
||||
|
||||
// Called from runtime.
|
||||
func semacquire(addr *uint32) {
|
||||
semacquire1(addr, false, 0)
|
||||
semacquire1(addr, false, 0, 0)
|
||||
}
|
||||
|
||||
func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) {
|
||||
func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags, skipframes int) {
|
||||
gp := getg()
|
||||
if gp != gp.m.curg {
|
||||
throw("semacquire not on the G stack")
|
||||
@ -141,13 +141,13 @@ func semacquire1(addr *uint32, lifo bool, profile semaProfileFlags) {
|
||||
// Any semrelease after the cansemacquire knows we're waiting
|
||||
// (we set nwait above), so go to sleep.
|
||||
root.queue(addr, s, lifo)
|
||||
goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4)
|
||||
goparkunlock(&root.lock, waitReasonSemacquire, traceEvGoBlockSync, 4+skipframes)
|
||||
if s.ticket != 0 || cansemacquire(addr) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if s.releasetime > 0 {
|
||||
blockevent(s.releasetime-t0, 3)
|
||||
blockevent(s.releasetime-t0, 3+skipframes)
|
||||
}
|
||||
releaseSudog(s)
|
||||
}
|
||||
|
@ -77,7 +77,11 @@ func (m *Mutex) Lock() {
|
||||
}
|
||||
return
|
||||
}
|
||||
// Slow path (outlined so that the fast path can be inlined)
|
||||
m.lockSlow()
|
||||
}
|
||||
|
||||
func (m *Mutex) lockSlow() {
|
||||
var waitStartTime int64
|
||||
starving := false
|
||||
awoke := false
|
||||
@ -131,7 +135,7 @@ func (m *Mutex) Lock() {
|
||||
if waitStartTime == 0 {
|
||||
waitStartTime = runtime_nanotime()
|
||||
}
|
||||
runtime_SemacquireMutex(&m.sema, queueLifo)
|
||||
runtime_SemacquireMutex(&m.sema, queueLifo, 1)
|
||||
starving = starving || runtime_nanotime()-waitStartTime > starvationThresholdNs
|
||||
old = m.state
|
||||
if old&mutexStarving != 0 {
|
||||
|
@ -15,7 +15,9 @@ func runtime_Semacquire(s *uint32)
|
||||
|
||||
// SemacquireMutex is like Semacquire, but for profiling contended Mutexes.
|
||||
// If lifo is true, queue waiter at the head of wait queue.
|
||||
func runtime_SemacquireMutex(s *uint32, lifo bool)
|
||||
// skipframes is the number of frames to omit during tracing, counting from
|
||||
// runtime_SemacquireMutex's caller.
|
||||
func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int)
|
||||
|
||||
// Semrelease atomically increments *s and notifies a waiting goroutine
|
||||
// if one is blocked in Semacquire.
|
||||
|
@ -47,7 +47,7 @@ func (rw *RWMutex) RLock() {
|
||||
}
|
||||
if atomic.AddInt32(&rw.readerCount, 1) < 0 {
|
||||
// A writer is pending, wait for it.
|
||||
runtime_SemacquireMutex(&rw.readerSem, false)
|
||||
runtime_SemacquireMutex(&rw.readerSem, false, 0)
|
||||
}
|
||||
if race.Enabled {
|
||||
race.Enable()
|
||||
@ -95,7 +95,7 @@ func (rw *RWMutex) Lock() {
|
||||
r := atomic.AddInt32(&rw.readerCount, -rwmutexMaxReaders) + rwmutexMaxReaders
|
||||
// Wait for active readers.
|
||||
if r != 0 && atomic.AddInt32(&rw.readerWait, r) != 0 {
|
||||
runtime_SemacquireMutex(&rw.writerSem, false)
|
||||
runtime_SemacquireMutex(&rw.writerSem, false, 0)
|
||||
}
|
||||
if race.Enabled {
|
||||
race.Enable()
|
||||
|
@ -8,7 +8,11 @@
|
||||
// Test, using compiler diagnostic flags, that inlining of functions
|
||||
// imported from the sync package is working.
|
||||
// Compiles but does not run.
|
||||
// FIXME: nacl-386 is excluded as inlining currently does not work there.
|
||||
|
||||
// FIXME: This test is disabled on architectures where atomic operations
|
||||
// are function calls rather than intrinsics, since this prevents inlining
|
||||
// of the sync fast paths. This test should be re-enabled once the problem
|
||||
// is solved.
|
||||
|
||||
package foo
|
||||
|
||||
@ -22,3 +26,8 @@ func small5() { // ERROR "can inline small5"
|
||||
// the Unlock fast path should be inlined
|
||||
mutex.Unlock() // ERROR "inlining call to sync\.\(\*Mutex\)\.Unlock" "&sync\.m\.state escapes to heap"
|
||||
}
|
||||
|
||||
func small6() { // ERROR "can inline small6"
|
||||
// the Lock fast path should be inlined
|
||||
mutex.Lock() // ERROR "inlining call to sync\.\(\*Mutex\)\.Lock" "&sync\.m\.state escapes to heap"
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user