mirror of
https://github.com/golang/go
synced 2024-11-24 01:10:12 -07:00
runtime: use native CAS and memory barrier on ARMv7
This gets us around the kernel helpers on ARMv7. It is slightly faster than using the kernel helper. name old time/op new time/op delta AtomicLoad-4 72.5ns ± 0% 69.5ns ± 0% -4.08% (p=0.000 n=9+9) AtomicStore-4 57.6ns ± 1% 54.4ns ± 0% -5.58% (p=0.000 n=10+9) [Geo mean] 64.6ns 61.5ns -4.83% If performance is really critical, we can even do compiler intrinsics on GOARM=7. Fixes #23792. Change-Id: I36497d880890b26bdf01e048b542bd5fd7b17d23 Reviewed-on: https://go-review.googlesource.com/94076 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
This commit is contained in:
parent
1b6fec862c
commit
150b728675
@ -26,3 +26,19 @@ func BenchmarkAtomicStore64(b *testing.B) {
|
||||
atomic.Store64(&x, 0)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAtomicLoad(b *testing.B) {
|
||||
var x uint32
|
||||
sink = &x
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = atomic.Load(&x)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkAtomicStore(b *testing.B) {
|
||||
var x uint32
|
||||
sink = &x
|
||||
for i := 0; i < b.N; i++ {
|
||||
atomic.Store(&x, 0)
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,14 @@
|
||||
TEXT cas<>(SB),NOSPLIT,$0
|
||||
MOVW $0xffff0fc0, R15 // R15 is hardware PC.
|
||||
|
||||
TEXT runtime∕internal∕atomic·Cas(SB),NOSPLIT,$0
|
||||
TEXT runtime∕internal∕atomic·Cas(SB),NOSPLIT|NOFRAME,$0
|
||||
MOVB runtime·goarm(SB), R11
|
||||
CMP $7, R11
|
||||
BLT 2(PC)
|
||||
JMP ·armcas(SB)
|
||||
JMP ·kernelcas<>(SB)
|
||||
|
||||
TEXT runtime∕internal∕atomic·kernelcas<>(SB),NOSPLIT,$0
|
||||
MOVW ptr+0(FP), R2
|
||||
// trigger potential paging fault here,
|
||||
// because we don't know how to traceback through __kuser_cmpxchg
|
||||
|
@ -489,13 +489,18 @@ TEXT runtime·usleep(SB),NOSPLIT,$12
|
||||
// even on single-core devices. The kernel helper takes care of all of
|
||||
// this for us.
|
||||
|
||||
TEXT publicationBarrier<>(SB),NOSPLIT,$0
|
||||
TEXT kernelPublicationBarrier<>(SB),NOSPLIT,$0
|
||||
// void __kuser_memory_barrier(void);
|
||||
MOVW $0xffff0fa0, R15 // R15 is hardware PC.
|
||||
MOVW $0xffff0fa0, R11
|
||||
CALL (R11)
|
||||
RET
|
||||
|
||||
TEXT ·publicationBarrier(SB),NOSPLIT,$0
|
||||
BL publicationBarrier<>(SB)
|
||||
RET
|
||||
MOVB ·goarm(SB), R11
|
||||
CMP $7, R11
|
||||
BLT 2(PC)
|
||||
JMP ·armPublicationBarrier(SB)
|
||||
JMP kernelPublicationBarrier<>(SB) // extra layer so this function is leaf and no SP adjustment on GOARM=7
|
||||
|
||||
TEXT runtime·osyield(SB),NOSPLIT,$0
|
||||
MOVW $SYS_sched_yield, R7
|
||||
|
Loading…
Reference in New Issue
Block a user