runtime/internal/atomic: improve ARM atomics
This is a follow-up of CL 93637. There, when we redirected sync/atomic
to runtime/internal/atomic, a few good implementations of ARM atomics
were lost. This CL brings most of them back, with some improvements.

- Change atomic Store to a plain store with memory barrier, as we
  already changed atomic Load to a plain load with memory barrier.

- Use native 64-bit atomics on ARMv7, and jump to the Go
  implementations on older machines. But drop the kernel helper.
  In particular, for Load64, just do the loads, not using Cas on
  the address being loaded from, so it also works for read-only
  memory (since we have already fixed 32-bit Load).

Change-Id: I725cd65cf945ae5200db81a35be3f251c9f7af14
Reviewed-on: https://go-review.googlesource.com/111315
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 150b728675
commit 14f929af91
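
Before the diff, a rough Go-level sketch of the dispatch this CL implements in assembly: read runtime·goarm once, then take either the native ARMv7 path or the lock-based Go fallback. Everything below is illustrative only; goarm, nativeXadd64 and lockedXadd64 are hypothetical stand-ins, not the runtime's real symbols, and sync/atomic stands in for the LDREXD/STREXD loop.

	package sketch

	import (
		"sync"
		"sync/atomic"
	)

	// goarm stands in for runtime·goarm, the GOARM value recorded at build time.
	var goarm uint8 = 7

	// lock64 stands in for the runtime's internal lock; a Mutex keeps the
	// sketch self-contained.
	var lock64 sync.Mutex

	// nativeXadd64 models the ARMv7 LDREXD/STREXD loop (armXadd64 in this CL).
	func nativeXadd64(addr *uint64, delta int64) uint64 {
		return atomic.AddUint64(addr, uint64(delta))
	}

	// lockedXadd64 models the pre-ARMv7 fallback (goXadd64 in this CL),
	// which simulates the 64-bit atomic with a lock.
	func lockedXadd64(addr *uint64, delta int64) uint64 {
		lock64.Lock()
		*addr += uint64(delta)
		v := *addr
		lock64.Unlock()
		return v
	}

	// Xadd64 dispatches on the CPU generation, as the TEXT ·Xadd64 stub
	// below does with CMP $7, R11 / BLT.
	func Xadd64(addr *uint64, delta int64) uint64 {
		if goarm >= 7 {
			return nativeXadd64(addr, delta)
		}
		return lockedXadd64(addr, delta)
	}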
--- a/src/runtime/internal/atomic/asm_arm.s
+++ b/src/runtime/internal/atomic/asm_arm.s
@@ -27,8 +27,8 @@ casl:
 	CMP	R0, R2
 	BNE	casfail

-	MOVB	runtime·goarm(SB), R11
-	CMP	$7, R11
+	MOVB	runtime·goarm(SB), R8
+	CMP	$7, R8
 	BLT	2(PC)
 	DMB	MB_ISHST

@@ -37,8 +37,7 @@ casl:
 	BNE	casl
 	MOVW	$1, R0

-	MOVB	runtime·goarm(SB), R11
-	CMP	$7, R11
+	CMP	$7, R8
 	BLT	2(PC)
 	DMB	MB_ISH

@@ -49,12 +48,17 @@ casfail:
 	MOVB	R0, ret+12(FP)
 	RET

+// stubs
+
+TEXT runtime∕internal∕atomic·Loadp(SB),NOSPLIT|NOFRAME,$0-8
+	B	runtime∕internal∕atomic·Load(SB)
+
 TEXT runtime∕internal∕atomic·Casuintptr(SB),NOSPLIT,$0-13
 	B	runtime∕internal∕atomic·Cas(SB)

 TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0-13
 	B	runtime∕internal∕atomic·Cas(SB)

 TEXT runtime∕internal∕atomic·Loaduintptr(SB),NOSPLIT,$0-8
 	B	runtime∕internal∕atomic·Load(SB)

@@ -64,6 +68,9 @@ TEXT runtime∕internal∕atomic·Loaduint(SB),NOSPLIT,$0-8
 TEXT runtime∕internal∕atomic·Storeuintptr(SB),NOSPLIT,$0-8
 	B	runtime∕internal∕atomic·Store(SB)

+TEXT runtime∕internal∕atomic·StorepNoWB(SB),NOSPLIT,$0-8
+	B	runtime∕internal∕atomic·Store(SB)
+
 TEXT runtime∕internal∕atomic·Xadduintptr(SB),NOSPLIT,$0-12
 	B	runtime∕internal∕atomic·Xadd(SB)

@@ -72,3 +79,162 @@ TEXT runtime∕internal∕atomic·Loadint64(SB),NOSPLIT,$0-12

 TEXT runtime∕internal∕atomic·Xaddint64(SB),NOSPLIT,$0-20
 	B	runtime∕internal∕atomic·Xadd64(SB)
+
+// 64-bit atomics
+// The native ARM implementations use LDREXD/STREXD, which are
+// available on ARMv6k or later. We use them only on ARMv7.
+// On older ARM, we use Go implementations which simulate 64-bit
+// atomics with locks.
+
+TEXT armCas64<>(SB),NOSPLIT,$0-21
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+	MOVW	old_lo+4(FP), R2
+	MOVW	old_hi+8(FP), R3
+	MOVW	new_lo+12(FP), R4
+	MOVW	new_hi+16(FP), R5
+cas64loop:
+	LDREXD	(R1), R6	// loads R6 and R7
+	CMP	R2, R6
+	BNE	cas64fail
+	CMP	R3, R7
+	BNE	cas64fail
+
+	DMB	MB_ISHST
+
+	STREXD	R4, (R1), R0	// stores R4 and R5
+	CMP	$0, R0
+	BNE	cas64loop
+	MOVW	$1, R0
+
+	DMB	MB_ISH
+
+	MOVBU	R0, swapped+20(FP)
+	RET
+cas64fail:
+	MOVW	$0, R0
+	MOVBU	R0, swapped+20(FP)
+	RET
+
+TEXT armXadd64<>(SB),NOSPLIT,$0-20
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+	MOVW	delta_lo+4(FP), R2
+	MOVW	delta_hi+8(FP), R3
+
+add64loop:
+	LDREXD	(R1), R4	// loads R4 and R5
+	ADD.S	R2, R4
+	ADC	R3, R5
+
+	DMB	MB_ISHST
+
+	STREXD	R4, (R1), R0	// stores R4 and R5
+	CMP	$0, R0
+	BNE	add64loop
+
+	DMB	MB_ISH
+
+	MOVW	R4, new_lo+12(FP)
+	MOVW	R5, new_hi+16(FP)
+	RET
+
+TEXT armXchg64<>(SB),NOSPLIT,$0-20
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+	MOVW	new_lo+4(FP), R2
+	MOVW	new_hi+8(FP), R3
+
+swap64loop:
+	LDREXD	(R1), R4	// loads R4 and R5
+
+	DMB	MB_ISHST
+
+	STREXD	R2, (R1), R0	// stores R2 and R3
+	CMP	$0, R0
+	BNE	swap64loop
+
+	DMB	MB_ISH
+
+	MOVW	R4, old_lo+12(FP)
+	MOVW	R5, old_hi+16(FP)
+	RET
+
+TEXT armLoad64<>(SB),NOSPLIT,$0-12
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+
+	LDREXD	(R1), R2	// loads R2 and R3
+	DMB	MB_ISH
+
+	MOVW	R2, val_lo+4(FP)
+	MOVW	R3, val_hi+8(FP)
+	RET
+
+TEXT armStore64<>(SB),NOSPLIT,$0-12
+	MOVW	addr+0(FP), R1
+	// make unaligned atomic access panic
+	AND.S	$7, R1, R2
+	BEQ	2(PC)
+	MOVW	R2, (R2) // crash. AND.S above left only low 3 bits in R2.
+	MOVW	val_lo+4(FP), R2
+	MOVW	val_hi+8(FP), R3
+
+store64loop:
+	LDREXD	(R1), R4	// loads R4 and R5
+
+	DMB	MB_ISHST
+
+	STREXD	R2, (R1), R0	// stores R2 and R3
+	CMP	$0, R0
+	BNE	store64loop
+
+	DMB	MB_ISH
+	RET
+
+TEXT ·Cas64(SB),NOSPLIT,$0-21
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armCas64<>(SB)
+	JMP	·goCas64(SB)
+
+TEXT ·Xadd64(SB),NOSPLIT,$0-20
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armXadd64<>(SB)
+	JMP	·goXadd64(SB)
+
+TEXT ·Xchg64(SB),NOSPLIT,$0-20
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armXchg64<>(SB)
+	JMP	·goXchg64(SB)
+
+TEXT ·Load64(SB),NOSPLIT,$0-12
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armLoad64<>(SB)
+	JMP	·goLoad64(SB)
+
+TEXT ·Store64(SB),NOSPLIT,$0-12
+	MOVB	runtime·goarm(SB), R11
+	CMP	$7, R11
+	BLT	2(PC)
+	JMP	armStore64<>(SB)
+	JMP	·goStore64(SB)
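
Callers see none of this dispatch. A small usage sketch with the standard sync/atomic API (which now routes to armXadd64 on ARMv7 and to goXadd64 on older chips):

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	func main() {
		var n uint64 // 64-bit atomics now behave the same on every GOARM level
		var wg sync.WaitGroup
		for i := 0; i < 4; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				for j := 0; j < 1000; j++ {
					atomic.AddUint64(&n, 1)
				}
			}()
		}
		wg.Wait()
		fmt.Println(n) // always 4000
	}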
--- a/src/runtime/internal/atomic/atomic_arm.go
+++ b/src/runtime/internal/atomic/atomic_arm.go
@@ -68,28 +68,14 @@ func Xchguintptr(addr *uintptr, v uintptr) uintptr {
 	return uintptr(Xchg((*uint32)(unsafe.Pointer(addr)), uint32(v)))
 }

-//go:nosplit
-func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer) {
-	for {
-		old := *(*unsafe.Pointer)(addr)
-		if Casp1((*unsafe.Pointer)(addr), old, v) {
-			return
-		}
-	}
-}
+// Not noescape -- it installs a pointer to addr.
+func StorepNoWB(addr unsafe.Pointer, v unsafe.Pointer)

-//go:nosplit
-func Store(addr *uint32, v uint32) {
-	for {
-		old := *addr
-		if Cas(addr, old, v) {
-			return
-		}
-	}
-}
+//go:noescape
+func Store(addr *uint32, v uint32)

 //go:nosplit
-func Cas64(addr *uint64, old, new uint64) bool {
+func goCas64(addr *uint64, old, new uint64) bool {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -105,7 +91,7 @@ func Cas64(addr *uint64, old, new uint64) bool {
 }

 //go:nosplit
-func Xadd64(addr *uint64, delta int64) uint64 {
+func goXadd64(addr *uint64, delta int64) uint64 {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -119,7 +105,7 @@ func Xadd64(addr *uint64, delta int64) uint64 {
 }

 //go:nosplit
-func Xchg64(addr *uint64, v uint64) uint64 {
+func goXchg64(addr *uint64, v uint64) uint64 {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -133,7 +119,7 @@ func Xchg64(addr *uint64, v uint64) uint64 {
 }

 //go:nosplit
-func Load64(addr *uint64) uint64 {
+func goLoad64(addr *uint64) uint64 {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -146,7 +132,7 @@ func Load64(addr *uint64) uint64 {
 }

 //go:nosplit
-func Store64(addr *uint64, v uint64) {
+func goStore64(addr *uint64, v uint64) {
 	if uintptr(unsafe.Pointer(addr))&7 != 0 {
 		*(*int)(nil) = 0 // crash on unaligned uint64
 	}
@@ -194,3 +180,18 @@ func Load(addr *uint32) uint32

 //go:noescape
 func Loadp(addr unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func Cas64(addr *uint64, old, new uint64) bool
+
+//go:noescape
+func Xadd64(addr *uint64, delta int64) uint64
+
+//go:noescape
+func Xchg64(addr *uint64, v uint64) uint64
+
+//go:noescape
+func Load64(addr *uint64) uint64
+
+//go:noescape
+func Store64(addr *uint64, v uint64)
--- a/src/runtime/internal/atomic/sys_linux_arm.s
+++ b/src/runtime/internal/atomic/sys_linux_arm.s
@@ -55,9 +55,6 @@ check:
 	MOVB	R0, ret+12(FP)
 	RET

-TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0
-	B	runtime∕internal∕atomic·Cas(SB)
-
 // As for cas, memory barriers are complicated on ARM, but the kernel
 // provides a user helper. ARMv5 does not support SMP and has no
 // memory barrier instruction at all. ARMv6 added SMP support and has
@@ -70,7 +67,7 @@ TEXT runtime∕internal∕atomic·Casp1(SB),NOSPLIT,$0
 TEXT memory_barrier<>(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$0xffff0fa0, R15 // R15 is hardware PC.

-TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT,$0-8
+TEXT ·Load(SB),NOSPLIT,$0-8
 	MOVW	addr+0(FP), R0
 	MOVW	(R0), R1

@@ -78,10 +75,32 @@ TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT,$0-8
 	CMP	$7, R11
 	BGE	native_barrier
 	BL	memory_barrier<>(SB)
-	B	prolog
+	B	end
 native_barrier:
 	DMB	MB_ISH
+end:
+	MOVW	R1, ret+4(FP)
+	RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+	MOVW	addr+0(FP), R1
+	MOVW	v+4(FP), R2
+
+	MOVB	runtime·goarm(SB), R8
+	CMP	$7, R8
+	BGE	native_barrier
+	BL	memory_barrier<>(SB)
+	B	store
+native_barrier:
+	DMB	MB_ISH

-prolog:
-	MOVW	R1, ret+4(FP)
+store:
+	MOVW	R2, (R1)
+
+	CMP	$7, R8
+	BGE	native_barrier2
+	BL	memory_barrier<>(SB)
+	RET
+native_barrier2:
+	DMB	MB_ISH
 	RET
--- a/src/runtime/internal/atomic/sys_nonlinux_arm.s
+++ b/src/runtime/internal/atomic/sys_nonlinux_arm.s
@@ -17,10 +17,11 @@
 TEXT	·Cas(SB),NOSPLIT,$0
 	JMP	·armcas(SB)

 TEXT	·Casp1(SB),NOSPLIT,$0
 	JMP	·Cas(SB)
+// Non-linux OSes support only single processor machines before ARMv7.
+// So we don't need memory barriers if goarm < 7. And we fail loud at
+// startup (runtime.checkgoarm) if it is a multi-processor but goarm < 7.

-TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT|NOFRAME,$0-8
+TEXT ·Load(SB),NOSPLIT|NOFRAME,$0-8
 	MOVW	addr+0(FP), R0
 	MOVW	(R0), R1

@@ -31,3 +32,19 @@ TEXT runtime∕internal∕atomic·Load(SB),NOSPLIT|NOFRAME,$0-8

 	MOVW	R1, ret+4(FP)
 	RET
+
+TEXT ·Store(SB),NOSPLIT,$0-8
+	MOVW	addr+0(FP), R1
+	MOVW	v+4(FP), R2
+
+	MOVB	runtime·goarm(SB), R8
+	CMP	$7, R8
+	BLT	2(PC)
+	DMB	MB_ISH
+
+	MOVW	R2, (R1)
+
+	CMP	$7, R8
+	BLT	2(PC)
+	DMB	MB_ISH
+	RET
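
A note on the AND.S $7 checks above: they deliberately fault on misaligned addresses, matching sync/atomic's documented rule that 64-bit atomic operands on 32-bit platforms such as ARM must be 8-byte aligned. A minimal sketch of how callers usually satisfy this (the stats struct is hypothetical; the point is keeping 64-bit fields first):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type stats struct {
		// Keep 64-bit fields first: the first word of an allocated
		// struct is 64-bit aligned, so atomic ops on hits are safe
		// on 32-bit ARM.
		hits  uint64
		label string
	}

	func main() {
		s := &stats{label: "requests"}
		atomic.AddUint64(&s.hits, 1)
		fmt.Println(s.label, atomic.LoadUint64(&s.hits))
	}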