mirror of
https://github.com/golang/go
synced 2024-11-15 00:10:28 -07:00
internal/runtime/atomic: add arm native implementations of And8/Or8
With LDREXB/STREXB now available for the arm assembler we can implement these operations natively. The instructions are armv6k+ but for simplicity I only use them on armv7.
Benchmark results for a raspberry Pi 3 model B+:
goos: linux
goarch: arm
pkg: internal/runtime/atomic
cpu: ARMv7 Processor rev 4 (v7l)
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
And8-4 127.65n ± 0% 68.74n ± 0% -46.15% (p=0.000 n=10)
Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc
GitHub-Last-Rev: a7351802fd
GitHub-Pull-Request: golang/go#70002
Cq-Include-Trybots: luci.golang.try:gotip-linux-arm
Reviewed-on: https://go-review.googlesource.com/c/go/+/622075
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
bbdc65bb38
commit
c5d424bfca
@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) {
|
||||
addrLock(addr).unlock()
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func Or8(addr *uint8, v uint8)
|
||||
|
||||
//go:nosplit
|
||||
func Or8(addr *uint8, v uint8) {
|
||||
func goOr8(addr *uint8, v uint8) {
|
||||
// Align down to 4 bytes and use 32-bit CAS.
|
||||
uaddr := uintptr(unsafe.Pointer(addr))
|
||||
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
|
||||
@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) {
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func And8(addr *uint8, v uint8)
|
||||
|
||||
//go:nosplit
|
||||
func And8(addr *uint8, v uint8) {
|
||||
func goAnd8(addr *uint8, v uint8) {
|
||||
// Align down to 4 bytes and use 32-bit CAS.
|
||||
uaddr := uintptr(unsafe.Pointer(addr))
|
||||
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
|
||||
|
@ -228,6 +228,42 @@ store64loop:
|
||||
DMB MB_ISH
|
||||
RET
|
||||
|
||||
TEXT armAnd8<>(SB),NOSPLIT,$0-5
|
||||
// addr is already in R1
|
||||
MOVB v+4(FP), R2
|
||||
|
||||
and8loop:
|
||||
LDREXB (R1), R6
|
||||
|
||||
DMB MB_ISHST
|
||||
|
||||
AND R2, R6
|
||||
STREXB R6, (R1), R0
|
||||
CMP $0, R0
|
||||
BNE and8loop
|
||||
|
||||
DMB MB_ISH
|
||||
|
||||
RET
|
||||
|
||||
TEXT armOr8<>(SB),NOSPLIT,$0-5
|
||||
// addr is already in R1
|
||||
MOVB v+4(FP), R2
|
||||
|
||||
or8loop:
|
||||
LDREXB (R1), R6
|
||||
|
||||
DMB MB_ISHST
|
||||
|
||||
ORR R2, R6
|
||||
STREXB R6, (R1), R0
|
||||
CMP $0, R0
|
||||
BNE or8loop
|
||||
|
||||
DMB MB_ISH
|
||||
|
||||
RET
|
||||
|
||||
// The following functions all panic if their address argument isn't
|
||||
// 8-byte aligned. Since we're calling back into Go code to do this,
|
||||
// we have to cooperate with stack unwinding. In the normal case, the
|
||||
@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12
|
||||
JMP ·goStore64(SB)
|
||||
#endif
|
||||
JMP armStore64<>(SB)
|
||||
|
||||
TEXT ·And8(SB),NOSPLIT,$-4-5
|
||||
NO_LOCAL_POINTERS
|
||||
MOVW addr+0(FP), R1
|
||||
|
||||
// Uses STREXB/LDREXB that is armv6k or later.
|
||||
// For simplicity we only enable this on armv7.
|
||||
#ifndef GOARM_7
|
||||
MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
|
||||
CMP $1, R11
|
||||
BEQ 2(PC)
|
||||
JMP ·goAnd8(SB)
|
||||
#endif
|
||||
JMP armAnd8<>(SB)
|
||||
|
||||
TEXT ·Or8(SB),NOSPLIT,$-4-5
|
||||
NO_LOCAL_POINTERS
|
||||
MOVW addr+0(FP), R1
|
||||
|
||||
// Uses STREXB/LDREXB that is armv6k or later.
|
||||
// For simplicity we only enable this on armv7.
|
||||
#ifndef GOARM_7
|
||||
MOVB internal∕cpu·ARM+const_offsetARMHasV7Atomics(SB), R11
|
||||
CMP $1, R11
|
||||
BEQ 2(PC)
|
||||
JMP ·goOr8(SB)
|
||||
#endif
|
||||
JMP armOr8<>(SB)
|
||||
|
Loading…
Reference in New Issue
Block a user