1
0
mirror of https://github.com/golang/go synced 2024-11-15 00:10:28 -07:00

internal/runtime/atomic: add arm native implementations of And8/Or8

With LDREXB/STREXB now available for the arm assembler we can implement these operations natively. The instructions are armv6k+ but for simplicity I only use them on armv7.

Benchmark results for a raspberry Pi 3 model B+:

	goos: linux
	goarch: arm
	pkg: internal/runtime/atomic
	cpu: ARMv7 Processor rev 4 (v7l)
			 │   old.txt    │               new.txt               │
			 │    sec/op    │   sec/op     vs base                │
	And8-4             127.65n ± 0%   68.74n ± 0%  -46.15% (p=0.000 n=10)

Change-Id: Ic87f307c35f7d7f56010980302f253056f6d54dc
GitHub-Last-Rev: a7351802fd
GitHub-Pull-Request: golang/go#70002
Cq-Include-Trybots: luci.golang.try:gotip-linux-arm
Reviewed-on: https://go-review.googlesource.com/c/go/+/622075
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Mauri de Souza Meneguzzo 2024-10-23 17:06:39 +00:00 committed by Cherry Mui
parent bbdc65bb38
commit c5d424bfca
2 changed files with 72 additions and 2 deletions

View File

@ -159,8 +159,11 @@ func goStore64(addr *uint64, v uint64) {
addrLock(addr).unlock()
}
//go:noescape
func Or8(addr *uint8, v uint8)
//go:nosplit
func Or8(addr *uint8, v uint8) {
func goOr8(addr *uint8, v uint8) {
// Align down to 4 bytes and use 32-bit CAS.
uaddr := uintptr(unsafe.Pointer(addr))
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))
@ -173,8 +176,11 @@ func Or8(addr *uint8, v uint8) {
}
}
//go:noescape
func And8(addr *uint8, v uint8)
//go:nosplit
func And8(addr *uint8, v uint8) {
func goAnd8(addr *uint8, v uint8) {
// Align down to 4 bytes and use 32-bit CAS.
uaddr := uintptr(unsafe.Pointer(addr))
addr32 := (*uint32)(unsafe.Pointer(uaddr &^ 3))

View File

@ -228,6 +228,42 @@ store64loop:
DMB MB_ISH
RET
TEXT armAnd8<>(SB),NOSPLIT,$0-5
// addr is already in R1
MOVB v+4(FP), R2
and8loop:
LDREXB (R1), R6
DMB MB_ISHST
AND R2, R6
STREXB R6, (R1), R0
CMP $0, R0
BNE and8loop
DMB MB_ISH
RET
TEXT armOr8<>(SB),NOSPLIT,$0-5
// addr is already in R1
MOVB v+4(FP), R2
or8loop:
LDREXB (R1), R6
DMB MB_ISHST
ORR R2, R6
STREXB R6, (R1), R0
CMP $0, R0
BNE or8loop
DMB MB_ISH
RET
// The following functions all panic if their address argument isn't
// 8-byte aligned. Since we're calling back into Go code to do this,
// we have to cooperate with stack unwinding. In the normal case, the
@ -310,3 +346,31 @@ TEXT ·Store64(SB),NOSPLIT,$-4-12
JMP ·goStore64(SB)
#endif
JMP armStore64<>(SB)
TEXT ·And8(SB),NOSPLIT,$-4-5
NO_LOCAL_POINTERS
MOVW addr+0(FP), R1
// Uses STREXB/LDREXB that is armv6k or later.
// For simplicity we only enable this on armv7.
#ifndef GOARM_7
MOVB internalcpu·ARM+const_offsetARMHasV7Atomics(SB), R11
CMP $1, R11
BEQ 2(PC)
JMP ·goAnd8(SB)
#endif
JMP armAnd8<>(SB)
TEXT ·Or8(SB),NOSPLIT,$-4-5
NO_LOCAL_POINTERS
MOVW addr+0(FP), R1
// Uses STREXB/LDREXB that is armv6k or later.
// For simplicity we only enable this on armv7.
#ifndef GOARM_7
MOVB internalcpu·ARM+const_offsetARMHasV7Atomics(SB), R11
CMP $1, R11
BEQ 2(PC)
JMP ·goOr8(SB)
#endif
JMP armOr8<>(SB)