mirror of
https://github.com/golang/go
synced 2024-11-18 14:14:46 -07:00
cmd/compile/internal/ppc64, runtime internal/atomic, sync/atomic: implement faster atomics for ppc64x
This change implements faster atomics for ppc64x based on the ISA 2.07B, Appendix B.2 recommendations, replacing SYNC/ISYNC by LWSYNC in some cases. Updates #21348 name old time/op new time/op delta Cond1-16 955ns 856ns -10.33% Cond2-16 2.38µs 2.03µs -14.59% Cond4-16 5.90µs 5.44µs -7.88% Cond8-16 12.1µs 11.1µs -8.42% Cond16-16 27.0µs 25.1µs -7.04% Cond32-16 59.1µs 55.5µs -6.14% LoadMostlyHits/*sync_test.DeepCopyMap-16 22.1ns 24.1ns +9.02% LoadMostlyHits/*sync_test.RWMutexMap-16 252ns 249ns -1.20% LoadMostlyHits/*sync.Map-16 16.2ns 16.3ns ~ LoadMostlyMisses/*sync_test.DeepCopyMap-16 22.3ns 22.6ns ~ LoadMostlyMisses/*sync_test.RWMutexMap-16 249ns 247ns -0.51% LoadMostlyMisses/*sync.Map-16 12.7ns 12.7ns ~ LoadOrStoreBalanced/*sync_test.RWMutexMap-16 1.27µs 1.17µs -7.54% LoadOrStoreBalanced/*sync.Map-16 1.12µs 1.10µs -2.35% LoadOrStoreUnique/*sync_test.RWMutexMap-16 1.75µs 1.68µs -3.84% LoadOrStoreUnique/*sync.Map-16 2.07µs 1.97µs -5.13% LoadOrStoreCollision/*sync_test.DeepCopyMap-16 15.8ns 15.9ns ~ LoadOrStoreCollision/*sync_test.RWMutexMap-16 496ns 424ns -14.48% LoadOrStoreCollision/*sync.Map-16 6.07ns 6.07ns ~ Range/*sync_test.DeepCopyMap-16 1.65µs 1.64µs ~ Range/*sync_test.RWMutexMap-16 278µs 288µs +3.75% Range/*sync.Map-16 2.00µs 2.01µs ~ AdversarialAlloc/*sync_test.DeepCopyMap-16 3.45µs 3.44µs ~ AdversarialAlloc/*sync_test.RWMutexMap-16 226ns 227ns ~ AdversarialAlloc/*sync.Map-16 1.09µs 1.07µs -2.36% AdversarialDelete/*sync_test.DeepCopyMap-16 553ns 550ns -0.57% AdversarialDelete/*sync_test.RWMutexMap-16 273ns 274ns ~ AdversarialDelete/*sync.Map-16 247ns 249ns ~ UncontendedSemaphore-16 79.0ns 65.5ns -17.11% ContendedSemaphore-16 112ns 97ns -13.77% MutexUncontended-16 3.34ns 2.51ns -24.69% Mutex-16 266ns 191ns -28.26% MutexSlack-16 226ns 159ns -29.55% MutexWork-16 377ns 338ns -10.14% MutexWorkSlack-16 335ns 308ns -8.20% MutexNoSpin-16 196ns 184ns -5.91% MutexSpin-16 710ns 666ns -6.21% Once-16 1.29ns 1.29ns ~ Pool-16 8.64ns 8.71ns ~ PoolOverflow-16 1.60µs 1.44µs -10.25% SemaUncontended-16 5.39ns 4.42ns -17.96% SemaSyntNonblock-16 539ns 483ns -10.42% SemaSyntBlock-16 413ns 354ns -14.20% SemaWorkNonblock-16 305ns 258ns -15.36% SemaWorkBlock-16 266ns 229ns -14.06% RWMutexUncontended-16 12.9ns 9.7ns -24.80% RWMutexWrite100-16 203ns 147ns -27.47% RWMutexWrite10-16 177ns 119ns -32.74% RWMutexWorkWrite100-16 435ns 403ns -7.39% RWMutexWorkWrite10-16 642ns 611ns -4.79% WaitGroupUncontended-16 4.67ns 3.70ns -20.92% WaitGroupAddDone-16 402ns 355ns -11.54% WaitGroupAddDoneWork-16 208ns 250ns +20.09% WaitGroupWait-16 1.21ns 1.21ns ~ WaitGroupWaitWork-16 5.91ns 5.87ns -0.81% WaitGroupActuallyWait-16 92.2ns 85.8ns -6.91% Updates #21348 Change-Id: Ibb9b271d11b308264103829e176c6d9fe8f867d3 Reviewed-on: https://go-review.googlesource.com/95175 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
a3d8326993
commit
6633bb2aa7
@ -154,16 +154,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
|
|
||||||
case ssa.OpPPC64LoweredAtomicAnd8,
|
case ssa.OpPPC64LoweredAtomicAnd8,
|
||||||
ssa.OpPPC64LoweredAtomicOr8:
|
ssa.OpPPC64LoweredAtomicOr8:
|
||||||
// SYNC
|
// LWSYNC
|
||||||
// LBAR (Rarg0), Rtmp
|
// LBAR (Rarg0), Rtmp
|
||||||
// AND/OR Rarg1, Rtmp
|
// AND/OR Rarg1, Rtmp
|
||||||
// STBCCC Rtmp, (Rarg0)
|
// STBCCC Rtmp, (Rarg0)
|
||||||
// BNE -3(PC)
|
// BNE -3(PC)
|
||||||
// ISYNC
|
|
||||||
r0 := v.Args[0].Reg()
|
r0 := v.Args[0].Reg()
|
||||||
r1 := v.Args[1].Reg()
|
r1 := v.Args[1].Reg()
|
||||||
psync := s.Prog(ppc64.ASYNC)
|
// LWSYNC - Assuming shared data not write-through-required nor
|
||||||
psync.To.Type = obj.TYPE_NONE
|
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
|
||||||
|
plwsync := s.Prog(ppc64.ALWSYNC)
|
||||||
|
plwsync.To.Type = obj.TYPE_NONE
|
||||||
p := s.Prog(ppc64.ALBAR)
|
p := s.Prog(ppc64.ALBAR)
|
||||||
p.From.Type = obj.TYPE_MEM
|
p.From.Type = obj.TYPE_MEM
|
||||||
p.From.Reg = r0
|
p.From.Reg = r0
|
||||||
@ -183,17 +184,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
p3 := s.Prog(ppc64.ABNE)
|
p3 := s.Prog(ppc64.ABNE)
|
||||||
p3.To.Type = obj.TYPE_BRANCH
|
p3.To.Type = obj.TYPE_BRANCH
|
||||||
gc.Patch(p3, p)
|
gc.Patch(p3, p)
|
||||||
pisync := s.Prog(ppc64.AISYNC)
|
|
||||||
pisync.To.Type = obj.TYPE_NONE
|
|
||||||
|
|
||||||
case ssa.OpPPC64LoweredAtomicAdd32,
|
case ssa.OpPPC64LoweredAtomicAdd32,
|
||||||
ssa.OpPPC64LoweredAtomicAdd64:
|
ssa.OpPPC64LoweredAtomicAdd64:
|
||||||
// SYNC
|
// LWSYNC
|
||||||
// LDAR/LWAR (Rarg0), Rout
|
// LDAR/LWAR (Rarg0), Rout
|
||||||
// ADD Rarg1, Rout
|
// ADD Rarg1, Rout
|
||||||
// STDCCC/STWCCC Rout, (Rarg0)
|
// STDCCC/STWCCC Rout, (Rarg0)
|
||||||
// BNE -3(PC)
|
// BNE -3(PC)
|
||||||
// ISYNC
|
|
||||||
// MOVW Rout,Rout (if Add32)
|
// MOVW Rout,Rout (if Add32)
|
||||||
ld := ppc64.ALDAR
|
ld := ppc64.ALDAR
|
||||||
st := ppc64.ASTDCCC
|
st := ppc64.ASTDCCC
|
||||||
@ -204,9 +202,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
r0 := v.Args[0].Reg()
|
r0 := v.Args[0].Reg()
|
||||||
r1 := v.Args[1].Reg()
|
r1 := v.Args[1].Reg()
|
||||||
out := v.Reg0()
|
out := v.Reg0()
|
||||||
// SYNC
|
// LWSYNC - Assuming shared data not write-through-required nor
|
||||||
psync := s.Prog(ppc64.ASYNC)
|
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
|
||||||
psync.To.Type = obj.TYPE_NONE
|
plwsync := s.Prog(ppc64.ALWSYNC)
|
||||||
|
plwsync.To.Type = obj.TYPE_NONE
|
||||||
// LDAR or LWAR
|
// LDAR or LWAR
|
||||||
p := s.Prog(ld)
|
p := s.Prog(ld)
|
||||||
p.From.Type = obj.TYPE_MEM
|
p.From.Type = obj.TYPE_MEM
|
||||||
@ -229,9 +228,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
p4 := s.Prog(ppc64.ABNE)
|
p4 := s.Prog(ppc64.ABNE)
|
||||||
p4.To.Type = obj.TYPE_BRANCH
|
p4.To.Type = obj.TYPE_BRANCH
|
||||||
gc.Patch(p4, p)
|
gc.Patch(p4, p)
|
||||||
// ISYNC
|
|
||||||
pisync := s.Prog(ppc64.AISYNC)
|
|
||||||
pisync.To.Type = obj.TYPE_NONE
|
|
||||||
|
|
||||||
// Ensure a 32 bit result
|
// Ensure a 32 bit result
|
||||||
if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
|
if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
|
||||||
@ -244,7 +240,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
|
|
||||||
case ssa.OpPPC64LoweredAtomicExchange32,
|
case ssa.OpPPC64LoweredAtomicExchange32,
|
||||||
ssa.OpPPC64LoweredAtomicExchange64:
|
ssa.OpPPC64LoweredAtomicExchange64:
|
||||||
// SYNC
|
// LWSYNC
|
||||||
// LDAR/LWAR (Rarg0), Rout
|
// LDAR/LWAR (Rarg0), Rout
|
||||||
// STDCCC/STWCCC Rout, (Rarg0)
|
// STDCCC/STWCCC Rout, (Rarg0)
|
||||||
// BNE -2(PC)
|
// BNE -2(PC)
|
||||||
@ -258,9 +254,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
r0 := v.Args[0].Reg()
|
r0 := v.Args[0].Reg()
|
||||||
r1 := v.Args[1].Reg()
|
r1 := v.Args[1].Reg()
|
||||||
out := v.Reg0()
|
out := v.Reg0()
|
||||||
// SYNC
|
// LWSYNC - Assuming shared data not write-through-required nor
|
||||||
psync := s.Prog(ppc64.ASYNC)
|
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
|
||||||
psync.To.Type = obj.TYPE_NONE
|
plwsync := s.Prog(ppc64.ALWSYNC)
|
||||||
|
plwsync.To.Type = obj.TYPE_NONE
|
||||||
// LDAR or LWAR
|
// LDAR or LWAR
|
||||||
p := s.Prog(ld)
|
p := s.Prog(ld)
|
||||||
p.From.Type = obj.TYPE_MEM
|
p.From.Type = obj.TYPE_MEM
|
||||||
@ -342,14 +339,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
|
|
||||||
case ssa.OpPPC64LoweredAtomicCas64,
|
case ssa.OpPPC64LoweredAtomicCas64,
|
||||||
ssa.OpPPC64LoweredAtomicCas32:
|
ssa.OpPPC64LoweredAtomicCas32:
|
||||||
// SYNC
|
// LWSYNC
|
||||||
// loop:
|
// loop:
|
||||||
// LDAR (Rarg0), Rtmp
|
// LDAR (Rarg0), Rtmp
|
||||||
// CMP Rarg1, Rtmp
|
// CMP Rarg1, Rtmp
|
||||||
// BNE fail
|
// BNE fail
|
||||||
// STDCCC Rarg2, (Rarg0)
|
// STDCCC Rarg2, (Rarg0)
|
||||||
// BNE loop
|
// BNE loop
|
||||||
// ISYNC
|
// LWSYNC
|
||||||
// MOVD $1, Rout
|
// MOVD $1, Rout
|
||||||
// BR end
|
// BR end
|
||||||
// fail:
|
// fail:
|
||||||
@ -367,9 +364,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
r1 := v.Args[1].Reg()
|
r1 := v.Args[1].Reg()
|
||||||
r2 := v.Args[2].Reg()
|
r2 := v.Args[2].Reg()
|
||||||
out := v.Reg0()
|
out := v.Reg0()
|
||||||
// SYNC
|
// LWSYNC - Assuming shared data not write-through-required nor
|
||||||
psync := s.Prog(ppc64.ASYNC)
|
// caching-inhibited. See Appendix B.2.2.2 in the ISA 2.07b.
|
||||||
psync.To.Type = obj.TYPE_NONE
|
plwsync1 := s.Prog(ppc64.ALWSYNC)
|
||||||
|
plwsync1.To.Type = obj.TYPE_NONE
|
||||||
// LDAR or LWAR
|
// LDAR or LWAR
|
||||||
p := s.Prog(ld)
|
p := s.Prog(ld)
|
||||||
p.From.Type = obj.TYPE_MEM
|
p.From.Type = obj.TYPE_MEM
|
||||||
@ -395,9 +393,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
p4 := s.Prog(ppc64.ABNE)
|
p4 := s.Prog(ppc64.ABNE)
|
||||||
p4.To.Type = obj.TYPE_BRANCH
|
p4.To.Type = obj.TYPE_BRANCH
|
||||||
gc.Patch(p4, p)
|
gc.Patch(p4, p)
|
||||||
// ISYNC
|
// LWSYNC - Assuming shared data not write-through-required nor
|
||||||
pisync := s.Prog(ppc64.AISYNC)
|
// caching-inhibited. See Appendix B.2.1.1 in the ISA 2.07b.
|
||||||
pisync.To.Type = obj.TYPE_NONE
|
plwsync2 := s.Prog(ppc64.ALWSYNC)
|
||||||
|
plwsync2.To.Type = obj.TYPE_NONE
|
||||||
// return true
|
// return true
|
||||||
p5 := s.Prog(ppc64.AMOVD)
|
p5 := s.Prog(ppc64.AMOVD)
|
||||||
p5.From.Type = obj.TYPE_CONST
|
p5.From.Type = obj.TYPE_CONST
|
||||||
|
@ -17,7 +17,7 @@ TEXT runtime∕internal∕atomic·Cas(SB), NOSPLIT, $0-17
|
|||||||
MOVD ptr+0(FP), R3
|
MOVD ptr+0(FP), R3
|
||||||
MOVWZ old+8(FP), R4
|
MOVWZ old+8(FP), R4
|
||||||
MOVWZ new+12(FP), R5
|
MOVWZ new+12(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
cas_again:
|
cas_again:
|
||||||
LWAR (R3), R6
|
LWAR (R3), R6
|
||||||
CMPW R6, R4
|
CMPW R6, R4
|
||||||
@ -25,7 +25,7 @@ cas_again:
|
|||||||
STWCCC R5, (R3)
|
STWCCC R5, (R3)
|
||||||
BNE cas_again
|
BNE cas_again
|
||||||
MOVD $1, R3
|
MOVD $1, R3
|
||||||
ISYNC
|
LWSYNC
|
||||||
MOVB R3, ret+16(FP)
|
MOVB R3, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
cas_fail:
|
cas_fail:
|
||||||
@ -44,7 +44,7 @@ TEXT runtime∕internal∕atomic·Cas64(SB), NOSPLIT, $0-25
|
|||||||
MOVD ptr+0(FP), R3
|
MOVD ptr+0(FP), R3
|
||||||
MOVD old+8(FP), R4
|
MOVD old+8(FP), R4
|
||||||
MOVD new+16(FP), R5
|
MOVD new+16(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
cas64_again:
|
cas64_again:
|
||||||
LDAR (R3), R6
|
LDAR (R3), R6
|
||||||
CMP R6, R4
|
CMP R6, R4
|
||||||
@ -52,7 +52,7 @@ cas64_again:
|
|||||||
STDCCC R5, (R3)
|
STDCCC R5, (R3)
|
||||||
BNE cas64_again
|
BNE cas64_again
|
||||||
MOVD $1, R3
|
MOVD $1, R3
|
||||||
ISYNC
|
LWSYNC
|
||||||
MOVB R3, ret+24(FP)
|
MOVB R3, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
cas64_fail:
|
cas64_fail:
|
||||||
@ -97,31 +97,29 @@ TEXT runtime∕internal∕atomic·Casp1(SB), NOSPLIT, $0-25
|
|||||||
TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-20
|
TEXT runtime∕internal∕atomic·Xadd(SB), NOSPLIT, $0-20
|
||||||
MOVD ptr+0(FP), R4
|
MOVD ptr+0(FP), R4
|
||||||
MOVW delta+8(FP), R5
|
MOVW delta+8(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
LWAR (R4), R3
|
LWAR (R4), R3
|
||||||
ADD R5, R3
|
ADD R5, R3
|
||||||
STWCCC R3, (R4)
|
STWCCC R3, (R4)
|
||||||
BNE -3(PC)
|
BNE -3(PC)
|
||||||
ISYNC
|
|
||||||
MOVW R3, ret+16(FP)
|
MOVW R3, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-24
|
TEXT runtime∕internal∕atomic·Xadd64(SB), NOSPLIT, $0-24
|
||||||
MOVD ptr+0(FP), R4
|
MOVD ptr+0(FP), R4
|
||||||
MOVD delta+8(FP), R5
|
MOVD delta+8(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
LDAR (R4), R3
|
LDAR (R4), R3
|
||||||
ADD R5, R3
|
ADD R5, R3
|
||||||
STDCCC R3, (R4)
|
STDCCC R3, (R4)
|
||||||
BNE -3(PC)
|
BNE -3(PC)
|
||||||
ISYNC
|
|
||||||
MOVD R3, ret+16(FP)
|
MOVD R3, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-20
|
TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-20
|
||||||
MOVD ptr+0(FP), R4
|
MOVD ptr+0(FP), R4
|
||||||
MOVW new+8(FP), R5
|
MOVW new+8(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
LWAR (R4), R3
|
LWAR (R4), R3
|
||||||
STWCCC R5, (R4)
|
STWCCC R5, (R4)
|
||||||
BNE -2(PC)
|
BNE -2(PC)
|
||||||
@ -132,7 +130,7 @@ TEXT runtime∕internal∕atomic·Xchg(SB), NOSPLIT, $0-20
|
|||||||
TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
|
TEXT runtime∕internal∕atomic·Xchg64(SB), NOSPLIT, $0-24
|
||||||
MOVD ptr+0(FP), R4
|
MOVD ptr+0(FP), R4
|
||||||
MOVD new+8(FP), R5
|
MOVD new+8(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
LDAR (R4), R3
|
LDAR (R4), R3
|
||||||
STDCCC R5, (R4)
|
STDCCC R5, (R4)
|
||||||
BNE -2(PC)
|
BNE -2(PC)
|
||||||
@ -165,24 +163,22 @@ TEXT runtime∕internal∕atomic·Store64(SB), NOSPLIT, $0-16
|
|||||||
TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
|
TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
|
||||||
MOVD ptr+0(FP), R3
|
MOVD ptr+0(FP), R3
|
||||||
MOVBZ val+8(FP), R4
|
MOVBZ val+8(FP), R4
|
||||||
SYNC
|
LWSYNC
|
||||||
again:
|
again:
|
||||||
LBAR (R3), R6
|
LBAR (R3), R6
|
||||||
OR R4, R6
|
OR R4, R6
|
||||||
STBCCC R6, (R3)
|
STBCCC R6, (R3)
|
||||||
BNE again
|
BNE again
|
||||||
ISYNC
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// void runtime∕internal∕atomic·And8(byte volatile*, byte);
|
// void runtime∕internal∕atomic·And8(byte volatile*, byte);
|
||||||
TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
|
TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
|
||||||
MOVD ptr+0(FP), R3
|
MOVD ptr+0(FP), R3
|
||||||
MOVBZ val+8(FP), R4
|
MOVBZ val+8(FP), R4
|
||||||
SYNC
|
LWSYNC
|
||||||
again:
|
again:
|
||||||
LBAR (R3),R6
|
LBAR (R3),R6
|
||||||
AND R4,R6
|
AND R4,R6
|
||||||
STBCCC R6,(R3)
|
STBCCC R6,(R3)
|
||||||
BNE again
|
BNE again
|
||||||
ISYNC
|
|
||||||
RET
|
RET
|
||||||
|
@ -12,7 +12,7 @@ TEXT ·SwapInt32(SB),NOSPLIT,$0-20
|
|||||||
TEXT ·SwapUint32(SB),NOSPLIT,$0-20
|
TEXT ·SwapUint32(SB),NOSPLIT,$0-20
|
||||||
MOVD addr+0(FP), R3
|
MOVD addr+0(FP), R3
|
||||||
MOVW new+8(FP), R4
|
MOVW new+8(FP), R4
|
||||||
SYNC
|
LWSYNC
|
||||||
LWAR (R3), R5
|
LWAR (R3), R5
|
||||||
STWCCC R4, (R3)
|
STWCCC R4, (R3)
|
||||||
BNE -2(PC)
|
BNE -2(PC)
|
||||||
@ -26,7 +26,7 @@ TEXT ·SwapInt64(SB),NOSPLIT,$0-24
|
|||||||
TEXT ·SwapUint64(SB),NOSPLIT,$0-24
|
TEXT ·SwapUint64(SB),NOSPLIT,$0-24
|
||||||
MOVD addr+0(FP), R3
|
MOVD addr+0(FP), R3
|
||||||
MOVD new+8(FP), R4
|
MOVD new+8(FP), R4
|
||||||
SYNC
|
LWSYNC
|
||||||
LDAR (R3), R5
|
LDAR (R3), R5
|
||||||
STDCCC R4, (R3)
|
STDCCC R4, (R3)
|
||||||
BNE -2(PC)
|
BNE -2(PC)
|
||||||
@ -44,13 +44,13 @@ TEXT ·CompareAndSwapUint32(SB),NOSPLIT,$0-17
|
|||||||
MOVD addr+0(FP), R3
|
MOVD addr+0(FP), R3
|
||||||
MOVW old+8(FP), R4
|
MOVW old+8(FP), R4
|
||||||
MOVW new+12(FP), R5
|
MOVW new+12(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
LWAR (R3), R6
|
LWAR (R3), R6
|
||||||
CMPW R6, R4
|
CMPW R6, R4
|
||||||
BNE 7(PC)
|
BNE 7(PC)
|
||||||
STWCCC R5, (R3)
|
STWCCC R5, (R3)
|
||||||
BNE -4(PC)
|
BNE -4(PC)
|
||||||
ISYNC
|
LWSYNC
|
||||||
MOVD $1, R3
|
MOVD $1, R3
|
||||||
MOVB R3, swapped+16(FP)
|
MOVB R3, swapped+16(FP)
|
||||||
RET
|
RET
|
||||||
@ -67,13 +67,13 @@ TEXT ·CompareAndSwapUint64(SB),NOSPLIT,$0-25
|
|||||||
MOVD addr+0(FP), R3
|
MOVD addr+0(FP), R3
|
||||||
MOVD old+8(FP), R4
|
MOVD old+8(FP), R4
|
||||||
MOVD new+16(FP), R5
|
MOVD new+16(FP), R5
|
||||||
SYNC
|
LWSYNC
|
||||||
LDAR (R3), R6
|
LDAR (R3), R6
|
||||||
CMP R6, R4
|
CMP R6, R4
|
||||||
BNE 7(PC)
|
BNE 7(PC)
|
||||||
STDCCC R5, (R3)
|
STDCCC R5, (R3)
|
||||||
BNE -4(PC)
|
BNE -4(PC)
|
||||||
ISYNC
|
LWSYNC
|
||||||
MOVD $1, R3
|
MOVD $1, R3
|
||||||
MOVB R3, swapped+24(FP)
|
MOVB R3, swapped+24(FP)
|
||||||
RET
|
RET
|
||||||
@ -86,12 +86,11 @@ TEXT ·AddInt32(SB),NOSPLIT,$0-20
|
|||||||
TEXT ·AddUint32(SB),NOSPLIT,$0-20
|
TEXT ·AddUint32(SB),NOSPLIT,$0-20
|
||||||
MOVD addr+0(FP), R3
|
MOVD addr+0(FP), R3
|
||||||
MOVW delta+8(FP), R4
|
MOVW delta+8(FP), R4
|
||||||
SYNC
|
LWSYNC
|
||||||
LWAR (R3), R5
|
LWAR (R3), R5
|
||||||
ADD R4, R5
|
ADD R4, R5
|
||||||
STWCCC R5, (R3)
|
STWCCC R5, (R3)
|
||||||
BNE -3(PC)
|
BNE -3(PC)
|
||||||
ISYNC
|
|
||||||
MOVW R5, new+16(FP)
|
MOVW R5, new+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
@ -104,12 +103,11 @@ TEXT ·AddInt64(SB),NOSPLIT,$0-24
|
|||||||
TEXT ·AddUint64(SB),NOSPLIT,$0-24
|
TEXT ·AddUint64(SB),NOSPLIT,$0-24
|
||||||
MOVD addr+0(FP), R3
|
MOVD addr+0(FP), R3
|
||||||
MOVD delta+8(FP), R4
|
MOVD delta+8(FP), R4
|
||||||
SYNC
|
LWSYNC
|
||||||
LDAR (R3), R5
|
LDAR (R3), R5
|
||||||
ADD R4, R5
|
ADD R4, R5
|
||||||
STDCCC R5, (R3)
|
STDCCC R5, (R3)
|
||||||
BNE -3(PC)
|
BNE -3(PC)
|
||||||
ISYNC
|
|
||||||
MOVD R5, new+16(FP)
|
MOVD R5, new+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user