cmd/compile: improve atomic swap intrinsics on arm64

ARMv8.1 added new instructions for atomic memory operations. This change
builds on the previous change that added support for atomic add
(0a7ac93c27) to provide similar support for the atomic compare-and-swap,
atomic swap, atomic OR, and atomic AND intrinsics. Since the new
instructions are not guaranteed to be present, their use is guarded by a
branch on a CPU feature flag.
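
As a rough sketch of the guarded shape this produces at each intrinsic
call site, in plain Go (hasATOMICS and xchg are illustrative stand-in
names; the compiler emits this branch directly in SSA, not Go source):

package sketch

import "sync/atomic"

// hasATOMICS stands in for the runtime's arm64 CPU-feature flag,
// detected once at startup.
var hasATOMICS bool

// xchg shows the two-way dispatch: a one-instruction ARMv8.1 fast path,
// and the classic ARMv8.0 load/compare-and-swap retry loop as fallback.
func xchg(addr *uint32, new uint32) uint32 {
	if hasATOMICS {
		// On ARMv8.1 hardware this path compiles to a single SWPALW.
		return atomic.SwapUint32(addr, new)
	}
	// ARMv8.0 fallback, corresponding to an LDAXR/STLXR loop.
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, new) {
			return old
		}
	}
}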

Performance on an ARMv8.1 machine:

name                 old time/op  new time/op  delta
CompareAndSwap-16    37.9ns ±16%  24.1ns ± 4%  -36.44%  (p=0.000 n=10+9)
CompareAndSwap64-16  38.6ns ±15%  24.1ns ± 3%  -37.47%  (p=0.000 n=10+10)

name                 old time/op  new time/op  delta
Swap-16              46.9ns ±32%  12.5ns ± 6%  -73.40%  (p=0.000 n=10+10)
Swap64-16            53.4ns ± 1%  12.5ns ± 6%  -76.56%  (p=0.000 n=10+10)

name                 old time/op  new time/op  delta
Or8-16               8.81ns ± 0%  5.61ns ± 0%  -36.32%  (p=0.000 n=10+10)
Or-16                7.21ns ± 0%  5.61ns ± 0%  -22.19%  (p=0.000 n=10+10)
Or8Parallel-16       59.8ns ± 3%  12.5ns ± 2%  -79.10%  (p=0.000 n=10+10)
OrParallel-16        51.7ns ± 3%  12.5ns ± 2%  -75.84%  (p=0.000 n=10+10)

name                 old time/op  new time/op  delta
And8-16              8.81ns ± 0%  5.61ns ± 0%  -36.32%  (p=0.000 n=10+10)
And-16               7.21ns ± 0%  5.61ns ± 0%  -22.19%  (p=0.000 n=10+10)
And8Parallel-16      59.1ns ± 6%  12.8ns ± 3%  -78.33%  (p=0.000 n=10+10)
AndParallel-16       51.4ns ± 7%  12.8ns ± 3%  -75.03%  (p=0.000 n=10+10)

Performance on an ARMv8.0 machine (no ARMv8.1 atomic instructions):

name                 old time/op  new time/op  delta
CompareAndSwap-16    61.3ns ± 0%  62.4ns ± 0%  +1.70%  (p=0.000 n=8+9)
CompareAndSwap64-16  62.0ns ± 3%  61.3ns ± 2%    ~     (p=0.093 n=10+10)

name                 old time/op  new time/op  delta
Swap-16               127ns ± 2%   131ns ± 2%  +2.91%  (p=0.001 n=10+10)
Swap64-16             128ns ± 1%   131ns ± 2%  +2.43%  (p=0.001 n=10+10)

name                 old time/op  new time/op  delta
Or8-16               14.9ns ± 0%  15.3ns ± 0%  +2.68%  (p=0.000 n=10+10)
Or-16                11.8ns ± 0%  12.3ns ± 0%  +4.24%  (p=0.000 n=10+10)
Or8Parallel-16        137ns ± 1%   144ns ± 1%  +4.97%  (p=0.000 n=10+10)
OrParallel-16         128ns ± 1%   136ns ± 1%  +6.34%  (p=0.000 n=10+10)

name                 old time/op  new time/op  delta
And8-16              14.9ns ± 0%  15.3ns ± 0%  +2.68%  (p=0.000 n=10+10)
And-16               11.8ns ± 0%  12.3ns ± 0%  +4.24%  (p=0.000 n=10+10)
And8Parallel-16       134ns ± 2%   141ns ± 1%  +5.29%  (p=0.000 n=10+10)
AndParallel-16        125ns ± 2%   134ns ± 1%  +7.10%  (p=0.000 n=10+10)
Fixes #39304
Change-Id: Idaca68701d4751650be6b4bedca3d57f51571712
Reviewed-on: https://go-review.googlesource.com/c/go/+/234217
Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Trust: fannie zhang <Fannie.Zhang@arm.com>
parent 8e5778ed70
commit ecc3f5112e
@@ -581,6 +581,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p2.From.Reg = arm64.REGTMP
 		p2.To.Type = obj.TYPE_BRANCH
 		gc.Patch(p2, p)
+	case ssa.OpARM64LoweredAtomicExchange64Variant,
+		ssa.OpARM64LoweredAtomicExchange32Variant:
+		swap := arm64.ASWPALD
+		if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
+			swap = arm64.ASWPALW
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		out := v.Reg0()
+
+		// SWPALD	Rarg1, (Rarg0), Rout
+		p := s.Prog(swap)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = r0
+		p.RegTo2 = out
+
 	case ssa.OpARM64LoweredAtomicAdd64,
 		ssa.OpARM64LoweredAtomicAdd32:
 		// LDAXR	(Rarg0), Rout
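
For reference, the semantics of the SWPAL{W,D} fast path above, modeled
with the portable sync/atomic primitive (a sketch under stated
assumptions; swapModel is an illustrative name, not compiler output):

package sketch

import "sync/atomic"

// swapModel mirrors what a single SWPALD does on ARMv8.1: atomically
// store val into *addr and return the previous contents, with
// acquire-release ordering.
func swapModel(addr *uint64, val uint64) (old uint64) {
	return atomic.SwapUint64(addr, val)
}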
@@ -687,6 +705,56 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p5.To.Type = obj.TYPE_REG
 		p5.To.Reg = out
 		gc.Patch(p2, p5)
+	case ssa.OpARM64LoweredAtomicCas64Variant,
+		ssa.OpARM64LoweredAtomicCas32Variant:
+		// Rarg0: ptr
+		// Rarg1: old
+		// Rarg2: new
+		// MOV  	Rarg1, Rtmp
+		// CASAL	Rtmp, (Rarg0), Rarg2
+		// CMP  	Rarg1, Rtmp
+		// CSET 	EQ, Rout
+		cas := arm64.ACASALD
+		cmp := arm64.ACMP
+		mov := arm64.AMOVD
+		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
+			cas = arm64.ACASALW
+			cmp = arm64.ACMPW
+			mov = arm64.AMOVW
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		r2 := v.Args[2].Reg()
+		out := v.Reg0()
+
+		// MOV  	Rarg1, Rtmp
+		p := s.Prog(mov)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = arm64.REGTMP
+
+		// CASAL	Rtmp, (Rarg0), Rarg2
+		p1 := s.Prog(cas)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = arm64.REGTMP
+		p1.To.Type = obj.TYPE_MEM
+		p1.To.Reg = r0
+		p1.RegTo2 = r2
+
+		// CMP  	Rarg1, Rtmp
+		p2 := s.Prog(cmp)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = r1
+		p2.Reg = arm64.REGTMP
+
+		// CSET 	EQ, Rout
+		p3 := s.Prog(arm64.ACSET)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = arm64.COND_EQ
+		p3.To.Type = obj.TYPE_REG
+		p3.To.Reg = out
+
 	case ssa.OpARM64LoweredAtomicAnd8,
 		ssa.OpARM64LoweredAtomicAnd32,
 		ssa.OpARM64LoweredAtomicOr8,
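
A single-goroutine model of the four-instruction sequence above may help;
the real CASAL performs the read-compare-write atomically, which this
plain-Go model (with illustrative names) does not:

package sketch

// casModel follows the emitted sequence step by step. CASAL leaves the
// value it observed in the compare register, which is why the trailing
// CMP/CSET pair is needed to turn the outcome into a bool.
func casModel(mem *uint64, old, new uint64) bool {
	tmp := old       // MOV   Rarg1, Rtmp
	observed := *mem // CASAL Rtmp, (Rarg0), Rarg2:
	if observed == tmp {
		*mem = new // the store happens only on a match...
	}
	tmp = observed    // ...and Rtmp ends up holding the observed value
	return tmp == old // CMP Rarg1, Rtmp; CSET EQ, Rout
}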
@@ -725,6 +793,63 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p3.From.Reg = arm64.REGTMP
 		p3.To.Type = obj.TYPE_BRANCH
 		gc.Patch(p3, p)
+	case ssa.OpARM64LoweredAtomicAnd8Variant,
+		ssa.OpARM64LoweredAtomicAnd32Variant:
+		atomic_clear := arm64.ALDCLRALW
+		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
+			atomic_clear = arm64.ALDCLRALB
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		out := v.Reg0()
+
+		// MVN	Rarg1, Rtemp
+		p := s.Prog(arm64.AMVN)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = arm64.REGTMP
+
+		// LDCLRALW	Rtemp, (Rarg0), Rout
+		p1 := s.Prog(atomic_clear)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = arm64.REGTMP
+		p1.To.Type = obj.TYPE_MEM
+		p1.To.Reg = r0
+		p1.RegTo2 = out
+
+		// AND	Rarg1, Rout
+		p2 := s.Prog(arm64.AAND)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = r1
+		p2.To.Type = obj.TYPE_REG
+		p2.To.Reg = out
+
+	case ssa.OpARM64LoweredAtomicOr8Variant,
+		ssa.OpARM64LoweredAtomicOr32Variant:
+		atomic_or := arm64.ALDORALW
+		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
+			atomic_or = arm64.ALDORALB
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		out := v.Reg0()
+
+		// LDORALW	Rarg1, (Rarg0), Rout
+		p := s.Prog(atomic_or)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = r0
+		p.RegTo2 = out
+
+		// ORR	Rarg1, Rout
+		p2 := s.Prog(arm64.AORR)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = r1
+		p2.To.Type = obj.TYPE_REG
+		p2.To.Reg = out
+
 	case ssa.OpARM64MOVBreg,
 		ssa.OpARM64MOVBUreg,
 		ssa.OpARM64MOVHreg,
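
The MVN-then-LDCLRAL pairing in the AND case works because the new
instruction set provides an atomic bit-clear (AND NOT) but no atomic AND:
clearing the complement of the mask is the same operation. A one-line Go
identity shows why (andViaClear is a hypothetical helper, shown only to
illustrate):

package sketch

// andViaClear demonstrates the identity the lowering relies on:
// x & v == x &^ ^v, i.e. AND with v equals clearing the complement of v.
func andViaClear(x, v uint32) uint32 {
	return x &^ ^v // always equal to x & v
}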
@@ -3458,14 +3458,64 @@ func init() {
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "Xchg64",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+
+	type atomicOpEmitter func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType)
+
+	makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.EType, emit atomicOpEmitter) intrinsicBuilder {
+
+		return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			// Target Atomic feature is identified by dynamic detection
+			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64HasATOMICS, s.sb)
+			v := s.load(types.Types[TBOOL], addr)
+			b := s.endBlock()
+			b.Kind = ssa.BlockIf
+			b.SetControl(v)
+			bTrue := s.f.NewBlock(ssa.BlockPlain)
+			bFalse := s.f.NewBlock(ssa.BlockPlain)
+			bEnd := s.f.NewBlock(ssa.BlockPlain)
+			b.AddEdgeTo(bTrue)
+			b.AddEdgeTo(bFalse)
+			b.Likely = ssa.BranchLikely
+
+			// We have atomic instructions - use it directly.
+			s.startBlock(bTrue)
+			emit(s, n, args, op1, typ)
+			s.endBlock().AddEdgeTo(bEnd)
+
+			// Use original instruction sequence.
+			s.startBlock(bFalse)
+			emit(s, n, args, op0, typ)
+			s.endBlock().AddEdgeTo(bEnd)
+
+			// Merge results.
+			s.startBlock(bEnd)
+			if rtyp == TNIL {
+				return nil
+			} else {
+				return s.variable(n, types.Types[rtyp])
+			}
+		}
+	}
+
+	atomicXchgXaddEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+		s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+	}
+	addF("runtime/internal/atomic", "Xchg",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Xchg64",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
+		sys.ARM64)
+
 	addF("runtime/internal/atomic", "Xadd",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
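
A rough Go rendering of the diamond makeAtomicGuardedIntrinsicARM64
builds (guardedDo and its parameters are illustrative stand-ins; the
builder emits SSA blocks, not Go):

package sketch

// guardedDo mirrors the if/else diamond: one branch per lowering, then a
// merge block. rtyp == TNIL corresponds to intrinsics such as And/Or that
// return no value, so there is nothing to merge on that path.
func guardedDo(hasLSE bool, fast, slow func() uint64, returnsValue bool) (uint64, bool) {
	var v uint64
	if hasLSE { // marked BranchLikely in the SSA
		v = fast()
	} else {
		v = slow()
	}
	if !returnsValue {
		return 0, false // TNIL: no result variable to read back
	}
	return v, true // otherwise the merged value of the two branches
}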
@@ -3482,46 +3532,11 @@ func init() {
 		},
 		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 
-	makeXaddARM64 := func(op0 ssa.Op, op1 ssa.Op, ty types.EType) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-		return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			// Target Atomic feature is identified by dynamic detection
-			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64HasATOMICS, s.sb)
-			v := s.load(types.Types[TBOOL], addr)
-			b := s.endBlock()
-			b.Kind = ssa.BlockIf
-			b.SetControl(v)
-			bTrue := s.f.NewBlock(ssa.BlockPlain)
-			bFalse := s.f.NewBlock(ssa.BlockPlain)
-			bEnd := s.f.NewBlock(ssa.BlockPlain)
-			b.AddEdgeTo(bTrue)
-			b.AddEdgeTo(bFalse)
-			b.Likely = ssa.BranchUnlikely // most machines don't have Atomics nowadays
-
-			// We have atomic instructions - use it directly.
-			s.startBlock(bTrue)
-			v0 := s.newValue3(op1, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v0)
-			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v0)
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Use original instruction sequence.
-			s.startBlock(bFalse)
-			v1 := s.newValue3(op0, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v1)
-			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v1)
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Merge results.
-			s.startBlock(bEnd)
-			return s.variable(n, types.Types[ty])
-		}
-	}
-
 	addF("runtime/internal/atomic", "Xadd",
-		makeXaddARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
 		sys.ARM64)
 	addF("runtime/internal/atomic", "Xadd64",
-		makeXaddARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
 		sys.ARM64)
 
 	addF("runtime/internal/atomic", "Cas",
@@ -3530,14 +3545,14 @@ func init() {
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "Cas64",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "CasRel",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
@@ -3546,18 +3561,31 @@ func init() {
 		},
 		sys.PPC64)
 
+	atomicCasEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+		v := s.newValue4(op, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+		s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+	}
+
+	addF("runtime/internal/atomic", "Cas",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, TUINT32, TBOOL, atomicCasEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Cas64",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, TUINT64, TBOOL, atomicCasEmitterARM64),
+		sys.ARM64)
+
 	addF("runtime/internal/atomic", "And8",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
 	addF("runtime/internal/atomic", "And",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
 	addF("runtime/internal/atomic", "Or8",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3569,7 +3597,24 @@ func init() {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
+
+	atomicAndOrEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+		s.vars[&memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
+	}
+
+	addF("runtime/internal/atomic", "And8",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "And",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Or8",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Or",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
 
 	alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
 	alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
@@ -543,17 +543,24 @@
 (AtomicStore64      ...) => (STLR  ...)
 (AtomicStorePtrNoWB ...) => (STLR  ...)
 
-(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
-(AtomicAdd(32|64)      ...) => (LoweredAtomicAdd(32|64)      ...)
+(AtomicExchange(32|64)       ...) => (LoweredAtomicExchange(32|64) ...)
+(AtomicAdd(32|64)            ...) => (LoweredAtomicAdd(32|64)      ...)
 (AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
 
+(AtomicAdd(32|64)Variant            ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicExchange(32|64)Variant       ...) => (LoweredAtomicExchange(32|64)Variant ...)
+(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...)
+
 // Currently the updated value is not used, but we need a register to temporarily hold it.
 (AtomicAnd8  ptr val mem) => (Select1 (LoweredAtomicAnd8  ptr val mem))
 (AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
 (AtomicOr8   ptr val mem) => (Select1 (LoweredAtomicOr8   ptr val mem))
 (AtomicOr32  ptr val mem) => (Select1 (LoweredAtomicOr32  ptr val mem))
 
-(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicAnd8Variant  ptr val mem) => (Select1 (LoweredAtomicAnd8Variant  ptr val mem))
+(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+(AtomicOr8Variant   ptr val mem) => (Select1 (LoweredAtomicOr8Variant   ptr val mem))
+(AtomicOr32Variant  ptr val mem) => (Select1 (LoweredAtomicOr32Variant  ptr val mem))
 
 // Write barrier.
 (WB ...) => (LoweredWB ...)
@@ -621,6 +621,12 @@ func init() {
 		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
+		// atomic exchange variant.
+		// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
+		// SWPALD	Rarg1, (Rarg0), Rout
+		{name: "LoweredAtomicExchange64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicExchange32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+
 		// atomic add.
 		// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
 		// LDAXR	(Rarg0), Rout
@@ -654,6 +660,21 @@ func init() {
 		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
+		// atomic compare and swap variant.
+		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
+		// if *arg0 == arg1 {
+		//   *arg0 = arg2
+		//   return (true, memory)
+		// } else {
+		//   return (false, memory)
+		// }
+		// MOV  	Rarg1, Rtmp
+		// CASAL	Rtmp, (Rarg0), Rarg2
+		// CMP  	Rarg1, Rtmp
+		// CSET 	EQ, Rout
+		{name: "LoweredAtomicCas64Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+
 		// atomic and/or.
 		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
 		// LDAXR	(Rarg0), Rout
@@ -665,6 +686,20 @@ func init() {
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
+		// atomic and/or variant.
+		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
+		// AND:
+		//   MVN  	Rarg1, Rtemp
+		//   LDCLRALB	Rtemp, (Rarg0), Rout
+		//   AND  	Rarg1, Rout
+		// OR:
+		//   LDORALB	Rarg1, (Rarg0), Rout
+		//   ORR  	Rarg1, Rout
+		{name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 		// It saves all GP registers if necessary,
 		// but clobbers R30 (LR) because it's a call.
@@ -574,8 +574,16 @@ var genericOps = []opData{
 	// These variants have the same semantics as above atomic operations.
 	// But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection.
 	// Currently, they are used on ARM64 only.
-	{name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
-	{name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+	{name: "AtomicAdd32Variant",            argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+	{name: "AtomicAdd64Variant",            argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+	{name: "AtomicExchange32Variant",       argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicExchange64Variant",       argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)",   hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
+	{name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)",   hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
+	{name: "AtomicAnd8Variant",             argLength: 3, typ: "Mem",          hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
+	{name: "AtomicAnd32Variant",            argLength: 3, typ: "Mem",          hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
+	{name: "AtomicOr8Variant",              argLength: 3, typ: "Mem",          hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
+	{name: "AtomicOr32Variant",             argLength: 3, typ: "Mem",          hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
 
 	// Clobber experiment op
 	{name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable
@@ -1581,16 +1581,24 @@ const (
 	OpARM64STLRW
 	OpARM64LoweredAtomicExchange64
 	OpARM64LoweredAtomicExchange32
+	OpARM64LoweredAtomicExchange64Variant
+	OpARM64LoweredAtomicExchange32Variant
 	OpARM64LoweredAtomicAdd64
 	OpARM64LoweredAtomicAdd32
 	OpARM64LoweredAtomicAdd64Variant
 	OpARM64LoweredAtomicAdd32Variant
 	OpARM64LoweredAtomicCas64
 	OpARM64LoweredAtomicCas32
+	OpARM64LoweredAtomicCas64Variant
+	OpARM64LoweredAtomicCas32Variant
 	OpARM64LoweredAtomicAnd8
 	OpARM64LoweredAtomicAnd32
 	OpARM64LoweredAtomicOr8
 	OpARM64LoweredAtomicOr32
+	OpARM64LoweredAtomicAnd8Variant
+	OpARM64LoweredAtomicAnd32Variant
+	OpARM64LoweredAtomicOr8Variant
+	OpARM64LoweredAtomicOr32Variant
 	OpARM64LoweredWB
 	OpARM64LoweredPanicBoundsA
 	OpARM64LoweredPanicBoundsB
@@ -2881,6 +2889,14 @@ const (
 	OpAtomicOr32
 	OpAtomicAdd32Variant
 	OpAtomicAdd64Variant
+	OpAtomicExchange32Variant
+	OpAtomicExchange64Variant
+	OpAtomicCompareAndSwap32Variant
+	OpAtomicCompareAndSwap64Variant
+	OpAtomicAnd8Variant
+	OpAtomicAnd32Variant
+	OpAtomicOr8Variant
+	OpAtomicOr32Variant
 	OpClobber
 )
@@ -20994,6 +21010,38 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicExchange64Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicExchange32Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name:   "LoweredAtomicAdd64",
 		argLen: 3,
@@ -21098,6 +21146,44 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicCas64Variant",
+		argLen:          4,
+		resultNotInArgs: true,
+		clobberFlags:    true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicCas32Variant",
+		argLen:          4,
+		resultNotInArgs: true,
+		clobberFlags:    true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name:   "LoweredAtomicAnd8",
 		argLen: 3,
@@ -21170,6 +21256,72 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicAnd8Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicAnd32Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicOr8Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicOr32Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name:    "LoweredWB",
 		auxType: auxSym,
@@ -35874,6 +36026,54 @@ var opcodeTable = [...]opInfo{
 		hasSideEffects: true,
 		generic:        true,
 	},
+	{
+		name:           "AtomicExchange32Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicExchange64Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicCompareAndSwap32Variant",
+		argLen:         4,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicCompareAndSwap64Variant",
+		argLen:         4,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicAnd8Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicAnd32Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicOr8Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicOr32Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
 	{
 		name:    "Clobber",
 		auxType: auxSymOff,
@@ -426,20 +426,36 @@ func rewriteValueARM64(v *Value) bool {
 		return true
 	case OpAtomicAnd32:
 		return rewriteValueARM64_OpAtomicAnd32(v)
+	case OpAtomicAnd32Variant:
+		return rewriteValueARM64_OpAtomicAnd32Variant(v)
 	case OpAtomicAnd8:
 		return rewriteValueARM64_OpAtomicAnd8(v)
+	case OpAtomicAnd8Variant:
+		return rewriteValueARM64_OpAtomicAnd8Variant(v)
 	case OpAtomicCompareAndSwap32:
 		v.Op = OpARM64LoweredAtomicCas32
 		return true
+	case OpAtomicCompareAndSwap32Variant:
+		v.Op = OpARM64LoweredAtomicCas32Variant
+		return true
 	case OpAtomicCompareAndSwap64:
 		v.Op = OpARM64LoweredAtomicCas64
 		return true
+	case OpAtomicCompareAndSwap64Variant:
+		v.Op = OpARM64LoweredAtomicCas64Variant
+		return true
 	case OpAtomicExchange32:
 		v.Op = OpARM64LoweredAtomicExchange32
 		return true
+	case OpAtomicExchange32Variant:
+		v.Op = OpARM64LoweredAtomicExchange32Variant
+		return true
 	case OpAtomicExchange64:
 		v.Op = OpARM64LoweredAtomicExchange64
 		return true
+	case OpAtomicExchange64Variant:
+		v.Op = OpARM64LoweredAtomicExchange64Variant
+		return true
 	case OpAtomicLoad32:
 		v.Op = OpARM64LDARW
 		return true
@@ -454,8 +470,12 @@ func rewriteValueARM64(v *Value) bool {
 		return true
 	case OpAtomicOr32:
 		return rewriteValueARM64_OpAtomicOr32(v)
+	case OpAtomicOr32Variant:
+		return rewriteValueARM64_OpAtomicOr32Variant(v)
 	case OpAtomicOr8:
 		return rewriteValueARM64_OpAtomicOr8(v)
+	case OpAtomicOr8Variant:
+		return rewriteValueARM64_OpAtomicOr8Variant(v)
 	case OpAtomicStore32:
 		v.Op = OpARM64STLRW
 		return true
@@ -21363,6 +21383,25 @@ func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicAnd32Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21382,6 +21421,25 @@ func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicAnd8Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21401,6 +21459,25 @@ func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicOr32Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21420,6 +21497,25 @@ func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicOr8Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAvg64u(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -142,3 +142,54 @@ func BenchmarkXadd64(b *testing.B) {
 		}
 	})
 }
+
+func BenchmarkCas(b *testing.B) {
+	var x uint32
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			atomic.Cas(ptr, 1, 0)
+			atomic.Cas(ptr, 0, 1)
+		}
+	})
+}
+
+func BenchmarkCas64(b *testing.B) {
+	var x uint64
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			atomic.Cas64(ptr, 1, 0)
+			atomic.Cas64(ptr, 0, 1)
+		}
+	})
+}
+func BenchmarkXchg(b *testing.B) {
+	var x uint32
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		var y uint32
+		y = 1
+		for pb.Next() {
+			y = atomic.Xchg(ptr, y)
+			y += 1
+		}
+	})
+}
+
+func BenchmarkXchg64(b *testing.B) {
+	var x uint64
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		var y uint64
+		y = 1
+		for pb.Next() {
+			y = atomic.Xchg64(ptr, y)
+			y += 1
+		}
+	})
+}
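
These runtime/internal/atomic entry points are not called directly by user
code, but the compiler maps the public sync/atomic functions to the same
intrinsics, so ordinary Go programs should pick up the faster lowering as
well. A hedged example (the comment about the emitted instruction assumes
an ARMv8.1 arm64 machine):

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var x uint32 = 1
	// On an arm64 machine with the CPU feature flag set, this swap takes
	// the guarded fast path added by this change (a single SWPALW).
	old := atomic.SwapUint32(&x, 2)
	fmt.Println(old, x) // 1 2
}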