
cmd/compile, runtime: make atomic loads/stores sequentially consistent on s390x

The z/Architecture does not guarantee that a load following a store
will not be reordered with that store, unless they access the same
address. Therefore if we want to ensure the sequential consistency
of atomic loads and stores we need to perform serialization
operations after atomic stores.
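
To see why this matters, consider the classic store-buffer (Dekker) litmus
test. If the machine may reorder a store with a subsequent load, both
goroutines can read 0, an outcome sequential consistency forbids. This
sketch uses the public sync/atomic API (which compiles down to the same
runtime primitives) and is an illustration, not part of the change:

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	func main() {
		for i := 0; i < 100000; i++ {
			var x, y, r1, r2 uint32
			var wg sync.WaitGroup
			wg.Add(2)
			go func() {
				defer wg.Done()
				atomic.StoreUint32(&x, 1)
				r1 = atomic.LoadUint32(&y) // must not be performed before the store
			}()
			go func() {
				defer wg.Done()
				atomic.StoreUint32(&y, 1)
				r2 = atomic.LoadUint32(&x)
			}()
			wg.Wait()
			if r1 == 0 && r2 == 0 {
				fmt.Println("store-load reordering observed")
			}
		}
	}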

We do not need to serialize in the runtime when using StoreRel[ease]
and LoadAcq[uire]. The z/Architecture already provides sufficient
ordering guarantees for these operations.
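
A sketch of why that is safe: StoreRel and LoadAcq (the names in this
change) serve publication-style patterns, which need only store-store and
load-load ordering, and z/Architecture never reorders those. Illustrative
use only; runtime/internal/atomic cannot be imported outside the runtime:

	var data uint32  // published value
	var ready uint32 // publication flag

	// Publisher: the data store must stay ahead of the flag store
	// (store-store ordering, already guaranteed on z/Architecture).
	func publish(v uint32) {
		data = v
		atomic.StoreRel(&ready, 1)
	}

	// Consumer: the flag load must stay ahead of the data load
	// (load-load ordering, also already guaranteed).
	func consume() (uint32, bool) {
		if atomic.LoadAcq(&ready) == 1 {
			return data, true
		}
		return 0, false
	}

Neither side needs a store to be ordered before a later load, which is the
one reordering the hardware may perform, so no serialization is required.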

name              old time/op  new time/op  delta
AtomicLoad64-16   0.51ns ± 0%  0.51ns ± 0%     ~     (all equal)
AtomicStore64-16  0.51ns ± 0%  0.60ns ± 9%  +16.47%  (p=0.000 n=17+20)
AtomicLoad-16     0.51ns ± 0%  0.51ns ± 0%     ~     (all equal)
AtomicStore-16    0.51ns ± 0%  0.60ns ± 9%  +16.50%  (p=0.000 n=18+20)

Fixes #32428.

Change-Id: I88d19a4010c46070e4fff4b41587efe4c628d4d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/180439
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Author: Michael Munday <mike.munday@ibm.com>
Date:   2019-06-04 19:17:41 +01:00
Commit: ac8dbe7747 (parent 53deb81219)
8 changed files with 125 additions and 60 deletions

src/cmd/compile/internal/gc/ssa.go

@@ -3093,7 +3093,7 @@ func init() {
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
 		},
-		sys.PPC64)
+		sys.PPC64, sys.S390X)
 	addF("runtime/internal/atomic", "Loadp",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
@@ -3125,7 +3125,7 @@ func init() {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.PPC64)
+		sys.PPC64, sys.S390X)
 	addF("runtime/internal/atomic", "Xchg",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
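
For context (not part of the diff itself): addF registers an intrinsic, so
on the listed architectures a call to the named runtime/internal/atomic
function is replaced inline by the SSA ops built in the closure instead of
being compiled as a CALL. The two hunks above add s390x to the lists for
LoadAcq and StoreRel. A hedged illustration of the effect:

	// Illustrative only; runtime/internal/atomic is not importable
	// outside the runtime.
	func loadFlag(ready *uint32) uint32 {
		// With sys.S390X listed, this compiles on s390x to an inline
		// AtomicLoadAcq32 op (a MOVWZ atomic load) rather than a
		// function call into the runtime.
		return atomic.LoadAcq(ready)
	}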

src/cmd/compile/internal/s390x/ssa.go

@@ -800,6 +800,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		bne := s.Prog(s390x.ABNE)
 		bne.To.Type = obj.TYPE_BRANCH
 		gc.Patch(bne, cs)
+	case ssa.OpS390XSYNC:
+		s.Prog(s390x.ASYNC)
 	case ssa.OpClobber:
 		// TODO: implement for clobberdead experiment. Nop is ok for now.
 	default:
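
This case lowers the new SSA op to the assembler's SYNC instruction, which
on s390x denotes fast-BCR serialization (BCR 14,0, assuming the facility is
present). After lowering, a sequentially consistent store has this shape
(registers illustrative):

	MOVW	R3, 0(R2) // the atomic store itself
	SYNC              // serialize: no later load may be performed early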

src/cmd/compile/internal/ssa/gen/S390X.rules

@@ -139,16 +139,15 @@
 (RoundToEven x) -> (FIDBR [4] x)
 (Round x) -> (FIDBR [1] x)
 
-// Atomic loads.
-(AtomicLoad8 ptr mem) -> (MOVBZatomicload ptr mem)
-(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
-(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
-(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)
+// Atomic loads and stores.
+// The SYNC instruction (fast-BCR-serialization) prevents store-load
+// reordering. Other sequences of memory operations (load-load,
+// store-store and load-store) are already guaranteed not to be reordered.
+(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
+(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))
 
-// Atomic stores.
-(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
-(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
-(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
+// Store-release doesn't require store-load ordering.
+(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)
 
 // Atomic adds.
 (AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
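
The (a|b|...) alternation expands positionally on both sides of a rule, so
the two combined rules above stand for eight concrete rewrites, including
for example:

	(AtomicLoad8     ptr mem)     -> (MOVBZatomicload ptr mem)
	(AtomicLoadAcq32 ptr mem)     -> (MOVWZatomicload ptr mem)
	(AtomicStore32   ptr val mem) -> (SYNC (MOVWatomicstore ptr val mem))
	(AtomicStore64   ptr val mem) -> (SYNC (MOVDatomicstore ptr val mem))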

src/cmd/compile/internal/ssa/gen/S390XOps.go

@@ -187,6 +187,8 @@ func init() {
 		fpstore    = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
 		fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
 
+		sync = regInfo{inputs: []regMask{0}}
+
 		// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
 		cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
@@ -493,6 +495,9 @@ func init() {
 		{name: "FlagGT"}, // CC=2 (greater than)
 		{name: "FlagOV"}, // CC=3 (overflow)
 
+		// Fast-BCR-serialization to ensure store-load ordering.
+		{name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},
+
 		// Atomic loads. These are just normal loads but return <value,memory> tuples
 		// so they can be properly ordered with other loads.
 		// load from arg0+auxint+aux. arg1=mem.

src/cmd/compile/internal/ssa/opGen.go

@@ -2054,6 +2054,7 @@ const (
 	OpS390XFlagLT
 	OpS390XFlagGT
 	OpS390XFlagOV
+	OpS390XSYNC
 	OpS390XMOVBZatomicload
 	OpS390XMOVWZatomicload
 	OpS390XMOVDatomicload
@@ -27614,6 +27615,12 @@ var opcodeTable = [...]opInfo{
 		argLen: 0,
 		reg:    regInfo{},
 	},
+	{
+		name:   "SYNC",
+		argLen: 1,
+		asm:    s390x.ASYNC,
+		reg:    regInfo{},
+	},
 	{
 		name:    "MOVBZatomicload",
 		auxType: auxSymOff,

src/cmd/compile/internal/ssa/rewriteS390X.go

@@ -61,6 +61,8 @@ func rewriteValueS390X(v *Value) bool {
 		return rewriteValueS390X_OpAtomicLoad64_0(v)
 	case OpAtomicLoad8:
 		return rewriteValueS390X_OpAtomicLoad8_0(v)
+	case OpAtomicLoadAcq32:
+		return rewriteValueS390X_OpAtomicLoadAcq32_0(v)
 	case OpAtomicLoadPtr:
 		return rewriteValueS390X_OpAtomicLoadPtr_0(v)
 	case OpAtomicStore32:
@@ -69,6 +71,8 @@ func rewriteValueS390X(v *Value) bool {
 		return rewriteValueS390X_OpAtomicStore64_0(v)
 	case OpAtomicStorePtrNoWB:
 		return rewriteValueS390X_OpAtomicStorePtrNoWB_0(v)
+	case OpAtomicStoreRel32:
+		return rewriteValueS390X_OpAtomicStoreRel32_0(v)
 	case OpAvg64u:
 		return rewriteValueS390X_OpAvg64u_0(v)
 	case OpBitLen64:
@@ -1132,6 +1136,19 @@ func rewriteValueS390X_OpAtomicLoad8_0(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueS390X_OpAtomicLoadAcq32_0(v *Value) bool {
+	// match: (AtomicLoadAcq32 ptr mem)
+	// cond:
+	// result: (MOVWZatomicload ptr mem)
+	for {
+		mem := v.Args[1]
+		ptr := v.Args[0]
+		v.reset(OpS390XMOVWZatomicload)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+}
 func rewriteValueS390X_OpAtomicLoadPtr_0(v *Value) bool {
 	// match: (AtomicLoadPtr ptr mem)
 	// cond:
@@ -1146,8 +1163,62 @@ func rewriteValueS390X_OpAtomicLoadPtr_0(v *Value) bool {
 	}
 }
 func rewriteValueS390X_OpAtomicStore32_0(v *Value) bool {
+	b := v.Block
 	// match: (AtomicStore32 ptr val mem)
 	// cond:
-	// result: (MOVWatomicstore ptr val mem)
+	// result: (SYNC (MOVWatomicstore ptr val mem))
+	for {
+		mem := v.Args[2]
+		ptr := v.Args[0]
+		val := v.Args[1]
+		v.reset(OpS390XSYNC)
+		v0 := b.NewValue0(v.Pos, OpS390XMOVWatomicstore, types.TypeMem)
+		v0.AddArg(ptr)
+		v0.AddArg(val)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueS390X_OpAtomicStore64_0(v *Value) bool {
+	b := v.Block
+	// match: (AtomicStore64 ptr val mem)
+	// cond:
+	// result: (SYNC (MOVDatomicstore ptr val mem))
+	for {
+		mem := v.Args[2]
+		ptr := v.Args[0]
+		val := v.Args[1]
+		v.reset(OpS390XSYNC)
+		v0 := b.NewValue0(v.Pos, OpS390XMOVDatomicstore, types.TypeMem)
+		v0.AddArg(ptr)
+		v0.AddArg(val)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueS390X_OpAtomicStorePtrNoWB_0(v *Value) bool {
+	b := v.Block
+	// match: (AtomicStorePtrNoWB ptr val mem)
+	// cond:
+	// result: (SYNC (MOVDatomicstore ptr val mem))
+	for {
+		mem := v.Args[2]
+		ptr := v.Args[0]
+		val := v.Args[1]
+		v.reset(OpS390XSYNC)
+		v0 := b.NewValue0(v.Pos, OpS390XMOVDatomicstore, types.TypeMem)
+		v0.AddArg(ptr)
+		v0.AddArg(val)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueS390X_OpAtomicStoreRel32_0(v *Value) bool {
+	// match: (AtomicStoreRel32 ptr val mem)
+	// cond:
+	// result: (MOVWatomicstore ptr val mem)
 	for {
 		mem := v.Args[2]
@@ -1160,36 +1231,6 @@ func rewriteValueS390X_OpAtomicStore32_0(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueS390X_OpAtomicStore64_0(v *Value) bool {
-	// match: (AtomicStore64 ptr val mem)
-	// cond:
-	// result: (MOVDatomicstore ptr val mem)
-	for {
-		mem := v.Args[2]
-		ptr := v.Args[0]
-		val := v.Args[1]
-		v.reset(OpS390XMOVDatomicstore)
-		v.AddArg(ptr)
-		v.AddArg(val)
-		v.AddArg(mem)
-		return true
-	}
-}
-func rewriteValueS390X_OpAtomicStorePtrNoWB_0(v *Value) bool {
-	// match: (AtomicStorePtrNoWB ptr val mem)
-	// cond:
-	// result: (MOVDatomicstore ptr val mem)
-	for {
-		mem := v.Args[2]
-		ptr := v.Args[0]
-		val := v.Args[1]
-		v.reset(OpS390XMOVDatomicstore)
-		v.AddArg(ptr)
-		v.AddArg(val)
-		v.AddArg(mem)
-		return true
-	}
-}
 func rewriteValueS390X_OpAvg64u_0(v *Value) bool {
 	b := v.Block
 	// match: (Avg64u <t> x y)
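
Note how the generated rewrite threads the memory chain: the store produces
a new memory state, SYNC consumes it, and the SYNC result replaces the
original store's memory, so every later memory operation is ordered after
the serialization point. Schematically (not literal compiler output):

	v0 = MOVWatomicstore ptr val mem // the store produces new memory
	v  = SYNC v0                     // serialization consumes it
	// all later loads and stores use v, never v0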

src/runtime/internal/atomic/asm_s390x.s

@@ -4,6 +4,30 @@
 
 #include "textflag.h"
 
+// func Store(ptr *uint32, val uint32)
+TEXT ·Store(SB), NOSPLIT, $0
+	MOVD	ptr+0(FP), R2
+	MOVWZ	val+8(FP), R3
+	MOVW	R3, 0(R2)
+	SYNC
+	RET
+
+// func Store64(ptr *uint64, val uint64)
+TEXT ·Store64(SB), NOSPLIT, $0
+	MOVD	ptr+0(FP), R2
+	MOVD	val+8(FP), R3
+	MOVD	R3, 0(R2)
+	SYNC
+	RET
+
+// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
+TEXT ·StorepNoWB(SB), NOSPLIT, $0
+	MOVD	ptr+0(FP), R2
+	MOVD	val+8(FP), R3
+	MOVD	R3, 0(R2)
+	SYNC
+	RET
+
 // func Cas(ptr *uint32, old, new uint32) bool
 // Atomically:
 //	if *ptr == old {

src/runtime/internal/atomic/atomic_s390x.go

@@ -36,30 +36,17 @@ func LoadAcq(ptr *uint32) uint32 {
 	return *ptr
 }
 
-//go:noinline
-//go:nosplit
-func Store(ptr *uint32, val uint32) {
-	*ptr = val
-}
+//go:noescape
+func Store(ptr *uint32, val uint32)
 
-//go:noinline
-//go:nosplit
-func Store64(ptr *uint64, val uint64) {
-	*ptr = val
-}
-
-//go:notinheap
-type noWB struct{}
+//go:noescape
+func Store64(ptr *uint64, val uint64)
 
 // NO go:noescape annotation; see atomic_pointer.go.
-//go:noinline
-//go:nosplit
-func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) {
-	*(**noWB)(ptr) = (*noWB)(val)
-}
+func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
 
 //go:noinline
 //go:nosplit
 func StoreRel(ptr *uint32, val uint32) {
 	*ptr = val
 }
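
In summary, the s390x lowerings after this change are (instruction shapes
illustrative, taken from the rules above):

	Load, Load64, LoadAcq, ...  -> MOV(BZ|WZ|D) atomic load // no fence needed
	Store, Store64, StorepNoWB  -> MOV(W|D) store + SYNC    // forbid store-load
	StoreRel                    -> MOVW store               // release is free

StoreRel keeps its pure-Go body because an aligned plain store is already
atomic on z/Architecture and release semantics do not forbid store-load
reordering. The new //go:noescape annotations assert that the assembly
implementations do not let their pointer arguments escape.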