From 0b2ad1d815ea8967c49b32d848b2992d0c588d88 Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Mon, 19 Dec 2022 05:04:48 +0800 Subject: [PATCH] cmd/compile: sign-extend the 2nd argument of the LoweredAtomicCas32 on loong64,mips64x,riscv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function LoweredAtomicCas32 is implemented using the LL-SC instruction pair on loong64, mips64x, riscv64. However,the LL instruction on loong64, mips64x, riscv64 is sign-extended, so it is necessary to sign-extend the 2nd parameter "old" of the LoweredAtomicCas32, so that the instruction BNE after LL can get the desired result. The function prototype of LoweredAtomicCas32 in golang: func Cas32(ptr *uint32, old, new uint32) bool When using an intrinsify implementation: case 1: (*ptr) <= 0x80000000 && old < 0x80000000 E.g: (*ptr) = 0x7FFFFFFF, old = Rarg1= 0x7FFFFFFF After run the instruction "LL (Rarg0), Rtmp": Rtmp = 0x7FFFFFFF Rtmp ! = Rarg1(old) is false, the result we expect case 2: (*ptr) >= 0x80000000 && old >= 0x80000000 E.g: (*ptr) = 0x80000000, old = Rarg1= 0x80000000 After run the instruction "LL (Rarg0), Rtmp": Rtmp = 0xFFFFFFFF_80000000 Rtmp ! = Rarg1(old) is true, which we do not expect When using an non-intrinsify implementation: Because Rarg1 is loaded from the stack using sign-extended instructions ld.w, the situation described in Case 2 above does not occur Benchmarks on linux/loong64: name old time/op new time/op delta Cas 50.0ns ± 0% 50.1ns ± 0% ~ (p=1.000 n=1+1) Cas64 50.0ns ± 0% 50.1ns ± 0% ~ (p=1.000 n=1+1) Cas-4 56.0ns ± 0% 56.0ns ± 0% ~ (p=1.000 n=1+1) Cas64-4 56.0ns ± 0% 56.0ns ± 0% ~ (p=1.000 n=1+1) Benchmarks on Loongson 3A4000 (GOARCH=mips64le, 1.8GHz) name old time/op new time/op delta Cas 70.4ns ± 0% 70.3ns ± 0% ~ (p=1.000 n=1+1) Cas64 70.7ns ± 0% 70.6ns ± 0% ~ (p=1.000 n=1+1) Cas-4 81.1ns ± 0% 80.8ns ± 0% ~ (p=1.000 n=1+1) Cas64-4 80.9ns ± 0% 80.9ns ± 0% ~ (p=1.000 n=1+1) Fixes #57282 Change-Id: I190a7fc648023b15fa392f7fdda5ac18c1561bac Reviewed-on: https://go-review.googlesource.com/c/go/+/457135 Run-TryBot: Than McIntosh Reviewed-by: Cherry Mui Reviewed-by: Matthew Dempsky TryBot-Result: Gopher Robot Reviewed-by: Wayne Zuo Reviewed-by: Than McIntosh Reviewed-by: David Chase --- .../compile/internal/ssa/_gen/LOONG64.rules | 3 +- .../compile/internal/ssa/_gen/MIPS64.rules | 3 +- .../compile/internal/ssa/_gen/RISCV64.rules | 2 +- .../compile/internal/ssa/rewriteLOONG64.go | 24 +++++++++++++-- src/cmd/compile/internal/ssa/rewriteMIPS64.go | 24 +++++++++++++-- .../compile/internal/ssa/rewriteRISCV64.go | 24 +++++++++++++-- src/runtime/internal/atomic/atomic_test.go | 30 +++++++++++++++++++ 7 files changed, 101 insertions(+), 9 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 2810f0afe12..1caaf13600d 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -400,7 +400,8 @@ (AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) -(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) +(AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) +(AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) // checks (NilCheck ...) => (LoweredNilCheck ...) diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules index 17634afd729..a594df2b266 100644 --- a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules +++ b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules @@ -392,7 +392,8 @@ (AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) -(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) +(AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) +(AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) // checks (NilCheck ...) => (LoweredNilCheck ...) diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules index 78c3375e2d6..59f71be5baf 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules @@ -577,7 +577,7 @@ (AtomicAnd32 ...) => (LoweredAtomicAnd32 ...) -(AtomicCompareAndSwap32 ...) => (LoweredAtomicCas32 ...) +(AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) (AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) (AtomicExchange32 ...) => (LoweredAtomicExchange32 ...) diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 26d6594fef3..f6da0b7ff0b 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -52,8 +52,7 @@ func rewriteValueLOONG64(v *Value) bool { v.Op = OpLOONG64LoweredAtomicAdd64 return true case OpAtomicCompareAndSwap32: - v.Op = OpLOONG64LoweredAtomicCas32 - return true + return rewriteValueLOONG64_OpAtomicCompareAndSwap32(v) case OpAtomicCompareAndSwap64: v.Op = OpLOONG64LoweredAtomicCas64 return true @@ -705,6 +704,27 @@ func rewriteValueLOONG64_OpAddr(v *Value) bool { return true } } +func rewriteValueLOONG64_OpAtomicCompareAndSwap32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicCompareAndSwap32 ptr old new mem) + // result: (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) + for { + ptr := v_0 + old := v_1 + new := v_2 + mem := v_3 + v.reset(OpLOONG64LoweredAtomicCas32) + v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) + v0.AddArg(old) + v.AddArg4(ptr, v0, new, mem) + return true + } +} func rewriteValueLOONG64_OpAvg64u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go index 998b27dbb57..c0d42b55f51 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go @@ -52,8 +52,7 @@ func rewriteValueMIPS64(v *Value) bool { v.Op = OpMIPS64LoweredAtomicAdd64 return true case OpAtomicCompareAndSwap32: - v.Op = OpMIPS64LoweredAtomicCas32 - return true + return rewriteValueMIPS64_OpAtomicCompareAndSwap32(v) case OpAtomicCompareAndSwap64: v.Op = OpMIPS64LoweredAtomicCas64 return true @@ -697,6 +696,27 @@ func rewriteValueMIPS64_OpAddr(v *Value) bool { return true } } +func rewriteValueMIPS64_OpAtomicCompareAndSwap32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicCompareAndSwap32 ptr old new mem) + // result: (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) + for { + ptr := v_0 + old := v_1 + new := v_2 + mem := v_3 + v.reset(OpMIPS64LoweredAtomicCas32) + v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) + v0.AddArg(old) + v.AddArg4(ptr, v0, new, mem) + return true + } +} func rewriteValueMIPS64_OpAvg64u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index f94e90f01ad..961230d8bb3 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -61,8 +61,7 @@ func rewriteValueRISCV64(v *Value) bool { case OpAtomicAnd8: return rewriteValueRISCV64_OpAtomicAnd8(v) case OpAtomicCompareAndSwap32: - v.Op = OpRISCV64LoweredAtomicCas32 - return true + return rewriteValueRISCV64_OpAtomicCompareAndSwap32(v) case OpAtomicCompareAndSwap64: v.Op = OpRISCV64LoweredAtomicCas64 return true @@ -776,6 +775,27 @@ func rewriteValueRISCV64_OpAtomicAnd8(v *Value) bool { return true } } +func rewriteValueRISCV64_OpAtomicCompareAndSwap32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicCompareAndSwap32 ptr old new mem) + // result: (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) + for { + ptr := v_0 + old := v_1 + new := v_2 + mem := v_3 + v.reset(OpRISCV64LoweredAtomicCas32) + v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) + v0.AddArg(old) + v.AddArg4(ptr, v0, new, mem) + return true + } +} func rewriteValueRISCV64_OpAtomicOr8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] diff --git a/src/runtime/internal/atomic/atomic_test.go b/src/runtime/internal/atomic/atomic_test.go index 2ae60b8507e..2427bfd211e 100644 --- a/src/runtime/internal/atomic/atomic_test.go +++ b/src/runtime/internal/atomic/atomic_test.go @@ -345,6 +345,36 @@ func TestBitwiseContended(t *testing.T) { } } +func TestCasRel(t *testing.T) { + const _magic = 0x5a5aa5a5 + var x struct { + before uint32 + i uint32 + after uint32 + o uint32 + n uint32 + } + + x.before = _magic + x.after = _magic + for j := 0; j < 32; j += 1 { + x.i = (1 << j) + 0 + x.o = (1 << j) + 0 + x.n = (1 << j) + 1 + if !atomic.CasRel(&x.i, x.o, x.n) { + t.Fatalf("should have swapped %#x %#x", x.o, x.n) + } + + if x.i != x.n { + t.Fatalf("wrong x.i after swap: x.i=%#x x.n=%#x", x.i, x.n) + } + + if x.before != _magic || x.after != _magic { + t.Fatalf("wrong magic: %#x _ %#x != %#x _ %#x", x.before, x.after, _magic, _magic) + } + } +} + func TestStorepNoWB(t *testing.T) { var p [2]*int for i := range p {