mirror of
https://github.com/golang/go
synced 2024-11-14 17:40:28 -07:00
cmd/compile/internal: intrinsify publicationBarrier on loong64
The publication barrier is a StoreStore barrier, which is implemented by "DBAR 0x1A" [1] on loong64. goos: linux goarch: loong64 pkg: runtime cpu: Loongson-3A6000 @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | Malloc8 31.76n ± 0% 22.79n ± 0% -28.24% (p=0.000 n=20) Malloc8-2 25.46n ± 0% 18.33n ± 0% -28.00% (p=0.000 n=20) Malloc8-4 25.75n ± 0% 18.43n ± 0% -28.41% (p=0.000 n=20) Malloc16 62.97n ± 0% 42.41n ± 0% -32.65% (p=0.000 n=20) Malloc16-2 49.11n ± 0% 31.68n ± 0% -35.50% (p=0.000 n=20) Malloc16-4 49.64n ± 1% 31.95n ± 0% -35.62% (p=0.000 n=20) MallocTypeInfo8 58.57n ± 0% 46.51n ± 0% -20.61% (p=0.000 n=20) MallocTypeInfo8-2 51.43n ± 0% 38.01n ± 0% -26.09% (p=0.000 n=20) MallocTypeInfo8-4 51.65n ± 0% 38.15n ± 0% -26.13% (p=0.000 n=20) MallocTypeInfo16 68.07n ± 0% 51.62n ± 0% -24.17% (p=0.000 n=20) MallocTypeInfo16-2 54.73n ± 0% 41.13n ± 0% -24.85% (p=0.000 n=20) MallocTypeInfo16-4 55.05n ± 0% 41.28n ± 0% -25.02% (p=0.000 n=20) MallocLargeStruct 491.5n ± 0% 454.8n ± 0% -7.47% (p=0.000 n=20) MallocLargeStruct-2 351.8n ± 1% 323.8n ± 0% -7.94% (p=0.000 n=20) MallocLargeStruct-4 333.6n ± 0% 316.7n ± 0% -5.10% (p=0.000 n=20) geomean 71.01n 53.78n -24.26% [1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html Change-Id: Ica0c89db6f2bebd55d9b3207a1c462a9454e9268 Reviewed-on: https://go-review.googlesource.com/c/go/+/577515 Reviewed-by: David Chase <drchase@google.com> Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn> Reviewed-by: Meidan Li <limeidan@loongson.cn> Reviewed-by: Carlos Amedee <carlos@golang.org>
This commit is contained in:
parent
4f7af5d192
commit
e534989d18
@ -589,6 +589,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
p.To.Name = obj.NAME_EXTERN
|
||||
// AuxInt encodes how many buffer entries we need.
|
||||
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
|
||||
|
||||
case ssa.OpLOONG64LoweredPubBarrier:
|
||||
// DBAR 0x1A
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = 0x1A
|
||||
|
||||
case ssa.OpLOONG64LoweredPanicBoundsA, ssa.OpLOONG64LoweredPanicBoundsB, ssa.OpLOONG64LoweredPanicBoundsC:
|
||||
p := s.Prog(obj.ACALL)
|
||||
p.To.Type = obj.TYPE_MEM
|
||||
|
@ -464,6 +464,9 @@
|
||||
// Write barrier.
|
||||
(WB ...) => (LoweredWB ...)
|
||||
|
||||
// Publication barrier as intrinsic
|
||||
(PubBarrier ...) => (LoweredPubBarrier ...)
|
||||
|
||||
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
|
||||
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
|
||||
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
|
||||
|
@ -497,6 +497,9 @@ func init() {
|
||||
// Returns a pointer to a write barrier buffer in R29.
|
||||
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R1"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
|
||||
|
||||
// Do data barrier. arg0=memorys
|
||||
{name: "LoweredPubBarrier", argLength: 1, asm: "DBAR", hasSideEffects: true},
|
||||
|
||||
// There are three of these functions so that they can have three different register inputs.
|
||||
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
|
||||
// default registers to match so we don't need to copy registers around unnecessarily.
|
||||
|
@ -1917,6 +1917,7 @@ const (
|
||||
OpLOONG64LoweredGetCallerSP
|
||||
OpLOONG64LoweredGetCallerPC
|
||||
OpLOONG64LoweredWB
|
||||
OpLOONG64LoweredPubBarrier
|
||||
OpLOONG64LoweredPanicBoundsA
|
||||
OpLOONG64LoweredPanicBoundsB
|
||||
OpLOONG64LoweredPanicBoundsC
|
||||
@ -25711,6 +25712,13 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredPubBarrier",
|
||||
argLen: 1,
|
||||
hasSideEffects: true,
|
||||
asm: loong64.ADBAR,
|
||||
reg: regInfo{},
|
||||
},
|
||||
{
|
||||
name: "LoweredPanicBoundsA",
|
||||
auxType: auxInt64,
|
||||
|
@ -584,6 +584,9 @@ func rewriteValueLOONG64(v *Value) bool {
|
||||
return true
|
||||
case OpPanicBounds:
|
||||
return rewriteValueLOONG64_OpPanicBounds(v)
|
||||
case OpPubBarrier:
|
||||
v.Op = OpLOONG64LoweredPubBarrier
|
||||
return true
|
||||
case OpRotateLeft16:
|
||||
return rewriteValueLOONG64_OpRotateLeft16(v)
|
||||
case OpRotateLeft32:
|
||||
|
@ -162,7 +162,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
||||
s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
|
||||
return nil
|
||||
},
|
||||
sys.ARM64, sys.PPC64, sys.RISCV64)
|
||||
sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64)
|
||||
|
||||
/******** internal/runtime/sys ********/
|
||||
add("internal/runtime/sys", "GetCallerPC",
|
||||
|
@ -419,6 +419,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
||||
{"loong64", "math/bits", "Sub"}: struct{}{},
|
||||
{"loong64", "math/bits", "Sub64"}: struct{}{},
|
||||
{"loong64", "runtime", "KeepAlive"}: struct{}{},
|
||||
{"loong64", "runtime", "publicationBarrier"}: struct{}{},
|
||||
{"loong64", "runtime", "slicebytetostringtmp"}: struct{}{},
|
||||
{"loong64", "sync", "runtime_LoadAcquintptr"}: struct{}{},
|
||||
{"loong64", "sync", "runtime_StoreReluintptr"}: struct{}{},
|
||||
|
@ -5,5 +5,5 @@
|
||||
#include "textflag.h"
|
||||
|
||||
TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
|
||||
DBAR
|
||||
DBAR $0x1A // StoreStore barrier
|
||||
RET
|
||||
|
Loading…
Reference in New Issue
Block a user