1
0
mirror of https://github.com/golang/go synced 2024-11-14 19:50:21 -07:00

cmd/compile,internal/runtime/atomic: optimize xchg{32,64} on loong64

Use Loong64's atomic operation instructions AMSWAPDB{W,V} (full barrier)
to implement atomic.Xchg{32,64}.

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A5000 @ 2500.00MHz
           |  old.bench    |  new.bench                          |
           |  sec/op       |  sec/op        vs base              |
Xchg          26.44n ± 0%     12.01n ± 0%   -54.58% (p=0.000 n=20)
Xchg-2        30.10n ± 0%     25.58n ± 0%   -15.02% (p=0.000 n=20)
Xchg-4        30.06n ± 0%     24.82n ± 0%   -17.43% (p=0.000 n=20)
Xchg64        26.44n ± 0%     12.02n ± 0%   -54.54% (p=0.000 n=20)
Xchg64-2      30.10n ± 0%     25.57n ± 0%   -15.05% (p=0.000 n=20)
Xchg64-4      30.05n ± 0%     24.80n ± 0%   -17.47% (p=0.000 n=20)
geomean       28.81n          19.68n        -31.69%

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A6000 @ 2500.00MHz
           |  old.bench    |  new.bench                          |
           |  sec/op       |  sec/op        vs base              |
Xchg          25.62n ± 0%     12.41n ± 0%  -51.56% (p=0.000 n=20)
Xchg-2        35.01n ± 0%     20.59n ± 0%  -41.19% (p=0.000 n=20)
Xchg-4        34.63n ± 0%     19.59n ± 0%  -43.42% (p=0.000 n=20)
Xchg64        25.62n ± 0%     12.41n ± 0%  -51.56% (p=0.000 n=20)
Xchg64-2      35.01n ± 0%     20.59n ± 0%  -41.19% (p=0.000 n=20)
Xchg64-4      34.67n ± 0%     19.59n ± 0%  -43.50% (p=0.000 n=20)
geomean       31.44n          17.11n       -45.59%

Updates #59120.

Change-Id: Ied74fc20338b63799c6d6eeb122c31b42cff0f7e
Reviewed-on: https://go-review.googlesource.com/c/go/+/481578
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: WANG Xuerui <git@xen0n.name>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
This commit is contained in:
Guoqi Chen 2023-04-01 08:49:58 +08:00 committed by abner chenc
parent 5123f38e05
commit 72a92ab5b7
4 changed files with 23 additions and 56 deletions

View File

@ -693,40 +693,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.RegTo2 = loong64.REGZERO p.RegTo2 = loong64.REGZERO
case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64: case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
// DBAR // AMSWAPx Rarg1, (Rarg0), Rout
// MOVV Rarg1, Rtmp amswapx := loong64.AAMSWAPDBV
// LL (Rarg0), Rout
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// DBAR
ll := loong64.ALLV
sc := loong64.ASCV
if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 { if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
ll = loong64.ALL amswapx = loong64.AAMSWAPDBW
sc = loong64.ASC
} }
s.Prog(loong64.ADBAR) p := s.Prog(amswapx)
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg() p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_MEM
p.To.Reg = loong64.REGTMP p.To.Reg = v.Args[0].Reg()
p1 := s.Prog(ll) p.RegTo2 = v.Reg0()
p1.From.Type = obj.TYPE_MEM
p1.From.Reg = v.Args[0].Reg()
p1.To.Type = obj.TYPE_REG
p1.To.Reg = v.Reg0()
p2 := s.Prog(sc)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = loong64.REGTMP
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = v.Args[0].Reg()
p3 := s.Prog(loong64.ABEQ)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = loong64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
s.Prog(loong64.ADBAR)
case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
// AMADDx Rarg1, (Rarg0), Rout // AMADDx Rarg1, (Rarg0), Rout

View File

@ -451,14 +451,8 @@ func init() {
// atomic exchange. // atomic exchange.
// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. // store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>.
// DBAR {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
// LL (Rarg0), Rout {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
// MOVV Rarg1, Rtmp
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// DBAR
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// atomic add. // atomic add.
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. // *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>.

View File

@ -25709,7 +25709,6 @@ var opcodeTable = [...]opInfo{
resultNotInArgs: true, resultNotInArgs: true,
faultOnNilArg0: true, faultOnNilArg0: true,
hasSideEffects: true, hasSideEffects: true,
unsafePoint: true,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
@ -25726,7 +25725,6 @@ var opcodeTable = [...]opInfo{
resultNotInArgs: true, resultNotInArgs: true,
faultOnNilArg0: true, faultOnNilArg0: true,
hasSideEffects: true, hasSideEffects: true,
unsafePoint: true,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31

View File

@ -116,35 +116,33 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24
MOVV R4, ret+16(FP) MOVV R4, ret+16(FP)
RET RET
// func Xchg(ptr *uint32, new uint32) uint32
TEXT ·Xchg(SB), NOSPLIT, $0-20 TEXT ·Xchg(SB), NOSPLIT, $0-20
MOVV ptr+0(FP), R4 MOVV ptr+0(FP), R4
MOVW new+8(FP), R5 MOVW new+8(FP), R5
AMSWAPDBW R5, (R4), R6
DBAR MOVW R6, ret+16(FP)
MOVV R5, R6
LL (R4), R7
SC R6, (R4)
BEQ R6, -3(PC)
MOVW R7, ret+16(FP)
DBAR
RET RET
// func Xchg64(ptr *uint64, new uint64) uint64
TEXT ·Xchg64(SB), NOSPLIT, $0-24 TEXT ·Xchg64(SB), NOSPLIT, $0-24
MOVV ptr+0(FP), R4 MOVV ptr+0(FP), R4
MOVV new+8(FP), R5 MOVV new+8(FP), R5
AMSWAPDBV R5, (R4), R6
DBAR MOVV R6, ret+16(FP)
MOVV R5, R6
LLV (R4), R7
SCV R6, (R4)
BEQ R6, -3(PC)
MOVV R7, ret+16(FP)
DBAR
RET RET
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24 TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
JMP ·Xchg64(SB) JMP ·Xchg64(SB)
// func Xchgint32(ptr *int32, new int32) int32
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
JMP ·Xchg(SB)
// func Xchgint64(ptr *int64, new int64) int64
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
JMP ·Xchg64(SB)
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16 TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
JMP ·Store64(SB) JMP ·Store64(SB)