mirror of
https://github.com/golang/go
synced 2024-11-14 19:50:21 -07:00
cmd/compiler,internal/runtime/atomic: optimize xchg{32,64} on loong64
Use Loong64's atomic operation instruction AMSWAPDB{W,V} (full barrier) to implement atomic.Xchg{32,64} goos: linux goarch: loong64 pkg: internal/runtime/atomic cpu: Loongson-3A5000 @ 2500.00MHz | old.bench | new.bench | | sec/op | sec/op vs base | Xchg 26.44n ± 0% 12.01n ± 0% -54.58% (p=0.000 n=20) Xchg-2 30.10n ± 0% 25.58n ± 0% -15.02% (p=0.000 n=20) Xchg-4 30.06n ± 0% 24.82n ± 0% -17.43% (p=0.000 n=20) Xchg64 26.44n ± 0% 12.02n ± 0% -54.54% (p=0.000 n=20) Xchg64-2 30.10n ± 0% 25.57n ± 0% -15.05% (p=0.000 n=20) Xchg64-4 30.05n ± 0% 24.80n ± 0% -17.47% (p=0.000 n=20) geomean 28.81n 19.68n -31.69% goos: linux goarch: loong64 pkg: internal/runtime/atomic cpu: Loongson-3A6000 @ 2500.00MHz | old.bench | new.bench | | sec/op | sec/op vs base | Xchg 25.62n ± 0% 12.41n ± 0% -51.56% (p=0.000 n=20) Xchg-2 35.01n ± 0% 20.59n ± 0% -41.19% (p=0.000 n=20) Xchg-4 34.63n ± 0% 19.59n ± 0% -43.42% (p=0.000 n=20) Xchg64 25.62n ± 0% 12.41n ± 0% -51.56% (p=0.000 n=20) Xchg64-2 35.01n ± 0% 20.59n ± 0% -41.19% (p=0.000 n=20) Xchg64-4 34.67n ± 0% 19.59n ± 0% -43.50% (p=0.000 n=20) geomean 31.44n 17.11n -45.59% Updates #59120. Change-Id: Ied74fc20338b63799c6d6eeb122c31b42cff0f7e Reviewed-on: https://go-review.googlesource.com/c/go/+/481578 Reviewed-by: Meidan Li <limeidan@loongson.cn> Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: WANG Xuerui <git@xen0n.name> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
This commit is contained in:
parent
5123f38e05
commit
72a92ab5b7
@ -693,40 +693,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||||||
p.RegTo2 = loong64.REGZERO
|
p.RegTo2 = loong64.REGZERO
|
||||||
|
|
||||||
case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
|
case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64:
|
||||||
// DBAR
|
// AMSWAPx Rarg1, (Rarg0), Rout
|
||||||
// MOVV Rarg1, Rtmp
|
amswapx := loong64.AAMSWAPDBV
|
||||||
// LL (Rarg0), Rout
|
|
||||||
// SC Rtmp, (Rarg0)
|
|
||||||
// BEQ Rtmp, -3(PC)
|
|
||||||
// DBAR
|
|
||||||
ll := loong64.ALLV
|
|
||||||
sc := loong64.ASCV
|
|
||||||
if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
|
if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 {
|
||||||
ll = loong64.ALL
|
amswapx = loong64.AAMSWAPDBW
|
||||||
sc = loong64.ASC
|
|
||||||
}
|
}
|
||||||
s.Prog(loong64.ADBAR)
|
p := s.Prog(amswapx)
|
||||||
p := s.Prog(loong64.AMOVV)
|
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
p.From.Reg = v.Args[1].Reg()
|
p.From.Reg = v.Args[1].Reg()
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_MEM
|
||||||
p.To.Reg = loong64.REGTMP
|
p.To.Reg = v.Args[0].Reg()
|
||||||
p1 := s.Prog(ll)
|
p.RegTo2 = v.Reg0()
|
||||||
p1.From.Type = obj.TYPE_MEM
|
|
||||||
p1.From.Reg = v.Args[0].Reg()
|
|
||||||
p1.To.Type = obj.TYPE_REG
|
|
||||||
p1.To.Reg = v.Reg0()
|
|
||||||
p2 := s.Prog(sc)
|
|
||||||
p2.From.Type = obj.TYPE_REG
|
|
||||||
p2.From.Reg = loong64.REGTMP
|
|
||||||
p2.To.Type = obj.TYPE_MEM
|
|
||||||
p2.To.Reg = v.Args[0].Reg()
|
|
||||||
p3 := s.Prog(loong64.ABEQ)
|
|
||||||
p3.From.Type = obj.TYPE_REG
|
|
||||||
p3.From.Reg = loong64.REGTMP
|
|
||||||
p3.To.Type = obj.TYPE_BRANCH
|
|
||||||
p3.To.SetTarget(p)
|
|
||||||
s.Prog(loong64.ADBAR)
|
|
||||||
|
|
||||||
case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
|
case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
|
||||||
// AMADDx Rarg1, (Rarg0), Rout
|
// AMADDx Rarg1, (Rarg0), Rout
|
||||||
|
@ -451,14 +451,8 @@ func init() {
|
|||||||
|
|
||||||
// atomic exchange.
|
// atomic exchange.
|
||||||
// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>.
|
// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>.
|
||||||
// DBAR
|
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
|
||||||
// LL (Rarg0), Rout
|
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
|
||||||
// MOVV Rarg1, Rtmp
|
|
||||||
// SC Rtmp, (Rarg0)
|
|
||||||
// BEQ Rtmp, -3(PC)
|
|
||||||
// DBAR
|
|
||||||
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
|
|
||||||
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
|
|
||||||
|
|
||||||
// atomic add.
|
// atomic add.
|
||||||
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>.
|
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>.
|
||||||
|
@ -25709,7 +25709,6 @@ var opcodeTable = [...]opInfo{
|
|||||||
resultNotInArgs: true,
|
resultNotInArgs: true,
|
||||||
faultOnNilArg0: true,
|
faultOnNilArg0: true,
|
||||||
hasSideEffects: true,
|
hasSideEffects: true,
|
||||||
unsafePoint: true,
|
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||||
@ -25726,7 +25725,6 @@ var opcodeTable = [...]opInfo{
|
|||||||
resultNotInArgs: true,
|
resultNotInArgs: true,
|
||||||
faultOnNilArg0: true,
|
faultOnNilArg0: true,
|
||||||
hasSideEffects: true,
|
hasSideEffects: true,
|
||||||
unsafePoint: true,
|
|
||||||
reg: regInfo{
|
reg: regInfo{
|
||||||
inputs: []inputInfo{
|
inputs: []inputInfo{
|
||||||
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
|
||||||
|
@ -116,35 +116,33 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24
|
|||||||
MOVV R4, ret+16(FP)
|
MOVV R4, ret+16(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
// func Xchg(ptr *uint32, new uint32) uint32
|
||||||
TEXT ·Xchg(SB), NOSPLIT, $0-20
|
TEXT ·Xchg(SB), NOSPLIT, $0-20
|
||||||
MOVV ptr+0(FP), R4
|
MOVV ptr+0(FP), R4
|
||||||
MOVW new+8(FP), R5
|
MOVW new+8(FP), R5
|
||||||
|
AMSWAPDBW R5, (R4), R6
|
||||||
DBAR
|
MOVW R6, ret+16(FP)
|
||||||
MOVV R5, R6
|
|
||||||
LL (R4), R7
|
|
||||||
SC R6, (R4)
|
|
||||||
BEQ R6, -3(PC)
|
|
||||||
MOVW R7, ret+16(FP)
|
|
||||||
DBAR
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
// func Xchg64(ptr *uint64, new uint64) uint64
|
||||||
TEXT ·Xchg64(SB), NOSPLIT, $0-24
|
TEXT ·Xchg64(SB), NOSPLIT, $0-24
|
||||||
MOVV ptr+0(FP), R4
|
MOVV ptr+0(FP), R4
|
||||||
MOVV new+8(FP), R5
|
MOVV new+8(FP), R5
|
||||||
|
AMSWAPDBV R5, (R4), R6
|
||||||
DBAR
|
MOVV R6, ret+16(FP)
|
||||||
MOVV R5, R6
|
|
||||||
LLV (R4), R7
|
|
||||||
SCV R6, (R4)
|
|
||||||
BEQ R6, -3(PC)
|
|
||||||
MOVV R7, ret+16(FP)
|
|
||||||
DBAR
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
|
TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
|
||||||
JMP ·Xchg64(SB)
|
JMP ·Xchg64(SB)
|
||||||
|
|
||||||
|
// func Xchgint32(ptr *int32, new int32) int32
|
||||||
|
TEXT ·Xchgint32(SB), NOSPLIT, $0-20
|
||||||
|
JMP ·Xchg(SB)
|
||||||
|
|
||||||
|
// func Xchgint64(ptr *int64, new int64) int64
|
||||||
|
TEXT ·Xchgint64(SB), NOSPLIT, $0-24
|
||||||
|
JMP ·Xchg64(SB)
|
||||||
|
|
||||||
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
|
TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
|
||||||
JMP ·Store64(SB)
|
JMP ·Store64(SB)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user