1
0
mirror of https://github.com/golang/go synced 2024-11-14 20:10:30 -07:00

cmd/compiler,internal/runtime/atomic: optimize xadd{32,64} on loong64

Use Loong64's atomic operation instruction AMADDDB{W,V} (full barrier)
to implement atomic.Xadd{32,64}

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A5000 @ 2500.00MHz
          |  bench.old    |  bench.new                            |
          |  sec/op       |  sec/op          vs base              |
Xadd         27.24n ± 0%     12.01n ± 0%    -55.91% (p=0.000 n=20)
Xadd-2       31.93n ± 0%     25.55n ± 0%    -19.98% (p=0.000 n=20)
Xadd-4       31.90n ± 0%     24.80n ± 0%    -22.26% (p=0.000 n=20)
Xadd64       27.23n ± 0%     12.01n ± 0%    -55.89% (p=0.000 n=20)
Xadd64-2     31.93n ± 0%     25.57n ± 0%    -19.90% (p=0.000 n=20)
Xadd64-4     31.89n ± 0%     24.80n ± 0%    -22.23% (p=0.000 n=20)
geomean      30.27n          19.67n         -35.01%

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A6000 @ 2500.00MHz
          |  bench.old    |  bench.new                           |
          |  sec/op       |  sec/op         vs base              |
Xadd         26.02n ± 0%     12.41n ± 0%   -52.31% (p=0.000 n=20)
Xadd-2       37.36n ± 0%     20.60n ± 0%   -44.86% (p=0.000 n=20)
Xadd-4       37.22n ± 0%     19.59n ± 0%   -47.37% (p=0.000 n=20)
Xadd64       26.42n ± 0%     12.41n ± 0%   -53.03% (p=0.000 n=20)
Xadd64-2     37.77n ± 0%     20.60n ± 0%   -45.46% (p=0.000 n=20)
Xadd64-4     37.78n ± 0%     19.59n ± 0%   -48.15% (p=0.000 n=20)
geomean      33.30n          17.11n        -48.62%

Change-Id: I982539c2aa04680e9dd11b099ba8d5f215bf9b32
Reviewed-on: https://go-review.googlesource.com/c/go/+/481937
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: WANG Xuerui <git@xen0n.name>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
This commit is contained in:
Guoqi Chen 2023-04-03 12:11:46 +08:00 committed by abner chenc
parent acad0c2e9a
commit 4f7af5d192
6 changed files with 29 additions and 201 deletions

View File

@ -694,92 +694,29 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
s.Prog(loong64.ADBAR)
case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64:
// DBAR
// LL (Rarg0), Rout
// ADDV Rarg1, Rout, Rtmp
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// DBAR
// ADDV Rarg1, Rout
ll := loong64.ALLV
sc := loong64.ASCV
// AMADDx Rarg1, (Rarg0), Rout
// ADDV Rarg1, Rout, Rout
amaddx := loong64.AAMADDDBV
addx := loong64.AADDV
if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 {
ll = loong64.ALL
sc = loong64.ASC
amaddx = loong64.AAMADDDBW
}
s.Prog(loong64.ADBAR)
p := s.Prog(ll)
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
p1 := s.Prog(loong64.AADDVU)
p := s.Prog(amaddx)
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.RegTo2 = v.Reg0()
p1 := s.Prog(addx)
p1.From.Type = obj.TYPE_REG
p1.From.Reg = v.Args[1].Reg()
p1.Reg = v.Reg0()
p1.To.Type = obj.TYPE_REG
p1.To.Reg = loong64.REGTMP
p2 := s.Prog(sc)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = loong64.REGTMP
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = v.Args[0].Reg()
p3 := s.Prog(loong64.ABEQ)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = loong64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
s.Prog(loong64.ADBAR)
p4 := s.Prog(loong64.AADDVU)
p4.From.Type = obj.TYPE_REG
p4.From.Reg = v.Args[1].Reg()
p4.Reg = v.Reg0()
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Reg0()
case ssa.OpLOONG64LoweredAtomicAddconst32, ssa.OpLOONG64LoweredAtomicAddconst64:
// DBAR
// LL (Rarg0), Rout
// ADDV $auxint, Rout, Rtmp
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// DBAR
// ADDV $auxint, Rout
ll := loong64.ALLV
sc := loong64.ASCV
if v.Op == ssa.OpLOONG64LoweredAtomicAddconst32 {
ll = loong64.ALL
sc = loong64.ASC
}
s.Prog(loong64.ADBAR)
p := s.Prog(ll)
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
p1 := s.Prog(loong64.AADDVU)
p1.From.Type = obj.TYPE_CONST
p1.From.Offset = v.AuxInt
p1.Reg = v.Reg0()
p1.To.Type = obj.TYPE_REG
p1.To.Reg = loong64.REGTMP
p2 := s.Prog(sc)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = loong64.REGTMP
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = v.Args[0].Reg()
p3 := s.Prog(loong64.ABEQ)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = loong64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
s.Prog(loong64.ADBAR)
p4 := s.Prog(loong64.AADDVU)
p4.From.Type = obj.TYPE_CONST
p4.From.Offset = v.AuxInt
p4.Reg = v.Reg0()
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Reg0()
p1.To.Reg = v.Reg0()
case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
// MOVV $0, Rout
// DBAR

View File

@ -506,9 +506,6 @@
&& is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOV(B|H|W|V)storezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(LoweredAtomicAdd32 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst32 [int32(c)] ptr mem)
(LoweredAtomicAdd64 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst64 [c] ptr mem)
// don't extend after proper load
(MOVBreg x:(MOVBload _ _)) => (MOVVreg x)
(MOVBUreg x:(MOVBUload _ _)) => (MOVVreg x)

View File

@ -448,18 +448,8 @@ func init() {
// atomic add.
// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>.
// DBAR
// LL (Rarg0), Rout
// ADDV Rarg1, Rout, Rtmp
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// DBAR
// ADDV Rarg1, Rout
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// *arg0 += auxint. arg1=mem. returns <new content of *arg0, memory>. auxint is 32-bit.
{name: "LoweredAtomicAddconst32", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int32", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicAddconst64", argLength: 2, reg: regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}, aux: "Int64", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
// atomic compare and swap.
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.

View File

@ -1908,8 +1908,6 @@ const (
OpLOONG64LoweredAtomicExchange64
OpLOONG64LoweredAtomicAdd32
OpLOONG64LoweredAtomicAdd64
OpLOONG64LoweredAtomicAddconst32
OpLOONG64LoweredAtomicAddconst64
OpLOONG64LoweredAtomicCas32
OpLOONG64LoweredAtomicCas64
OpLOONG64LoweredNilCheck
@ -25580,7 +25578,6 @@ var opcodeTable = [...]opInfo{
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
@ -25597,7 +25594,6 @@ var opcodeTable = [...]opInfo{
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
@ -25608,40 +25604,6 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredAtomicAddconst32",
auxType: auxInt32,
argLen: 2,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB
},
outputs: []outputInfo{
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
},
},
},
{
name: "LoweredAtomicAddconst64",
auxType: auxInt64,
argLen: 2,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB
},
outputs: []outputInfo{
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
},
},
},
{
name: "LoweredAtomicCas32",
argLen: 4,

View File

@ -256,10 +256,6 @@ func rewriteValueLOONG64(v *Value) bool {
return rewriteValueLOONG64_OpLOONG64DIVV(v)
case OpLOONG64DIVVU:
return rewriteValueLOONG64_OpLOONG64DIVVU(v)
case OpLOONG64LoweredAtomicAdd32:
return rewriteValueLOONG64_OpLOONG64LoweredAtomicAdd32(v)
case OpLOONG64LoweredAtomicAdd64:
return rewriteValueLOONG64_OpLOONG64LoweredAtomicAdd64(v)
case OpLOONG64MASKEQZ:
return rewriteValueLOONG64_OpLOONG64MASKEQZ(v)
case OpLOONG64MASKNEZ:
@ -1694,54 +1690,6 @@ func rewriteValueLOONG64_OpLOONG64DIVVU(v *Value) bool {
}
return false
}
func rewriteValueLOONG64_OpLOONG64LoweredAtomicAdd32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredAtomicAdd32 ptr (MOVVconst [c]) mem)
// cond: is32Bit(c)
// result: (LoweredAtomicAddconst32 [int32(c)] ptr mem)
for {
ptr := v_0
if v_1.Op != OpLOONG64MOVVconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
if !(is32Bit(c)) {
break
}
v.reset(OpLOONG64LoweredAtomicAddconst32)
v.AuxInt = int32ToAuxInt(int32(c))
v.AddArg2(ptr, mem)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64LoweredAtomicAdd64(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredAtomicAdd64 ptr (MOVVconst [c]) mem)
// cond: is32Bit(c)
// result: (LoweredAtomicAddconst64 [c] ptr mem)
for {
ptr := v_0
if v_1.Op != OpLOONG64MOVVconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
if !(is32Bit(c)) {
break
}
v.reset(OpLOONG64LoweredAtomicAddconst64)
v.AuxInt = int64ToAuxInt(c)
v.AddArg2(ptr, mem)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MASKEQZ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View File

@ -79,6 +79,9 @@ TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
TEXT ·Loadint64(SB), NOSPLIT, $0-16
JMP ·Load64(SB)
TEXT ·Xaddint32(SB),NOSPLIT,$0-20
JMP ·Xadd(SB)
TEXT ·Xaddint64(SB), NOSPLIT, $0-24
JMP ·Xadd64(SB)
@ -92,34 +95,25 @@ TEXT ·Xaddint64(SB), NOSPLIT, $0-24
TEXT ·Casp1(SB), NOSPLIT, $0-25
JMP ·Cas64(SB)
// uint32 xadd(uint32 volatile *ptr, int32 delta)
// uint32 Xadd(uint32 volatile *ptr, int32 delta)
// Atomically:
// *val += delta;
// return *val;
TEXT ·Xadd(SB), NOSPLIT, $0-20
MOVV ptr+0(FP), R4
MOVW delta+8(FP), R5
DBAR
LL (R4), R6
ADDU R6, R5, R7
MOVV R7, R6
SC R7, (R4)
BEQ R7, -4(PC)
MOVW R6, ret+16(FP)
DBAR
AMADDDBW R5, (R4), R6
ADDV R6, R5, R4
MOVW R4, ret+16(FP)
RET
// func Xadd64(ptr *uint64, delta int64) uint64
TEXT ·Xadd64(SB), NOSPLIT, $0-24
MOVV ptr+0(FP), R4
MOVV delta+8(FP), R5
DBAR
LLV (R4), R6
ADDVU R6, R5, R7
MOVV R7, R6
SCV R7, (R4)
BEQ R7, -4(PC)
MOVV R6, ret+16(FP)
DBAR
AMADDDBV R5, (R4), R6
ADDV R6, R5, R4
MOVV R4, ret+16(FP)
RET
TEXT ·Xchg(SB), NOSPLIT, $0-20