1
0
mirror of https://github.com/golang/go synced 2024-11-23 08:40:08 -07:00

cmd/compile: intrinsify runtime/internal/atomic.{And,Or} on MIPS64x

This CL intrinsifies atomic{And,Or} on mips64x, which are already implemented on mipsx.

goos: linux
goarch: mips64le
pkg: runtime/internal/atomic
                │  oldatomic  │             newatomic              │
                │   sec/op    │   sec/op     vs base               │
AtomicLoad64-4    27.96n ± 0%   28.02n ± 0%   +0.20% (p=0.026 n=8)
AtomicStore64-4   29.14n ± 0%   29.21n ± 0%   +0.22% (p=0.004 n=8)
AtomicLoad-4      27.96n ± 0%   28.02n ± 0%        ~ (p=0.220 n=8)
AtomicStore-4     29.15n ± 0%   29.21n ± 0%   +0.19% (p=0.002 n=8)
And8-4            53.09n ± 0%   41.71n ± 0%  -21.44% (p=0.000 n=8)
And-4             49.87n ± 0%   39.93n ± 0%  -19.93% (p=0.000 n=8)
And8Parallel-4    70.45n ± 0%   68.58n ± 0%   -2.65% (p=0.000 n=8)
AndParallel-4     70.40n ± 0%   67.95n ± 0%   -3.47% (p=0.000 n=8)
Or8-4             52.09n ± 0%   41.11n ± 0%  -21.08% (p=0.000 n=8)
Or-4              49.80n ± 0%   39.87n ± 0%  -19.93% (p=0.000 n=8)
Or8Parallel-4     70.43n ± 0%   68.25n ± 0%   -3.08% (p=0.000 n=8)
OrParallel-4      70.42n ± 0%   67.94n ± 0%   -3.51% (p=0.000 n=8)
Xadd-4            67.83n ± 0%   67.92n ± 0%   +0.13% (p=0.003 n=8)
Xadd64-4          67.85n ± 0%   67.92n ± 0%   +0.09% (p=0.021 n=8)
Cas-4             81.34n ± 0%   81.37n ± 0%        ~ (p=0.859 n=8)
Cas64-4           81.43n ± 0%   81.53n ± 0%   +0.13% (p=0.001 n=8)
Xchg-4            67.15n ± 0%   67.18n ± 0%        ~ (p=0.367 n=8)
Xchg64-4          67.16n ± 0%   67.21n ± 0%   +0.08% (p=0.008 n=8)
geomean           54.04n        51.01n        -5.61%

Change-Id: I9a4353f4b14134f1e9cf0dcf99db3feb951328ed
Reviewed-on: https://go-review.googlesource.com/c/go/+/494875
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: Joel Sing <joel@sing.id.au>
Reviewed-by: Junxian Zhu <zhujunxian@oss.cipunited.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Junxian Zhu 2023-05-15 14:06:02 +08:00 committed by Joel Sing
parent 774f60223f
commit 75add1ce0e
6 changed files with 286 additions and 4 deletions

View File

@ -672,6 +672,43 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p4.Reg = v.Reg0()
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Reg0()
case ssa.OpMIPS64LoweredAtomicAnd32,
ssa.OpMIPS64LoweredAtomicOr32:
// SYNC
// LL (Rarg0), Rtmp
// AND/OR Rarg1, Rtmp
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// SYNC
s.Prog(mips.ASYNC)
p := s.Prog(mips.ALL)
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REGTMP
p1 := s.Prog(v.Op.Asm())
p1.From.Type = obj.TYPE_REG
p1.From.Reg = v.Args[1].Reg()
p1.Reg = mips.REGTMP
p1.To.Type = obj.TYPE_REG
p1.To.Reg = mips.REGTMP
p2 := s.Prog(mips.ASC)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = mips.REGTMP
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = v.Args[0].Reg()
p3 := s.Prog(mips.ABEQ)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = mips.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
s.Prog(mips.ASYNC)
case ssa.OpMIPS64LoweredAtomicCas32, ssa.OpMIPS64LoweredAtomicCas64:
// MOVV $0, Rout
// SYNC

View File

@ -399,6 +399,46 @@
(AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem)
(AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...)
// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3,uint32(val) << ((ptr & 3) * 8))
(AtomicOr8 ptr val mem) && !config.BigEndian =>
(LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
(SLLV <typ.UInt32> (ZeroExt8to32 val)
(SLLVconst <typ.UInt64> [3]
(ANDconst <typ.UInt64> [3] ptr))) mem)
// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3,(uint32(val) << ((ptr & 3) * 8)) | ^(uint32(0xFF) << ((ptr & 3) * 8))))
(AtomicAnd8 ptr val mem) && !config.BigEndian =>
(LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
(OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val)
(SLLVconst <typ.UInt64> [3]
(ANDconst <typ.UInt64> [3] ptr)))
(NORconst [0] <typ.UInt64> (SLLV <typ.UInt64>
(MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3]
(ANDconst <typ.UInt64> [3] ptr))))) mem)
// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3,uint32(val) << (((ptr^3) & 3) * 8))
(AtomicOr8 ptr val mem) && config.BigEndian =>
(LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
(SLLV <typ.UInt32> (ZeroExt8to32 val)
(SLLVconst <typ.UInt64> [3]
(ANDconst <typ.UInt64> [3]
(XORconst <typ.UInt64> [3] ptr)))) mem)
// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3,(uint32(val) << (((ptr^3) & 3) * 8)) | ^(uint32(0xFF) << (((ptr^3) & 3) * 8))))
(AtomicAnd8 ptr val mem) && config.BigEndian =>
(LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr)
(OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val)
(SLLVconst <typ.UInt64> [3]
(ANDconst <typ.UInt64> [3]
(XORconst <typ.UInt64> [3] ptr))))
(NORconst [0] <typ.UInt64> (SLLV <typ.UInt64>
(MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3]
(ANDconst <typ.UInt64> [3]
(XORconst <typ.UInt64> [3] ptr)))))) mem)
(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
// checks
(NilCheck ...) => (LoweredNilCheck ...)
(IsNonNil ptr) => (SGTU ptr (MOVVconst [0]))

View File

@ -361,6 +361,17 @@ func init() {
faultOnNilArg1: true,
},
// atomic and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns memory.
// SYNC
// LL (Rarg0), Rtmp
// AND Rarg1, Rtmp
// SC Rtmp, (Rarg0)
// BEQ Rtmp, -3(PC)
// SYNC
{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// atomic loads.
// load from arg0. arg1=mem.
// returns <value,memory> so they can be properly ordered with other loads.

View File

@ -2053,6 +2053,8 @@ const (
OpMIPS64DUFFCOPY
OpMIPS64LoweredZero
OpMIPS64LoweredMove
OpMIPS64LoweredAtomicAnd32
OpMIPS64LoweredAtomicOr32
OpMIPS64LoweredAtomicLoad8
OpMIPS64LoweredAtomicLoad32
OpMIPS64LoweredAtomicLoad64
@ -27484,6 +27486,34 @@ var opcodeTable = [...]opInfo{
clobbers: 6, // R1 R2
},
},
{
name: "LoweredAtomicAnd32",
argLen: 3,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
asm: mips.AAND,
reg: regInfo{
inputs: []inputInfo{
{1, 234881022}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 g R31
{0, 4611686018695823358}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 SP g R31 SB
},
},
},
{
name: "LoweredAtomicOr32",
argLen: 3,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
asm: mips.AOR,
reg: regInfo{
inputs: []inputInfo{
{1, 234881022}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 g R31
{0, 4611686018695823358}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 SP g R31 SB
},
},
},
{
name: "LoweredAtomicLoad8",
argLen: 2,

View File

@ -53,6 +53,11 @@ func rewriteValueMIPS64(v *Value) bool {
case OpAtomicAdd64:
v.Op = OpMIPS64LoweredAtomicAdd64
return true
case OpAtomicAnd32:
v.Op = OpMIPS64LoweredAtomicAnd32
return true
case OpAtomicAnd8:
return rewriteValueMIPS64_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
return rewriteValueMIPS64_OpAtomicCompareAndSwap32(v)
case OpAtomicCompareAndSwap64:
@ -76,6 +81,11 @@ func rewriteValueMIPS64(v *Value) bool {
case OpAtomicLoadPtr:
v.Op = OpMIPS64LoweredAtomicLoad64
return true
case OpAtomicOr32:
v.Op = OpMIPS64LoweredAtomicOr32
return true
case OpAtomicOr8:
return rewriteValueMIPS64_OpAtomicOr8(v)
case OpAtomicStore32:
v.Op = OpMIPS64LoweredAtomicStore32
return true
@ -698,6 +708,92 @@ func rewriteValueMIPS64_OpAddr(v *Value) bool {
return true
}
}
// rewriteValueMIPS64_OpAtomicAnd8 lowers the generic AtomicAnd8 op for
// mips64, which has no byte-wide atomic AND. The byte operation is emulated
// with LoweredAtomicAnd32 on the 4-byte-aligned word containing the byte:
//   - the word address is ptr &^ 3 (built as AND of ptr with MOVVconst [^3]);
//   - val is zero-extended to 32 bits and shifted left by (ptr&3)*8 so it
//     lands in its byte lane (on big-endian the lane index is mirrored via
//     XORconst [3] on ptr before masking);
//   - the other three byte lanes are forced to all-ones by ORing with
//     ^(0xff << shift), built as NORconst [0] (i.e. bitwise NOT) of the
//     shifted 0xff mask, so the 32-bit AND leaves them unchanged.
// Returns true if the value was rewritten, false otherwise.
//
// NOTE(review): the // match / // cond / // result comment style and the
// numbered v0..v9 temporaries indicate this function is machine-generated
// by rulegen from the AtomicAnd8 rules (visible in this CL's MIPS64.rules
// hunk) — edits belong in the rules file, not here.
func rewriteValueMIPS64_OpAtomicAnd8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (AtomicAnd8 ptr val mem)
// cond: !config.BigEndian
// result: (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))) (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64> (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))))) mem)
for {
ptr := v_0
val := v_1
mem := v_2
if !(!config.BigEndian) {
break
}
v.reset(OpMIPS64LoweredAtomicAnd32)
// v0 = aligned word address: ptr &^ 3.
v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
v1.AuxInt = int64ToAuxInt(^3)
v0.AddArg2(v1, ptr)
// v2 = (val-in-lane) | (all-ones outside the lane).
v2 := b.NewValue0(v.Pos, OpMIPS64OR, typ.UInt64)
v3 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v4.AddArg(val)
// v5 = bit shift amount: (ptr & 3) << 3, i.e. (ptr&3)*8.
v5 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
v5.AuxInt = int64ToAuxInt(3)
v6 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
v6.AuxInt = int64ToAuxInt(3)
v6.AddArg(ptr)
v5.AddArg(v6)
v3.AddArg2(v4, v5)
// v7 = ^(0xff << shift): NORconst [0] computes bitwise NOT.
v7 := b.NewValue0(v.Pos, OpMIPS64NORconst, typ.UInt64)
v7.AuxInt = int64ToAuxInt(0)
v8 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt64)
v9 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
v9.AuxInt = int64ToAuxInt(0xff)
v8.AddArg2(v9, v5) // shift amount v5 is shared with the value shift above
v7.AddArg(v8)
v2.AddArg2(v3, v7)
v.AddArg3(v0, v2, mem)
return true
}
// match: (AtomicAnd8 ptr val mem)
// cond: config.BigEndian
// result: (LoweredAtomicAnd32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (OR <typ.UInt64> (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))) (NORconst [0] <typ.UInt64> (SLLV <typ.UInt64> (MOVVconst [0xff]) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))))) mem)
for {
ptr := v_0
val := v_1
mem := v_2
if !(config.BigEndian) {
break
}
v.reset(OpMIPS64LoweredAtomicAnd32)
v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
v1.AuxInt = int64ToAuxInt(^3)
v0.AddArg2(v1, ptr)
v2 := b.NewValue0(v.Pos, OpMIPS64OR, typ.UInt64)
v3 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v4.AddArg(val)
// v5 = ((ptr^3) & 3) * 8: XORconst [3] mirrors the byte-lane index
// within the word for big-endian layouts.
v5 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
v5.AuxInt = int64ToAuxInt(3)
v6 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
v6.AuxInt = int64ToAuxInt(3)
v7 := b.NewValue0(v.Pos, OpMIPS64XORconst, typ.UInt64)
v7.AuxInt = int64ToAuxInt(3)
v7.AddArg(ptr)
v6.AddArg(v7)
v5.AddArg(v6)
v3.AddArg2(v4, v5)
// v8 = ^(0xff << shift), the keep-other-bytes mask (see LE case).
v8 := b.NewValue0(v.Pos, OpMIPS64NORconst, typ.UInt64)
v8.AuxInt = int64ToAuxInt(0)
v9 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt64)
v10 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
v10.AuxInt = int64ToAuxInt(0xff)
v9.AddArg2(v10, v5)
v8.AddArg(v9)
v2.AddArg2(v3, v8)
v.AddArg3(v0, v2, mem)
return true
}
return false
}
func rewriteValueMIPS64_OpAtomicCompareAndSwap32(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
@ -719,6 +815,74 @@ func rewriteValueMIPS64_OpAtomicCompareAndSwap32(v *Value) bool {
return true
}
}
// rewriteValueMIPS64_OpAtomicOr8 lowers the generic AtomicOr8 op for mips64,
// which has no byte-wide atomic OR. The byte operation is emulated with
// LoweredAtomicOr32 on the 4-byte-aligned word containing the byte:
//   - the word address is ptr &^ 3 (built as AND of ptr with MOVVconst [^3]);
//   - val is zero-extended to 32 bits and shifted left by (ptr&3)*8 so it
//     lands in its byte lane (on big-endian the lane index is mirrored via
//     XORconst [3] on ptr before masking).
// Unlike the AtomicAnd8 lowering, no mask for the other lanes is needed:
// ORing with the zero bits outside the lane already leaves them unchanged.
// Returns true if the value was rewritten, false otherwise.
//
// NOTE(review): the // match / // cond / // result comment style and the
// numbered v0..v6 temporaries indicate this function is machine-generated
// by rulegen from the AtomicOr8 rules (visible in this CL's MIPS64.rules
// hunk) — edits belong in the rules file, not here.
func rewriteValueMIPS64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (AtomicOr8 ptr val mem)
// cond: !config.BigEndian
// result: (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] ptr))) mem)
for {
ptr := v_0
val := v_1
mem := v_2
if !(!config.BigEndian) {
break
}
v.reset(OpMIPS64LoweredAtomicOr32)
// v0 = aligned word address: ptr &^ 3.
v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
v1.AuxInt = int64ToAuxInt(^3)
v0.AddArg2(v1, ptr)
// v2 = zero-extended val shifted into its byte lane.
v2 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v3.AddArg(val)
// v4 = bit shift amount: (ptr & 3) << 3, i.e. (ptr&3)*8.
v4 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
v4.AuxInt = int64ToAuxInt(3)
v5 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
v5.AuxInt = int64ToAuxInt(3)
v5.AddArg(ptr)
v4.AddArg(v5)
v2.AddArg2(v3, v4)
v.AddArg3(v0, v2, mem)
return true
}
// match: (AtomicOr8 ptr val mem)
// cond: config.BigEndian
// result: (LoweredAtomicOr32 (AND <typ.UInt32Ptr> (MOVVconst [^3]) ptr) (SLLV <typ.UInt32> (ZeroExt8to32 val) (SLLVconst <typ.UInt64> [3] (ANDconst <typ.UInt64> [3] (XORconst <typ.UInt64> [3] ptr)))) mem)
for {
ptr := v_0
val := v_1
mem := v_2
if !(config.BigEndian) {
break
}
v.reset(OpMIPS64LoweredAtomicOr32)
v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
v1.AuxInt = int64ToAuxInt(^3)
v0.AddArg2(v1, ptr)
v2 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
v3.AddArg(val)
// v4 = ((ptr^3) & 3) * 8: XORconst [3] mirrors the byte-lane index
// within the word for big-endian layouts.
v4 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
v4.AuxInt = int64ToAuxInt(3)
v5 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
v5.AuxInt = int64ToAuxInt(3)
v6 := b.NewValue0(v.Pos, OpMIPS64XORconst, typ.UInt64)
v6.AuxInt = int64ToAuxInt(3)
v6.AddArg(ptr)
v5.AddArg(v6)
v4.AddArg(v5)
v2.AddArg2(v3, v4)
v.AddArg3(v0, v2, mem)
return true
}
return false
}
func rewriteValueMIPS64_OpAvg64u(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View File

@ -4250,25 +4250,25 @@ func InitTables() {
s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "And",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "Or8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("runtime/internal/atomic", "Or",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())