From 75add1ce0e4424f2a72cd40835bafeafd80641d1 Mon Sep 17 00:00:00 2001
From: Junxian Zhu
Date: Mon, 15 May 2023 14:06:02 +0800
Subject: [PATCH] cmd/compile: intrinsify runtime/internal/atomic.{And,Or} on MIPS64x

This CL intrinsifies atomic{And,Or} on mips64x, which are already
implemented on mipsx.

goos: linux
goarch: mips64le
pkg: runtime/internal/atomic
                  │  oldatomic  │              newatomic              │
                  │   sec/op    │    sec/op     vs base               │
AtomicLoad64-4      27.96n ± 0%    28.02n ± 0%   +0.20% (p=0.026 n=8)
AtomicStore64-4     29.14n ± 0%    29.21n ± 0%   +0.22% (p=0.004 n=8)
AtomicLoad-4        27.96n ± 0%    28.02n ± 0%        ~ (p=0.220 n=8)
AtomicStore-4       29.15n ± 0%    29.21n ± 0%   +0.19% (p=0.002 n=8)
And8-4              53.09n ± 0%    41.71n ± 0%  -21.44% (p=0.000 n=8)
And-4               49.87n ± 0%    39.93n ± 0%  -19.93% (p=0.000 n=8)
And8Parallel-4      70.45n ± 0%    68.58n ± 0%   -2.65% (p=0.000 n=8)
AndParallel-4       70.40n ± 0%    67.95n ± 0%   -3.47% (p=0.000 n=8)
Or8-4               52.09n ± 0%    41.11n ± 0%  -21.08% (p=0.000 n=8)
Or-4                49.80n ± 0%    39.87n ± 0%  -19.93% (p=0.000 n=8)
Or8Parallel-4       70.43n ± 0%    68.25n ± 0%   -3.08% (p=0.000 n=8)
OrParallel-4        70.42n ± 0%    67.94n ± 0%   -3.51% (p=0.000 n=8)
Xadd-4              67.83n ± 0%    67.92n ± 0%   +0.13% (p=0.003 n=8)
Xadd64-4            67.85n ± 0%    67.92n ± 0%   +0.09% (p=0.021 n=8)
Cas-4               81.34n ± 0%    81.37n ± 0%        ~ (p=0.859 n=8)
Cas64-4             81.43n ± 0%    81.53n ± 0%   +0.13% (p=0.001 n=8)
Xchg-4              67.15n ± 0%    67.18n ± 0%        ~ (p=0.367 n=8)
Xchg64-4            67.16n ± 0%    67.21n ± 0%   +0.08% (p=0.008 n=8)
geomean             54.04n         51.01n        -5.61%

Change-Id: I9a4353f4b14134f1e9cf0dcf99db3feb951328ed
Reviewed-on: https://go-review.googlesource.com/c/go/+/494875
Reviewed-by: Cherry Mui
Reviewed-by: Keith Randall
Run-TryBot: Joel Sing
Reviewed-by: Junxian Zhu
TryBot-Result: Gopher Robot
---
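[Editor's note, not part of the CL] The AtomicAnd8/AtomicOr8 rules in this
patch emulate a 1-byte atomic RMW with the new 4-byte LoweredAtomicAnd32/Or32
ops: align the pointer down to its containing 32-bit word (ptr &^ 3), then
shift the operand into the right byte lane — (ptr & 3) * 8 on little-endian,
((ptr ^ 3) & 3) * 8 on big-endian. A rough Go model of the little-endian Or8
case follows. It is a sketch only: a CompareAndSwap loop stands in for the
SYNC/LL/OR/SC/BEQ sequence the backend actually emits, and or8 is a
hypothetical name, not the runtime's API.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// or8 models AtomicOr8 in terms of a 32-bit atomic RMW, mirroring the
// little-endian rewrite rule: word address ptr &^ 3, lane (ptr & 3) * 8.
func or8(addr *uint8, val uint8) {
	p := uintptr(unsafe.Pointer(addr))
	word := (*uint32)(unsafe.Pointer(p &^ 3)) // containing aligned word
	mask := uint32(val) << ((p & 3) * 8)      // val shifted into its lane
	// The CAS loop plays the role of the LL/SC retry loop.
	for {
		old := atomic.LoadUint32(word)
		if atomic.CompareAndSwapUint32(word, old, old|mask) {
			return
		}
	}
}

func main() {
	var w uint32 // 4-byte aligned backing word
	b := (*[4]uint8)(unsafe.Pointer(&w))
	or8(&b[2], 0x81)
	fmt.Printf("%#08x\n", w) // 0x00810000 on little-endian
}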
 src/cmd/compile/internal/mips64/ssa.go        |  37 ++++
 .../compile/internal/ssa/_gen/MIPS64.rules    |  40 +++++
 .../compile/internal/ssa/_gen/MIPS64Ops.go    |  11 ++
 src/cmd/compile/internal/ssa/opGen.go         |  30 ++++
 src/cmd/compile/internal/ssa/rewriteMIPS64.go | 164 ++++++++++++++++++
 src/cmd/compile/internal/ssagen/ssa.go        |   8 +-
 6 files changed, 286 insertions(+), 4 deletions(-)

diff --git a/src/cmd/compile/internal/mips64/ssa.go b/src/cmd/compile/internal/mips64/ssa.go
index dda7c4fdd61..f22ac156506 100644
--- a/src/cmd/compile/internal/mips64/ssa.go
+++ b/src/cmd/compile/internal/mips64/ssa.go
@@ -672,6 +672,43 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p4.Reg = v.Reg0()
 		p4.To.Type = obj.TYPE_REG
 		p4.To.Reg = v.Reg0()
+	case ssa.OpMIPS64LoweredAtomicAnd32,
+		ssa.OpMIPS64LoweredAtomicOr32:
+		// SYNC
+		// LL	(Rarg0), Rtmp
+		// AND/OR	Rarg1, Rtmp
+		// SC	Rtmp, (Rarg0)
+		// BEQ	Rtmp, -3(PC)
+		// SYNC
+		s.Prog(mips.ASYNC)
+
+		p := s.Prog(mips.ALL)
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = mips.REGTMP
+
+		p1 := s.Prog(v.Op.Asm())
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = v.Args[1].Reg()
+		p1.Reg = mips.REGTMP
+		p1.To.Type = obj.TYPE_REG
+		p1.To.Reg = mips.REGTMP
+
+		p2 := s.Prog(mips.ASC)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = mips.REGTMP
+		p2.To.Type = obj.TYPE_MEM
+		p2.To.Reg = v.Args[0].Reg()
+
+		p3 := s.Prog(mips.ABEQ)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = mips.REGTMP
+		p3.To.Type = obj.TYPE_BRANCH
+		p3.To.SetTarget(p)
+
+		s.Prog(mips.ASYNC)
+
 	case ssa.OpMIPS64LoweredAtomicCas32, ssa.OpMIPS64LoweredAtomicCas64:
 		// MOVV	$0, Rout
 		// SYNC
diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules
index 3aa6a1b4206..360a5c0905e 100644
--- a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules
@@ -399,6 +399,46 @@
 (AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem)
 (AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...)
 
+// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3, uint32(val) << ((ptr & 3) * 8))
+(AtomicOr8 ptr val mem) && !config.BigEndian =>
+	(LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr)
+		(SLLV (ZeroExt8to32 val)
+			(SLLVconst [3]
+				(ANDconst [3] ptr))) mem)
+
+// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3, (uint32(val) << ((ptr & 3) * 8)) | ^(uint32(0xFF) << ((ptr & 3) * 8)))
+(AtomicAnd8 ptr val mem) && !config.BigEndian =>
+	(LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr)
+		(OR (SLLV (ZeroExt8to32 val)
+			(SLLVconst [3]
+				(ANDconst [3] ptr)))
+		(NORconst [0] (SLLV
+			(MOVVconst [0xff]) (SLLVconst [3]
+				(ANDconst [3] ptr))))) mem)
+
+// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3, uint32(val) << (((ptr^3) & 3) * 8))
+(AtomicOr8 ptr val mem) && config.BigEndian =>
+	(LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr)
+		(SLLV (ZeroExt8to32 val)
+			(SLLVconst [3]
+				(ANDconst [3]
+					(XORconst [3] ptr)))) mem)
+
+// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3, (uint32(val) << (((ptr^3) & 3) * 8)) | ^(uint32(0xFF) << (((ptr^3) & 3) * 8)))
+(AtomicAnd8 ptr val mem) && config.BigEndian =>
+	(LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr)
+		(OR (SLLV (ZeroExt8to32 val)
+			(SLLVconst [3]
+				(ANDconst [3]
+					(XORconst [3] ptr))))
+		(NORconst [0] (SLLV
+			(MOVVconst [0xff]) (SLLVconst [3]
+				(ANDconst [3]
+					(XORconst [3] ptr)))))) mem)
+
+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
+(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
+
 // checks
 (NilCheck ...) => (LoweredNilCheck ...)
 (IsNonNil ptr) => (SGTU ptr (MOVVconst [0]))
diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go b/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go
index 2b3dd487cde..a5253d8667f 100644
--- a/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/MIPS64Ops.go
@@ -361,6 +361,17 @@ func init() {
 			faultOnNilArg1: true,
 		},
 
+		// atomic and/or.
+		// *arg0 &= (|=) arg1. arg2=mem. returns memory.
+		// SYNC
+		// LL	(Rarg0), Rtmp
+		// AND	Rarg1, Rtmp
+		// SC	Rtmp, (Rarg0)
+		// BEQ	Rtmp, -3(PC)
+		// SYNC
+		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+
 		// atomic loads.
 		// load from arg0. arg1=mem.
 		// returns <value,memory> so they can be properly ordered with other loads.
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index b8ef89d76e5..e429d0de53f 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2053,6 +2053,8 @@ const (
 	OpMIPS64DUFFCOPY
 	OpMIPS64LoweredZero
 	OpMIPS64LoweredMove
+	OpMIPS64LoweredAtomicAnd32
+	OpMIPS64LoweredAtomicOr32
 	OpMIPS64LoweredAtomicLoad8
 	OpMIPS64LoweredAtomicLoad32
 	OpMIPS64LoweredAtomicLoad64
@@ -27484,6 +27486,34 @@ var opcodeTable = [...]opInfo{
 			clobbers: 6, // R1 R2
 		},
 	},
+	{
+		name:           "LoweredAtomicAnd32",
+		argLen:         3,
+		faultOnNilArg0: true,
+		hasSideEffects: true,
+		unsafePoint:    true,
+		asm:            mips.AAND,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 234881022},           // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 g R31
+				{0, 4611686018695823358}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 SP g R31 SB
+			},
+		},
+	},
+	{
+		name:           "LoweredAtomicOr32",
+		argLen:         3,
+		faultOnNilArg0: true,
+		hasSideEffects: true,
+		unsafePoint:    true,
+		asm:            mips.AOR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 234881022},           // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 g R31
+				{0, 4611686018695823358}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R24 R25 SP g R31 SB
+			},
+		},
+	},
 	{
 		name:   "LoweredAtomicLoad8",
 		argLen: 2,
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go
index 16426c300fc..89deaf746d4 100644
--- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go
+++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go
@@ -53,6 +53,11 @@ func rewriteValueMIPS64(v *Value) bool {
 	case OpAtomicAdd64:
 		v.Op = OpMIPS64LoweredAtomicAdd64
 		return true
+	case OpAtomicAnd32:
+		v.Op = OpMIPS64LoweredAtomicAnd32
+		return true
+	case OpAtomicAnd8:
+		return rewriteValueMIPS64_OpAtomicAnd8(v)
 	case OpAtomicCompareAndSwap32:
 		return rewriteValueMIPS64_OpAtomicCompareAndSwap32(v)
 	case OpAtomicCompareAndSwap64:
@@ -76,6 +81,11 @@ func rewriteValueMIPS64(v *Value) bool {
 	case OpAtomicLoadPtr:
 		v.Op = OpMIPS64LoweredAtomicLoad64
 		return true
+	case OpAtomicOr32:
+		v.Op = OpMIPS64LoweredAtomicOr32
+		return true
+	case OpAtomicOr8:
+		return rewriteValueMIPS64_OpAtomicOr8(v)
 	case OpAtomicStore32:
 		v.Op = OpMIPS64LoweredAtomicStore32
 		return true
@@ -698,6 +708,92 @@ func rewriteValueMIPS64_OpAddr(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueMIPS64_OpAtomicAnd8(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	typ := &b.Func.Config.Types
+	// match: (AtomicAnd8 ptr val mem)
+	// cond: !config.BigEndian
+	// result: (LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr) (OR (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] ptr))) (NORconst [0] (SLLV (MOVVconst [0xff]) (SLLVconst [3] (ANDconst [3] ptr))))) mem)
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		if !(!config.BigEndian) {
+			break
+		}
+		v.reset(OpMIPS64LoweredAtomicAnd32)
+		v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+		v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(^3)
+		v0.AddArg2(v1, ptr)
+		v2 := b.NewValue0(v.Pos, OpMIPS64OR, typ.UInt64)
+		v3 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+		v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+		v4.AddArg(val)
+		v5 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+		v5.AuxInt = int64ToAuxInt(3)
+		v6 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+		v6.AuxInt = int64ToAuxInt(3)
+		v6.AddArg(ptr)
+		v5.AddArg(v6)
+		v3.AddArg2(v4, v5)
+		v7 := b.NewValue0(v.Pos, OpMIPS64NORconst, typ.UInt64)
+		v7.AuxInt = int64ToAuxInt(0)
+		v8 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt64)
+		v9 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+		v9.AuxInt = int64ToAuxInt(0xff)
+		v8.AddArg2(v9, v5)
+		v7.AddArg(v8)
+		v2.AddArg2(v3, v7)
+		v.AddArg3(v0, v2, mem)
+		return true
+	}
+	// match: (AtomicAnd8 ptr val mem)
+	// cond: config.BigEndian
+	// result: (LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr) (OR (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] (XORconst [3] ptr)))) (NORconst [0] (SLLV (MOVVconst [0xff]) (SLLVconst [3] (ANDconst [3] (XORconst [3] ptr)))))) mem)
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		if !(config.BigEndian) {
+			break
+		}
+		v.reset(OpMIPS64LoweredAtomicAnd32)
+		v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+		v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(^3)
+		v0.AddArg2(v1, ptr)
+		v2 := b.NewValue0(v.Pos, OpMIPS64OR, typ.UInt64)
+		v3 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+		v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+		v4.AddArg(val)
+		v5 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+		v5.AuxInt = int64ToAuxInt(3)
+		v6 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+		v6.AuxInt = int64ToAuxInt(3)
+		v7 := b.NewValue0(v.Pos, OpMIPS64XORconst, typ.UInt64)
+		v7.AuxInt = int64ToAuxInt(3)
+		v7.AddArg(ptr)
+		v6.AddArg(v7)
+		v5.AddArg(v6)
+		v3.AddArg2(v4, v5)
+		v8 := b.NewValue0(v.Pos, OpMIPS64NORconst, typ.UInt64)
+		v8.AuxInt = int64ToAuxInt(0)
+		v9 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt64)
+		v10 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+		v10.AuxInt = int64ToAuxInt(0xff)
+		v9.AddArg2(v10, v5)
+		v8.AddArg(v9)
+		v2.AddArg2(v3, v8)
+		v.AddArg3(v0, v2, mem)
+		return true
+	}
+	return false
+}
 func rewriteValueMIPS64_OpAtomicCompareAndSwap32(v *Value) bool {
 	v_3 := v.Args[3]
 	v_2 := v.Args[2]
@@ -719,6 +815,74 @@ func rewriteValueMIPS64_OpAtomicCompareAndSwap32(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueMIPS64_OpAtomicOr8(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	config := b.Func.Config
+	typ := &b.Func.Config.Types
+	// match: (AtomicOr8 ptr val mem)
+	// cond: !config.BigEndian
+	// result: (LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr) (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] ptr))) mem)
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		if !(!config.BigEndian) {
+			break
+		}
+		v.reset(OpMIPS64LoweredAtomicOr32)
+		v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+		v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(^3)
+		v0.AddArg2(v1, ptr)
+		v2 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+		v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+		v3.AddArg(val)
+		v4 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+		v4.AuxInt = int64ToAuxInt(3)
+		v5 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+		v5.AuxInt = int64ToAuxInt(3)
+		v5.AddArg(ptr)
+		v4.AddArg(v5)
+		v2.AddArg2(v3, v4)
+		v.AddArg3(v0, v2, mem)
+		return true
+	}
+	// match: (AtomicOr8 ptr val mem)
+	// cond: config.BigEndian
+	// result: (LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr) (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] (XORconst [3] ptr)))) mem)
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		if !(config.BigEndian) {
+			break
+		}
+		v.reset(OpMIPS64LoweredAtomicOr32)
+		v0 := b.NewValue0(v.Pos, OpMIPS64AND, typ.UInt32Ptr)
+		v1 := b.NewValue0(v.Pos, OpMIPS64MOVVconst, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(^3)
+		v0.AddArg2(v1, ptr)
+		v2 := b.NewValue0(v.Pos, OpMIPS64SLLV, typ.UInt32)
+		v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+		v3.AddArg(val)
+		v4 := b.NewValue0(v.Pos, OpMIPS64SLLVconst, typ.UInt64)
+		v4.AuxInt = int64ToAuxInt(3)
+		v5 := b.NewValue0(v.Pos, OpMIPS64ANDconst, typ.UInt64)
+		v5.AuxInt = int64ToAuxInt(3)
+		v6 := b.NewValue0(v.Pos, OpMIPS64XORconst, typ.UInt64)
+		v6.AuxInt = int64ToAuxInt(3)
+		v6.AddArg(ptr)
+		v5.AddArg(v6)
+		v4.AddArg(v5)
+		v2.AddArg2(v3, v4)
+		v.AddArg3(v0, v2, mem)
+		return true
+	}
+	return false
+}
 func rewriteValueMIPS64_OpAvg64u(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index 9248424b08d..fc57592084a 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -4250,25 +4250,25 @@ func InitTables() {
 			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "And",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
	addF("runtime/internal/atomic", "Or8",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "Or",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 
 	atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
 		s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
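[Editor's note, not part of the CL] A companion sketch for the And8 rules,
under the same caveats as the earlier one (hypothetical and8, CAS loop in
place of LL/SC): a plain 32-bit AND would clear the other three bytes of the
containing word, so the rules OR the shifted operand with a mask holding 0xff
in every other lane. The rules build that mask as NORconst [0] applied to
SLLV (MOVVconst [0xff]) shift, i.e. ^(0xff << shift).

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// and8 models AtomicAnd8 via a 32-bit AND, mirroring the little-endian
// rule: lanes other than the target byte AND against 0xff and survive.
func and8(addr *uint8, val uint8) {
	p := uintptr(unsafe.Pointer(addr))
	word := (*uint32)(unsafe.Pointer(p &^ 3))
	shift := (p & 3) * 8
	// NORconst [0] x computes ^x, so mask = val<<shift | ^(0xff<<shift).
	mask := uint32(val)<<shift | ^(uint32(0xff) << shift)
	for {
		old := atomic.LoadUint32(word)
		if atomic.CompareAndSwapUint32(word, old, old&mask) {
			return
		}
	}
}

func main() {
	w := uint32(0xffffffff)
	b := (*[4]uint8)(unsafe.Pointer(&w))
	and8(&b[1], 0x0f)
	fmt.Printf("%#08x\n", w) // 0xffff0fff on little-endian
}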