From ae7d5f84f8b8fee22f65737f2c192b90667e2683 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 15 Nov 2017 14:54:24 -0800 Subject: [PATCH] runtime: buffered write barrier for ppc64 Updates #22460. Change-Id: I6040c4024111c80361c81eb7eec5071ec9efb4f9 Reviewed-on: https://go-review.googlesource.com/92702 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/gc/main.go | 2 +- src/cmd/compile/internal/ppc64/ssa.go | 6 ++ src/cmd/compile/internal/ssa/gen/PPC64.rules | 3 + src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 5 ++ src/cmd/compile/internal/ssa/opGen.go | 15 +++++ src/cmd/compile/internal/ssa/rewritePPC64.go | 20 ++++++ src/cmd/vet/all/whitelist/ppc64x.txt | 2 + src/runtime/asm_ppc64x.s | 71 ++++++++++++++++++++ 8 files changed, 123 insertions(+), 1 deletion(-) diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 2eef7b7c7b..a463b222e1 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -408,7 +408,7 @@ func Main(archInit func(*Arch)) { } switch objabi.GOARCH { - case "amd64", "amd64p32", "386", "arm", "arm64", "mips64", "mips64le", "mips", "mipsle": + case "amd64", "amd64p32", "386", "arm", "arm64", "ppc64", "ppc64le", "mips64", "mips64le", "mips", "mipsle": default: // Other architectures don't support the buffered // write barrier yet. diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 008d9658f4..7a2e2c1878 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -1086,6 +1086,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { q.To.Reg = ppc64.REG_R2 } + case ssa.OpPPC64LoweredWB: + p := s.Prog(obj.ACALL) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = v.Aux.(*obj.LSym) + case ssa.OpPPC64LoweredNilCheck: // Issue a load which will fault if arg is nil. p := s.Prog(ppc64.AMOVBZ) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 4fd6a5a102..b9587b148d 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -651,6 +651,9 @@ (IsSliceInBounds idx len) -> (LessEqual (CMPU idx len)) (NilCheck ptr mem) -> (LoweredNilCheck ptr mem) +// Write barrier. +(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem) + // Optimizations // Note that PPC "logical" immediates come in 0:15 and 16:31 unsigned immediate forms, // so ORconst, XORconst easily expand into a pair. diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index c6269e0f48..2043887a78 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -474,6 +474,11 @@ func init() { {name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true}, {name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true}, + // LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier + // It preserves R0 through R15, g, and its arguments R20 and R21, + // but may clobber anything else, including R31 (REGTMP). 
+ {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R20 R21 g")) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"}, + // (InvertFlags (CMP a b)) == (CMP b a) // So if we want (LessThan (CMP a b)) but we can't do that because a is a constant, // then we do (LessThan (InvertFlags (CMP b a))) instead. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 4246048e63..6859f36e51 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1492,6 +1492,7 @@ const ( OpPPC64LoweredAtomicCas32 OpPPC64LoweredAtomicAnd8 OpPPC64LoweredAtomicOr8 + OpPPC64LoweredWB OpPPC64InvertFlags OpPPC64FlagEQ OpPPC64FlagLT @@ -19243,6 +19244,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "LoweredWB", + auxType: auxSym, + argLen: 3, + clobberFlags: true, + symEffect: SymNone, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1048576}, // R20 + {1, 2097152}, // R21 + }, + clobbers: 576460746931503104, // R16 R17 R18 R19 R22 R23 R24 R25 R26 R27 R28 R29 R31 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 + }, + }, { name: "InvertFlags", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 6a000f4431..ac7df6b997 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -623,6 +623,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpTrunc64to32_0(v) case OpTrunc64to8: return rewriteValuePPC64_OpTrunc64to8_0(v) + case OpWB: + return rewriteValuePPC64_OpWB_0(v) case OpXor16: return rewriteValuePPC64_OpXor16_0(v) case OpXor32: @@ -43015,6 +43017,24 @@ func rewriteValuePPC64_OpTrunc64to8_0(v *Value) bool { return true } } +func rewriteValuePPC64_OpWB_0(v *Value) bool { + // match: (WB {fn} destptr srcptr mem) + // cond: + // result: (LoweredWB {fn} destptr srcptr mem) + for { + fn := v.Aux + _ = v.Args[2] + destptr := v.Args[0] + srcptr := v.Args[1] + mem := v.Args[2] + v.reset(OpPPC64LoweredWB) + v.Aux = fn + v.AddArg(destptr) + v.AddArg(srcptr) + v.AddArg(mem) + return true + } +} func rewriteValuePPC64_OpXor16_0(v *Value) bool { // match: (Xor16 x y) // cond: diff --git a/src/cmd/vet/all/whitelist/ppc64x.txt b/src/cmd/vet/all/whitelist/ppc64x.txt index 4f6444e102..84b8f18b53 100644 --- a/src/cmd/vet/all/whitelist/ppc64x.txt +++ b/src/cmd/vet/all/whitelist/ppc64x.txt @@ -10,3 +10,5 @@ runtime/asm_ppc64x.s: [GOARCH] addmoduledata: function addmoduledata missing Go runtime/duff_ppc64x.s: [GOARCH] duffzero: function duffzero missing Go declaration runtime/tls_ppc64x.s: [GOARCH] save_g: function save_g missing Go declaration runtime/tls_ppc64x.s: [GOARCH] load_g: function load_g missing Go declaration + +runtime/asm_ARCHSUFF.s: [GOARCH] gcWriteBarrier: function gcWriteBarrier missing Go declaration diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index e02ca16907..c0e872f7a9 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -1495,3 +1495,74 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 MOVW $1, R3 MOVB R3, ret+0(FP) RET + +// gcWriteBarrier performs a heap pointer write and informs the GC. +// +// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments: +// - R20 is the destination of the write +// - R21 is the value being written at R20. +// It clobbers condition codes. 
+// It does not clobber R0 through R15,
+// but may clobber any other register, *including* R31.
+TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$112
+	// The standard prologue clobbers R31.
+	// We use R16 and R17 as scratch registers.
+	MOVD	g_m(g), R16
+	MOVD	m_p(R16), R16
+	MOVD	(p_wbBuf+wbBuf_next)(R16), R17
+	// Increment wbBuf.next position.
+	ADD	$16, R17
+	MOVD	R17, (p_wbBuf+wbBuf_next)(R16)
+	MOVD	(p_wbBuf+wbBuf_end)(R16), R16
+	CMP	R16, R17
+	// Record the write.
+	MOVD	R21, -16(R17)	// Record value
+	MOVD	(R20), R16	// TODO: This turns bad writes into bad reads.
+	MOVD	R16, -8(R17)	// Record *slot
+	// Is the buffer full? (flags set in CMP above)
+	BEQ	flush
+ret:
+	// Do the write.
+	MOVD	R21, (R20)
+	RET
+
+flush:
+	// Save registers R0 through R15 since these were not saved by the caller.
+	// We don't save all registers on ppc64 because it takes too much space.
+	MOVD	R20, (FIXED_FRAME+0)(R1)	// Also first argument to wbBufFlush
+	MOVD	R21, (FIXED_FRAME+8)(R1)	// Also second argument to wbBufFlush
+	// R0 is always 0, so no need to spill.
+	// R1 is SP.
+	// R2 is SB.
+	MOVD	R3, (FIXED_FRAME+16)(R1)
+	MOVD	R4, (FIXED_FRAME+24)(R1)
+	MOVD	R5, (FIXED_FRAME+32)(R1)
+	MOVD	R6, (FIXED_FRAME+40)(R1)
+	MOVD	R7, (FIXED_FRAME+48)(R1)
+	MOVD	R8, (FIXED_FRAME+56)(R1)
+	MOVD	R9, (FIXED_FRAME+64)(R1)
+	MOVD	R10, (FIXED_FRAME+72)(R1)
+	MOVD	R11, (FIXED_FRAME+80)(R1)
+	MOVD	R12, (FIXED_FRAME+88)(R1)
+	// R13 is REGTLS
+	MOVD	R14, (FIXED_FRAME+96)(R1)
+	MOVD	R15, (FIXED_FRAME+104)(R1)
+
+	// This takes arguments R20 and R21.
+	CALL	runtime·wbBufFlush(SB)
+
+	MOVD	(FIXED_FRAME+0)(R1), R20
+	MOVD	(FIXED_FRAME+8)(R1), R21
+	MOVD	(FIXED_FRAME+16)(R1), R3
+	MOVD	(FIXED_FRAME+24)(R1), R4
+	MOVD	(FIXED_FRAME+32)(R1), R5
+	MOVD	(FIXED_FRAME+40)(R1), R6
+	MOVD	(FIXED_FRAME+48)(R1), R7
+	MOVD	(FIXED_FRAME+56)(R1), R8
+	MOVD	(FIXED_FRAME+64)(R1), R9
+	MOVD	(FIXED_FRAME+72)(R1), R10
+	MOVD	(FIXED_FRAME+80)(R1), R11
+	MOVD	(FIXED_FRAME+88)(R1), R12
+	MOVD	(FIXED_FRAME+96)(R1), R14
+	MOVD	(FIXED_FRAME+104)(R1), R15
+	JMP	ret
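
The fast path above is easier to follow when restated as ordinary Go. The sketch below is a standalone illustration, not part of the patch: wbBuf, reset, flush, and writeBarrier here are simplified stand-ins for the per-P buffer in runtime/mwbbuf.go and for runtime.wbBufFlush, and it ignores the constraints (no Go ABI, no stack growth, no recursive write barriers) that force the real implementation into assembly.

package main

import (
	"fmt"
	"unsafe"
)

// wbBuf is a simplified stand-in for the per-P write-barrier buffer in
// runtime/mwbbuf.go: next and end are addresses into buf, and each
// recorded write occupies two pointer-sized slots (the value written,
// then the old contents of the slot), matching the -16(R17) and -8(R17)
// stores in the assembly above.
type wbBuf struct {
	next uintptr
	end  uintptr
	buf  [256]uintptr
}

func (b *wbBuf) reset() {
	start := uintptr(unsafe.Pointer(&b.buf[0]))
	b.next = start
	b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
}

// flush stands in for runtime.wbBufFlush, which greys every pointer
// recorded in the buffer and then resets it; here we only reset.
func (b *wbBuf) flush() {
	b.reset()
}

// writeBarrier mirrors the assembly fast path: reserve two slots
// (ADD $16, R17), record the value and *slot, flush if the buffer is
// now full (CMP/BEQ above), and finally perform the deferred write.
func (b *wbBuf) writeBarrier(slot *uintptr, val uintptr) {
	next := b.next + 2*unsafe.Sizeof(uintptr(0))
	b.next = next
	rec := (*[2]uintptr)(unsafe.Pointer(next - 2*unsafe.Sizeof(uintptr(0))))
	rec[0] = val   // record value
	rec[1] = *slot // record *slot
	if next == b.end {
		b.flush()
	}
	*slot = val // the pointer write the barrier was guarding
}

func main() {
	var b wbBuf
	b.reset()
	var slot uintptr
	b.writeBarrier(&slot, 0x1234)
	fmt.Printf("slot=%#x buffered=%d\n", slot,
		(b.next-uintptr(unsafe.Pointer(&b.buf[0])))/unsafe.Sizeof(uintptr(0)))
}

Recording both the new value and the old contents of the slot is what lets wbBufFlush grey both pointers, as the hybrid barrier requires; the buffered scheme simply batches that greying work until a P's buffer fills.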