1
0
mirror of https://github.com/golang/go synced 2024-11-26 05:37:57 -07:00

runtime: emit a ProcSteal from entersyscall_gcwait

Currently entersyscall_gcwait always emits a ProcStop event. Most of the
time, this is correct, since the thread that just put the P into
_Psyscall is the same one that is putting it into _Pgcstop. However it's
possible for another thread to steal the P, start running a goroutine,
and then enter another syscall, putting the P back into _Psyscall. In
this case ProcStop is incorrect; the P is getting stolen. This leads to
broken traces.

Fix this by always emitting a ProcSteal event from entersyscall_gcwait.
This means that most of the time a thread will be 'stealing' the proc
from itself when it enters this function, but that's theoretically fine.
A ProcSteal is really just a fancy ProcStop.

Well, it would be if the parser correctly handled a self-steal. This is
a minor bug that just never came up before, but it's an update order
error (the mState is looked up and modified, but then it's modified
again at the end of the function to match newCtx). There's really no
reason a self-steal shouldn't be allowed, so fix that up and add a test.

Change-Id: Iec3d7639d331e3f2d127f92ce50c2c4a7818fcd3
Reviewed-on: https://go-review.googlesource.com/c/go/+/544215
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Michael Anthony Knyszek 2023-11-21 03:23:05 +00:00 committed by Michael Knyszek
parent ff05cdbd2b
commit b6b72c775a
5 changed files with 89 additions and 10 deletions

View File

@ -206,6 +206,17 @@ func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64)
// Validate that the M we're stealing from is what we expect.
mid := ThreadID(ev.args[2]) // The M we're stealing from.
if mid == curCtx.M {
// We're stealing from ourselves. This behaves like a ProcStop.
if curCtx.P != pid {
return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
}
newCtx.P = NoProc
return curCtx, true, nil
}
// We're stealing from some other M.
mState, ok := o.mStates[mid]
if !ok {
return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)

View File

@ -0,0 +1,37 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Tests syscall P stealing.
//
// Specifically, it tests a scenario where a thread 'steals'
// a P from itself. It's just a ProcStop with extra steps when
// it happens on the same P.
package main
import (
"internal/trace/v2"
"internal/trace/v2/event/go122"
testgen "internal/trace/v2/internal/testgen/go122"
)
func main() {
testgen.Main(gen)
}
func gen(t *testgen.Trace) {
t.DisableTimestamps()
g := t.Generation(1)
// A goroutine execute a syscall and steals its own P, then starts running
// on that P.
b0 := g.Batch(trace.ThreadID(0), 0)
b0.Event("ProcStatus", trace.ProcID(0), go122.ProcRunning)
b0.Event("GoStatus", trace.GoID(1), trace.ThreadID(0), go122.GoRunning)
b0.Event("GoSyscallBegin", testgen.Seq(1), testgen.NoStack)
b0.Event("ProcSteal", trace.ProcID(0), testgen.Seq(2), trace.ThreadID(0))
b0.Event("ProcStart", trace.ProcID(0), testgen.Seq(3))
b0.Event("GoSyscallEndBlocked")
}

View File

@ -0,0 +1,17 @@
-- expect --
SUCCESS
-- trace --
Trace Go1.22
EventBatch gen=1 m=0 time=0 size=24
ProcStatus dt=0 p=0 pstatus=1
GoStatus dt=0 g=1 m=0 gstatus=2
GoSyscallBegin dt=0 p_seq=1 stack=0
ProcSteal dt=0 p=0 p_seq=2 m=0
ProcStart dt=0 p=0 p_seq=3
GoSyscallEndBlocked dt=0
EventBatch gen=1 m=18446744073709551615 time=0 size=5
Frequency freq=15625000
EventBatch gen=1 m=18446744073709551615 time=0 size=1
Stacks
EventBatch gen=1 m=18446744073709551615 time=0 size=1
Strings

View File

@ -4407,10 +4407,21 @@ func entersyscall_gcwait() {
if sched.stopwait > 0 && atomic.Cas(&pp.status, _Psyscall, _Pgcstop) {
trace := traceAcquire()
if trace.ok() {
trace.GoSysBlock(pp)
// N.B. ProcSteal not necessary because if we succeed we're
// always stopping the P we just put into the syscall status.
trace.ProcStop(pp)
if goexperiment.ExecTracer2 {
// This is a steal in the new tracer. While it's very likely
// that we were the ones to put this P into _Psyscall, between
// then and now it's totally possible it had been stolen and
// then put back into _Psyscall for us to acquire here. In such
// case ProcStop would be incorrect.
//
// TODO(mknyszek): Consider emitting a ProcStop instead when
// gp.m.syscalltick == pp.syscalltick, since then we know we never
// lost the P.
trace.ProcSteal(pp, true)
} else {
trace.GoSysBlock(pp)
trace.ProcStop(pp)
}
traceRelease(trace)
}
pp.syscalltick++

View File

@ -493,10 +493,10 @@ func (tl traceLocker) GoSysExit(lostP bool) {
// ProcSteal indicates that our current M stole a P from another M.
//
// forMe indicates that the caller is stealing pp to wire it up to itself.
// inSyscall indicates that we're stealing the P from a syscall context.
//
// The caller must have ownership of pp.
func (tl traceLocker) ProcSteal(pp *p, forMe bool) {
func (tl traceLocker) ProcSteal(pp *p, inSyscall bool) {
// Grab the M ID we stole from.
mStolenFrom := pp.trace.mSyscallID
pp.trace.mSyscallID = -1
@ -506,17 +506,20 @@ func (tl traceLocker) ProcSteal(pp *p, forMe bool) {
// the P just to get its attention (e.g. STW or sysmon retake) or we're trying to steal a P for
// ourselves specifically to keep running. The two contexts look different, but can be summarized
// fairly succinctly. In the former, we're a regular running goroutine and proc, if we have either.
// In the latter, we're a goroutine in a syscall,
// In the latter, we're a goroutine in a syscall.
goStatus := traceGoRunning
procStatus := traceProcRunning
if forMe {
if inSyscall {
goStatus = traceGoSyscall
procStatus = traceProcSyscallAbandoned
}
w := tl.eventWriter(goStatus, procStatus)
// Emit the status of the P we're stealing. We may have *just* done this, but we may not have,
// even if forMe is true, depending on whether we wired the P to ourselves already.
// Emit the status of the P we're stealing. We may have *just* done this when creating the event
// writer but it's not guaranteed, even if inSyscall is true. Although it might seem like from a
// syscall context we're always stealing a P for ourselves, we may have not wired it up yet (so
// it wouldn't be visible to eventWriter) or we may not even intend to wire it up to ourselves
// at all (e.g. entersyscall_gcwait).
if !pp.trace.statusWasTraced(tl.gen) && pp.trace.acquireStatus(tl.gen) {
// Careful: don't use the event writer. We never want status or in-progress events
// to trigger more in-progress events.