1
0
mirror of https://github.com/golang/go synced 2024-11-26 16:46:58 -07:00

runtime: replace sysBlockTraced with tracedSyscallEnter

sysBlockTraced is a subtle and confusing flag.

Currently, it's only used in one place: a condition around whether to
traceGoSysExit when a goroutine is about to start running. That condition
looks like "gp.syscallsp != 0 && gp.trace.sysBlockTraced".

In every case but one, "gp.syscallsp != 0" is equivalent to
"gp.trace.sysBlockTraced."

That one case is where a goroutine is running without a P and racing
with trace start (world is stopped). It switches itself back to
_Grunnable from _Gsyscall before the trace start goroutine notices, such
that the trace start goroutine fails to emit a EvGoInSyscall event for
it (EvGoInSyscall or EvGoSysBlock must precede any EvGoSysExit event).
sysBlockTraced is set unconditionally on every syscall entry and the
trace start goroutine clears it if there was no EvGoInSyscall event
emitted (i.e. did not observe _Gsyscall on the goroutine). That way when
the goroutine-without-a-P wakes up and gets scheduled, it only emits
EvGoSysExit if the flag is set, i.e. trace start didn't _clear_ the
flag.

What makes this confusing is the fact that the flag is set
unconditionally and the code relies on it being *cleared*. Really, all
it's trying to communicate is whether the tracer is aware of a
goroutine's syscall at the point where a goroutine that lost its P
during a syscall is trying to run again.

Therefore, we can replace this flag with a less subtle one:
tracedSyscallEnter. It is set when GoSysCall is traced, indicating on
the goroutine that the tracer is aware of the syscall. Later, if
traceGoSysExit is called, the tracer knows its safe to emit an event
because the tracer is aware of the syscall.

This flag is then also set at trace start, when it emits EvGoInSyscall,
which again, lets the goroutine know the tracer is aware of its syscall.

The flag is cleared by GoSysExit to indicate that the tracer is no
longer aware of any syscalls on the goroutine. It's also cleared by
trace start. This is necessary because a syscall may have been started
while a trace was stopping. If the GoSysExit isn't emitted (because it
races with the trace end STW) then the flag will be left set at the
start of the next trace period, which will result in an erroneous
GoSysExit. Instead, the flag is cleared in the same way sysBlockTraced
is today: if the tracer doesn't notice the goroutine is in a syscall, it
makes that explicit to the goroutine.

A more direct flag to use here would be one that explicitly indicates
whether EvGoInSyscall or EvGoSysBlock specifically were already emitted
for a goroutine. The reason why we don't just do this is because setting
the flag when EvGoSysBlock is emitted would be racy: EvGoSysBlock is
emitted by whatever thread is stealing the P out from under the
syscalling goroutine, so it would need to synchronize with the goroutine
its emitting the event for.

The end result of all this is that the new flag can be managed entirely
within trace.go, hiding another implementation detail about the tracer.

Tested with `stress ./trace.test -test.run="TestTraceStressStartStop"`
which was occasionally failing before the CL in which sysBlockTraced was
added (CL 9132). I also confirmed also that this test is still sensitive
to `EvGoSysExit` by removing the one use of sysBlockTraced. The result
is about a 5% error rate. If there is something very subtly wrong about
how this CL emits `EvGoSysExit`, I would expect to see it as a test
failure. Instead:

    53m55s: 200434 runs so far, 0 failures

Change-Id: If1d24ee6b6926eec7e90cdb66039a5abac819d9b
Reviewed-on: https://go-review.googlesource.com/c/go/+/494715
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Michael Anthony Knyszek 2023-05-12 18:55:25 +00:00 committed by Gopher Robot
parent e3ada56537
commit 865179164e
2 changed files with 21 additions and 9 deletions

View File

@ -1999,7 +1999,6 @@ func oneNewExtraM() {
mp.lockedg.set(gp)
gp.lockedm.set(mp)
gp.goid = sched.goidgen.Add(1)
gp.trace.sysBlockTraced = true
if raceenabled {
gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
}
@ -2705,7 +2704,7 @@ func execute(gp *g, inheritTime bool) {
if traceEnabled() {
// GoSysExit has to happen when we have a P, but before GoStart.
// So we emit it here.
if gp.syscallsp != 0 && gp.trace.sysBlockTraced {
if gp.syscallsp != 0 {
traceGoSysExit()
}
traceGoStart()
@ -3856,7 +3855,6 @@ func reentersyscall(pc, sp uintptr) {
}
gp.m.syscalltick = gp.m.p.ptr().syscalltick
gp.trace.sysBlockTraced = true
pp := gp.m.p.ptr()
pp.m = 0
gp.m.oldp.set(pp)
@ -3917,7 +3915,6 @@ func entersyscallblock() {
gp.throwsplit = true
gp.stackguard0 = stackPreempt // see comment in entersyscall
gp.m.syscalltick = gp.m.p.ptr().syscalltick
gp.trace.sysBlockTraced = true
gp.m.p.ptr().syscalltick++
// Leave SP around for GC and traceback.

View File

@ -163,10 +163,10 @@ var trace struct {
// gTraceState is per-G state for the tracer.
type gTraceState struct {
sysExitTicks int64 // cputicks when syscall has returned
sysBlockTraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
seq uint64 // trace event sequencer
lastP puintptr // last P emitted an event for this goroutine
sysExitTicks int64 // cputicks when syscall has returned
tracedSyscallEnter bool // syscall or cgo was entered while trace was enabled or StartTrace has emitted EvGoInSyscall about this goroutine
seq uint64 // trace event sequencer
lastP puintptr // last P emitted an event for this goroutine
}
// mTraceState is per-M state for the tracer.
@ -309,6 +309,7 @@ func StartTrace() error {
}
if status == _Gsyscall {
gp.trace.seq++
gp.trace.tracedSyscallEnter = true
traceEvent(traceEvGoInSyscall, -1, gp.goid)
} else if status == _Gdead && gp.m != nil && gp.m.isextra {
// Trigger two trace events for the dead g in the extra m,
@ -320,9 +321,16 @@ func StartTrace() error {
id := trace.stackTab.put([]uintptr{logicalStackSentinel, startPCforTrace(0) + sys.PCQuantum}) // no start pc
traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
gp.trace.seq++
gp.trace.tracedSyscallEnter = true
traceEvent(traceEvGoInSyscall, -1, gp.goid)
} else {
gp.trace.sysBlockTraced = false
// We need to explicitly clear the flag. A previous trace might have ended with a goroutine
// not emitting a GoSysExit and clearing the flag, leaving it in a stale state. Clearing
// it here makes it unambiguous to any goroutine exiting a syscall racing with us that
// no EvGoInSyscall event was emitted for it. (It's not racy to set this flag here, because
// it'll only get checked when the goroutine runs again, which will be after the world starts
// again.)
gp.trace.tracedSyscallEnter = false
}
})
traceProcStart()
@ -1603,11 +1611,18 @@ func traceGoSysCall() {
// Skip the extra trampoline frame used on most systems.
skip = 4
}
getg().m.curg.trace.tracedSyscallEnter = true
traceEvent(traceEvGoSysCall, skip)
}
func traceGoSysExit() {
gp := getg().m.curg
if !gp.trace.tracedSyscallEnter {
// There was no syscall entry traced for us at all, so there's definitely
// no EvGoSysBlock or EvGoInSyscall before us, which EvGoSysExit requires.
return
}
gp.trace.tracedSyscallEnter = false
ts := gp.trace.sysExitTicks
if ts != 0 && ts < trace.ticksStart {
// There is a race between the code that initializes sysExitTicks