1
0
mirror of https://github.com/golang/go synced 2024-11-23 05:00:07 -07:00

runtime: only acquire trace.lock on the system stack

Currently, trace.lock can be acquired while on a user G and stack
splits can happen while holding trace.lock. That means every lock used
by the stack allocator must be okay to acquire while holding
trace.lock, including various locks related to span allocation. In
turn, we cannot safely emit trace events while holding any
allocation-related locks because this would cause a cycle in the lock
rank graph.

To fix this, require that trace.lock only be acquired on the system
stack, like mheap.lock. This pushes it into the "bottom half" and
eliminates the lock rank relationship between tracing and stack
allocation, making it safe to emit trace events in many more places.

One subtlety is that the trace code has race annotations and uses
maps, which have race annotations. By default, we can't have race
annotations on the system stack, so we borrow the user race context
for these situations.

We'll update the lock graph itself in the next CL.

For #53979. This CL technically fixes the problem, but the lock rank
checker doesn't know that yet.

Change-Id: I9f5187a9c52a67bee4f7064db124b1ad53e5178f
Reviewed-on: https://go-review.googlesource.com/c/go/+/418956
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Austin Clements 2022-07-21 15:43:11 -04:00
parent 9923162f1c
commit 6c2e327e35

View File

@ -109,6 +109,8 @@ const (
// trace is global tracing context. // trace is global tracing context.
var trace struct { var trace struct {
// trace.lock must only be acquired on the system stack where
// stack splits cannot happen while it is held.
lock mutex // protects the following members lock mutex // protects the following members
lockOwner *g // to avoid deadlocks during recursive lock locks lockOwner *g // to avoid deadlocks during recursive lock locks
enabled bool // when set runtime traces events enabled bool // when set runtime traces events
@ -385,31 +387,33 @@ func StopTrace() {
raceacquire(unsafe.Pointer(&trace.shutdownSema)) raceacquire(unsafe.Pointer(&trace.shutdownSema))
} }
// The lock protects us from races with StartTrace/StopTrace because they do stop-the-world. systemstack(func() {
lock(&trace.lock) // The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
for _, p := range allp[:cap(allp)] { lock(&trace.lock)
if p.tracebuf != 0 { for _, p := range allp[:cap(allp)] {
throw("trace: non-empty trace buffer in proc") if p.tracebuf != 0 {
throw("trace: non-empty trace buffer in proc")
}
} }
} if trace.buf != 0 {
if trace.buf != 0 { throw("trace: non-empty global trace buffer")
throw("trace: non-empty global trace buffer") }
} if trace.fullHead != 0 || trace.fullTail != 0 {
if trace.fullHead != 0 || trace.fullTail != 0 { throw("trace: non-empty full trace buffer")
throw("trace: non-empty full trace buffer") }
} if trace.reading != 0 || trace.reader.Load() != nil {
if trace.reading != 0 || trace.reader.Load() != nil { throw("trace: reading after shutdown")
throw("trace: reading after shutdown") }
} for trace.empty != 0 {
for trace.empty != 0 { buf := trace.empty
buf := trace.empty trace.empty = buf.ptr().link
trace.empty = buf.ptr().link sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys) }
} trace.strings = nil
trace.strings = nil trace.shutdown = false
trace.shutdown = false trace.cpuLogRead = nil
trace.cpuLogRead = nil unlock(&trace.lock)
unlock(&trace.lock) })
} }
// ReadTrace returns the next chunk of binary tracing data, blocking until data // ReadTrace returns the next chunk of binary tracing data, blocking until data
@ -419,48 +423,12 @@ func StopTrace() {
// ReadTrace must be called from one goroutine at a time. // ReadTrace must be called from one goroutine at a time.
func ReadTrace() []byte { func ReadTrace() []byte {
top: top:
// This function may need to lock trace.lock recursively var buf []byte
// (goparkunlock -> traceGoPark -> traceEvent -> traceFlush). var park bool
// To allow this we use trace.lockOwner. systemstack(func() {
// Also this function must not allocate while holding trace.lock: buf, park = readTrace0()
// allocation can call heap allocate, which will try to emit a trace })
// event while holding heap lock. if park {
lock(&trace.lock)
trace.lockOwner = getg()
if trace.reader.Load() != nil {
// More than one goroutine reads trace. This is bad.
// But we rather do not crash the program because of tracing,
// because tracing can be enabled at runtime on prod servers.
trace.lockOwner = nil
unlock(&trace.lock)
println("runtime: ReadTrace called from multiple goroutines simultaneously")
return nil
}
// Recycle the old buffer.
if buf := trace.reading; buf != 0 {
buf.ptr().link = trace.empty
trace.empty = buf
trace.reading = 0
}
// Write trace header.
if !trace.headerWritten {
trace.headerWritten = true
trace.lockOwner = nil
unlock(&trace.lock)
return []byte("go 1.19 trace\x00\x00\x00")
}
// Optimistically look for CPU profile samples. This may write new stack
// records, and may write new tracing buffers.
if !trace.footerWritten && !trace.shutdown {
traceReadCPU()
}
// Wait for new data.
if trace.fullHead == 0 && !trace.shutdown {
// We don't simply use a note because the scheduler
// executes this goroutine directly when it wakes up
// (also a note would consume an M).
unlock(&trace.lock)
gopark(func(gp *g, _ unsafe.Pointer) bool { gopark(func(gp *g, _ unsafe.Pointer) bool {
if !trace.reader.CompareAndSwapNoWB(nil, gp) { if !trace.reader.CompareAndSwapNoWB(nil, gp) {
// We're racing with another reader. // We're racing with another reader.
@ -484,6 +452,70 @@ top:
goto top goto top
} }
return buf
}
// readTrace0 is ReadTrace's continuation on g0. This must run on the
// system stack because it acquires trace.lock.
//
//go:systemstack
func readTrace0() (buf []byte, park bool) {
if raceenabled {
// g0 doesn't have a race context. Borrow the user G's.
if getg().racectx != 0 {
throw("expected racectx == 0")
}
getg().racectx = getg().m.curg.racectx
// (This defer should get open-coded, which is safe on
// the system stack.)
defer func() { getg().racectx = 0 }()
}
// This function may need to lock trace.lock recursively
// (goparkunlock -> traceGoPark -> traceEvent -> traceFlush).
// To allow this we use trace.lockOwner.
// Also this function must not allocate while holding trace.lock:
// allocation can call heap allocate, which will try to emit a trace
// event while holding heap lock.
lock(&trace.lock)
trace.lockOwner = getg().m.curg
if trace.reader.Load() != nil {
// More than one goroutine reads trace. This is bad.
// But we rather do not crash the program because of tracing,
// because tracing can be enabled at runtime on prod servers.
trace.lockOwner = nil
unlock(&trace.lock)
println("runtime: ReadTrace called from multiple goroutines simultaneously")
return nil, false
}
// Recycle the old buffer.
if buf := trace.reading; buf != 0 {
buf.ptr().link = trace.empty
trace.empty = buf
trace.reading = 0
}
// Write trace header.
if !trace.headerWritten {
trace.headerWritten = true
trace.lockOwner = nil
unlock(&trace.lock)
return []byte("go 1.19 trace\x00\x00\x00"), false
}
// Optimistically look for CPU profile samples. This may write new stack
// records, and may write new tracing buffers.
if !trace.footerWritten && !trace.shutdown {
traceReadCPU()
}
// Wait for new data.
if trace.fullHead == 0 && !trace.shutdown {
// We don't simply use a note because the scheduler
// executes this goroutine directly when it wakes up
// (also a note would consume an M).
trace.lockOwner = nil
unlock(&trace.lock)
return nil, true
}
newFull: newFull:
assertLockHeld(&trace.lock) assertLockHeld(&trace.lock)
// Write a buffer. // Write a buffer.
@ -492,7 +524,7 @@ newFull:
trace.reading = buf trace.reading = buf
trace.lockOwner = nil trace.lockOwner = nil
unlock(&trace.lock) unlock(&trace.lock)
return buf.ptr().arr[:buf.ptr().pos] return buf.ptr().arr[:buf.ptr().pos], false
} }
// Write footer with timer frequency. // Write footer with timer frequency.
@ -534,17 +566,21 @@ newFull:
} }
// trace.enabled is already reset, so can call traceable functions. // trace.enabled is already reset, so can call traceable functions.
semrelease(&trace.shutdownSema) semrelease(&trace.shutdownSema)
return nil return nil, false
} }
// Also bad, but see the comment above. // Also bad, but see the comment above.
trace.lockOwner = nil trace.lockOwner = nil
unlock(&trace.lock) unlock(&trace.lock)
println("runtime: spurious wakeup of trace reader") println("runtime: spurious wakeup of trace reader")
return nil return nil, false
} }
// traceReader returns the trace reader that should be woken up, if any. // traceReader returns the trace reader that should be woken up, if any.
// Callers should first check that trace.enabled or trace.shutdown is set. // Callers should first check that trace.enabled or trace.shutdown is set.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceReader() *g { func traceReader() *g {
// Optimistic check first // Optimistic check first
if traceReaderAvailable() == nil { if traceReaderAvailable() == nil {
@ -571,6 +607,10 @@ func traceReaderAvailable() *g {
} }
// traceProcFree frees trace buffer associated with pp. // traceProcFree frees trace buffer associated with pp.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceProcFree(pp *p) { func traceProcFree(pp *p) {
buf := pp.tracebuf buf := pp.tracebuf
pp.tracebuf = 0 pp.tracebuf = 0
@ -661,7 +701,9 @@ func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev by
// TODO: test on non-zero extraBytes param. // TODO: test on non-zero extraBytes param.
maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params
if buf == nil || len(buf.arr)-buf.pos < maxSize { if buf == nil || len(buf.arr)-buf.pos < maxSize {
buf = traceFlush(traceBufPtrOf(buf), pid).ptr() systemstack(func() {
buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
})
bufp.set(buf) bufp.set(buf)
} }
@ -802,7 +844,9 @@ func traceReadCPU() {
buf := bufp.ptr() buf := bufp.ptr()
if buf == nil { if buf == nil {
*bufp = traceFlush(*bufp, 0) systemstack(func() {
*bufp = traceFlush(*bufp, 0)
})
buf = bufp.ptr() buf = bufp.ptr()
} }
for i := range stk { for i := range stk {
@ -856,6 +900,10 @@ func traceReleaseBuffer(pid int32) {
} }
// traceFlush puts buf onto stack of full buffers and returns an empty buffer. // traceFlush puts buf onto stack of full buffers and returns an empty buffer.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceFlush(buf traceBufPtr, pid int32) traceBufPtr { func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
owner := trace.lockOwner owner := trace.lockOwner
dolock := owner == nil || owner != getg().m.curg dolock := owner == nil || owner != getg().m.curg
@ -933,8 +981,10 @@ func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr)
buf := bufp.ptr() buf := bufp.ptr()
size := 1 + 2*traceBytesPerNumber + len(s) size := 1 + 2*traceBytesPerNumber + len(s)
if buf == nil || len(buf.arr)-buf.pos < size { if buf == nil || len(buf.arr)-buf.pos < size {
buf = traceFlush(traceBufPtrOf(buf), pid).ptr() systemstack(func() {
bufp.set(buf) buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
bufp.set(buf)
})
} }
buf.byte(traceEvString) buf.byte(traceEvString)
buf.varint(id) buf.varint(id)
@ -1088,6 +1138,10 @@ func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
// dump writes all previously cached stacks to trace buffers, // dump writes all previously cached stacks to trace buffers,
// releases all memory and resets state. // releases all memory and resets state.
//
// This must run on the system stack because it calls traceFlush.
//
//go:systemstack
func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr { func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr {
for i, _ := range tab.tab { for i, _ := range tab.tab {
stk := tab.tab[i].ptr() stk := tab.tab[i].ptr()