1
0
mirror of https://github.com/golang/go synced 2024-11-17 13:04:54 -07:00

runtime/pprof: expand final stack frame to avoid truncation

When generating stacks, the runtime automatically expands inline
functions to inline all inline frames in the stack. However, due to the
stack size limit, the final frame may be truncated in the middle of
several inline frames at the same location.

As-is, we assume that the final frame is a normal function, and emit and
cache a Location for it. If we later receive a complete stack frame, we
will first use the cached Location for the inlined function and then
generate a new Location for the "caller" frame, in violation of the
pprof requirement to merge inlined functions into the same Location.

As a result, we:

1. Nondeterministically may generate a profile with the different stacks
combined or split, depending on which is encountered first. This is
particularly problematic when performing a diff of profiles.

2. When split stacks are generated, we lose the inlining information.

We avoid both of these problems by performing a second expansion of the
last stack frame to recover additional inline frames that may have been
lost. This expansion is a bit simpler than the one done by the runtime
because we don't have to handle skipping, and we know that the last
emitted frame is not an elided wrapper, since it by definition is
already included in the stack.

Fixes #37446

Change-Id: If3ca2af25b21d252cf457cc867dd932f107d4c61
Reviewed-on: https://go-review.googlesource.com/c/go/+/221577
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
This commit is contained in:
Michael Pratt 2020-02-28 14:16:41 -05:00
parent 4a70ff425b
commit fadbf7404d
5 changed files with 115 additions and 31 deletions

View File

@ -68,7 +68,8 @@ Search:
if len(m.freeStk) < len(stk) {
m.freeStk = make([]uintptr, 1024)
}
e.stk = m.freeStk[:len(stk)]
// Limit cap to prevent append from clobbering freeStk.
e.stk = m.freeStk[:len(stk):len(stk)]
m.freeStk = m.freeStk[len(stk):]
for j := range stk {

View File

@ -1172,16 +1172,25 @@ func TestTryAdd(t *testing.T) {
{Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
name: "recursive_inlined_funcs",
// If a function is called recursively then it must not be
// inlined in the caller.
//
// N.B. We're generating an impossible profile here, with a
// recursive inlineCallee call. This is simulating a non-Go
// function that looks like an inlined Go function other than
// its recursive property. See pcDeck.tryAdd.
name: "recursive_func_is_not_inlined",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
5, 0, 30, inlinedCalleePtr, inlinedCalleePtr,
4, 0, 40, inlinedCalleePtr,
},
wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}},
// inlinedCaller shows up here because
// runtime_expandFinalInlineFrame adds it to the stack frame.
wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}, {"runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
{Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
{Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}}},
{Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
{Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
},
}, {
name: "truncated_stack_trace_later",
@ -1202,12 +1211,36 @@ func TestTryAdd(t *testing.T) {
4, 0, 70, inlinedCalleePtr,
5, 0, 80, inlinedCalleePtr, inlinedCallerPtr,
},
wantLocs: [][]string{ // the inline info is screwed up, but better than a crash.
{"runtime/pprof.inlinedCallee"},
wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
// We can recover the inlined caller from a truncated stack.
name: "truncated_stack_trace_only",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
4, 0, 70, inlinedCalleePtr,
},
wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
// The same location is used for duplicated stacks.
name: "truncated_stack_trace_twice",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
4, 0, 70, inlinedCalleePtr,
5, 0, 80, inlinedCallerPtr, inlinedCalleePtr,
},
wantLocs: [][]string{
{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"},
{"runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
},
}}

View File

@ -384,6 +384,10 @@ func (b *profileBuilder) build() {
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
// The last frame might be truncated. Recover lost inline frames.
stk = runtime_expandFinalInlineFrame(stk)
for len(stk) > 0 {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
@ -395,22 +399,12 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
// then, record the cached location.
locs = append(locs, l.id)
// The stk may be truncated due to the stack depth limit
// (e.g. See maxStack and maxCPUProfStack in runtime) or
// bugs in runtime. Avoid the crash in either case.
// TODO(hyangah): The correct fix may require using the exact
// pcs as the key for b.locs cache management instead of just
// relying on the very first pc. We are late in the go1.14 dev
// cycle, so this is a workaround with little code change.
if len(l.pcs) > len(stk) {
stk = nil
// TODO(hyangah): would be nice if we can enable
// debug print out on demand and report the problematic
// cached location entry and stack traces. Do we already
// have such facility to utilize (e.g. GODEBUG)?
} else {
stk = stk[len(l.pcs):] // skip the matching pcs.
}
// Skip the matching pcs.
//
// Even if stk was truncated due to the stack depth
// limit, expandFinalInlineFrame above has already
// fixed the truncation, ensuring it is long enough.
stk = stk[len(l.pcs):]
continue
}
@ -427,9 +421,9 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
stk = stk[1:]
continue
}
// add failed because this addr is not inlined with
// the existing PCs in the deck. Flush the deck and retry to
// handle this pc.
// add failed because this addr is not inlined with the
// existing PCs in the deck. Flush the deck and retry handling
// this pc.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
@ -463,8 +457,8 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
// Frame's Func is nil (note: also true for non-Go functions), and
// Frame's Entry matches its entry function frame's Entry. (note: could also be true for recursive calls and non-Go functions),
// Frame's Name does not match its entry function frame's name.
// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be recursive).
//
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
@ -486,8 +480,8 @@ func (d *pcDeck) reset() {
// to the deck. If it fails the caller needs to flush the deck and retry.
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
if existing := len(d.pcs); existing > 0 {
// 'frames' are all expanded from one 'pc' and represent all inlined functions
// so we check only the last one.
// 'd.frames' are all expanded from one 'pc' and represent all
// inlined functions so we check only the last one.
newFrame := frames[0]
last := d.frames[existing-1]
if last.Func != nil { // the last frame can't be inlined. Flush.

View File

@ -9,6 +9,9 @@ import (
"unsafe"
)
// runtime_expandFinalInlineFrame is defined in runtime/symtab.go.
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
// runtime_setProfLabel is defined in runtime/proflabel.go.
func runtime_setProfLabel(labels unsafe.Pointer)

View File

@ -148,6 +148,59 @@ func (ci *Frames) Next() (frame Frame, more bool) {
return
}
// runtime_expandFinalInlineFrame expands the final pc in stk to include all
// "callers" if pc is inline.
//
//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr {
pc := stk[len(stk)-1]
tracepc := pc - 1
f := findfunc(tracepc)
if !f.valid() {
// Not a Go function.
return stk
}
inldata := funcdata(f, _FUNCDATA_InlTree)
if inldata == nil {
// Nothing inline in f.
return stk
}
// Treat the previous func as normal. We haven't actually checked, but
// since this pc was included in the stack, we know it shouldn't be
// elided.
lastFuncID := funcID_normal
// Remove pc from stk; we'll re-add it below.
stk = stk[:len(stk)-1]
// See inline expansion in gentraceback.
var cache pcvalueCache
inltree := (*[1 << 20]inlinedCall)(inldata)
for {
ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, &cache)
if ix < 0 {
break
}
if inltree[ix].funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) {
// ignore wrappers
} else {
stk = append(stk, pc)
}
lastFuncID = inltree[ix].funcID
// Back up to an instruction in the "caller".
tracepc = f.entry + uintptr(inltree[ix].parentPc)
pc = tracepc + 1
}
// N.B. we want to keep the last parentPC which is not inline.
stk = append(stk, pc)
return stk
}
// expandCgoFrames expands frame information for pc, known to be
// a non-Go function, using the cgoSymbolizer hook. expandCgoFrames
// returns nil if pc could not be expanded.