
runtime/pprof: correctly encode inlined functions in CPU profile

The pprof profile proto message expects inlined functions of a PC
to be encoded in one Location entry using multiple Line entries.
https://github.com/google/pprof/blob/5e96527/proto/profile.proto#L177-L184
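
For illustration only (not part of this CL): a minimal sketch, using the
github.com/google/pprof/profile types that the tests below also use, of how
a sampled PC with an inlined call is expected to be encoded: one Location
whose Line entries list the inlined callee first, then the caller. All IDs,
addresses, and line numbers here are made up.

package main

import (
    "fmt"

    "github.com/google/pprof/profile"
)

func main() {
    callee := &profile.Function{ID: 1, Name: "runtime/pprof.inlinedCallee"}
    caller := &profile.Function{ID: 2, Name: "runtime/pprof.inlinedCaller"}

    // One Location for the single PC: the inlined callee comes first,
    // followed by the frame it was inlined into.
    loc := &profile.Location{
        ID:      1,
        Address: 0x401000, // hypothetical PC
        Line: []profile.Line{
            {Function: callee, Line: 75}, // hypothetical line numbers
            {Function: caller, Line: 68},
        },
    }

    fmt.Printf("1 Location, %d Line entries\n", len(loc.Line))
}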

runtime/pprof has encoded the symbolization information by creating
a Location for each PC found in the stack trace and including info
from all the frames expanded from the PC using runtime.CallersFrames.
This assumes inlined functions are represented as a single PC in the
stack trace. (https://go-review.googlesource.com/41256)

In recent years, the behavior around inlining and traceback changed
significantly (e.g. https://golang.org/cl/152537,
https://golang.org/issue/29582, and many other changes). Now the PCs
in the stack trace represent user frames, including even the inline
marks. As a result, the profile proto started to allocate a Location
entry for each user frame, lost the inline information (so pprof
presented incorrect results when inlined functions were involved),
and confused the pprof tool with the PCs made up for inline marks.

This CL attempts to detect inlined call frames from the stack traces
of CPU profiles and to organize the Location information as intended.
Currently the runtime does not provide a reliable and convenient way
to detect inlined call frames and expand user frames from given
externally recognizable PCs, so we use heuristics to recover the
grouping (see the sketch after this list):
  - inlined call frames have a nil Func field,
  - inlined call frames share the same Entry point,
  - but we must be careful with recursive functions, which have the
    same Entry point by definition, and with non-Go functions, which
    may lack most of the fields of Frame.
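
A condensed sketch of that heuristic, applied to two adjacent frames while
walking a stack from leaf to root (the real implementation is pcDeck.tryAdd
in proto.go below; the helper name isInlinedFollower is made up for this
illustration):

package sketch

import "runtime"

// isInlinedFollower reports whether the frame expanded from the next PC
// (next) should be folded into the same Location as the previously seen
// frame (last), i.e. last looks like a call that was inlined into next.
func isInlinedFollower(last, next runtime.Frame) bool {
    if last.Func != nil && next.Func != nil {
        return false // two real Go frames; neither is an inlined call
    }
    if last.Entry == 0 || next.Entry == 0 {
        return false // possibly not a Go function; don't try to merge
    }
    if last.Entry != next.Entry {
        return false // next belongs to a different entry function
    }
    if last.Function == next.Function {
        return false // same name and entry: likely recursion, not inlining
    }
    return true
}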

A follow-up CL will address the issue for the other profile types.

Change-Id: I0c9667ab016a3e898d648f31c3f82d84c15398db
Reviewed-on: https://go-review.googlesource.com/c/go/+/204636
Reviewed-by: Keith Randall <khr@golang.org>
Author: Hana (Hyang-Ah) Kim, 2019-11-02 17:16:36 +09:00 (committed by Hyang-Ah Hana Kim)
Parent: 33dfd3529b
Commit: e038c7e418
4 changed files with 318 additions and 54 deletions

src/runtime/pprof/pprof_test.go

@@ -49,8 +49,12 @@ var (
 // Must not call other functions nor access heap/globals in the loop,
 // otherwise under race detector the samples will be in the race runtime.
 func cpuHog1(x int) int {
+    return cpuHog0(x, 1e5)
+}
+
+func cpuHog0(x, n int) int {
     foo := x
-    for i := 0; i < 1e5; i++ {
+    for i := 0; i < n; i++ {
         if foo > 0 {
             foo *= foo
         } else {
@@ -101,34 +105,69 @@ func TestCPUProfileMultithreaded(t *testing.T) {
 }
 
 func TestCPUProfileInlining(t *testing.T) {
-    testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
+    p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
         cpuHogger(inlinedCaller, &salt1, dur)
     })
+
+    // Check if inlined function locations are encoded correctly. The inlinedCallee and inlinedCaller should be in one location.
+    for _, loc := range p.Location {
+        hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
+        for _, line := range loc.Line {
+            if line.Function.Name == "runtime/pprof.inlinedCallee" {
+                hasInlinedCallee = true
+            }
+            if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
+                hasInlinedCallerAfterInlinedCallee = true
+            }
+        }
+        if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
+            t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
+        }
+    }
 }
 
 func inlinedCaller(x int) int {
-    x = inlinedCallee(x)
+    x = inlinedCallee(x, 1e5)
     return x
 }
 
-func inlinedCallee(x int) int {
-    // We could just use cpuHog1, but for loops prevent inlining
-    // right now. :(
-    foo := x
-    i := 0
-loop:
-    if foo > 0 {
-        foo *= foo
-    } else {
-        foo *= foo + 1
-    }
-    if i++; i < 1e5 {
-        goto loop
-    }
-    return foo
+func inlinedCallee(x, n int) int {
+    return cpuHog0(x, n)
+}
+
+func TestCPUProfileRecursion(t *testing.T) {
+    p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
+        cpuHogger(recursionCaller, &salt1, dur)
+    })
+
+    // check the Location encoding was not confused by recursive calls.
+    for i, loc := range p.Location {
+        recursionFunc := 0
+        for _, line := range loc.Line {
+            if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
+                recursionFunc++
+            }
+        }
+        if recursionFunc > 1 {
+            t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
+        }
+    }
+}
+
+func recursionCaller(x int) int {
+    y := recursionCallee(3, x)
+    return y
+}
+
+func recursionCallee(n, x int) int {
+    if n == 0 {
+        return 1
+    }
+    y := inlinedCallee(x, 1e4)
+    return y * recursionCallee(n-1, x)
 }
 
-func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
+func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
     p, err := profile.Parse(bytes.NewReader(valBytes))
     if err != nil {
         t.Fatal(err)
@@ -137,11 +176,12 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca
         count := uintptr(sample.Value[0])
         f(count, sample.Location, sample.Label)
     }
+    return p
 }
 
 // testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
-// as interpreted by matches.
-func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) {
+// as interpreted by matches, and returns the parsed profile.
+func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
     switch runtime.GOOS {
     case "darwin":
         switch runtime.GOARCH {
@@ -195,8 +235,8 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
         f(duration)
         StopCPUProfile()
 
-        if profileOk(t, matches, need, avoid, prof, duration) {
-            return
+        if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
+            return p
         }
 
         duration *= 2
@@ -217,6 +257,7 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
         t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
     }
     t.FailNow()
+    return nil
 }
 
 func contains(slice []string, s string) bool {
@@ -242,7 +283,7 @@ func stackContains(spec string, count uintptr, stk []*profile.Location, labels m
 
 type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
 
-func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
+func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
     ok = true
 
     // Check that profile is well formed, contains 'need', and does not contain
@@ -251,7 +292,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
     avoidSamples := make([]uintptr, len(avoid))
     var samples uintptr
     var buf bytes.Buffer
-    parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
+    p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
         fmt.Fprintf(&buf, "%d:", count)
         fprintStack(&buf, stk)
         samples += count
@@ -278,7 +319,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
         // not enough samples due to coarse timer
         // resolution. Let it go.
         t.Log("too few samples on Windows (golang.org/issue/10842)")
-        return false
+        return p, false
     }
 
     // Check that we got a reasonable number of samples.
@@ -300,7 +341,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
     }
 
     if len(need) == 0 {
-        return ok
+        return p, ok
     }
 
     var total uintptr
@@ -323,7 +364,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
             ok = false
         }
     }
-    return ok
+    return p, ok
 }
 
 // Fork can hang if preempted with signals frequently enough (see issue 5517).

src/runtime/pprof/proto.go

@@ -41,8 +41,8 @@ type profileBuilder struct {
     pb        protobuf
     strings   []string
     stringMap map[string]int
-    locs      map[uintptr]int
-    funcs     map[string]int // Package path-qualified function name to Function.ID
+    locs      map[uintptr]locInfo // list of locInfo starting with the given PC.
+    funcs     map[string]int      // Package path-qualified function name to Function.ID
     mem       []memMap
 }
 
@@ -207,13 +207,43 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file
     b.pb.endMessage(tag, start)
 }
 
+func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
+    // Expand this one address using CallersFrames so we can cache
+    // each expansion. In general, CallersFrames takes a whole
+    // stack, but in this case we know there will be no skips in
+    // the stack and we have return PCs anyway.
+    frames := runtime.CallersFrames([]uintptr{addr})
+    frame, more := frames.Next()
+    if frame.Function == "runtime.goexit" {
+        // Short-circuit if we see runtime.goexit so the loop
+        // below doesn't allocate a useless empty location.
+        return nil, 0
+    }
+
+    symbolizeResult := lookupTried
+    if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
+        symbolizeResult |= lookupFailed
+    }
+
+    if frame.PC == 0 {
+        // If we failed to resolve the frame, at least make up
+        // a reasonable call PC. This mostly happens in tests.
+        frame.PC = addr - 1
+    }
+    ret := []runtime.Frame{frame}
+    for frame.Function != "runtime.goexit" && more == true {
+        frame, more = frames.Next()
+        ret = append(ret, frame)
+    }
+    return ret, symbolizeResult
+}
+
 // locForPC returns the location ID for addr.
 // addr must be a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call.
 // It may emit to b.pb, so there must be no message encoding in progress.
 func (b *profileBuilder) locForPC(addr uintptr) uint64 {
-    id := uint64(b.locs[addr])
-    if id != 0 {
-        return id
+    if loc, ok := b.locs[addr]; ok {
+        return loc.id
     }
 
     // Expand this one address using CallersFrames so we can cache
@@ -248,8 +278,8 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
     }
     newFuncs := make([]newFunc, 0, 8)
 
-    id = uint64(len(b.locs)) + 1
-    b.locs[addr] = int(id)
+    id := uint64(len(b.locs)) + 1
+    b.locs[addr] = locInfo{id: id, pcs: []uintptr{addr}}
     start := b.pb.startMessage()
     b.pb.uint64Opt(tagLocation_ID, id)
     b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
@@ -293,6 +323,16 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
     return id
 }
 
+type locInfo struct {
+    // location id assigned by the profileBuilder
+    id uint64
+
+    // sequence of PCs, including the fake PCs returned by the traceback
+    // to represent inlined functions
+    // https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
+    pcs []uintptr
+}
+
 // newProfileBuilder returns a new profileBuilder.
 // CPU profiling data obtained from the runtime can be added
 // by calling b.addCPUData, and then the eventual profile
@@ -305,7 +345,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
         start:     time.Now(),
         strings:   []string{""},
         stringMap: map[string]int{"": 0},
-        locs:      map[uintptr]int{},
+        locs:      map[uintptr]locInfo{},
         funcs:     map[string]int{},
     }
     b.readMapping()
@@ -388,7 +428,10 @@ func (b *profileBuilder) build() {
     }
 
     values := []int64{0, 0}
+    var deck = &pcDeck{}
     var locs []uint64
+
     for e := b.m.all; e != nil; e = e.nextAll {
         values[0] = e.count
         values[1] = e.count * b.period
@@ -402,23 +445,62 @@ func (b *profileBuilder) build() {
             }
         }
 
+        deck.reset()
+
         locs = locs[:0]
-        for i, addr := range e.stk {
-            // Addresses from stack traces point to the
-            // next instruction after each call, except
-            // for the leaf, which points to where the
-            // signal occurred. locForPC expects return
-            // PCs, so increment the leaf address to look
-            // like a return PC.
-            if i == 0 {
-                addr++
-            }
-            l := b.locForPC(addr)
-            if l == 0 { // runtime.goexit
-                continue
-            }
-            locs = append(locs, l)
+
+        // Addresses from stack traces point to the next instruction after each call,
+        // except for the leaf, which points to where the signal occurred.
+        // deck.add+emitLocation expects return PCs so increment the leaf address to
+        // look like a return PC.
+        e.stk[0] += 1
+        for stk := e.stk; len(stk) > 0; {
+            addr := stk[0]
+            if l, ok := b.locs[addr]; ok {
+                // first record the location if there is any pending accumulated info.
+                if id := b.emitLocation(deck); id > 0 {
+                    locs = append(locs, id)
+                }
+
+                // then, record the cached location.
+                locs = append(locs, l.id)
+                stk = stk[len(l.pcs):] // skip the matching pcs.
+                continue
+            }
+
+            frames, symbolizeResult := allFrames(addr)
+            if len(frames) == 0 { // runtime.goexit.
+                if id := b.emitLocation(deck); id > 0 {
+                    locs = append(locs, id)
+                }
+                stk = stk[1:]
+                continue
+            }
+
+            if added := deck.tryAdd(addr, frames, symbolizeResult); added {
+                stk = stk[1:]
+                continue
+            }
+            // add failed because this addr is not inlined with
+            // the existing PCs in the deck. Flush the deck and retry to
+            // handle this pc.
+            if id := b.emitLocation(deck); id > 0 {
+                locs = append(locs, id)
+            }
+
+            // check cache again - previous emitLocation added a new entry
+            if l, ok := b.locs[addr]; ok {
+                locs = append(locs, l.id)
+                stk = stk[len(l.pcs):] // skip the matching pcs.
+            } else {
+                deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
+                stk = stk[1:]
+            }
         }
+        if id := b.emitLocation(deck); id > 0 { // emit remaining location.
+            locs = append(locs, id)
+        }
+
+        e.stk[0] -= 1 // undo the adjustment on the leaf done before the loop.
+
         b.pbSample(values, locs, labels)
     }
@@ -435,6 +517,133 @@ func (b *profileBuilder) build() {
     b.zw.Close()
 }
 
+// pcDeck is a helper to detect a sequence of inlined functions from
+// a stack trace returned by the runtime.
+//
+// The stack traces returned by runtime's traceback functions are fully
+// expanded (at least for Go functions) and include the fake pcs representing
+// inlined functions. The profile proto expects the inlined functions to be
+// encoded in one Location message.
+// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
+//
+// Runtime does not directly expose whether a frame is for an inlined function
+// and looking up debug info is not ideal, so we use a heuristic to filter
+// the fake pcs and restore the inlined and entry functions. Inlined functions
+// have the following properties:
+//   Frame's Func is nil (note: also true for non-Go functions), and
+//   Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
+//   Frame's Name does not match its entry function frame's name.
+//
+// As we read and process the pcs in a stack trace one by one (from leaf to root),
+// we use pcDeck to temporarily hold the observed pcs and their expanded frames
+// until we observe the entry function frame.
+type pcDeck struct {
+    pcs             []uintptr
+    frames          []runtime.Frame
+    symbolizeResult symbolizeFlag
+}
+
+func (d *pcDeck) reset() {
+    d.pcs = d.pcs[:0]
+    d.frames = d.frames[:0]
+    d.symbolizeResult = 0
+}
+
+// tryAdd tries to add the pc and Frames expanded from it (most likely one,
+// since the stack trace is already fully expanded) and the symbolizeResult
+// to the deck. If it fails the caller needs to flush the deck and retry.
+func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
+    if existing := len(d.pcs); existing > 0 {
+        // 'frames' are all expanded from one 'pc' and represent all inlined functions
+        // so we check only the first one.
+        newFrame := frames[0]
+        last := d.frames[existing-1]
+        if last.Func != nil && newFrame.Func != nil { // Can't be an inlined frame.
+            return false
+        }
+        if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
+            return false
+        }
+        if last.Entry != newFrame.Entry { // newFrame is for a different function.
+            return false
+        }
+        if last.Function == newFrame.Function { // maybe recursion.
+            return false
+        }
+    }
+    d.pcs = append(d.pcs, pc)
+    d.frames = append(d.frames, frames...)
+    d.symbolizeResult |= symbolizeResult
+    return true
+}
+
+// emitLocation emits the new location and function information recorded in the deck
+// and returns the location ID encoded in the profile protobuf.
+// It emits to b.pb, so there must be no message encoding in progress.
+// It resets the deck.
+func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
+    defer deck.reset()
+
+    if len(deck.pcs) == 0 {
+        return 0
+    }
+
+    addr := deck.pcs[0]
+    firstFrame := deck.frames[0]
+
+    // We can't write out functions while in the middle of the
+    // Location message, so record new functions we encounter and
+    // write them out after the Location.
+    type newFunc struct {
+        id         uint64
+        name, file string
+    }
+    newFuncs := make([]newFunc, 0, 8)
+
+    id := uint64(len(b.locs)) + 1
+    b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, deck.pcs...)}
+
+    start := b.pb.startMessage()
+    b.pb.uint64Opt(tagLocation_ID, id)
+    b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
+    for _, frame := range deck.frames {
+        // Write out each line in frame expansion.
+        funcID := uint64(b.funcs[frame.Function])
+        if funcID == 0 {
+            funcID = uint64(len(b.funcs)) + 1
+            b.funcs[frame.Function] = int(funcID)
+            newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+        }
+        b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
+    }
+    for i := range b.mem {
+        if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
+            b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
+
+            m := b.mem[i]
+            m.funcs |= deck.symbolizeResult
+            b.mem[i] = m
+            break
+        }
+    }
+    b.pb.endMessage(tagProfile_Location, start)
+
+    // Write out functions we found during frame expansion.
+    for _, fn := range newFuncs {
+        start := b.pb.startMessage()
+        b.pb.uint64Opt(tagFunction_ID, fn.id)
+        b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
+        b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
+        b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+        b.pb.endMessage(tagProfile_Function, start)
+    }
+
+    b.flush()
+    return id
+}
+
 // readMapping reads /proc/self/maps and writes mappings to b.pb.
 // It saves the address ranges of the mappings in b.mem for use
 // when emitting locations.

src/runtime/pprof/proto_test.go

@@ -358,6 +358,17 @@ func TestMapping(t *testing.T) {
                     continue
                 }
             }
+
+            if traceback == "Go+C" {
+                // The test code was arranged to have PCs from C and
+                // they are not symbolized.
+                // Check no Location containing those unsymbolized PCs contains multiple lines.
+                for i, loc := range prof.Location {
+                    if !symbolized(loc) && len(loc.Line) > 1 {
+                        t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc)
+                    }
+                }
+            }
         })
     }
 }

src/runtime/testdata/testprogcgo/pprof.go

@@ -17,8 +17,7 @@ package main
 int cpuHogCSalt1 = 0;
 int cpuHogCSalt2 = 0;
 
-void CPUHogCFunction() {
-    int foo = cpuHogCSalt1;
+void CPUHogCFunction0(int foo) {
     int i;
     for (i = 0; i < 100000; i++) {
         if (foo > 0) {
@@ -30,6 +29,10 @@ void CPUHogCFunction() {
     }
 }
 
+void CPUHogCFunction() {
+    CPUHogCFunction0(cpuHogCSalt1);
+}
+
 struct CgoTracebackArg {
     uintptr_t context;
     uintptr_t sigContext;
@@ -39,8 +42,9 @@ struct CgoTracebackArg {
 
 void CollectCgoTraceback(void* parg) {
     struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg);
-    arg->buf[0] = (uintptr_t)(CPUHogCFunction);
-    arg->buf[1] = 0;
+    arg->buf[0] = (uintptr_t)(CPUHogCFunction0);
+    arg->buf[1] = (uintptr_t)(CPUHogCFunction);
+    arg->buf[2] = 0;
 };
 */
 import "C"
@@ -81,7 +85,6 @@ var salt1 int
 var salt2 int
 
 func cpuHogGoFunction() {
-    // Generates CPU profile samples including a Go call path.
     for {
         foo := salt1
         for i := 0; i < 1e5; i++ {