runtime/pprof: expand inlined frames in symbolized proto profiles

Currently proto symbolization uses runtime.FuncForPC and assumes each PC maps to a single frame. This isn't true in the presence of inlining (even with leaf-only inlining this can get incorrect results). Change PC symbolization to use runtime.CallersFrames to expand each PC to all of the frames at that PC. Change-Id: I8d20dff7495a5de495ae07f569122c225d433ced Reviewed-on: https://go-review.googlesource.com/41256 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Michael Matloob <matloob@golang.org>
2024-11-23 20:50:04 -07:00 · 2017-04-20 15:54:12 -04:00 · 2017-04-20 15:54:12 -04:00 · 9e83c11fca
commit 9e83c11fca
parent 1dc0f9696b
4 changed files with 93 additions and 45 deletions
--- a/src/runtime/pprof/pprof.go
+++ b/src/runtime/pprof/pprof.go
@ -400,9 +400,7 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
 		locs = locs[:0]
 		for _, addr := range p.Stack(index[k]) {
 			// For count profiles, all stack addresses are
-			// return PCs. Adjust them to be call PCs for
-			// locForPC.
-			addr--
+			// return PCs, which is what locForPC expects.
 			locs = append(locs, b.locForPC(addr))
 		}
 		b.pbSample(values, locs, nil)
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@ -87,6 +87,33 @@ func TestCPUProfileMultithreaded(t *testing.T) {
 	})
 }

+func TestCPUProfileInlining(t *testing.T) {
+	testCPUProfile(t, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, func(dur time.Duration) {
+		cpuHogger(inlinedCaller, dur)
+	})
+}
+
+func inlinedCaller() {
+	inlinedCallee()
+}
+
+func inlinedCallee() {
+	// We could just use cpuHog1, but for loops prevent inlining
+	// right now. :(
+	foo := salt1
+	i := 0
+loop:
+	if foo > 0 {
+		foo *= foo
+	} else {
+		foo *= foo + 1
+	}
+	if i++; i < 1e5 {
+		goto loop
+	}
+	salt1 = foo
+}
+
 func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
 	p, err := profile.Parse(bytes.NewReader(valBytes))
 	if err != nil {
--- a/src/runtime/pprof/proto.go
+++ b/src/runtime/pprof/proto.go
@ -43,7 +43,7 @@ type profileBuilder struct {
 	strings   []string
 	stringMap map[string]int
 	locs      map[uintptr]int
-	funcs     map[*runtime.Func]int
+	funcs     map[string]int // Package path-qualified function name to Function.ID
 	mem       []memMap
 }

@ -183,24 +183,60 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file
 }

 // locForPC returns the location ID for addr.
-// addr must be a call address (not a return address).
+// addr must be a return PC. This returns the location of the call.
 // It may emit to b.pb, so there must be no message encoding in progress.
 func (b *profileBuilder) locForPC(addr uintptr) uint64 {
 	id := uint64(b.locs[addr])
 	if id != 0 {
 		return id
 	}
-	f := runtime.FuncForPC(addr)
-	if f != nil && f.Name() == "runtime.goexit" {
+
+	// Expand this one address using CallersFrames so we can cache
+	// each expansion. In general, CallersFrames takes a whole
+	// stack, but in this case we know there will be no skips in
+	// the stack and we have return PCs anyway.
+	frames := runtime.CallersFrames([]uintptr{addr})
+	frame, more := frames.Next()
+	if frame.Function == "runtime.goexit" {
+		// Short-circuit if we see runtime.goexit so the loop
+		// below doesn't allocate a useless empty location.
 		return 0
 	}
-	funcID, lineno := b.funcForPC(addr)
+
+	if frame.PC == 0 {
+		// If we failed to resolve the frame, at least make up
+		// a reasonable call PC. This mostly happens in tests.
+		frame.PC = addr - 1
+	}
+
+	// We can't write out functions while in the middle of the
+	// Location message, so record new functions we encounter and
+	// write them out after the Location.
+	type newFunc struct {
+		id         uint64
+		name, file string
+	}
+	newFuncs := make([]newFunc, 0, 8)
+
 	id = uint64(len(b.locs)) + 1
 	b.locs[addr] = int(id)
 	start := b.pb.startMessage()
 	b.pb.uint64Opt(tagLocation_ID, id)
-	b.pb.uint64Opt(tagLocation_Address, uint64(addr))
-	b.pbLine(tagLocation_Line, funcID, int64(lineno))
+	b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
+	for frame.Function != "runtime.goexit" {
+		// Write out each line in frame expansion.
+		funcID := uint64(b.funcs[frame.Function])
+		if funcID == 0 {
+			funcID = uint64(len(b.funcs)) + 1
+			b.funcs[frame.Function] = int(funcID)
+			newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
+		}
+		b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
+		if !more {
+			break
+		}
+		frame, more = frames.Next()
+	}
 	if len(b.mem) > 0 {
 		i := sort.Search(len(b.mem), func(i int) bool {
 			return b.mem[i].end > addr
@ -210,36 +246,21 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
 		}
 	}
 	b.pb.endMessage(tagProfile_Location, start)
+
+	// Write out functions we found during frame expansion.
+	for _, fn := range newFuncs {
+		start := b.pb.startMessage()
+		b.pb.uint64Opt(tagFunction_ID, fn.id)
+		b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
+		b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
+		b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
+		b.pb.endMessage(tagProfile_Function, start)
+	}
+
 	b.flush()
 	return id
 }

-// funcForPC returns the func ID and line number for addr.
-// It may emit to b.pb, so there must be no message encoding in progress.
-func (b *profileBuilder) funcForPC(addr uintptr) (funcID uint64, lineno int) {
-	f := runtime.FuncForPC(addr)
-	if f == nil {
-		return 0, 0
-	}
-	file, lineno := f.FileLine(addr)
-	funcID = uint64(b.funcs[f])
-	if funcID != 0 {
-		return funcID, lineno
-	}
-
-	funcID = uint64(len(b.funcs)) + 1
-	b.funcs[f] = int(funcID)
-	name := f.Name()
-	start := b.pb.startMessage()
-	b.pb.uint64Opt(tagFunction_ID, funcID)
-	b.pb.int64Opt(tagFunction_Name, b.stringIndex(name))
-	b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(name))
-	b.pb.int64Opt(tagFunction_Filename, b.stringIndex(file))
-	b.pb.endMessage(tagProfile_Function, start)
-	b.flush()
-	return funcID, lineno
-}
-
 // newProfileBuilder returns a new profileBuilder.
 // CPU profiling data obtained from the runtime can be added
 // by calling b.addCPUData, and then the eventual profile
@ -253,7 +274,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
 		strings:   []string{""},
 		stringMap: map[string]int{"": 0},
 		locs:      map[uintptr]int{},
-		funcs:     map[*runtime.Func]int{},
+		funcs:     map[string]int{},
 	}
 	b.readMapping()
 	return b
@ -351,11 +372,14 @@ func (b *profileBuilder) build() error {

 		locs = locs[:0]
 		for i, addr := range e.stk {
-			// Addresses from stack traces point to the next instruction after
-			// each call.  Adjust by -1 to land somewhere on the actual call
-			// (except for the leaf, which is not a call).
-			if i > 0 {
-				addr--
+			// Addresses from stack traces point to the
+			// next instruction after each call, except
+			// for the leaf, which points to where the
+			// signal occurred. locForPC expects return
+			// PCs, so increment the leaf address to look
+			// like a return PC.
+			if i == 0 {
+				addr++
 			}
 			l := b.locForPC(addr)
 			if l == 0 { // runtime.goexit
--- a/src/runtime/pprof/protomem.go
+++ b/src/runtime/pprof/protomem.go
@ -29,9 +29,8 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64) error
 		for tries := 0; tries < 2; tries++ {
 			for _, addr := range r.Stack() {
 				// For heap profiles, all stack
-				// addresses are return PCs. Adjust
-				// them to be call PCs for locForPC.
-				addr--
+				// addresses are return PCs, which is
+				// what locForPC expects.
 				if hideRuntime {
 					if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
 						continue