mirror of
https://github.com/golang/go
synced 2024-11-06 22:36:15 -07:00
runtime/pprof: correctly encode inlined functions in CPU profile
The pprof profile proto message expects inlined functions of a PC to be encoded in one Location entry using multiple Line entries. https://github.com/google/pprof/blob/5e96527/proto/profile.proto#L177-L184 runtime/pprof has encoded the symbolization information by creating a Location for each PC found in the stack trace and including info from all the frames expanded from the PC using runtime.CallersFrames. This assumes inlined functions are represented as a single PC in the stack trace. (https://go-review.googlesource.com/41256) In the recent years, behavior around inlining and the traceback changed significantly (e.g. https://golang.org/cl/152537, https://golang.org/issue/29582, and many changes). Now the PCs in the stack trace represent user frames even including inline marks. As a result, the profile proto started to allocate a Location entry for each user frame, lose the inline information (so pprof presented incorrect results when inlined functions are involved), and confuse the pprof tool with those PCs made up for inline marks. This CL attempts to detect inlined call frames from the stack traces of CPU profiles, and organize the Location information as intended. Currently, runtime does not provide a reliable and convenient way to detect inlined call frames and expand user frames from a given externally recognizable PCs. So we use heuristics to recover the groups - inlined call frames have nil Func field - inlined call frames will have the same Entry point - but must be careful with recursive functions that have the same Entry point by definition, and non-Go functions that may lack most of the fields of Frame. The followup CL will address the issue with other profile types. Change-Id: I0c9667ab016a3e898d648f31c3f82d84c15398db Reviewed-on: https://go-review.googlesource.com/c/go/+/204636 Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
33dfd3529b
commit
e038c7e418
@ -49,8 +49,12 @@ var (
|
|||||||
// Must not call other functions nor access heap/globals in the loop,
|
// Must not call other functions nor access heap/globals in the loop,
|
||||||
// otherwise under race detector the samples will be in the race runtime.
|
// otherwise under race detector the samples will be in the race runtime.
|
||||||
func cpuHog1(x int) int {
|
func cpuHog1(x int) int {
|
||||||
|
return cpuHog0(x, 1e5)
|
||||||
|
}
|
||||||
|
|
||||||
|
func cpuHog0(x, n int) int {
|
||||||
foo := x
|
foo := x
|
||||||
for i := 0; i < 1e5; i++ {
|
for i := 0; i < n; i++ {
|
||||||
if foo > 0 {
|
if foo > 0 {
|
||||||
foo *= foo
|
foo *= foo
|
||||||
} else {
|
} else {
|
||||||
@ -101,34 +105,69 @@ func TestCPUProfileMultithreaded(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestCPUProfileInlining(t *testing.T) {
|
func TestCPUProfileInlining(t *testing.T) {
|
||||||
testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
|
p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions(), func(dur time.Duration) {
|
||||||
cpuHogger(inlinedCaller, &salt1, dur)
|
cpuHogger(inlinedCaller, &salt1, dur)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Check if inlined function locations are encoded correctly. The inlinedCalee and inlinedCaller should be in one location.
|
||||||
|
for _, loc := range p.Location {
|
||||||
|
hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
|
||||||
|
for _, line := range loc.Line {
|
||||||
|
if line.Function.Name == "runtime/pprof.inlinedCallee" {
|
||||||
|
hasInlinedCallee = true
|
||||||
|
}
|
||||||
|
if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
|
||||||
|
hasInlinedCallerAfterInlinedCallee = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
|
||||||
|
t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func inlinedCaller(x int) int {
|
func inlinedCaller(x int) int {
|
||||||
x = inlinedCallee(x)
|
x = inlinedCallee(x, 1e5)
|
||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
|
|
||||||
func inlinedCallee(x int) int {
|
func inlinedCallee(x, n int) int {
|
||||||
// We could just use cpuHog1, but for loops prevent inlining
|
return cpuHog0(x, n)
|
||||||
// right now. :(
|
|
||||||
foo := x
|
|
||||||
i := 0
|
|
||||||
loop:
|
|
||||||
if foo > 0 {
|
|
||||||
foo *= foo
|
|
||||||
} else {
|
|
||||||
foo *= foo + 1
|
|
||||||
}
|
|
||||||
if i++; i < 1e5 {
|
|
||||||
goto loop
|
|
||||||
}
|
|
||||||
return foo
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) {
|
func TestCPUProfileRecursion(t *testing.T) {
|
||||||
|
p := testCPUProfile(t, stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions(), func(dur time.Duration) {
|
||||||
|
cpuHogger(recursionCaller, &salt1, dur)
|
||||||
|
})
|
||||||
|
|
||||||
|
// check the Location encoding was not confused by recursive calls.
|
||||||
|
for i, loc := range p.Location {
|
||||||
|
recursionFunc := 0
|
||||||
|
for _, line := range loc.Line {
|
||||||
|
if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
|
||||||
|
recursionFunc++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if recursionFunc > 1 {
|
||||||
|
t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func recursionCaller(x int) int {
|
||||||
|
y := recursionCallee(3, x)
|
||||||
|
return y
|
||||||
|
}
|
||||||
|
|
||||||
|
func recursionCallee(n, x int) int {
|
||||||
|
if n == 0 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
y := inlinedCallee(x, 1e4)
|
||||||
|
return y * recursionCallee(n-1, x)
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
|
||||||
p, err := profile.Parse(bytes.NewReader(valBytes))
|
p, err := profile.Parse(bytes.NewReader(valBytes))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -137,11 +176,12 @@ func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Loca
|
|||||||
count := uintptr(sample.Value[0])
|
count := uintptr(sample.Value[0])
|
||||||
f(count, sample.Location, sample.Label)
|
f(count, sample.Location, sample.Label)
|
||||||
}
|
}
|
||||||
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
|
// testCPUProfile runs f under the CPU profiler, checking for some conditions specified by need,
|
||||||
// as interpreted by matches.
|
// as interpreted by matches, and returns the parsed profile.
|
||||||
func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) {
|
func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []string, f func(dur time.Duration)) *profile.Profile {
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
case "darwin":
|
case "darwin":
|
||||||
switch runtime.GOARCH {
|
switch runtime.GOARCH {
|
||||||
@ -195,8 +235,8 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
|
|||||||
f(duration)
|
f(duration)
|
||||||
StopCPUProfile()
|
StopCPUProfile()
|
||||||
|
|
||||||
if profileOk(t, matches, need, avoid, prof, duration) {
|
if p, ok := profileOk(t, matches, need, avoid, prof, duration); ok {
|
||||||
return
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
duration *= 2
|
duration *= 2
|
||||||
@ -217,6 +257,7 @@ func testCPUProfile(t *testing.T, matches matchFunc, need []string, avoid []stri
|
|||||||
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
|
t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
|
||||||
}
|
}
|
||||||
t.FailNow()
|
t.FailNow()
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func contains(slice []string, s string) bool {
|
func contains(slice []string, s string) bool {
|
||||||
@ -242,7 +283,7 @@ func stackContains(spec string, count uintptr, stk []*profile.Location, labels m
|
|||||||
|
|
||||||
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
|
type matchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool
|
||||||
|
|
||||||
func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (ok bool) {
|
func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
|
||||||
ok = true
|
ok = true
|
||||||
|
|
||||||
// Check that profile is well formed, contains 'need', and does not contain
|
// Check that profile is well formed, contains 'need', and does not contain
|
||||||
@ -251,7 +292,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
|
|||||||
avoidSamples := make([]uintptr, len(avoid))
|
avoidSamples := make([]uintptr, len(avoid))
|
||||||
var samples uintptr
|
var samples uintptr
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
|
p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
|
||||||
fmt.Fprintf(&buf, "%d:", count)
|
fmt.Fprintf(&buf, "%d:", count)
|
||||||
fprintStack(&buf, stk)
|
fprintStack(&buf, stk)
|
||||||
samples += count
|
samples += count
|
||||||
@ -278,7 +319,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
|
|||||||
// not enough samples due to coarse timer
|
// not enough samples due to coarse timer
|
||||||
// resolution. Let it go.
|
// resolution. Let it go.
|
||||||
t.Log("too few samples on Windows (golang.org/issue/10842)")
|
t.Log("too few samples on Windows (golang.org/issue/10842)")
|
||||||
return false
|
return p, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that we got a reasonable number of samples.
|
// Check that we got a reasonable number of samples.
|
||||||
@ -300,7 +341,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(need) == 0 {
|
if len(need) == 0 {
|
||||||
return ok
|
return p, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
var total uintptr
|
var total uintptr
|
||||||
@ -323,7 +364,7 @@ func profileOk(t *testing.T, matches matchFunc, need []string, avoid []string, p
|
|||||||
ok = false
|
ok = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ok
|
return p, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fork can hang if preempted with signals frequently enough (see issue 5517).
|
// Fork can hang if preempted with signals frequently enough (see issue 5517).
|
||||||
|
@ -41,8 +41,8 @@ type profileBuilder struct {
|
|||||||
pb protobuf
|
pb protobuf
|
||||||
strings []string
|
strings []string
|
||||||
stringMap map[string]int
|
stringMap map[string]int
|
||||||
locs map[uintptr]int
|
locs map[uintptr]locInfo // list of locInfo starting with the given PC.
|
||||||
funcs map[string]int // Package path-qualified function name to Function.ID
|
funcs map[string]int // Package path-qualified function name to Function.ID
|
||||||
mem []memMap
|
mem []memMap
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,13 +207,43 @@ func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file
|
|||||||
b.pb.endMessage(tag, start)
|
b.pb.endMessage(tag, start)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
|
||||||
|
// Expand this one address using CallersFrames so we can cache
|
||||||
|
// each expansion. In general, CallersFrames takes a whole
|
||||||
|
// stack, but in this case we know there will be no skips in
|
||||||
|
// the stack and we have return PCs anyway.
|
||||||
|
frames := runtime.CallersFrames([]uintptr{addr})
|
||||||
|
frame, more := frames.Next()
|
||||||
|
if frame.Function == "runtime.goexit" {
|
||||||
|
// Short-circuit if we see runtime.goexit so the loop
|
||||||
|
// below doesn't allocate a useless empty location.
|
||||||
|
return nil, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
symbolizeResult := lookupTried
|
||||||
|
if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
|
||||||
|
symbolizeResult |= lookupFailed
|
||||||
|
}
|
||||||
|
|
||||||
|
if frame.PC == 0 {
|
||||||
|
// If we failed to resolve the frame, at least make up
|
||||||
|
// a reasonable call PC. This mostly happens in tests.
|
||||||
|
frame.PC = addr - 1
|
||||||
|
}
|
||||||
|
ret := []runtime.Frame{frame}
|
||||||
|
for frame.Function != "runtime.goexit" && more == true {
|
||||||
|
frame, more = frames.Next()
|
||||||
|
ret = append(ret, frame)
|
||||||
|
}
|
||||||
|
return ret, symbolizeResult
|
||||||
|
}
|
||||||
|
|
||||||
// locForPC returns the location ID for addr.
|
// locForPC returns the location ID for addr.
|
||||||
// addr must a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call.
|
// addr must a return PC or 1 + the PC of an inline marker. This returns the location of the corresponding call.
|
||||||
// It may emit to b.pb, so there must be no message encoding in progress.
|
// It may emit to b.pb, so there must be no message encoding in progress.
|
||||||
func (b *profileBuilder) locForPC(addr uintptr) uint64 {
|
func (b *profileBuilder) locForPC(addr uintptr) uint64 {
|
||||||
id := uint64(b.locs[addr])
|
if loc, ok := b.locs[addr]; ok {
|
||||||
if id != 0 {
|
return loc.id
|
||||||
return id
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expand this one address using CallersFrames so we can cache
|
// Expand this one address using CallersFrames so we can cache
|
||||||
@ -248,8 +278,8 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
|
|||||||
}
|
}
|
||||||
newFuncs := make([]newFunc, 0, 8)
|
newFuncs := make([]newFunc, 0, 8)
|
||||||
|
|
||||||
id = uint64(len(b.locs)) + 1
|
id := uint64(len(b.locs)) + 1
|
||||||
b.locs[addr] = int(id)
|
b.locs[addr] = locInfo{id: id, pcs: []uintptr{addr}}
|
||||||
start := b.pb.startMessage()
|
start := b.pb.startMessage()
|
||||||
b.pb.uint64Opt(tagLocation_ID, id)
|
b.pb.uint64Opt(tagLocation_ID, id)
|
||||||
b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
|
b.pb.uint64Opt(tagLocation_Address, uint64(frame.PC))
|
||||||
@ -293,6 +323,16 @@ func (b *profileBuilder) locForPC(addr uintptr) uint64 {
|
|||||||
return id
|
return id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type locInfo struct {
|
||||||
|
// location id assigned by the profileBuilder
|
||||||
|
id uint64
|
||||||
|
|
||||||
|
// sequence of PCs, including the fake PCs returned by the traceback
|
||||||
|
// to represent inlined functions
|
||||||
|
// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
|
||||||
|
pcs []uintptr
|
||||||
|
}
|
||||||
|
|
||||||
// newProfileBuilder returns a new profileBuilder.
|
// newProfileBuilder returns a new profileBuilder.
|
||||||
// CPU profiling data obtained from the runtime can be added
|
// CPU profiling data obtained from the runtime can be added
|
||||||
// by calling b.addCPUData, and then the eventual profile
|
// by calling b.addCPUData, and then the eventual profile
|
||||||
@ -305,7 +345,7 @@ func newProfileBuilder(w io.Writer) *profileBuilder {
|
|||||||
start: time.Now(),
|
start: time.Now(),
|
||||||
strings: []string{""},
|
strings: []string{""},
|
||||||
stringMap: map[string]int{"": 0},
|
stringMap: map[string]int{"": 0},
|
||||||
locs: map[uintptr]int{},
|
locs: map[uintptr]locInfo{},
|
||||||
funcs: map[string]int{},
|
funcs: map[string]int{},
|
||||||
}
|
}
|
||||||
b.readMapping()
|
b.readMapping()
|
||||||
@ -388,7 +428,10 @@ func (b *profileBuilder) build() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
values := []int64{0, 0}
|
values := []int64{0, 0}
|
||||||
|
|
||||||
|
var deck = &pcDeck{}
|
||||||
var locs []uint64
|
var locs []uint64
|
||||||
|
|
||||||
for e := b.m.all; e != nil; e = e.nextAll {
|
for e := b.m.all; e != nil; e = e.nextAll {
|
||||||
values[0] = e.count
|
values[0] = e.count
|
||||||
values[1] = e.count * b.period
|
values[1] = e.count * b.period
|
||||||
@ -402,23 +445,62 @@ func (b *profileBuilder) build() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
deck.reset()
|
||||||
locs = locs[:0]
|
locs = locs[:0]
|
||||||
for i, addr := range e.stk {
|
|
||||||
// Addresses from stack traces point to the
|
// Addresses from stack traces point to the next instruction after each call,
|
||||||
// next instruction after each call, except
|
// except for the leaf, which points to where the signal occurred.
|
||||||
// for the leaf, which points to where the
|
// deck.add+emitLocation expects return PCs so increment the leaf address to
|
||||||
// signal occurred. locForPC expects return
|
// look like a return PC.
|
||||||
// PCs, so increment the leaf address to look
|
e.stk[0] += 1
|
||||||
// like a return PC.
|
for stk := e.stk; len(stk) > 0; {
|
||||||
if i == 0 {
|
addr := stk[0]
|
||||||
addr++
|
if l, ok := b.locs[addr]; ok {
|
||||||
}
|
// first record the location if there is any pending accumulated info.
|
||||||
l := b.locForPC(addr)
|
if id := b.emitLocation(deck); id > 0 {
|
||||||
if l == 0 { // runtime.goexit
|
locs = append(locs, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// then, record the cached location.
|
||||||
|
locs = append(locs, l.id)
|
||||||
|
stk = stk[len(l.pcs):] // skip the matching pcs.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
locs = append(locs, l)
|
|
||||||
|
frames, symbolizeResult := allFrames(addr)
|
||||||
|
if len(frames) == 0 { // runtime.goexit.
|
||||||
|
if id := b.emitLocation(deck); id > 0 {
|
||||||
|
locs = append(locs, id)
|
||||||
|
}
|
||||||
|
stk = stk[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if added := deck.tryAdd(addr, frames, symbolizeResult); added {
|
||||||
|
stk = stk[1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// add failed because this addr is not inlined with
|
||||||
|
// the existing PCs in the deck. Flush the deck and retry to
|
||||||
|
// handle this pc.
|
||||||
|
if id := b.emitLocation(deck); id > 0 {
|
||||||
|
locs = append(locs, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// check cache again - previous emitLocation added a new entry
|
||||||
|
if l, ok := b.locs[addr]; ok {
|
||||||
|
locs = append(locs, l.id)
|
||||||
|
stk = stk[len(l.pcs):] // skip the matching pcs.
|
||||||
|
} else {
|
||||||
|
deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
|
||||||
|
stk = stk[1:]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if id := b.emitLocation(deck); id > 0 { // emit remaining location.
|
||||||
|
locs = append(locs, id)
|
||||||
|
}
|
||||||
|
e.stk[0] -= 1 // undo the adjustment on the leaf done before the loop.
|
||||||
|
|
||||||
b.pbSample(values, locs, labels)
|
b.pbSample(values, locs, labels)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -435,6 +517,133 @@ func (b *profileBuilder) build() {
|
|||||||
b.zw.Close()
|
b.zw.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pcDeck is a helper to detect a sequence of inlined functions from
|
||||||
|
// a stack trace returned by the runtime.
|
||||||
|
//
|
||||||
|
// The stack traces returned by runtime's trackback functions are fully
|
||||||
|
// expanded (at least for Go functions) and include the fake pcs representing
|
||||||
|
// inlined functions. The profile proto expects the inlined functions to be
|
||||||
|
// encoded in one Location message.
|
||||||
|
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
|
||||||
|
//
|
||||||
|
// Runtime does not directly expose whether a frame is for an inlined function
|
||||||
|
// and looking up debug info is not ideal, so we use a heuristic to filter
|
||||||
|
// the fake pcs and restore the inlined and entry functions. Inlined functions
|
||||||
|
// have the following properties:
|
||||||
|
// Frame's Func is nil (note: also true for non-Go functions), and
|
||||||
|
// Frame's Entry matches its entry function frame's Entry. (note: could also be true for recursive calls and non-Go functions),
|
||||||
|
// Frame's Name does not match its entry function frame's name.
|
||||||
|
//
|
||||||
|
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
|
||||||
|
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
|
||||||
|
// until we observe the entry function frame.
|
||||||
|
type pcDeck struct {
|
||||||
|
pcs []uintptr
|
||||||
|
frames []runtime.Frame
|
||||||
|
symbolizeResult symbolizeFlag
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *pcDeck) reset() {
|
||||||
|
d.pcs = d.pcs[:0]
|
||||||
|
d.frames = d.frames[:0]
|
||||||
|
d.symbolizeResult = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryAdd tries to add the pc and Frames expanded from it (most likely one,
|
||||||
|
// since the stack trace is already fully expanded) and the symbolizeResult
|
||||||
|
// to the deck. If it fails the caller needs to flush the deck and retry.
|
||||||
|
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
|
||||||
|
if existing := len(d.pcs); existing > 0 {
|
||||||
|
// 'frames' are all expanded from one 'pc' and represent all inlined functions
|
||||||
|
// so we check only the first one.
|
||||||
|
newFrame := frames[0]
|
||||||
|
last := d.frames[existing-1]
|
||||||
|
if last.Func != nil && newFrame.Func != nil { // Can't be an inlined frame.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if last.Entry != newFrame.Entry { // newFrame is for a different function.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if last.Function == newFrame.Function { // maybe recursion.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.pcs = append(d.pcs, pc)
|
||||||
|
d.frames = append(d.frames, frames...)
|
||||||
|
d.symbolizeResult |= symbolizeResult
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitLocation emits the new location and function information recorded in the deck
|
||||||
|
// and returns the location ID encoded in the profile protobuf.
|
||||||
|
// It emits to b.pb, so there must be no message encoding in progress.
|
||||||
|
// It resets the deck.
|
||||||
|
func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
|
||||||
|
defer deck.reset()
|
||||||
|
|
||||||
|
if len(deck.pcs) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
addr := deck.pcs[0]
|
||||||
|
firstFrame := deck.frames[0]
|
||||||
|
|
||||||
|
// We can't write out functions while in the middle of the
|
||||||
|
// Location message, so record new functions we encounter and
|
||||||
|
// write them out after the Location.
|
||||||
|
type newFunc struct {
|
||||||
|
id uint64
|
||||||
|
name, file string
|
||||||
|
}
|
||||||
|
newFuncs := make([]newFunc, 0, 8)
|
||||||
|
|
||||||
|
id := uint64(len(b.locs)) + 1
|
||||||
|
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, deck.pcs...)}
|
||||||
|
|
||||||
|
start := b.pb.startMessage()
|
||||||
|
b.pb.uint64Opt(tagLocation_ID, id)
|
||||||
|
b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
|
||||||
|
for _, frame := range deck.frames {
|
||||||
|
// Write out each line in frame expansion.
|
||||||
|
funcID := uint64(b.funcs[frame.Function])
|
||||||
|
if funcID == 0 {
|
||||||
|
funcID = uint64(len(b.funcs)) + 1
|
||||||
|
b.funcs[frame.Function] = int(funcID)
|
||||||
|
newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
|
||||||
|
}
|
||||||
|
b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
|
||||||
|
}
|
||||||
|
for i := range b.mem {
|
||||||
|
if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
|
||||||
|
b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
|
||||||
|
|
||||||
|
m := b.mem[i]
|
||||||
|
m.funcs |= deck.symbolizeResult
|
||||||
|
b.mem[i] = m
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.pb.endMessage(tagProfile_Location, start)
|
||||||
|
|
||||||
|
// Write out functions we found during frame expansion.
|
||||||
|
for _, fn := range newFuncs {
|
||||||
|
start := b.pb.startMessage()
|
||||||
|
b.pb.uint64Opt(tagFunction_ID, fn.id)
|
||||||
|
b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
|
||||||
|
b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
|
||||||
|
b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
|
||||||
|
b.pb.endMessage(tagProfile_Function, start)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.flush()
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
|
||||||
// readMapping reads /proc/self/maps and writes mappings to b.pb.
|
// readMapping reads /proc/self/maps and writes mappings to b.pb.
|
||||||
// It saves the address ranges of the mappings in b.mem for use
|
// It saves the address ranges of the mappings in b.mem for use
|
||||||
// when emitting locations.
|
// when emitting locations.
|
||||||
|
@ -358,6 +358,17 @@ func TestMapping(t *testing.T) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if traceback == "Go+C" {
|
||||||
|
// The test code was arranged to have PCs from C and
|
||||||
|
// they are not symbolized.
|
||||||
|
// Check no Location containing those unsymbolized PCs contains multiple lines.
|
||||||
|
for i, loc := range prof.Location {
|
||||||
|
if !symbolized(loc) && len(loc.Line) > 1 {
|
||||||
|
t.Errorf("Location[%d] contains unsymbolized PCs and multiple lines: %v", i, loc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
13
src/runtime/pprof/testdata/mappingtest/main.go
vendored
13
src/runtime/pprof/testdata/mappingtest/main.go
vendored
@ -17,8 +17,7 @@ package main
|
|||||||
int cpuHogCSalt1 = 0;
|
int cpuHogCSalt1 = 0;
|
||||||
int cpuHogCSalt2 = 0;
|
int cpuHogCSalt2 = 0;
|
||||||
|
|
||||||
void CPUHogCFunction() {
|
void CPUHogCFunction0(int foo) {
|
||||||
int foo = cpuHogCSalt1;
|
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < 100000; i++) {
|
for (i = 0; i < 100000; i++) {
|
||||||
if (foo > 0) {
|
if (foo > 0) {
|
||||||
@ -30,6 +29,10 @@ void CPUHogCFunction() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CPUHogCFunction() {
|
||||||
|
CPUHogCFunction0(cpuHogCSalt1);
|
||||||
|
}
|
||||||
|
|
||||||
struct CgoTracebackArg {
|
struct CgoTracebackArg {
|
||||||
uintptr_t context;
|
uintptr_t context;
|
||||||
uintptr_t sigContext;
|
uintptr_t sigContext;
|
||||||
@ -39,8 +42,9 @@ struct CgoTracebackArg {
|
|||||||
|
|
||||||
void CollectCgoTraceback(void* parg) {
|
void CollectCgoTraceback(void* parg) {
|
||||||
struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg);
|
struct CgoTracebackArg* arg = (struct CgoTracebackArg*)(parg);
|
||||||
arg->buf[0] = (uintptr_t)(CPUHogCFunction);
|
arg->buf[0] = (uintptr_t)(CPUHogCFunction0);
|
||||||
arg->buf[1] = 0;
|
arg->buf[1] = (uintptr_t)(CPUHogCFunction);
|
||||||
|
arg->buf[2] = 0;
|
||||||
};
|
};
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
@ -81,7 +85,6 @@ var salt1 int
|
|||||||
var salt2 int
|
var salt2 int
|
||||||
|
|
||||||
func cpuHogGoFunction() {
|
func cpuHogGoFunction() {
|
||||||
// Generates CPU profile samples including a Go call path.
|
|
||||||
for {
|
for {
|
||||||
foo := salt1
|
foo := salt1
|
||||||
for i := 0; i < 1e5; i++ {
|
for i := 0; i < 1e5; i++ {
|
||||||
|
Loading…
Reference in New Issue
Block a user