mirror of
https://github.com/golang/go
synced 2024-11-06 22:56:12 -07:00
runtime/pprof: correct inlined function location encoding for non-CPU profiles
Change-Id: Id270a3477bf1a581755c4311eb12f990aa2260b5 Reviewed-on: https://go-review.googlesource.com/c/go/+/205097 Run-TryBot: Hyang-Ah Hana Kim <hyangah@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
f5e89c2214
commit
e25de44ef2
@ -2,6 +2,8 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !js
|
||||
|
||||
package pprof
|
||||
|
||||
import (
|
||||
@ -10,6 +12,7 @@ import (
|
||||
"reflect"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"runtime/pprof/internal/profile"
|
||||
"testing"
|
||||
"unsafe"
|
||||
)
|
||||
@ -27,6 +30,10 @@ func allocateTransient2M() {
|
||||
memSink = make([]byte, 2<<20)
|
||||
}
|
||||
|
||||
func allocateTransient2MInline() {
|
||||
memSink = make([]byte, 2<<20)
|
||||
}
|
||||
|
||||
type Obj32 struct {
|
||||
link *Obj32
|
||||
pad [32 - unsafe.Sizeof(uintptr(0))]byte
|
||||
@ -71,42 +78,93 @@ func TestMemoryProfiler(t *testing.T) {
|
||||
// Do the interesting allocations.
|
||||
allocateTransient1M()
|
||||
allocateTransient2M()
|
||||
allocateTransient2MInline()
|
||||
allocatePersistent1K()
|
||||
allocateReflect()
|
||||
memSink = nil
|
||||
|
||||
runtime.GC() // materialize stats
|
||||
|
||||
memoryProfilerRun++
|
||||
|
||||
tests := []struct {
|
||||
stk []string
|
||||
legacy string
|
||||
}{{
|
||||
stk: []string{"runtime/pprof.allocatePersistent1K", "runtime/pprof.TestMemoryProfiler"},
|
||||
legacy: fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:47
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:82
|
||||
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
|
||||
}, {
|
||||
stk: []string{"runtime/pprof.allocateTransient1M", "runtime/pprof.TestMemoryProfiler"},
|
||||
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:24
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:79
|
||||
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
|
||||
}, {
|
||||
stk: []string{"runtime/pprof.allocateTransient2M", "runtime/pprof.TestMemoryProfiler"},
|
||||
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:30
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:80
|
||||
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
|
||||
}, {
|
||||
stk: []string{"runtime/pprof.allocateTransient2MInline", "runtime/pprof.TestMemoryProfiler"},
|
||||
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2MInline\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:34
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:81
|
||||
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
|
||||
}, {
|
||||
stk: []string{"runtime/pprof.allocateReflectTransient"},
|
||||
legacy: fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:55
|
||||
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
|
||||
}}
|
||||
|
||||
t.Run("debug=1", func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
if err := Lookup("heap").WriteTo(&buf, 1); err != nil {
|
||||
t.Fatalf("failed to write heap profile: %v", err)
|
||||
}
|
||||
|
||||
memoryProfilerRun++
|
||||
|
||||
tests := []string{
|
||||
fmt.Sprintf(`%v: %v \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocatePersistent1K\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:40
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test\.go:74
|
||||
`, 32*memoryProfilerRun, 1024*memoryProfilerRun, 32*memoryProfilerRun, 1024*memoryProfilerRun),
|
||||
|
||||
fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient1M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:21
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:72
|
||||
`, (1<<10)*memoryProfilerRun, (1<<20)*memoryProfilerRun),
|
||||
|
||||
fmt.Sprintf(`0: 0 \[%v: %v\] @ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+ 0x[0-9,a-f]+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateTransient2M\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:27
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.TestMemoryProfiler\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:73
|
||||
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
|
||||
|
||||
fmt.Sprintf(`0: 0 \[%v: %v\] @( 0x[0-9,a-f]+)+
|
||||
# 0x[0-9,a-f]+ runtime/pprof\.allocateReflectTransient\+0x[0-9,a-f]+ .*/runtime/pprof/mprof_test.go:48
|
||||
`, memoryProfilerRun, (2<<20)*memoryProfilerRun),
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
if !regexp.MustCompile(test).Match(buf.Bytes()) {
|
||||
t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test, buf.String())
|
||||
if !regexp.MustCompile(test.legacy).Match(buf.Bytes()) {
|
||||
t.Fatalf("The entry did not match:\n%v\n\nProfile:\n%v\n", test.legacy, buf.String())
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("proto", func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
if err := Lookup("heap").WriteTo(&buf, 0); err != nil {
|
||||
t.Fatalf("failed to write heap profile: %v", err)
|
||||
}
|
||||
p, err := profile.Parse(&buf)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse heap profile: %v", err)
|
||||
}
|
||||
t.Logf("Profile = %v", p)
|
||||
|
||||
stks := stacks(p)
|
||||
for _, test := range tests {
|
||||
if !containsStack(stks, test.stk) {
|
||||
t.Fatalf("No matching stack entry for %q\n\nProfile:\n%v\n", test.stk, p)
|
||||
}
|
||||
}
|
||||
// Check the inlined function location is encoded correctly.
|
||||
for _, loc := range p.Location {
|
||||
inlinedCaller, inlinedCallee := false, false
|
||||
for _, line := range loc.Line {
|
||||
if line.Function.Name == "runtime/pprof.allocateTransient2MInline" {
|
||||
inlinedCallee = true
|
||||
}
|
||||
if inlinedCallee && line.Function.Name == "runtime/pprof.TestMemoryProfiler" {
|
||||
inlinedCaller = true
|
||||
}
|
||||
}
|
||||
if inlinedCallee != inlinedCaller {
|
||||
t.Errorf("want allocateTransient2MInline after TestMemoryProfiler in one location, got separate location entries:\n%v", loc)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -386,16 +386,9 @@ func printCountCycleProfile(w io.Writer, countName, cycleName string, scaler fun
|
||||
count, nanosec := scaler(r.Count, float64(r.Cycles)/cpuGHz)
|
||||
values[0] = count
|
||||
values[1] = int64(nanosec)
|
||||
locs = locs[:0]
|
||||
for _, addr := range r.Stack() {
|
||||
// For count profiles, all stack addresses are
|
||||
// return PCs, which is what locForPC expects.
|
||||
l := b.locForPC(addr)
|
||||
if l == 0 { // runtime.goexit
|
||||
continue
|
||||
}
|
||||
locs = append(locs, l)
|
||||
}
|
||||
// return PCs, which is what appendLocsForStack expects.
|
||||
locs = b.appendLocsForStack(locs[:0], r.Stack())
|
||||
b.pbSample(values, locs, nil)
|
||||
}
|
||||
b.build()
|
||||
@ -451,16 +444,9 @@ func printCountProfile(w io.Writer, debug int, name string, p countProfile) erro
|
||||
var locs []uint64
|
||||
for _, k := range keys {
|
||||
values[0] = int64(count[k])
|
||||
locs = locs[:0]
|
||||
for _, addr := range p.Stack(index[k]) {
|
||||
// For count profiles, all stack addresses are
|
||||
// return PCs, which is what locForPC expects.
|
||||
l := b.locForPC(addr)
|
||||
if l == 0 { // runtime.goexit
|
||||
continue
|
||||
}
|
||||
locs = append(locs, l)
|
||||
}
|
||||
// return PCs, which is what appendLocsForStack expects.
|
||||
locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
|
||||
b.pbSample(values, locs, nil)
|
||||
}
|
||||
b.build()
|
||||
|
@ -44,6 +44,7 @@ type profileBuilder struct {
|
||||
locs map[uintptr]locInfo // list of locInfo starting with the given PC.
|
||||
funcs map[string]int // Package path-qualified function name to Function.ID
|
||||
mem []memMap
|
||||
deck pcDeck
|
||||
}
|
||||
|
||||
type memMap struct {
|
||||
@ -428,8 +429,6 @@ func (b *profileBuilder) build() {
|
||||
}
|
||||
|
||||
values := []int64{0, 0}
|
||||
|
||||
var deck = &pcDeck{}
|
||||
var locs []uint64
|
||||
|
||||
for e := b.m.all; e != nil; e = e.nextAll {
|
||||
@ -445,61 +444,13 @@ func (b *profileBuilder) build() {
|
||||
}
|
||||
}
|
||||
|
||||
deck.reset()
|
||||
locs = locs[:0]
|
||||
|
||||
// Addresses from stack traces point to the next instruction after each call,
|
||||
// except for the leaf, which points to where the signal occurred.
|
||||
// deck.add+emitLocation expects return PCs so increment the leaf address to
|
||||
// appendLocsForStack expects return PCs so increment the leaf address to
|
||||
// look like a return PC.
|
||||
e.stk[0] += 1
|
||||
for stk := e.stk; len(stk) > 0; {
|
||||
addr := stk[0]
|
||||
if l, ok := b.locs[addr]; ok {
|
||||
// first record the location if there is any pending accumulated info.
|
||||
if id := b.emitLocation(deck); id > 0 {
|
||||
locs = append(locs, id)
|
||||
}
|
||||
|
||||
// then, record the cached location.
|
||||
locs = append(locs, l.id)
|
||||
stk = stk[len(l.pcs):] // skip the matching pcs.
|
||||
continue
|
||||
}
|
||||
|
||||
frames, symbolizeResult := allFrames(addr)
|
||||
if len(frames) == 0 { // runtime.goexit.
|
||||
if id := b.emitLocation(deck); id > 0 {
|
||||
locs = append(locs, id)
|
||||
}
|
||||
stk = stk[1:]
|
||||
continue
|
||||
}
|
||||
|
||||
if added := deck.tryAdd(addr, frames, symbolizeResult); added {
|
||||
stk = stk[1:]
|
||||
continue
|
||||
}
|
||||
// add failed because this addr is not inlined with
|
||||
// the existing PCs in the deck. Flush the deck and retry to
|
||||
// handle this pc.
|
||||
if id := b.emitLocation(deck); id > 0 {
|
||||
locs = append(locs, id)
|
||||
}
|
||||
|
||||
// check cache again - previous emitLocation added a new entry
|
||||
if l, ok := b.locs[addr]; ok {
|
||||
locs = append(locs, l.id)
|
||||
stk = stk[len(l.pcs):] // skip the matching pcs.
|
||||
} else {
|
||||
deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
|
||||
stk = stk[1:]
|
||||
}
|
||||
}
|
||||
if id := b.emitLocation(deck); id > 0 { // emit remaining location.
|
||||
locs = append(locs, id)
|
||||
}
|
||||
e.stk[0] -= 1 // undo the adjustment on the leaf done before the loop.
|
||||
locs = b.appendLocsForStack(locs[:0], e.stk)
|
||||
e.stk[0] -= 1 // undo the adjustment on the leaf.
|
||||
|
||||
b.pbSample(values, locs, labels)
|
||||
}
|
||||
@ -517,6 +468,62 @@ func (b *profileBuilder) build() {
|
||||
b.zw.Close()
|
||||
}
|
||||
|
||||
// appendLocsForStack appends the location IDs for the given stack trace to the given
|
||||
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
|
||||
// an inline marker as the runtime traceback function returns.
|
||||
//
|
||||
// It may emit to b.pb, so there must be no message encoding in progress.
|
||||
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
|
||||
b.deck.reset()
|
||||
for len(stk) > 0 {
|
||||
addr := stk[0]
|
||||
if l, ok := b.locs[addr]; ok {
|
||||
// first record the location if there is any pending accumulated info.
|
||||
if id := b.emitLocation(); id > 0 {
|
||||
locs = append(locs, id)
|
||||
}
|
||||
|
||||
// then, record the cached location.
|
||||
locs = append(locs, l.id)
|
||||
stk = stk[len(l.pcs):] // skip the matching pcs.
|
||||
continue
|
||||
}
|
||||
|
||||
frames, symbolizeResult := allFrames(addr)
|
||||
if len(frames) == 0 { // runtime.goexit.
|
||||
if id := b.emitLocation(); id > 0 {
|
||||
locs = append(locs, id)
|
||||
}
|
||||
stk = stk[1:]
|
||||
continue
|
||||
}
|
||||
|
||||
if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
|
||||
stk = stk[1:]
|
||||
continue
|
||||
}
|
||||
// add failed because this addr is not inlined with
|
||||
// the existing PCs in the deck. Flush the deck and retry to
|
||||
// handle this pc.
|
||||
if id := b.emitLocation(); id > 0 {
|
||||
locs = append(locs, id)
|
||||
}
|
||||
|
||||
// check cache again - previous emitLocation added a new entry
|
||||
if l, ok := b.locs[addr]; ok {
|
||||
locs = append(locs, l.id)
|
||||
stk = stk[len(l.pcs):] // skip the matching pcs.
|
||||
} else {
|
||||
b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
|
||||
stk = stk[1:]
|
||||
}
|
||||
}
|
||||
if id := b.emitLocation(); id > 0 { // emit remaining location.
|
||||
locs = append(locs, id)
|
||||
}
|
||||
return locs
|
||||
}
|
||||
|
||||
// pcDeck is a helper to detect a sequence of inlined functions from
|
||||
// a stack trace returned by the runtime.
|
||||
//
|
||||
@ -583,15 +590,14 @@ func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symb
|
||||
// and returns the location ID encoded in the profile protobuf.
|
||||
// It emits to b.pb, so there must be no message encoding in progress.
|
||||
// It resets the deck.
|
||||
func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
|
||||
defer deck.reset()
|
||||
|
||||
if len(deck.pcs) == 0 {
|
||||
func (b *profileBuilder) emitLocation() uint64 {
|
||||
if len(b.deck.pcs) == 0 {
|
||||
return 0
|
||||
}
|
||||
defer b.deck.reset()
|
||||
|
||||
addr := deck.pcs[0]
|
||||
firstFrame := deck.frames[0]
|
||||
addr := b.deck.pcs[0]
|
||||
firstFrame := b.deck.frames[0]
|
||||
|
||||
// We can't write out functions while in the middle of the
|
||||
// Location message, so record new functions we encounter and
|
||||
@ -603,12 +609,12 @@ func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
|
||||
newFuncs := make([]newFunc, 0, 8)
|
||||
|
||||
id := uint64(len(b.locs)) + 1
|
||||
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, deck.pcs...)}
|
||||
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
|
||||
|
||||
start := b.pb.startMessage()
|
||||
b.pb.uint64Opt(tagLocation_ID, id)
|
||||
b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
|
||||
for _, frame := range deck.frames {
|
||||
for _, frame := range b.deck.frames {
|
||||
// Write out each line in frame expansion.
|
||||
funcID := uint64(b.funcs[frame.Function])
|
||||
if funcID == 0 {
|
||||
@ -623,7 +629,7 @@ func (b *profileBuilder) emitLocation(deck *pcDeck) uint64 {
|
||||
b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
|
||||
|
||||
m := b.mem[i]
|
||||
m.funcs |= deck.symbolizeResult
|
||||
m.funcs |= b.deck.symbolizeResult
|
||||
b.mem[i] = m
|
||||
break
|
||||
}
|
||||
|
@ -27,30 +27,27 @@ func writeHeapProto(w io.Writer, p []runtime.MemProfileRecord, rate int64, defau
|
||||
values := []int64{0, 0, 0, 0}
|
||||
var locs []uint64
|
||||
for _, r := range p {
|
||||
locs = locs[:0]
|
||||
hideRuntime := true
|
||||
for tries := 0; tries < 2; tries++ {
|
||||
for _, addr := range r.Stack() {
|
||||
stk := r.Stack()
|
||||
// For heap profiles, all stack
|
||||
// addresses are return PCs, which is
|
||||
// what locForPC expects.
|
||||
// what appendLocsForStack expects.
|
||||
if hideRuntime {
|
||||
for i, addr := range stk {
|
||||
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
|
||||
continue
|
||||
}
|
||||
// Found non-runtime. Show any runtime uses above it.
|
||||
hideRuntime = false
|
||||
stk = stk[i:]
|
||||
break
|
||||
}
|
||||
l := b.locForPC(addr)
|
||||
if l == 0 { // runtime.goexit
|
||||
continue
|
||||
}
|
||||
locs = append(locs, l)
|
||||
}
|
||||
locs = b.appendLocsForStack(locs[:0], stk)
|
||||
if len(locs) > 0 {
|
||||
break
|
||||
}
|
||||
hideRuntime = false // try again, and show all frames
|
||||
hideRuntime = false // try again, and show all frames next time.
|
||||
}
|
||||
|
||||
values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate)
|
||||
|
Loading…
Reference in New Issue
Block a user