
runtime: make profstackdepth a GODEBUG option

Allow users to decrease the profiling stack depth back to 32 in case
they experience any problems with the new default of 128.

Users may also use this option to increase the depth up to 1024.

Change-Id: Ieaab2513024915a223239278dd97a6e161dde1cf
Reviewed-on: https://go-review.googlesource.com/c/go/+/581917
Reviewed-by: Austin Clements <austin@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Author: Felix Geisendörfer, 2024-04-27 13:41:05 +02:00 (committed by Austin Clements)
Parent: 1b9dc3e178
Commit: 66cc2b7ca7
5 changed files with 46 additions and 26 deletions


@@ -142,6 +142,13 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	When set to 0 memory profiling is disabled. Refer to the description of
 	MemProfileRate for the default value.
 
+	profstackdepth: profstackdepth=128 (the default) will set the maximum stack
+	depth used by all pprof profilers except for the CPU profiler to 128 frames.
+	Stack traces that exceed this limit will be truncated to the limit starting
+	from the leaf frame. Setting profstackdepth to any value above 1024 will
+	silently default to 1024. Future versions of Go may remove this limitation
+	and extend profstackdepth to apply to the CPU profiler and execution tracer.
+
 	pagetrace: setting pagetrace=/path/to/file will write out a trace of page events
 	that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool.
 	Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not
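
For illustration only (nothing below is part of this commit; the file name, recursion depth, and sampling rate are arbitrary), a small program along these lines can make the documented behavior visible: run it once with the default settings and once with, say, GODEBUG=profstackdepth=32, then compare the stack depths recorded in the heap profile.

package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

var sink [][]byte

// recurse allocates at the bottom of a deep call chain so the resulting
// heap profile contains a stack with far more frames than the old 32 limit.
func recurse(depth int) {
	if depth == 0 {
		sink = append(sink, make([]byte, 1<<20))
		return
	}
	recurse(depth - 1)
}

func main() {
	runtime.MemProfileRate = 1 // sample every allocation (illustration only)
	recurse(200)

	f, err := os.Create("heap.pprof") // output path is arbitrary
	if err != nil {
		panic(err)
	}
	defer f.Close()
	runtime.GC() // flush outstanding allocations into the profile
	if err := pprof.WriteHeapProfile(f); err != nil {
		panic(err)
	}
}

Inspecting the two profiles (for example with go tool pprof's traces report) should show the recurse frames truncated at roughly 128 frames by default versus 32 with the lower setting.
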


@@ -40,24 +40,6 @@ const (
 	// size of bucket hash table
 	buckHashSize = 179999
-	// maxStack is the max depth of stack to record in bucket.
-	// Note that it's only used internally as a guard against
-	// wildly out-of-bounds slicing of the PCs that come after
-	// a bucket struct, and it could increase in the future.
-	// The term "1" accounts for the first stack entry being
-	// taken up by a "skip" sentinel value for profilers which
-	// defer inline frame expansion until the profile is reported.
-	// The term "maxSkip" is for frame pointer unwinding, where we
-	// want to end up with maxLogicalStack frames but will discard
-	// some "physical" frames to account for skipping.
-	maxStack = 1 + maxSkip + maxLogicalStack
-	// maxLogicalStack is the maximum stack size of a call stack
-	// to encode in a profile. This counts "logical" frames, which
-	// includes inlined frames. We may record more than this many
-	// "physical" frames when using frame pointer unwinding to account
-	// for deferred handling of skipping frames & inline expansion.
-	maxLogicalStack = 128
 	// maxSkip is to account for deferred inline expansion
 	// when using frame pointer unwinding. We record the stack
 	// with "physical" frame pointers but handle skipping "logical"
@@ -67,6 +49,11 @@ const (
 	// This should be at least as large as the largest skip value
 	// used for profiling; otherwise stacks may be truncated inconsistently
 	maxSkip = 5
+
+	// maxProfStackDepth is the highest valid value for debug.profstackdepth.
+	// It's used for the bucket.stk func.
+	// TODO(fg): can we get rid of this?
+	maxProfStackDepth = 1024
 )
 
 type bucketType int
@@ -254,10 +241,11 @@ func newBucket(typ bucketType, nstk int) *bucket {
 	return b
 }
 
-// stk returns the slice in b holding the stack.
+// stk returns the slice in b holding the stack. The caller can assume that the
+// backing array is immutable.
 func (b *bucket) stk() []uintptr {
-	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
-	if b.nstk > maxStack {
+	stk := (*[maxProfStackDepth]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
+	if b.nstk > maxProfStackDepth {
 		// prove that slicing works; otherwise a failure requires a P
 		throw("bad profile stack count")
 	}
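
For context, the cast above works because newBucket allocates each bucket with room for its stack immediately after the struct; the fixed-size array type is just a device for obtaining a sliceable view, and maxProfStackDepth now bounds that view since per-bucket stacks are sized dynamically. Below is a self-contained sketch of the same trailing-storage idiom; header, record, and maxDepth are made-up names, and unsafe.Add stands in for the runtime's add helper.

package main

import (
	"fmt"
	"unsafe"
)

const maxDepth = 8 // plays the role of maxProfStackDepth (kept small here)

// header stands in for bucket: the PCs live directly after it in memory.
type header struct {
	nstk uintptr
}

// record bundles a header with trailing storage sized for the deepest stack;
// the runtime instead over-allocates the bucket itself, but the layout idea
// is the same.
type record struct {
	h   header
	pcs [maxDepth]uintptr
}

// stk returns the h.nstk entries stored immediately after h, mirroring
// bucket.stk: the array type only provides a sliceable view of that memory.
func (h *header) stk() []uintptr {
	arr := (*[maxDepth]uintptr)(unsafe.Add(unsafe.Pointer(h), unsafe.Sizeof(*h)))
	if h.nstk > maxDepth {
		panic("bad stack count")
	}
	return arr[:h.nstk]
}

func main() {
	r := &record{h: header{nstk: 3}}
	s := r.h.stk()
	s[0], s[1], s[2] = 0x401000, 0x401100, 0x401200
	fmt.Println(s, &s[0] == &r.pcs[0]) // the view aliases the trailing array
}
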
@@ -455,7 +443,7 @@ func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
 	}
 	// Only use the part of mp.profStack we need and ignore the extra space
 	// reserved for delayed inline expansion with frame pointer unwinding.
-	nstk := callers(4, mp.profStack[:maxLogicalStack])
+	nstk := callers(4, mp.profStack[:debug.profstackdepth])
 	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
 	b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
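
callers is runtime-internal, but user code can apply the same capped-depth idea with runtime.Callers. A rough analog, purely for illustration (captureStack and its skip count are invented here, not part of the change):

package main

import (
	"fmt"
	"runtime"
)

// captureStack copies at most maxDepth caller PCs into a buffer, mirroring
// how mProf_Malloc limits callers() to the configured profiling depth.
func captureStack(maxDepth int) []uintptr {
	pcs := make([]uintptr, maxDepth)
	n := runtime.Callers(2, pcs) // skip runtime.Callers and captureStack
	return pcs[:n]
}

func main() {
	frames := runtime.CallersFrames(captureStack(128))
	for {
		f, more := frames.Next()
		fmt.Println(f.Function)
		if !more {
			break
		}
	}
}
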
@@ -542,12 +530,18 @@ func blocksampled(cycles, rate int64) bool {
 // skip should be positive if this event is recorded from the current stack
 // (e.g. when this is not called from a system stack)
 func saveblockevent(cycles, rate int64, skip int, which bucketType) {
+	if debug.profstackdepth == 0 {
+		// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
+		// can't record a stack trace.
+		return
+	}
+
 	if skip > maxSkip {
 		print("requested skip=", skip)
 		throw("invalid skip value")
 	}
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
 	nstk := 1
 	if tracefpunwindoff() || gp.m.hasCgoOnStack() {
 		mp.profStack[0] = logicalStackSentinel
@@ -736,6 +730,12 @@ func (prof *mLockProfile) recordUnlock(l *mutex) {
 }
 
 func (prof *mLockProfile) captureStack() {
+	if debug.profstackdepth == 0 {
+		// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
+		// can't record a stack trace.
+		return
+	}
+
 	skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
 	if staticLockRanking {
 		// When static lock ranking is enabled, we'll always be on the system
@@ -780,7 +780,7 @@ func (prof *mLockProfile) store() {
 	mp := acquirem()
 	prof.disabled = true
 
-	nstk := maxStack
+	nstk := int(debug.profstackdepth)
 	for i := 0; i < nstk; i++ {
 		if pc := prof.stack[i]; pc == 0 {
 			nstk = i
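
For readers unfamiliar with the convention: prof.stack is zero-filled beyond the recorded frames, so the loop above shrinks nstk to the used prefix. A standalone illustration of that idiom, where usedPCs is a made-up helper rather than runtime code:

package main

import "fmt"

// usedPCs trims a PC buffer at its first zero entry, the same prefix the
// loop in store() computes by shrinking nstk.
func usedPCs(stack []uintptr) []uintptr {
	for i, pc := range stack {
		if pc == 0 {
			return stack[:i]
		}
	}
	return stack
}

func main() {
	buf := []uintptr{0x401000, 0x401234, 0, 0} // two recorded frames, rest zero
	fmt.Println(len(usedPCs(buf)))             // 2
}
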


@@ -818,6 +818,9 @@ func schedinit() {
 		MemProfileRate = 0
 	}
 
+	// mcommoninit runs before parsedebugvars, so init profstacks again.
+	mProfStackInit(gp.m)
+
 	lock(&sched.lock)
 	sched.lastpoll.Store(nanotime())
 	procs := ncpu
@@ -930,6 +933,11 @@ func mcommoninit(mp *m, id int64) {
 // malloc and runtime locks for mLockProfile.
 // TODO(mknyszek): Implement lazy allocation if this becomes a problem.
 func mProfStackInit(mp *m) {
+	if debug.profstackdepth == 0 {
+		// debug.profstackdepth is set to 0 by the user, or we're being called
+		// from schedinit before parsedebugvars.
+		return
+	}
 	mp.profStack = makeProfStackFP()
 	mp.mLockProfile.stack = makeProfStackFP()
 }
@@ -944,12 +952,12 @@ func makeProfStackFP() []uintptr {
 	// The "maxSkip" term is for frame pointer unwinding, where we
 	// want to end up with debug.profstackdepth frames but will discard
 	// some "physical" frames to account for skipping.
-	return make([]uintptr, 1+maxSkip+maxLogicalStack)
+	return make([]uintptr, 1+maxSkip+debug.profstackdepth)
 }
 
 // makeProfStack returns a buffer large enough to hold a maximum-sized stack
 // trace.
-func makeProfStack() []uintptr { return make([]uintptr, maxLogicalStack) }
+func makeProfStack() []uintptr { return make([]uintptr, debug.profstackdepth) }
 
 //go:linkname pprof_makeProfStack
 func pprof_makeProfStack() []uintptr { return makeProfStack() }


@@ -330,6 +330,7 @@ var debug struct {
 	tracefpunwindoff int32
 	traceadvanceperiod int32
 	traceCheckStackOwnership int32
+	profstackdepth int32
 
 	// debug.malloc is used as a combined debug check
 	// in the malloc function and should be set
@@ -379,6 +380,7 @@ var dbgvars = []*dbgVar{
 	{name: "invalidptr", value: &debug.invalidptr},
 	{name: "madvdontneed", value: &debug.madvdontneed},
 	{name: "panicnil", atomic: &debug.panicnil},
+	{name: "profstackdepth", value: &debug.profstackdepth, def: 128},
 	{name: "runtimecontentionstacks", atomic: &debug.runtimeContentionStacks},
 	{name: "sbrk", value: &debug.sbrk},
 	{name: "scavtrace", value: &debug.scavtrace},
@@ -434,6 +436,7 @@ func parsedebugvars() {
 	parsegodebug(godebug, nil)
 
 	debug.malloc = (debug.inittrace | debug.sbrk) != 0
+	debug.profstackdepth = min(debug.profstackdepth, maxProfStackDepth)
 
 	setTraceback(gogetenv("GOTRACEBACK"))
 	traceback_env = traceback_cache


@@ -277,7 +277,9 @@ func pprof_fpunwindExpand(dst, src []uintptr) int {
 // sentinel. Physical frames are turned into logical frames via inline unwinding
 // and by applying the skip value that's stored in pcBuf[0].
 func fpunwindExpand(dst, pcBuf []uintptr) int {
-	if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
+	if len(pcBuf) == 0 {
+		return 0
+	} else if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
 		// pcBuf contains logical rather than inlined frames, skip has already been
 		// applied, just return it without the sentinel value in pcBuf[0].
 		return copy(dst, pcBuf[1:])