diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 833019a7b4..2019be4dde 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -142,6 +142,13 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	When set to 0 memory profiling is disabled. Refer to the description of
 	MemProfileRate for the default value.
 
+	profstackdepth: profstackdepth=128 (the default) will set the maximum stack
+	depth used by all pprof profilers except for the CPU profiler to 128 frames.
+	Stack traces that exceed this limit will be truncated to the limit starting
+	from the leaf frame. Setting profstackdepth to any value above 1024 will
+	silently default to 1024. Future versions of Go may remove this limitation
+	and extend profstackdepth to apply to the CPU profiler and execution tracer.
+
 	pagetrace: setting pagetrace=/path/to/file will write out a trace of page events
 	that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool.
 	Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index df0f2552af..b51a1ad3ce 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -40,24 +40,6 @@ const (
 	// size of bucket hash table
 	buckHashSize = 179999
 
-	// maxStack is the max depth of stack to record in bucket.
-	// Note that it's only used internally as a guard against
-	// wildly out-of-bounds slicing of the PCs that come after
-	// a bucket struct, and it could increase in the future.
-	// The term "1" accounts for the first stack entry being
-	// taken up by a "skip" sentinel value for profilers which
-	// defer inline frame expansion until the profile is reported.
-	// The term "maxSkip" is for frame pointer unwinding, where we
-	// want to end up with maxLogicalStack frames but will discard
-	// some "physical" frames to account for skipping.
-	maxStack = 1 + maxSkip + maxLogicalStack
-
-	// maxLogicalStack is the maximum stack size of a call stack
-	// to encode in a profile. This counts "logical" frames, which
-	// includes inlined frames. We may record more than this many
-	// "physical" frames when using frame pointer unwinding to account
-	// for deferred handling of skipping frames & inline expansion.
-	maxLogicalStack = 128
 	// maxSkip is to account for deferred inline expansion
 	// when using frame pointer unwinding. We record the stack
 	// with "physical" frame pointers but handle skipping "logical"
@@ -67,6 +49,11 @@ const (
 	// This should be at least as large as the largest skip value
 	// used for profiling; otherwise stacks may be truncated inconsistently
 	maxSkip = 5
+
+	// maxProfStackDepth is the highest valid value for debug.profstackdepth.
+	// It's used for the bucket.stk func.
+	// TODO(fg): can we get rid of this?
+	maxProfStackDepth = 1024
 )
 
 type bucketType int
@@ -254,10 +241,11 @@ func newBucket(typ bucketType, nstk int) *bucket {
 	return b
 }
 
-// stk returns the slice in b holding the stack.
+// stk returns the slice in b holding the stack. The caller can assume that the
+// backing array is immutable.
 func (b *bucket) stk() []uintptr {
-	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
-	if b.nstk > maxStack {
+	stk := (*[maxProfStackDepth]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
+	if b.nstk > maxProfStackDepth {
 		// prove that slicing works; otherwise a failure requires a P
 		throw("bad profile stack count")
 	}
@@ -455,7 +443,7 @@ func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
 	}
 	// Only use the part of mp.profStack we need and ignore the extra space
 	// reserved for delayed inline expansion with frame pointer unwinding.
-	nstk := callers(4, mp.profStack[:maxLogicalStack])
+	nstk := callers(4, mp.profStack[:debug.profstackdepth])
 	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
 
 	b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
@@ -542,12 +530,18 @@ func blocksampled(cycles, rate int64) bool {
 // skip should be positive if this event is recorded from the current stack
 // (e.g. when this is not called from a system stack)
 func saveblockevent(cycles, rate int64, skip int, which bucketType) {
+	if debug.profstackdepth == 0 {
+		// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
+		// can't record a stack trace.
+		return
+	}
 	if skip > maxSkip {
 		print("requested skip=", skip)
 		throw("invalid skip value")
 	}
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
+
 	nstk := 1
 	if tracefpunwindoff() || gp.m.hasCgoOnStack() {
 		mp.profStack[0] = logicalStackSentinel
@@ -736,6 +730,12 @@ func (prof *mLockProfile) recordUnlock(l *mutex) {
 }
 
 func (prof *mLockProfile) captureStack() {
+	if debug.profstackdepth == 0 {
+		// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
+		// can't record a stack trace.
+		return
+	}
+
 	skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
 	if staticLockRanking {
 		// When static lock ranking is enabled, we'll always be on the system
@@ -780,7 +780,7 @@ func (prof *mLockProfile) store() {
 	mp := acquirem()
 	prof.disabled = true
 
-	nstk := maxStack
+	nstk := int(debug.profstackdepth)
 	for i := 0; i < nstk; i++ {
 		if pc := prof.stack[i]; pc == 0 {
 			nstk = i
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index a9d60faa69..12f26fbb6c 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -818,6 +818,9 @@ func schedinit() {
 		MemProfileRate = 0
 	}
 
+	// mcommoninit runs before parsedebugvars, so init profstacks again.
+	mProfStackInit(gp.m)
+
 	lock(&sched.lock)
 	sched.lastpoll.Store(nanotime())
 	procs := ncpu
@@ -930,6 +933,11 @@ func mcommoninit(mp *m, id int64) {
 // malloc and runtime locks for mLockProfile.
 // TODO(mknyszek): Implement lazy allocation if this becomes a problem.
 func mProfStackInit(mp *m) {
+	if debug.profstackdepth == 0 {
+		// debug.profstackdepth is set to 0 by the user, or we're being called
+		// from schedinit before parsedebugvars.
+		return
+	}
 	mp.profStack = makeProfStackFP()
 	mp.mLockProfile.stack = makeProfStackFP()
 }
@@ -944,12 +952,12 @@ func makeProfStackFP() []uintptr {
 	// The "maxSkip" term is for frame pointer unwinding, where we
 	// want to end up with debug.profstackdepth frames but will discard
 	// some "physical" frames to account for skipping.
-	return make([]uintptr, 1+maxSkip+maxLogicalStack)
+	return make([]uintptr, 1+maxSkip+debug.profstackdepth)
 }
 
 // makeProfStack returns a buffer large enough to hold a maximum-sized stack
 // trace.
-func makeProfStack() []uintptr { return make([]uintptr, maxLogicalStack) }
+func makeProfStack() []uintptr { return make([]uintptr, debug.profstackdepth) }
 
 //go:linkname pprof_makeProfStack
 func pprof_makeProfStack() []uintptr { return makeProfStack() }
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index c54ba19d07..3bbea46aff 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -330,6 +330,7 @@ var debug struct {
 	tracefpunwindoff         int32
 	traceadvanceperiod       int32
 	traceCheckStackOwnership int32
+	profstackdepth           int32
 
 	// debug.malloc is used as a combined debug check
 	// in the malloc function and should be set
@@ -379,6 +380,7 @@ var dbgvars = []*dbgVar{
 	{name: "invalidptr", value: &debug.invalidptr},
 	{name: "madvdontneed", value: &debug.madvdontneed},
 	{name: "panicnil", atomic: &debug.panicnil},
+	{name: "profstackdepth", value: &debug.profstackdepth, def: 128},
 	{name: "runtimecontentionstacks", atomic: &debug.runtimeContentionStacks},
 	{name: "sbrk", value: &debug.sbrk},
 	{name: "scavtrace", value: &debug.scavtrace},
@@ -434,6 +436,7 @@ func parsedebugvars() {
 	parsegodebug(godebug, nil)
 
 	debug.malloc = (debug.inittrace | debug.sbrk) != 0
+	debug.profstackdepth = min(debug.profstackdepth, maxProfStackDepth)
 
 	setTraceback(gogetenv("GOTRACEBACK"))
 	traceback_env = traceback_cache
diff --git a/src/runtime/tracestack.go b/src/runtime/tracestack.go
index 69f6bb974e..225566d102 100644
--- a/src/runtime/tracestack.go
+++ b/src/runtime/tracestack.go
@@ -277,7 +277,9 @@ func pprof_fpunwindExpand(dst, src []uintptr) int {
 // sentinel. Physical frames are turned into logical frames via inline unwinding
 // and by applying the skip value that's stored in pcBuf[0].
 func fpunwindExpand(dst, pcBuf []uintptr) int {
-	if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
+	if len(pcBuf) == 0 {
+		return 0
+	} else if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
 		// pcBuf contains logical rather than inlined frames, skip has already been
 		// applied, just return it without the sentinel value in pcBuf[0].
 		return copy(dst, pcBuf[1:])
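
Reviewer note, not part of the patch: a quick way to exercise the new knob end
to end. The program below is a hypothetical sketch (the file name, the helper
alloc, the recursion depth, and the depth values 16/512 are all made up). It
allocates at a call depth of roughly 200 frames and writes a heap profile in
pprof's protobuf format, so running it twice, e.g.

	GODEBUG=profstackdepth=16 go run main.go && go tool pprof -traces heap.pb.gz
	GODEBUG=profstackdepth=512 go run main.go && go tool pprof -traces heap.pb.gz

shows the truncation described in the extern.go docs: at profstackdepth=16
each trace keeps only the 16 leafmost frames and never reaches back to main,
while at 512 the whole alloc recursion is visible. The protobuf output is the
right place to look because the runtime.MemProfile API caps stacks at 32
frames regardless (MemProfileRecord.Stack0 is [32]uintptr).

	package main

	import (
		"os"
		"runtime"
		"runtime/pprof"
	)

	var sink []byte

	// alloc allocates once at call depth n so the heap sample carries a
	// deep stack trace.
	func alloc(n int) {
		if n == 0 {
			sink = make([]byte, 1<<20)
			return
		}
		alloc(n - 1)
	}

	func main() {
		runtime.MemProfileRate = 1 // sample every allocation
		alloc(200)
		runtime.GC() // heap profiles are published at GC; flush the sample

		f, err := os.Create("heap.pb.gz")
		if err != nil {
			panic(err)
		}
		defer f.Close()
		// debug=0 selects the gzipped protobuf format read by go tool pprof.
		if err := pprof.Lookup("heap").WriteTo(f, 0); err != nil {
			panic(err)
		}
	}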