runtime: make profstackdepth a GODEBUG option
Allow users to decrease the profiling stack depth back to 32 in case they
experience any problems with the new default of 128. Users may also use this
option to increase the depth up to 1024.

Change-Id: Ieaab2513024915a223239278dd97a6e161dde1cf
Reviewed-on: https://go-review.googlesource.com/c/go/+/581917
Reviewed-by: Austin Clements <austin@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Parent: 1b9dc3e178
Commit: 66cc2b7ca7
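For illustration only (this program is not part of the change; the file name and the 512-frame choice are arbitrary), a small Go program that makes the new knob observable: the recursion is deeper than both the old 32-frame and the new 128-frame default, so the stack length recorded in the heap profile reveals the effective profstackdepth.

// Minimal demo (illustrative; not part of this commit). Build it and run, e.g.:
//
//	GODEBUG=profstackdepth=512 ./deepprofile
//
// then compare `go tool pprof heap.pprof` output against a run with the
// default depth or with profstackdepth=32.
package main

import (
	"os"
	"runtime"
	"runtime/pprof"
)

// sink keeps the allocation live so it appears in the heap profile.
var sink [][]byte

// recurse allocates at a controllable stack depth.
func recurse(depth int) {
	if depth == 0 {
		sink = append(sink, make([]byte, 1<<20))
		return
	}
	recurse(depth - 1)
}

func main() {
	runtime.MemProfileRate = 1 // sample every allocation for the demo
	recurse(300)

	f, err := os.Create("heap.pprof")
	if err != nil {
		panic(err)
	}
	defer f.Close()
	if err := pprof.WriteHeapProfile(f); err != nil {
		panic(err)
	}
}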
@@ -142,6 +142,13 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	When set to 0 memory profiling is disabled. Refer to the description of
 	MemProfileRate for the default value.
 
+	profstackdepth: profstackdepth=128 (the default) will set the maximum stack
+	depth used by all pprof profilers except for the CPU profiler to 128 frames.
+	Stack traces that exceed this limit will be truncated to the limit starting
+	from the leaf frame. Setting profstackdepth to any value above 1024 will
+	silently default to 1024. Future versions of Go may remove this limitation
+	and extend profstackdepth to apply to the CPU profiler and execution tracer.
+
 	pagetrace: setting pagetrace=/path/to/file will write out a trace of page events
 	that can be viewed, analyzed, and visualized using the x/debug/cmd/pagetrace tool.
 	Build your program with GOEXPERIMENT=pagetrace to enable this functionality. Do not
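A hedged reading of the truncation rule documented above, as a toy helper (illustrative, not runtime code): profiling stacks are captured leaf-first, so keeping the first profstackdepth entries preserves the innermost frames and drops callers toward the root.

package main

import "fmt"

// truncate keeps at most depth frames, counted from the leaf.
func truncate(stack []string, depth int) []string {
	if len(stack) <= depth {
		return stack
	}
	return stack[:depth] // keep the leaf side, drop frames toward main
}

func main() {
	stack := []string{"leafFn", "midFn3", "midFn2", "midFn1", "main"}
	fmt.Println(truncate(stack, 3)) // [leafFn midFn3 midFn2]
}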
@@ -40,24 +40,6 @@ const (
 	// size of bucket hash table
 	buckHashSize = 179999
 
-	// maxStack is the max depth of stack to record in bucket.
-	// Note that it's only used internally as a guard against
-	// wildly out-of-bounds slicing of the PCs that come after
-	// a bucket struct, and it could increase in the future.
-	// The term "1" accounts for the first stack entry being
-	// taken up by a "skip" sentinel value for profilers which
-	// defer inline frame expansion until the profile is reported.
-	// The term "maxSkip" is for frame pointer unwinding, where we
-	// want to end up with maxLogicalStack frames but will discard
-	// some "physical" frames to account for skipping.
-	maxStack = 1 + maxSkip + maxLogicalStack
-
-	// maxLogicalStack is the maximum stack size of a call stack
-	// to encode in a profile. This counts "logical" frames, which
-	// includes inlined frames. We may record more than this many
-	// "physical" frames when using frame pointer unwinding to account
-	// for deferred handling of skipping frames & inline expansion.
-	maxLogicalStack = 128
 	// maxSkip is to account for deferred inline expansion
 	// when using frame pointer unwinding. We record the stack
 	// with "physical" frame pointers but handle skipping "logical"
@@ -67,6 +49,11 @@ const (
 	// This should be at least as large as the largest skip value
 	// used for profiling; otherwise stacks may be truncated inconsistently
 	maxSkip = 5
+
+	// maxProfStackDepth is the highest valid value for debug.profstackdepth.
+	// It's used for the bucket.stk func.
+	// TODO(fg): can we get rid of this?
+	maxProfStackDepth = 1024
 )
 
 type bucketType int
@@ -254,10 +241,11 @@ func newBucket(typ bucketType, nstk int) *bucket {
 	return b
 }
 
-// stk returns the slice in b holding the stack.
+// stk returns the slice in b holding the stack. The caller can assume that the
+// backing array is immutable.
 func (b *bucket) stk() []uintptr {
-	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
-	if b.nstk > maxStack {
+	stk := (*[maxProfStackDepth]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
+	if b.nstk > maxProfStackDepth {
 		// prove that slicing works; otherwise a failure requires a P
 		throw("bad profile stack count")
 	}
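The change to bucket.stk above only swaps the bound used for slicing; the underlying trick is unchanged. As a self-contained sketch of that trick (illustrative types, not runtime code): the stack slots live in the same allocation as a fixed header and are reinterpreted through an oversized array pointer whose length, like maxProfStackDepth, only bounds the type, not the allocation.

package main

import (
	"fmt"
	"unsafe"
)

// maxDepth plays the role of maxProfStackDepth: it only bounds the array type
// used to reinterpret the trailing memory.
const maxDepth = 1024

// header stands in for the fixed-size bucket struct; the stack slots follow it.
type header struct {
	nstk uintptr
}

// newRecord allocates the header and nstk trailing uintptr slots in one block,
// the way a bucket is sized together with its stack.
func newRecord(nstk int) *header {
	words := int(unsafe.Sizeof(header{})/unsafe.Sizeof(uintptr(0))) + nstk
	block := make([]uintptr, words)
	h := (*header)(unsafe.Pointer(&block[0]))
	h.nstk = uintptr(nstk)
	return h
}

// stk mirrors bucket.stk: slice the memory that follows the header.
func (h *header) stk() []uintptr {
	stk := (*[maxDepth]uintptr)(unsafe.Add(unsafe.Pointer(h), unsafe.Sizeof(*h)))
	if h.nstk > maxDepth {
		panic("bad stack count")
	}
	return stk[:h.nstk:h.nstk]
}

func main() {
	r := newRecord(3)
	copy(r.stk(), []uintptr{0x100, 0x200, 0x300})
	fmt.Println(r.stk()) // [256 512 768]
}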
@@ -455,7 +443,7 @@ func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
 	}
 	// Only use the part of mp.profStack we need and ignore the extra space
 	// reserved for delayed inline expansion with frame pointer unwinding.
-	nstk := callers(4, mp.profStack[:maxLogicalStack])
+	nstk := callers(4, mp.profStack[:debug.profstackdepth])
 	index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
 
 	b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
@@ -542,12 +530,18 @@ func blocksampled(cycles, rate int64) bool {
 // skip should be positive if this event is recorded from the current stack
 // (e.g. when this is not called from a system stack)
 func saveblockevent(cycles, rate int64, skip int, which bucketType) {
+	if debug.profstackdepth == 0 {
+		// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
+		// can't record a stack trace.
+		return
+	}
 	if skip > maxSkip {
 		print("requested skip=", skip)
 		throw("invalid skip value")
 	}
 	gp := getg()
 	mp := acquirem() // we must not be preempted while accessing profstack
 
 	nstk := 1
 	if tracefpunwindoff() || gp.m.hasCgoOnStack() {
 		mp.profStack[0] = logicalStackSentinel
@@ -736,6 +730,12 @@ func (prof *mLockProfile) recordUnlock(l *mutex) {
 }
 
 func (prof *mLockProfile) captureStack() {
+	if debug.profstackdepth == 0 {
+		// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
+		// can't record a stack trace.
+		return
+	}
+
 	skip := 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
 	if staticLockRanking {
 		// When static lock ranking is enabled, we'll always be on the system
@@ -780,7 +780,7 @@ func (prof *mLockProfile) store() {
 	mp := acquirem()
 	prof.disabled = true
 
-	nstk := maxStack
+	nstk := int(debug.profstackdepth)
 	for i := 0; i < nstk; i++ {
 		if pc := prof.stack[i]; pc == 0 {
 			nstk = i
@@ -818,6 +818,9 @@ func schedinit() {
 		MemProfileRate = 0
 	}
 
+	// mcommoninit runs before parsedebugvars, so init profstacks again.
+	mProfStackInit(gp.m)
+
 	lock(&sched.lock)
 	sched.lastpoll.Store(nanotime())
 	procs := ncpu
@@ -930,6 +933,11 @@ func mcommoninit(mp *m, id int64) {
 // malloc and runtime locks for mLockProfile.
 // TODO(mknyszek): Implement lazy allocation if this becomes a problem.
 func mProfStackInit(mp *m) {
+	if debug.profstackdepth == 0 {
+		// debug.profstackdepth is set to 0 by the user, or we're being called
+		// from schedinit before parsedebugvars.
+		return
+	}
 	mp.profStack = makeProfStackFP()
 	mp.mLockProfile.stack = makeProfStackFP()
 }
@@ -944,12 +952,12 @@ func makeProfStackFP() []uintptr {
 	// The "maxSkip" term is for frame pointer unwinding, where we
 	// want to end up with debug.profstackdepth frames but will discard
 	// some "physical" frames to account for skipping.
-	return make([]uintptr, 1+maxSkip+maxLogicalStack)
+	return make([]uintptr, 1+maxSkip+debug.profstackdepth)
 }
 
 // makeProfStack returns a buffer large enough to hold a maximum-sized stack
 // trace.
-func makeProfStack() []uintptr { return make([]uintptr, maxLogicalStack) }
+func makeProfStack() []uintptr { return make([]uintptr, debug.profstackdepth) }
 
 //go:linkname pprof_makeProfStack
 func pprof_makeProfStack() []uintptr { return makeProfStack() }
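A worked sizing example, assuming the defaults in this diff (profstackdepth=128, maxSkip=5): the frame-pointer-unwinding buffer allocated by makeProfStackFP holds 1 + 5 + 128 = 134 slots, one for the deferred-skip sentinel, five for physical frames that may be discarded while skipping, and 128 for the logical frames that remain. The snippet below is only arithmetic, not runtime code.

package main

import "fmt"

func main() {
	const sentinel = 1        // slot for the deferred-skip sentinel
	const maxSkip = 5         // physical frames reserved for skipping
	const profstackdepth = 128 // default logical depth
	fmt.Println(sentinel + maxSkip + profstackdepth) // 134 slots per FP-unwinding buffer
}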
@@ -330,6 +330,7 @@ var debug struct {
 	tracefpunwindoff         int32
 	traceadvanceperiod       int32
 	traceCheckStackOwnership int32
+	profstackdepth           int32
 
 	// debug.malloc is used as a combined debug check
 	// in the malloc function and should be set
@@ -379,6 +380,7 @@ var dbgvars = []*dbgVar{
 	{name: "invalidptr", value: &debug.invalidptr},
 	{name: "madvdontneed", value: &debug.madvdontneed},
 	{name: "panicnil", atomic: &debug.panicnil},
+	{name: "profstackdepth", value: &debug.profstackdepth, def: 128},
 	{name: "runtimecontentionstacks", atomic: &debug.runtimeContentionStacks},
 	{name: "sbrk", value: &debug.sbrk},
 	{name: "scavtrace", value: &debug.scavtrace},
@@ -434,6 +436,7 @@ func parsedebugvars() {
 	parsegodebug(godebug, nil)
 
 	debug.malloc = (debug.inittrace | debug.sbrk) != 0
+	debug.profstackdepth = min(debug.profstackdepth, maxProfStackDepth)
 
 	setTraceback(gogetenv("GOTRACEBACK"))
 	traceback_env = traceback_cache
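Taken together, the dbgvars default of 128 and the clamp added to parsedebugvars above determine the depth the profilers actually use. A small sketch of that mapping (assumed summary, not runtime code):

package main

import "fmt"

const maxProfStackDepth = 1024 // highest accepted value, per the diff

// effectiveDepth returns the stack depth the profilers end up using: the
// default 128 when the option is unset, clamped to 1024, with 0 meaning the
// per-M profiling stacks are never allocated.
func effectiveDepth(set bool, requested int) int {
	if !set {
		requested = 128 // def: 128 in the dbgvars table
	}
	if requested > maxProfStackDepth {
		requested = maxProfStackDepth
	}
	return requested
}

func main() {
	fmt.Println(effectiveDepth(false, 0))   // 128: default
	fmt.Println(effectiveDepth(true, 32))   // 32: opt back into the old depth
	fmt.Println(effectiveDepth(true, 4096)) // 1024: silently clamped
	fmt.Println(effectiveDepth(true, 0))    // 0: stack recording disabled
}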
@@ -277,7 +277,9 @@ func pprof_fpunwindExpand(dst, src []uintptr) int {
 // sentinel. Physical frames are turned into logical frames via inline unwinding
 // and by applying the skip value that's stored in pcBuf[0].
 func fpunwindExpand(dst, pcBuf []uintptr) int {
-	if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
+	if len(pcBuf) == 0 {
+		return 0
+	} else if len(pcBuf) > 0 && pcBuf[0] == logicalStackSentinel {
 		// pcBuf contains logical rather than inlined frames, skip has already been
 		// applied, just return it without the sentinel value in pcBuf[0].
 		return copy(dst, pcBuf[1:])