Mirror of https://github.com/golang/go
runtime: simplify CPU profiling code

This makes Go's CPU profiling code somewhat more idiomatic; e.g., using
:= instead of forward declaring variables, using "int" for element
counts instead of "uintptr", and slices instead of C-style
pointer+length. This makes the code easier to read and eliminates a lot
of type conversion clutter.

Additionally, in sigprof we can collect just maxCPUProfStack stack
frames, as cpuprof won't use more than that anyway.

Change-Id: I0235b5ae552191bcbb453b14add6d8c01381bd06
Reviewed-on: https://go-review.googlesource.com/6072
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
parent a32dd83253
commit 3c8a89daf3
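The central idiom the message describes, replacing C-style pointer+length parameters with slices, can be seen in isolation. A minimal standalone sketch, not code from the Go tree (sumPtrLen and sumSlice are invented names; unsafe.Slice is a modern convenience that postdates this commit):

    package main

    import (
        "fmt"
        "unsafe"
    )

    // sumPtrLen mimics the old convention: a pointer to the first element
    // plus a separate element count, as in C.
    func sumPtrLen(p *uint64, n int) uint64 {
        var total uint64
        for _, v := range unsafe.Slice(p, n) { // rebuild a slice just to iterate
            total += v
        }
        return total
    }

    // sumSlice is the idiomatic form this commit moves toward: the slice
    // header carries the pointer and the length together.
    func sumSlice(s []uint64) uint64 {
        var total uint64
        for _, v := range s {
            total += v
        }
        return total
    }

    func main() {
        buf := [4]uint64{1, 2, 3, 4}
        fmt.Println(sumPtrLen(&buf[0], len(buf))) // 10
        fmt.Println(sumSlice(buf[:]))             // 10
    }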
@@ -30,8 +30,8 @@
 // The state of this dance between the signal handler and the goroutine
 // is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
 // is not using either log half and is waiting (or will soon be waiting) for
-// a new piece by calling notesleep(&p->wait). If the signal handler
-// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
+// a new piece by calling notesleep(&p.wait). If the signal handler
+// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
 // to wake the goroutine. The value indicates the number of entries in the
 // log half being handed off. The goroutine leaves the non-zero value in
 // place until it has finished processing the log half and then flips the number
@@ -61,7 +61,7 @@ const (

 type cpuprofEntry struct {
     count uintptr
-    depth uintptr
+    depth int
     stack [maxCPUProfStack]uintptr
 }

@@ -81,7 +81,7 @@ type cpuProfile struct {
     // Signal handler has filled log[toggle][:nlog].
     // Goroutine is writing log[1-toggle][:handoff].
     log     [2][logSize / 2]uintptr
-    nlog    uintptr
+    nlog    int
     toggle  int32
     handoff uint32

@@ -167,7 +167,7 @@ func SetCPUProfileRate(hz int) {
         cpuprof.on = false

         // Now add is not running anymore, and getprofile owns the entire log.
-        // Set the high bit in prof->handoff to tell getprofile.
+        // Set the high bit in cpuprof.handoff to tell getprofile.
         for {
             n := cpuprof.handoff
             if n&0x80000000 != 0 {
@@ -185,20 +185,16 @@ func SetCPUProfileRate(hz int) {
     unlock(&cpuprofLock)
 }

-func cpuproftick(pc *uintptr, n int32) {
-    if n > maxCPUProfStack {
-        n = maxCPUProfStack
-    }
-    s := (*[maxCPUProfStack]uintptr)(unsafe.Pointer(pc))[:n]
-    cpuprof.add(s)
-}
-
 // add adds the stack trace to the profile.
 // It is called from signal handlers and other limited environments
 // and cannot allocate memory or acquire locks that might be
 // held at the time of the signal, nor can it use substantial amounts
 // of stack. It is allowed to call evict.
 func (p *cpuProfile) add(pc []uintptr) {
+    if len(pc) > maxCPUProfStack {
+        pc = pc[:maxCPUProfStack]
+    }
+
     // Compute hash.
     h := uintptr(0)
     for _, x := range pc {
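The deleted cpuproftick existed only to rebuild a slice from a raw pointer and a count before calling add; with sigprof now passing a slice directly, the reinterpretation disappears. A standalone sketch of the conversion it used to perform (maxDepth and the values are illustrative):

    package main

    import (
        "fmt"
        "unsafe"
    )

    const maxDepth = 64 // stands in for maxCPUProfStack

    // fromPtrLen performs the same reinterpretation the deleted cpuproftick
    // did: treat pc as the base of a fixed-size array, then slice off n
    // elements. Passing a slice in the first place makes this unnecessary.
    func fromPtrLen(pc *uintptr, n int32) []uintptr {
        if n > maxDepth {
            n = maxDepth
        }
        return (*[maxDepth]uintptr)(unsafe.Pointer(pc))[:n]
    }

    func main() {
        var stk [maxDepth]uintptr
        stk[0], stk[1] = 0x1000, 0x2000
        fmt.Println(fromPtrLen(&stk[0], 2)) // [4096 8192]
    }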
@@ -212,7 +208,7 @@ func (p *cpuProfile) add(pc []uintptr) {
 Assoc:
     for i := range b.entry {
         e := &b.entry[i]
-        if e.depth != uintptr(len(pc)) {
+        if e.depth != len(pc) {
             continue
         }
         for j := range pc {
@@ -241,7 +237,7 @@ Assoc:
     }

     // Reuse the newly evicted entry.
-    e.depth = uintptr(len(pc))
+    e.depth = len(pc)
     e.count = 1
     copy(e.stack[:], pc)
 }
@@ -256,7 +252,7 @@ func (p *cpuProfile) evict(e *cpuprofEntry) bool {
     d := e.depth
     nslot := d + 2
     log := &p.log[p.toggle]
-    if p.nlog+nslot > uintptr(len(p.log[0])) {
+    if p.nlog+nslot > len(log) {
         if !p.flushlog() {
             return false
         }
@@ -266,7 +262,7 @@ func (p *cpuProfile) evict(e *cpuprofEntry) bool {
     q := p.nlog
     log[q] = e.count
     q++
-    log[q] = d
+    log[q] = uintptr(d)
     q++
     copy(log[q:], e.stack[:d])
     q += d
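Since depth is now an int while the flat log still holds uintptr words, a single conversion remains where the count is written into the log. A toy version of this write path, with invented values:

    package main

    import "fmt"

    func main() {
        var log [8]uintptr
        count := uintptr(42) // sample count stays uintptr
        depth := 3           // element count is now a plain int
        stack := []uintptr{0x10, 0x20, 0x30}

        q := 0
        log[q] = count
        q++
        log[q] = uintptr(depth) // the one conversion left, at the log boundary
        q++
        q += copy(log[q:], stack[:depth])
        fmt.Println(log[:q]) // [42 3 16 32 48]
    }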
@@ -287,7 +283,7 @@ func (p *cpuProfile) flushlog() bool {

     p.toggle = 1 - p.toggle
     log := &p.log[p.toggle]
-    q := uintptr(0)
+    q := 0
     if p.lost > 0 {
         lostPC := funcPC(lostProfileData)
         log[0] = p.lost
@@ -360,7 +356,7 @@ func (p *cpuProfile) getprofile() []byte {

     // In flush mode.
     // Add is no longer being called. We own the log.
-    // Also, p->handoff is non-zero, so flushlog will return false.
+    // Also, p.handoff is non-zero, so flushlog will return false.
     // Evict the hash table into the log and return it.
 Flush:
     for i := range p.hash {
@@ -114,7 +114,7 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) {
     // and what it called, so that we can see if it
     // "called" sigpanic.
     var rpc [2]uintptr
-    if callers(1+skip-1, &rpc[0], 2) < 2 {
+    if callers(1+skip-1, rpc[:]) < 2 {
         return
     }
     f := findfunc(rpc[1])
@@ -161,7 +161,7 @@ func Callers(skip int, pc []uintptr) int {
     if len(pc) == 0 {
         return 0
     }
-    return callers(skip, &pc[0], len(pc))
+    return callers(skip, pc)
 }

 // GOROOT returns the root of the Go tree.
@@ -232,7 +232,7 @@ func mProf_GC() {
 // Called by malloc to record a profiled block.
 func mProf_Malloc(p unsafe.Pointer, size uintptr) {
     var stk [maxStack]uintptr
-    nstk := callers(4, &stk[0], len(stk))
+    nstk := callers(4, stk[:])
     lock(&proflock)
     b := stkbucket(memProfile, size, stk[:nstk], true)
     mp := b.mp()
@@ -300,9 +300,9 @@ func blockevent(cycles int64, skip int) {
     var nstk int
     var stk [maxStack]uintptr
     if gp.m.curg == nil || gp.m.curg == gp {
-        nstk = callers(skip, &stk[0], len(stk))
+        nstk = callers(skip, stk[:])
     } else {
-        nstk = gcallers(gp.m.curg, skip, &stk[0], len(stk))
+        nstk = gcallers(gp.m.curg, skip, stk[:])
     }
     lock(&proflock)
     b := stkbucket(blockProfile, 0, stk[:nstk], true)
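These call sites all follow one pattern: a fixed-size stack buffer handed over as stk[:] rather than split into &stk[0] and len(stk). A sketch of the shape, where fill is an invented stand-in for callers/gcallers:

    package main

    import "fmt"

    // fill stands in for the new slice-based callers/gcallers shape: it
    // writes into the slice it is handed and reports how many entries it used.
    func fill(buf []uintptr) int {
        n := 0
        for i := range buf {
            if i >= 3 { // pretend the stack was only 3 frames deep
                break
            }
            buf[i] = uintptr(0x1000 * (i + 1))
            n++
        }
        return n
    }

    func main() {
        var stk [32]uintptr
        nstk := fill(stk[:])    // before: fill(&stk[0], len(stk))
        fmt.Println(stk[:nstk]) // [4096 8192 12288]
    }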
@@ -527,7 +527,7 @@ func profilem(mp *m) {
     r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
     r.contextflags = _CONTEXT_CONTROL
     stdcall2(_GetThreadContext, mp.thread, uintptr(unsafe.Pointer(r)))
-    sigprof((*byte)(unsafe.Pointer(r.ip())), (*byte)(unsafe.Pointer(r.sp())), nil, gp, mp)
+    sigprof(r.ip(), r.sp(), 0, gp, mp)
 }

 func profileloop1() {
@@ -100,7 +100,7 @@ func mcommoninit(mp *m) {

     // g0 stack won't make sense for user (and is not necessary unwindable).
     if _g_ != _g_.m.g0 {
-        callers(1, &mp.createstack[0], len(mp.createstack))
+        callers(1, mp.createstack[:])
     }

     mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
@@ -2286,11 +2286,7 @@ func _GC() { _GC() }
 var etext struct{}

 // Called if we receive a SIGPROF signal.
-func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
-    var n int32
-    var traceback bool
-    var stk [100]uintptr
-
+func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
     if prof.hz == 0 {
         return
     }
@@ -2370,18 +2366,18 @@ func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
     // To recap, there are no constraints on the assembly being used for the
     // transition. We simply require that g and SP match and that the PC is not
     // in gogo.
-    traceback = true
-    usp := uintptr(unsafe.Pointer(sp))
+    traceback := true
     gogo := funcPC(gogo)
     if gp == nil || gp != mp.curg ||
-        usp < gp.stack.lo || gp.stack.hi < usp ||
-        (gogo <= uintptr(unsafe.Pointer(pc)) && uintptr(unsafe.Pointer(pc)) < gogo+_RuntimeGogoBytes) {
+        sp < gp.stack.lo || gp.stack.hi < sp ||
+        (gogo <= pc && pc < gogo+_RuntimeGogoBytes) {
         traceback = false
     }

-    n = 0
+    var stk [maxCPUProfStack]uintptr
+    n := 0
     if traceback {
-        n = int32(gentraceback(uintptr(unsafe.Pointer(pc)), uintptr(unsafe.Pointer(sp)), uintptr(unsafe.Pointer(lr)), gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap))
+        n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap)
     }
     if !traceback || n <= 0 {
         // Normal traceback is impossible or has failed.
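This hunk shows two of the commit-message points at once: traceback and n move from forward declarations to := at first use, and the stack buffer shrinks from 100 entries to maxCPUProfStack because cpuprof.add keeps at most that many frames. The declaration style in a standalone sketch (the constant's value of 64 matches the runtime at the time, but treat it as illustrative):

    package main

    import "fmt"

    const maxCPUProfStack = 64 // illustrative; mirrors the runtime constant's role

    func main() {
        // Old style: declare everything up front, assign later.
        //   var n int32
        //   var stk [100]uintptr
        // New style: declare at first use, sized to what the consumer keeps.
        var stk [maxCPUProfStack]uintptr
        n := 0
        stk[n] = 0x1000
        n++
        fmt.Println(stk[:n]) // [4096]
    }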
@@ -2391,21 +2387,21 @@ func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
         // Cgo, we can't unwind and symbolize arbitrary C code,
         // so instead collect Go stack that leads to the cgo call.
         // This is especially important on windows, since all syscalls are cgo calls.
-        n = int32(gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0))
+        n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
     }
     if GOOS == "windows" && n == 0 && mp.libcallg != nil && mp.libcallpc != 0 && mp.libcallsp != 0 {
         // Libcall, i.e. runtime syscall on windows.
         // Collect Go stack that leads to the call.
-        n = int32(gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0))
+        n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0)
     }
     if n == 0 {
         // If all of the above has failed, account it against abstract "System" or "GC".
         n = 2
         // "ExternalCode" is better than "etext".
-        if uintptr(unsafe.Pointer(pc)) > uintptr(unsafe.Pointer(&etext)) {
-            pc = (*uint8)(unsafe.Pointer(uintptr(funcPC(_ExternalCode) + _PCQuantum)))
+        if pc > uintptr(unsafe.Pointer(&etext)) {
+            pc = funcPC(_ExternalCode) + _PCQuantum
         }
-        stk[0] = uintptr(unsafe.Pointer(pc))
+        stk[0] = pc
         if mp.preemptoff != "" || mp.helpgc != 0 {
             stk[1] = funcPC(_GC) + _PCQuantum
         } else {
@@ -2420,7 +2416,7 @@ func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
         osyield()
     }
     if prof.hz != 0 {
-        cpuproftick(&stk[0], n)
+        cpuprof.add(stk[:n])
     }
     atomicstore(&prof.lock, 0)
 }
@@ -29,7 +29,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
     c := &sigctxt{info, ctxt}

     if sig == _SIGPROF {
-        sigprof((*byte)(unsafe.Pointer(uintptr(c.eip()))), (*byte)(unsafe.Pointer(uintptr(c.esp()))), nil, gp, _g_.m)
+        sigprof(uintptr(c.eip()), uintptr(c.esp()), 0, gp, _g_.m)
         return
     }

@@ -42,7 +42,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
     c := &sigctxt{info, ctxt}

     if sig == _SIGPROF {
-        sigprof((*byte)(unsafe.Pointer(uintptr(c.rip()))), (*byte)(unsafe.Pointer(uintptr(c.rsp()))), nil, gp, _g_.m)
+        sigprof(uintptr(c.rip()), uintptr(c.rsp()), 0, gp, _g_.m)
         return
     }

@@ -37,7 +37,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
     c := &sigctxt{info, ctxt}

     if sig == _SIGPROF {
-        sigprof((*byte)(unsafe.Pointer(uintptr(c.pc()))), (*byte)(unsafe.Pointer(uintptr(c.sp()))), (*byte)(unsafe.Pointer(uintptr(c.lr()))), gp, _g_.m)
+        sigprof(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.lr()), gp, _g_.m)
         return
     }

@@ -55,7 +55,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
     c := &sigctxt{info, ctxt}

     if sig == _SIGPROF {
-        sigprof((*byte)(unsafe.Pointer(uintptr(c.pc()))), (*byte)(unsafe.Pointer(uintptr(c.sp()))), (*byte)(unsafe.Pointer(uintptr(c.link()))), gp, _g_.m)
+        sigprof(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.link()), gp, _g_.m)
         return
     }
     flags := int32(_SigThrow)
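Across all four architectures the change is mechanical: register values that are really integers now travel as uintptr instead of being laundered through *byte. A sketch of the before/after call shape (sigprofNew and the register values are invented):

    package main

    import "fmt"

    // sigprofNew mirrors the new shape: program counter, stack pointer, and
    // link register travel as plain uintptr values (illustrative only).
    func sigprofNew(pc, sp, lr uintptr) {
        fmt.Printf("pc=%#x sp=%#x lr=%#x\n", pc, sp, lr)
    }

    func main() {
        // Previously each sighandler wrapped its context accessors:
        //   sigprof((*byte)(unsafe.Pointer(uintptr(c.eip()))), ..., nil, gp, _g_.m)
        // Now the integer register values pass straight through, with 0
        // instead of nil on architectures without a link register.
        sigprofNew(0x401000, 0x7ffd0000, 0)
    }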
@@ -468,9 +468,9 @@ func traceEvent(ev byte, stack bool, args ...uint64) {
     }
     var nstk int
     if gp == _g_ {
-        nstk = callers(1, &buf.stk[0], len(buf.stk))
+        nstk = callers(1, buf.stk[:])
     } else if gp != nil {
-        nstk = gcallers(mp.curg, 1, &buf.stk[0], len(buf.stk))
+        nstk = gcallers(mp.curg, 1, buf.stk[:])
     }
     id := trace.stackTab.put(buf.stk[:nstk])
     data = traceAppend(data, uint64(id))
@@ -104,7 +104,7 @@ func tracebackdefers(gp *g, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer) {
 // the runtime.Callers function (pcbuf != nil), as well as the garbage
 // collector (callback != nil). A little clunky to merge these, but avoids
 // duplicating the code and all its subtlety.
-func gentraceback(pc0 uintptr, sp0 uintptr, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, flags uint) int {
+func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, flags uint) int {
     if goexitPC == 0 {
         throw("gentraceback before goexitPC initialization")
     }
@@ -367,7 +367,7 @@ func gentraceback(pc0 uintptr, sp0 uintptr, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, flags uint) int {
         }
     }

-    if pcbuf == nil && callback == nil {
+    if printing {
         n = nprint
     }

@@ -474,7 +474,7 @@ func printcreatedby(gp *g) {
     }
 }

-func traceback(pc uintptr, sp uintptr, lr uintptr, gp *g) {
+func traceback(pc, sp, lr uintptr, gp *g) {
     traceback1(pc, sp, lr, gp, 0)
 }

@@ -484,11 +484,11 @@ func traceback(pc uintptr, sp uintptr, lr uintptr, gp *g) {
 // the initial PC must not be rewound to the previous instruction.
 // (All the saved pairs record a PC that is a return address, so we
 // rewind it into the CALL instruction.)
-func tracebacktrap(pc uintptr, sp uintptr, lr uintptr, gp *g) {
+func tracebacktrap(pc, sp, lr uintptr, gp *g) {
     traceback1(pc, sp, lr, gp, _TraceTrap)
 }

-func traceback1(pc uintptr, sp uintptr, lr uintptr, gp *g, flags uint) {
+func traceback1(pc, sp, lr uintptr, gp *g, flags uint) {
     var n int
     if readgstatus(gp)&^_Gscan == _Gsyscall {
         // Override registers if blocked in system call.
@@ -508,18 +508,18 @@ func traceback1(pc uintptr, sp uintptr, lr uintptr, gp *g, flags uint) {
     printcreatedby(gp)
 }

-func callers(skip int, pcbuf *uintptr, m int) int {
+func callers(skip int, pcbuf []uintptr) int {
     sp := getcallersp(unsafe.Pointer(&skip))
     pc := uintptr(getcallerpc(unsafe.Pointer(&skip)))
     var n int
     systemstack(func() {
-        n = gentraceback(pc, sp, 0, getg(), skip, pcbuf, m, nil, nil, 0)
+        n = gentraceback(pc, sp, 0, getg(), skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
     })
     return n
 }

-func gcallers(gp *g, skip int, pcbuf *uintptr, m int) int {
-    return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, pcbuf, m, nil, nil, 0)
+func gcallers(gp *g, skip int, pcbuf []uintptr) int {
+    return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
 }

 func showframe(f *_func, gp *g) bool {
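The exported runtime.Callers has taken a slice all along; this commit makes the internal callers and gcallers match it. A usage example of the exported API (runtime.CallersFrames postdates this commit but is the current way to symbolize the PCs):

    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        // Fill a slice of PCs, then resolve the first frame to a function name.
        pc := make([]uintptr, 16)
        n := runtime.Callers(1, pc)
        frames := runtime.CallersFrames(pc[:n])
        frame, _ := frames.Next()
        fmt.Println(frame.Function) // e.g. main.main
    }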