From 142e434006e371440c8be4494d27d1774fe161e5 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Tue, 16 Jun 2015 16:37:48 -0400 Subject: [PATCH] runtime: implement GOTRACEBACK=crash for linux/386 Change-Id: I401ce8d612160a4f4ee617bddca6827fa544763a Reviewed-on: https://go-review.googlesource.com/11087 Reviewed-by: Austin Clements --- src/runtime/signal_386.go | 64 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/src/runtime/signal_386.go b/src/runtime/signal_386.go index f3c36cb07a..8fb197952e 100644 --- a/src/runtime/signal_386.go +++ b/src/runtime/signal_386.go @@ -24,6 +24,8 @@ func dumpregs(c *sigctxt) { print("gs ", hex(c.gs()), "\n") } +var crashing int32 + // May run during STW, so write barriers are not allowed. //go:nowritebarrier func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { @@ -101,7 +103,10 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { _g_.m.throwing = 1 _g_.m.caughtsig.set(gp) - startpanic() + + if crashing == 0 { + startpanic() + } if sig < uint32(len(sigtable)) { print(sigtable[sig].name, "\n") @@ -109,7 +114,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { print("Signal ", sig, "\n") } - print("PC=", hex(c.eip()), "\n") + print("PC=", hex(c.eip()), " m=", _g_.m.id, "\n") if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { print("signal arrived during cgo execution\n") gp = _g_.m.lockedg @@ -119,13 +124,62 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { var docrash bool if gotraceback(&docrash) > 0 { goroutineheader(gp) - tracebacktrap(uintptr(c.eip()), uintptr(c.esp()), 0, gp) - tracebackothers(gp) - print("\n") + + // On Linux/386, all system calls go through the vdso kernel_vsyscall routine. + // Normally we don't see those PCs, but during signals we can. + // If we see a PC in the vsyscall area (it moves around, but near the top of memory), + // assume we're blocked in the vsyscall routine, which has saved + // three words on the stack after the initial call saved the caller PC. + // Pop all four words off SP and use the saved PC. + // The check of the stack bounds here should suffice to avoid a fault + // during the actual PC pop. + // If we do load a bogus PC, not much harm done: we weren't going + // to get a decent traceback anyway. + // TODO(rsc): Make this more precise: we should do more checks on the PC, + // and we should find out whether different versions of the vdso page + // use different prologues that store different amounts on the stack. + pc := uintptr(c.eip()) + sp := uintptr(c.esp()) + if GOOS == "linux" && pc >= 0xf4000000 && gp.stack.lo <= sp && sp+16 <= gp.stack.hi { + // Assume in vsyscall page. + sp += 16 + pc = *(*uintptr)(unsafe.Pointer(sp - 4)) + print("runtime: unwind vdso kernel_vsyscall: pc=", hex(pc), " sp=", hex(sp), "\n") + } + + tracebacktrap(pc, sp, 0, gp) + if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning { + // tracebackothers on original m skipped this one; trace it now. + goroutineheader(_g_.m.curg) + traceback(^uintptr(0), ^uintptr(0), 0, gp) + } else if crashing == 0 { + tracebackothers(gp) + print("\n") + } dumpregs(c) } if docrash { + // TODO(rsc): Implement raiseproc on other systems + // and then add to this if condition. + if GOOS == "linux" { + crashing++ + if crashing < sched.mcount { + // There are other m's that need to dump their stacks. + // Relay SIGQUIT to the next m by sending it to the current process. + // All m's that have already received SIGQUIT have signal masks blocking + // receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet. + // When the last m receives the SIGQUIT, it will fall through to the call to + // crash below. Just in case the relaying gets botched, each m involved in + // the relay sleeps for 5 seconds and then does the crash/exit itself. + // In expected operation, the last m has received the SIGQUIT and run + // crash/exit and the process is gone, all long before any of the + // 5-second sleeps have finished. + print("\n-----\n\n") + raiseproc(_SIGQUIT) + usleep(5 * 1000 * 1000) + } + } crash() }