1
0
mirror of https://github.com/golang/go synced 2024-09-30 06:14:31 -06:00

runtime: fix Windows profiling crash

I don't have any way to test or reproduce this problem,
but the current code is clearly wrong for Windows.
Make it better.

As I said on #17165:

But the borrowing of M's and the profiling of M's by the CPU profiler
seem not synchronized enough. This code implements the CPU profiler
on Windows:

	func profileloop1(param uintptr) uint32 {
		stdcall2(_SetThreadPriority, currentThread, _THREAD_PRIORITY_HIGHEST)

		for {
			stdcall2(_WaitForSingleObject, profiletimer, _INFINITE)
			first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
			for mp := first; mp != nil; mp = mp.alllink {
				thread := atomic.Loaduintptr(&mp.thread)
				// Do not profile threads blocked on Notes,
				// this includes idle worker threads,
				// idle timer thread, idle heap scavenger, etc.
				if thread == 0 || mp.profilehz == 0 || mp.blocked {
					continue
				}
				stdcall1(_SuspendThread, thread)
				if mp.profilehz != 0 && !mp.blocked {
					profilem(mp)
				}
				stdcall1(_ResumeThread, thread)
			}
		}
	}

	func profilem(mp *m) {
		var r *context
		rbuf := make([]byte, unsafe.Sizeof(*r)+15)

		tls := &mp.tls[0]
		gp := *((**g)(unsafe.Pointer(tls)))

		// align Context to 16 bytes
		r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
		r.contextflags = _CONTEXT_CONTROL
		stdcall2(_GetThreadContext, mp.thread, uintptr(unsafe.Pointer(r)))
		sigprof(r.ip(), r.sp(), 0, gp, mp)
	}

	func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
		if prof.hz == 0 {
			return
		}

		// Profiling runs concurrently with GC, so it must not allocate.
		mp.mallocing++

		... lots of code ...

		mp.mallocing--
	}

A borrowed M may migrate between threads. Between the
atomic.Loaduintptr(&mp.thread) and the SuspendThread, mp may have
moved to a new thread, so that it's in active use. In particular
it might be calling malloc, as in the crash stack trace. If so, the
mp.mallocing++ in sigprof would provoke the crash.

Those lines are trying to guard against allocation during sigprof.
But on Windows, mp is the thread being traced, not the current
thread. Those lines should really be using getg().m.mallocing, which
is the same on Unix but not on Windows. With that change, it's
possible the race on the actual thread is not a problem: the traceback
would get confused and eventually return an error, but that's fine.
The code expects that possibility.

Fixes #17165.

Change-Id: If6619731910d65ca4b1a6e7de761fa2518ef339e
Reviewed-on: https://go-review.googlesource.com/33132
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Russ Cox 2016-11-11 10:27:36 -05:00
parent b6a15683f0
commit e6da64b6c0

View File

@ -3112,7 +3112,12 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
}
// Profiling runs concurrently with GC, so it must not allocate.
mp.mallocing++
// Set a trap in case the code does allocate.
// Note that on windows, one thread takes profiles of all the
// other threads, so mp is usually not getg().m.
// In fact mp may not even be stopped.
// See golang.org/issue/17165.
getg().m.mallocing++
// Define that a "user g" is a user-created goroutine, and a "system g"
// is one that is m->g0 or m->gsignal.
@ -3262,7 +3267,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
}
atomic.Store(&prof.lock, 0)
}
mp.mallocing--
getg().m.mallocing--
}
// If the signal handler receives a SIGPROF signal on a non-Go thread,