From 4598c23c6c7a0062a21da1bad9fe73d149733da5 Mon Sep 17 00:00:00 2001
From: Austin Clements
Date: Fri, 19 Apr 2019 17:50:37 -0400
Subject: [PATCH] runtime: switch to P 0 before destroying current P

Ps are strictly numbered from 0 to GOMAXPROCS-1, so if procresize
happens to be running on a P that's being destroyed, it moves itself
to P 0.

However, currently procresize destroys the unused Ps *before* moving
itself to P 0. This means it may briefly run on a destroyed P. This
is basically harmless, but has at least one very confusing
consequence: since destroying a P has write barriers, it may enqueue
pointers to a destroyed write barrier buffer. As far as I can tell,
there are no negative consequences of this, but this seems really
fragile.

This CL swaps the order of things, so now procresize moves itself to
P 0 if necessary before destroying Ps. This ensures it always has a
valid P.

This is part of refactoring for #10958 and #24543, but is a good
cleanup regardless.

Change-Id: I91a23dd6ed27e372f8d6291feec9bc991bcf9812
Reviewed-on: https://go-review.googlesource.com/c/go/+/173941
Run-TryBot: Austin Clements
TryBot-Result: Gobot Gobot
Reviewed-by: Michael Knyszek
---
 src/runtime/proc.go | 48 +++++++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 83f3d5226f9..1871d3b2486 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -4017,34 +4017,25 @@ func procresize(nprocs int32) *p {
 		atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
 	}
 
-	// release resources from unused P's
-	for i := nprocs; i < old; i++ {
-		p := allp[i]
-		if trace.enabled && p == getg().m.p.ptr() {
-			// moving to p[0], pretend that we were descheduled
-			// and then scheduled again to keep the trace sane.
-			traceGoSched()
-			traceProcStop(p)
-		}
-		p.destroy()
-		// can't free P itself because it can be referenced by an M in syscall
-	}
-
-	// Trim allp.
-	if int32(len(allp)) != nprocs {
-		lock(&allpLock)
-		allp = allp[:nprocs]
-		unlock(&allpLock)
-	}
-
 	_g_ := getg()
 	if _g_.m.p != 0 && _g_.m.p.ptr().id < nprocs {
 		// continue to use the current P
 		_g_.m.p.ptr().status = _Prunning
 		_g_.m.p.ptr().mcache.prepareForSweep()
 	} else {
-		// release the current P and acquire allp[0]
+		// release the current P and acquire allp[0].
+		//
+		// We must do this before destroying our current P
+		// because p.destroy itself has write barriers, so we
+		// need to do that from a valid P.
 		if _g_.m.p != 0 {
+			if trace.enabled {
+				// Pretend that we were descheduled
+				// and then scheduled again to keep
+				// the trace sane.
+				traceGoSched()
+				traceProcStop(_g_.m.p.ptr())
+			}
 			_g_.m.p.ptr().m = 0
 		}
 		_g_.m.p = 0
@@ -4057,6 +4048,21 @@ func procresize(nprocs int32) *p {
 		traceGoStart()
 	}
 }
+
+	// release resources from unused P's
+	for i := nprocs; i < old; i++ {
+		p := allp[i]
+		p.destroy()
+		// can't free P itself because it can be referenced by an M in syscall
+	}
+
+	// Trim allp.
+	if int32(len(allp)) != nprocs {
+		lock(&allpLock)
+		allp = allp[:nprocs]
+		unlock(&allpLock)
+	}
+
 	var runnablePs *p
 	for i := nprocs - 1; i >= 0; i-- {
 		p := allp[i]
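
The reordering is easy to see outside the runtime. Below is a minimal
toy sketch, not runtime code: res, pool, current, shrink, and destroy
are hypothetical stand-ins, not the runtime's types. It shrinks a pool
of numbered resources while the caller may be holding one of the
doomed entries, and applies the same ordering this CL establishes in
procresize: reassign to a surviving entry first, destroy the unused
entries second, so destruction never runs from an invalid entry.

	package main

	import "fmt"

	// res is a toy stand-in for a pooled resource, numbered so that
	// its id equals its slot, mirroring how Ps are strictly numbered.
	type res struct{ id int }

	func (r *res) destroy() { fmt.Printf("destroying res %d\n", r.id) }

	// shrink trims pool to n entries. current is the entry the caller
	// is using. Mirroring the patch: if current is about to be
	// destroyed, switch to pool[0] *before* any destruction, so
	// destroy always runs while holding a valid entry.
	func shrink(pool []*res, current *res, n int) ([]*res, *res) {
		if current.id >= n {
			// Step 1: move to a surviving entry (slot 0) first.
			current = pool[0]
		}
		// Step 2: only now release the doomed entries.
		for _, r := range pool[n:] {
			r.destroy()
		}
		return pool[:n], current
	}

	func main() {
		pool := []*res{{0}, {1}, {2}, {3}}
		current := pool[3] // we happen to be running on a doomed entry
		pool, current = shrink(pool, current, 2)
		fmt.Printf("now %d entries, current = res %d\n", len(pool), current.id)
	}

As in the patch, the slice is trimmed only after the caller is safely
off the doomed entries; the runtime additionally cannot free the P
structs themselves, since an M in a syscall may still reference them.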