From dba1205b2fc458829e783bd0a4d1eff7231ae16c Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek <mknyszek@google.com>
Date: Mon, 10 Feb 2020 23:11:30 +0000
Subject: [PATCH] runtime: avoid re-scanning scavenged and untouched memory

Currently the scavenger will reset to the top of the heap every GC. This
means if it scavenges a bunch of memory which doesn't get used again,
it's going to keep re-scanning that memory on subsequent cycles. This
problem is especially bad when it comes to heap spikes: suppose an
application's heap spikes to 2x its steady-state size. The scavenger
will run over the top half of that heap even if the heap shrinks, for
the rest of the application's lifetime.

To fix this, we maintain two numbers: a "free" high watermark, which
represents the highest address freed to the page allocator in that
cycle, and a "scavenged" low watermark, which represents how low of an
address the scavenger got to when scavenging. If the "free" watermark
exceeds the "scavenged" watermark, then we pick the "free" watermark as
the new "top of the heap" for the scavenger when starting the next
scavenger cycle. Otherwise, we have the scavenger pick up where it left
off.

With this mechanism, we only ever re-scan scavenged memory if a random
page gets freed very high up in the heap address space while most of the
action is happening in the lower parts. This case should be exceedingly
unlikely because the page reclaimer walks over the heap from low address
to high addresses, and we use a first-fit address-ordered allocation
policy.

Updates #35788.

Change-Id: Id335603b526ce3a0eb79ef286d1a4e876abc9cab
Reviewed-on: https://go-review.googlesource.com/c/go/+/218997
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: David Chase <drchase@google.com>
---
 src/runtime/mgcscavenge.go | 26 ++++++++++++++++++++++++++
 src/runtime/mpagealloc.go  | 17 ++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go
index d428144db03..069f2671300 100644
--- a/src/runtime/mgcscavenge.go
+++ b/src/runtime/mgcscavenge.go
@@ -450,6 +450,25 @@ func (s *pageAlloc) scavengeStartGen() {
 		printScavTrace(s.scav.gen, s.scav.released, false)
 	}
 	s.inUse.cloneInto(&s.scav.inUse)
+
+	// Pick the new starting address for the scavenger cycle.
+	var startAddr uintptr
+	if s.scav.scavLWM < s.scav.freeHWM {
+		// The "free" high watermark exceeds the "scavenged" low watermark,
+		// so there are free scavengable pages in parts of the address space
+		// that the scavenger already searched, the high watermark being the
+		// highest one. Pick that as our new starting point to ensure we
+		// see those pages.
+		startAddr = s.scav.freeHWM
+	} else {
+		// The "free" high watermark does not exceed the "scavenged" low
+		// watermark. This means the allocator didn't free any memory in
+		// the range we scavenged last cycle, so we might as well continue
+		// scavenging from where we were.
+		startAddr = s.scav.scavLWM
+	}
+	s.scav.inUse.removeGreaterEqual(startAddr)
+
 	// reservationBytes may be zero if s.inUse.totalBytes is small, or if
 	// scavengeReservationShards is large. This case is fine as the scavenger
 	// will simply be turned off, but it does mean that scavengeReservationShards,
@@ -459,6 +478,8 @@ func (s *pageAlloc) scavengeStartGen() {
 	s.scav.reservationBytes = alignUp(s.inUse.totalBytes, pallocChunkBytes) / scavengeReservationShards
 	s.scav.gen++
 	s.scav.released = 0
+	s.scav.freeHWM = 0
+	s.scav.scavLWM = maxSearchAddr
 }
 
 // scavengeReserve reserves a contiguous range of the address space
@@ -676,6 +697,11 @@ func (s *pageAlloc) scavengeRangeLocked(ci chunkIdx, base, npages uint) uintptr
 	// Compute the full address for the start of the range.
 	addr := chunkBase(ci) + uintptr(base)*pageSize
 
+	// Update the scavenge low watermark.
+	if addr < s.scav.scavLWM {
+		s.scav.scavLWM = addr
+	}
+
 	// Only perform the actual scavenging if we're not in a test.
 	// It's dangerous to do so otherwise.
 	if s.test {
diff --git a/src/runtime/mpagealloc.go b/src/runtime/mpagealloc.go
index 771cb3a3ba2..905d49d7514 100644
--- a/src/runtime/mpagealloc.go
+++ b/src/runtime/mpagealloc.go
@@ -270,6 +270,14 @@ type pageAlloc struct {
 
 		// released is the amount of memory released this generation.
 		released uintptr
+
+		// scavLWM is the lowest address that the scavenger reached this
+		// scavenge generation.
+		scavLWM uintptr
+
+		// freeHWM is the highest address of a page that was freed to
+		// the page allocator this scavenge generation.
+		freeHWM uintptr
 	}
 
 	// mheap_.lock. This level of indirection makes it possible
@@ -306,6 +314,9 @@ func (s *pageAlloc) init(mheapLock *mutex, sysStat *uint64) {
 
 	// Set the mheapLock.
 	s.mheapLock = mheapLock
+
+	// Initialize scavenge tracking state.
+	s.scav.scavLWM = maxSearchAddr
 }
 
 // compareSearchAddrTo compares an address against s.searchAddr in a linearized
@@ -813,6 +824,11 @@ func (s *pageAlloc) free(base, npages uintptr) {
 	if s.compareSearchAddrTo(base) < 0 {
 		s.searchAddr = base
 	}
+	// Update the free high watermark for the scavenger.
+	limit := base + npages*pageSize - 1
+	if s.scav.freeHWM < limit {
+		s.scav.freeHWM = limit
+	}
 	if npages == 1 {
 		// Fast path: we're clearing a single bit, and we know exactly
 		// where it is, so mark it directly.
@@ -820,7 +836,6 @@ func (s *pageAlloc) free(base, npages uintptr) {
 		s.chunkOf(i).free1(chunkPageIndex(base))
 	} else {
 		// Slow path: we're clearing more bits so we may need to iterate.
-		limit := base + npages*pageSize - 1
 		sc, ec := chunkIndex(base), chunkIndex(limit)
 		si, ei := chunkPageIndex(base), chunkPageIndex(limit)