runtime: make ReadMemStats STW for < 25µs
Currently ReadMemStats stops the world for ~1.7 ms/GB of heap because it collects statistics from every single span. For large heaps, this can be quite costly. This is particularly unfortunate because many production infrastructures call this function regularly to collect and report statistics.

Fix this by tracking the necessary cumulative statistics in the mcaches. ReadMemStats still has to stop the world to stabilize these statistics, but there are only O(GOMAXPROCS) mcaches to collect statistics from, so this pause is only 25µs even at GOMAXPROCS=100.

Fixes #13613.

Change-Id: I3c0a4e14833f4760dab675efc1916e73b4c0032a
Reviewed-on: https://go-review.googlesource.com/34937
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rick Hudson <rlh@golang.org>
Parent: 3399fd254d
Commit: 4a7cf960c3
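The pause that the message describes is easy to observe from ordinary user code. Below is a minimal sketch, not part of this change, of the kind of monitoring loop the commit message refers to: it times each runtime.ReadMemStats call, whose wall-clock duration roughly bounds the stop-the-world pause it triggers. The loop count and sleep interval are arbitrary.

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

func main() {
	var ms runtime.MemStats
	for i := 0; i < 5; i++ {
		start := time.Now()
		runtime.ReadMemStats(&ms) // stops the world while the statistics are stabilized
		fmt.Printf("ReadMemStats took %v (HeapAlloc=%d KiB, NumGC=%d)\n",
			time.Since(start), ms.HeapAlloc/1024, ms.NumGC)
		time.Sleep(100 * time.Millisecond)
	}
}
```

Before this change, a call like this could pause every goroutine for milliseconds on multi-gigabyte heaps; after it, the pause is bounded by the number of Ps rather than the number of spans.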
@@ -271,3 +271,68 @@ func (p *ProfBuf) Read(mode profBufReadMode) ([]uint64, []unsafe.Pointer, bool)
 func (p *ProfBuf) Close() {
 	(*profBuf)(p).close()
 }
+
+// ReadMemStatsSlow returns both the runtime-computed MemStats and
+// MemStats accumulated by scanning the heap.
+func ReadMemStatsSlow() (base, slow MemStats) {
+	stopTheWorld("ReadMemStatsSlow")
+
+	// Run on the system stack to avoid stack growth allocation.
+	systemstack(func() {
+		// Make sure stats don't change.
+		getg().m.mallocing++
+
+		readmemstats_m(&base)
+
+		// Initialize slow from base and zero the fields we're
+		// recomputing.
+		slow = base
+		slow.Alloc = 0
+		slow.TotalAlloc = 0
+		slow.Mallocs = 0
+		slow.Frees = 0
+		var bySize [_NumSizeClasses]struct {
+			Mallocs, Frees uint64
+		}
+
+		// Add up current allocations in spans.
+		for _, s := range mheap_.allspans {
+			if s.state != mSpanInUse {
+				continue
+			}
+			if s.sizeclass == 0 {
+				slow.Mallocs++
+				slow.Alloc += uint64(s.elemsize)
+			} else {
+				slow.Mallocs += uint64(s.allocCount)
+				slow.Alloc += uint64(s.allocCount) * uint64(s.elemsize)
+				bySize[s.sizeclass].Mallocs += uint64(s.allocCount)
+			}
+		}
+
+		// Add in frees. readmemstats_m flushed the cached stats, so
+		// these are up-to-date.
+		var smallFree uint64
+		slow.Frees = mheap_.nlargefree
+		for i := range mheap_.nsmallfree {
+			slow.Frees += mheap_.nsmallfree[i]
+			bySize[i].Frees = mheap_.nsmallfree[i]
+			bySize[i].Mallocs += mheap_.nsmallfree[i]
+			smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i])
+		}
+		slow.Frees += memstats.tinyallocs
+		slow.Mallocs += slow.Frees
+
+		slow.TotalAlloc = slow.Alloc + mheap_.largefree + smallFree
+
+		for i := range slow.BySize {
+			slow.BySize[i].Mallocs = bySize[i].Mallocs
+			slow.BySize[i].Frees = bySize[i].Frees
+		}
+
+		getg().m.mallocing--
+	})
+
+	startTheWorld()
+	return
+}
@@ -5,6 +5,7 @@
 package runtime_test
 
 import (
+	"fmt"
 	"os"
 	"reflect"
 	"runtime"
@@ -448,3 +449,53 @@ func TestPageAccounting(t *testing.T) {
 		t.Fatalf("mheap_.pagesInUse is %d, but direct count is %d", pagesInUse, counted)
 	}
 }
+
+func TestReadMemStats(t *testing.T) {
+	base, slow := runtime.ReadMemStatsSlow()
+	if base != slow {
+		logDiff(t, "MemStats", reflect.ValueOf(base), reflect.ValueOf(slow))
+		t.Fatal("memstats mismatch")
+	}
+}
+
+func logDiff(t *testing.T, prefix string, got, want reflect.Value) {
+	typ := got.Type()
+	switch typ.Kind() {
+	case reflect.Array, reflect.Slice:
+		if got.Len() != want.Len() {
+			t.Logf("len(%s): got %v, want %v", prefix, got, want)
+			return
+		}
+		for i := 0; i < got.Len(); i++ {
+			logDiff(t, fmt.Sprintf("%s[%d]", prefix, i), got.Index(i), want.Index(i))
+		}
+	case reflect.Struct:
+		for i := 0; i < typ.NumField(); i++ {
+			gf, wf := got.Field(i), want.Field(i)
+			logDiff(t, prefix+"."+typ.Field(i).Name, gf, wf)
+		}
+	case reflect.Map:
+		t.Fatal("not implemented: logDiff for map")
+	default:
+		if got.Interface() != want.Interface() {
+			t.Logf("%s: got %v, want %v", prefix, got, want)
+		}
+	}
+}
+
+func BenchmarkReadMemStats(b *testing.B) {
+	var ms runtime.MemStats
+	const heapSize = 100 << 20
+	x := make([]*[1024]byte, heapSize/1024)
+	for i := range x {
+		x[i] = new([1024]byte)
+	}
+	hugeSink = x
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		runtime.ReadMemStats(&ms)
+	}
+
+	hugeSink = nil
+}
@@ -22,6 +22,11 @@ type mcentral struct {
 	sizeclass int32
 	nonempty  mSpanList // list of spans with a free object, ie a nonempty free list
 	empty     mSpanList // list of spans with no free objects (or cached in an mcache)
+
+	// nmalloc is the cumulative count of objects allocated from
+	// this mcentral, assuming all spans in mcaches are
+	// fully-allocated. Written atomically, read under STW.
+	nmalloc uint64
 }
 
 // Initialize a single central free list.
@@ -106,6 +111,9 @@ havespan:
 	if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
 		throw("span has no free objects")
 	}
+	// Assume all objects from this span will be allocated in the
+	// mcache. If it gets uncached, we'll adjust this.
+	atomic.Xadd64(&c.nmalloc, int64(n))
 	usedBytes := uintptr(s.allocCount) * s.elemsize
 	if usedBytes > 0 {
 		reimburseSweepCredit(usedBytes)
@@ -150,6 +158,10 @@ func (c *mcentral) uncacheSpan(s *mspan) {
 		// mCentral_CacheSpan conservatively counted
 		// unallocated slots in heap_live. Undo this.
 		atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
+		// cacheSpan updated alloc assuming all objects on s
+		// were going to be allocated. Adjust for any that
+		// weren't.
+		atomic.Xadd64(&c.nmalloc, -int64(n))
 	}
 	unlock(&c.lock)
 }
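The nmalloc field added above is maintained optimistically: cacheSpan counts every free slot of a span as allocated the moment the span is handed to an mcache, and uncacheSpan subtracts whatever was never used when the span comes back. The toy program below is not runtime code; the central and span types and their fields are invented for illustration, but they show the same counting trick in isolation.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// central mimics the cumulative per-mcentral counter.
type central struct {
	nmalloc int64 // objects handed out, assuming cached spans become fully allocated
}

// span mimics a span with some number of free object slots.
type span struct {
	free int64
}

// cacheSpan hands a span to a local cache and optimistically counts
// every free slot as an allocation.
func (c *central) cacheSpan(s *span) {
	atomic.AddInt64(&c.nmalloc, s.free)
}

// uncacheSpan takes the span back and subtracts the slots that were
// never actually allocated.
func (c *central) uncacheSpan(s *span) {
	atomic.AddInt64(&c.nmalloc, -s.free)
}

func main() {
	c := &central{}
	s := &span{free: 8}
	c.cacheSpan(s) // counter now assumes all 8 objects are allocated

	s.free -= 5 // the cache hands out 5 of the 8 objects

	c.uncacheSpan(s) // undo the 3 that were never used
	fmt.Println("objects allocated:", atomic.LoadInt64(&c.nmalloc)) // prints 5
}
```

Because the correction happens only when a span leaves the cache, the counter needs no per-object updates and is exact once all caches have been flushed, which is what the stop-the-world readers rely on.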
@@ -82,9 +82,11 @@ type mheap struct {
 	// compiler can't 8-byte align fields.
 
 	// Malloc stats.
-	largefree  uint64                  // bytes freed for large objects (>maxsmallsize)
-	nlargefree uint64                  // number of frees for large objects (>maxsmallsize)
-	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
+	largealloc  uint64                  // bytes allocated for large objects
+	nlargealloc uint64                  // number of large object allocations
+	largefree   uint64                  // bytes freed for large objects (>maxsmallsize)
+	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
+	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
 
 	// range of addresses we might see in the heap
 	bitmap uintptr // Points to one byte past the end of the bitmap
@@ -236,7 +238,7 @@ type mspan struct {
 	sweepgen    uint32
 	divMul      uint16     // for divide by elemsize - divMagic.mul
 	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
-	allocCount  uint16     // capacity - number of objects in freelist
+	allocCount  uint16     // number of allocated objects
 	sizeclass   uint8      // size class
 	incache     bool       // being used by an mcache
 	state       mSpanState // mspaninuse etc
@@ -587,6 +589,8 @@ func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
 		h.pagesInUse += uint64(npage)
 		if large {
 			memstats.heap_objects++
+			mheap_.largealloc += uint64(s.elemsize)
+			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 			// Swept spans are at the end of lists.
 			if s.npages < uintptr(len(h.busy)) {
@@ -534,37 +534,37 @@ func updatememstats()
 	// Aggregate local stats.
 	cachestats()
 
-	// Scan all spans and count number of alive objects.
-	lock(&mheap_.lock)
-	for _, s := range mheap_.allspans {
-		if s.state != mSpanInUse {
+	// Collect allocation stats. This is safe and consistent
+	// because the world is stopped.
+	var smallFree, totalAlloc, totalFree uint64
+	for i := range mheap_.central {
+		if i == 0 {
+			memstats.nmalloc += mheap_.nlargealloc
+			totalAlloc += mheap_.largealloc
+			totalFree += mheap_.largefree
+			memstats.nfree += mheap_.nlargefree
 			continue
 		}
-		if s.sizeclass == 0 {
-			memstats.nmalloc++
-			memstats.alloc += uint64(s.elemsize)
-		} else {
-			memstats.nmalloc += uint64(s.allocCount)
-			memstats.by_size[s.sizeclass].nmalloc += uint64(s.allocCount)
-			memstats.alloc += uint64(s.allocCount) * uint64(s.elemsize)
-		}
-	}
-	unlock(&mheap_.lock)
+		// The mcaches are now empty, so mcentral stats are
+		// up-to-date.
+		c := &mheap_.central[i].mcentral
+		memstats.nmalloc += c.nmalloc
+		memstats.by_size[i].nmalloc += c.nmalloc
+		totalAlloc += c.nmalloc * uint64(class_to_size[i])
 
-	// Aggregate by size class.
-	smallfree := uint64(0)
-	memstats.nfree = mheap_.nlargefree
-	for i := 0; i < len(memstats.by_size); i++ {
+		// The mcache stats have been flushed to mheap_.
 		memstats.nfree += mheap_.nsmallfree[i]
 		memstats.by_size[i].nfree = mheap_.nsmallfree[i]
 		memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
-		smallfree += mheap_.nsmallfree[i] * uint64(class_to_size[i])
+		smallFree += mheap_.nsmallfree[i] * uint64(class_to_size[i])
 	}
+	totalFree += smallFree
+
 	memstats.nfree += memstats.tinyallocs
-	memstats.nmalloc += memstats.nfree
+	memstats.nmalloc += memstats.tinyallocs
 
 	// Calculate derived stats.
-	memstats.total_alloc = memstats.alloc + mheap_.largefree + smallfree
+	memstats.total_alloc = totalAlloc
+	memstats.alloc = totalAlloc - totalFree
 	memstats.heap_alloc = memstats.alloc
 	memstats.heap_objects = memstats.nmalloc - memstats.nfree
 }