mirror of
https://github.com/golang/go
synced 2024-11-20 01:24:43 -07:00
38f674687a
Currently we always zero objects when we allocate them. We used to have an optimization that would not zero objects that had not been allocated since the whole span was last zeroed (either by getting it from the system or by getting it from the heap, which does a bulk zero), but this depended on the sweeper clobbering the first two words of each object. Hence, we lost this optimization when the bitmap sweeper went away. Re-introduce this optimization using a different mechanism. Each span already keeps a flag indicating that it just came from the OS or was just bulk zeroed by the mheap. We can simply use this flag to know when we don't need to zero an object. This is slightly less efficient than the old optimization: if a span gets allocated and partially used, then GC happens and the span gets returned to the mcentral, then the span gets re-acquired, the old optimization knew that it only had to re-zero the objects that had been reclaimed, whereas this optimization will re-zero everything. However, in this case, you're already paying for the garbage collection, and you've only wasted one zeroing of the span, so in practice there seems to be little difference. (If we did want to revive the full optimization, each span could keep track of a frontier beyond which all free slots are zeroed. I prototyped this and it didn't obviously do any better than the much simpler approach in this commit.) This significantly improves BinaryTree17, which is allocation-heavy (and runs first, so most pages are already zeroed), and slightly improves everything else. 
name old time/op new time/op delta XBenchGarbage-12 2.15ms ± 1% 2.14ms ± 1% -0.80% (p=0.000 n=17+17) name old time/op new time/op delta BinaryTree17-12 2.71s ± 1% 2.56s ± 1% -5.73% (p=0.000 n=18+19) DivconstI64-12 1.70ns ± 1% 1.70ns ± 1% ~ (p=0.562 n=18+18) DivconstU64-12 1.74ns ± 2% 1.74ns ± 1% ~ (p=0.394 n=20+20) DivconstI32-12 1.74ns ± 0% 1.74ns ± 0% ~ (all samples are equal) DivconstU32-12 1.66ns ± 1% 1.66ns ± 0% ~ (p=0.516 n=15+16) DivconstI16-12 1.84ns ± 0% 1.84ns ± 0% ~ (all samples are equal) DivconstU16-12 1.82ns ± 0% 1.82ns ± 0% ~ (all samples are equal) DivconstI8-12 1.79ns ± 0% 1.79ns ± 0% ~ (all samples are equal) DivconstU8-12 1.60ns ± 0% 1.60ns ± 1% ~ (p=0.603 n=17+19) Fannkuch11-12 2.11s ± 1% 2.11s ± 0% ~ (p=0.333 n=16+19) FmtFprintfEmpty-12 45.1ns ± 4% 45.4ns ± 5% ~ (p=0.111 n=20+20) FmtFprintfString-12 134ns ± 0% 129ns ± 0% -3.45% (p=0.000 n=18+16) FmtFprintfInt-12 131ns ± 1% 129ns ± 1% -1.54% (p=0.000 n=16+18) FmtFprintfIntInt-12 205ns ± 2% 203ns ± 0% -0.56% (p=0.014 n=20+18) FmtFprintfPrefixedInt-12 200ns ± 2% 197ns ± 1% -1.48% (p=0.000 n=20+18) FmtFprintfFloat-12 256ns ± 1% 256ns ± 0% -0.21% (p=0.008 n=18+20) FmtManyArgs-12 805ns ± 0% 804ns ± 0% -0.19% (p=0.001 n=18+18) GobDecode-12 7.21ms ± 1% 7.14ms ± 1% -0.92% (p=0.000 n=19+20) GobEncode-12 5.88ms ± 1% 5.88ms ± 1% ~ (p=0.641 n=18+19) Gzip-12 218ms ± 1% 218ms ± 1% ~ (p=0.271 n=19+18) Gunzip-12 37.1ms ± 0% 36.9ms ± 0% -0.29% (p=0.000 n=18+17) HTTPClientServer-12 78.1µs ± 2% 77.4µs ± 2% ~ (p=0.070 n=19+19) JSONEncode-12 15.5ms ± 1% 15.5ms ± 0% ~ (p=0.063 n=20+18) JSONDecode-12 56.1ms ± 0% 55.4ms ± 1% -1.18% (p=0.000 n=19+18) Mandelbrot200-12 4.05ms ± 0% 4.06ms ± 0% +0.29% (p=0.001 n=18+18) GoParse-12 3.28ms ± 1% 3.21ms ± 1% -2.30% (p=0.000 n=20+20) RegexpMatchEasy0_32-12 69.4ns ± 2% 69.3ns ± 1% ~ (p=0.205 n=18+16) RegexpMatchEasy0_1K-12 239ns ± 0% 239ns ± 0% ~ (all samples are equal) RegexpMatchEasy1_32-12 69.4ns ± 1% 69.4ns ± 1% ~ (p=0.620 n=15+18) RegexpMatchEasy1_1K-12 370ns ± 1% 369ns ± 2% 
~ (p=0.088 n=20+20) RegexpMatchMedium_32-12 108ns ± 0% 108ns ± 0% ~ (all samples are equal) RegexpMatchMedium_1K-12 33.6µs ± 3% 33.5µs ± 3% ~ (p=0.718 n=20+20) RegexpMatchHard_32-12 1.68µs ± 1% 1.67µs ± 2% ~ (p=0.316 n=20+20) RegexpMatchHard_1K-12 50.5µs ± 3% 50.4µs ± 3% ~ (p=0.659 n=20+20) Revcomp-12 381ms ± 1% 381ms ± 1% ~ (p=0.916 n=19+18) Template-12 66.5ms ± 1% 65.8ms ± 2% -1.08% (p=0.000 n=20+20) TimeParse-12 317ns ± 0% 319ns ± 0% +0.48% (p=0.000 n=19+12) TimeFormat-12 338ns ± 0% 338ns ± 0% ~ (p=0.124 n=19+18) [Geo mean] 5.99µs 5.96µs -0.54% Change-Id: I638ffd9d9f178835bbfa499bac20bd7224f1a907 Reviewed-on: https://go-review.googlesource.com/22591 Reviewed-by: Rick Hudson <rlh@golang.org>
221 lines
5.7 KiB
Go
221 lines
5.7 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Central free lists.
|
|
//
|
|
// See malloc.go for an overview.
|
|
//
|
|
// The MCentral doesn't actually contain the list of free objects; the MSpan does.
|
|
// Each MCentral is two lists of MSpans: those with free objects (c.nonempty)
|
|
// and those that are completely allocated (c.empty).
|
|
|
|
package runtime
|
|
|
|
import "runtime/internal/atomic"
|
|
|
|
// Central list of free objects of a given size.
|
|
type mcentral struct {
|
|
lock mutex
|
|
sizeclass int32
|
|
nonempty mSpanList // list of spans with a free object, ie a nonempty free list
|
|
empty mSpanList // list of spans with no free objects (or cached in an mcache)
|
|
}
|
|
|
|
// Initialize a single central free list.
|
|
func (c *mcentral) init(sizeclass int32) {
|
|
c.sizeclass = sizeclass
|
|
c.nonempty.init()
|
|
c.empty.init()
|
|
}
|
|
|
|
// Allocate a span to use in an MCache.
|
|
func (c *mcentral) cacheSpan() *mspan {
|
|
// Deduct credit for this span allocation and sweep if necessary.
|
|
spanBytes := uintptr(class_to_allocnpages[c.sizeclass]) * _PageSize
|
|
deductSweepCredit(spanBytes, 0)
|
|
|
|
lock(&c.lock)
|
|
sg := mheap_.sweepgen
|
|
retry:
|
|
var s *mspan
|
|
for s = c.nonempty.first; s != nil; s = s.next {
|
|
if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
|
|
c.nonempty.remove(s)
|
|
c.empty.insertBack(s)
|
|
unlock(&c.lock)
|
|
s.sweep(true)
|
|
goto havespan
|
|
}
|
|
if s.sweepgen == sg-1 {
|
|
// the span is being swept by background sweeper, skip
|
|
continue
|
|
}
|
|
// we have a nonempty span that does not require sweeping, allocate from it
|
|
c.nonempty.remove(s)
|
|
c.empty.insertBack(s)
|
|
unlock(&c.lock)
|
|
goto havespan
|
|
}
|
|
|
|
for s = c.empty.first; s != nil; s = s.next {
|
|
if s.sweepgen == sg-2 && atomic.Cas(&s.sweepgen, sg-2, sg-1) {
|
|
// we have an empty span that requires sweeping,
|
|
// sweep it and see if we can free some space in it
|
|
c.empty.remove(s)
|
|
// swept spans are at the end of the list
|
|
c.empty.insertBack(s)
|
|
unlock(&c.lock)
|
|
s.sweep(true)
|
|
freeIndex := s.nextFreeIndex()
|
|
if freeIndex != s.nelems {
|
|
s.freeindex = freeIndex
|
|
goto havespan
|
|
}
|
|
lock(&c.lock)
|
|
// the span is still empty after sweep
|
|
// it is already in the empty list, so just retry
|
|
goto retry
|
|
}
|
|
if s.sweepgen == sg-1 {
|
|
// the span is being swept by background sweeper, skip
|
|
continue
|
|
}
|
|
// already swept empty span,
|
|
// all subsequent ones must also be either swept or in process of sweeping
|
|
break
|
|
}
|
|
unlock(&c.lock)
|
|
|
|
// Replenish central list if empty.
|
|
s = c.grow()
|
|
if s == nil {
|
|
return nil
|
|
}
|
|
lock(&c.lock)
|
|
c.empty.insertBack(s)
|
|
unlock(&c.lock)
|
|
|
|
// At this point s is a non-empty span, queued at the end of the empty list,
|
|
// c is unlocked.
|
|
havespan:
|
|
cap := int32((s.npages << _PageShift) / s.elemsize)
|
|
n := cap - int32(s.allocCount)
|
|
if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
|
|
throw("span has no free objects")
|
|
}
|
|
usedBytes := uintptr(s.allocCount) * s.elemsize
|
|
if usedBytes > 0 {
|
|
reimburseSweepCredit(usedBytes)
|
|
}
|
|
atomic.Xadd64(&memstats.heap_live, int64(spanBytes)-int64(usedBytes))
|
|
if trace.enabled {
|
|
// heap_live changed.
|
|
traceHeapAlloc()
|
|
}
|
|
if gcBlackenEnabled != 0 {
|
|
// heap_live changed.
|
|
gcController.revise()
|
|
}
|
|
s.incache = true
|
|
freeByteBase := s.freeindex &^ (64 - 1)
|
|
whichByte := freeByteBase / 8
|
|
// Init alloc bits cache.
|
|
s.refillAllocCache(whichByte)
|
|
|
|
// Adjust the allocCache so that s.freeindex corresponds to the low bit in
|
|
// s.allocCache.
|
|
s.allocCache >>= s.freeindex % 64
|
|
|
|
return s
|
|
}
|
|
|
|
// Return span from an MCache.
|
|
func (c *mcentral) uncacheSpan(s *mspan) {
|
|
lock(&c.lock)
|
|
|
|
s.incache = false
|
|
|
|
if s.allocCount == 0 {
|
|
throw("uncaching span but s.allocCount == 0")
|
|
}
|
|
|
|
cap := int32((s.npages << _PageShift) / s.elemsize)
|
|
n := cap - int32(s.allocCount)
|
|
if n > 0 {
|
|
c.empty.remove(s)
|
|
c.nonempty.insert(s)
|
|
// mCentral_CacheSpan conservatively counted
|
|
// unallocated slots in heap_live. Undo this.
|
|
atomic.Xadd64(&memstats.heap_live, -int64(n)*int64(s.elemsize))
|
|
}
|
|
unlock(&c.lock)
|
|
}
|
|
|
|
// freeSpan updates c and s after sweeping s.
|
|
// It sets s's sweepgen to the latest generation,
|
|
// and, based on the number of free objects in s,
|
|
// moves s to the appropriate list of c or returns it
|
|
// to the heap.
|
|
// freeSpan returns true if s was returned to the heap.
|
|
// If preserve=true, it does not move s (the caller
|
|
// must take care of it).
|
|
func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool {
|
|
if s.incache {
|
|
throw("freeSpan given cached span")
|
|
}
|
|
s.needzero = 1
|
|
|
|
if preserve {
|
|
// preserve is set only when called from MCentral_CacheSpan above,
|
|
// the span must be in the empty list.
|
|
if !s.inList() {
|
|
throw("can't preserve unlinked span")
|
|
}
|
|
atomic.Store(&s.sweepgen, mheap_.sweepgen)
|
|
return false
|
|
}
|
|
|
|
lock(&c.lock)
|
|
|
|
// Move to nonempty if necessary.
|
|
if wasempty {
|
|
c.empty.remove(s)
|
|
c.nonempty.insert(s)
|
|
}
|
|
|
|
// delay updating sweepgen until here. This is the signal that
|
|
// the span may be used in an MCache, so it must come after the
|
|
// linked list operations above (actually, just after the
|
|
// lock of c above.)
|
|
atomic.Store(&s.sweepgen, mheap_.sweepgen)
|
|
|
|
if s.allocCount != 0 {
|
|
unlock(&c.lock)
|
|
return false
|
|
}
|
|
|
|
c.nonempty.remove(s)
|
|
unlock(&c.lock)
|
|
mheap_.freeSpan(s, 0)
|
|
return true
|
|
}
|
|
|
|
// grow allocates a new empty span from the heap and initializes it for c's size class.
|
|
func (c *mcentral) grow() *mspan {
|
|
npages := uintptr(class_to_allocnpages[c.sizeclass])
|
|
size := uintptr(class_to_size[c.sizeclass])
|
|
n := (npages << _PageShift) / size
|
|
|
|
s := mheap_.alloc(npages, c.sizeclass, false, true)
|
|
if s == nil {
|
|
return nil
|
|
}
|
|
|
|
p := s.base()
|
|
s.limit = p + size*n
|
|
|
|
heapBitsForSpan(s.base()).initSpan(s)
|
|
return s
|
|
}
|