// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import "unsafe"

const (
	_Debugwbufs  = false   // if true, check wbufs consistency
	_WorkbufSize = 1 * 256 // in bytes - if small, wbufs are passed to the GC in a timely fashion.
)
// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects. A grey object is one that is marked and on a work
// queue. A black object is marked and not on a work queue.
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.
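//
// As an illustrative aside, the same tricolor discipline can be
// sketched in ordinary Go (hypothetical types, not the runtime's
// actual representation):
//
//	type object struct {
//		marked   bool
//		children []*object
//	}
//
//	// drain consumes grey objects (blackening them) and greys any
//	// unmarked children it discovers, until no grey objects remain.
//	func drain(grey []*object) {
//		for len(grey) > 0 {
//			obj := grey[len(grey)-1]
//			grey = grey[:len(grey)-1] // obj is now black
//			for _, c := range obj.children {
//				if c != nil && !c.marked {
//					c.marked = true // c is now grey
//					grey = append(grey, c)
//				}
//			}
//		}
//	}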

// A wbufptr holds a workbuf*, but protects it from write barriers.
// workbufs never live on the heap, so write barriers are unnecessary.
// Write barriers on workbuf pointers may also be dangerous in the GC.
type wbufptr uintptr

func wbufptrOf(w *workbuf) wbufptr {
	return wbufptr(unsafe.Pointer(w))
}

func (wp wbufptr) ptr() *workbuf {
	return (*workbuf)(unsafe.Pointer(wp))
}

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//	var gcw gcWork
//	disable preemption
//	.. call gcw.put() to produce and gcw.get() to consume ..
//	gcw.dispose()
//	enable preemption
//
// Or from the per-P gcWork cache:
//
//	(preemption must be disabled)
//	gcw := &getg().m.p.ptr().gcw
//	.. call gcw.put() to produce and gcw.get() to consume ..
//	if gcphase == _GCmarktermination {
//		gcw.dispose()
//	}
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
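//
// For example, a minimal sketch of the per-P form with explicit
// preemption control (obj here is a hypothetical pointer to a heap
// object; the caller is assumed to be runtime-internal):
//
//	mp := acquirem() // disable preemption
//	gcw := &mp.p.ptr().gcw
//	gcw.put(obj)
//	if gcphase == _GCmarktermination {
//		gcw.dispose()
//	}
//	releasem(mp)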
type gcWork struct {
	// Invariant: wbuf is never full or empty
	wbuf wbufptr

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Scan work performed on this gcWork. This is aggregated into
	// gcController by dispose.
	scanWork int64
}

// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object.
//go:nowritebarrier
func (ww *gcWork) put(obj uintptr) {
	w := (*gcWork)(noescape(unsafe.Pointer(ww))) // TODO: remove when escape analysis is fixed

	wbuf := w.wbuf.ptr()
	if wbuf == nil {
		wbuf = getpartialorempty(42)
		w.wbuf = wbufptrOf(wbuf)
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	if wbuf.nobj == len(wbuf.obj) {
		putfull(wbuf, 50)
		w.wbuf = 0
	}
}

// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
//go:nowritebarrier
func (ww *gcWork) tryGet() uintptr {
	w := (*gcWork)(noescape(unsafe.Pointer(ww))) // TODO: remove when escape analysis is fixed

	wbuf := w.wbuf.ptr()
	if wbuf == nil {
		wbuf = trygetfull(74)
		if wbuf == nil {
			return 0
		}
		w.wbuf = wbufptrOf(wbuf)
	}

	wbuf.nobj--
	obj := wbuf.obj[wbuf.nobj]

	if wbuf.nobj == 0 {
		putempty(wbuf, 86)
		w.wbuf = 0
	}

	return obj
}

// get dequeues a pointer for the garbage collector to trace, blocking
// if necessary to ensure all pointers from all queues and caches have
// been retrieved. get returns 0 if there are no pointers remaining.
//go:nowritebarrier
func (ww *gcWork) get() uintptr {
	w := (*gcWork)(noescape(unsafe.Pointer(ww))) // TODO: remove when escape analysis is fixed

	wbuf := w.wbuf.ptr()
	if wbuf == nil {
		wbuf = getfull(103)
		if wbuf == nil {
			return 0
		}
		wbuf.checknonempty()
		w.wbuf = wbufptrOf(wbuf)
	}

	// TODO: This might be a good place to add prefetch code

	wbuf.nobj--
	obj := wbuf.obj[wbuf.nobj]

	if wbuf.nobj == 0 {
		putempty(wbuf, 115)
		w.wbuf = 0
	}

	return obj
}

// dispose returns any cached pointers to the global queue.
//go:nowritebarrier
func (w *gcWork) dispose() {
	if wbuf := w.wbuf; wbuf != 0 {
		putpartial(wbuf.ptr(), 167)
		w.wbuf = 0
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}

// balance moves some work that's cached in this gcWork back on the
// global queue.
//go:nowritebarrier
func (w *gcWork) balance() {
	if wbuf := w.wbuf; wbuf != 0 && wbuf.ptr().nobj > 4 {
		w.wbuf = wbufptrOf(handoff(wbuf.ptr()))
	}
}

// empty returns true if w has no mark work available.
//go:nowritebarrier
func (w *gcWork) empty() bool {
	wbuf := w.wbuf
	return wbuf == 0 || wbuf.ptr().nobj == 0
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.
2015-02-12 07:58:25 -07:00
|
|
|
type workbufhdr struct {
|
|
|
|
node lfnode // must be first
|
2015-02-18 19:56:12 -07:00
|
|
|
nobj int
|
|
|
|
inuse bool // This workbuf is in use by some gorotuine and is not on the work.empty/partial/full queues.
|
|
|
|
log [4]int // line numbers forming a history of ownership changes to workbuf
|
2015-02-12 07:58:25 -07:00
|
|
|
}

type workbuf struct {
	workbufhdr
	// account for the above fields
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / ptrSize]uintptr
}
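
// As a rough worked example (the header size here is an assumption for
// illustration, not a guarantee): with _WorkbufSize = 256 and a 64-byte
// workbufhdr on a 64-bit system, obj would hold (256-64)/8 = 24 pointer
// slots. The array length above computes this from the real sizes.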

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work these are the only routines that
// make partially full wbufs available to the GC.
// Each of the gets and puts also takes a distinct integer that is used
// to record a brief history of changes to ownership of the workbuf.
// The convention is to use a unique line number, but any encoding
// is permissible. For example, if you want to pass in two pieces of
// information you could simply add lineno1*100000+lineno2.

// logget records the past few values of entry to aid in debugging.
// logget checks that the buffer b is not currently in use.
func (b *workbuf) logget(entry int) {
	if !_Debugwbufs {
		return
	}
	if b.inuse {
		println("runtime: logget fails log entry=", entry,
			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
		throw("logget: get not legal")
	}
	b.inuse = true
	copy(b.log[1:], b.log[:])
	b.log[0] = entry
}

// logput records the past few values of entry to aid in debugging.
// logput checks that the buffer b is currently in use.
func (b *workbuf) logput(entry int) {
	if !_Debugwbufs {
		return
	}
	if !b.inuse {
		println("runtime: logput fails log entry=", entry,
			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
		throw("logput: put not legal")
	}
	b.inuse = false
	copy(b.log[1:], b.log[:])
	b.log[0] = entry
}

func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		println("runtime: nonempty check fails",
			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
		throw("workbuf is empty")
	}
}

func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		println("runtime: empty check fails",
			"b.log[0]=", b.log[0], "b.log[1]=", b.log[1],
			"b.log[2]=", b.log[2], "b.log[3]=", b.log[3])
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
// entry is used to record a brief history of ownership.
//go:nowritebarrier
func getempty(entry int) *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(lfstackpop(&work.empty))
		if b != nil {
			b.checkempty()
		}
	}
	if b == nil {
		b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
	}
	b.logget(entry)
	return b
}

// putempty puts a workbuf onto the work.empty list.
// Upon entry this goroutine owns b. The lfstackpush relinquishes ownership.
//go:nowritebarrier
func putempty(b *workbuf, entry int) {
	b.checkempty()
	b.logput(entry)
	lfstackpush(&work.empty, &b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//go:nowritebarrier
func putfull(b *workbuf, entry int) {
	b.checknonempty()
	b.logput(entry)
	lfstackpush(&work.full, &b.node)
}

// getpartialorempty tries to return a partially empty buffer
// and if none are available returns an empty one.
// entry is used to provide a brief history of ownership,
// using entry + xxx00000 to encode two line numbers
// in the call chain.
//go:nowritebarrier
func getpartialorempty(entry int) *workbuf {
	b := (*workbuf)(lfstackpop(&work.partial))
	if b != nil {
		b.logget(entry)
		return b
	}
	// Let getempty do the logget check but
	// use the entry to encode that it passed
	// through this routine.
	b = getempty(entry + 80700000)
	return b
}

// putpartial puts empty buffers on the work.empty queue,
// full buffers on the work.full queue and
// others on the work.partial queue.
// entry is used to provide a brief history of ownership,
// using entry + xxx00000 to encode two call chain line numbers.
//go:nowritebarrier
func putpartial(b *workbuf, entry int) {
	if b.nobj == 0 {
		putempty(b, entry+81500000)
	} else if b.nobj < len(b.obj) {
		b.logput(entry)
		lfstackpush(&work.partial, &b.node)
	} else if b.nobj == len(b.obj) {
		b.logput(entry)
		lfstackpush(&work.full, &b.node)
	} else {
		throw("putpartial: bad Workbuf b.nobj")
	}
}

// trygetfull tries to get a full or partially empty workbuffer.
// If one is not immediately available return nil.
//go:nowritebarrier
func trygetfull(entry int) *workbuf {
	b := (*workbuf)(lfstackpop(&work.full))
	if b == nil {
		b = (*workbuf)(lfstackpop(&work.partial))
	}
	if b != nil {
		b.logget(entry)
		b.checknonempty()
		return b
	}
	return b
}

// Get a full work buffer off the work.full list, or a partially
// filled one off the work.partial list. If nothing is available
// wait until all the other gc helpers have finished and then
// return nil.
// getfull acts as a barrier for work.nproc helpers. As long as one
// gchelper is actively marking objects it
// may create a workbuffer that the other helpers can work on.
// The for loop either exits when a work buffer is found
// or when _all_ of the work.nproc GC helpers are in the loop
// looking for work and thus not capable of creating new work.
// This is in fact the termination condition for the STW mark
// phase.
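//
// For example (illustrative): with work.nproc == 4, once all four
// helpers are spinning in the loop below with both lists empty,
// work.nwait reaches 4; each helper then sees nwait == nproc and
// returns nil, ending the mark phase.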
//go:nowritebarrier
func getfull(entry int) *workbuf {
	b := (*workbuf)(lfstackpop(&work.full))
	if b != nil {
		b.logget(entry)
		b.checknonempty()
		return b
	}
	b = (*workbuf)(lfstackpop(&work.partial))
	if b != nil {
		b.logget(entry)
		return b
	}

	xadd(&work.nwait, +1)
	for i := 0; ; i++ {
		if work.full != 0 || work.partial != 0 {
			xadd(&work.nwait, -1)
			b = (*workbuf)(lfstackpop(&work.full))
			if b == nil {
				b = (*workbuf)(lfstackpop(&work.partial))
			}
			if b != nil {
				b.logget(entry)
				b.checknonempty()
				return b
			}
			xadd(&work.nwait, +1)
		}
		if work.nwait == work.nproc {
			return nil
		}
		_g_ := getg()
		if i < 10 {
			_g_.m.gcstats.nprocyield++
			procyield(20)
		} else if i < 20 {
			_g_.m.gcstats.nosyield++
			osyield()
		} else {
			_g_.m.gcstats.nsleep++
			usleep(100)
		}
	}
}

//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty(915)
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))
	_g_ := getg()
	_g_.m.gcstats.nhandoff++
	_g_.m.gcstats.nhandoffcnt += uint64(n)

	// Put b on full list - let first half of b get stolen.
	putfull(b, 942)
	return b1
}
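
// To make the split above concrete (numbers illustrative): with
// b.nobj == 9, n is 4, so b keeps its first 5 pointers and goes on the
// full list for other workers to steal, while the returned b1 holds
// the remaining 4 for the caller.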