// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
"math"
"runtime"
"sync"
"sync/atomic"
"syscall"
"testing"
"time"
)
var stop = make(chan bool, 1)
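// perpetuumMobile replaces itself with a fresh goroutine on every run,
// keeping the scheduler busy until something is sent on stop.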
func perpetuumMobile() {
select {
case <-stop:
default:
go perpetuumMobile()
}
}
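// TestStopTheWorldDeadlock stresses stop-the-world: concurrent GC and
// GOMAXPROCS changes race against perpetuumMobile's endless goroutine
// spawning, and the test deadlocks if stopping the world goes wrong.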
func TestStopTheWorldDeadlock(t *testing.T) {
if testing.Short() {
t.Skip("skipping during short test")
}
maxprocs := runtime.GOMAXPROCS(3)
compl := make(chan bool, 2)
go func() {
for i := 0; i != 1000; i += 1 {
runtime.GC()
}
compl <- true
}()
go func() {
for i := 0; i != 1000; i += 1 {
runtime.GOMAXPROCS(3)
}
compl <- true
}()
go perpetuumMobile()
<-compl
<-compl
stop <- true
runtime.GOMAXPROCS(maxprocs)
}
func TestYieldProgress(t *testing.T) {
testYieldProgress(t, false)
}
func TestYieldLockedProgress(t *testing.T) {
testYieldProgress(t, true)
}
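// testYieldProgress checks that a goroutine spinning in runtime.Gosched
// (optionally locked to its OS thread) does not starve other goroutines.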
func testYieldProgress(t *testing.T, locked bool) {
c := make(chan bool)
cack := make(chan bool)
go func() {
if locked {
runtime.LockOSThread()
}
for {
select {
case <-c:
cack <- true
return
default:
runtime.Gosched()
}
}
}()
time.Sleep(10 * time.Millisecond)
c <- true
<-cack
}
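// TestYieldLocked checks that runtime.Gosched makes progress for a
// goroutine that is locked to an OS thread.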
func TestYieldLocked(t *testing.T) {
const N = 10
c := make(chan bool)
go func() {
runtime.LockOSThread()
for i := 0; i < N; i++ {
runtime.Gosched()
time.Sleep(time.Millisecond)
}
c <- true
// runtime.UnlockOSThread() is deliberately omitted
}()
<-c
}
func TestGoroutineParallelism(t *testing.T) {
P := 4
N := 10
if testing.Short() {
P = 3
N = 3
}
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
// If the runtime triggers a forced GC during this test then it will deadlock,
// since the goroutines can't be stopped/preempted.
// Run GC now so that the next forced GC is as far in the future as possible.
runtime.GC()
for try := 0; try < N; try++ {
done := make(chan bool)
x := uint32(0)
for p := 0; p < P; p++ {
// Test that all P goroutines are scheduled at the same time
go func(p int) {
for i := 0; i < 3; i++ {
expected := uint32(P*i + p)
for atomic.LoadUint32(&x) != expected {
}
atomic.StoreUint32(&x, expected+1)
}
done <- true
}(p)
}
for p := 0; p < P; p++ {
<-done
}
}
}
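// TestBlockLocked checks that a goroutine locked to an OS thread can
// repeatedly block on a channel send without getting stuck.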
func TestBlockLocked(t *testing.T) {
const N = 10
c := make(chan bool)
go func() {
runtime.LockOSThread()
for i := 0; i < N; i++ {
c <- true
}
runtime.UnlockOSThread()
}()
for i := 0; i < N; i++ {
<-c
}
}
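// TestTimerFairness checks that the timer channel is eventually received
// even though two goroutines keep the other select case always ready.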
func TestTimerFairness(t *testing.T) {
done := make(chan bool)
c := make(chan bool)
for i := 0; i < 2; i++ {
go func() {
for {
select {
case c <- true:
case <-done:
return
}
}
}()
}
timer := time.After(20 * time.Millisecond)
for {
select {
case <-c:
case <-timer:
close(done)
return
}
}
}
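// TestTimerFairness2 is a variant in which the spinning goroutines also
// enter the scheduler via syscall.Read and both send and receive on c.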
func TestTimerFairness2(t *testing.T) {
done := make(chan bool)
c := make(chan bool)
for i := 0; i < 2; i++ {
go func() {
timer := time.After(20 * time.Millisecond)
var buf [1]byte
for {
syscall.Read(0, buf[0:0])
select {
case c <- true:
case <-c:
case <-timer:
done <- true
return
}
}
}()
}
<-done
<-done
}
// preempt is used to test preemption at split-stack checks.
// Storing the function in a var prevents inlining at the call site.
var preempt = func() int {
var a [128]int
sum := 0
for _, v := range a {
sum += v
}
return sum
}
func TestPreemption(t *testing.T) {
// Test that goroutines are preempted at function calls.
N := 5
if testing.Short() {
N = 2
}
c := make(chan bool)
var x uint32
for g := 0; g < 2; g++ {
go func(g int) {
for i := 0; i < N; i++ {
for atomic.LoadUint32(&x) != uint32(g) {
preempt()
}
atomic.StoreUint32(&x, uint32(1-g))
}
c <- true
}(g)
}
<-c
<-c
}
func TestPreemptionGC(t *testing.T) {
// Test that pending GC preempts running goroutines.
P := 5
N := 10
if testing.Short() {
P = 3
N = 2
}
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P + 1))
var stop uint32
for i := 0; i < P; i++ {
go func() {
for atomic.LoadUint32(&stop) == 0 {
preempt()
}
}()
}
for i := 0; i < N; i++ {
runtime.Gosched()
runtime.GC()
}
atomic.StoreUint32(&stop, 1)
}
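// TestGCFairness builds and runs testGCFairnessSource as a separate program:
// with GOMAXPROCS(1), two goroutines write to /dev/null in a tight loop and
// the main goroutine must still be scheduled to print OK.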
func TestGCFairness(t *testing.T) {
output := executeTest(t, testGCFairnessSource, nil)
want := "OK\n"
if output != want {
t.Fatalf("want %s, got %s\n", want, output)
}
}
const testGCFairnessSource = `
package main
import (
"fmt"
"os"
"runtime"
"time"
)
func main() {
runtime.GOMAXPROCS(1)
f, err := os.Open("/dev/null")
if os.IsNotExist(err) {
// The test is only meaningful if writes are fast, so it requires /dev/null.
// If /dev/null does not exist, skip the test by reporting success.
fmt.Println("OK")
return
}
if err != nil {
fmt.Println(err)
os.Exit(1)
}
for i := 0; i < 2; i++ {
go func() {
for {
f.Write([]byte("."))
}
}()
}
time.Sleep(10 * time.Millisecond)
fmt.Println("OK")
}
`
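// stackGrowthRecursive recurses with a large frame to force repeated stack
// growth; reading pad[0] (always zero) keeps pad from being optimized away.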
func stackGrowthRecursive(i int) {
var pad [128]uint64
if i != 0 && pad[0] == 0 {
stackGrowthRecursive(i - 1)
}
}
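// TestPreemptSplitBig checks that GC can preempt a goroutine whose only
// preemption points are the stack-split checks of big-frame functions.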
func TestPreemptSplitBig(t *testing.T) {
if testing.Short() {
t.Skip("skipping in -short mode")
}
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
stop := make(chan int)
go big(stop)
for i := 0; i < 3; i++ {
time.Sleep(10 * time.Microsecond) // let big start running
runtime.GC()
}
close(stop)
}
func big(stop chan int) int {
n := 0
for {
// delay so that gc is sure to have asked for a preemption
for i := 0; i < 1e9; i++ {
n++
}
// call bigframe, which used to miss the preemption in its prologue.
bigframe(stop)
// block until we've been asked to stop.
select {
case <-stop:
return n
}
}
}
func bigframe(stop chan int) int {
// not splitting the stack will overflow.
// small will notice that it needs a stack split and will
// catch the overflow.
var x [8192]byte
return small(stop, &x)
}
func small(stop chan int, x *[8192]byte) int {
for i := range x {
x[i] = byte(i)
}
sum := 0
for i := range x {
sum += int(x[i])
}
// keep small from being a leaf function, which might
// make it not do any stack check at all.
nonleaf(stop)
return sum
}
func nonleaf(stop chan int) bool {
// do something that won't be inlined:
select {
case <-stop:
return true
default:
return false
}
}
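// The scheduler's per-P local run queues are exercised via test hooks
// exported by package runtime.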
func TestSchedLocalQueue(t *testing.T) {
runtime.RunSchedLocalQueueTest()
}
func TestSchedLocalQueueSteal(t *testing.T) {
runtime.RunSchedLocalQueueStealTest()
}
func benchmarkStackGrowth(b *testing.B, rec int) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
stackGrowthRecursive(rec)
}
})
}
func BenchmarkStackGrowth(b *testing.B) {
benchmarkStackGrowth(b, 10)
}
func BenchmarkStackGrowthDeep(b *testing.B) {
benchmarkStackGrowth(b, 1024)
}
func BenchmarkCreateGoroutines(b *testing.B) {
benchmarkCreateGoroutines(b, 1)
}
func BenchmarkCreateGoroutinesParallel(b *testing.B) {
benchmarkCreateGoroutines(b, runtime.GOMAXPROCS(-1))
}
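// benchmarkCreateGoroutines creates b.N goroutines in procs parallel
// chains, each goroutine spawning the next one in its chain.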
func benchmarkCreateGoroutines(b *testing.B, procs int) {
c := make(chan bool)
var f func(n int)
f = func(n int) {
if n == 0 {
c <- true
return
}
go f(n - 1)
}
for i := 0; i < procs; i++ {
go f(b.N / procs)
}
for i := 0; i < procs; i++ {
<-c
}
}
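// BenchmarkCreateGoroutinesCapture creates goroutines whose closure captures
// surrounding variables (b, wg, N, and the loop's i), measuring the
// allocation cost of variable capture.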
func BenchmarkCreateGoroutinesCapture(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
const N = 4
var wg sync.WaitGroup
wg.Add(N)
for i := 0; i < N; i++ {
i := i
go func() {
if i >= N {
b.Logf("bad") // just to capture b
}
wg.Done()
}()
}
wg.Wait()
}
}
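// BenchmarkClosureCall measures a direct call of a variable-capturing
// closure, which the compiler can rewrite into a plain function call.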
func BenchmarkClosureCall(b *testing.B) {
sum := 0
off1 := 1
for i := 0; i < b.N; i++ {
off2 := 2
func() {
sum += i + off1 + off2
}()
}
_ = sum
}
type Matrix [][]float64
func BenchmarkMatmult(b *testing.B) {
b.StopTimer()
// matmult is O(N**3) but testing expects O(b.N),
// so we need to take cube root of b.N
n := int(math.Cbrt(float64(b.N))) + 1
A := makeMatrix(n)
B := makeMatrix(n)
C := makeMatrix(n)
b.StartTimer()
matmult(nil, A, B, C, 0, n, 0, n, 0, n, 8)
}
func makeMatrix(n int) Matrix {
m := make(Matrix, n)
for i := 0; i < n; i++ {
m[i] = make([]float64, n)
for j := 0; j < n; j++ {
m[i][j] = float64(i*n + j)
}
}
return m
}
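// matmult adds A*B to C over the block [i0,i1)x[j0,j1)x[k0,k1), recursively
// halving the largest dimension (in parallel for i and j; serially for k,
// where parallel halves would race on C) until it is below threshold.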
func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, threshold int) {
di := i1 - i0
dj := j1 - j0
dk := k1 - k0
if di >= dj && di >= dk && di >= threshold {
// divide in two by y axis
mi := i0 + di/2
done1 := make(chan struct{}, 1)
go matmult(done1, A, B, C, i0, mi, j0, j1, k0, k1, threshold)
matmult(nil, A, B, C, mi, i1, j0, j1, k0, k1, threshold)
<-done1
} else if dj >= dk && dj >= threshold {
// divide in two by x axis
mj := j0 + dj/2
done1 := make(chan struct{}, 1)
go matmult(done1, A, B, C, i0, i1, j0, mj, k0, k1, threshold)
matmult(nil, A, B, C, i0, i1, mj, j1, k0, k1, threshold)
<-done1
} else if dk >= threshold {
// divide in two by "k" axis
// deliberately not parallel because of data races
mk := k0 + dk/2
matmult(nil, A, B, C, i0, i1, j0, j1, k0, mk, threshold)
matmult(nil, A, B, C, i0, i1, j0, j1, mk, k1, threshold)
} else {
// the matrices are small enough, compute directly
for i := i0; i < i1; i++ {
for j := j0; j < j1; j++ {
for k := k0; k < k1; k++ {
C[i][j] += A[i][k] * B[k][j]
}
}
}
}
if done != nil {
done <- struct{}{}
}
}