2014-07-29 01:01:02 -06:00
|
|
|
// Copyright 2014 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package runtime_test
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"runtime"
|
|
|
|
"testing"
|
|
|
|
)
|
|
|
|
|
2015-05-04 08:19:24 -06:00
|
|
|
const (
|
|
|
|
typeScalar = 0
|
|
|
|
typePointer = 1
|
|
|
|
)
|
|
|
|
|
2014-07-29 01:01:02 -06:00
|
|
|
// TestGCInfo tests that various objects in heap, data and bss receive correct GC pointer type info.
|
|
|
|
func TestGCInfo(t *testing.T) {
|
2015-05-15 12:23:23 -06:00
|
|
|
verifyGCInfo(t, "bss Ptr", &bssPtr, infoPtr)
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
verifyGCInfo(t, "bss ScalarPtr", &bssScalarPtr, infoScalarPtr)
|
|
|
|
verifyGCInfo(t, "bss PtrScalar", &bssPtrScalar, infoPtrScalar)
|
|
|
|
verifyGCInfo(t, "bss BigStruct", &bssBigStruct, infoBigStruct())
|
|
|
|
verifyGCInfo(t, "bss string", &bssString, infoString)
|
|
|
|
verifyGCInfo(t, "bss slice", &bssSlice, infoSlice)
|
|
|
|
verifyGCInfo(t, "bss eface", &bssEface, infoEface)
|
|
|
|
verifyGCInfo(t, "bss iface", &bssIface, infoIface)
|
|
|
|
|
2015-05-15 12:23:23 -06:00
|
|
|
verifyGCInfo(t, "data Ptr", &dataPtr, infoPtr)
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
verifyGCInfo(t, "data ScalarPtr", &dataScalarPtr, infoScalarPtr)
|
|
|
|
verifyGCInfo(t, "data PtrScalar", &dataPtrScalar, infoPtrScalar)
|
|
|
|
verifyGCInfo(t, "data BigStruct", &dataBigStruct, infoBigStruct())
|
|
|
|
verifyGCInfo(t, "data string", &dataString, infoString)
|
|
|
|
verifyGCInfo(t, "data slice", &dataSlice, infoSlice)
|
|
|
|
verifyGCInfo(t, "data eface", &dataEface, infoEface)
|
|
|
|
verifyGCInfo(t, "data iface", &dataIface, infoIface)
|
2014-08-15 12:36:12 -06:00
|
|
|
|
2015-05-15 12:23:23 -06:00
|
|
|
verifyGCInfo(t, "stack Ptr", new(Ptr), infoPtr)
|
runtime: reintroduce ``dead'' space during GC scan
Reintroduce an optimization discarded during the initial conversion
from 4-bit heap bitmaps to 2-bit heap bitmaps: when we reach the
place in the bitmap where there are no more pointers, mark that position
for the GC so that it can avoid scanning past that place.
During heapBitsSetType we can also avoid initializing heap bitmap
beyond that location, which gives a bit of a win compared to Go 1.4.
This particular optimization (not initializing the heap bitmap) may not last:
we might change typedmemmove to use the heap bitmap, in which
case it would all need to be initialized. The early stop in the GC scan
will stay no matter what.
Compared to Go 1.4 (github.com/rsc/go, branch go14bench):
name old mean new mean delta
SetTypeNode64 80.7ns × (1.00,1.01) 57.4ns × (1.00,1.01) -28.83% (p=0.000)
SetTypeNode64Dead 80.5ns × (1.00,1.01) 13.1ns × (0.99,1.02) -83.77% (p=0.000)
SetTypeNode64Slice 2.16µs × (1.00,1.01) 1.54µs × (1.00,1.01) -28.75% (p=0.000)
SetTypeNode64DeadSlice 2.16µs × (1.00,1.01) 1.52µs × (1.00,1.00) -29.74% (p=0.000)
Compared to previous CL:
name old mean new mean delta
SetTypeNode64 56.7ns × (1.00,1.00) 57.4ns × (1.00,1.01) +1.19% (p=0.000)
SetTypeNode64Dead 57.2ns × (1.00,1.00) 13.1ns × (0.99,1.02) -77.15% (p=0.000)
SetTypeNode64Slice 1.56µs × (1.00,1.01) 1.54µs × (1.00,1.01) -0.89% (p=0.000)
SetTypeNode64DeadSlice 1.55µs × (1.00,1.01) 1.52µs × (1.00,1.00) -2.23% (p=0.000)
This is the last CL in the sequence converting from the 4-bit heap
to the 2-bit heap, with all the same optimizations reenabled.
Compared to before that process began (compared to CL 9701 patch set 1):
name old mean new mean delta
BinaryTree17 5.87s × (0.94,1.09) 5.91s × (0.96,1.06) ~ (p=0.578)
Fannkuch11 4.32s × (1.00,1.00) 4.32s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 89.1ns × (0.95,1.16) 89.0ns × (0.93,1.10) ~ (p=0.942)
FmtFprintfString 283ns × (0.98,1.02) 298ns × (0.98,1.06) +5.33% (p=0.000)
FmtFprintfInt 284ns × (0.98,1.04) 286ns × (0.98,1.03) ~ (p=0.208)
FmtFprintfIntInt 486ns × (0.98,1.03) 498ns × (0.97,1.06) +2.48% (p=0.000)
FmtFprintfPrefixedInt 400ns × (0.99,1.02) 408ns × (0.98,1.02) +2.23% (p=0.000)
FmtFprintfFloat 566ns × (0.99,1.01) 587ns × (0.98,1.01) +3.69% (p=0.000)
FmtManyArgs 1.91µs × (0.99,1.02) 1.94µs × (0.99,1.02) +1.81% (p=0.000)
GobDecode 15.5ms × (0.98,1.05) 15.8ms × (0.98,1.03) +1.94% (p=0.002)
GobEncode 11.9ms × (0.97,1.03) 12.0ms × (0.96,1.09) ~ (p=0.263)
Gzip 648ms × (0.99,1.01) 648ms × (0.99,1.01) ~ (p=0.992)
Gunzip 143ms × (1.00,1.00) 143ms × (1.00,1.01) ~ (p=0.585)
HTTPClientServer 89.2µs × (0.99,1.02) 90.3µs × (0.98,1.01) +1.24% (p=0.000)
JSONEncode 32.3ms × (0.97,1.06) 31.6ms × (0.99,1.01) -2.29% (p=0.000)
JSONDecode 106ms × (0.99,1.01) 107ms × (1.00,1.01) +0.62% (p=0.000)
Mandelbrot200 6.02ms × (1.00,1.00) 6.03ms × (1.00,1.01) ~ (p=0.250)
GoParse 6.57ms × (0.97,1.06) 6.53ms × (0.99,1.03) ~ (p=0.243)
RegexpMatchEasy0_32 162ns × (1.00,1.00) 161ns × (1.00,1.01) -0.80% (p=0.000)
RegexpMatchEasy0_1K 561ns × (0.99,1.02) 541ns × (0.99,1.01) -3.67% (p=0.000)
RegexpMatchEasy1_32 145ns × (0.95,1.04) 138ns × (1.00,1.00) -5.04% (p=0.000)
RegexpMatchEasy1_1K 864ns × (0.99,1.04) 887ns × (0.99,1.01) +2.57% (p=0.000)
RegexpMatchMedium_32 255ns × (0.99,1.04) 253ns × (0.99,1.01) -1.05% (p=0.012)
RegexpMatchMedium_1K 73.9µs × (0.98,1.04) 72.8µs × (1.00,1.00) -1.51% (p=0.005)
RegexpMatchHard_32 3.92µs × (0.98,1.04) 3.85µs × (1.00,1.01) -1.88% (p=0.002)
RegexpMatchHard_1K 120µs × (0.98,1.04) 117µs × (1.00,1.01) -2.02% (p=0.001)
Revcomp 936ms × (0.95,1.08) 922ms × (0.97,1.08) ~ (p=0.234)
Template 130ms × (0.98,1.04) 126ms × (0.99,1.01) -2.99% (p=0.000)
TimeParse 638ns × (0.98,1.05) 628ns × (0.99,1.01) -1.54% (p=0.004)
TimeFormat 674ns × (0.99,1.01) 668ns × (0.99,1.01) -0.80% (p=0.001)
The slowdown of the first few benchmarks seems to be due to the new
atomic operations for certain small size allocations. But the larger
benchmarks mostly improve, probably due to the decreased memory
pressure from having half as much heap bitmap.
CL 9706, which removes the (never used anymore) wbshadow mode,
gets back what is lost in the early microbenchmarks.
Change-Id: I37423a209e8ec2a2e92538b45cac5422a6acd32d
Reviewed-on: https://go-review.googlesource.com/9705
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-05-04 20:53:54 -06:00
|
|
|
verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr)
|
|
|
|
verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar)
|
|
|
|
verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct())
|
|
|
|
verifyGCInfo(t, "stack string", new(string), infoString)
|
|
|
|
verifyGCInfo(t, "stack slice", new([]string), infoSlice)
|
|
|
|
verifyGCInfo(t, "stack eface", new(interface{}), infoEface)
|
|
|
|
verifyGCInfo(t, "stack iface", new(Iface), infoIface)
|
2014-07-29 01:01:02 -06:00
|
|
|
|
2014-08-18 12:21:55 -06:00
|
|
|
for i := 0; i < 10; i++ {
|
2015-05-15 12:23:23 -06:00
|
|
|
verifyGCInfo(t, "heap Ptr", escape(new(Ptr)), trimDead(padDead(infoPtr)))
|
runtime: reintroduce ``dead'' space during GC scan
Reintroduce an optimization discarded during the initial conversion
from 4-bit heap bitmaps to 2-bit heap bitmaps: when we reach the
place in the bitmap where there are no more pointers, mark that position
for the GC so that it can avoid scanning past that place.
During heapBitsSetType we can also avoid initializing heap bitmap
beyond that location, which gives a bit of a win compared to Go 1.4.
This particular optimization (not initializing the heap bitmap) may not last:
we might change typedmemmove to use the heap bitmap, in which
case it would all need to be initialized. The early stop in the GC scan
will stay no matter what.
Compared to Go 1.4 (github.com/rsc/go, branch go14bench):
name old mean new mean delta
SetTypeNode64 80.7ns × (1.00,1.01) 57.4ns × (1.00,1.01) -28.83% (p=0.000)
SetTypeNode64Dead 80.5ns × (1.00,1.01) 13.1ns × (0.99,1.02) -83.77% (p=0.000)
SetTypeNode64Slice 2.16µs × (1.00,1.01) 1.54µs × (1.00,1.01) -28.75% (p=0.000)
SetTypeNode64DeadSlice 2.16µs × (1.00,1.01) 1.52µs × (1.00,1.00) -29.74% (p=0.000)
Compared to previous CL:
name old mean new mean delta
SetTypeNode64 56.7ns × (1.00,1.00) 57.4ns × (1.00,1.01) +1.19% (p=0.000)
SetTypeNode64Dead 57.2ns × (1.00,1.00) 13.1ns × (0.99,1.02) -77.15% (p=0.000)
SetTypeNode64Slice 1.56µs × (1.00,1.01) 1.54µs × (1.00,1.01) -0.89% (p=0.000)
SetTypeNode64DeadSlice 1.55µs × (1.00,1.01) 1.52µs × (1.00,1.00) -2.23% (p=0.000)
This is the last CL in the sequence converting from the 4-bit heap
to the 2-bit heap, with all the same optimizations reenabled.
Compared to before that process began (compared to CL 9701 patch set 1):
name old mean new mean delta
BinaryTree17 5.87s × (0.94,1.09) 5.91s × (0.96,1.06) ~ (p=0.578)
Fannkuch11 4.32s × (1.00,1.00) 4.32s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 89.1ns × (0.95,1.16) 89.0ns × (0.93,1.10) ~ (p=0.942)
FmtFprintfString 283ns × (0.98,1.02) 298ns × (0.98,1.06) +5.33% (p=0.000)
FmtFprintfInt 284ns × (0.98,1.04) 286ns × (0.98,1.03) ~ (p=0.208)
FmtFprintfIntInt 486ns × (0.98,1.03) 498ns × (0.97,1.06) +2.48% (p=0.000)
FmtFprintfPrefixedInt 400ns × (0.99,1.02) 408ns × (0.98,1.02) +2.23% (p=0.000)
FmtFprintfFloat 566ns × (0.99,1.01) 587ns × (0.98,1.01) +3.69% (p=0.000)
FmtManyArgs 1.91µs × (0.99,1.02) 1.94µs × (0.99,1.02) +1.81% (p=0.000)
GobDecode 15.5ms × (0.98,1.05) 15.8ms × (0.98,1.03) +1.94% (p=0.002)
GobEncode 11.9ms × (0.97,1.03) 12.0ms × (0.96,1.09) ~ (p=0.263)
Gzip 648ms × (0.99,1.01) 648ms × (0.99,1.01) ~ (p=0.992)
Gunzip 143ms × (1.00,1.00) 143ms × (1.00,1.01) ~ (p=0.585)
HTTPClientServer 89.2µs × (0.99,1.02) 90.3µs × (0.98,1.01) +1.24% (p=0.000)
JSONEncode 32.3ms × (0.97,1.06) 31.6ms × (0.99,1.01) -2.29% (p=0.000)
JSONDecode 106ms × (0.99,1.01) 107ms × (1.00,1.01) +0.62% (p=0.000)
Mandelbrot200 6.02ms × (1.00,1.00) 6.03ms × (1.00,1.01) ~ (p=0.250)
GoParse 6.57ms × (0.97,1.06) 6.53ms × (0.99,1.03) ~ (p=0.243)
RegexpMatchEasy0_32 162ns × (1.00,1.00) 161ns × (1.00,1.01) -0.80% (p=0.000)
RegexpMatchEasy0_1K 561ns × (0.99,1.02) 541ns × (0.99,1.01) -3.67% (p=0.000)
RegexpMatchEasy1_32 145ns × (0.95,1.04) 138ns × (1.00,1.00) -5.04% (p=0.000)
RegexpMatchEasy1_1K 864ns × (0.99,1.04) 887ns × (0.99,1.01) +2.57% (p=0.000)
RegexpMatchMedium_32 255ns × (0.99,1.04) 253ns × (0.99,1.01) -1.05% (p=0.012)
RegexpMatchMedium_1K 73.9µs × (0.98,1.04) 72.8µs × (1.00,1.00) -1.51% (p=0.005)
RegexpMatchHard_32 3.92µs × (0.98,1.04) 3.85µs × (1.00,1.01) -1.88% (p=0.002)
RegexpMatchHard_1K 120µs × (0.98,1.04) 117µs × (1.00,1.01) -2.02% (p=0.001)
Revcomp 936ms × (0.95,1.08) 922ms × (0.97,1.08) ~ (p=0.234)
Template 130ms × (0.98,1.04) 126ms × (0.99,1.01) -2.99% (p=0.000)
TimeParse 638ns × (0.98,1.05) 628ns × (0.99,1.01) -1.54% (p=0.004)
TimeFormat 674ns × (0.99,1.01) 668ns × (0.99,1.01) -0.80% (p=0.001)
The slowdown of the first few benchmarks seems to be due to the new
atomic operations for certain small size allocations. But the larger
benchmarks mostly improve, probably due to the decreased memory
pressure from having half as much heap bitmap.
CL 9706, which removes the (never used anymore) wbshadow mode,
gets back what is lost in the early microbenchmarks.
Change-Id: I37423a209e8ec2a2e92538b45cac5422a6acd32d
Reviewed-on: https://go-review.googlesource.com/9705
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-05-04 20:53:54 -06:00
|
|
|
verifyGCInfo(t, "heap PtrSlice", escape(&make([]*byte, 10)[0]), trimDead(infoPtr10))
|
|
|
|
verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), trimDead(infoScalarPtr))
|
|
|
|
verifyGCInfo(t, "heap ScalarPtrSlice", escape(&make([]ScalarPtr, 4)[0]), trimDead(infoScalarPtr4))
|
|
|
|
verifyGCInfo(t, "heap PtrScalar", escape(new(PtrScalar)), trimDead(infoPtrScalar))
|
|
|
|
verifyGCInfo(t, "heap BigStruct", escape(new(BigStruct)), trimDead(infoBigStruct()))
|
|
|
|
verifyGCInfo(t, "heap string", escape(new(string)), trimDead(infoString))
|
|
|
|
verifyGCInfo(t, "heap eface", escape(new(interface{})), trimDead(infoEface))
|
|
|
|
verifyGCInfo(t, "heap iface", escape(new(Iface)), trimDead(infoIface))
|
2014-07-29 01:01:02 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func verifyGCInfo(t *testing.T, name string, p interface{}, mask0 []byte) {
|
|
|
|
mask := runtime.GCMask(p)
|
2016-03-31 23:34:18 -06:00
|
|
|
if !bytes.Equal(mask, mask0) {
|
2014-07-29 01:01:02 -06:00
|
|
|
t.Errorf("bad GC program for %v:\nwant %+v\ngot %+v", name, mask0, mask)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-15 12:23:23 -06:00
|
|
|
func padDead(mask []byte) []byte {
|
runtime: reclaim scan/dead bit in first word
With the switch to separate mark bitmaps, the scan/dead bit for the
first word of each object is now unused. Reclaim this bit and use it
as a scan/dead bit, just like words three and on. The second word is
still used for checkmark.
This dramatically simplifies heapBitsSetTypeNoScan and hasPointers,
since they no longer need different cases for 1, 2, and 3+ word
objects. They can instead just manipulate the heap bitmap for the
first word and be done with it.
In order to enable this, we change heapBitsSetType and runGCProg to
always set the scan/dead bit to scan for the first word on every code
path. Since these functions only apply to types that have pointers,
there's no need to do this conditionally: it's *always* necessary to
set the scan bit in the first word.
We also change every place that scans an object and checks if there
are more pointers. Rather than only checking morePointers if the word
is >= 2, we now check morePointers if word != 1 (since that's the
checkmark word).
Looking forward, we should probably reclaim the checkmark bit, too,
but that's going to be quite a bit more work.
Tested by setting doubleCheck in heapBitsSetType and running all.bash
on both linux/amd64 and linux/386, and by running GOGC=10 all.bash.
This particularly improves the FmtFprintf* go1 benchmarks, since they
do a large amount of noscan allocation.
name old time/op new time/op delta
BinaryTree17-12 2.34s ± 1% 2.38s ± 1% +1.70% (p=0.000 n=17+19)
Fannkuch11-12 2.09s ± 0% 2.09s ± 1% ~ (p=0.276 n=17+16)
FmtFprintfEmpty-12 44.9ns ± 2% 44.8ns ± 2% ~ (p=0.340 n=19+18)
FmtFprintfString-12 127ns ± 0% 125ns ± 0% -1.57% (p=0.000 n=16+15)
FmtFprintfInt-12 128ns ± 0% 122ns ± 1% -4.45% (p=0.000 n=15+20)
FmtFprintfIntInt-12 207ns ± 1% 193ns ± 0% -6.55% (p=0.000 n=19+14)
FmtFprintfPrefixedInt-12 197ns ± 1% 191ns ± 0% -2.93% (p=0.000 n=17+18)
FmtFprintfFloat-12 263ns ± 0% 248ns ± 1% -5.88% (p=0.000 n=15+19)
FmtManyArgs-12 794ns ± 0% 779ns ± 1% -1.90% (p=0.000 n=18+18)
GobDecode-12 7.14ms ± 2% 7.11ms ± 1% ~ (p=0.072 n=20+20)
GobEncode-12 5.85ms ± 1% 5.82ms ± 1% -0.49% (p=0.000 n=20+20)
Gzip-12 218ms ± 1% 215ms ± 1% -1.22% (p=0.000 n=19+19)
Gunzip-12 36.8ms ± 0% 36.7ms ± 0% -0.18% (p=0.006 n=18+20)
HTTPClientServer-12 77.1µs ± 4% 77.1µs ± 3% ~ (p=0.945 n=19+20)
JSONEncode-12 15.6ms ± 1% 15.9ms ± 1% +1.68% (p=0.000 n=18+20)
JSONDecode-12 55.2ms ± 1% 53.6ms ± 1% -2.93% (p=0.000 n=17+19)
Mandelbrot200-12 4.05ms ± 1% 4.05ms ± 0% ~ (p=0.306 n=17+17)
GoParse-12 3.14ms ± 1% 3.10ms ± 1% -1.31% (p=0.000 n=19+18)
RegexpMatchEasy0_32-12 69.3ns ± 1% 70.0ns ± 0% +0.89% (p=0.000 n=19+17)
RegexpMatchEasy0_1K-12 237ns ± 1% 236ns ± 0% -0.62% (p=0.000 n=19+16)
RegexpMatchEasy1_32-12 69.5ns ± 1% 70.3ns ± 1% +1.14% (p=0.000 n=18+17)
RegexpMatchEasy1_1K-12 377ns ± 1% 366ns ± 1% -3.03% (p=0.000 n=15+19)
RegexpMatchMedium_32-12 107ns ± 1% 107ns ± 2% ~ (p=0.318 n=20+19)
RegexpMatchMedium_1K-12 33.8µs ± 3% 33.5µs ± 1% -1.04% (p=0.001 n=20+19)
RegexpMatchHard_32-12 1.68µs ± 1% 1.73µs ± 0% +2.50% (p=0.000 n=20+18)
RegexpMatchHard_1K-12 50.8µs ± 1% 52.0µs ± 1% +2.50% (p=0.000 n=19+18)
Revcomp-12 381ms ± 1% 385ms ± 1% +1.00% (p=0.000 n=17+18)
Template-12 64.9ms ± 3% 62.6ms ± 1% -3.55% (p=0.000 n=19+18)
TimeParse-12 324ns ± 0% 328ns ± 1% +1.25% (p=0.000 n=18+18)
TimeFormat-12 345ns ± 0% 334ns ± 0% -3.31% (p=0.000 n=15+17)
[Geo mean] 52.1µs 51.5µs -1.00%
Change-Id: I13e74da3193a7f80794c654f944d1f0d60817049
Reviewed-on: https://go-review.googlesource.com/22632
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-29 12:51:48 -06:00
|
|
|
// Because the dead bit isn't encoded in the second word,
|
2015-05-15 12:23:23 -06:00
|
|
|
// and because on 32-bit systems a one-word allocation
|
|
|
|
// uses a two-word block, the pointer info for a one-word
|
|
|
|
// object needs to be expanded to include an extra scalar
|
|
|
|
// on 32-bit systems to match the heap bitmap.
|
|
|
|
if runtime.PtrSize == 4 && len(mask) == 1 {
|
|
|
|
return []byte{mask[0], 0}
|
|
|
|
}
|
|
|
|
return mask
|
|
|
|
}
|
|
|
|
|
runtime: reintroduce ``dead'' space during GC scan
Reintroduce an optimization discarded during the initial conversion
from 4-bit heap bitmaps to 2-bit heap bitmaps: when we reach the
place in the bitmap where there are no more pointers, mark that position
for the GC so that it can avoid scanning past that place.
During heapBitsSetType we can also avoid initializing heap bitmap
beyond that location, which gives a bit of a win compared to Go 1.4.
This particular optimization (not initializing the heap bitmap) may not last:
we might change typedmemmove to use the heap bitmap, in which
case it would all need to be initialized. The early stop in the GC scan
will stay no matter what.
Compared to Go 1.4 (github.com/rsc/go, branch go14bench):
name old mean new mean delta
SetTypeNode64 80.7ns × (1.00,1.01) 57.4ns × (1.00,1.01) -28.83% (p=0.000)
SetTypeNode64Dead 80.5ns × (1.00,1.01) 13.1ns × (0.99,1.02) -83.77% (p=0.000)
SetTypeNode64Slice 2.16µs × (1.00,1.01) 1.54µs × (1.00,1.01) -28.75% (p=0.000)
SetTypeNode64DeadSlice 2.16µs × (1.00,1.01) 1.52µs × (1.00,1.00) -29.74% (p=0.000)
Compared to previous CL:
name old mean new mean delta
SetTypeNode64 56.7ns × (1.00,1.00) 57.4ns × (1.00,1.01) +1.19% (p=0.000)
SetTypeNode64Dead 57.2ns × (1.00,1.00) 13.1ns × (0.99,1.02) -77.15% (p=0.000)
SetTypeNode64Slice 1.56µs × (1.00,1.01) 1.54µs × (1.00,1.01) -0.89% (p=0.000)
SetTypeNode64DeadSlice 1.55µs × (1.00,1.01) 1.52µs × (1.00,1.00) -2.23% (p=0.000)
This is the last CL in the sequence converting from the 4-bit heap
to the 2-bit heap, with all the same optimizations reenabled.
Compared to before that process began (compared to CL 9701 patch set 1):
name old mean new mean delta
BinaryTree17 5.87s × (0.94,1.09) 5.91s × (0.96,1.06) ~ (p=0.578)
Fannkuch11 4.32s × (1.00,1.00) 4.32s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 89.1ns × (0.95,1.16) 89.0ns × (0.93,1.10) ~ (p=0.942)
FmtFprintfString 283ns × (0.98,1.02) 298ns × (0.98,1.06) +5.33% (p=0.000)
FmtFprintfInt 284ns × (0.98,1.04) 286ns × (0.98,1.03) ~ (p=0.208)
FmtFprintfIntInt 486ns × (0.98,1.03) 498ns × (0.97,1.06) +2.48% (p=0.000)
FmtFprintfPrefixedInt 400ns × (0.99,1.02) 408ns × (0.98,1.02) +2.23% (p=0.000)
FmtFprintfFloat 566ns × (0.99,1.01) 587ns × (0.98,1.01) +3.69% (p=0.000)
FmtManyArgs 1.91µs × (0.99,1.02) 1.94µs × (0.99,1.02) +1.81% (p=0.000)
GobDecode 15.5ms × (0.98,1.05) 15.8ms × (0.98,1.03) +1.94% (p=0.002)
GobEncode 11.9ms × (0.97,1.03) 12.0ms × (0.96,1.09) ~ (p=0.263)
Gzip 648ms × (0.99,1.01) 648ms × (0.99,1.01) ~ (p=0.992)
Gunzip 143ms × (1.00,1.00) 143ms × (1.00,1.01) ~ (p=0.585)
HTTPClientServer 89.2µs × (0.99,1.02) 90.3µs × (0.98,1.01) +1.24% (p=0.000)
JSONEncode 32.3ms × (0.97,1.06) 31.6ms × (0.99,1.01) -2.29% (p=0.000)
JSONDecode 106ms × (0.99,1.01) 107ms × (1.00,1.01) +0.62% (p=0.000)
Mandelbrot200 6.02ms × (1.00,1.00) 6.03ms × (1.00,1.01) ~ (p=0.250)
GoParse 6.57ms × (0.97,1.06) 6.53ms × (0.99,1.03) ~ (p=0.243)
RegexpMatchEasy0_32 162ns × (1.00,1.00) 161ns × (1.00,1.01) -0.80% (p=0.000)
RegexpMatchEasy0_1K 561ns × (0.99,1.02) 541ns × (0.99,1.01) -3.67% (p=0.000)
RegexpMatchEasy1_32 145ns × (0.95,1.04) 138ns × (1.00,1.00) -5.04% (p=0.000)
RegexpMatchEasy1_1K 864ns × (0.99,1.04) 887ns × (0.99,1.01) +2.57% (p=0.000)
RegexpMatchMedium_32 255ns × (0.99,1.04) 253ns × (0.99,1.01) -1.05% (p=0.012)
RegexpMatchMedium_1K 73.9µs × (0.98,1.04) 72.8µs × (1.00,1.00) -1.51% (p=0.005)
RegexpMatchHard_32 3.92µs × (0.98,1.04) 3.85µs × (1.00,1.01) -1.88% (p=0.002)
RegexpMatchHard_1K 120µs × (0.98,1.04) 117µs × (1.00,1.01) -2.02% (p=0.001)
Revcomp 936ms × (0.95,1.08) 922ms × (0.97,1.08) ~ (p=0.234)
Template 130ms × (0.98,1.04) 126ms × (0.99,1.01) -2.99% (p=0.000)
TimeParse 638ns × (0.98,1.05) 628ns × (0.99,1.01) -1.54% (p=0.004)
TimeFormat 674ns × (0.99,1.01) 668ns × (0.99,1.01) -0.80% (p=0.001)
The slowdown of the first few benchmarks seems to be due to the new
atomic operations for certain small size allocations. But the larger
benchmarks mostly improve, probably due to the decreased memory
pressure from having half as much heap bitmap.
CL 9706, which removes the (never used anymore) wbshadow mode,
gets back what is lost in the early microbenchmarks.
Change-Id: I37423a209e8ec2a2e92538b45cac5422a6acd32d
Reviewed-on: https://go-review.googlesource.com/9705
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-05-04 20:53:54 -06:00
|
|
|
func trimDead(mask []byte) []byte {
|
|
|
|
for len(mask) > 2 && mask[len(mask)-1] == typeScalar {
|
|
|
|
mask = mask[:len(mask)-1]
|
2014-08-15 12:36:12 -06:00
|
|
|
}
|
runtime: reclaim scan/dead bit in first word
With the switch to separate mark bitmaps, the scan/dead bit for the
first word of each object is now unused. Reclaim this bit and use it
as a scan/dead bit, just like words three and on. The second word is
still used for checkmark.
This dramatically simplifies heapBitsSetTypeNoScan and hasPointers,
since they no longer need different cases for 1, 2, and 3+ word
objects. They can instead just manipulate the heap bitmap for the
first word and be done with it.
In order to enable this, we change heapBitsSetType and runGCProg to
always set the scan/dead bit to scan for the first word on every code
path. Since these functions only apply to types that have pointers,
there's no need to do this conditionally: it's *always* necessary to
set the scan bit in the first word.
We also change every place that scans an object and checks if there
are more pointers. Rather than only checking morePointers if the word
is >= 2, we now check morePointers if word != 1 (since that's the
checkmark word).
Looking forward, we should probably reclaim the checkmark bit, too,
but that's going to be quite a bit more work.
Tested by setting doubleCheck in heapBitsSetType and running all.bash
on both linux/amd64 and linux/386, and by running GOGC=10 all.bash.
This particularly improves the FmtFprintf* go1 benchmarks, since they
do a large amount of noscan allocation.
name old time/op new time/op delta
BinaryTree17-12 2.34s ± 1% 2.38s ± 1% +1.70% (p=0.000 n=17+19)
Fannkuch11-12 2.09s ± 0% 2.09s ± 1% ~ (p=0.276 n=17+16)
FmtFprintfEmpty-12 44.9ns ± 2% 44.8ns ± 2% ~ (p=0.340 n=19+18)
FmtFprintfString-12 127ns ± 0% 125ns ± 0% -1.57% (p=0.000 n=16+15)
FmtFprintfInt-12 128ns ± 0% 122ns ± 1% -4.45% (p=0.000 n=15+20)
FmtFprintfIntInt-12 207ns ± 1% 193ns ± 0% -6.55% (p=0.000 n=19+14)
FmtFprintfPrefixedInt-12 197ns ± 1% 191ns ± 0% -2.93% (p=0.000 n=17+18)
FmtFprintfFloat-12 263ns ± 0% 248ns ± 1% -5.88% (p=0.000 n=15+19)
FmtManyArgs-12 794ns ± 0% 779ns ± 1% -1.90% (p=0.000 n=18+18)
GobDecode-12 7.14ms ± 2% 7.11ms ± 1% ~ (p=0.072 n=20+20)
GobEncode-12 5.85ms ± 1% 5.82ms ± 1% -0.49% (p=0.000 n=20+20)
Gzip-12 218ms ± 1% 215ms ± 1% -1.22% (p=0.000 n=19+19)
Gunzip-12 36.8ms ± 0% 36.7ms ± 0% -0.18% (p=0.006 n=18+20)
HTTPClientServer-12 77.1µs ± 4% 77.1µs ± 3% ~ (p=0.945 n=19+20)
JSONEncode-12 15.6ms ± 1% 15.9ms ± 1% +1.68% (p=0.000 n=18+20)
JSONDecode-12 55.2ms ± 1% 53.6ms ± 1% -2.93% (p=0.000 n=17+19)
Mandelbrot200-12 4.05ms ± 1% 4.05ms ± 0% ~ (p=0.306 n=17+17)
GoParse-12 3.14ms ± 1% 3.10ms ± 1% -1.31% (p=0.000 n=19+18)
RegexpMatchEasy0_32-12 69.3ns ± 1% 70.0ns ± 0% +0.89% (p=0.000 n=19+17)
RegexpMatchEasy0_1K-12 237ns ± 1% 236ns ± 0% -0.62% (p=0.000 n=19+16)
RegexpMatchEasy1_32-12 69.5ns ± 1% 70.3ns ± 1% +1.14% (p=0.000 n=18+17)
RegexpMatchEasy1_1K-12 377ns ± 1% 366ns ± 1% -3.03% (p=0.000 n=15+19)
RegexpMatchMedium_32-12 107ns ± 1% 107ns ± 2% ~ (p=0.318 n=20+19)
RegexpMatchMedium_1K-12 33.8µs ± 3% 33.5µs ± 1% -1.04% (p=0.001 n=20+19)
RegexpMatchHard_32-12 1.68µs ± 1% 1.73µs ± 0% +2.50% (p=0.000 n=20+18)
RegexpMatchHard_1K-12 50.8µs ± 1% 52.0µs ± 1% +2.50% (p=0.000 n=19+18)
Revcomp-12 381ms ± 1% 385ms ± 1% +1.00% (p=0.000 n=17+18)
Template-12 64.9ms ± 3% 62.6ms ± 1% -3.55% (p=0.000 n=19+18)
TimeParse-12 324ns ± 0% 328ns ± 1% +1.25% (p=0.000 n=18+18)
TimeFormat-12 345ns ± 0% 334ns ± 0% -3.31% (p=0.000 n=15+17)
[Geo mean] 52.1µs 51.5µs -1.00%
Change-Id: I13e74da3193a7f80794c654f944d1f0d60817049
Reviewed-on: https://go-review.googlesource.com/22632
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-04-29 12:51:48 -06:00
|
|
|
if len(mask) == 2 && mask[0] == typeScalar && mask[1] == typeScalar {
|
|
|
|
mask = mask[:0]
|
|
|
|
}
|
runtime: reintroduce ``dead'' space during GC scan
Reintroduce an optimization discarded during the initial conversion
from 4-bit heap bitmaps to 2-bit heap bitmaps: when we reach the
place in the bitmap where there are no more pointers, mark that position
for the GC so that it can avoid scanning past that place.
During heapBitsSetType we can also avoid initializing heap bitmap
beyond that location, which gives a bit of a win compared to Go 1.4.
This particular optimization (not initializing the heap bitmap) may not last:
we might change typedmemmove to use the heap bitmap, in which
case it would all need to be initialized. The early stop in the GC scan
will stay no matter what.
Compared to Go 1.4 (github.com/rsc/go, branch go14bench):
name old mean new mean delta
SetTypeNode64 80.7ns × (1.00,1.01) 57.4ns × (1.00,1.01) -28.83% (p=0.000)
SetTypeNode64Dead 80.5ns × (1.00,1.01) 13.1ns × (0.99,1.02) -83.77% (p=0.000)
SetTypeNode64Slice 2.16µs × (1.00,1.01) 1.54µs × (1.00,1.01) -28.75% (p=0.000)
SetTypeNode64DeadSlice 2.16µs × (1.00,1.01) 1.52µs × (1.00,1.00) -29.74% (p=0.000)
Compared to previous CL:
name old mean new mean delta
SetTypeNode64 56.7ns × (1.00,1.00) 57.4ns × (1.00,1.01) +1.19% (p=0.000)
SetTypeNode64Dead 57.2ns × (1.00,1.00) 13.1ns × (0.99,1.02) -77.15% (p=0.000)
SetTypeNode64Slice 1.56µs × (1.00,1.01) 1.54µs × (1.00,1.01) -0.89% (p=0.000)
SetTypeNode64DeadSlice 1.55µs × (1.00,1.01) 1.52µs × (1.00,1.00) -2.23% (p=0.000)
This is the last CL in the sequence converting from the 4-bit heap
to the 2-bit heap, with all the same optimizations reenabled.
Compared to before that process began (compared to CL 9701 patch set 1):
name old mean new mean delta
BinaryTree17 5.87s × (0.94,1.09) 5.91s × (0.96,1.06) ~ (p=0.578)
Fannkuch11 4.32s × (1.00,1.00) 4.32s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 89.1ns × (0.95,1.16) 89.0ns × (0.93,1.10) ~ (p=0.942)
FmtFprintfString 283ns × (0.98,1.02) 298ns × (0.98,1.06) +5.33% (p=0.000)
FmtFprintfInt 284ns × (0.98,1.04) 286ns × (0.98,1.03) ~ (p=0.208)
FmtFprintfIntInt 486ns × (0.98,1.03) 498ns × (0.97,1.06) +2.48% (p=0.000)
FmtFprintfPrefixedInt 400ns × (0.99,1.02) 408ns × (0.98,1.02) +2.23% (p=0.000)
FmtFprintfFloat 566ns × (0.99,1.01) 587ns × (0.98,1.01) +3.69% (p=0.000)
FmtManyArgs 1.91µs × (0.99,1.02) 1.94µs × (0.99,1.02) +1.81% (p=0.000)
GobDecode 15.5ms × (0.98,1.05) 15.8ms × (0.98,1.03) +1.94% (p=0.002)
GobEncode 11.9ms × (0.97,1.03) 12.0ms × (0.96,1.09) ~ (p=0.263)
Gzip 648ms × (0.99,1.01) 648ms × (0.99,1.01) ~ (p=0.992)
Gunzip 143ms × (1.00,1.00) 143ms × (1.00,1.01) ~ (p=0.585)
HTTPClientServer 89.2µs × (0.99,1.02) 90.3µs × (0.98,1.01) +1.24% (p=0.000)
JSONEncode 32.3ms × (0.97,1.06) 31.6ms × (0.99,1.01) -2.29% (p=0.000)
JSONDecode 106ms × (0.99,1.01) 107ms × (1.00,1.01) +0.62% (p=0.000)
Mandelbrot200 6.02ms × (1.00,1.00) 6.03ms × (1.00,1.01) ~ (p=0.250)
GoParse 6.57ms × (0.97,1.06) 6.53ms × (0.99,1.03) ~ (p=0.243)
RegexpMatchEasy0_32 162ns × (1.00,1.00) 161ns × (1.00,1.01) -0.80% (p=0.000)
RegexpMatchEasy0_1K 561ns × (0.99,1.02) 541ns × (0.99,1.01) -3.67% (p=0.000)
RegexpMatchEasy1_32 145ns × (0.95,1.04) 138ns × (1.00,1.00) -5.04% (p=0.000)
RegexpMatchEasy1_1K 864ns × (0.99,1.04) 887ns × (0.99,1.01) +2.57% (p=0.000)
RegexpMatchMedium_32 255ns × (0.99,1.04) 253ns × (0.99,1.01) -1.05% (p=0.012)
RegexpMatchMedium_1K 73.9µs × (0.98,1.04) 72.8µs × (1.00,1.00) -1.51% (p=0.005)
RegexpMatchHard_32 3.92µs × (0.98,1.04) 3.85µs × (1.00,1.01) -1.88% (p=0.002)
RegexpMatchHard_1K 120µs × (0.98,1.04) 117µs × (1.00,1.01) -2.02% (p=0.001)
Revcomp 936ms × (0.95,1.08) 922ms × (0.97,1.08) ~ (p=0.234)
Template 130ms × (0.98,1.04) 126ms × (0.99,1.01) -2.99% (p=0.000)
TimeParse 638ns × (0.98,1.05) 628ns × (0.99,1.01) -1.54% (p=0.004)
TimeFormat 674ns × (0.99,1.01) 668ns × (0.99,1.01) -0.80% (p=0.001)
The slowdown of the first few benchmarks seems to be due to the new
atomic operations for certain small size allocations. But the larger
benchmarks mostly improve, probably due to the decreased memory
pressure from having half as much heap bitmap.
CL 9706, which removes the (never used anymore) wbshadow mode,
gets back what is lost in the early microbenchmarks.
Change-Id: I37423a209e8ec2a2e92538b45cac5422a6acd32d
Reviewed-on: https://go-review.googlesource.com/9705
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-05-04 20:53:54 -06:00
|
|
|
return mask
|
2014-08-15 12:36:12 -06:00
|
|
|
}
|
|
|
|
|
2014-07-29 01:01:02 -06:00
|
|
|
var gcinfoSink interface{}
|
|
|
|
|
|
|
|
func escape(p interface{}) interface{} {
|
|
|
|
gcinfoSink = p
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
|
2015-05-15 12:23:23 -06:00
|
|
|
var infoPtr = []byte{typePointer}
|
|
|
|
|
|
|
|
type Ptr struct {
|
|
|
|
*byte
|
|
|
|
}
|
|
|
|
|
2015-05-04 08:19:24 -06:00
|
|
|
var infoPtr10 = []byte{typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer, typePointer}
|
2014-07-29 01:01:02 -06:00
|
|
|
|
|
|
|
type ScalarPtr struct {
|
|
|
|
q int
|
|
|
|
w *int
|
|
|
|
e int
|
|
|
|
r *int
|
|
|
|
t int
|
|
|
|
y *int
|
|
|
|
}
|
|
|
|
|
2015-01-16 12:43:38 -07:00
|
|
|
var infoScalarPtr = []byte{typeScalar, typePointer, typeScalar, typePointer, typeScalar, typePointer}
|
2014-07-29 01:01:02 -06:00
|
|
|
|
2015-05-04 08:19:24 -06:00
|
|
|
var infoScalarPtr4 = append(append(append(append([]byte(nil), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...), infoScalarPtr...)
|
|
|
|
|
2014-07-29 01:01:02 -06:00
|
|
|
type PtrScalar struct {
|
|
|
|
q *int
|
|
|
|
w int
|
|
|
|
e *int
|
|
|
|
r int
|
|
|
|
t *int
|
|
|
|
y int
|
|
|
|
}
|
|
|
|
|
2015-01-16 12:43:38 -07:00
|
|
|
var infoPtrScalar = []byte{typePointer, typeScalar, typePointer, typeScalar, typePointer, typeScalar}
|
2014-07-29 01:01:02 -06:00
|
|
|
|
2014-08-25 12:38:19 -06:00
|
|
|
type BigStruct struct {
|
2014-07-29 01:01:02 -06:00
|
|
|
q *int
|
|
|
|
w byte
|
|
|
|
e [17]byte
|
|
|
|
r []byte
|
|
|
|
t int
|
|
|
|
y uint16
|
|
|
|
u uint64
|
|
|
|
i string
|
|
|
|
}
|
|
|
|
|
2014-08-25 12:38:19 -06:00
|
|
|
func infoBigStruct() []byte {
|
2014-07-29 01:01:02 -06:00
|
|
|
switch runtime.GOARCH {
|
|
|
|
case "386", "arm":
|
|
|
|
return []byte{
|
2015-01-16 12:43:38 -07:00
|
|
|
typePointer, // q *int
|
|
|
|
typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
typePointer, typeScalar, typeScalar, // r []byte
|
2015-01-16 12:43:38 -07:00
|
|
|
typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
typePointer, typeScalar, // i string
|
2014-07-29 01:01:02 -06:00
|
|
|
}
|
2016-03-18 17:02:52 -06:00
|
|
|
case "arm64", "amd64", "mips64", "mips64le", "ppc64", "ppc64le", "s390x":
|
2014-07-29 01:01:02 -06:00
|
|
|
return []byte{
|
2015-01-16 12:43:38 -07:00
|
|
|
typePointer, // q *int
|
|
|
|
typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
typePointer, typeScalar, typeScalar, // r []byte
|
2015-01-16 12:43:38 -07:00
|
|
|
typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
typePointer, typeScalar, // i string
|
2014-07-29 01:01:02 -06:00
|
|
|
}
|
|
|
|
case "amd64p32":
|
|
|
|
return []byte{
|
2015-01-16 12:43:38 -07:00
|
|
|
typePointer, // q *int
|
|
|
|
typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // w byte; e [17]byte
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
typePointer, typeScalar, typeScalar, // r []byte
|
|
|
|
typeScalar, typeScalar, typeScalar, typeScalar, typeScalar, // t int; y uint16; u uint64
|
|
|
|
typePointer, typeScalar, // i string
|
2014-07-29 01:01:02 -06:00
|
|
|
}
|
|
|
|
default:
|
|
|
|
panic("unknown arch")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-18 12:21:55 -06:00
|
|
|
type Iface interface {
|
|
|
|
f()
|
|
|
|
}
|
|
|
|
|
|
|
|
type IfaceImpl int
|
|
|
|
|
|
|
|
func (IfaceImpl) f() {
|
|
|
|
}
|
|
|
|
|
2014-07-29 01:01:02 -06:00
|
|
|
var (
|
|
|
|
// BSS
|
2015-05-15 12:23:23 -06:00
|
|
|
bssPtr Ptr
|
2014-07-29 01:01:02 -06:00
|
|
|
bssScalarPtr ScalarPtr
|
|
|
|
bssPtrScalar PtrScalar
|
2014-08-25 12:38:19 -06:00
|
|
|
bssBigStruct BigStruct
|
2014-07-29 01:01:02 -06:00
|
|
|
bssString string
|
2014-08-18 12:21:55 -06:00
|
|
|
bssSlice []string
|
2014-07-29 01:01:02 -06:00
|
|
|
bssEface interface{}
|
2014-08-18 12:21:55 -06:00
|
|
|
bssIface Iface
|
2014-07-29 01:01:02 -06:00
|
|
|
|
|
|
|
// DATA
|
2015-05-15 12:23:23 -06:00
|
|
|
dataPtr = Ptr{new(byte)}
|
2014-07-29 01:01:02 -06:00
|
|
|
dataScalarPtr = ScalarPtr{q: 1}
|
|
|
|
dataPtrScalar = PtrScalar{w: 1}
|
2014-08-25 12:38:19 -06:00
|
|
|
dataBigStruct = BigStruct{w: 1}
|
2014-07-29 01:01:02 -06:00
|
|
|
dataString = "foo"
|
2014-08-18 12:21:55 -06:00
|
|
|
dataSlice = []string{"foo"}
|
2014-07-29 01:01:02 -06:00
|
|
|
dataEface interface{} = 42
|
2014-08-18 12:21:55 -06:00
|
|
|
dataIface Iface = IfaceImpl(42)
|
2014-07-29 01:01:02 -06:00
|
|
|
|
cmd/internal/gc, runtime: use 1-bit bitmap for stack frames, data, bss
The bitmaps were 2 bits per pointer because we needed to distinguish
scalar, pointer, multiword, and we used the leftover value to distinguish
uninitialized from scalar, even though the garbage collector (GC) didn't care.
Now that there are no multiword structures from the GC's point of view,
cut the bitmaps down to 1 bit per pointer, recording just live pointer vs not.
The GC assumes the same layout for stack frames and for the maps
describing the global data and bss sections, so change them all in one CL.
The code still refers to 4-bit heap bitmaps and 2-bit "type bitmaps", since
the 2-bit representation lives (at least for now) in some of the reflect data.
Because these stack frame bitmaps are stored directly in the rodata in
the binary, this CL reduces the size of the 6g binary by about 1.1%.
Performance change is basically a wash, but using less memory,
and smaller binaries, and enables other bitmap reductions.
name old mean new mean delta
BenchmarkBinaryTree17 13.2s × (0.97,1.03) 13.0s × (0.99,1.01) -0.93% (p=0.005)
BenchmarkBinaryTree17-2 9.69s × (0.96,1.05) 9.51s × (0.96,1.03) -1.86% (p=0.001)
BenchmarkBinaryTree17-4 10.1s × (0.97,1.05) 10.0s × (0.96,1.05) ~ (p=0.141)
BenchmarkFannkuch11 4.35s × (0.99,1.01) 4.43s × (0.98,1.04) +1.75% (p=0.001)
BenchmarkFannkuch11-2 4.31s × (0.99,1.03) 4.32s × (1.00,1.00) ~ (p=0.095)
BenchmarkFannkuch11-4 4.32s × (0.99,1.02) 4.38s × (0.98,1.04) +1.38% (p=0.008)
BenchmarkFmtFprintfEmpty 83.5ns × (0.97,1.10) 87.3ns × (0.92,1.11) +4.55% (p=0.014)
BenchmarkFmtFprintfEmpty-2 81.8ns × (0.98,1.04) 82.5ns × (0.97,1.08) ~ (p=0.364)
BenchmarkFmtFprintfEmpty-4 80.9ns × (0.99,1.01) 82.6ns × (0.97,1.08) +2.12% (p=0.010)
BenchmarkFmtFprintfString 320ns × (0.95,1.04) 322ns × (0.97,1.05) ~ (p=0.368)
BenchmarkFmtFprintfString-2 303ns × (0.97,1.04) 304ns × (0.97,1.04) ~ (p=0.484)
BenchmarkFmtFprintfString-4 305ns × (0.97,1.05) 306ns × (0.98,1.05) ~ (p=0.543)
BenchmarkFmtFprintfInt 311ns × (0.98,1.03) 319ns × (0.97,1.03) +2.63% (p=0.000)
BenchmarkFmtFprintfInt-2 297ns × (0.98,1.04) 301ns × (0.97,1.04) +1.19% (p=0.023)
BenchmarkFmtFprintfInt-4 302ns × (0.98,1.02) 304ns × (0.97,1.03) ~ (p=0.126)
BenchmarkFmtFprintfIntInt 554ns × (0.96,1.05) 554ns × (0.97,1.03) ~ (p=0.975)
BenchmarkFmtFprintfIntInt-2 520ns × (0.98,1.03) 517ns × (0.98,1.02) ~ (p=0.153)
BenchmarkFmtFprintfIntInt-4 524ns × (0.98,1.02) 525ns × (0.98,1.03) ~ (p=0.597)
BenchmarkFmtFprintfPrefixedInt 433ns × (0.97,1.06) 434ns × (0.97,1.06) ~ (p=0.804)
BenchmarkFmtFprintfPrefixedInt-2 413ns × (0.98,1.04) 413ns × (0.98,1.03) ~ (p=0.881)
BenchmarkFmtFprintfPrefixedInt-4 420ns × (0.97,1.03) 421ns × (0.97,1.03) ~ (p=0.561)
BenchmarkFmtFprintfFloat 620ns × (0.99,1.03) 636ns × (0.97,1.03) +2.57% (p=0.000)
BenchmarkFmtFprintfFloat-2 601ns × (0.98,1.02) 617ns × (0.98,1.03) +2.58% (p=0.000)
BenchmarkFmtFprintfFloat-4 613ns × (0.98,1.03) 626ns × (0.98,1.02) +2.15% (p=0.000)
BenchmarkFmtManyArgs 2.19µs × (0.96,1.04) 2.23µs × (0.97,1.02) +1.65% (p=0.000)
BenchmarkFmtManyArgs-2 2.08µs × (0.98,1.03) 2.10µs × (0.99,1.02) +0.79% (p=0.019)
BenchmarkFmtManyArgs-4 2.10µs × (0.98,1.02) 2.13µs × (0.98,1.02) +1.72% (p=0.000)
BenchmarkGobDecode 21.3ms × (0.97,1.05) 21.1ms × (0.97,1.04) -1.36% (p=0.025)
BenchmarkGobDecode-2 20.0ms × (0.97,1.03) 19.2ms × (0.97,1.03) -4.00% (p=0.000)
BenchmarkGobDecode-4 19.5ms × (0.99,1.02) 19.0ms × (0.99,1.01) -2.39% (p=0.000)
BenchmarkGobEncode 18.3ms × (0.95,1.07) 18.1ms × (0.96,1.08) ~ (p=0.305)
BenchmarkGobEncode-2 16.8ms × (0.97,1.02) 16.4ms × (0.98,1.02) -2.79% (p=0.000)
BenchmarkGobEncode-4 15.4ms × (0.98,1.02) 15.4ms × (0.98,1.02) ~ (p=0.465)
BenchmarkGzip 650ms × (0.98,1.03) 655ms × (0.97,1.04) ~ (p=0.075)
BenchmarkGzip-2 652ms × (0.98,1.03) 655ms × (0.98,1.02) ~ (p=0.337)
BenchmarkGzip-4 656ms × (0.98,1.04) 653ms × (0.98,1.03) ~ (p=0.291)
BenchmarkGunzip 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.507)
BenchmarkGunzip-2 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.313)
BenchmarkGunzip-4 143ms × (1.00,1.01) 143ms × (1.00,1.01) ~ (p=0.312)
BenchmarkHTTPClientServer 110µs × (0.98,1.03) 109µs × (0.99,1.02) -1.40% (p=0.000)
BenchmarkHTTPClientServer-2 154µs × (0.90,1.08) 149µs × (0.90,1.08) -3.43% (p=0.007)
BenchmarkHTTPClientServer-4 138µs × (0.97,1.04) 138µs × (0.96,1.04) ~ (p=0.670)
BenchmarkJSONEncode 40.2ms × (0.98,1.02) 40.2ms × (0.98,1.05) ~ (p=0.828)
BenchmarkJSONEncode-2 35.1ms × (0.99,1.02) 35.2ms × (0.98,1.03) ~ (p=0.392)
BenchmarkJSONEncode-4 35.3ms × (0.98,1.03) 35.3ms × (0.98,1.02) ~ (p=0.813)
BenchmarkJSONDecode 119ms × (0.97,1.02) 117ms × (0.98,1.02) -1.80% (p=0.000)
BenchmarkJSONDecode-2 115ms × (0.99,1.02) 114ms × (0.98,1.02) -1.18% (p=0.000)
BenchmarkJSONDecode-4 116ms × (0.98,1.02) 114ms × (0.98,1.02) -1.43% (p=0.000)
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.985)
BenchmarkMandelbrot200-2 6.03ms × (1.00,1.01) 6.02ms × (1.00,1.01) ~ (p=0.320)
BenchmarkMandelbrot200-4 6.03ms × (1.00,1.01) 6.03ms × (1.00,1.01) ~ (p=0.799)
BenchmarkGoParse 8.63ms × (0.89,1.10) 8.58ms × (0.93,1.09) ~ (p=0.667)
BenchmarkGoParse-2 8.20ms × (0.97,1.04) 8.37ms × (0.97,1.04) +1.96% (p=0.001)
BenchmarkGoParse-4 8.00ms × (0.98,1.02) 8.14ms × (0.99,1.02) +1.75% (p=0.000)
BenchmarkRegexpMatchEasy0_32 162ns × (1.00,1.01) 164ns × (0.98,1.04) +1.35% (p=0.011)
BenchmarkRegexpMatchEasy0_32-2 161ns × (1.00,1.01) 161ns × (1.00,1.00) ~ (p=0.185)
BenchmarkRegexpMatchEasy0_32-4 161ns × (1.00,1.00) 161ns × (1.00,1.00) -0.19% (p=0.001)
BenchmarkRegexpMatchEasy0_1K 540ns × (0.99,1.02) 566ns × (0.98,1.04) +4.98% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-2 540ns × (0.99,1.01) 557ns × (0.99,1.01) +3.21% (p=0.000)
BenchmarkRegexpMatchEasy0_1K-4 541ns × (0.99,1.01) 559ns × (0.99,1.01) +3.26% (p=0.000)
BenchmarkRegexpMatchEasy1_32 139ns × (0.98,1.04) 139ns × (0.99,1.03) ~ (p=0.979)
BenchmarkRegexpMatchEasy1_32-2 139ns × (0.99,1.04) 139ns × (0.99,1.02) ~ (p=0.777)
BenchmarkRegexpMatchEasy1_32-4 139ns × (0.98,1.04) 139ns × (0.99,1.04) ~ (p=0.771)
BenchmarkRegexpMatchEasy1_1K 890ns × (0.99,1.03) 885ns × (1.00,1.01) -0.50% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-2 888ns × (0.99,1.01) 885ns × (0.99,1.01) -0.37% (p=0.004)
BenchmarkRegexpMatchEasy1_1K-4 890ns × (0.99,1.02) 884ns × (1.00,1.00) -0.70% (p=0.000)
BenchmarkRegexpMatchMedium_32 252ns × (0.99,1.01) 251ns × (0.99,1.01) ~ (p=0.081)
BenchmarkRegexpMatchMedium_32-2 254ns × (0.99,1.04) 252ns × (0.99,1.01) -0.78% (p=0.027)
BenchmarkRegexpMatchMedium_32-4 253ns × (0.99,1.04) 252ns × (0.99,1.01) -0.70% (p=0.022)
BenchmarkRegexpMatchMedium_1K 72.9µs × (0.99,1.01) 72.7µs × (1.00,1.00) ~ (p=0.064)
BenchmarkRegexpMatchMedium_1K-2 74.1µs × (0.98,1.05) 72.9µs × (1.00,1.01) -1.61% (p=0.001)
BenchmarkRegexpMatchMedium_1K-4 73.6µs × (0.99,1.05) 72.8µs × (1.00,1.00) -1.13% (p=0.007)
BenchmarkRegexpMatchHard_32 3.88µs × (0.99,1.03) 3.92µs × (0.98,1.05) ~ (p=0.143)
BenchmarkRegexpMatchHard_32-2 3.89µs × (0.99,1.03) 3.93µs × (0.98,1.09) ~ (p=0.278)
BenchmarkRegexpMatchHard_32-4 3.90µs × (0.99,1.05) 3.93µs × (0.98,1.05) ~ (p=0.252)
BenchmarkRegexpMatchHard_1K 118µs × (0.99,1.01) 117µs × (0.99,1.02) -0.54% (p=0.003)
BenchmarkRegexpMatchHard_1K-2 118µs × (0.99,1.01) 118µs × (0.99,1.03) ~ (p=0.581)
BenchmarkRegexpMatchHard_1K-4 118µs × (0.99,1.02) 117µs × (0.99,1.01) -0.54% (p=0.002)
BenchmarkRevcomp 991ms × (0.95,1.10) 989ms × (0.94,1.08) ~ (p=0.879)
BenchmarkRevcomp-2 978ms × (0.95,1.11) 962ms × (0.96,1.08) ~ (p=0.257)
BenchmarkRevcomp-4 979ms × (0.96,1.07) 974ms × (0.96,1.11) ~ (p=0.678)
BenchmarkTemplate 141ms × (0.99,1.02) 145ms × (0.99,1.02) +2.75% (p=0.000)
BenchmarkTemplate-2 135ms × (0.98,1.02) 138ms × (0.99,1.02) +2.34% (p=0.000)
BenchmarkTemplate-4 136ms × (0.98,1.02) 140ms × (0.99,1.02) +2.71% (p=0.000)
BenchmarkTimeParse 640ns × (0.99,1.01) 622ns × (0.99,1.01) -2.88% (p=0.000)
BenchmarkTimeParse-2 640ns × (0.99,1.01) 622ns × (1.00,1.00) -2.81% (p=0.000)
BenchmarkTimeParse-4 640ns × (1.00,1.01) 622ns × (0.99,1.01) -2.82% (p=0.000)
BenchmarkTimeFormat 730ns × (0.98,1.02) 731ns × (0.98,1.03) ~ (p=0.767)
BenchmarkTimeFormat-2 709ns × (0.99,1.02) 707ns × (0.99,1.02) ~ (p=0.347)
BenchmarkTimeFormat-4 717ns × (0.98,1.01) 718ns × (0.98,1.02) ~ (p=0.793)
Change-Id: Ie779c47e912bf80eb918bafa13638bd8dfd6c2d9
Reviewed-on: https://go-review.googlesource.com/9406
Reviewed-by: Rick Hudson <rlh@golang.org>
2015-04-27 20:45:57 -06:00
|
|
|
infoString = []byte{typePointer, typeScalar}
|
|
|
|
infoSlice = []byte{typePointer, typeScalar, typeScalar}
|
2015-01-16 12:43:38 -07:00
|
|
|
infoEface = []byte{typePointer, typePointer}
|
|
|
|
infoIface = []byte{typePointer, typePointer}
|
2014-07-29 01:01:02 -06:00
|
|
|
)
|