Dmitriy Vyukov
4e5086b993
runtime: improve Linux mutex
...
The implementation is a hybrid active/passive spin/blocking mutex.
The design minimizes the number of context switches and futex calls.
The idea is that all critical sections in the runtime are intentionally
small, so a pure blocking mutex behaves badly, causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 12:44:06 -04:00
Russ Cox
bed7e3ed78
gc: fix pprof deadlock
...
Fixes #2051 .
R=golang-dev, dsymonds
CC=golang-dev
https://golang.org/cl/4834041
2011-07-28 21:03:40 -04:00
Russ Cox
db9229def8
cgo: add GoBytes, fix gmp example
...
Fixes #1640 .
Fixes #2007 .
R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/4815063
2011-07-28 12:39:50 -04:00
Russ Cox
1bd4b6371a
gc: use more Go-like names for methods
...
Fixes #991 .
R=ken2
CC=golang-dev
https://golang.org/cl/4819049
2011-07-27 17:56:13 -04:00
Russ Cox
a84abbe508
gc: zero-width struct, zero-length array fixes
...
Fixes #1774 .
Fixes #2095 .
Fixes #2097 .
R=ken2
CC=golang-dev
https://golang.org/cl/4826046
2011-07-27 16:47:45 -04:00
Russ Cox
100a034120
runtime: higher goroutine arg limit, clearer error
...
Fixes #591 .
R=ken2
CC=golang-dev
https://golang.org/cl/4803054
2011-07-27 12:41:46 -04:00
Russ Cox
12a5774cde
gc, runtime: fix range+panic line number bugs
...
Fixes #1856 .
R=ken2
CC=golang-dev
https://golang.org/cl/4810054
2011-07-26 00:52:46 -04:00
Mikio Hara
e5437ab065
runtime: fix freebsd build
...
Fixes #2078 .
R=rsc
CC=golang-dev
https://golang.org/cl/4800052
2011-07-26 00:49:32 -04:00
Yuval Pavel Zholkover
2aa2ceb873
runtime: Plan 9, skip calling runtime·ldt0setup.
...
R=golang-dev
CC=alex.brainman, golang-dev
https://golang.org/cl/4816049
2011-07-25 12:25:41 -04:00
Dmitriy Vyukov
33ff947cac
runtime: fix compilation of send select cases
...
Fixes #2102 .
R=fullung, rsc
CC=golang-dev
https://golang.org/cl/4825043
2011-07-25 12:25:37 -04:00
Ian Lance Taylor
3a07d516b4
runtime: remove rnd calls that pass a second argument of 1
...
When rnd is called with a second argument of 1, it simply
returns the first argument anyway.
R=golang-dev, r
CC=golang-dev
https://golang.org/cl/4820045
2011-07-24 22:03:17 -07:00
Quan Yong Zhai
47410a2490
runtime: replace byte-at-a-time zeroing loop with memclr
...
R=golang-dev, r, r, dsymonds, rsc
CC=golang-dev
https://golang.org/cl/4813043
2011-07-23 15:46:58 -04:00
Russ Cox
ba134539c5
runtime: faster entersyscall/exitsyscall
...
Replace cas with xadd in scheduler.
Suggested by Dmitriy in last code review.
Verified with Promela model.
When there's actual contention for the atomic word,
this avoids the looping that compare-and-swap requires.
benchmark old ns/op new ns/op delta
runtime_test.BenchmarkSyscall 32 26 -17.08%
runtime_test.BenchmarkSyscall-2 155 59 -61.81%
runtime_test.BenchmarkSyscall-3 112 52 -52.95%
runtime_test.BenchmarkSyscall-4 94 48 -48.57%
runtime_test.BenchmarkSyscallWork 871 872 +0.11%
runtime_test.BenchmarkSyscallWork-2 481 477 -0.83%
runtime_test.BenchmarkSyscallWork-3 338 335 -0.89%
runtime_test.BenchmarkSyscallWork-4 263 256 -2.66%
R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/4800047
2011-07-23 12:22:55 -04:00
Russ Cox
226fb099d9
runtime: add UpdateMemStats, use in tests
...
Drops mallocrep1.go back to a reasonable
amount of time. (154 -> 0.8 seconds on my Mac)
Fixes #2085 .
R=golang-dev, dvyukov, r
CC=golang-dev
https://golang.org/cl/4811045
2011-07-22 00:55:01 -04:00
Russ Cox
22853098a9
gc: select functions are no longer special
...
R=ken2
CC=golang-dev
https://golang.org/cl/4794049
2011-07-21 14:10:39 -04:00
Dmitriy Vyukov
6b2ec06587
runtime: faster select
...
Make selectsend() accept a pointer to the element;
this makes it possible to make Scase fixed-size
and allocate/free Select, all Scase's and all SudoG at once.
As a consequence, the SudoG freelist dies out.
benchmark old,ns/op new,ns/op
BenchmarkSelectUncontended 1080 558
BenchmarkSelectUncontended-2 675 264
BenchmarkSelectUncontended-4 459 205
BenchmarkSelectContended 1086 560
BenchmarkSelectContended-2 1775 1672
BenchmarkSelectContended-4 2668 2149
(on Intel Q6600, 4 cores, 2.4GHz)
benchmark old ns/op new ns/op delta
BenchmarkSelectUncontended 517.00 326.00 -36.94%
BenchmarkSelectUncontended-2 281.00 166.00 -40.93%
BenchmarkSelectUncontended-4 250.00 83.10 -66.76%
BenchmarkSelectUncontended-8 107.00 47.40 -55.70%
BenchmarkSelectUncontended-16 67.80 41.30 -39.09%
BenchmarkSelectContended 513.00 325.00 -36.65%
BenchmarkSelectContended-2 699.00 628.00 -10.16%
BenchmarkSelectContended-4 1085.00 1092.00 +0.65%
BenchmarkSelectContended-8 3253.00 2477.00 -23.85%
BenchmarkSelectContended-16 5313.00 5116.00 -3.71%
(on Intel E5620, 8 HT cores, 2.4 GHz)
R=rsc, ken
CC=golang-dev
https://golang.org/cl/4811041
2011-07-21 13:57:13 -04:00
Dmitriy Vyukov
d6ed1b70ad
runtime: replace centralized ncgocall counter with a distributed one
...
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4809042
2011-07-21 11:29:08 -04:00
Dmitriy Vyukov
102b5b34a7
runtime: apply minor tweaks to channels
...
Remove complicated PRNG algorithm
(argument is limited by uint16 and can't be <= 1).
Do not require chansend/chanrecv selgen to be bumped with CAS.
R=rsc, ken
CC=golang-dev
https://golang.org/cl/4816041
2011-07-20 14:28:55 -04:00
Dmitriy Vyukov
90f3cb13fb
runtime: improve performance of sync channels
...
1. SudoG always contains a pointer to the element
(thus no variable size, and less copying).
2. chansend/chanrecv allocate SudoG on the stack.
3. Copying of elements and goroutine notifications
are moved out of critical sections.
benchmark old ns/op new ns/op delta
BenchmarkSelectUncontended 515.00 514.00 -0.19%
BenchmarkSelectUncontended-2 291.00 281.00 -3.44%
BenchmarkSelectUncontended-4 213.00 189.00 -11.27%
BenchmarkSelectUncontended-8 78.30 79.00 +0.89%
BenchmarkSelectContended 518.00 514.00 -0.77%
BenchmarkSelectContended-2 655.00 631.00 -3.66%
BenchmarkSelectContended-4 1026.00 1051.00 +2.44%
BenchmarkSelectContended-8 2026.00 2128.00 +5.03%
BenchmarkSelectNonblock 175.00 173.00 -1.14%
BenchmarkSelectNonblock-2 85.10 87.70 +3.06%
BenchmarkSelectNonblock-4 60.10 43.30 -27.95%
BenchmarkSelectNonblock-8 37.60 25.50 -32.18%
BenchmarkChanUncontended 109.00 114.00 +4.59%
BenchmarkChanUncontended-2 54.60 57.20 +4.76%
BenchmarkChanUncontended-4 27.40 28.70 +4.74%
BenchmarkChanUncontended-8 14.60 15.10 +3.42%
BenchmarkChanContended 108.00 114.00 +5.56%
BenchmarkChanContended-2 621.00 617.00 -0.64%
BenchmarkChanContended-4 759.00 677.00 -10.80%
BenchmarkChanContended-8 1635.00 1517.00 -7.22%
BenchmarkChanSync 299.00 256.00 -14.38%
BenchmarkChanSync-2 5055.00 4624.00 -8.53%
BenchmarkChanSync-4 4998.00 4680.00 -6.36%
BenchmarkChanSync-8 5019.00 4760.00 -5.16%
BenchmarkChanProdCons0 316.00 274.00 -13.29%
BenchmarkChanProdCons0-2 1280.00 617.00 -51.80%
BenchmarkChanProdCons0-4 2433.00 1332.00 -45.25%
BenchmarkChanProdCons0-8 3651.00 1934.00 -47.03%
BenchmarkChanProdCons10 153.00 152.00 -0.65%
BenchmarkChanProdCons10-2 626.00 581.00 -7.19%
BenchmarkChanProdCons10-4 1440.00 1323.00 -8.12%
BenchmarkChanProdCons10-8 2036.00 2017.00 -0.93%
R=rsc, ken
CC=golang-dev
https://golang.org/cl/4790042
2011-07-20 11:51:25 -04:00
Lucio De Re
b546f50716
runtime: make goc2c build on Plan 9
...
pkg/runtime/Makefile:
. Adjusted so "goc2c.c" is built using the Plan 9 libraries.
pkg/runtime/goc2c.c:
. Added/subtracted #include headers to correspond to Plan 9
toolkit.
. Changed fprintf(stderr,...)/exit() combinations to
sysfatal() calls, adjusted the "%u" format to "%ud".
. Added exits(0) at the end of main().
. Made main() a void-returning function and removed the
"return 0" at the end of it.
Tested on UBUNTU and Plan 9 only.
R=r, rsc
CC=golang-dev
https://golang.org/cl/4626093
2011-07-19 11:04:33 -04:00
Russ Cox
025abd530e
runtime: faster entersyscall, exitsyscall
...
Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.
benchmark old ns/op new ns/op delta
runtime_test.BenchmarkSyscall 73 31 -56.63%
runtime_test.BenchmarkSyscall-2 538 74 -86.23%
runtime_test.BenchmarkSyscall-3 508 103 -79.72%
runtime_test.BenchmarkSyscall-4 721 97 -86.52%
runtime_test.BenchmarkSyscallWork 920 873 -5.11%
runtime_test.BenchmarkSyscallWork-2 516 481 -6.78%
runtime_test.BenchmarkSyscallWork-3 550 343 -37.64%
runtime_test.BenchmarkSyscallWork-4 632 263 -58.39%
(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)
Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.
R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042
2011-07-19 11:01:17 -04:00
Wei Guangjing
9f636598ba
cgo: windows amd64 port
...
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4725041
2011-07-19 10:47:33 -04:00
Hector Chu
47e6042f73
runtime: fix select pass 3
...
Fixes #2075
R=rsc, ken, r
CC=golang-dev
https://golang.org/cl/4748045
2011-07-18 16:15:01 -04:00
Russ Cox
bd77619142
runtime: track running goroutine count
...
Used to use mcpu+msyscall but that's
problematic for packing into a single
atomic word. The running goroutine count
(where running == Go code or syscall)
can be maintained separately, always
manipulated under lock.
R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4767041
2011-07-18 15:50:55 -04:00
Dmitriy Vyukov
27753ff108
runtime: add per-M caches for MemStats
...
Avoid touching centralized state during
memory manager operations.
R=mirtchovski
CC=golang-dev, rsc
https://golang.org/cl/4766042
2011-07-18 14:56:22 -04:00
Dmitriy Vyukov
66d5c9b1e9
runtime: add per-M caches for MemStats
...
Avoid touching centralized state during
memory manager operations.
R=rsc
CC=golang-dev
https://golang.org/cl/4766042
2011-07-18 14:52:57 -04:00
Dmitriy Vyukov
c1f035ba4c
runtime: fix data race in Plan9 sysalloc
...
Add mutex to protect brk limit.
Add mstats.sys update.
R=rsc
CC=golang-dev
https://golang.org/cl/4762045
2011-07-18 10:50:04 -04:00
Nigel Tao
95323c59ea
runtime: fix panic for make(chan [0]byte).
...
I suspect that this was introduced by
http://code.google.com/p/go/source/detail?r=6e4ee32fffd1
R=r
CC=golang-dev
https://golang.org/cl/4764045
2011-07-18 15:54:11 +10:00
Dmitriy Vyukov
491aa1579d
runtime: native xadd for 386/amd64
...
benchmark old ns/op new ns/op delta
BenchmarkSemaUncontended 37.40 34.10 -8.82%
BenchmarkSemaUncontended-2 18.90 17.70 -6.35%
BenchmarkSemaUncontended-4 11.90 10.90 -8.40%
BenchmarkSemaUncontended-8 6.26 5.19 -17.09%
BenchmarkSemaUncontended-16 4.39 3.91 -10.93%
BenchmarkSemaSyntNonblock 38.00 35.30 -7.11%
BenchmarkSemaSyntNonblock-2 83.00 46.70 -43.73%
BenchmarkSemaSyntNonblock-4 124.00 101.00 -18.55%
BenchmarkSemaSyntNonblock-8 124.00 116.00 -6.45%
BenchmarkSemaSyntNonblock-16 148.00 114.00 -22.97%
(on HP Z600 2 x Xeon E5620, 8 HT cores, 2.40GHz)
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4755041
2011-07-15 11:27:16 -04:00
Russ Cox
29125be5c7
runtime: make TestSideEffectOrder work twice
...
R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/4714045
2011-07-14 23:43:03 -04:00
Alex Brainman
dde435587d
runtime: correct FixedStack value (fixes windows build)
...
Fixes #2068 .
R=rsc
CC=golang-dev
https://golang.org/cl/4705046
2011-07-14 09:13:39 +10:00
Wei Guangjing
a6e60916c1
runtime: stdcall_raw stack 16byte align for Win64
...
R=alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4681049
2011-07-13 11:44:44 -07:00
Dmitriy Vyukov
86a659cad0
runtime: fix data race during Itab hash update/lookup
...
The data race is on newly published Itab nodes, which are
both unsafely published and unsafely acquired. It can
break on IA-32/Intel64 due to compiler optimizations
(most likely not an issue as of now) and on ARM due to
hardware memory access reorderings.
R=rsc
CC=golang-dev
https://golang.org/cl/4673055
2011-07-13 11:22:41 -07:00
Quan Yong Zhai
fe9991e8b2
runtime: replace runtime.mcpy with runtime.memmove
...
faster string operations, and more
tested on linux/386
runtime_test.BenchmarkSliceToString 642 532 -17.13%
runtime_test.BenchmarkStringToSlice 636 528 -16.98%
runtime_test.BenchmarkConcatString 1109 897 -19.12%
R=r, iant, rsc
CC=golang-dev
https://golang.org/cl/4674042
2011-07-12 17:30:40 -07:00
Dmitriy Vyukov
86e7323bdf
runtime: eliminate false sharing during stack growth
...
Remove static variable from runtime·oldstack().
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows (with CL 4657091 applied):
benchmark old ns/op new ns/op delta
BenchmarkStackGrowth 1183.00 1180.00 -0.25%
BenchmarkStackGrowth-2 1249.00 1211.00 -3.04%
BenchmarkStackGrowth-4 954.00 805.00 -15.62%
BenchmarkStackGrowth-8 701.00 683.00 -2.57%
BenchmarkStackGrowth-16 465.00 415.00 -10.75%
R=rsc
CC=golang-dev
https://golang.org/cl/4693042
2011-07-12 10:56:21 -07:00
Russ Cox
88e0c0517a
runtime: fix comment (lost in shuffle)
...
TBR=dvyukov
CC=golang-dev
https://golang.org/cl/4710041
2011-07-12 09:26:05 -07:00
Dmitriy Vyukov
c9152a8568
runtime: eliminate contention during stack allocation
...
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark old ns/op new ns/op delta
BenchmarkStackGrowth 1045.00 949.00 -9.19%
BenchmarkStackGrowth-2 3450.00 800.00 -76.81%
BenchmarkStackGrowth-4 5076.00 513.00 -89.89%
BenchmarkStackGrowth-8 7805.00 471.00 -93.97%
BenchmarkStackGrowth-16 11751.00 321.00 -97.27%
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091
2011-07-12 09:24:32 -07:00
Dmitriy Vyukov
013ad89c9b
runtime: eliminate false sharing on runtime.goidgen
...
runtime.goidgen can be quite frequently modified and
shares a cache line with the following variables,
which leads to false sharing.
50c6b0 b nfname
50c6b4 b nfunc
50c6b8 b nfunc$17
50c6bc b nhist$17
50c6c0 B runtime.checking
50c6c4 B runtime.gcwaiting
50c6c8 B runtime.goidgen
50c6cc B runtime.gomaxprocs
50c6d0 B runtime.panicking
50c6d4 B strconv.IntSize
50c6d8 B src/pkg/runtime/_xtest_.ss
50c6e0 B src/pkg/runtime/_xtest_.stop
50c6e8 b addrfree
50c6f0 b addrmem
50c6f8 b argv
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4673054
2011-07-12 01:25:14 -04:00
Dmitriy Vyukov
909f31872a
runtime: eliminate false sharing on random number generators
...
Use machine-local random number generator instead of
racy global ones.
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4674049
2011-07-12 01:23:58 -04:00
Dmitriy Vyukov
f9f21aa1fb
runtime: fix data race on runtime·maxstring
...
The data race can lead to erroneous output of
"[invalid string]" instead of a string.
R=golang-dev
CC=golang-dev
https://golang.org/cl/4678049
2011-07-12 01:21:06 -04:00
Wei Guangjing
f83609f642
runtime: windows/amd64 port
...
R=rsc, alex.brainman, hectorchu, r
CC=golang-dev
https://golang.org/cl/3759042
2011-06-29 17:37:56 +10:00
Mikio Hara
161deaa85c
runtime/cgo: fix build
...
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4629082
2011-06-28 22:26:31 -04:00
Dmitriy Vyukov
997c00f991
runtime: replace Semacquire/Semrelease implementation
...
1. The implementation uses distributed hash table of waitlists instead of a centralized one.
It significantly improves scalability for uncontended semaphores.
2. The implementation provides wait-free fast-path for signalers.
3. The implementation uses fewer locks (1 lock/unlock instead of 5 for Semacquire).
4. runtime·ready() call is moved out of critical section.
5. Semacquire() does not call semwake().
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark old ns/op new ns/op delta
runtime_test.BenchmarkSemaUncontended 58.20 36.30 -37.63%
runtime_test.BenchmarkSemaUncontended-2 199.00 18.30 -90.80%
runtime_test.BenchmarkSemaUncontended-4 327.00 9.20 -97.19%
runtime_test.BenchmarkSemaUncontended-8 491.00 5.32 -98.92%
runtime_test.BenchmarkSemaUncontended-16 946.00 4.18 -99.56%
runtime_test.BenchmarkSemaSyntNonblock 59.00 36.80 -37.63%
runtime_test.BenchmarkSemaSyntNonblock-2 167.00 138.00 -17.37%
runtime_test.BenchmarkSemaSyntNonblock-4 333.00 129.00 -61.26%
runtime_test.BenchmarkSemaSyntNonblock-8 464.00 130.00 -71.98%
runtime_test.BenchmarkSemaSyntNonblock-16 1015.00 136.00 -86.60%
runtime_test.BenchmarkSemaSyntBlock 58.80 36.70 -37.59%
runtime_test.BenchmarkSemaSyntBlock-2 294.00 149.00 -49.32%
runtime_test.BenchmarkSemaSyntBlock-4 333.00 177.00 -46.85%
runtime_test.BenchmarkSemaSyntBlock-8 471.00 221.00 -53.08%
runtime_test.BenchmarkSemaSyntBlock-16 990.00 227.00 -77.07%
runtime_test.BenchmarkSemaWorkNonblock 829.00 832.00 +0.36%
runtime_test.BenchmarkSemaWorkNonblock-2 425.00 419.00 -1.41%
runtime_test.BenchmarkSemaWorkNonblock-4 308.00 220.00 -28.57%
runtime_test.BenchmarkSemaWorkNonblock-8 394.00 147.00 -62.69%
runtime_test.BenchmarkSemaWorkNonblock-16 1510.00 149.00 -90.13%
runtime_test.BenchmarkSemaWorkBlock 828.00 813.00 -1.81%
runtime_test.BenchmarkSemaWorkBlock-2 428.00 436.00 +1.87%
runtime_test.BenchmarkSemaWorkBlock-4 232.00 219.00 -5.60%
runtime_test.BenchmarkSemaWorkBlock-8 392.00 251.00 -35.97%
runtime_test.BenchmarkSemaWorkBlock-16 1524.00 298.00 -80.45%
sync_test.BenchmarkMutexUncontended 24.10 24.00 -0.41%
sync_test.BenchmarkMutexUncontended-2 12.00 12.00 +0.00%
sync_test.BenchmarkMutexUncontended-4 6.25 6.17 -1.28%
sync_test.BenchmarkMutexUncontended-8 3.43 3.34 -2.62%
sync_test.BenchmarkMutexUncontended-16 2.34 2.32 -0.85%
sync_test.BenchmarkMutex 24.70 24.70 +0.00%
sync_test.BenchmarkMutex-2 208.00 99.50 -52.16%
sync_test.BenchmarkMutex-4 2744.00 256.00 -90.67%
sync_test.BenchmarkMutex-8 5137.00 556.00 -89.18%
sync_test.BenchmarkMutex-16 5368.00 1284.00 -76.08%
sync_test.BenchmarkMutexSlack 24.70 25.00 +1.21%
sync_test.BenchmarkMutexSlack-2 1094.00 186.00 -83.00%
sync_test.BenchmarkMutexSlack-4 3430.00 402.00 -88.28%
sync_test.BenchmarkMutexSlack-8 5051.00 1066.00 -78.90%
sync_test.BenchmarkMutexSlack-16 6806.00 1363.00 -79.97%
sync_test.BenchmarkMutexWork 793.00 792.00 -0.13%
sync_test.BenchmarkMutexWork-2 398.00 398.00 +0.00%
sync_test.BenchmarkMutexWork-4 1441.00 308.00 -78.63%
sync_test.BenchmarkMutexWork-8 8532.00 847.00 -90.07%
sync_test.BenchmarkMutexWork-16 8225.00 2760.00 -66.44%
sync_test.BenchmarkMutexWorkSlack 793.00 793.00 +0.00%
sync_test.BenchmarkMutexWorkSlack-2 418.00 414.00 -0.96%
sync_test.BenchmarkMutexWorkSlack-4 4481.00 480.00 -89.29%
sync_test.BenchmarkMutexWorkSlack-8 6317.00 1598.00 -74.70%
sync_test.BenchmarkMutexWorkSlack-16 9111.00 3038.00 -66.66%
R=rsc
CC=golang-dev
https://golang.org/cl/4631059
2011-06-28 15:09:53 -04:00
Albert Strasheim
a026d0fc76
runtime/cgo: check for errors from pthread_create
...
R=rsc, iant, dvyukov
CC=golang-dev
https://golang.org/cl/4643057
2011-06-28 12:04:50 -04:00
Dmitriy Vyukov
660b22988b
runtime: add Semacquire/Semrelease benchmarks
...
R=rsc
CC=golang-dev
https://golang.org/cl/4625065
2011-06-28 11:15:24 -04:00
Alex Brainman
6b648cafde
runtime: another attempt to allow stdcall to be used from both 386 and amd64 arch
...
R=rsc
CC=golang-dev, vcc.163
https://golang.org/cl/4627071
2011-06-28 12:46:16 +10:00
Rob Pike
ebb1566a46
strings.Split: make the default to split all.
...
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.
R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051
2011-06-28 09:43:14 +10:00
Gustavo Niemeyer
65b036c381
runtime: don't use twice the memory with grsec-like kernels
...
grsec needs the FIXED flag to be provided to mmap, which
works now. That said, when the allocation fails to be made
in the specific address, we're still given back a writable
page. This change will unmap that page to avoid using
twice the amount of memory needed.
It'd also be pretty easy to avoid the extra system calls
once we detected that the flag is needed, but I'm not sure
if that edge case is worth the effort.
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4634086
2011-06-24 00:29:59 -03:00
Russ Cox
c475c3071a
5c: do not use R9 and R10
...
This program used to use R9 and R10.
Now it fails to compile (out of registers).
I used to know a simpler test but can't remember it.
Learned something new: Rietveld refuses change
list descriptions bigger than 10 kB.
int sum(int x, int y, int z, int w) {
return
(((((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))))%
(((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))))*
((((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))))%
(((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))))))*
(((((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))))%
(((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))))*
((((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))))%
(((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))))/
((((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))|
(((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w))&
((x*y+z*w|x*y+z*w)^
(x*y+z*w|x*y+z*w)))))))
;
}
R=ken2
CC=golang-dev
https://golang.org/cl/4650053
2011-06-22 23:22:36 -04:00
Robert Hencke
b88e669a8f
nacl, tiny: remove vestiges
...
R=golang-dev, r, rsc
CC=golang-dev
https://golang.org/cl/4635053
2011-06-21 12:02:40 -04:00