1
0
mirror of https://github.com/golang/go synced 2024-11-20 02:34:42 -07:00
Commit Graph

571 Commits

Author SHA1 Message Date
Dmitriy Vyukov
4e5086b993 runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.

On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark                     old ns/op    new ns/op    delta
BenchmarkSelectContended         521.00       503.00   -3.45%
BenchmarkSelectContended-2       661.00       320.00  -51.59%
BenchmarkSelectContended-4      1139.00       629.00  -44.78%
BenchmarkSelectContended-8      2870.00       878.00  -69.41%
BenchmarkSelectContended-16     5276.00       818.00  -84.50%
BenchmarkChanContended           112.00       103.00   -8.04%
BenchmarkChanContended-2         631.00       174.00  -72.42%
BenchmarkChanContended-4         682.00       272.00  -60.12%
BenchmarkChanContended-8        1601.00       520.00  -67.52%
BenchmarkChanContended-16       3100.00       372.00  -88.00%
BenchmarkChanSync                253.00       239.00   -5.53%
BenchmarkChanSync-2             5030.00      4648.00   -7.59%
BenchmarkChanSync-4             4826.00      4694.00   -2.74%
BenchmarkChanSync-8             4778.00      4713.00   -1.36%
BenchmarkChanSync-16            5289.00      4710.00  -10.95%
BenchmarkChanProdCons0           273.00       254.00   -6.96%
BenchmarkChanProdCons0-2         599.00       400.00  -33.22%
BenchmarkChanProdCons0-4        1168.00       659.00  -43.58%
BenchmarkChanProdCons0-8        2831.00      1057.00  -62.66%
BenchmarkChanProdCons0-16       4197.00      1037.00  -75.29%
BenchmarkChanProdCons10          150.00       140.00   -6.67%
BenchmarkChanProdCons10-2        607.00       268.00  -55.85%
BenchmarkChanProdCons10-4       1137.00       404.00  -64.47%
BenchmarkChanProdCons10-8       2115.00       828.00  -60.85%
BenchmarkChanProdCons10-16      4283.00       855.00  -80.04%
BenchmarkChanProdCons100         117.00       110.00   -5.98%
BenchmarkChanProdCons100-2       558.00       218.00  -60.93%
BenchmarkChanProdCons100-4       722.00       287.00  -60.25%
BenchmarkChanProdCons100-8      1840.00       431.00  -76.58%
BenchmarkChanProdCons100-16     3394.00       448.00  -86.80%
BenchmarkChanProdConsWork0      2014.00      1996.00   -0.89%
BenchmarkChanProdConsWork0-2    1207.00      1127.00   -6.63%
BenchmarkChanProdConsWork0-4    1913.00       611.00  -68.06%
BenchmarkChanProdConsWork0-8    3016.00       949.00  -68.53%
BenchmarkChanProdConsWork0-16   4320.00      1154.00  -73.29%
BenchmarkChanProdConsWork10     1906.00      1897.00   -0.47%
BenchmarkChanProdConsWork10-2   1123.00      1033.00   -8.01%
BenchmarkChanProdConsWork10-4   1076.00       571.00  -46.93%
BenchmarkChanProdConsWork10-8   2748.00      1096.00  -60.12%
BenchmarkChanProdConsWork10-16  4600.00      1105.00  -75.98%
BenchmarkChanProdConsWork100    1884.00      1852.00   -1.70%
BenchmarkChanProdConsWork100-2  1235.00      1146.00   -7.21%
BenchmarkChanProdConsWork100-4  1217.00       619.00  -49.14%
BenchmarkChanProdConsWork100-8  1534.00       509.00  -66.82%
BenchmarkChanProdConsWork100-16 4126.00       918.00  -77.75%
BenchmarkSyscall                  34.40        33.30   -3.20%
BenchmarkSyscall-2               160.00       121.00  -24.38%
BenchmarkSyscall-4               131.00       136.00   +3.82%
BenchmarkSyscall-8               139.00       131.00   -5.76%
BenchmarkSyscall-16              161.00       168.00   +4.35%
BenchmarkSyscallWork             950.00       950.00   +0.00%
BenchmarkSyscallWork-2           481.00       480.00   -0.21%
BenchmarkSyscallWork-4           268.00       270.00   +0.75%
BenchmarkSyscallWork-8           156.00       169.00   +8.33%
BenchmarkSyscallWork-16          188.00       184.00   -2.13%
BenchmarkSemaSyntNonblock         36.40        35.60   -2.20%
BenchmarkSemaSyntNonblock-2       81.40        45.10  -44.59%
BenchmarkSemaSyntNonblock-4      126.00       108.00  -14.29%
BenchmarkSemaSyntNonblock-8      112.00       112.00   +0.00%
BenchmarkSemaSyntNonblock-16     110.00       112.00   +1.82%
BenchmarkSemaSyntBlock            35.30        35.30   +0.00%
BenchmarkSemaSyntBlock-2         118.00       124.00   +5.08%
BenchmarkSemaSyntBlock-4         105.00       108.00   +2.86%
BenchmarkSemaSyntBlock-8         101.00       111.00   +9.90%
BenchmarkSemaSyntBlock-16        112.00       118.00   +5.36%
BenchmarkSemaWorkNonblock        810.00       811.00   +0.12%
BenchmarkSemaWorkNonblock-2      476.00       414.00  -13.03%
BenchmarkSemaWorkNonblock-4      238.00       228.00   -4.20%
BenchmarkSemaWorkNonblock-8      140.00       126.00  -10.00%
BenchmarkSemaWorkNonblock-16     117.00       116.00   -0.85%
BenchmarkSemaWorkBlock           810.00       811.00   +0.12%
BenchmarkSemaWorkBlock-2         454.00       466.00   +2.64%
BenchmarkSemaWorkBlock-4         243.00       241.00   -0.82%
BenchmarkSemaWorkBlock-8         145.00       137.00   -5.52%
BenchmarkSemaWorkBlock-16        132.00       123.00   -6.82%
BenchmarkContendedSemaphore      123.00       102.00  -17.07%
BenchmarkContendedSemaphore-2     34.80        34.90   +0.29%
BenchmarkContendedSemaphore-4     34.70        34.80   +0.29%
BenchmarkContendedSemaphore-8     34.70        34.70   +0.00%
BenchmarkContendedSemaphore-16    34.80        34.70   -0.29%
BenchmarkMutex                    26.80        26.00   -2.99%
BenchmarkMutex-2                 108.00        45.20  -58.15%
BenchmarkMutex-4                 103.00       127.00  +23.30%
BenchmarkMutex-8                 109.00       147.00  +34.86%
BenchmarkMutex-16                102.00       152.00  +49.02%
BenchmarkMutexSlack               27.00        26.90   -0.37%
BenchmarkMutexSlack-2            149.00       165.00  +10.74%
BenchmarkMutexSlack-4            121.00       209.00  +72.73%
BenchmarkMutexSlack-8            101.00       158.00  +56.44%
BenchmarkMutexSlack-16            97.00       129.00  +32.99%
BenchmarkMutexWork               792.00       794.00   +0.25%
BenchmarkMutexWork-2             407.00       409.00   +0.49%
BenchmarkMutexWork-4             220.00       209.00   -5.00%
BenchmarkMutexWork-8             267.00       160.00  -40.07%
BenchmarkMutexWork-16            315.00       300.00   -4.76%
BenchmarkMutexWorkSlack          792.00       793.00   +0.13%
BenchmarkMutexWorkSlack-2        406.00       404.00   -0.49%
BenchmarkMutexWorkSlack-4        225.00       212.00   -5.78%
BenchmarkMutexWorkSlack-8        268.00       136.00  -49.25%
BenchmarkMutexWorkSlack-16       300.00       300.00   +0.00%
BenchmarkRWMutexWrite100          27.10        27.00   -0.37%
BenchmarkRWMutexWrite100-2        33.10        40.80  +23.26%
BenchmarkRWMutexWrite100-4       113.00        88.10  -22.04%
BenchmarkRWMutexWrite100-8       119.00        95.30  -19.92%
BenchmarkRWMutexWrite100-16      148.00       109.00  -26.35%
BenchmarkRWMutexWrite10           29.60        29.40   -0.68%
BenchmarkRWMutexWrite10-2        111.00        61.40  -44.68%
BenchmarkRWMutexWrite10-4        270.00       208.00  -22.96%
BenchmarkRWMutexWrite10-8        204.00       185.00   -9.31%
BenchmarkRWMutexWrite10-16       261.00       190.00  -27.20%
BenchmarkRWMutexWorkWrite100    1040.00      1036.00   -0.38%
BenchmarkRWMutexWorkWrite100-2   593.00       580.00   -2.19%
BenchmarkRWMutexWorkWrite100-4   470.00       365.00  -22.34%
BenchmarkRWMutexWorkWrite100-8   468.00       289.00  -38.25%
BenchmarkRWMutexWorkWrite100-16  604.00       374.00  -38.08%
BenchmarkRWMutexWorkWrite10      951.00       951.00   +0.00%
BenchmarkRWMutexWorkWrite10-2   1001.00       928.00   -7.29%
BenchmarkRWMutexWorkWrite10-4   1555.00      1006.00  -35.31%
BenchmarkRWMutexWorkWrite10-8   2085.00      1171.00  -43.84%
BenchmarkRWMutexWorkWrite10-16  2082.00      1614.00  -22.48%

R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 12:44:06 -04:00
Russ Cox
bed7e3ed78 gc: fix pprof deadlock
Fixes #2051.

R=golang-dev, dsymonds
CC=golang-dev
https://golang.org/cl/4834041
2011-07-28 21:03:40 -04:00
Russ Cox
db9229def8 cgo: add GoBytes, fix gmp example
Fixes #1640.
Fixes #2007.

R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/4815063
2011-07-28 12:39:50 -04:00
Russ Cox
1bd4b6371a gc: use more Go-like names for methods
Fixes #991.

R=ken2
CC=golang-dev
https://golang.org/cl/4819049
2011-07-27 17:56:13 -04:00
Russ Cox
a84abbe508 gc: zero-width struct, zero-length array fixes
Fixes #1774.
Fixes #2095.
Fixes #2097.

R=ken2
CC=golang-dev
https://golang.org/cl/4826046
2011-07-27 16:47:45 -04:00
Russ Cox
100a034120 runtime: higher goroutine arg limit, clearer error
Fixes #591.

R=ken2
CC=golang-dev
https://golang.org/cl/4803054
2011-07-27 12:41:46 -04:00
Russ Cox
12a5774cde gc, runtime: fix range+panic line number bugs
Fixes #1856.

R=ken2
CC=golang-dev
https://golang.org/cl/4810054
2011-07-26 00:52:46 -04:00
Mikio Hara
e5437ab065 runtime: fix freebsd build
Fixes #2078.

R=rsc
CC=golang-dev
https://golang.org/cl/4800052
2011-07-26 00:49:32 -04:00
Yuval Pavel Zholkover
2aa2ceb873 runtime: Plan 9, skip calling runtime·ldt0setup.
R=golang-dev
CC=alex.brainman, golang-dev
https://golang.org/cl/4816049
2011-07-25 12:25:41 -04:00
Dmitriy Vyukov
33ff947cac runtime: fix compilation of send select cases
Fixes #2102.

R=fullung, rsc
CC=golang-dev
https://golang.org/cl/4825043
2011-07-25 12:25:37 -04:00
Ian Lance Taylor
3a07d516b4 runtime: remove rnd calls that pass a second argument of 1
When rnd is called with a second argument of 1, it simply
returns the first argument anyway.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/4820045
2011-07-24 22:03:17 -07:00
Quan Yong Zhai
47410a2490 runtime: replace byte-at-a-time zeroing loop with memclr
R=golang-dev, r, r, dsymonds, rsc
CC=golang-dev
https://golang.org/cl/4813043
2011-07-23 15:46:58 -04:00
Russ Cox
ba134539c5 runtime: faster entersyscall/exitsyscall
Replace cas with xadd in scheduler.
Suggested by Dmitriy in last code review.
Verified with Promela model.

When there's actual contention for the atomic word,
this avoids the looping that compare-and-swap requires.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               32           26  -17.08%
runtime_test.BenchmarkSyscall-2            155           59  -61.81%
runtime_test.BenchmarkSyscall-3            112           52  -52.95%
runtime_test.BenchmarkSyscall-4             94           48  -48.57%
runtime_test.BenchmarkSyscallWork          871          872   +0.11%
runtime_test.BenchmarkSyscallWork-2        481          477   -0.83%
runtime_test.BenchmarkSyscallWork-3        338          335   -0.89%
runtime_test.BenchmarkSyscallWork-4        263          256   -2.66%

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/4800047
2011-07-23 12:22:55 -04:00
Russ Cox
226fb099d9 runtime: add UpdateMemStats, use in tests
Drops mallocrep1.go back to a reasonable
amount of time.  (154 -> 0.8 seconds on my Mac)

Fixes #2085.

R=golang-dev, dvyukov, r
CC=golang-dev
https://golang.org/cl/4811045
2011-07-22 00:55:01 -04:00
Russ Cox
22853098a9 gc: select functions are no longer special
R=ken2
CC=golang-dev
https://golang.org/cl/4794049
2011-07-21 14:10:39 -04:00
Dmitriy Vyukov
6b2ec06587 runtime: faster select
Make selectsend() accept pointer to the element,
it makes it possible to make Scase fixed-size
and allocate/free Select, all Scase's and all SudoG at once.
As a consequence SudoG freelist die out.

benchmark                       old,ns/op  new,ns/op
BenchmarkSelectUncontended	     1080        558
BenchmarkSelectUncontended-2	      675        264
BenchmarkSelectUncontended-4	      459        205
BenchmarkSelectContended	     1086        560
BenchmarkSelectContended-2	     1775       1672
BenchmarkSelectContended-4	     2668       2149
(on Intel Q6600, 4 cores, 2.4GHz)

benchmark                       old ns/op    new ns/op    delta
BenchmarkSelectUncontended         517.00       326.00  -36.94%
BenchmarkSelectUncontended-2       281.00       166.00  -40.93%
BenchmarkSelectUncontended-4       250.00        83.10  -66.76%
BenchmarkSelectUncontended-8       107.00        47.40  -55.70%
BenchmarkSelectUncontended-16       67.80        41.30  -39.09%
BenchmarkSelectContended           513.00       325.00  -36.65%
BenchmarkSelectContended-2         699.00       628.00  -10.16%
BenchmarkSelectContended-4        1085.00      1092.00   +0.65%
BenchmarkSelectContended-8        3253.00      2477.00  -23.85%
BenchmarkSelectContended-16       5313.00      5116.00   -3.71%
(on Intel E5620, 8 HT cores, 2.4 GHz)

R=rsc, ken
CC=golang-dev
https://golang.org/cl/4811041
2011-07-21 13:57:13 -04:00
Dmitriy Vyukov
d6ed1b70ad runtime: replace centralized ncgocall counter with a distributed one
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4809042
2011-07-21 11:29:08 -04:00
Dmitriy Vyukov
102b5b34a7 runtime: apply minor tweaks to channels
Remove complicated PRNG algorithm
(argument is limited by uint16 and can't be <= 1).
Do not require chansend/chanrecv selgen to be bumped with CAS.

R=rsc, ken
CC=golang-dev
https://golang.org/cl/4816041
2011-07-20 14:28:55 -04:00
Dmitriy Vyukov
90f3cb13fb runtime: improve performance of sync channels
1. SudoG always contains a pointer to the element
(thus no variable size, and less copying).
2. chansend/chanrecv allocate SudoG on the stack.
3. Copying of elements and gorotuine notifications
are moved out of critical sections.

benchmark                        old ns/op    new ns/op    delta
BenchmarkSelectUncontended          515.00       514.00   -0.19%
BenchmarkSelectUncontended-2        291.00       281.00   -3.44%
BenchmarkSelectUncontended-4        213.00       189.00  -11.27%
BenchmarkSelectUncontended-8         78.30        79.00   +0.89%
BenchmarkSelectContended            518.00       514.00   -0.77%
BenchmarkSelectContended-2          655.00       631.00   -3.66%
BenchmarkSelectContended-4         1026.00      1051.00   +2.44%
BenchmarkSelectContended-8         2026.00      2128.00   +5.03%
BenchmarkSelectNonblock             175.00       173.00   -1.14%
BenchmarkSelectNonblock-2            85.10        87.70   +3.06%
BenchmarkSelectNonblock-4            60.10        43.30  -27.95%
BenchmarkSelectNonblock-8            37.60        25.50  -32.18%
BenchmarkChanUncontended            109.00       114.00   +4.59%
BenchmarkChanUncontended-2           54.60        57.20   +4.76%
BenchmarkChanUncontended-4           27.40        28.70   +4.74%
BenchmarkChanUncontended-8           14.60        15.10   +3.42%
BenchmarkChanContended              108.00       114.00   +5.56%
BenchmarkChanContended-2            621.00       617.00   -0.64%
BenchmarkChanContended-4            759.00       677.00  -10.80%
BenchmarkChanContended-8           1635.00      1517.00   -7.22%
BenchmarkChanSync                   299.00       256.00  -14.38%
BenchmarkChanSync-2                5055.00      4624.00   -8.53%
BenchmarkChanSync-4                4998.00      4680.00   -6.36%
BenchmarkChanSync-8                5019.00      4760.00   -5.16%
BenchmarkChanProdCons0              316.00       274.00  -13.29%
BenchmarkChanProdCons0-2           1280.00       617.00  -51.80%
BenchmarkChanProdCons0-4           2433.00      1332.00  -45.25%
BenchmarkChanProdCons0-8           3651.00      1934.00  -47.03%
BenchmarkChanProdCons10             153.00       152.00   -0.65%
BenchmarkChanProdCons10-2           626.00       581.00   -7.19%
BenchmarkChanProdCons10-4          1440.00      1323.00   -8.12%
BenchmarkChanProdCons10-8          2036.00      2017.00   -0.93%

R=rsc, ken
CC=golang-dev
https://golang.org/cl/4790042
2011-07-20 11:51:25 -04:00
Lucio De Re
b546f50716 runtime: make goc2c build on Plan 9
pkg/runtime/Makefile:
. Adjusted so "goc2c.c" is built using the Plan 9 libraries.

pkg/runtime/goc2c.c:
. Added/subtracted #include headers to correspond to Plan 9
  toolkit.
. Changed fprintf(stderr,...)/exit() combinations to
  sysfatal() calls, adjusted the "%u" format to "%ud".
. Added exits(0) at the end of main().
. Made main() a void-returning function and removed the
  "return 0" at the end of it.

Tested on UBUNTU and Plan 9 only.

R=r, rsc
CC=golang-dev
https://golang.org/cl/4626093
2011-07-19 11:04:33 -04:00
Russ Cox
025abd530e runtime: faster entersyscall, exitsyscall
Uses atomic memory accesses to avoid the need to acquire
and release schedlock on fast paths.

benchmark                            old ns/op    new ns/op    delta
runtime_test.BenchmarkSyscall               73           31  -56.63%
runtime_test.BenchmarkSyscall-2            538           74  -86.23%
runtime_test.BenchmarkSyscall-3            508          103  -79.72%
runtime_test.BenchmarkSyscall-4            721           97  -86.52%
runtime_test.BenchmarkSyscallWork          920          873   -5.11%
runtime_test.BenchmarkSyscallWork-2        516          481   -6.78%
runtime_test.BenchmarkSyscallWork-3        550          343  -37.64%
runtime_test.BenchmarkSyscallWork-4        632          263  -58.39%

(Intel Core i7 L640 2.13 GHz-based Lenovo X201s)

Reduced a less artificial server benchmark
from 11.5r 12.0u 8.0s to 8.3r 9.1u 1.0s.

R=dvyukov, r, bradfitz, r, iant, iant
CC=golang-dev
https://golang.org/cl/4723042
2011-07-19 11:01:17 -04:00
Wei Guangjing
9f636598ba cgo: windows amd64 port
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4725041
2011-07-19 10:47:33 -04:00
Hector Chu
47e6042f73 runtime: fix select pass 3
Fixes #2075

R=rsc, ken, r
CC=golang-dev
https://golang.org/cl/4748045
2011-07-18 16:15:01 -04:00
Russ Cox
bd77619142 runtime: track running goroutine count
Used to use mcpu+msyscall but that's
problematic for packing into a single
atomic word.  The running goroutine count
(where running == Go code or syscall)
can be maintained separately, always
manipulated under lock.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4767041
2011-07-18 15:50:55 -04:00
Dmitriy Vyukov
27753ff108 runtime: add per-M caches for MemStats
Avoid touching centralized state during
memory manager operations.

R=mirtchovski
CC=golang-dev, rsc
https://golang.org/cl/4766042
2011-07-18 14:56:22 -04:00
Dmitriy Vyukov
66d5c9b1e9 runtime: add per-M caches for MemStats
Avoid touching centralized state during
memory manager opreations.

R=rsc
CC=golang-dev
https://golang.org/cl/4766042
2011-07-18 14:52:57 -04:00
Dmitriy Vyukov
c1f035ba4c runtime: fix data race in Plan9 sysalloc
Add mutex to protect brk limit.
Add mstats.sys update.

R=rsc
CC=golang-dev
https://golang.org/cl/4762045
2011-07-18 10:50:04 -04:00
Nigel Tao
95323c59ea runtime: fix panic for make(chan [0]byte).
I suspect that this was introduced by
http://code.google.com/p/go/source/detail?r=6e4ee32fffd1

R=r
CC=golang-dev
https://golang.org/cl/4764045
2011-07-18 15:54:11 +10:00
Dmitriy Vyukov
491aa1579d runtime: native xadd for 386/amd64
benchmark                          old ns/op    new ns/op    delta
BenchmarkSemaUncontended               37.40        34.10   -8.82%
BenchmarkSemaUncontended-2             18.90        17.70   -6.35%
BenchmarkSemaUncontended-4             11.90        10.90   -8.40%
BenchmarkSemaUncontended-8              6.26         5.19  -17.09%
BenchmarkSemaUncontended-16             4.39         3.91  -10.93%
BenchmarkSemaSyntNonblock              38.00        35.30   -7.11%
BenchmarkSemaSyntNonblock-2            83.00        46.70  -43.73%
BenchmarkSemaSyntNonblock-4           124.00       101.00  -18.55%
BenchmarkSemaSyntNonblock-8           124.00       116.00   -6.45%
BenchmarkSemaSyntNonblock-16          148.00       114.00  -22.97%

(on HP Z600 2 x Xeon E5620, 8 HT cores, 2.40GHz)

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4755041
2011-07-15 11:27:16 -04:00
Russ Cox
29125be5c7 runtime: make TestSideEffectOrder work twice
R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/4714045
2011-07-14 23:43:03 -04:00
Alex Brainman
dde435587d runtime: correct FixedStack value (fixes windows build)
Fixes #2068.

R=rsc
CC=golang-dev
https://golang.org/cl/4705046
2011-07-14 09:13:39 +10:00
Wei Guangjing
a6e60916c1 runtime: stdcall_raw stack 16byte align for Win64
R=alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4681049
2011-07-13 11:44:44 -07:00
Dmitriy Vyukov
86a659cad0 runtime: fix data race during Itab hash update/lookup
The data race is on newly published Itab nodes, which are
both unsafely published and unsafely acquired. It can
break on IA-32/Intel64 due to compiler optimizations
(most likely not an issue as of now) and on ARM due to
hardware memory access reorderings.

R=rsc
CC=golang-dev
https://golang.org/cl/4673055
2011-07-13 11:22:41 -07:00
Quan Yong Zhai
fe9991e8b2 runtime: replace runtime.mcpy with runtime.memmove
faster string operations, and more

tested on linux/386

runtime_test.BenchmarkSliceToString                    642          532  -17.13%
runtime_test.BenchmarkStringToSlice                    636          528  -16.98%
runtime_test.BenchmarkConcatString                    1109          897  -19.12%

R=r, iant, rsc
CC=golang-dev
https://golang.org/cl/4674042
2011-07-12 17:30:40 -07:00
Dmitriy Vyukov
86e7323bdf runtime: eliminate false sharing during stack growth
Remove static variable from runtime·oldstack().
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows (with CL 4657091 applied):
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1183.00      1180.00   -0.25%
BenchmarkStackGrowth-2                             1249.00      1211.00   -3.04%
BenchmarkStackGrowth-4                              954.00       805.00  -15.62%
BenchmarkStackGrowth-8                              701.00       683.00   -2.57%
BenchmarkStackGrowth-16                             465.00       415.00  -10.75%

R=rsc
CC=golang-dev
https://golang.org/cl/4693042
2011-07-12 10:56:21 -07:00
Russ Cox
88e0c0517a runtime: fix comment (lost in shuffle)
TBR=dvyukov
CC=golang-dev
https://golang.org/cl/4710041
2011-07-12 09:26:05 -07:00
Dmitriy Vyukov
c9152a8568 runtime: eliminate contention during stack allocation
Standard-sized stack frames use plain malloc/free
instead of centralized lock-protected FixAlloc.
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
BenchmarkStackGrowth                               1045.00       949.00   -9.19%
BenchmarkStackGrowth-2                             3450.00       800.00  -76.81%
BenchmarkStackGrowth-4                             5076.00       513.00  -89.89%
BenchmarkStackGrowth-8                             7805.00       471.00  -93.97%
BenchmarkStackGrowth-16                           11751.00       321.00  -97.27%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4657091
2011-07-12 09:24:32 -07:00
Dmitriy Vyukov
013ad89c9b runtime: eliminate false sharing on runtime.goidgen
runtime.goidgen can be quite frequently modified and
shares cache line with the following variables,
it leads to false sharing.
50c6b0 b nfname
50c6b4 b nfunc
50c6b8 b nfunc$17
50c6bc b nhist$17
50c6c0 B runtime.checking
50c6c4 B runtime.gcwaiting
50c6c8 B runtime.goidgen
50c6cc B runtime.gomaxprocs
50c6d0 B runtime.panicking
50c6d4 B strconv.IntSize
50c6d8 B src/pkg/runtime/_xtest_.ss
50c6e0 B src/pkg/runtime/_xtest_.stop
50c6e8 b addrfree
50c6f0 b addrmem
50c6f8 b argv

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4673054
2011-07-12 01:25:14 -04:00
Dmitriy Vyukov
909f31872a runtime: eliminate false sharing on random number generators
Use machine-local random number generator instead of
racy global ones.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4674049
2011-07-12 01:23:58 -04:00
Dmitriy Vyukov
f9f21aa1fb runtime: fix data race on runtime·maxstring
The data race can lead to erroneous output of
"[invalid string]" instead of a string.

R=golang-dev
CC=golang-dev
https://golang.org/cl/4678049
2011-07-12 01:21:06 -04:00
Wei Guangjing
f83609f642 runtime: windows/amd64 port
R=rsc, alex.brainman, hectorchu, r
CC=golang-dev
https://golang.org/cl/3759042
2011-06-29 17:37:56 +10:00
Mikio Hara
161deaa85c runtime/cgo: fix build
R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4629082
2011-06-28 22:26:31 -04:00
Dmitriy Vyukov
997c00f991 runtime: replace Semacquire/Semrelease implementation
1. The implementation uses distributed hash table of waitlists instead of a centralized one.
  It significantly improves scalability for uncontended semaphores.
2. The implementation provides wait-free fast-path for signalers.
3. The implementation uses less locks (1 lock/unlock instead of 5 for Semacquire).
4. runtime·ready() call is moved out of critical section.
5. Semacquire() does not call semwake().
Benchmark results on HP Z600 (2 x Xeon E5620, 8 HT cores, 2.40GHz)
are as follows:
benchmark                                        old ns/op    new ns/op    delta
runtime_test.BenchmarkSemaUncontended                58.20        36.30  -37.63%
runtime_test.BenchmarkSemaUncontended-2             199.00        18.30  -90.80%
runtime_test.BenchmarkSemaUncontended-4             327.00         9.20  -97.19%
runtime_test.BenchmarkSemaUncontended-8             491.00         5.32  -98.92%
runtime_test.BenchmarkSemaUncontended-16            946.00         4.18  -99.56%

runtime_test.BenchmarkSemaSyntNonblock               59.00        36.80  -37.63%
runtime_test.BenchmarkSemaSyntNonblock-2            167.00       138.00  -17.37%
runtime_test.BenchmarkSemaSyntNonblock-4            333.00       129.00  -61.26%
runtime_test.BenchmarkSemaSyntNonblock-8            464.00       130.00  -71.98%
runtime_test.BenchmarkSemaSyntNonblock-16          1015.00       136.00  -86.60%

runtime_test.BenchmarkSemaSyntBlock                  58.80        36.70  -37.59%
runtime_test.BenchmarkSemaSyntBlock-2               294.00       149.00  -49.32%
runtime_test.BenchmarkSemaSyntBlock-4               333.00       177.00  -46.85%
runtime_test.BenchmarkSemaSyntBlock-8               471.00       221.00  -53.08%
runtime_test.BenchmarkSemaSyntBlock-16              990.00       227.00  -77.07%

runtime_test.BenchmarkSemaWorkNonblock              829.00       832.00   +0.36%
runtime_test.BenchmarkSemaWorkNonblock-2            425.00       419.00   -1.41%
runtime_test.BenchmarkSemaWorkNonblock-4            308.00       220.00  -28.57%
runtime_test.BenchmarkSemaWorkNonblock-8            394.00       147.00  -62.69%
runtime_test.BenchmarkSemaWorkNonblock-16          1510.00       149.00  -90.13%

runtime_test.BenchmarkSemaWorkBlock                 828.00       813.00   -1.81%
runtime_test.BenchmarkSemaWorkBlock-2               428.00       436.00   +1.87%
runtime_test.BenchmarkSemaWorkBlock-4               232.00       219.00   -5.60%
runtime_test.BenchmarkSemaWorkBlock-8               392.00       251.00  -35.97%
runtime_test.BenchmarkSemaWorkBlock-16             1524.00       298.00  -80.45%

sync_test.BenchmarkMutexUncontended                  24.10        24.00   -0.41%
sync_test.BenchmarkMutexUncontended-2                12.00        12.00   +0.00%
sync_test.BenchmarkMutexUncontended-4                 6.25         6.17   -1.28%
sync_test.BenchmarkMutexUncontended-8                 3.43         3.34   -2.62%
sync_test.BenchmarkMutexUncontended-16                2.34         2.32   -0.85%

sync_test.BenchmarkMutex                             24.70        24.70   +0.00%
sync_test.BenchmarkMutex-2                          208.00        99.50  -52.16%
sync_test.BenchmarkMutex-4                         2744.00       256.00  -90.67%
sync_test.BenchmarkMutex-8                         5137.00       556.00  -89.18%
sync_test.BenchmarkMutex-16                        5368.00      1284.00  -76.08%

sync_test.BenchmarkMutexSlack                        24.70        25.00   +1.21%
sync_test.BenchmarkMutexSlack-2                    1094.00       186.00  -83.00%
sync_test.BenchmarkMutexSlack-4                    3430.00       402.00  -88.28%
sync_test.BenchmarkMutexSlack-8                    5051.00      1066.00  -78.90%
sync_test.BenchmarkMutexSlack-16                   6806.00      1363.00  -79.97%

sync_test.BenchmarkMutexWork                        793.00       792.00   -0.13%
sync_test.BenchmarkMutexWork-2                      398.00       398.00   +0.00%
sync_test.BenchmarkMutexWork-4                     1441.00       308.00  -78.63%
sync_test.BenchmarkMutexWork-8                     8532.00       847.00  -90.07%
sync_test.BenchmarkMutexWork-16                    8225.00      2760.00  -66.44%

sync_test.BenchmarkMutexWorkSlack                   793.00       793.00   +0.00%
sync_test.BenchmarkMutexWorkSlack-2                 418.00       414.00   -0.96%
sync_test.BenchmarkMutexWorkSlack-4                4481.00       480.00  -89.29%
sync_test.BenchmarkMutexWorkSlack-8                6317.00      1598.00  -74.70%
sync_test.BenchmarkMutexWorkSlack-16               9111.00      3038.00  -66.66%

R=rsc
CC=golang-dev
https://golang.org/cl/4631059
2011-06-28 15:09:53 -04:00
Albert Strasheim
a026d0fc76 runtime/cgo: check for errors from pthread_create
R=rsc, iant, dvyukov
CC=golang-dev
https://golang.org/cl/4643057
2011-06-28 12:04:50 -04:00
Dmitriy Vyukov
660b22988b runtime: add Semacquire/Semrelease benchmarks
R=rsc
CC=golang-dev
https://golang.org/cl/4625065
2011-06-28 11:15:24 -04:00
Alex Brainman
6b648cafde runtime: another attempt to allow stdcall to be used from both 386 and amd64 arch
R=rsc
CC=golang-dev, vcc.163
https://golang.org/cl/4627071
2011-06-28 12:46:16 +10:00
Rob Pike
ebb1566a46 strings.Split: make the default to split all.
Change the signature of Split to have no count,
assuming a full split, and rename the existing
Split with a count to SplitN.
Do the same to package bytes.
Add a gofix module.

R=adg, dsymonds, alex.brainman, rsc
CC=golang-dev
https://golang.org/cl/4661051
2011-06-28 09:43:14 +10:00
Gustavo Niemeyer
65b036c381 runtime: don't use twice the memory with grsec-like kernels
grsec needs the FIXED flag to be provided to mmap, which
works now.  That said, when the allocation fails to be made
in the specific address, we're still given back a writable
page.  This change will unmap that page to avoid using
twice the amount of memory needed.

It'd also be pretty easy to avoid the extra system calls
once we detected that the flag is needed, but I'm not sure
if that edge case is worth the effort.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/4634086
2011-06-24 00:29:59 -03:00
Russ Cox
c475c3071a 5c: do not use R9 and R10
This program used to use R9 and R10.
Now it fails to compile (out of registers).
I used to know a simpler test but can't remember it.

Learned something new: Rietveld refuses change
list descriptions bigger than 10 kB.

int sum(int x, int y, int z, int w) {
        return
        (((((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))))%
        (((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))))*
        ((((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))))%
        (((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))))))*
        (((((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))))%
        (((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))))*
        ((((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))))%
        (((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))))/
        ((((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))|
        (((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w))&
        ((x*y+z*w|x*y+z*w)^
        (x*y+z*w|x*y+z*w)))))))
        ;
}

R=ken2
CC=golang-dev
https://golang.org/cl/4650053
2011-06-22 23:22:36 -04:00
Robert Hencke
b88e669a8f nacl, tiny: remove vestiges
R=golang-dev, r, rsc
CC=golang-dev
https://golang.org/cl/4635053
2011-06-21 12:02:40 -04:00