2009-05-26 12:18:42 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
//
|
|
|
|
// System calls and other sys.stuff for arm, Linux
|
|
|
|
//
|
|
|
|
|
2011-12-19 13:51:13 -07:00
|
|
|
#include "zasm_GOOS_GOARCH.h"
|
2013-08-07 13:20:05 -06:00
|
|
|
#include "../../cmd/ld/textflag.h"
|
2009-10-29 22:21:14 -06:00
|
|
|
|
2012-02-23 13:43:14 -07:00
|
|
|
// for EABI, as we don't support OABI
|
2009-11-05 23:53:08 -07:00
|
|
|
#define SYS_BASE 0x0
|
|
|
|
|
2009-06-23 12:54:23 -06:00
|
|
|
#define SYS_exit (SYS_BASE + 1)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
#define SYS_read (SYS_BASE + 3)
|
2009-06-23 12:54:23 -06:00
|
|
|
#define SYS_write (SYS_BASE + 4)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
#define SYS_open (SYS_BASE + 5)
|
|
|
|
#define SYS_close (SYS_BASE + 6)
|
2010-02-10 16:01:02 -07:00
|
|
|
#define SYS_gettimeofday (SYS_BASE + 78)
|
2009-10-29 22:21:14 -06:00
|
|
|
#define SYS_clone (SYS_BASE + 120)
|
2010-04-05 13:51:09 -06:00
|
|
|
#define SYS_rt_sigreturn (SYS_BASE + 173)
|
|
|
|
#define SYS_rt_sigaction (SYS_BASE + 174)
|
2012-02-23 12:43:58 -07:00
|
|
|
#define SYS_rt_sigprocmask (SYS_BASE + 175)
|
2010-04-05 13:51:09 -06:00
|
|
|
#define SYS_sigaltstack (SYS_BASE + 186)
|
2009-06-23 12:54:23 -06:00
|
|
|
#define SYS_mmap2 (SYS_BASE + 192)
|
2009-10-29 22:21:14 -06:00
|
|
|
#define SYS_futex (SYS_BASE + 240)
|
|
|
|
#define SYS_exit_group (SYS_BASE + 248)
|
2010-09-07 07:57:22 -06:00
|
|
|
#define SYS_munmap (SYS_BASE + 91)
|
2011-12-12 14:33:13 -07:00
|
|
|
#define SYS_madvise (SYS_BASE + 220)
|
2011-03-23 09:31:42 -06:00
|
|
|
#define SYS_setitimer (SYS_BASE + 104)
|
2011-06-07 22:50:10 -06:00
|
|
|
#define SYS_mincore (SYS_BASE + 219)
|
2011-04-25 14:58:00 -06:00
|
|
|
#define SYS_gettid (SYS_BASE + 224)
|
|
|
|
#define SYS_tkill (SYS_BASE + 238)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
#define SYS_sched_yield (SYS_BASE + 158)
|
2012-01-10 21:48:02 -07:00
|
|
|
#define SYS_select (SYS_BASE + 142) // newselect
|
2012-02-24 13:28:51 -07:00
|
|
|
#define SYS_ugetrlimit (SYS_BASE + 191)
|
2012-08-09 20:05:26 -06:00
|
|
|
#define SYS_sched_getaffinity (SYS_BASE + 242)
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
#define SYS_clock_gettime (SYS_BASE + 263)
|
2013-03-15 14:01:56 -06:00
|
|
|
#define SYS_epoll_create (SYS_BASE + 250)
|
|
|
|
#define SYS_epoll_ctl (SYS_BASE + 251)
|
|
|
|
#define SYS_epoll_wait (SYS_BASE + 252)
|
|
|
|
#define SYS_epoll_create1 (SYS_BASE + 357)
|
|
|
|
#define SYS_fcntl (SYS_BASE + 55)
|
2009-06-23 12:54:23 -06:00
|
|
|
|
2009-11-12 00:23:11 -07:00
|
|
|
#define ARM_BASE (SYS_BASE + 0x0f0000)
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·open(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW $SYS_open, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·close(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW $SYS_close, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·write(SB),NOSPLIT,$0
|
2009-09-18 17:45:41 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_write, R7
|
|
|
|
SWI $0
|
2009-05-26 12:18:42 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·read(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW $SYS_read, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·getrlimit(SB),NOSPLIT,$0
|
2012-02-24 13:28:51 -07:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW $SYS_ugetrlimit, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·exit(SB),NOSPLIT,$-4
|
2009-10-29 22:21:14 -06:00
|
|
|
MOVW 0(FP), R0
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_exit_group, R7
|
|
|
|
SWI $0
|
2009-10-29 22:21:14 -06:00
|
|
|
MOVW $1234, R0
|
|
|
|
MOVW $1002, R1
|
|
|
|
MOVW R0, (R1) // fail hard
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·exit1(SB),NOSPLIT,$-4
|
2009-10-25 12:51:16 -06:00
|
|
|
MOVW 0(FP), R0
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_exit, R7
|
|
|
|
SWI $0
|
2009-10-29 22:21:14 -06:00
|
|
|
MOVW $1234, R0
|
|
|
|
MOVW $1003, R1
|
|
|
|
MOVW R0, (R1) // fail hard
|
2009-06-10 12:53:07 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·raise(SB),NOSPLIT,$-4
|
2011-04-25 14:58:00 -06:00
|
|
|
MOVW $SYS_gettid, R7
|
|
|
|
SWI $0
|
|
|
|
// arg 1 tid already in R0 from gettid
|
2013-03-14 23:11:03 -06:00
|
|
|
MOVW sig+0(FP), R1 // arg 2 - signal
|
2011-04-25 14:58:00 -06:00
|
|
|
MOVW $SYS_tkill, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·mmap(SB),NOSPLIT,$0
|
2009-09-18 17:45:41 -06:00
|
|
|
MOVW 0(FP), R0
|
2009-06-23 12:54:23 -06:00
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW 12(FP), R3
|
|
|
|
MOVW 16(FP), R4
|
|
|
|
MOVW 20(FP), R5
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_mmap2, R7
|
|
|
|
SWI $0
|
2012-02-23 13:43:14 -07:00
|
|
|
MOVW $0xfffff001, R6
|
|
|
|
CMP R6, R0
|
|
|
|
RSB.HI $0, R0
|
2009-06-10 12:53:07 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·munmap(SB),NOSPLIT,$0
|
2010-09-07 07:57:22 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW $SYS_munmap, R7
|
|
|
|
SWI $0
|
2012-02-23 13:43:14 -07:00
|
|
|
MOVW $0xfffff001, R6
|
|
|
|
CMP R6, R0
|
2013-05-28 06:13:02 -06:00
|
|
|
MOVW.HI $0, R8 // crash on syscall failure
|
|
|
|
MOVW.HI R8, (R8)
|
2010-09-07 07:57:22 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·madvise(SB),NOSPLIT,$0
|
2011-12-12 14:33:13 -07:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW $SYS_madvise, R7
|
|
|
|
SWI $0
|
2012-12-22 13:06:28 -07:00
|
|
|
// ignore failure - maybe pages are locked
|
2011-12-12 14:33:13 -07:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·setitimer(SB),NOSPLIT,$0
|
2011-03-23 09:31:42 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
2011-03-25 10:30:49 -06:00
|
|
|
MOVW 8(FP), R2
|
2011-03-23 09:31:42 -06:00
|
|
|
MOVW $SYS_setitimer, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·mincore(SB),NOSPLIT,$0
|
2011-06-07 22:50:10 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW $SYS_mincore, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT time·now(SB), NOSPLIT, $32
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVW $0, R0 // CLOCK_REALTIME
|
|
|
|
MOVW $8(R13), R1 // timespec
|
|
|
|
MOVW $SYS_clock_gettime, R7
|
2011-11-30 09:59:44 -07:00
|
|
|
SWI $0
|
|
|
|
|
|
|
|
MOVW 8(R13), R0 // sec
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVW 12(R13), R2 // nsec
|
2011-11-30 09:59:44 -07:00
|
|
|
|
|
|
|
MOVW R0, 0(FP)
|
|
|
|
MOVW $0, R1
|
|
|
|
MOVW R1, 4(FP)
|
|
|
|
MOVW R2, 8(FP)
|
|
|
|
RET
|
|
|
|
|
2011-11-03 15:35:28 -06:00
|
|
|
// int64 nanotime(void) so really
|
|
|
|
// void nanotime(int64 *nsec)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·nanotime(SB),NOSPLIT,$32
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVW $0, R0 // CLOCK_REALTIME
|
|
|
|
MOVW $8(R13), R1 // timespec
|
|
|
|
MOVW $SYS_clock_gettime, R7
|
2010-02-10 16:01:02 -07:00
|
|
|
SWI $0
|
2011-11-04 06:38:10 -06:00
|
|
|
|
|
|
|
MOVW 8(R13), R0 // sec
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVW 12(R13), R2 // nsec
|
2011-11-04 06:38:10 -06:00
|
|
|
|
|
|
|
MOVW $1000000000, R3
|
|
|
|
MULLU R0, R3, (R1, R0)
|
|
|
|
MOVW $0, R4
|
|
|
|
ADD.S R2, R0
|
|
|
|
ADC R4, R1
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
|
2011-11-04 06:38:10 -06:00
|
|
|
MOVW 0(FP), R3
|
|
|
|
MOVW R0, 0(R3)
|
|
|
|
MOVW R1, 4(R3)
|
2010-02-10 16:01:02 -07:00
|
|
|
RET
|
|
|
|
|
2009-10-29 22:21:14 -06:00
|
|
|
// int32 futex(int32 *uaddr, int32 op, int32 val,
|
2009-06-10 12:53:07 -06:00
|
|
|
// struct timespec *timeout, int32 *uaddr2, int32 val2);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·futex(SB),NOSPLIT,$0
|
2009-10-29 22:21:14 -06:00
|
|
|
MOVW 4(SP), R0
|
|
|
|
MOVW 8(SP), R1
|
|
|
|
MOVW 12(SP), R2
|
|
|
|
MOVW 16(SP), R3
|
|
|
|
MOVW 20(SP), R4
|
|
|
|
MOVW 24(SP), R5
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_futex, R7
|
|
|
|
SWI $0
|
2009-06-10 12:53:07 -06:00
|
|
|
RET
|
|
|
|
|
2009-10-29 22:21:14 -06:00
|
|
|
|
2012-12-18 09:30:29 -07:00
|
|
|
// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·clone(SB),NOSPLIT,$0
|
2009-10-29 22:21:14 -06:00
|
|
|
MOVW flags+0(FP), R0
|
|
|
|
MOVW stack+4(FP), R1
|
|
|
|
MOVW $0, R2 // parent tid ptr
|
|
|
|
MOVW $0, R3 // tls_val
|
|
|
|
MOVW $0, R4 // child tid ptr
|
|
|
|
MOVW $0, R5
|
|
|
|
|
2012-12-18 09:30:29 -07:00
|
|
|
// Copy mp, gp, fn off parent stack for use by child.
|
2009-10-29 22:21:14 -06:00
|
|
|
// TODO(kaib): figure out which registers are clobbered by clone and avoid stack copying
|
|
|
|
MOVW $-16(R1), R1
|
|
|
|
MOVW mm+8(FP), R6
|
|
|
|
MOVW R6, 0(R1)
|
|
|
|
MOVW gg+12(FP), R6
|
|
|
|
MOVW R6, 4(R1)
|
|
|
|
MOVW fn+16(FP), R6
|
|
|
|
MOVW R6, 8(R1)
|
|
|
|
MOVW $1234, R6
|
|
|
|
MOVW R6, 12(R1)
|
|
|
|
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_clone, R7
|
|
|
|
SWI $0
|
2009-10-29 22:21:14 -06:00
|
|
|
|
|
|
|
// In parent, return.
|
|
|
|
CMP $0, R0
|
|
|
|
BEQ 2(PC)
|
|
|
|
RET
|
|
|
|
|
|
|
|
// Paranoia: check that SP is as we expect. Use R13 to avoid linker 'fixup'
|
|
|
|
MOVW 12(R13), R0
|
|
|
|
MOVW $1234, R1
|
|
|
|
CMP R0, R1
|
|
|
|
BEQ 2(PC)
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
BL runtime·abort(SB)
|
2009-10-29 22:21:14 -06:00
|
|
|
|
|
|
|
MOVW 0(R13), m
|
|
|
|
MOVW 4(R13), g
|
|
|
|
|
|
|
|
// paranoia; check they are not nil
|
|
|
|
MOVW 0(m), R0
|
|
|
|
MOVW 0(g), R0
|
|
|
|
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
BL runtime·emptyfunc(SB) // fault if stack check is wrong
|
2009-10-29 22:21:14 -06:00
|
|
|
|
|
|
|
// Initialize m->procid to Linux tid
|
2009-11-05 23:53:08 -07:00
|
|
|
MOVW $SYS_gettid, R7
|
|
|
|
SWI $0
|
2009-10-29 22:21:14 -06:00
|
|
|
MOVW R0, m_procid(m)
|
|
|
|
|
|
|
|
// Call fn
|
|
|
|
MOVW 8(R13), R0
|
|
|
|
MOVW $16(R13), R13
|
|
|
|
BL (R0)
|
|
|
|
|
|
|
|
MOVW $0, R0
|
|
|
|
MOVW R0, 4(R13)
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
BL runtime·exit1(SB)
|
2009-10-29 22:21:14 -06:00
|
|
|
|
|
|
|
// It shouldn't return
|
|
|
|
MOVW $1234, R0
|
|
|
|
MOVW $1005, R1
|
|
|
|
MOVW R0, (R1)
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sigaltstack(SB),NOSPLIT,$0
|
2010-04-05 13:51:09 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW $SYS_sigaltstack, R7
|
|
|
|
SWI $0
|
2012-02-23 13:43:14 -07:00
|
|
|
MOVW $0xfffff001, R6
|
|
|
|
CMP R6, R0
|
2013-05-28 06:13:02 -06:00
|
|
|
MOVW.HI $0, R8 // crash on syscall failure
|
|
|
|
MOVW.HI R8, (R8)
|
2010-04-05 13:51:09 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sigtramp(SB),NOSPLIT,$24
|
2012-05-04 04:20:09 -06:00
|
|
|
// this might be called in external code context,
|
|
|
|
// where g and m are not set.
|
2013-02-28 14:24:38 -07:00
|
|
|
// first save R0, because _cgo_load_gm will clobber it
|
2012-05-04 04:20:09 -06:00
|
|
|
MOVW R0, 4(R13)
|
2013-02-28 14:24:38 -07:00
|
|
|
MOVW _cgo_load_gm(SB), R0
|
2012-05-04 04:20:09 -06:00
|
|
|
CMP $0, R0
|
|
|
|
BL.NE (R0)
|
|
|
|
|
2013-05-27 06:46:53 -06:00
|
|
|
CMP $0, m
|
2013-07-11 14:39:39 -06:00
|
|
|
BNE 4(PC)
|
2013-05-27 06:46:53 -06:00
|
|
|
// signal number is already prepared in 4(R13)
|
2013-07-11 14:39:39 -06:00
|
|
|
MOVW $runtime·badsignal(SB), R11
|
|
|
|
BL (R11)
|
2013-05-27 06:46:53 -06:00
|
|
|
RET
|
|
|
|
|
2011-02-23 12:47:42 -07:00
|
|
|
// save g
|
|
|
|
MOVW g, R3
|
|
|
|
MOVW g, 20(R13)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2011-02-23 12:47:42 -07:00
|
|
|
// g = m->gsignal
|
2010-04-05 13:51:09 -06:00
|
|
|
MOVW m_gsignal(m), g
|
2011-02-23 12:47:42 -07:00
|
|
|
|
|
|
|
// copy arguments for call to sighandler
|
2012-05-04 04:20:09 -06:00
|
|
|
// R0 is already saved above
|
2010-04-05 13:51:09 -06:00
|
|
|
MOVW R1, 8(R13)
|
|
|
|
MOVW R2, 12(R13)
|
2011-02-23 12:47:42 -07:00
|
|
|
MOVW R3, 16(R13)
|
|
|
|
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
BL runtime·sighandler(SB)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2011-02-23 12:47:42 -07:00
|
|
|
// restore g
|
|
|
|
MOVW 20(R13), g
|
|
|
|
|
2010-04-05 13:51:09 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
|
2012-02-23 12:43:58 -07:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW 12(FP), R3
|
|
|
|
MOVW $SYS_rt_sigprocmask, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
|
2010-04-05 13:51:09 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW 12(FP), R3
|
|
|
|
MOVW $SYS_rt_sigaction, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sigreturn(SB),NOSPLIT,$0
|
2010-04-05 13:51:09 -06:00
|
|
|
MOVW $SYS_rt_sigreturn, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
2011-02-25 12:29:55 -07:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·usleep(SB),NOSPLIT,$12
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
MOVW usec+0(FP), R0
|
|
|
|
MOVW R0, R1
|
|
|
|
MOVW $1000000, R2
|
2012-01-10 21:48:02 -07:00
|
|
|
DIV R2, R0
|
|
|
|
MOD R2, R1
|
|
|
|
MOVW R0, 4(SP)
|
|
|
|
MOVW R1, 8(SP)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
MOVW $0, R0
|
|
|
|
MOVW $0, R1
|
|
|
|
MOVW $0, R2
|
|
|
|
MOVW $0, R3
|
|
|
|
MOVW $4(SP), R4
|
|
|
|
MOVW $SYS_select, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
2012-02-23 13:43:14 -07:00
|
|
|
// Use kernel version instead of native armcas in asm_arm.s.
|
|
|
|
// See ../sync/atomic/asm_linux_arm.s for details.
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT cas<>(SB),NOSPLIT,$0
|
2011-02-25 12:29:55 -07:00
|
|
|
MOVW $0xffff0fc0, PC
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·cas(SB),NOSPLIT,$0
|
2011-02-25 12:29:55 -07:00
|
|
|
MOVW valptr+0(FP), R2
|
|
|
|
MOVW old+4(FP), R0
|
2011-05-02 08:49:19 -06:00
|
|
|
casagain:
|
2011-02-25 12:29:55 -07:00
|
|
|
MOVW new+8(FP), R1
|
|
|
|
BL cas<>(SB)
|
2011-05-02 08:49:19 -06:00
|
|
|
BCC cascheck
|
|
|
|
MOVW $1, R0
|
2011-02-25 12:29:55 -07:00
|
|
|
RET
|
2011-05-02 08:49:19 -06:00
|
|
|
cascheck:
|
|
|
|
// Kernel lies; double-check.
|
|
|
|
MOVW valptr+0(FP), R2
|
|
|
|
MOVW old+4(FP), R0
|
|
|
|
MOVW 0(R2), R3
|
|
|
|
CMP R0, R3
|
|
|
|
BEQ casagain
|
|
|
|
MOVW $0, R0
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·casp(SB),NOSPLIT,$0
|
2011-02-25 12:29:55 -07:00
|
|
|
B runtime·cas(SB)
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·osyield(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVW $SYS_sched_yield, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
2012-08-09 20:05:26 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
|
2012-08-09 20:05:26 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW $SYS_sched_getaffinity, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
2013-03-15 14:01:56 -06:00
|
|
|
|
|
|
|
// int32 runtime·epollcreate(int32 size)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollcreate(SB),NOSPLIT,$0
|
2013-03-15 14:01:56 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW $SYS_epoll_create, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
|
|
|
// int32 runtime·epollcreate1(int32 flags)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollcreate1(SB),NOSPLIT,$0
|
2013-03-15 14:01:56 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW $SYS_epoll_create1, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
|
|
|
// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollctl(SB),NOSPLIT,$0
|
2013-03-15 14:01:56 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW 12(FP), R3
|
|
|
|
MOVW $SYS_epoll_ctl, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
|
|
|
// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollwait(SB),NOSPLIT,$0
|
2013-03-15 14:01:56 -06:00
|
|
|
MOVW 0(FP), R0
|
|
|
|
MOVW 4(FP), R1
|
|
|
|
MOVW 8(FP), R2
|
|
|
|
MOVW 12(FP), R3
|
|
|
|
MOVW $SYS_epoll_wait, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|
|
|
|
|
|
|
|
// void runtime·closeonexec(int32 fd)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·closeonexec(SB),NOSPLIT,$0
|
2013-03-15 14:01:56 -06:00
|
|
|
MOVW 0(FP), R0 // fd
|
|
|
|
MOVW $2, R1 // F_SETFD
|
|
|
|
MOVW $1, R2 // FD_CLOEXEC
|
|
|
|
MOVW $SYS_fcntl, R7
|
|
|
|
SWI $0
|
|
|
|
RET
|