2009-03-31 16:45:12 -06:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
//
|
|
|
|
// System calls and other sys.stuff for 386, Linux
|
|
|
|
//
|
|
|
|
|
2011-12-19 13:51:13 -07:00
|
|
|
#include "zasm_GOOS_GOARCH.h"
|
2013-08-07 13:20:05 -06:00
|
|
|
#include "../../cmd/ld/textflag.h"
|
2009-06-17 16:15:55 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·exit(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $252, AX // syscall number
|
|
|
|
MOVL 4(SP), BX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
INT $3 // not reached
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·exit1(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $1, AX // exit - exit the current os thread
|
|
|
|
MOVL 4(SP), BX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
INT $3 // not reached
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·open(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVL $5, AX // syscall - open
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·close(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVL $6, AX // syscall - close
|
|
|
|
MOVL 4(SP), BX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·write(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $4, AX // syscall - write
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·read(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVL $3, AX // syscall - read
|
|
|
|
MOVL 4(SP), BX
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·getrlimit(SB),NOSPLIT,$0
|
2012-02-24 13:28:51 -07:00
|
|
|
MOVL $191, AX // syscall - ugetrlimit
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·usleep(SB),NOSPLIT,$8
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
MOVL $0, DX
|
|
|
|
MOVL usec+0(FP), AX
|
|
|
|
MOVL $1000000, CX
|
|
|
|
DIVL CX
|
2011-10-03 12:08:59 -06:00
|
|
|
MOVL AX, 0(SP)
|
|
|
|
MOVL DX, 4(SP)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
|
|
|
// select(0, 0, 0, 0, &tv)
|
2011-10-03 12:08:59 -06:00
|
|
|
MOVL $142, AX
|
|
|
|
MOVL $0, BX
|
|
|
|
MOVL $0, CX
|
|
|
|
MOVL $0, DX
|
|
|
|
MOVL $0, SI
|
|
|
|
LEAL 0(SP), DI
|
|
|
|
CALL *runtime·_vdso(SB)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·raise(SB),NOSPLIT,$12
|
2011-04-25 14:58:00 -06:00
|
|
|
MOVL $224, AX // syscall - gettid
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2013-03-14 23:11:03 -06:00
|
|
|
MOVL AX, BX // arg 1 tid
|
|
|
|
MOVL sig+0(FP), CX // arg 2 signal
|
2011-04-25 14:58:00 -06:00
|
|
|
MOVL $238, AX // syscall - tkill
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2011-04-25 14:58:00 -06:00
|
|
|
RET
|
2011-03-23 09:31:42 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·setitimer(SB),NOSPLIT,$0-24
|
2011-03-23 09:31:42 -06:00
|
|
|
MOVL $104, AX // syscall - setitimer
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2011-03-23 09:31:42 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·mincore(SB),NOSPLIT,$0-24
|
2011-06-07 22:50:10 -06:00
|
|
|
MOVL $218, AX // syscall - mincore
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2011-06-07 22:50:10 -06:00
|
|
|
RET
|
|
|
|
|
2011-11-30 09:59:44 -07:00
|
|
|
// func now() (sec int64, nsec int32)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT time·now(SB), NOSPLIT, $32
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVL $265, AX // syscall - clock_gettime
|
runtime: use monotonic clock for timers (linux/386, linux/amd64)
This lays the groundwork for making Go robust when the system's
calendar time jumps around. All input values to the runtimeTimer
struct now use the runtime clock as a common reference point.
This affects net.Conn.Set[Read|Write]Deadline(), time.Sleep(),
time.Timer, etc. Under normal conditions, behavior is unchanged.
Each platform and architecture's implementation of runtime·nanotime()
should be modified to use a monotonic system clock when possible.
Platforms/architectures modified and tested with monotonic clock:
linux/x86 - clock_gettime(CLOCK_MONOTONIC)
Update #6007
LGTM=dvyukov, rsc
R=golang-codereviews, dvyukov, alex.brainman, stephen.gutekanst, dave, rsc, mikioh.mikioh
CC=golang-codereviews
https://golang.org/cl/53010043
2014-02-24 08:57:46 -07:00
|
|
|
MOVL $0, BX // CLOCK_REALTIME
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
LEAL 8(SP), CX
|
2011-11-30 09:59:44 -07:00
|
|
|
MOVL $0, DX
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
MOVL 8(SP), AX // sec
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVL 12(SP), BX // nsec
|
2011-11-30 09:59:44 -07:00
|
|
|
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
// sec is in AX, nsec in BX
|
2011-11-30 09:59:44 -07:00
|
|
|
MOVL AX, sec+0(FP)
|
|
|
|
MOVL $0, sec+4(FP)
|
|
|
|
MOVL BX, nsec+8(FP)
|
|
|
|
RET
|
|
|
|
|
2011-11-03 15:35:28 -06:00
|
|
|
// int64 nanotime(void) so really
|
|
|
|
// void nanotime(int64 *nsec)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·nanotime(SB), NOSPLIT, $32
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVL $265, AX // syscall - clock_gettime
|
runtime: use monotonic clock for timers (linux/386, linux/amd64)
This lays the groundwork for making Go robust when the system's
calendar time jumps around. All input values to the runtimeTimer
struct now use the runtime clock as a common reference point.
This affects net.Conn.Set[Read|Write]Deadline(), time.Sleep(),
time.Timer, etc. Under normal conditions, behavior is unchanged.
Each platform and architecture's implementation of runtime·nanotime()
should be modified to use a monotonic system clock when possible.
Platforms/architectures modified and tested with monotonic clock:
linux/x86 - clock_gettime(CLOCK_MONOTONIC)
Update #6007
LGTM=dvyukov, rsc
R=golang-codereviews, dvyukov, alex.brainman, stephen.gutekanst, dave, rsc, mikioh.mikioh
CC=golang-codereviews
https://golang.org/cl/53010043
2014-02-24 08:57:46 -07:00
|
|
|
MOVL $1, BX // CLOCK_MONOTONIC
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
LEAL 8(SP), CX
|
2010-02-08 15:32:22 -07:00
|
|
|
MOVL $0, DX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2011-11-03 15:35:28 -06:00
|
|
|
MOVL 8(SP), AX // sec
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
MOVL 12(SP), BX // nsec
|
2011-11-03 15:35:28 -06:00
|
|
|
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
// sec is in AX, nsec in BX
|
2011-11-03 15:35:28 -06:00
|
|
|
// convert to DX:AX nsec
|
|
|
|
MOVL $1000000000, CX
|
|
|
|
MULL CX
|
|
|
|
ADDL BX, AX
|
|
|
|
ADCL $0, DX
|
runtime: use clock_gettime to get ns resolution for time.now & runtime.nanotime
For Linux/{386,arm}, FreeBSD/{386,amd64,arm}, NetBSD/{386,amd64}, OpenBSD/{386,amd64}.
Note: our Darwin implementation already has ns resolution.
Linux/386 (Core i7-2600 @ 3.40GHz, kernel 3.5.2-gentoo)
benchmark old ns/op new ns/op delta
BenchmarkNow 110 118 +7.27%
Linux/ARM (ARM Cortex-A8 @ 800MHz, kernel 2.6.32.28 android)
benchmark old ns/op new ns/op delta
BenchmarkNow 625 542 -13.28%
Linux/ARM (ARM Cortex-A9 @ 1GHz, Pandaboard)
benchmark old ns/op new ns/op delta
BenchmarkNow 992 909 -8.37%
FreeBSD 9-REL-p1/amd64 (Dell R610 Server with Xeon X5650 @ 2.67GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 699 695 -0.57%
FreeBSD 9-REL-p1/amd64 (Atom D525 @ 1.80GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1553 1658 +6.76%
OpenBSD/amd64 (Dell E6410 with i5 CPU M 540 @ 2.53GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 1262 1236 -2.06%
OpenBSD/i386 (Asus eeePC 701 with Intel Celeron M 900MHz - locked to 631MHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 5089 5043 -0.90%
NetBSD/i386 (VMware VM with Core i5 CPU @ 2.7GHz)
benchmark old ns/op new ns/op delta
BenchmarkNow 277 278 +0.36%
NetBSD/amd64 (VMware VM with Core i5 CPU @ 2.7Ghz)
benchmark old ns/op new ns/op delta
BenchmarkNow 103 105 +1.94%
Thanks Maxim Khitrov, Joel Sing, and Dave Cheney for providing benchmark data.
R=jsing, dave, rsc
CC=golang-dev
https://golang.org/cl/6820120
2012-12-18 07:57:25 -07:00
|
|
|
|
2011-11-03 15:35:28 -06:00
|
|
|
MOVL ret+0(FP), DI
|
|
|
|
MOVL AX, 0(DI)
|
|
|
|
MOVL DX, 4(DI)
|
2010-02-08 15:32:22 -07:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
|
2012-02-23 12:43:58 -07:00
|
|
|
MOVL $175, AX // syscall entry
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
MOVL 16(SP), SI
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
CMPL AX, $0xfffff001
|
|
|
|
JLS 2(PC)
|
|
|
|
INT $3
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $174, AX // syscall - rt_sigaction
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
MOVL 16(SP), SI
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sigtramp(SB),NOSPLIT,$44
|
2010-01-06 18:58:55 -07:00
|
|
|
get_tls(CX)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
// check that g exists
|
|
|
|
MOVL g(CX), DI
|
|
|
|
CMPL DI, $0
|
2013-07-11 14:39:39 -06:00
|
|
|
JNE 6(PC)
|
2012-09-04 12:40:49 -06:00
|
|
|
MOVL sig+0(FP), BX
|
|
|
|
MOVL BX, 0(SP)
|
2013-07-11 14:39:39 -06:00
|
|
|
MOVL $runtime·badsignal(SB), AX
|
|
|
|
CALL AX
|
2012-09-04 12:40:49 -06:00
|
|
|
RET
|
2012-03-12 13:55:18 -06:00
|
|
|
|
2010-06-12 11:48:04 -06:00
|
|
|
// save g
|
2011-02-23 12:47:42 -07:00
|
|
|
MOVL DI, 20(SP)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2010-06-12 11:48:04 -06:00
|
|
|
// g = m->gsignal
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
MOVL g_m(DI), BX
|
2010-06-12 11:48:04 -06:00
|
|
|
MOVL m_gsignal(BX), BX
|
|
|
|
MOVL BX, g(CX)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2010-06-12 11:48:04 -06:00
|
|
|
// copy arguments for call to sighandler
|
|
|
|
MOVL sig+0(FP), BX
|
|
|
|
MOVL BX, 0(SP)
|
|
|
|
MOVL info+4(FP), BX
|
|
|
|
MOVL BX, 4(SP)
|
|
|
|
MOVL context+8(FP), BX
|
|
|
|
MOVL BX, 8(SP)
|
2011-02-23 12:47:42 -07:00
|
|
|
MOVL DI, 12(SP)
|
2010-06-12 11:48:04 -06:00
|
|
|
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
CALL runtime·sighandler(SB)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2010-06-12 11:48:04 -06:00
|
|
|
// restore g
|
|
|
|
get_tls(CX)
|
|
|
|
MOVL 20(SP), BX
|
|
|
|
MOVL BX, g(CX)
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
|
2010-06-12 11:48:04 -06:00
|
|
|
RET
|
2009-03-31 16:45:12 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sigreturn(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $173, AX // rt_sigreturn
|
2011-08-29 08:36:06 -06:00
|
|
|
// Sigreturn expects same SP as signal handler,
|
|
|
|
// so cannot CALL *runtime._vsdo(SB) here.
|
|
|
|
INT $0x80
|
2009-03-31 16:45:12 -06:00
|
|
|
INT $3 // not reached
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·mmap(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $192, AX // mmap2
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
MOVL 16(SP), SI
|
|
|
|
MOVL 20(SP), DI
|
|
|
|
MOVL 24(SP), BP
|
|
|
|
SHRL $12, BP
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
CMPL AX, $0xfffff001
|
2009-11-13 11:08:51 -07:00
|
|
|
JLS 3(PC)
|
|
|
|
NOTL AX
|
|
|
|
INCL AX
|
2009-03-31 16:45:12 -06:00
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·munmap(SB),NOSPLIT,$0
|
2010-09-07 07:57:22 -06:00
|
|
|
MOVL $91, AX // munmap
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2010-09-07 07:57:22 -06:00
|
|
|
CMPL AX, $0xfffff001
|
|
|
|
JLS 2(PC)
|
|
|
|
INT $3
|
|
|
|
RET
|
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·madvise(SB),NOSPLIT,$0
|
2011-12-12 14:33:13 -07:00
|
|
|
MOVL $219, AX // madvise
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
CALL *runtime·_vdso(SB)
|
2012-12-22 13:06:28 -07:00
|
|
|
// ignore failure - maybe pages are locked
|
2011-12-12 14:33:13 -07:00
|
|
|
RET
|
|
|
|
|
2009-06-17 16:15:55 -06:00
|
|
|
// int32 futex(int32 *uaddr, int32 op, int32 val,
|
2009-03-31 16:45:12 -06:00
|
|
|
// struct timespec *timeout, int32 *uaddr2, int32 val2);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·futex(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $240, AX // futex
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
MOVL 16(SP), SI
|
|
|
|
MOVL 20(SP), DI
|
|
|
|
MOVL 24(SP), BP
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
RET
|
|
|
|
|
2012-12-18 09:30:29 -07:00
|
|
|
// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·clone(SB),NOSPLIT,$0
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $120, AX // clone
|
|
|
|
MOVL flags+4(SP), BX
|
|
|
|
MOVL stack+8(SP), CX
|
2009-06-17 16:15:55 -06:00
|
|
|
MOVL $0, DX // parent tid ptr
|
|
|
|
MOVL $0, DI // child tid ptr
|
2009-03-31 16:45:12 -06:00
|
|
|
|
2012-12-18 09:30:29 -07:00
|
|
|
// Copy mp, gp, fn off parent stack for use by child.
|
2009-06-17 16:15:55 -06:00
|
|
|
SUBL $16, CX
|
|
|
|
MOVL mm+12(SP), SI
|
|
|
|
MOVL SI, 0(CX)
|
|
|
|
MOVL gg+16(SP), SI
|
|
|
|
MOVL SI, 4(CX)
|
|
|
|
MOVL fn+20(SP), SI
|
|
|
|
MOVL SI, 8(CX)
|
|
|
|
MOVL $1234, 12(CX)
|
|
|
|
|
2011-08-29 08:36:06 -06:00
|
|
|
// cannot use CALL *runtime·_vdso(SB) here, because
|
runtime: parallelize garbage collector mark + sweep
Running test/garbage/parser.out.
On a 4-core Lenovo X201s (Linux):
31.12u 0.60s 31.74r 1 cpu, no atomics
32.27u 0.58s 32.86r 1 cpu, atomic instructions
33.04u 0.83s 27.47r 2 cpu
On a 16-core Xeon (Linux):
33.08u 0.65s 33.80r 1 cpu, no atomics
34.87u 1.12s 29.60r 2 cpu
36.00u 1.87s 28.43r 3 cpu
36.46u 2.34s 27.10r 4 cpu
38.28u 3.85s 26.92r 5 cpu
37.72u 5.25s 26.73r 6 cpu
39.63u 7.11s 26.95r 7 cpu
39.67u 8.10s 26.68r 8 cpu
On a 2-core MacBook Pro Core 2 Duo 2.26 (circa 2009, MacBookPro5,5):
39.43u 1.45s 41.27r 1 cpu, no atomics
43.98u 2.95s 38.69r 2 cpu
On a 2-core Mac Mini Core 2 Duo 1.83 (circa 2008; Macmini2,1):
48.81u 2.12s 51.76r 1 cpu, no atomics
57.15u 4.72s 51.54r 2 cpu
The handoff algorithm is really only good for two cores.
Beyond that we will need to so something more sophisticated,
like have each core hand off to the next one, around a circle.
Even so, the code is a good checkpoint; for now we'll limit the
number of gc procs to at most 2.
R=dvyukov
CC=golang-dev
https://golang.org/cl/4641082
2011-09-30 07:40:01 -06:00
|
|
|
// the stack changes during the system call (after
|
2011-08-29 08:36:06 -06:00
|
|
|
// CALL *runtime·_vdso(SB), the child is still using
|
|
|
|
// the parent's stack when executing its RET instruction).
|
2009-03-31 16:45:12 -06:00
|
|
|
INT $0x80
|
|
|
|
|
|
|
|
// In parent, return.
|
|
|
|
CMPL AX, $0
|
|
|
|
JEQ 2(PC)
|
|
|
|
RET
|
|
|
|
|
2009-06-17 16:15:55 -06:00
|
|
|
// Paranoia: check that SP is as we expect.
|
|
|
|
MOVL 12(SP), BP
|
|
|
|
CMPL BP, $1234
|
|
|
|
JEQ 2(PC)
|
|
|
|
INT $3
|
2009-03-31 16:45:12 -06:00
|
|
|
|
2009-06-17 16:15:55 -06:00
|
|
|
// Initialize AX to Linux tid
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $224, AX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
|
2009-06-17 16:15:55 -06:00
|
|
|
// In child on new stack. Reload registers (paranoia).
|
|
|
|
MOVL 0(SP), BX // m
|
|
|
|
MOVL 4(SP), DX // g
|
2010-07-17 17:54:03 -06:00
|
|
|
MOVL 8(SP), SI // fn
|
2009-06-17 16:15:55 -06:00
|
|
|
|
|
|
|
MOVL AX, m_procid(BX) // save tid as m->procid
|
|
|
|
|
|
|
|
// set up ldt 7+id to point at m->tls.
|
2010-07-17 17:54:03 -06:00
|
|
|
// newosproc left the id in tls[0].
|
2009-06-17 16:15:55 -06:00
|
|
|
LEAL m_tls(BX), BP
|
|
|
|
MOVL 0(BP), DI
|
|
|
|
ADDL $7, DI // m0 is LDT#7. count up.
|
|
|
|
// setldt(tls#, &tls, sizeof tls)
|
|
|
|
PUSHAL // save registers
|
|
|
|
PUSHL $32 // sizeof tls
|
|
|
|
PUSHL BP // &tls
|
|
|
|
PUSHL DI // tls #
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
CALL runtime·setldt(SB)
|
2009-06-17 16:15:55 -06:00
|
|
|
POPL AX
|
|
|
|
POPL AX
|
|
|
|
POPL AX
|
|
|
|
POPAL
|
|
|
|
|
|
|
|
// Now segment is established. Initialize m, g.
|
2010-01-06 18:58:55 -07:00
|
|
|
get_tls(AX)
|
|
|
|
MOVL DX, g(AX)
|
all: remove 'extern register M *m' from runtime
The runtime has historically held two dedicated values g (current goroutine)
and m (current thread) in 'extern register' slots (TLS on x86, real registers
backed by TLS on ARM).
This CL removes the extern register m; code now uses g->m.
On ARM, this frees up the register that formerly held m (R9).
This is important for NaCl, because NaCl ARM code cannot use R9 at all.
The Go 1 macrobenchmarks (those with per-op times >= 10 µs) are unaffected:
BenchmarkBinaryTree17 5491374955 5471024381 -0.37%
BenchmarkFannkuch11 4357101311 4275174828 -1.88%
BenchmarkGobDecode 11029957 11364184 +3.03%
BenchmarkGobEncode 6852205 6784822 -0.98%
BenchmarkGzip 650795967 650152275 -0.10%
BenchmarkGunzip 140962363 141041670 +0.06%
BenchmarkHTTPClientServer 71581 73081 +2.10%
BenchmarkJSONEncode 31928079 31913356 -0.05%
BenchmarkJSONDecode 117470065 113689916 -3.22%
BenchmarkMandelbrot200 6008923 5998712 -0.17%
BenchmarkGoParse 6310917 6327487 +0.26%
BenchmarkRegexpMatchMedium_1K 114568 114763 +0.17%
BenchmarkRegexpMatchHard_1K 168977 169244 +0.16%
BenchmarkRevcomp 935294971 914060918 -2.27%
BenchmarkTemplate 145917123 148186096 +1.55%
Minux previous reported larger variations, but these were caused by
run-to-run noise, not repeatable slowdowns.
Actual code changes by Minux.
I only did the docs and the benchmarking.
LGTM=dvyukov, iant, minux
R=minux, josharian, iant, dave, bradfitz, dvyukov
CC=golang-codereviews
https://golang.org/cl/109050043
2014-06-26 09:54:39 -06:00
|
|
|
MOVL BX, g_m(DX)
|
2009-06-17 16:15:55 -06:00
|
|
|
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
CALL runtime·stackcheck(SB) // smashes AX, CX
|
2009-06-17 16:15:55 -06:00
|
|
|
MOVL 0(DX), DX // paranoia; check they are not nil
|
|
|
|
MOVL 0(BX), BX
|
|
|
|
|
|
|
|
// more paranoia; check that stack splitting code works
|
|
|
|
PUSHAL
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
CALL runtime·emptyfunc(SB)
|
2009-06-17 16:15:55 -06:00
|
|
|
POPAL
|
|
|
|
|
2010-07-17 17:54:03 -06:00
|
|
|
CALL SI // fn()
|
runtime: ,s/[a-zA-Z0-9_]+/runtime·&/g, almost
Prefix all external symbols in runtime by runtime·,
to avoid conflicts with possible symbols of the same
name in linked-in C libraries. The obvious conflicts
are printf, malloc, and free, but hide everything to
avoid future pain.
The symbols left alone are:
** known to cgo **
_cgo_free
_cgo_malloc
libcgo_thread_start
initcgo
ncgocall
** known to linker **
_rt0_$GOARCH
_rt0_$GOARCH_$GOOS
text
etext
data
end
pclntab
epclntab
symtab
esymtab
** known to C compiler **
_divv
_modv
_div64by32
etc (arch specific)
Tested on darwin/386, darwin/amd64, linux/386, linux/amd64.
Built (but not tested) for freebsd/386, freebsd/amd64, linux/arm, windows/386.
R=r, PeterGo
CC=golang-dev
https://golang.org/cl/2899041
2010-11-04 12:00:19 -06:00
|
|
|
CALL runtime·exit1(SB)
|
2009-06-17 16:15:55 -06:00
|
|
|
MOVL $0x1234, 0x1005
|
|
|
|
RET
|
2009-03-31 16:45:12 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
|
2009-03-31 16:45:12 -06:00
|
|
|
MOVL $186, AX // sigaltstack
|
|
|
|
MOVL new+4(SP), BX
|
|
|
|
MOVL old+8(SP), CX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-03-31 16:45:12 -06:00
|
|
|
CMPL AX, $0xfffff001
|
|
|
|
JLS 2(PC)
|
|
|
|
INT $3
|
|
|
|
RET
|
|
|
|
|
|
|
|
// <asm-i386/ldt.h>
|
|
|
|
// struct user_desc {
|
2009-09-21 16:46:50 -06:00
|
|
|
// unsigned int entry_number;
|
|
|
|
// unsigned long base_addr;
|
|
|
|
// unsigned int limit;
|
|
|
|
// unsigned int seg_32bit:1;
|
|
|
|
// unsigned int contents:2;
|
|
|
|
// unsigned int read_exec_only:1;
|
|
|
|
// unsigned int limit_in_pages:1;
|
|
|
|
// unsigned int seg_not_present:1;
|
|
|
|
// unsigned int useable:1;
|
2009-03-31 16:45:12 -06:00
|
|
|
// };
|
|
|
|
#define SEG_32BIT 0x01
|
|
|
|
// contents are the 2 bits 0x02 and 0x04.
|
|
|
|
#define CONTENTS_DATA 0x00
|
|
|
|
#define CONTENTS_STACK 0x02
|
|
|
|
#define CONTENTS_CODE 0x04
|
|
|
|
#define READ_EXEC_ONLY 0x08
|
|
|
|
#define LIMIT_IN_PAGES 0x10
|
|
|
|
#define SEG_NOT_PRESENT 0x20
|
|
|
|
#define USEABLE 0x40
|
|
|
|
|
|
|
|
// setldt(int entry, int address, int limit)
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·setldt(SB),NOSPLIT,$32
|
2009-10-03 11:37:12 -06:00
|
|
|
MOVL entry+0(FP), BX // entry
|
|
|
|
MOVL address+4(FP), CX // base address
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When linking against the system libraries,
|
|
|
|
* we use its pthread_create and let it set up %gs
|
|
|
|
* for us. When we do that, the private storage
|
|
|
|
* we get is not at 0(GS), 4(GS), but -8(GS), -4(GS).
|
|
|
|
* To insulate the rest of the tool chain from this
|
liblink: introduce TLS register on 386 and amd64
When I did the original 386 ports on Linux and OS X, I chose to
define GS-relative expressions like 4(GS) as relative to the actual
thread-local storage base, which was usually GS but might not be
(it might be FS, or it might be a different constant offset from GS or FS).
The original scope was limited but since then the rewrites have
gotten out of control. Sometimes GS is rewritten, sometimes FS.
Some ports do other rewrites to enable shared libraries and
other linking. At no point in the code is it clear whether you are
looking at the real GS/FS or some synthesized thing that will be
rewritten. The code manipulating all these is duplicated in many
places.
The first step to fixing issue 7719 is to make the code intelligible
again.
This CL adds an explicit TLS pseudo-register to the 386 and amd64.
As a register, TLS refers to the thread-local storage base, and it
can only be loaded into another register:
MOVQ TLS, AX
An offset from the thread-local storage base is written off(reg)(TLS*1).
Semantically it is off(reg), but the (TLS*1) annotation marks this as
indexing from the loaded TLS base. This emits a relocation so that
if the linker needs to adjust the offset, it can. For example:
MOVQ TLS, AX
MOVQ 8(AX)(TLS*1), CX // load m into CX
On systems that support direct access to the TLS memory, this
pair of instructions can be reduced to a direct TLS memory reference:
MOVQ 8(TLS), CX // load m into CX
The 2-instruction and 1-instruction forms correspond roughly to
ELF TLS initial exec mode and ELF TLS local exec mode, respectively.
Liblink applies this rewrite on systems that support the 1-instruction form.
The decision is made using only the operating system (and probably
the -shared flag, eventually), not the link mode. If some link modes
on a particular operating system require the 2-instruction form,
then all builds for that operating system will use the 2-instruction
form, so that the link mode decision can be delayed to link time.
Obviously it is late to be making changes like this, but I despair
of correcting issue 7719 and issue 7164 without it. To make sure
I am not changing existing behavior, I built a "hello world" program
for every GOOS/GOARCH combination we have and then worked
to make sure that the rewrite generates exactly the same binaries,
byte for byte. There are a handful of TODOs in the code marking
kludges to get the byte-for-byte property, but at least now I can
explain exactly how each binary is handled.
The targets I tested this way are:
darwin-386
darwin-amd64
dragonfly-386
dragonfly-amd64
freebsd-386
freebsd-amd64
freebsd-arm
linux-386
linux-amd64
linux-arm
nacl-386
nacl-amd64p32
netbsd-386
netbsd-amd64
openbsd-386
openbsd-amd64
plan9-386
plan9-amd64
solaris-amd64
windows-386
windows-amd64
There were four exceptions to the byte-for-byte goal:
windows-386 and windows-amd64 have a time stamp
at bytes 137 and 138 of the header.
darwin-386 and plan9-386 have five or six modified
bytes in the middle of the Go symbol table, caused by
editing comments in runtime/sys_{darwin,plan9}_386.s.
Fixes #7164.
LGTM=iant
R=iant, aram, minux.ma, dave
CC=golang-codereviews
https://golang.org/cl/87920043
2014-04-15 11:45:39 -06:00
|
|
|
* ugliness, 8l rewrites 0(TLS) into -8(GS) for us.
|
2009-10-03 11:37:12 -06:00
|
|
|
* To accommodate that rewrite, we translate
|
|
|
|
* the address here and bump the limit to 0xffffffff (no limit)
|
|
|
|
* so that -8(GS) maps to 0(address).
|
2010-07-17 17:54:03 -06:00
|
|
|
* Also, the final 0(GS) (current 8(CX)) has to point
|
|
|
|
* to itself, to mimic ELF.
|
2009-10-03 11:37:12 -06:00
|
|
|
*/
|
|
|
|
ADDL $0x8, CX // address
|
2010-07-17 17:54:03 -06:00
|
|
|
MOVL CX, 0(CX)
|
2009-10-03 11:37:12 -06:00
|
|
|
|
2009-03-31 16:45:12 -06:00
|
|
|
// set up user_desc
|
|
|
|
LEAL 16(SP), AX // struct user_desc
|
|
|
|
MOVL BX, 0(AX)
|
2009-10-03 11:37:12 -06:00
|
|
|
MOVL CX, 4(AX)
|
|
|
|
MOVL $0xfffff, 8(AX)
|
|
|
|
MOVL $(SEG_32BIT|LIMIT_IN_PAGES|USEABLE|CONTENTS_DATA), 12(AX) // flag bits
|
2009-03-31 16:45:12 -06:00
|
|
|
|
|
|
|
// call modify_ldt
|
2009-09-21 16:46:50 -06:00
|
|
|
MOVL $1, BX // func = 1 (write)
|
|
|
|
MOVL AX, CX // user_desc
|
|
|
|
MOVL $16, DX // sizeof(user_desc)
|
|
|
|
MOVL $123, AX // syscall - modify_ldt
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
2009-09-21 16:46:50 -06:00
|
|
|
|
|
|
|
// breakpoint on error
|
|
|
|
CMPL AX, $0xfffff001
|
|
|
|
JLS 2(PC)
|
|
|
|
INT $3
|
2009-09-22 17:28:32 -06:00
|
|
|
|
|
|
|
// compute segment selector - (entry*8+7)
|
|
|
|
MOVL entry+0(FP), AX
|
|
|
|
SHLL $3, AX
|
|
|
|
ADDL $7, AX
|
|
|
|
MOVW AX, GS
|
|
|
|
|
2009-03-31 16:45:12 -06:00
|
|
|
RET
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·osyield(SB),NOSPLIT,$0
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
MOVL $158, AX
|
2011-08-29 08:36:06 -06:00
|
|
|
CALL *runtime·_vdso(SB)
|
runtime: improve Linux mutex
The implementation is hybrid active/passive spin/blocking mutex.
The design minimizes amount of context switches and futex calls.
The idea is that all critical sections in runtime are intentially
small, so pure blocking mutex behaves badly causing
a lot of context switches, thread parking/unparking and kernel calls.
Note that some synthetic benchmarks become somewhat slower,
that's due to increased contention on other data structures,
it should not affect programs that do any real work.
On 2 x Intel E5620, 8 HT cores, 2.4GHz
benchmark old ns/op new ns/op delta
BenchmarkSelectContended 521.00 503.00 -3.45%
BenchmarkSelectContended-2 661.00 320.00 -51.59%
BenchmarkSelectContended-4 1139.00 629.00 -44.78%
BenchmarkSelectContended-8 2870.00 878.00 -69.41%
BenchmarkSelectContended-16 5276.00 818.00 -84.50%
BenchmarkChanContended 112.00 103.00 -8.04%
BenchmarkChanContended-2 631.00 174.00 -72.42%
BenchmarkChanContended-4 682.00 272.00 -60.12%
BenchmarkChanContended-8 1601.00 520.00 -67.52%
BenchmarkChanContended-16 3100.00 372.00 -88.00%
BenchmarkChanSync 253.00 239.00 -5.53%
BenchmarkChanSync-2 5030.00 4648.00 -7.59%
BenchmarkChanSync-4 4826.00 4694.00 -2.74%
BenchmarkChanSync-8 4778.00 4713.00 -1.36%
BenchmarkChanSync-16 5289.00 4710.00 -10.95%
BenchmarkChanProdCons0 273.00 254.00 -6.96%
BenchmarkChanProdCons0-2 599.00 400.00 -33.22%
BenchmarkChanProdCons0-4 1168.00 659.00 -43.58%
BenchmarkChanProdCons0-8 2831.00 1057.00 -62.66%
BenchmarkChanProdCons0-16 4197.00 1037.00 -75.29%
BenchmarkChanProdCons10 150.00 140.00 -6.67%
BenchmarkChanProdCons10-2 607.00 268.00 -55.85%
BenchmarkChanProdCons10-4 1137.00 404.00 -64.47%
BenchmarkChanProdCons10-8 2115.00 828.00 -60.85%
BenchmarkChanProdCons10-16 4283.00 855.00 -80.04%
BenchmarkChanProdCons100 117.00 110.00 -5.98%
BenchmarkChanProdCons100-2 558.00 218.00 -60.93%
BenchmarkChanProdCons100-4 722.00 287.00 -60.25%
BenchmarkChanProdCons100-8 1840.00 431.00 -76.58%
BenchmarkChanProdCons100-16 3394.00 448.00 -86.80%
BenchmarkChanProdConsWork0 2014.00 1996.00 -0.89%
BenchmarkChanProdConsWork0-2 1207.00 1127.00 -6.63%
BenchmarkChanProdConsWork0-4 1913.00 611.00 -68.06%
BenchmarkChanProdConsWork0-8 3016.00 949.00 -68.53%
BenchmarkChanProdConsWork0-16 4320.00 1154.00 -73.29%
BenchmarkChanProdConsWork10 1906.00 1897.00 -0.47%
BenchmarkChanProdConsWork10-2 1123.00 1033.00 -8.01%
BenchmarkChanProdConsWork10-4 1076.00 571.00 -46.93%
BenchmarkChanProdConsWork10-8 2748.00 1096.00 -60.12%
BenchmarkChanProdConsWork10-16 4600.00 1105.00 -75.98%
BenchmarkChanProdConsWork100 1884.00 1852.00 -1.70%
BenchmarkChanProdConsWork100-2 1235.00 1146.00 -7.21%
BenchmarkChanProdConsWork100-4 1217.00 619.00 -49.14%
BenchmarkChanProdConsWork100-8 1534.00 509.00 -66.82%
BenchmarkChanProdConsWork100-16 4126.00 918.00 -77.75%
BenchmarkSyscall 34.40 33.30 -3.20%
BenchmarkSyscall-2 160.00 121.00 -24.38%
BenchmarkSyscall-4 131.00 136.00 +3.82%
BenchmarkSyscall-8 139.00 131.00 -5.76%
BenchmarkSyscall-16 161.00 168.00 +4.35%
BenchmarkSyscallWork 950.00 950.00 +0.00%
BenchmarkSyscallWork-2 481.00 480.00 -0.21%
BenchmarkSyscallWork-4 268.00 270.00 +0.75%
BenchmarkSyscallWork-8 156.00 169.00 +8.33%
BenchmarkSyscallWork-16 188.00 184.00 -2.13%
BenchmarkSemaSyntNonblock 36.40 35.60 -2.20%
BenchmarkSemaSyntNonblock-2 81.40 45.10 -44.59%
BenchmarkSemaSyntNonblock-4 126.00 108.00 -14.29%
BenchmarkSemaSyntNonblock-8 112.00 112.00 +0.00%
BenchmarkSemaSyntNonblock-16 110.00 112.00 +1.82%
BenchmarkSemaSyntBlock 35.30 35.30 +0.00%
BenchmarkSemaSyntBlock-2 118.00 124.00 +5.08%
BenchmarkSemaSyntBlock-4 105.00 108.00 +2.86%
BenchmarkSemaSyntBlock-8 101.00 111.00 +9.90%
BenchmarkSemaSyntBlock-16 112.00 118.00 +5.36%
BenchmarkSemaWorkNonblock 810.00 811.00 +0.12%
BenchmarkSemaWorkNonblock-2 476.00 414.00 -13.03%
BenchmarkSemaWorkNonblock-4 238.00 228.00 -4.20%
BenchmarkSemaWorkNonblock-8 140.00 126.00 -10.00%
BenchmarkSemaWorkNonblock-16 117.00 116.00 -0.85%
BenchmarkSemaWorkBlock 810.00 811.00 +0.12%
BenchmarkSemaWorkBlock-2 454.00 466.00 +2.64%
BenchmarkSemaWorkBlock-4 243.00 241.00 -0.82%
BenchmarkSemaWorkBlock-8 145.00 137.00 -5.52%
BenchmarkSemaWorkBlock-16 132.00 123.00 -6.82%
BenchmarkContendedSemaphore 123.00 102.00 -17.07%
BenchmarkContendedSemaphore-2 34.80 34.90 +0.29%
BenchmarkContendedSemaphore-4 34.70 34.80 +0.29%
BenchmarkContendedSemaphore-8 34.70 34.70 +0.00%
BenchmarkContendedSemaphore-16 34.80 34.70 -0.29%
BenchmarkMutex 26.80 26.00 -2.99%
BenchmarkMutex-2 108.00 45.20 -58.15%
BenchmarkMutex-4 103.00 127.00 +23.30%
BenchmarkMutex-8 109.00 147.00 +34.86%
BenchmarkMutex-16 102.00 152.00 +49.02%
BenchmarkMutexSlack 27.00 26.90 -0.37%
BenchmarkMutexSlack-2 149.00 165.00 +10.74%
BenchmarkMutexSlack-4 121.00 209.00 +72.73%
BenchmarkMutexSlack-8 101.00 158.00 +56.44%
BenchmarkMutexSlack-16 97.00 129.00 +32.99%
BenchmarkMutexWork 792.00 794.00 +0.25%
BenchmarkMutexWork-2 407.00 409.00 +0.49%
BenchmarkMutexWork-4 220.00 209.00 -5.00%
BenchmarkMutexWork-8 267.00 160.00 -40.07%
BenchmarkMutexWork-16 315.00 300.00 -4.76%
BenchmarkMutexWorkSlack 792.00 793.00 +0.13%
BenchmarkMutexWorkSlack-2 406.00 404.00 -0.49%
BenchmarkMutexWorkSlack-4 225.00 212.00 -5.78%
BenchmarkMutexWorkSlack-8 268.00 136.00 -49.25%
BenchmarkMutexWorkSlack-16 300.00 300.00 +0.00%
BenchmarkRWMutexWrite100 27.10 27.00 -0.37%
BenchmarkRWMutexWrite100-2 33.10 40.80 +23.26%
BenchmarkRWMutexWrite100-4 113.00 88.10 -22.04%
BenchmarkRWMutexWrite100-8 119.00 95.30 -19.92%
BenchmarkRWMutexWrite100-16 148.00 109.00 -26.35%
BenchmarkRWMutexWrite10 29.60 29.40 -0.68%
BenchmarkRWMutexWrite10-2 111.00 61.40 -44.68%
BenchmarkRWMutexWrite10-4 270.00 208.00 -22.96%
BenchmarkRWMutexWrite10-8 204.00 185.00 -9.31%
BenchmarkRWMutexWrite10-16 261.00 190.00 -27.20%
BenchmarkRWMutexWorkWrite100 1040.00 1036.00 -0.38%
BenchmarkRWMutexWorkWrite100-2 593.00 580.00 -2.19%
BenchmarkRWMutexWorkWrite100-4 470.00 365.00 -22.34%
BenchmarkRWMutexWorkWrite100-8 468.00 289.00 -38.25%
BenchmarkRWMutexWorkWrite100-16 604.00 374.00 -38.08%
BenchmarkRWMutexWorkWrite10 951.00 951.00 +0.00%
BenchmarkRWMutexWorkWrite10-2 1001.00 928.00 -7.29%
BenchmarkRWMutexWorkWrite10-4 1555.00 1006.00 -35.31%
BenchmarkRWMutexWorkWrite10-8 2085.00 1171.00 -43.84%
BenchmarkRWMutexWorkWrite10-16 2082.00 1614.00 -22.48%
R=rsc, iant, msolo, fw, iant
CC=golang-dev
https://golang.org/cl/4711045
2011-07-29 10:44:06 -06:00
|
|
|
RET
|
2012-08-09 20:05:26 -06:00
|
|
|
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
|
2012-08-09 20:05:26 -06:00
|
|
|
MOVL $242, AX // syscall - sched_getaffinity
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|
2013-03-14 09:06:35 -06:00
|
|
|
|
|
|
|
// int32 runtime·epollcreate(int32 size);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollcreate(SB),NOSPLIT,$0
|
2013-03-14 09:06:35 -06:00
|
|
|
MOVL $254, AX
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|
|
|
|
|
|
|
|
// int32 runtime·epollcreate1(int32 flags);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollcreate1(SB),NOSPLIT,$0
|
2013-03-14 09:06:35 -06:00
|
|
|
MOVL $329, AX
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|
|
|
|
|
|
|
|
// int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollctl(SB),NOSPLIT,$0
|
2013-03-14 09:06:35 -06:00
|
|
|
MOVL $255, AX
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
MOVL 16(SP), SI
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|
|
|
|
|
|
|
|
// int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·epollwait(SB),NOSPLIT,$0
|
2013-03-14 09:06:35 -06:00
|
|
|
MOVL $256, AX
|
|
|
|
MOVL 4(SP), BX
|
|
|
|
MOVL 8(SP), CX
|
|
|
|
MOVL 12(SP), DX
|
|
|
|
MOVL 16(SP), SI
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|
|
|
|
|
|
|
|
// void runtime·closeonexec(int32 fd);
|
2013-08-07 13:20:05 -06:00
|
|
|
TEXT runtime·closeonexec(SB),NOSPLIT,$0
|
2013-03-14 09:06:35 -06:00
|
|
|
MOVL $55, AX // fcntl
|
|
|
|
MOVL 4(SP), BX // fd
|
|
|
|
MOVL $2, CX // F_SETFD
|
|
|
|
MOVL $1, DX // FD_CLOEXEC
|
|
|
|
CALL *runtime·_vdso(SB)
|
|
|
|
RET
|