go/test/bench/timing.log

All tests on r45 or r70

Aug 3 2009

First version of fasta. Translation of fasta.c, fetched from
	http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4

fasta -n 25000000
	gcc -O2 fasta.c	5.98u 0.00s 6.01r
	gccgo -O2 fasta.go	8.82u 0.02s 8.85r
	6g fasta.go	13.50u 0.02s 13.53r
	6g -B fasta.go	12.99u 0.02s 13.02r

Aug 4 2009
[added timing.sh]

# myrandom:
#   hand-written optimization of integer division
#   use int32->float conversion
fasta -n 25000000
	# probably I/O library inefficiencies
	gcc -O2 fasta.c	5.99u 0.00s 6.00r 
	gccgo -O2 fasta.go	8.82u 0.02s 8.85r
	gc fasta	10.70u 0.00s 10.77r
	gc_B fasta	10.09u 0.03s 10.12r

reverse-complement < output-of-fasta-25000000
	# we don't know - memory cache behavior?
	gcc -O2 reverse-complement.c	2.04u 0.94s 10.54r
	gccgo -O2 reverse-complement.go	6.54u 0.63s 7.17r
	gc reverse-complement	6.55u 0.70s 7.26r
	gc_B reverse-complement	6.32u 0.70s 7.10r

nbody 50000000
	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
	gcc -O2 nbody.c	21.61u 0.01s 24.80r
	gccgo -O2 nbody.go	118.55u 0.02s 120.32r
	gc nbody	100.84u 0.00s 100.85r
	gc_B nbody	103.33u 0.00s 103.39r
[
hacked Sqrt in assembler
	gc nbody	31.97u 0.00s 32.01r
]

binary-tree 15 # too slow to use 20
	# memory allocation and garbage collection
	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
	gccgo -O2 binary-tree.go	1.69u 0.46s 2.15r
	gccgo -O2 binary-tree-freelist.go	8.48u 0.00s 8.48r
	gc binary-tree	9.60u 0.01s 9.62r
	gc binary-tree-freelist	0.48u 0.01s 0.50r

August 5, 2009

fannkuch 12
	# bounds checking is half the difference
	# rest might be registerization
	gcc -O2 fannkuch.c	60.09u 0.01s 60.32r
	gccgo -O2 fannkuch.go	64.89u 0.00s 64.92r
	gc fannkuch	124.59u 0.00s 124.67r
	gc_B fannkuch	91.14u 0.00s 91.16r

regex-dna 100000
	# regexp code is slow on trivial regexp
	gcc -O2 regex-dna.c -lpcre	0.92u 0.00s 0.99r
	gc regex-dna	26.94u 0.18s 28.75r
	gc_B regex-dna	26.51u 0.09s 26.75r

spectral-norm 5500
	gcc -O2 spectral-norm.c -lm	11.54u 0.00s 11.55r
	gccgo -O2 spectral-norm.go	12.20u 0.00s 12.23r
	gc spectral-norm	50.23u 0.00s 50.36r
	gc_B spectral-norm	49.69u 0.01s 49.83r
	gc spectral-norm-parallel	24.47u 0.03s 11.05r  # has shift >>1 not div /2
	[using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]

August 6, 2009

k-nucleotide 5000000
	# string maps are slower than glib string maps
	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.72u 0.01s 10.74r
	gccgo -O2 k-nucleotide.go	21.64u 0.83s 22.78r
	gc k-nucleotide	16.08u 0.06s 16.50r
	gc_B k-nucleotide	17.32u 0.02s 17.37r

mandelbrot 5500
	# floating point code generator should use more registers
	gcc -O2 mandelbrot.c	56.13u 0.02s 56.17r
	gccgo -O2 mandelbrot.go	57.49u 0.01s 57.51r
	gc mandelbrot	74.32u 0.00s 74.35r
	gc_B mandelbrot	74.28u 0.01s 74.31r

meteor 16000
	# we don't know
	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.14r
	gc meteor-contest	0.24u 0.00s 0.26r
	gc_B meteor-contest	0.23u 0.00s 0.24r

pidigits 10000
	# bignum is slower than gmp
	gcc -O2 pidigits.c -lgmp	2.60u 0.00s 2.62r
	gc pidigits	77.69u 0.14s 78.18r
	gc_B pidigits	74.26u 0.18s 75.41r
	gc_B pidigits	68.48u 0.20s 69.31r   # special case: no bounds checking in bignum

August 7 2009

# New gc does better division by powers of 2.  Significant improvements:

spectral-norm 5500
	# floating point code generator should use more registers; possibly inline evalA
	gcc -O2 spectral-norm.c -lm	11.50u 0.00s 11.50r
	gccgo -O2 spectral-norm.go	12.02u 0.00s 12.02r
	gc spectral-norm	23.98u 0.00s 24.00r	# new time is 0.48 times old time, 52% faster
	gc_B spectral-norm	23.71u 0.01s 23.72r	# ditto
	gc spectral-norm-parallel	24.04u 0.00s 6.26r  # /2 put back.  note: 4x faster (on r70, idle)

k-nucleotide 1000000
	# string maps are slower than glib string maps
	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.82u 0.04s 10.87r
	gccgo -O2 k-nucleotide.go	22.73u 0.89s 23.63r
	gc k-nucleotide	15.97u 0.03s 16.04r
	gc_B k-nucleotide	15.86u 0.06s 15.93r	# 8.5% faster, but probably due to weird cache effects in previous version

pidigits 10000
	# bignum is slower than gmp
	gcc -O2 pidigits.c -lgmp	2.58u 0.00s 2.58r
	gc pidigits	71.24u 0.04s 71.28r	# 8.5% faster
	gc_B pidigits	71.25u 0.03s 71.29r	# 4% faster

threadring 50000000
	gcc -O2 threadring.c -lpthread	35.51u 160.21s 199.50r
	gccgo -O2 threadring.go	90.33u 459.95s 448.03r
	gc threadring	33.11u 0.00s 33.14r
	GOMAXPROCS=4 gc threadring	114.48u 226.65s 371.59r
	# change wait code to do <-make(chan int) instead of time.Sleep
	gc threadring	28.41u 0.01s 29.35r
	GOMAXPROCS=4 gc threadring	112.59u 232.83s 384.72r
	
chameneos 6000000
	gcc -O2 chameneosredux.c -lpthread	18.14u 276.52s 76.93r
	gc chameneosredux	20.19u 0.01s 20.23r

Aug 10 2009

# new 6g with better fp registers, fast div and mod of integers
# complete set of timings listed. significant changes marked ***

fasta -n 25000000
	# probably I/O library inefficiencies
	gcc -O2 fasta.c	5.96u 0.00s 5.97r
	gc fasta	10.59u 0.01s 10.61r
	gc_B fasta	9.92u 0.02s 9.95r

reverse-complement < output-of-fasta-25000000
	# we don't know - memory cache behavior?
	gcc -O2 reverse-complement.c	1.96u 1.56s 16.23r
	gccgo -O2 reverse-complement.go	6.41u 0.62s 7.05r
	gc reverse-complement	6.46u 0.70s 7.17r
	gc_B reverse-complement	6.22u 0.72s 6.95r

nbody 50000000
	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
	gcc -O2 nbody.c	21.26u 0.01s 21.28r
	gccgo -O2 nbody.go	116.68u 0.07s 116.80r
	gc nbody	86.64u 0.01s 86.68r	# *** -14%
	gc_B nbody	85.72u 0.02s 85.77r	# *** -17%

binary-tree 15 # too slow to use 20
	# memory allocation and garbage collection
	gcc -O2 binary-tree.c -lm	0.87u 0.00s 0.87r
	gccgo -O2 binary-tree.go	1.61u 0.47s 2.09r
	gccgo -O2 binary-tree-freelist.go	0.00u 0.00s 0.01r
	gc binary-tree	9.11u 0.01s 9.13r	# *** -5%
	gc binary-tree-freelist	0.47u 0.01s 0.48r

fannkuch 12
	# bounds checking is half the difference
	# rest might be registerization
	gcc -O2 fannkuch.c	59.92u 0.00s 59.94r
	gccgo -O2 fannkuch.go	65.54u 0.00s 65.58r
	gc fannkuch	123.98u 0.01s 124.04r
	gc_B fannkuch	90.75u 0.00s 90.78r

regex-dna 100000
	# regexp code is slow on trivial regexp
	gcc -O2 regex-dna.c -lpcre	0.91u 0.00s 0.92r
	gc regex-dna	27.25u 0.02s 27.28r
	gc_B regex-dna	29.51u 0.03s 29.55r

spectral-norm 5500
	# possibly inline evalA
	gcc -O2 spectral-norm.c -lm	11.57u 0.00s 11.57r
	gccgo -O2 spectral-norm.go	12.07u 0.01s 12.08r
	gc spectral-norm	23.99u 0.00s 24.00r
	gc_B spectral-norm	23.73u 0.00s 23.75r

k-nucleotide 1000000
	# string maps are slower than glib string maps
	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.63u 0.02s 10.69r
	gccgo -O2 k-nucleotide.go	23.19u 0.91s 24.12r
	gc k-nucleotide	16.73u 0.04s 16.78r	# *** +5% (but this one seems to vary by more than that)
	gc_B k-nucleotide	16.46u 0.04s 16.51r	# *** +5%

mandelbrot 16000
	gcc -O2 mandelbrot.c	56.16u 0.00s 56.16r
	gccgo -O2 mandelbrot.go	57.41u 0.01s 57.42r
	gc mandelbrot	64.05u 0.02s 64.08r	# *** -14%
	gc_B mandelbrot	64.10u 0.02s 64.14r	# *** -14%

meteor 16000
	# we don't know
	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.12r
	gc meteor-contest	0.18u 0.00s 0.20r	# *** -25%
	gc_B meteor-contest	0.17u 0.00s 0.18r	# *** -24%

pidigits 10000
	# bignum is slower than gmp
	gcc -O2 pidigits.c -lgmp	2.57u 0.00s 2.57r
	gc pidigits	71.82u 0.04s 71.89r
	gc_B pidigits	71.84u 0.08s 71.98r

threadring 50000000
	gcc -O2 threadring.c -lpthread	30.91u 164.33s 204.57r
	gccgo -O2 threadring.go	87.12u 460.04s 447.61r
	gc threadring	38.55u 0.00s 38.56r	# *** +16%

chameneos 6000000
	gcc -O2 chameneosredux.c -lpthread	17.93u 323.65s 88.47r
	gc chameneosredux	21.72u 0.00s 21.73r

August 10 2009

# In-place versions for some bignum operations.
pidigits 10000
	gcc -O2 pidigits.c -lgmp	2.56u 0.00s 2.57r
	gc pidigits	55.22u 0.04s 55.29r	# *** -23%
	gc_B pidigits	55.49u 0.02s 55.60r	# *** -23%
mandelbrot R=rsc DELTA=147 (145 added, 0 deleted, 2 changed) OCL=32840 CL=32845 2009-08-06 15:21:21 -06:00			`All tests on r45 or r70`
clean up the code a bit start a log of progress R=rsc DELTA=222 (185 added, 17 deleted, 20 changed) OCL=32701 CL=32718 2009-08-04 11:13:58 -06:00
			`Aug 3 2009`

			`First version of fasta. Translation of fasta.c, fetched from`
			`http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4`

			`fasta -n 25000000`
improve myrandom() in fasta. add script to automate timing R=rsc DELTA=78 (68 added, 0 deleted, 10 changed) OCL=32729 CL=32732 2009-08-04 15:07:17 -06:00			`gcc -O2 fasta.c 5.98u 0.00s 6.01r`
			`gccgo -O2 fasta.go 8.82u 0.02s 8.85r`
			`6g fasta.go 13.50u 0.02s 13.53r`
			`6g -B fasta.go 12.99u 0.02s 13.02r`

			`Aug 4 2009`
			`[added timing.sh]`

			`# myrandom:`
			`# hand-written optimization of integer division`
			`# use int32->float conversion`
			`fasta -n 25000000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# probably I/O library inefficiencies`
improve myrandom() in fasta. add script to automate timing R=rsc DELTA=78 (68 added, 0 deleted, 10 changed) OCL=32729 CL=32732 2009-08-04 15:07:17 -06:00			`gcc -O2 fasta.c 5.99u 0.00s 6.00r`
			`gccgo -O2 fasta.go 8.82u 0.02s 8.85r`
			`gc fasta 10.70u 0.00s 10.77r`
			`gc_B fasta 10.09u 0.03s 10.12r`
clean up the code a bit start a log of progress R=rsc DELTA=222 (185 added, 17 deleted, 20 changed) OCL=32701 CL=32718 2009-08-04 11:13:58 -06:00
another benchmark: reverse-complement R=rsc DELTA=682 (511 added, 171 deleted, 0 changed) OCL=32725 CL=32727 2009-08-04 14:04:37 -06:00			`reverse-complement < output-of-fasta-25000000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# we don't know - memory cache behavior?`
improve myrandom() in fasta. add script to automate timing R=rsc DELTA=78 (68 added, 0 deleted, 10 changed) OCL=32729 CL=32732 2009-08-04 15:07:17 -06:00			`gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r`
			`gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r`
			`gc reverse-complement 6.55u 0.70s 7.26r`
			`gc_B reverse-complement 6.32u 0.70s 7.10r`

nbody benchmark timing.sh improvements R=rsc DELTA=334 (319 added, 0 deleted, 15 changed) OCL=32734 CL=32736 2009-08-04 15:27:46 -06:00			`nbody 50000000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# math.Sqrt needs to be in assembly; inlining is probably the other 50%`
nbody benchmark timing.sh improvements R=rsc DELTA=334 (319 added, 0 deleted, 15 changed) OCL=32734 CL=32736 2009-08-04 15:27:46 -06:00			`gcc -O2 nbody.c 21.61u 0.01s 24.80r`
			`gccgo -O2 nbody.go 118.55u 0.02s 120.32r`
			`gc nbody 100.84u 0.00s 100.85r`
			`gc_B nbody 103.33u 0.00s 103.39r`
add info about Sqrt instruction couple of fixes to timing.sh R=rsc DELTA=10 (5 added, 0 deleted, 5 changed) OCL=32742 CL=32756 2009-08-04 18:31:19 -06:00			`[`
			`hacked Sqrt in assembler`
			`gc nbody 31.97u 0.00s 32.01r`
			`]`
binary tree R=rsc DELTA=324 (323 added, 0 deleted, 1 changed) OCL=32759 CL=32768 2009-08-04 20:38:08 -06:00
			`binary-tree 15 # too slow to use 20`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# memory allocation and garbage collection`
binary tree R=rsc DELTA=324 (323 added, 0 deleted, 1 changed) OCL=32759 CL=32768 2009-08-04 20:38:08 -06:00			`gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r`
			`gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r`
			`gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r`
			`gc binary-tree 9.60u 0.01s 9.62r`
			`gc binary-tree-freelist 0.48u 0.01s 0.50r`
fannkuch R=rsc DELTA=240 (239 added, 0 deleted, 1 changed) OCL=32783 CL=32785 2009-08-05 12:33:59 -06:00
			`August 5, 2009`

			`fannkuch 12`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# bounds checking is half the difference`
			`# rest might be registerization`
fannkuch R=rsc DELTA=240 (239 added, 0 deleted, 1 changed) OCL=32783 CL=32785 2009-08-05 12:33:59 -06:00			`gcc -O2 fannkuch.c 60.09u 0.01s 60.32r`
			`gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r`
			`gc fannkuch 124.59u 0.00s 124.67r`
			`gc_B fannkuch 91.14u 0.00s 91.16r`
regex-dna R=rsc DELTA=243 (242 added, 0 deleted, 1 changed) OCL=32786 CL=32791 2009-08-05 14:03:46 -06:00
			`regex-dna 100000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# regexp code is slow on trivial regexp`
regex-dna R=rsc DELTA=243 (242 added, 0 deleted, 1 changed) OCL=32786 CL=32791 2009-08-05 14:03:46 -06:00			`gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r`
			`gc regexp-dna 26.94u 0.18s 28.75r`
			`gc_B regexp-dna 26.51u 0.09s 26.75r`
spectral-norm make regexp-dna use bytes not strings (no significant timing change) R=rsc DELTA=149 (138 added, 1 deleted, 10 changed) OCL=32804 CL=32807 2009-08-05 18:25:38 -06:00
			`spectral-norm 5500`
			`gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r`
			`gccgo -O2 spectral-norm.go 12.20u 0.00s 12.23r`
			`gc spectral-norm 50.23u 0.00s 50.36r`
			`gc_B spectral-norm 49.69u 0.01s 49.83r`
parallel spectral-norm R=rsc DELTA=85 (84 added, 1 deleted, 0 changed) OCL=32810 CL=32810 2009-08-05 19:44:49 -06:00			`gc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2`
spectral-norm make regexp-dna use bytes not strings (no significant timing change) R=rsc DELTA=149 (138 added, 1 deleted, 10 changed) OCL=32804 CL=32807 2009-08-05 18:25:38 -06:00			`[using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]`
k-nucleotide R=rsc DELTA=367 (366 added, 0 deleted, 1 changed) OCL=32832 CL=32836 2009-08-06 14:00:26 -06:00
			`August 6, 2009`

			`k-nucleotide 5000000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# string maps are slower than glib string maps`
k-nucleotide R=rsc DELTA=367 (366 added, 0 deleted, 1 changed) OCL=32832 CL=32836 2009-08-06 14:00:26 -06:00			`gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74r`
timings for pidigits TBR=rsc OCL=32857 CL=32857 2009-08-06 19:27:15 -06:00			`gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r`
			`gc k-nucleotide 16.08u 0.06s 16.50r`
			`gc_B k-nucleotide 17.32u 0.02s 17.37r`
mandelbrot R=rsc DELTA=147 (145 added, 0 deleted, 2 changed) OCL=32840 CL=32845 2009-08-06 15:21:21 -06:00
			`mandelbrot 5500`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# floating point code generator should use more registers`
mandelbrot R=rsc DELTA=147 (145 added, 0 deleted, 2 changed) OCL=32840 CL=32845 2009-08-06 15:21:21 -06:00			`gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r`
			`gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r`
			`gc mandelbrot 74.32u 0.00s 74.35r`
			`gc_B mandelbrot 74.28u 0.01s 74.31r`
meteor-contest R=rsc DELTA=1276 (1275 added, 0 deleted, 1 changed) OCL=32851 CL=32854 2009-08-06 19:04:48 -06:00
			`meteor 16000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# we don't know`
meteor-contest R=rsc DELTA=1276 (1275 added, 0 deleted, 1 changed) OCL=32851 CL=32854 2009-08-06 19:04:48 -06:00			`gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r`
			`gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r`
			`gc meteor-contest 0.24u 0.00s 0.26r`
			`gc_B meteor-contest 0.23u 0.00s 0.24r`
timings for pidigits TBR=rsc OCL=32857 CL=32857 2009-08-06 19:27:15 -06:00
			`pidigits 10000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# bignum is slower than gmp`
timings for pidigits TBR=rsc OCL=32857 CL=32857 2009-08-06 19:27:15 -06:00			`gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r`
			`gc pidigits 77.69u 0.14s 78.18r`
			`gc_B pidigits 74.26u 0.18s 75.41r`
			`gc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignum`
timing updates after 6g change for efficient division by powers of two R=rsc DELTA=25 (23 added, 0 deleted, 2 changed) OCL=32873 CL=32875 2009-08-07 11:39:45 -06:00
			`August 7 2009`

			`# New gc does better division by powers of 2. Significant improvements:`

			`spectral-norm 5500`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# floating point code generator should use more registers; possibly inline evalA`
timing updates after 6g change for efficient division by powers of two R=rsc DELTA=25 (23 added, 0 deleted, 2 changed) OCL=32873 CL=32875 2009-08-07 11:39:45 -06:00			`gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r`
			`gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r`
			`gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster`
			`gc_B spectral-norm 23.71u 0.01s 23.72r # ditto`
			`gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. note: 4x faster (on r70, idle)`

			`k-nucleotide 1000000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# string maps are slower than glib string maps`
timing updates after 6g change for efficient division by powers of two R=rsc DELTA=25 (23 added, 0 deleted, 2 changed) OCL=32873 CL=32875 2009-08-07 11:39:45 -06:00			`gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r`
			`gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r`
			`gc k-nucleotide 15.97u 0.03s 16.04r`
			`gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version`

			`pidigits 10000`
annotations TBR=r OCL=32896 CL=32896 2009-08-07 15:18:30 -06:00			`# bignum is slower than gmp`
timing updates after 6g change for efficient division by powers of two R=rsc DELTA=25 (23 added, 0 deleted, 2 changed) OCL=32873 CL=32875 2009-08-07 11:39:45 -06:00			`gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r`
			`gc pidigits 71.24u 0.04s 71.28r # 8.5% faster`
			`gc_B pidigits 71.25u 0.03s 71.29r # 4% faster`

threadring more interesting than most R=rsc DELTA=132 (131 added, 0 deleted, 1 changed) OCL=32876 CL=32881 2009-08-07 13:53:51 -06:00			`threadring 50000000`
			`gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r`
			`gccgo -O2 threadring.go 90.33u 459.95s 448.03r`
			`gc threadring 33.11u 0.00s 33.14r`
			`GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r`
simplify threadring slightly; slight speed improvement R=rsc DELTA=7 (3 added, 3 deleted, 1 changed) OCL=32885 CL=32889 2009-08-07 14:30:20 -06:00			`# change wait code to do <-make(chan int) instead of time.Sleep`
			`gc threadring 28.41u 0.01s 29.35r`
			`GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r`
chameneos R=rsc DELTA=514 (513 added, 0 deleted, 1 changed) OCL=32898 CL=32910 2009-08-07 16:28:46 -06:00
			`chameneos 6000000`
			`gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r`
			`gc chameneosredux 20.19u 0.01s 20.23r`
new timing data after compiler updates (fp regs, integer div and mod) R=rsc DELTA=90 (90 added, 0 deleted, 0 changed) OCL=32993 CL=33001 2009-08-10 18:04:59 -06:00
			`Aug 10 2009`

			`# new 6g with better fp registers, fast div and mod of integers`
			`# complete set of timings listed. significant changes marked ***`

			`fasta -n 25000000`
			`# probably I/O library inefficiencies`
			`gcc -O2 fasta.c 5.96u 0.00s 5.97r`
			`gc fasta 10.59u 0.01s 10.61r`
			`gc_B fasta 9.92u 0.02s 9.95r`

			`reverse-complement < output-of-fasta-25000000`
			`# we don't know - memory cache behavior?`
			`gcc -O2 reverse-complement.c 1.96u 1.56s 16.23r`
			`gccgo -O2 reverse-complement.go 6.41u 0.62s 7.05r`
			`gc reverse-complement 6.46u 0.70s 7.17r`
			`gc_B reverse-complement 6.22u 0.72s 6.95r`

			`nbody 50000000`
			`# math.Sqrt needs to be in assembly; inlining is probably the other 50%`
			`gcc -O2 nbody.c 21.26u 0.01s 21.28r`
			`gccgo -O2 nbody.go 116.68u 0.07s 116.80r`
			`gc nbody 86.64u 0.01s 86.68r # -14%`
			`gc_B nbody 85.72u 0.02s 85.77r # *** -17%`

			`binary-tree 15 # too slow to use 20`
			`# memory allocation and garbage collection`
			`gcc -O2 binary-tree.c -lm 0.87u 0.00s 0.87r`
			`gccgo -O2 binary-tree.go 1.61u 0.47s 2.09r`
			`gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.01r`
			`gc binary-tree 9.11u 0.01s 9.13r # *** -5%`
			`gc binary-tree-freelist 0.47u 0.01s 0.48r`

			`fannkuch 12`
			`# bounds checking is half the difference`
			`# rest might be registerization`
			`gcc -O2 fannkuch.c 59.92u 0.00s 59.94r`
			`gccgo -O2 fannkuch.go 65.54u 0.00s 65.58r`
			`gc fannkuch 123.98u 0.01s 124.04r`
			`gc_B fannkuch 90.75u 0.00s 90.78r`

			`regex-dna 100000`
			`# regexp code is slow on trivial regexp`
			`gcc -O2 regex-dna.c -lpcre 0.91u 0.00s 0.92r`
			`gc regex-dna 27.25u 0.02s 27.28r`
			`gc_B regex-dna 29.51u 0.03s 29.55r`

			`spectral-norm 5500`
			`# possibly inline evalA`
			`gcc -O2 spectral-norm.c -lm 11.57u 0.00s 11.57r`
			`gccgo -O2 spectral-norm.go 12.07u 0.01s 12.08r`
			`gc spectral-norm 23.99u 0.00s 24.00r`
			`gc_B spectral-norm 23.73u 0.00s 23.75r`

			`k-nucleotide 1000000`
			`# string maps are slower than glib string maps`
			`gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.63u 0.02s 10.69r`
			`gccgo -O2 k-nucleotide.go 23.19u 0.91s 24.12r`
			`gc k-nucleotide 16.73u 0.04s 16.78r # *** +5% (but this one seems to vary by more than that)`
			`gc_B k-nucleotide 16.46u 0.04s 16.51r # *** +5%`

			`mandelbrot 16000`
			`gcc -O2 mandelbrot.c 56.16u 0.00s 56.16r`
			`gccgo -O2 mandelbrot.go 57.41u 0.01s 57.42r`
			`gc mandelbrot 64.05u 0.02s 64.08r # *** -14%`
			`gc_B mandelbrot 64.10u 0.02s 64.14r # *** -14%`

			`meteor 16000`
			`# we don't know`
			`gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r`
			`gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r`
			`gc meteor-contest 0.18u 0.00s 0.20r # *** -25%`
			`gc_B meteor-contest 0.17u 0.00s 0.18r # *** -24%`

			`pidigits 10000`
			`# bignum is slower than gmp`
			`gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.57r`
			`gc pidigits 71.82u 0.04s 71.89r`
			`gc_B pidigits 71.84u 0.08s 71.98r`

			`threadring 50000000`
			`gcc -O2 threadring.c -lpthread 30.91u 164.33s 204.57r`
			`gccgo -O2 threadring.go 87.12u 460.04s 447.61r`
			`gc threadring 38.55u 0.00s 38.56r # *** +16%`

			`chameneos 6000000`
			`gcc -O2 chameneosredux.c -lpthread 17.93u 323.65s 88.47r`
			`gc chameneosredux 21.72u 0.00s 21.73r`

- use in-place bignum operations where available - runs approx. 30% faster R=r DELTA=24 (10 added, 2 deleted, 12 changed) OCL=32984 CL=33005 2009-08-10 18:44:46 -06:00			`August 10 2009`

			`# In-place versions for some bignum operations.`
			`pidigits 10000`
			`gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r`
			`gc pidigits 55.22u 0.04s 55.29r # *** -23%`
			`gc_B pidigits 55.49u 0.02s 55.60r # *** -23%`