All tests on r45 or r70 Aug 3 2009 First version of fasta. Translation of fasta.c, fetched from http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4 fasta -n 25000000 gcc -O2 fasta.c 5.98u 0.00s 6.01r gccgo -O2 fasta.go 8.82u 0.02s 8.85r 6g fasta.go 13.50u 0.02s 13.53r 6g -B fasta.go 12.99u 0.02s 13.02r Aug 4 2009 [added timing.sh] # myrandom: # hand-written optimization of integer division # use int32->float conversion fasta -n 25000000 # probably I/O library inefficiencies gcc -O2 fasta.c 5.99u 0.00s 6.00r gccgo -O2 fasta.go 8.82u 0.02s 8.85r gc fasta 10.70u 0.00s 10.77r gc_B fasta 10.09u 0.03s 10.12r reverse-complement < output-of-fasta-25000000 # we don't know - memory cache behavior? gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r gc reverse-complement 6.55u 0.70s 7.26r gc_B reverse-complement 6.32u 0.70s 7.10r nbody 50000000 # math.Sqrt needs to be in assembly; inlining is probably the other 50% gcc -O2 nbody.c 21.61u 0.01s 24.80r gccgo -O2 nbody.go 118.55u 0.02s 120.32r gc nbody 100.84u 0.00s 100.85r gc_B nbody 103.33u 0.00s 103.39r [ hacked Sqrt in assembler gc nbody 31.97u 0.00s 32.01r ] binary-tree 15 # too slow to use 20 # memory allocation and garbage collection gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r gc binary-tree 9.60u 0.01s 9.62r gc binary-tree-freelist 0.48u 0.01s 0.50r August 5, 2009 fannkuch 12 # bounds checking is half the difference # rest might be registerization gcc -O2 fannkuch.c 60.09u 0.01s 60.32r gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r gc fannkuch 124.59u 0.00s 124.67r gc_B fannkuch 91.14u 0.00s 91.16r regex-dna 100000 # regexp code is slow on trivial regexp gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r gc regexp-dna 26.94u 0.18s 28.75r gc_B regexp-dna 26.51u 0.09s 26.75r spectral-norm 5500 gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r gccgo -O2 spectral-norm.go 
12.20u 0.00s 12.23r gc spectral-norm 50.23u 0.00s 50.36r gc_B spectral-norm 49.69u 0.01s 49.83r gc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2 [using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r] August 6, 2009 k-nucleotide 5000000 # string maps are slower than glib string maps gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74r gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r gc k-nucleotide 16.08u 0.06s 16.50r gc_B k-nucleotide 17.32u 0.02s 17.37r mandelbrot 5500 # floating point code generator should use more registers gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r gc mandelbrot 74.32u 0.00s 74.35r gc_B mandelbrot 74.28u 0.01s 74.31r meteor 16000 # we don't know gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r gc meteor-contest 0.24u 0.00s 0.26r gc_B meteor-contest 0.23u 0.00s 0.24r pidigits 10000 # bignum is slower than gmp gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r gc pidigits 77.69u 0.14s 78.18r gc_B pidigits 74.26u 0.18s 75.41r gc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignum August 7 2009 # New gc does better division by powers of 2. Significant improvements: spectral-norm 5500 # floating point code generator should use more registers; possibly inline evalA gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster gc_B spectral-norm 23.71u 0.01s 23.72r # ditto gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. 
note: 4x faster (on r70, idle) k-nucleotide 1000000 # string maps are slower than glib string maps gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r gc k-nucleotide 15.97u 0.03s 16.04r gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version pidigits 10000 # bignum is slower than gmp gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r gc pidigits 71.24u 0.04s 71.28r # 8.5% faster gc_B pidigits 71.25u 0.03s 71.29r # 4% faster threadring 50000000 gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r gccgo -O2 threadring.go 90.33u 459.95s 448.03r gc threadring 33.11u 0.00s 33.14r GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r # change wait code to do <-make(chan int) instead of time.Sleep gc threadring 28.41u 0.01s 29.35r GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r chameneos 6000000 gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r gc chameneosredux 20.19u 0.01s 20.23r