Home | History | Annotate | Download | only in shootout
      1 All tests on r45 or r70
      2 
      3 Aug 3 2009
      4 
      5 First version of fasta. Translation of fasta.c, fetched from
      6 	http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4
      7 
      8 fasta -n 25000000
      9 	gcc -O2 fasta.c	5.98u 0.00s 6.01r
     10 	gccgo -O2 fasta.go	8.82u 0.02s 8.85r
     11 	6g fasta.go	13.50u 0.02s 13.53r
     12 	6g -B fasta.go	12.99u 0.02s 13.02r
     13 
     14 Aug 4 2009
     15 [added timing.sh]
     16 
     17 # myrandom:
     18 #   hand-written optimization of integer division
     19 #   use int32->float conversion
     20 fasta -n 25000000
     21 	# probably I/O library inefficiencies
     22 	gcc -O2 fasta.c	5.99u 0.00s 6.00r 
     23 	gccgo -O2 fasta.go	8.82u 0.02s 8.85r
     24 	gc fasta	10.70u 0.00s 10.77r
     25 	gc_B fasta	10.09u 0.03s 10.12r
     26 
     27 reverse-complement < output-of-fasta-25000000
     28 	# we don't know - memory cache behavior?
     29 	gcc -O2 reverse-complement.c	2.04u 0.94s 10.54r
     30 	gccgo -O2 reverse-complement.go	6.54u 0.63s 7.17r
     31 	gc reverse-complement	6.55u 0.70s 7.26r
     32 	gc_B reverse-complement	6.32u 0.70s 7.10r
     33 
     34 nbody 50000000
     35 	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
     36 	gcc -O2 nbody.c	21.61u 0.01s 24.80r
     37 	gccgo -O2 nbody.go	118.55u 0.02s 120.32r
     38 	gc nbody	100.84u 0.00s 100.85r
     39 	gc_B nbody	103.33u 0.00s 103.39r
     40 [
     41 hacked Sqrt in assembler
     42 	gc nbody	31.97u 0.00s 32.01r
     43 ]
     44 
     45 binary-tree 15 # too slow to use 20
     46 	# memory allocation and garbage collection
     47 	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
     48 	gccgo -O2 binary-tree.go	1.69u 0.46s 2.15r
     49 	gccgo -O2 binary-tree-freelist.go	8.48u 0.00s 8.48r
     50 	gc binary-tree	9.60u 0.01s 9.62r
     51 	gc binary-tree-freelist	0.48u 0.01s 0.50r
     52 
     53 August 5, 2009
     54 
     55 fannkuch 12
     56 	# bounds checking is half the difference
     57 	# rest might be registerization
     58 	gcc -O2 fannkuch.c	60.09u 0.01s 60.32r
     59 	gccgo -O2 fannkuch.go	64.89u 0.00s 64.92r
     60 	gc fannkuch	124.59u 0.00s 124.67r
     61 	gc_B fannkuch	91.14u 0.00s 91.16r
     62 
     63 regex-dna 100000
     64 	# regexp code is slow on trivial regexp
     65 	gcc -O2 regex-dna.c -lpcre	0.92u 0.00s 0.99r
     66 	gc regex-dna	26.94u 0.18s 28.75r
     67 	gc_B regex-dna	26.51u 0.09s 26.75r
     68 
     69 spectral-norm 5500
     70 	gcc -O2 spectral-norm.c -lm	11.54u 0.00s 11.55r
     71 	gccgo -O2 spectral-norm.go	12.20u 0.00s 12.23r
     72 	gc spectral-norm	50.23u 0.00s 50.36r
     73 	gc_B spectral-norm	49.69u 0.01s 49.83r
     74 	gc spectral-norm-parallel	24.47u 0.03s 11.05r  # has shift >>1 not div /2
     75 	[using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]
     76 
     77 August 6, 2009
     78 
     79 k-nucleotide 5000000
     80 	# string maps are slower than glib string maps
     81 	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.72u 0.01s 10.74r
     82 	gccgo -O2 k-nucleotide.go	21.64u 0.83s 22.78r
     83 	gc k-nucleotide	16.08u 0.06s 16.50r
     84 	gc_B k-nucleotide	17.32u 0.02s 17.37r
     85 
     86 mandelbrot 5500
     87 	# floating point code generator should use more registers
     88 	gcc -O2 mandelbrot.c	56.13u 0.02s 56.17r
     89 	gccgo -O2 mandelbrot.go	57.49u 0.01s 57.51r
     90 	gc mandelbrot	74.32u 0.00s 74.35r
     91 	gc_B mandelbrot	74.28u 0.01s 74.31r
     92 
     93 meteor 2100
     94 	# we don't know
     95 	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
     96 	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.14r
     97 	gc meteor-contest	0.24u 0.00s 0.26r
     98 	gc_B meteor-contest	0.23u 0.00s 0.24r
     99 
    100 pidigits 10000
    101 	# bignum is slower than gmp
    102 	gcc -O2 pidigits.c -lgmp	2.60u 0.00s 2.62r
    103 	gc pidigits	77.69u 0.14s 78.18r
    104 	gc_B pidigits	74.26u 0.18s 75.41r
    105 	gc_B pidigits	68.48u 0.20s 69.31r   # special case: no bounds checking in bignum
    106 
    107 August 7 2009
    108 
    109 # New gc does better division by powers of 2.  Significant improvements:
    110 
    111 spectral-norm 5500
    112 	# floating point code generator should use more registers; possibly inline evalA
    113 	gcc -O2 spectral-norm.c -lm	11.50u 0.00s 11.50r
    114 	gccgo -O2 spectral-norm.go	12.02u 0.00s 12.02r
    115 	gc spectral-norm	23.98u 0.00s 24.00r	# new time is 0.48 times old time, 52% faster
    116 	gc_B spectral-norm	23.71u 0.01s 23.72r	# ditto
    117 	gc spectral-norm-parallel	24.04u 0.00s 6.26r  # /2 put back.  note: 4x faster (on r70, idle)
    118 
    119 k-nucleotide 1000000
    120 	# string maps are slower than glib string maps
    121 	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.82u 0.04s 10.87r
    122 	gccgo -O2 k-nucleotide.go	22.73u 0.89s 23.63r
    123 	gc k-nucleotide	15.97u 0.03s 16.04r
    124 	gc_B k-nucleotide	15.86u 0.06s 15.93r	# 8.5% faster, but probably due to weird cache effects in previous version
    125 
    126 pidigits 10000
    127 	# bignum is slower than gmp
    128 	gcc -O2 pidigits.c -lgmp	2.58u 0.00s 2.58r
    129 	gc pidigits	71.24u 0.04s 71.28r	# 8.5% faster
    130 	gc_B pidigits	71.25u 0.03s 71.29r	# 4% faster
    131 
    132 threadring 50000000
    133 	gcc -O2 threadring.c -lpthread	35.51u 160.21s 199.50r
    134 	gccgo -O2 threadring.go	90.33u 459.95s 448.03r
    135 	gc threadring	33.11u 0.00s 33.14r
    136 	GOMAXPROCS=4 gc threadring	114.48u 226.65s 371.59r
    137 	# change wait code to do <-make(chan int) instead of time.Sleep
    138 	gc threadring	28.41u 0.01s 29.35r
    139 	GOMAXPROCS=4 gc threadring	112.59u 232.83s 384.72r
    140 	
    141 chameneos 6000000
    142 	gcc -O2 chameneosredux.c -lpthread	18.14u 276.52s 76.93r
    143 	gc chameneosredux	20.19u 0.01s 20.23r
    144 
    145 Aug 10 2009
    146 
    147 # new 6g with better fp registers, fast div and mod of integers
    148 # complete set of timings listed. significant changes marked ***
    149 
    150 fasta -n 25000000
    151 	# probably I/O library inefficiencies
    152 	gcc -O2 fasta.c	5.96u 0.00s 5.97r
    153 	gc fasta	10.59u 0.01s 10.61r
    154 	gc_B fasta	9.92u 0.02s 9.95r
    155 
    156 reverse-complement < output-of-fasta-25000000
    157 	# we don't know - memory cache behavior?
    158 	gcc -O2 reverse-complement.c	1.96u 1.56s 16.23r
    159 	gccgo -O2 reverse-complement.go	6.41u 0.62s 7.05r
    160 	gc reverse-complement	6.46u 0.70s 7.17r
    161 	gc_B reverse-complement	6.22u 0.72s 6.95r
    162 
    163 nbody 50000000
    164 	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
    165 	gcc -O2 nbody.c	21.26u 0.01s 21.28r
    166 	gccgo -O2 nbody.go	116.68u 0.07s 116.80r
    167 	gc nbody	86.64u 0.01s 86.68r	# -14%
    168 	gc_B nbody	85.72u 0.02s 85.77r	# *** -17%
    169 
    170 binary-tree 15 # too slow to use 20
    171 	# memory allocation and garbage collection
    172 	gcc -O2 binary-tree.c -lm	0.87u 0.00s 0.87r
    173 	gccgo -O2 binary-tree.go	1.61u 0.47s 2.09r
    174 	gccgo -O2 binary-tree-freelist.go	0.00u 0.00s 0.01r
    175 	gc binary-tree	9.11u 0.01s 9.13r	# *** -5%
    176 	gc binary-tree-freelist	0.47u 0.01s 0.48r
    177 
    178 fannkuch 12
    179 	# bounds checking is half the difference
    180 	# rest might be registerization
    181 	gcc -O2 fannkuch.c	59.92u 0.00s 59.94r
    182 	gccgo -O2 fannkuch.go	65.54u 0.00s 65.58r
    183 	gc fannkuch	123.98u 0.01s 124.04r
    184 	gc_B fannkuch	90.75u 0.00s 90.78r
    185 
    186 regex-dna 100000
    187 	# regexp code is slow on trivial regexp
    188 	gcc -O2 regex-dna.c -lpcre	0.91u 0.00s 0.92r
    189 	gc regex-dna	27.25u 0.02s 27.28r
    190 	gc_B regex-dna	29.51u 0.03s 29.55r
    191 
    192 spectral-norm 5500
    193 	# possibly inline evalA
    194 	gcc -O2 spectral-norm.c -lm	11.57u 0.00s 11.57r
    195 	gccgo -O2 spectral-norm.go	12.07u 0.01s 12.08r
    196 	gc spectral-norm	23.99u 0.00s 24.00r
    197 	gc_B spectral-norm	23.73u 0.00s 23.75r
    198 
    199 k-nucleotide 1000000
    200 	# string maps are slower than glib string maps
    201 	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.63u 0.02s 10.69r
    202 	gccgo -O2 k-nucleotide.go	23.19u 0.91s 24.12r
    203 	gc k-nucleotide	16.73u 0.04s 16.78r	# *** +5% (but this one seems to vary by more than that)
    204 	gc_B k-nucleotide	16.46u 0.04s 16.51r	# *** +5%
    205 
    206 mandelbrot 16000
    207 	gcc -O2 mandelbrot.c	56.16u 0.00s 56.16r
    208 	gccgo -O2 mandelbrot.go	57.41u 0.01s 57.42r
    209 	gc mandelbrot	64.05u 0.02s 64.08r	# *** -14%
    210 	gc_B mandelbrot	64.10u 0.02s 64.14r	# *** -14%
    211 
    212 meteor 2100
    213 	# we don't know
    214 	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
    215 	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.12r
    216 	gc meteor-contest	0.18u 0.00s 0.20r	# *** -25%
    217 	gc_B meteor-contest	0.17u 0.00s 0.18r	# *** -24%
    218 
    219 pidigits 10000
    220 	# bignum is slower than gmp
    221 	gcc -O2 pidigits.c -lgmp	2.57u 0.00s 2.57r
    222 	gc pidigits	71.82u 0.04s 71.89r
    223 	gc_B pidigits	71.84u 0.08s 71.98r
    224 
    225 threadring 50000000
    226 	gcc -O2 threadring.c -lpthread	30.91u 164.33s 204.57r
    227 	gccgo -O2 threadring.go	87.12u 460.04s 447.61r
    228 	gc threadring	38.55u 0.00s 38.56r	# *** +16%
    229 
    230 chameneos 6000000
    231 	gcc -O2 chameneosredux.c -lpthread	17.93u 323.65s 88.47r
    232 	gc chameneosredux	21.72u 0.00s 21.73r
    233 
    234 August 10 2009
    235 
    236 # In-place versions for some bignum operations.
    237 pidigits 10000
    238 	gcc -O2 pidigits.c -lgmp	2.56u 0.00s 2.57r
    239 	gc pidigits	55.22u 0.04s 55.29r	# *** -23%
    240 	gc_B pidigits	55.49u 0.02s 55.60r	# *** -23%
    241 
    242 September 3 2009
    243 
    244 # New 6g inlines slices, has a few other tweaks.
    245 # Complete rerun. Significant changes marked.
    246 
    247 fasta -n 25000000
    248 	# probably I/O library inefficiencies
    249 	gcc -O2 fasta.c	5.96u 0.00s 5.96r
    250 	gc fasta	10.63u 0.02s 10.66r
    251 	gc_B fasta	9.92u 0.01s 9.94r
    252 
    253 reverse-complement < output-of-fasta-25000000
    254 	# we don't know - memory cache behavior?
    255 	gcc -O2 reverse-complement.c	1.92u 0.33s 2.93r
    256 	gccgo -O2 reverse-complement.go	6.76u 0.72s 7.58r	# +5%
    257 	gc reverse-complement	6.59u 0.70s 7.29r	# +2%
    258 	gc_B reverse-complement	5.57u 0.80s 6.37r	# -10%
    259 
    260 nbody 50000000
    261 	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
    262 	# also loop alignment appears to be critical
    263 	gcc -O2 nbody.c	21.28u 0.00s 21.28r
    264 	gccgo -O2 nbody.go	119.21u 0.00s 119.22r	# +2%
    265 	gc nbody	109.72u 0.00s 109.78r	# + 28% *****
    266 	gc_B nbody	85.90u 0.00s 85.91r
    267 
    268 binary-tree 15 # too slow to use 20
    269 	# memory allocation and garbage collection
    270 	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
    271 	gccgo -O2 binary-tree.go	1.88u 0.54s 2.42r	# +17%
    272 	gccgo -O2 binary-tree-freelist.go	0.01u 0.01s 0.02r
    273 	gc binary-tree	8.94u 0.01s 8.96r	# -2%
    274 	gc binary-tree-freelist	0.47u 0.01s 0.48r
    275 
    276 fannkuch 12
    277 	# bounds checking is half the difference
    278 	# rest might be registerization
    279 	gcc -O2 fannkuch.c	60.12u 0.00s 60.12r
    280 	gccgo -O2 fannkuch.go	92.62u 0.00s 92.66r		# +41% ***
    281 	gc fannkuch	123.90u 0.00s 123.92r
    282 	gc_B fannkuch	89.71u 0.00s 89.74r	# -1%
    283 
    284 regex-dna 100000
    285 	# regexp code is slow on trivial regexp
    286 	gcc -O2 regex-dna.c -lpcre	0.88u 0.00s 0.88r
    287 	gc regex-dna	25.77u 0.01s 25.79r		# -5%
    288 	gc_B regex-dna	26.05u 0.02s 26.09r	# -12% ***
    289 
    290 spectral-norm 5500
    291 	# possibly inline evalA
    292 	gcc -O2 spectral-norm.c -lm	11.51u 0.00s 11.51r
    293 	gccgo -O2 spectral-norm.go	11.95u 0.00s 11.96r
    294 	gc spectral-norm	24.23u 0.00s 24.23r
    295 	gc_B spectral-norm	23.83u 0.00s 23.84r
    296 
    297 k-nucleotide 1000000
    298 	# string maps are slower than glib string maps
    299 	gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0	10.68u 0.04s 10.72r
    300 	gccgo -O2 k-nucleotide.go	23.03u 0.88s 23.92r
    301 	gc k-nucleotide	15.79u 0.05s 15.85r	# -5% (but this one seems to vary by more than that)
    302 	gc_B k-nucleotide	17.88u 0.05s 17.95r # +8% (ditto)
    303 
    304 mandelbrot 16000
    305 	gcc -O2 mandelbrot.c	56.17u 0.02s 56.20r
    306 	gccgo -O2 mandelbrot.go	56.74u 0.02s 56.79r	 # -1%
    307 	gc mandelbrot	63.31u 0.01s 63.35r	# -1%
    308 	gc_B mandelbrot	63.29u 0.00s 63.31r	# -1%
    309 
    310 meteor 2100
    311 	# we don't know
    312 	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
    313 	gccgo -O2 meteor-contest.go	0.11u 0.00s 0.12r
    314 	gc meteor-contest	0.18u 0.00s 0.19r
    315 	gc_B meteor-contest	0.17u 0.00s 0.18r
    316 
    317 pidigits 10000
    318 	# bignum is slower than gmp
    319 	gcc -O2 pidigits.c -lgmp	2.56u 0.00s 2.57r
    320 	gc pidigits	55.87u 0.03s 55.91r
    321 	gc_B pidigits	55.93u 0.03s 55.99r
    322 
    323 # these tests are compared using real time, since they run on multiple processors
    324 # accuracy probably low
    325 threadring 50000000
    326 	gcc -O2 threadring.c -lpthread	26.31u 164.69s 199.92r	# -2%
    327 	gccgo -O2 threadring.go	87.90u 487.26s 472.81r	# +6%
    328 	gc threadring	28.89u 0.00s 28.90r	# -25% ***
    329 
    330 chameneos 6000000
    331 	gcc -O2 chameneosredux.c -lpthread	16.41u 296.91s 81.17r	# -8%
    332 	gc chameneosredux	19.97u 0.00s 19.97r	# -8%
    333 
    334 Sep 22, 2009
    335 
    336 # 6g inlines sliceslice in most cases.
    337 
    338 fasta -n 25000000
    339 	# probably I/O library inefficiencies
    340 	gc fasta	10.24u 0.00s 10.25r	# -4%
    341 	gc_B fasta	9.68u 0.01s 9.69r	# -3%
    342 
    343 reverse-complement < output-of-fasta-25000000
    344 	# we don't know - memory cache behavior?
    345 	gc reverse-complement	6.67u 0.69s 7.37r	# +1%
    346 	gc_B reverse-complement	6.00u 0.64s 6.65r	# +7%
    347 
    348 nbody -n 50000000
    349 	# math.Sqrt needs to be in assembly; inlining is probably the other 50%
    350 	# also loop alignment appears to be critical
    351 	gc nbody	86.27u 0.00s 86.29r	# -21%
    352 	gc_B nbody	104.52u 0.00s 104.54r	# +22%
    353 
    354 fannkuch 12
    355 	# bounds checking is half the difference
    356 	# rest might be registerization
    357 	gc fannkuch	128.36u 0.00s 128.37r	# +4%
    358 	gc_B fannkuch	89.32u 0.00s 89.34r
    359 
    360 regex-dna 100000
    361 	# regexp code is slow on trivial regexp
    362 	gc regex-dna	24.82u 0.01s 24.86r	# -4%
    363 	gc_B regex-dna	24.55u 0.01s 24.57r	# -6%
    364 
    365 spectral-norm 5500
    366 	# possibly inline evalA
    367 	gc spectral-norm	24.05u 0.00s 24.07r	# -1%
    368 	gc_B spectral-norm	23.60u 0.00s 23.65r	 # -1%
    369 
    370 k-nucleotide 1000000
    371 	# string maps are slower than glib string maps
    372 	gc k-nucleotide	17.84u 0.04s 17.89r	# +13% but mysterious variation continues
    373 	gc_B k-nucleotide	15.56u 0.08s 15.65r	# -13% (ditto)
    374 
    375 mandelbrot 16000
    376 	gc mandelbrot	64.08u 0.01s 64.11r	# +1%
    377 	gc_B mandelbrot	64.04u 0.00s 64.05r	# +1%
    378 
    379 pidigits 10000
    380 	# bignum is slower than gmp
    381 	gc pidigits	58.68u 0.02s 58.72r	# +5%
    382 	gc_B pidigits	58.86u 0.05s 58.99r	# +5%
    383 
    384 # these tests are compared using real time, since they run on multiple processors
    385 # accuracy probably low
    386 threadring 50000000
    387 	gc threadring	32.70u 0.02s 32.77r	# +13%
    388 
    389 chameneos 6000000
    390 	gc chameneosredux	26.62u 0.00s 26.63r	# +13%
    391 
    392 Sep 24, 2009
    393 
    394 # Sqrt now in assembler for 6g.
    395 nbody -n 50000000
    396 	# remember, at least for 6g, alignment of loops may be important
    397 	gcc -O2 nbody.c	21.24u 0.00s 21.25r
    398 	gccgo -O2 nbody.go	121.03u 0.00s 121.04r
    399 	gc nbody	30.26u 0.00s 30.27r	# -65% ***
    400 	gc_B nbody	30.20u 0.02s 30.22r	# -72% *** 
    401 
    402 Nov 13 2009
    403 
    404 # fix bug in regexp; take performance hit.  good regexps will come in time.
    405 regex-dna 100000
    406 	gcc -O2 regex-dna.c -lpcre	0.92u 0.00s 0.94r
    407 	gc regex-dna	29.78u 0.03s 29.83r
    408 	gc_B regex-dna	32.63u 0.03s 32.74r
    409 
    410 Nov 24 2009
    411 
    412 # Roger Peppe's rewrite of the benchmark
    413 chameneos 6000000
    414 	gcc -O2 chameneosredux.c -lpthread	18.00u 303.29s 83.64r
    415 	gc chameneosredux	12.10u 0.00s 12.10r  # 2.22X faster
    416 
    417 Jan 6, 2010
    418 
    419 # Long-overdue update.  All numbers included in this complete run.
    420 # Some programs (e.g. reverse-complement) rewritten for speed.
    421 # Regular expressions much faster in common cases (although still far behind PCRE)
    422 # Bignum stuff improved
    423 # Better (but sometimes slower) locking in channels.
    424 
    425 fasta -n 25000000
    426 	gcc -O2 fasta.c	5.99u 0.01s 6.00r
    427 	gc fasta	9.11u 0.00s 9.12r	# -11%
    428 	gc_B fasta	8.60u 0.00s 8.62r	# +12% ??
    429 
    430 reverse-complement < output-of-fasta-25000000
    431 	gcc -O2 reverse-complement.c	2.00u 0.80s 9.54r
    432 #	gccgo -O2 reverse-complement.go	4.57u 0.35s 4.94r	# 33% faster
    433 	gc reverse-complement	2.01u 0.38s 2.40r	# 3.3X faster
    434 	gc_B reverse-complement	1.88u 0.36s 2.24r	# 3.2X faster
    435 GOGC=off
    436 	gc reverse-complement	2.01u 0.35s 2.37r
    437 	gc_B reverse-complement	1.86u 0.32s 2.19r
    438 
    439 nbody -n 50000000
    440 	gcc -O2 nbody.c	21.28u 0.00s 21.31r
    441 	gccgo -O2 nbody.go	80.02u 0.00s 80.05r	# 33% faster
    442 	gc nbody	30.13u 0.00s 30.13r
    443 	gc_B nbody	29.89u 0.01s 29.91r
    444 
    445 binary-tree 15 # too slow to use 20
    446 	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.87r
    447 	gccgo -O2 binary-tree.go	4.82u 0.41s 5.24r	# 2.5X slower
    448 	gc binary-tree	7.23u 0.01s 7.25r	# -19%
    449 	gc binary-tree-freelist	0.43u 0.00s 0.44r	# -9%
    450 
    451 fannkuch 12
    452 	gcc -O2 fannkuch.c	60.17u 0.00s 60.17r
    453 	gccgo -O2 fannkuch.go	78.47u 0.01s 78.49r
    454 	gc fannkuch	128.86u 0.00s 128.96r
    455 	gc_B fannkuch	90.17u 0.00s 90.21r
    456 
    457 regex-dna 100000
    458 	gcc -O2 regex-dna.c -lpcre	0.90u 0.00s 0.92r
    459 	gc regex-dna	9.48u 0.01s 9.50r	# 3.1X faster
    460 	gc_B regex-dna	9.08u 0.00s 9.10r	# 3.6X faster
    461 
    462 spectral-norm 5500
    463 	gcc -O2 spectral-norm.c -lm	11.48u 0.00s 11.48r
    464 	gccgo -O2 spectral-norm.go	11.68u 0.00s 11.70r
    465 	gc spectral-norm	23.98u 0.00s 23.99r
    466 	gc_B spectral-norm	23.68u 0.00s 23.69r
    467 
    468 k-nucleotide 1000000
    469 	gcc -O2 k-nucleotide.c	10.85u 0.04s 10.90r
    470 	gccgo -O2 k-nucleotide.go	25.26u 0.87s 26.14r
    471 	gc k-nucleotide	15.28u 0.06s 15.37r	# restored; mysterious variation continues
    472 	gc_B k-nucleotide	15.97u 0.03s 16.00r
    473 
    474 mandelbrot 16000
    475 	gcc -O2 mandelbrot.c	56.12u 0.01s 56.15r
    476 	gccgo -O2 mandelbrot.go	56.86u 0.01s 56.89r
    477 	gc mandelbrot	66.05u 0.00s 66.07r	# -3%
    478 	gc_B mandelbrot	66.06u 0.00s 66.07r	# -3%
    479 
    480 meteor 2100
    481 	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
    482 	gccgo -O2 meteor-contest.go	0.12u 0.00s 0.12r
    483 	gc meteor-contest	0.17u 0.00s 0.17r
    484 	gc_B meteor-contest	0.15u 0.00s 0.16r
    485 
    486 pidigits 10000
    487 	gcc -O2 pidigits.c -lgmp	2.57u 0.00s 2.59r
    488 	gc pidigits	38.27u 0.02s 38.30r	# 1.5X faster
    489 	gc_B pidigits	38.27u 0.02s 38.31r	# 1.5X faster
    490 
    491 threadring 50000000
    492 	gcc -O2 threadring.c	37.11u 170.59s 212.75r
    493 	gccgo -O2 threadring.go	89.67u 447.56s 442.55r	# -6.5%
    494 	gc threadring	36.08u 0.04s 36.15r	# +10%
    495 
    496 chameneos 6000000
    497 	gcc -O2 chameneosredux.c -lpthread	19.02u 331.08s 90.79r
    498 	gc chameneosredux	12.54u 0.00s 12.55r
    499 
    500 Oct 19, 2010
    501 
    502 # Another long-overdue update. Some of the code is new; parallel versions
    503 # of some are added.  A few significant improvements.
    504 
    505 fasta -n 25000000
    506 	gcc -O2 fasta.c	4.92u 0.00s 4.93r
    507 	gccgo -O2 fasta.go	3.31u 0.00s 3.34r  # new code
    508 	gc fasta	3.68u 0.00s 3.69r  # 2.5X faster with no code
    509 	gc_B fasta	3.68u 0.00s 3.69r  # 2.3X faster with no code
    510 
    511 reverse-complement < output-of-fasta-25000000
    512 	gcc -O2 reverse-complement.c	1.93u 0.81s 11.24r
    513 	gccgo -O2 reverse-complement.go	1.58u 0.43s 2.04r  # first run with new code?
    514 	gc reverse-complement	1.84u 0.34s 2.20r  # 10% faster
    515 	gc_B reverse-complement	1.85u 0.32s 2.18r
    516 
    517 nbody -n 50000000
    518 	gcc -O2 nbody.c	21.35u 0.00s 21.36r
    519 	gccgo -O2 nbody.go	21.62u 0.00s 21.66r  # 3.7X faster - why??
    520 	gc nbody	29.78u 0.00s 29.79r
    521 	gc_B nbody	29.72u 0.00s 29.72r
    522 
    523 binary-tree 15 # too slow to use 20
    524 	gcc -O2 binary-tree.c -lm	0.86u 0.00s 0.88r
    525 	gccgo -O2 binary-tree.go	4.05u 0.02s 4.08r  # 28% faster
    526 	gccgo -O2 binary-tree-freelist	0.34u 0.08s 0.34r
    527 	gc binary-tree	5.94u 0.00s 5.95r  # 20% faster
    528 	gc binary-tree-freelist	0.50u 0.01s 0.54r
    529 
    530 fannkuch 12
    531 	gcc -O2 fannkuch.c	60.45u 0.00s 60.45r
    532 	gccgo -O2 fannkuch.go	64.64u 0.00s 64.64r
    533 	gccgo -O2 fannkuch-parallel.go	115.63u 0.00s 31.58r
    534 	gc fannkuch	126.52u 0.04s 126.68r
    535 	gc fannkuch-parallel	238.82u 0.10s 65.93r  # GOMAXPROCS=4
    536 	gc_B fannkuch	88.99u 0.00s 89.02r
    537 
    538 regex-dna 100000
    539 	gcc -O2 regex-dna.c -lpcre	0.89u 0.00s 0.89r
    540 	gc regex-dna	8.99u 0.02s 9.03r
    541 	gc regex-dna-parallel	8.94u 0.02s 3.68r  # GOMAXPROCS=4
    542 	gc_B regex-dna	9.12u 0.00s 9.14r
    543 
    544 spectral-norm 5500
    545 	gcc -O2 spectral-norm.c -lm	11.55u 0.00s 11.57r
    546 	gccgo -O2 spectral-norm.go	11.73u 0.00s 11.75r
    547 	gc spectral-norm	23.74u 0.00s 23.79r
    548 	gc_B spectral-norm	24.49u 0.02s 24.54r
    549 
    550 k-nucleotide 1000000
    551 	gcc -O2 k-nucleotide.c	11.44u 0.06s 11.50r
    552 	gccgo -O2 k-nucleotide.go	8.65u 0.04s 8.71r
    553 	gccgo -O2 k-nucleotide-parallel.go	8.75u 0.03s 2.97r # set GOMAXPROCS=4
    554 	gc k-nucleotide	14.92u 0.05s 15.01r
    555 	gc k-nucleotide-parallel	16.96u 0.06s 6.53r  # set GOMAXPROCS=4
    556 	gc_B k-nucleotide	15.97u 0.03s 16.08r
    557 
    558 mandelbrot 16000
    559 	gcc -O2 mandelbrot.c	56.32u 0.00s 56.35r
    560 	gccgo -O2 mandelbrot.go	55.62u 0.02s 55.77r
    561 	gc mandelbrot	64.85u 0.01s 64.94r
    562 	gc_B mandelbrot	65.02u 0.01s 65.14r
    563 
    564 meteor 2100
    565 	gcc -O2 meteor-contest.c	0.10u 0.00s 0.10r
    566 	gccgo -O2 meteor-contest.go	0.10u 0.00s 0.11r
    567 	gc meteor-contest	0.17u 0.00s 0.18r
    568 	gc_B meteor-contest	0.16u 0.00s 0.16r
    569 
    570 pidigits 10000
    571 	gcc -O2 pidigits.c -lgmp	2.58u 0.00s 2.59r
    572 	gccgo -O2 pidigits.go	14.06u 0.01s 14.09r # first run?
    573 	gc pidigits	8.47u 0.05s 8.55r # 4.5X faster due to package big
    574 	gc_B pidigits	8.33u 0.01s 8.36r # 4.5X faster due to package big
    575 
    576 threadring 50000000
    577 	gcc -O2 threadring.c	28.18u 153.19s 186.47r
    578 	gccgo -O2 threadring.go	110.10u 516.48s 515.25r
    579 	gc threadring	40.39u 0.00s 40.40r
    580 
    581 chameneos 6000000
    582 	gcc -O2 chameneosredux.c -lpthread	18.20u 301.55s 83.10r
    583 	gccgo -O2 chameneosredux.go	52.22u 324.54s 201.21r
    584 	gc chameneosredux	13.52u 0.00s 13.54r
    585 
    586 Dec 14, 2010
    587 
    588 # Improved regex code (same algorithm) gets ~30%.
    589 
    590 regex-dna 100000
    591 	gcc -O2 regex-dna.c -lpcre	0.77u 0.01s 0.78r
    592 	gc regex-dna	6.80u 0.00s 6.81r
    593 	gc regex-dna-parallel	6.82u 0.01s 2.75r
    594 	gc_B regex-dna	6.69u 0.02s 6.70r
    595 
    596 Feb 15, 2011
    597 
    598 # Improved GC, still single-threaded but more efficient
    599 
    600 fasta -n 25000000
    601 	gcc -O2 fasta.c	3.40u 0.00s 3.40r
    602 	gccgo -O2 fasta.go	3.51u 0.00s 3.50r
    603 	gc fasta	3.66u 0.01s 3.66r
    604 	gc_B fasta	3.66u 0.00s 3.66r
    605 
    606 reverse-complement < output-of-fasta-25000000
    607 	gcc -O2 reverse-complement.c	1.86u 1.29s 4.93r
    608 	gccgo -O2 reverse-complement.go	2.18u 0.41s 2.60r
    609 	gc reverse-complement	1.67u 0.48s 2.15r
    610 	gc_B reverse-complement	1.71u 0.45s 2.15r
    611 
    612 nbody -n 50000000
    613 	gcc -O2 -lm nbody.c	21.64u 0.00s 21.64r
    614 	gccgo -O2 nbody.go	21.46u 0.00s 21.45r
    615 	gc nbody	29.07u 0.00s 29.06r
    616 	gc_B nbody	31.61u 0.00s 31.61r
    617 
    618 binary-tree 15 # too slow to use 20
    619 	gcc -O2 binary-tree.c -lm	0.88u 0.00s 0.87r
    620 	gccgo -O2 binary-tree.go	2.74u 0.07s 2.81r
    621 	gccgo -O2 binary-tree-freelist.go	0.01u 0.00s 0.00r
    622 	gc binary-tree	4.22u 0.02s 4.24r
    623 	gc binary-tree-freelist	0.54u 0.02s 0.55r
    624 
    625 fannkuch 12
    626 	gcc -O2 fannkuch.c	57.64u 0.00s 57.64r
    627 	gccgo -O2 fannkuch.go	65.79u 0.00s 65.82r
    628 	gccgo -O2 fannkuch-parallel.go	160.91u 0.02s 43.90r
    629 	gc fannkuch	126.36u 0.03s 126.53r
    630 	gc fannkuch-parallel	175.23u 0.04s 45.49r
    631 	gc_B fannkuch	89.23u 0.00s 89.24r
    632 
    633 regex-dna 100000
    634 	gcc -O2 regex-dna.c -lpcre	0.77u 0.01s 0.80r
    635 	gccgo -O2 regex-dna.go	12.38u 0.10s 12.52r
    636 	gccgo -O2 regex-dna-parallel.go	43.96u 4.64s 15.11r
    637 	gc regex-dna	7.03u 0.01s 7.05r
    638 	gc regex-dna-parallel	6.85u 0.05s 2.70r
    639 	gc_B regex-dna	6.87u 0.02s 6.89r
    640 
    641 spectral-norm 5500
    642 	gcc -O2 spectral-norm.c -lm	12.29u 0.00s 12.28r
    643 	gccgo -O2 spectral-norm.go	11.79u 0.00s 11.79r
    644 	gc spectral-norm	24.00u 0.02s 24.05r
    645 	gc_B spectral-norm	24.59u 0.01s 24.59r
    646 
    647 k-nucleotide 1000000
    648 	gcc -O2 k-nucleotide.c	9.75u 0.07s 9.82r
    649 	gccgo -O2 k-nucleotide.go	8.92u 0.06s 8.98r
    650 	gccgo -O2 k-nucleotide-parallel.go	8.40u 0.04s 2.76r
    651 	gc k-nucleotide	17.01u 0.03s 17.04r
    652 	gc k-nucleotide-parallel	16.51u 0.08s 6.21r
    653 	gc_B k-nucleotide	16.94u 0.08s 17.02r
    654 
    655 mandelbrot 16000
    656 	gcc -O2 mandelbrot.c	54.60u 0.00s 54.66r
    657 	gccgo -O2 mandelbrot.go	59.38u 0.00s 59.41r
    658 	gc mandelbrot	64.93u 0.04s 65.08r
    659 	gc_B mandelbrot	64.85u 0.03s 64.92r
    660 
    661 meteor 2098
    662 	gcc -O2 meteor-contest.c	0.10u 0.01s 0.10r
    663 	gccgo -O2 meteor-contest.go	0.11u 0.00s 0.11r
    664 	gc meteor-contest	0.18u 0.00s 0.17r
    665 	gc_B meteor-contest	0.17u 0.00s 0.16r
    666 
    667 pidigits 10000
    668 	gcc -O2 pidigits.c -lgmp	2.24u 0.00s 2.23r
    669 	gccgo -O2 pidigits.go	14.05u 0.00s 14.06r
    670 	gc pidigits	6.34u 0.05s 6.38r
    671 	gc_B pidigits	6.37u 0.02s 6.38r
    672 
    673 threadring 50000000
    674 	gcc -O2 threadring.c	30.50u 258.05s 325.72r
    675 	gccgo -O2 threadring.go	92.87u 748.39s 728.46r
    676 	gc threadring	38.03u 0.01s 38.04r
    677 
    678 # Apr 15, 2011
    679 # Move to new machine, Intel Xeon E5520 @ 2.27GHz.
    680 # (Was Opteron(tm) Processor 8214 HE)
    681 
    682 fasta -n 25000000
    683 OLD:
    684 	gcc -O2 fasta.c	3.39u 0.04s 3.42r
    685 	gccgo -O2 fasta.go	3.52u 0.00s 3.52r
    686 	gc fasta	3.63u 0.04s 3.67r
    687 	gc_B fasta	3.66u 0.00s 3.66r
    688 NEW:
    689 	gcc -O2 fasta.c	1.45u 0.02s 1.47r
    690 	gccgo -O2 fasta.go	1.51u 0.01s 1.51r
    691 	gc fasta	2.04u 0.00s 2.04r
    692 	gc_B fasta	2.05u 0.00s 2.04r
    693 
    694 reverse-complement < output-of-fasta-25000000
    695 OLD:
    696 	gcc -O2 reverse-complement.c	1.87u 1.51s 7.02r
    697 	gccgo -O2 reverse-complement.go	1.56u 0.54s 3.37r
    698 	gc reverse-complement	1.73u 0.36s 2.08r
    699 	gc_B reverse-complement	1.75u 0.37s 2.12r
    700 NEW:
    701 	gcc -O2 reverse-complement.c	1.20u 0.47s 12.96r
    702 	gccgo -O2 reverse-complement.go	0.88u 0.14s 1.01r
    703 	gc reverse-complement	1.13u 0.17s 1.30r
    704 	gc_B reverse-complement	1.11u 0.09s 1.20r
    705 
    706 nbody -n 50000000
    707 OLD:
    708 	gcc -O2 -lm nbody.c	21.90u 0.00s 21.92r
    709 	gccgo -O2 nbody.go	23.12u 0.03s 23.19r
    710 	gc nbody	29.07u 0.00s 29.07r
    711 	gc_B nbody	31.84u 0.00s 31.85r
    712 NEW:
    713 	gcc -O2 -lm nbody.c	13.01u 0.00s 13.03r
    714 	gccgo -O2 nbody.go	13.35u 0.00s 13.37r
    715 	gc nbody	21.78u 0.00s 21.82r
    716 	gc_B nbody	21.72u 0.00s 21.76r
    717 
    718 binary-tree 15 # too slow to use 20
    719 OLD:
    720 	gcc -O2 binary-tree.c -lm	0.83u 0.02s 0.84r
    721 	gccgo -O2 binary-tree.go	2.61u 0.02s 2.62r
    722 	gccgo -O2 binary-tree-freelist.go	0.32u 0.01s 0.32r
    723 	gc binary-tree	3.93u 0.04s 3.97r
    724 	gc binary-tree-freelist	0.47u 0.03s 0.50r
    725 NEW:
    726 	gcc -O2 binary-tree.c -lm	0.60u 0.00s 0.59r
    727 	gccgo -O2 binary-tree.go	1.53u 0.00s 1.52r
    728 	gccgo -O2 binary-tree-freelist.go	0.01u 0.00s 0.00r
    729 	gc binary-tree	1.93u 0.02s 1.95r
    730 	gc binary-tree-freelist	0.32u 0.01s 0.32r
    731 
    732 fannkuch 12
    733 OLD:
    734 	gcc -O2 fannkuch.c	57.64u 0.00s 57.64r
    735 	gccgo -O2 fannkuch.go	65.56u 0.01s 65.65r
    736 	gccgo -O2 fannkuch-parallel.go	179.12u 0.00s 49.82r
    737 	gc fannkuch	126.39u 0.00s 126.39r
    738 	gc fannkuch-parallel	172.49u 0.02s 45.44r
    739 	gc_B fannkuch	89.30u 0.00s 89.28r
    740 NEW:
    741 	gcc -O2 fannkuch.c	45.17u 0.00s 45.26r
    742 	gccgo -O2 fannkuch.go	53.63u 0.00s 53.73r
    743 	gccgo -O2 fannkuch-parallel.go	216.72u 0.00s 58.42r
    744 	gc fannkuch	108.21u 0.00s 108.44r
    745 	gc fannkuch-parallel	227.20u 0.00s 57.27r
    746 	gc_B fannkuch	56.14u 0.00s 56.26r
    747 
    748 regex-dna 100000
    749 OLD:
    750 	gcc -O2 regex-dna.c -lpcre	0.77u 0.01s 0.78r
    751 	gccgo -O2 regex-dna.go	10.15u 0.02s 10.23r
    752 	gccgo -O2 regex-dna-parallel.go	33.81u 3.22s 11.62r
    753 	gc regex-dna	6.52u 0.04s 6.56r
    754 	gc regex-dna-parallel	6.84u 0.03s 2.70r
    755 	gc_B regex-dna	6.83u 0.01s 6.84r
    756 NEW:
    757 	gcc -O2 regex-dna.c -lpcre	0.47u 0.00s 0.47r
    758 	gccgo -O2 regex-dna.go	6.00u 0.00s 6.00r
    759 	gccgo -O2 regex-dna-parallel.go	44.54u 1.57s 6.51r
    760 	gc regex-dna	5.41u 0.01s 5.42r
    761 	gc regex-dna-parallel	5.62u 0.01s 2.20r
    762 	gc_B regex-dna	5.50u 0.00s 5.50r
    763 
    764 spectral-norm 5500
    765 OLD:
    766 	gcc -O2 spectral-norm.c -lm	12.29u 0.00s 12.28r
    767 	gccgo -O2 spectral-norm.go	11.56u 0.00s 11.55r
    768 	gc spectral-norm	23.98u 0.00s 24.00r
    769 	gc_B spectral-norm	24.62u 0.00s 24.65r
    770 NEW:
    771 	gcc -O2 spectral-norm.c -lm	15.79u 0.00s 15.82r
    772 	gccgo -O2 spectral-norm.go	15.32u 0.00s 15.35r
    773 	gc spectral-norm	19.62u 0.01s 19.67r
    774 	gc_B spectral-norm	19.62u 0.00s 19.66r
    775 
    776 k-nucleotide 1000000
    777 OLD:
    778 	gcc -O2 k-nucleotide.c	9.82u 0.06s 9.87r
    779 	gccgo -O2 k-nucleotide.go	8.30u 0.02s 8.32r
    780 	gccgo -O2 k-nucleotide-parallel.go	8.84u 0.05s 3.02r
    781 	gc k-nucleotide	15.38u 0.07s 15.44r
    782 	gc k-nucleotide-parallel	16.40u 0.03s 5.93r
    783 	gc_B k-nucleotide	15.19u 0.05s 15.23r
    784 NEW:
    785 	gcc -O2 k-nucleotide.c	4.88u 0.03s 4.92r
    786 	gccgo -O2 k-nucleotide.go	5.94u 0.01s 5.96r
    787 	gccgo -O2 k-nucleotide-parallel.go	6.44u 0.03s 1.47r
    788 	gc k-nucleotide	9.61u 0.01s 9.63r
    789 	gc k-nucleotide-parallel	9.70u 0.00s 3.39r
    790 	gc_B k-nucleotide	9.19u 0.03s 9.23r
    791 
    792 mandelbrot 16000
    793 OLD:
    794 	gcc -O2 mandelbrot.c	54.54u 0.00s 54.56r
    795 	gccgo -O2 mandelbrot.go	59.63u 0.03s 59.67r
    796 	gc mandelbrot	64.82u 0.00s 64.83r
    797 	gc_B mandelbrot	64.84u 0.00s 64.91r
    798 NEW:
    799 	gcc -O2 mandelbrot.c	36.07u 0.01s 36.15r
    800 	gccgo -O2 mandelbrot.go	43.57u 0.00s 43.66r
    801 	gc mandelbrot	60.66u 0.00s 60.79r
    802 	gc_B mandelbrot	60.90u 0.00s 61.03r
    803 
    804 meteor 2098
    805 OLD:
    806 	gcc -O2 meteor-contest.c	0.11u 0.00s 0.10r
    807 	gccgo -O2 meteor-contest.go	0.10u 0.01s 0.10r
    808 	gc meteor-contest	0.18u 0.00s 0.17r
    809 	gc_B meteor-contest	0.17u 0.00s 0.16r
    810 NEW:
    811 	gcc -O2 meteor-contest.c	0.10u 0.00s 0.09r
    812 	gccgo -O2 meteor-contest.go	0.10u 0.00s 0.09r
    813 	gc meteor-contest	0.14u 0.00s 0.14r
    814 	gc_B meteor-contest	0.13u 0.00s 0.13r
    815 
    816 pidigits 10000
    817 OLD:
    818 	gcc -O2 pidigits.c -lgmp	2.22u 0.00s 2.21r
    819 	gccgo -O2 pidigits.go	13.39u 0.00s 13.40r
    820 	gc pidigits	6.42u 0.04s 6.45r
    821 	gc_B pidigits	6.45u 0.02s 6.47r
    822 NEW:
    823 	gcc -O2 pidigits.c -lgmp	2.27u 0.00s 2.29r
    824 	gccgo -O2 pidigits.go	9.21u 0.00s 9.22r
    825 	gc pidigits	3.60u 0.00s 3.60r
    826 	gc_B pidigits	3.56u 0.02s 3.58r
    827 
    828 threadring 50000000
    829 OLD:
    830 	gcc -O2 threadring.c -lpthread	34.51u 267.95s 336.12r
    831 	gccgo -O2 threadring.go	103.51u 588.57s 627.16r
    832 	gc threadring	54.68u 0.00s 54.73r
    833 NEW:
    834 	gcc -O2 threadring.c	32.00u 259.39s 369.74r
    835 	gccgo -O2 threadring.go	133.06u 546.02s 595.33r
    836 	gc threadring	16.75u 0.02s 16.80r
    837 
    838 chameneos 6000000
    839 OLD:
    840 	gcc -O2 chameneosredux.c -lpthread	12.65u 31.02s 13.33r
    841 	gccgo -O2 chameneosredux.go	47.04u 302.84s 252.29r
    842 	gc chameneosredux	14.14u 0.00s 14.14r
    843 NEW:
    844 	gcc -O2 chameneosredux.c -lpthread	8.05u 63.43s 11.16r
    845 	gccgo -O2 chameneosredux.go	82.95u 304.37s 207.64r
    846 	gc chameneosredux	9.42u 0.00s 9.43r
    847 
    848 # May 13, 2011
    849 # after gc update to inline append when possible - 35% faster
    850 
    851 regex-dna 100000
    852 	gc regex-dna	3.94u 0.00s 3.95r
    853 	gc regex-dna-parallel	4.15u 0.01s 1.63r
    854 	gc_B regex-dna	4.01u 0.01s 4.02r
    855 
    856 # Aug 4, 2011
    857 # After various updates to locking code and some runtime changes.
    858 # Slowdowns believed due to slower (but more correct) memmove.
    859 
    860 fannkuch 12
    861 	gccgo -O2 fannkuch.go	51.59u 0.00s 51.69r # -4%
    862 	gccgo -O2 fannkuch-parallel.go	253.17u 0.00s 64.67r # -11%
    863 	gc fannkuch	103.14u 0.00s 103.36r # -5%
    864 	gc fannkuch-parallel	189.63u 0.00s 49.37r # +9%
    865 	gc_B fannkuch	49.19u 0.00s 49.29r # -14%
    866 
    867 regex-dna 100000
    868 	gc regex-dna	3.78u 0.00s 3.78r # -43%
    869 	gc regex-dna-parallel	3.84u 0.02s 1.48r # -49%
    870 	gc_B regex-dna	3.62u 0.00s 3.63r # -52%
    871 
    872 k-nucleotide 1000000
    873 	gc k-nucleotide	12.23u 0.02s 12.27r # +27%
    874 	gc k-nucleotide-parallel	12.76u 0.02s 4.37r # +29%
    875 	gc_B k-nucleotide	12.18u 0.01s 12.21r # +33%
    876 
    877 threadring 50000000
    878 	gc threadring	17.49u 0.00s 17.53r # +4%
    879 
    880 chameneos 6000000
    881 	gc chameneosredux	7.61u 0.00s 7.63r # -24%
    882 
    883 # Aug 9, 2011
    884 # After custom algorithms for 1-, 2-, 4-, and 8-byte scalars.
    885 
    886 fannkuch 12
    887 	gc fannkuch-parallel	157.17u 0.00s 41.08r # -17%
    888 
    889 k-nucleotide 1000000
    890 	gc k-nucleotide	8.72u 0.03s 8.76r # -39%
    891 	gc k-nucleotide-parallel	8.79u 0.01s 3.14r # -39%
    892 	gc_B k-nucleotide	8.65u 0.03s 8.69r # -39%
    893 
    894 pidigits 10000
    895 	gc pidigits	3.71u 0.02s 3.73r # +4%
    896 	gc_B pidigits	3.73u 0.00s 3.73r # +4%
    897 
    898 threadring 50000000
    899 	gc threadring	14.51u 0.00s 14.54r # -17%
    900 
    901 chameneos 6000000
    902 	gc chameneosredux	7.41u 0.00s 7.42r # -3%
    903 
    904 # A complete run at the Go 1 release.
    905 # Significant changes:
    906 # - gccgo is now enabled for all tests (goroutines are cheap enough)
    907 # - threadring and chameneos are 14% faster, probably due to runtime changes
    908 # - regex-dna 36% faster
    909 # - fannkuch-parallel (only) slowed down 40%
    910 # - gccgo on binary-tree-freelist is still optimized to nothing
    911 # Other changes are modest.
    912 
    913 fasta -n 25000000
    914 	gcc -O2 fasta.c	1.45u 0.02s 1.48r
    915 	gccgo -O2 fasta.go	1.46u 0.00s 1.47r
    916 	gc fasta	1.99u 0.01s 2.00r
    917 	gc_B fasta	1.99u 0.01s 2.01r
    918 
    919 reverse-complement < output-of-fasta-25000000
    920 	gcc -O2 reverse-complement.c	0.95u 0.48s 4.99r
    921 	gccgo -O2 reverse-complement.go	0.93u 0.16s 1.09r
    922 	gc reverse-complement	1.20u 0.19s 1.39r
    923 	gc_B reverse-complement	1.04u 0.16s 1.20r
    924 
    925 nbody -n 50000000
    926 	gcc -O2 -lm nbody.c	13.02u 0.00s 13.05r
    927 	gccgo -O2 nbody.go	14.46u 0.00s 14.49r
    928 	gc nbody	21.79u 0.00s 21.84r
    929 	gc_B nbody	21.74u 0.00s 21.79r
    930 
    931 binary-tree 15 # too slow to use 20
    932 	gcc -O2 binary-tree.c -lm	0.60u 0.01s 0.61r
    933 	gccgo -O2 binary-tree.go	1.30u 0.01s 1.32r
    934 	gccgo -O2 binary-tree-freelist.go	0.00u 0.00s 0.00r
    935 	gc binary-tree	1.84u 0.01s 1.86r
    936 	gc binary-tree-freelist	0.33u 0.00s 0.33r
    937 
    938 fannkuch 12
    939 	gcc -O2 fannkuch.c	45.24u 0.00s 45.34r
    940 	gccgo -O2 fannkuch.go	59.76u 0.01s 59.90r
    941 	gccgo -O2 fannkuch-parallel.go	218.20u 0.01s 61.60r
    942 	gc fannkuch	103.92u 0.00s 104.16r
    943 	gc fannkuch-parallel	221.61u 0.00s 60.49r
    944 	gc_B fannkuch	53.17u 0.00s 53.30r
    945 
    946 regex-dna 100000
    947 	gcc -O2 regex-dna.c -lpcre	0.47u 0.00s 0.48r
    948 	gccgo -O2 regex-dna.go	6.52u 0.00s 6.54r
    949 	gccgo -O2 regex-dna-parallel.go	14.40u 0.73s 4.35r
    950 	gc regex-dna	2.63u 0.02s 2.66r # -36%
    951 	gc regex-dna-parallel	2.87u 0.01s 1.11r
    952 	gc_B regex-dna	2.65u 0.00s 2.66r
    953 
    954 spectral-norm 5500
    955 	gcc -O2 spectral-norm.c -lm	15.78u 0.00s 15.82r
    956 	gccgo -O2 spectral-norm.go	15.79u 0.00s 15.83r
    957 	gc spectral-norm	19.76u 0.00s 19.80r
    958 	gc_B spectral-norm	19.73u 0.01s 19.78r
    959 
    960 k-nucleotide 1000000
    961 	gcc -O2 k-nucleotide.c	5.59u 0.03s 5.63r
    962 	gccgo -O2 k-nucleotide.go	4.09u 0.03s 4.13r
    963 	gccgo -O2 k-nucleotide-parallel.go	4.50u 0.06s 1.63r
    964 	gc k-nucleotide	9.23u 0.02s 9.27r
    965 	gc k-nucleotide-parallel	9.87u 0.03s 3.55r
    966 	gc_B k-nucleotide	9.20u 0.00s 9.22r
    967 
    968 mandelbrot 16000
    969 	gcc -O2 mandelbrot.c	36.09u 0.00s 36.18r
    970 	gccgo -O2 mandelbrot.go	41.69u 0.01s 41.80r
    971 	gc mandelbrot	60.91u 0.02s 61.07r
    972 	gc_B mandelbrot	60.90u 0.00s 61.04r
    973 
    974 meteor 2098
    975 	gcc -O2 meteor-contest.c	0.09u 0.00s 0.09r
    976 	gccgo -O2 meteor-contest.go	0.09u 0.00s 0.09r
    977 	gc meteor-contest	0.14u 0.00s 0.15r
    978 	gc_B meteor-contest	0.14u 0.00s 0.14r
    979 
    980 pidigits 10000
    981 	gcc -O2 pidigits.c -lgmp	2.27u 0.00s 2.27r
    982 	gccgo -O2 pidigits.go	8.65u 0.00s 8.67r
    983 	gc pidigits	3.70u 0.04s 3.75r
    984 	gc_B pidigits	3.72u 0.02s 3.75r
    985 
    986 threadring 50000000
    987 	gcc -O2 threadring.c	40.91u 369.85s 323.31r
    988 	gccgo -O2 threadring.go	26.97u 30.82s 57.93r
    989 	gc threadring	12.81u 0.01s 12.85r # -13%
    990 
    991 chameneos 6000000
    992 	gcc -O2 chameneosredux.c -lpthread	9.44u 72.90s 12.65r
    993 	gccgo -O2 chameneosredux.go	7.73u 7.53s 15.30r
    994 	gc chameneosredux	6.51u 0.00s 6.53r # -14%
    995 
    996 # After http://codereview.appspot.com/6248049, moving panicindex
    997 # calls out of line (putting the likely code into a single path and shortening
    998 # loops). Significant changes since the last run (note: some are slower for
    999 # unrelated and as yet undiagnosed reasons):
   1000 
   1001 nbody -n 50000000
   1002 	gc nbody	19.10u 0.01s 19.19r # -12%
   1003 	gc_B nbody	19.19u 0.00s 19.23r # -12%
   1004 
   1005 binary-tree 15 # too slow to use 20
   1006 	gc binary-tree	1.49u 0.01s 1.51r # -19%
   1007 	
   1008 fannkuch 12
   1009 	gc fannkuch	60.79u 0.00s 60.92r # -41%
   1010 	gc fannkuch-parallel	183.51u 0.01s 51.75r # -14%
   1011 	gc_B fannkuch	51.68u 0.00s 51.79r # -3%
   1012 
   1013 k-nucleotide 1000000
   1014 	gc k-nucleotide	9.74u 0.04s 9.80r # +6%
   1015 	gc k-nucleotide-parallel	9.89u 0.05s 3.59r # +1%
   1016 	gc_B k-nucleotide	9.39u 0.02s 9.43r # +2%
   1017 
   1018 mandelbrot (much slower, due to unrelated http://codereview.appspot.com/6209077)
   1019 	gc mandelbrot	100.98u 0.00s 101.20r # +65%
   1020 	gc_B mandelbrot	100.90u 0.01s 101.17r # +65%
   1021 
   1022 meteor 2098
   1023 	gc meteor-contest	0.13u 0.00s 0.13r # -13%
   1024 	gc_B meteor-contest	0.13u 0.00s 0.13r # -7%
   1025 
   1026 # May 30, 2012.
   1027 # After http://codereview.appspot.com/6261051, restoring old code generated
   1028 # for floating-point constants. Mandelbrot is back to its previous numbers.
   1029 
   1030 mandelbrot 16000
   1031 	gcc -O2 mandelbrot.c	36.07u 0.00s 36.16r
   1032 	gccgo -O2 mandelbrot.go	41.72u 0.01s 41.90r
   1033 	gc mandelbrot	60.62u 0.00s 60.76r
   1034 	gc_B mandelbrot	60.68u 0.00s 60.82r
   1035 
   1036 # May 30, 2012.
   1037 # After http://codereview.appspot.com/6248068, better FP code
   1038 # by avoiding MOVSD between registers.
   1039 # Plus some other timing changes that have crept in from other speedups,
   1040 # from garbage collection to Printf.
   1041 
   1042 fasta -n 25000000
   1043 	gc fasta	1.76u 0.00s 1.76r # -12%
   1044 	gc_B fasta	1.71u 0.00s 1.72r # -12%
   1045 
   1046 nbody -n 50000000
   1047 	gc nbody	17.56u 0.00s 17.60r # -8%
   1048 	gc_B nbody	17.30u 0.00s 17.34r # -10%
   1049 
   1050 fannkuch 12
   1051 	gc fannkuch-parallel	155.92u 0.01s 44.05r # -15%
   1052 
   1053 k-nucleotide 1000000
   1054 	gc k-nucleotide	9.22u 0.01s 9.26r # -5%
   1055 	gc k-nucleotide-parallel	9.23u 0.03s 3.26r # -9%
   1056 	gc_B k-nucleotide	9.22u 0.03s 9.28r # -2%
   1057 
   1058 mandelbrot 16000
   1059 	gc mandelbrot	44.80u 0.00s 44.90r # -27%
   1060 	gc_B mandelbrot	44.81u 0.00s 44.92r # -26%
   1061 
   1062 pidigits 10000
   1063 	gc pidigits	3.51u 0.00s 3.52r # -6%
   1064 	gc_B pidigits	3.51u 0.00s 3.52r # -6%
   1065 
   1066 # Aug 28, 2012
   1067 # After some assembler work in package big.
   1068 pidigits 10000
   1069 	gc pidigits	2.85u 0.02s 2.88r # -22%
   1070 	gc_B pidigits	2.88u 0.01s 2.90r # -21%
   1071 
   1072 # Sep 26, 2012
   1073 # 64-bit ints, plus significantly better floating-point code.
   1074 # Interesting details:
   1075 #	Generally somewhere in the 0-10% slower range; some (binary-tree) more.
   1076 #	Floating-point noticeably faster:
   1077 #		nbody -25%
   1078 #		mandelbrot -37% relative to Go 1.
   1079 #	Other:
   1080 #		regex-dna +47%
   1081 fasta -n 25000000
   1082 	gcc -O2 fasta.c	1.43u 0.03s 1.46r
   1083 	gccgo -O2 fasta.go	1.47u 0.00s 1.47r
   1084 	gc fasta	1.78u 0.01s 1.80r
   1085 	gc_B fasta	1.76u 0.00s 1.76r
   1086 
   1087 reverse-complement < output-of-fasta-25000000
   1088 	gcc -O2 reverse-complement.c	1.14u 0.39s 11.19r
   1089 	gccgo -O2 reverse-complement.go	0.91u 0.17s 1.09r
   1090 	gc reverse-complement	1.12u 0.18s 1.31r
   1091 	gc_B reverse-complement	1.12u 0.15s 1.28r
   1092 
   1093 nbody -n 50000000
   1094 	gcc -O2 nbody.c -lm	13.02u 0.00s 13.05r
   1095 	gccgo -O2 nbody.go	13.90u 0.00s 13.93r
   1096 	gc nbody	17.05u 0.00s 17.09r
   1097 	gc_B nbody	16.30u 0.00s 16.34r
   1098 
   1099 binary-tree 15 # too slow to use 20
   1100 	gcc -O2 binary-tree.c -lm	0.61u 0.00s 0.61r
   1101 	gccgo -O2 binary-tree.go	1.24u 0.04s 1.29r
   1102 	gccgo -O2 binary-tree-freelist.go	0.21u 0.01s 0.22r
   1103 	gc binary-tree	1.93u 0.02s 1.96r
   1104 	gc binary-tree-freelist	0.32u 0.00s 0.33r
   1105 
   1106 fannkuch 12
   1107 	gcc -O2 fannkuch.c	45.19u 0.00s 45.29r
   1108 	gccgo -O2 fannkuch.go	60.32u 0.00s 60.45r
   1109 	gccgo -O2 fannkuch-parallel.go	185.59u 0.00s 59.49r
   1110 	gc fannkuch	72.14u 0.00s 72.30r
   1111 	gc fannkuch-parallel	172.54u 0.00s 43.59r
   1112 	gc_B fannkuch	53.55u 0.00s 53.67r
   1113 
   1114 regex-dna 100000
   1115 	gcc -O2 regex-dna.c -lpcre	0.47u 0.00s 0.47r
   1116 	gccgo -O2 regex-dna.go	6.49u 0.05s 6.56r
   1117 	gccgo -O2 regex-dna-parallel.go	14.60u 0.67s 4.42r
   1118 	gc regex-dna	3.91u 0.00s 3.92r
   1119 	gc regex-dna-parallel	4.01u 0.03s 1.56r
   1120 	gc_B regex-dna	3.91u 0.00s 3.92r
   1121 
   1122 spectral-norm 5500
   1123 	gcc -O2 spectral-norm.c -lm	15.85u 0.00s 15.89r
   1124 	gccgo -O2 spectral-norm.go	15.86u 0.00s 15.89r
   1125 	gc spectral-norm	19.72u 0.00s 19.76r
   1126 	gc_B spectral-norm	19.68u 0.01s 19.74r
   1127 
   1128 k-nucleotide 1000000
   1129 	gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include -lglib-2.0 	4.90u 0.01s 4.93r
   1130 	gccgo -O2 k-nucleotide.go	4.78u 0.01s 4.80r
   1131 	gccgo -O2 k-nucleotide-parallel.go	6.49u 0.02s 2.18r
   1132 	gc k-nucleotide	9.05u 0.02s 9.09r
   1133 	gc k-nucleotide-parallel	9.27u 0.01s 3.29r
   1134 	gc_B k-nucleotide	8.95u 0.03s 9.00r
   1135 
   1136 mandelbrot 16000
   1137 	gcc -O2 mandelbrot.c	36.11u 0.00s 36.19r
   1138 	gccgo -O2 mandelbrot.go	43.67u 0.00s 43.77r
   1139 	gc mandelbrot	38.57u 0.00s 38.66r
   1140 	gc_B mandelbrot	38.59u 0.00s 38.68r
   1141 
   1142 meteor 2098
   1143 	gcc -O2 meteor-contest.c	0.09u 0.00s 0.09r
   1144 	gccgo -O2 meteor-contest.go	0.09u 0.00s 0.09r
   1145 	gc meteor-contest	0.13u 0.00s 0.14r
   1146 	gc_B meteor-contest	0.12u 0.00s 0.13r
   1147 
   1148 pidigits 10000
   1149 	gcc -O2 pidigits.c -lgmp	2.26u 0.00s 2.27r
   1150 	gccgo -O2 pidigits.go	9.05u 0.00s 9.07r
   1151 	gc pidigits	2.88u 0.02s 2.90r
   1152 	gc_B pidigits	2.89u 0.00s 2.90r
   1153 
   1154 threadring 50000000
   1155 	gcc -O2 threadring.c -lpthread	37.30u 327.81s 289.28r
   1156 	gccgo -O2 threadring.go	42.83u 26.15s 69.14r
   1157 	gc threadring	13.00u 0.00s 13.03r
   1158 
   1159 chameneos 6000000
   1160 	gcc -O2 chameneosredux.c -lpthread	8.80u 71.67s 12.19r
   1161 	gccgo -O2 chameneosredux.go	11.28u 6.68s 18.00r
   1162 	gc chameneosredux	6.94u 0.00s 6.96r
   1163 
   1164 # May 23, 2013
   1165 # Go 1.1, which includes precise GC, new scheduler, faster maps.
   1166 # 20%-ish speedups across many benchmarks.
   1167 # gccgo showing significant improvement (even though it's not yet up to Go 1.1)
   1168 #
   1169 # Standouts:
   1170 #	fannkuch, regex-dna, k-nucleotide, threadring, chameneos
   1171 
   1172 fasta -n 25000000
   1173 	gcc -m64 -O2 fasta.c	1.54u 0.01s 1.55r
   1174 	gccgo -O2 fasta.go	1.42u 0.00s 1.43r
   1175 	gc fasta	1.50u 0.01s 1.52r # -16%
   1176 	gc_B fasta	1.46u 0.00s 1.46r # -17%
   1177 
   1178 reverse-complement < output-of-fasta-25000000
   1179 	gcc -m64 -O2 reverse-complement.c	0.87u 0.37s 4.36r
   1180 	gccgo -O2 reverse-complement.go	0.77u 0.15s 0.93r # -15%
   1181 	gc reverse-complement	0.99u 0.12s 1.12r # -15%
   1182 	gc_B reverse-complement	0.85u 0.17s 1.02r # -21%
   1183 
   1184 nbody -n 50000000
   1185 	gcc -m64 -O2 nbody.c -lm	13.50u 0.00s 13.53r
   1186 	gccgo -O2 nbody.go	13.98u 0.01s 14.02r
   1187 	gc nbody	16.63u 0.01s 16.67r
   1188 	gc_B nbody	15.74u 0.00s 15.76r
   1189 
   1190 binary-tree 15 # too slow to use 20
   1191 	gcc -m64 -O2 binary-tree.c -lm	0.61u 0.00s 0.61r
   1192 	gccgo -O2 binary-tree.go	1.11u 0.01s 1.12r # -13%
   1193 	gccgo -O2 binary-tree-freelist.go	0.22u 0.01s 0.23r
   1194 	gc binary-tree	1.83u 0.02s 1.83r # -7%
   1195 	gc binary-tree-freelist	0.32u 0.00s 0.32r
   1196 
   1197 fannkuch 12
   1198 	gcc -m64 -O2 fannkuch.c	45.56u 0.00s 45.67r
   1199 	gccgo -O2 fannkuch.go	57.71u 0.00s 57.85r # -4%
   1200 	gccgo -O2 fannkuch-parallel.go	146.31u 0.00s 37.50r # -37%
   1201 	gc fannkuch	70.06u 0.03s 70.17r # -3%
   1202 	gc fannkuch-parallel	131.88u 0.06s 33.59r # -23%
   1203 	gc_B fannkuch	45.55u 0.02s 45.63r # -15%
   1204 
   1205 regex-dna 100000
   1206 	gcc -m64 -O2 regex-dna.c -lpcre	0.44u 0.01s 0.45r
   1207 	gccgo -O2 regex-dna.go	5.59u 0.00s 5.61r # -14%
   1208 	gccgo -O2 regex-dna-parallel.go	10.85u 0.30s 3.34r # -24%
   1209 	gc regex-dna	2.23u 0.01s 2.25r # -43%
   1210 	gc regex-dna-parallel	2.35u 0.00s 0.93r # -40%
   1211 	gc_B regex-dna	2.24u 0.01s 2.25r # -43%
   1212 
   1213 spectral-norm 5500
   1214 	gcc -m64 -O2 spectral-norm.c -lm	14.84u 0.00s 14.88r
   1215 	gccgo -O2 spectral-norm.go	15.33u 0.00s 15.37r
   1216 	gc spectral-norm	16.75u 0.02s 16.79r # -15%
   1217 	gc_B spectral-norm	16.77u 0.01s 16.79r # -15%
   1218 
   1219 k-nucleotide 1000000
   1220 	gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 	4.50u 0.00s 4.52r
   1221 	gccgo -O2 k-nucleotide.go	3.72u 0.04s 3.77r # -21%
   1222 	gccgo -O2 k-nucleotide-parallel.go	3.88u 0.03s 1.42r # -35%
   1223 	gc k-nucleotide	6.32u 0.01s 6.33r # -31%
   1224 	gc k-nucleotide-parallel	6.47u 0.05s 2.13r # -33%
   1225 	gc_B k-nucleotide	6.45u 0.01s 6.47r # -28%
   1226 
   1227 mandelbrot 16000
   1228 	gcc -m64 -O2 mandelbrot.c	36.03u 0.00s 36.11r
   1229 	gccgo -O2 mandelbrot.go	37.61u 0.00s 37.74r # -14%
   1230 	gc mandelbrot	38.19u 0.05s 38.29r
   1231 	gc_B mandelbrot	38.19u 0.03s 38.26r
   1232 
   1233 meteor 2098
   1234 	gcc -m64 -O2 meteor-contest.c	0.08u 0.00s 0.08r
   1235 	gccgo -O2 meteor-contest.go	0.09u 0.01s 0.10r
   1236 	gc meteor-contest	0.12u 0.00s 0.12r # -15% although perhaps just noise
   1237 	gc_B meteor-contest	0.11u 0.00s 0.12r # -8% although perhaps just noise
   1238 
   1239 pidigits 10000
   1240 	gcc -m64 -O2 pidigits.c -lgmp	2.27u 0.00s 2.28r
   1241 	gccgo -O2 pidigits.go	8.95u 0.02s 8.99r
   1242 	gc pidigits	2.88u 0.14s 2.91r
   1243 	gc_B pidigits	2.92u 0.10s 2.91r
   1244 
   1245 threadring 50000000
   1246 	gcc -m64 -O2 threadring.c -lpthread	14.75u 167.88s 212.23r
   1247 	gccgo -O2 threadring.go	36.72u 12.08s 48.91r # -29%
   1248 	gc threadring	10.93u 0.01s 10.95r # -16%
   1249 
   1250 chameneos 6000000
   1251 	gcc -m64 -O2 chameneosredux.c -lpthread	8.89u 56.62s 9.75r
   1252 	gccgo -O2 chameneosredux.go	9.48u 2.48s 11.99r # -33%
   1253 	gc chameneosredux	5.80u 0.00s 5.81r # -16%
   1254 
   1255