1 All tests on r45 or r70 2 3 Aug 3 2009 4 5 First version of fasta. Translation of fasta.c, fetched from 6 http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4 7 8 fasta -n 25000000 9 gcc -O2 fasta.c 5.98u 0.00s 6.01r 10 gccgo -O2 fasta.go 8.82u 0.02s 8.85r 11 6g fasta.go 13.50u 0.02s 13.53r 12 6g -B fasta.go 12.99u 0.02s 13.02r 13 14 Aug 4 2009 15 [added timing.sh] 16 17 # myrandom: 18 # hand-written optimization of integer division 19 # use int32->float conversion 20 fasta -n 25000000 21 # probably I/O library inefficiencies 22 gcc -O2 fasta.c 5.99u 0.00s 6.00r 23 gccgo -O2 fasta.go 8.82u 0.02s 8.85r 24 gc fasta 10.70u 0.00s 10.77r 25 gc_B fasta 10.09u 0.03s 10.12r 26 27 reverse-complement < output-of-fasta-25000000 28 # we don't know - memory cache behavior? 29 gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r 30 gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r 31 gc reverse-complement 6.55u 0.70s 7.26r 32 gc_B reverse-complement 6.32u 0.70s 7.10r 33 34 nbody 50000000 35 # math.Sqrt needs to be in assembly; inlining is probably the other 50% 36 gcc -O2 nbody.c 21.61u 0.01s 24.80r 37 gccgo -O2 nbody.go 118.55u 0.02s 120.32r 38 gc nbody 100.84u 0.00s 100.85r 39 gc_B nbody 103.33u 0.00s 103.39r 40 [ 41 hacked Sqrt in assembler 42 gc nbody 31.97u 0.00s 32.01r 43 ] 44 45 binary-tree 15 # too slow to use 20 46 # memory allocation and garbage collection 47 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r 48 gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r 49 gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r 50 gc binary-tree 9.60u 0.01s 9.62r 51 gc binary-tree-freelist 0.48u 0.01s 0.50r 52 53 August 5, 2009 54 55 fannkuch 12 56 # bounds checking is half the difference 57 # rest might be registerization 58 gcc -O2 fannkuch.c 60.09u 0.01s 60.32r 59 gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r 60 gc fannkuch 124.59u 0.00s 124.67r 61 gc_B fannkuch 91.14u 0.00s 91.16r 62 63 regex-dna 100000 64 # regexp code is slow on trivial regexp 65 gcc -O2 regex-dna.c -lpcre 
0.92u 0.00s 0.99r 66 gc regexp-dna 26.94u 0.18s 28.75r 67 gc_B regexp-dna 26.51u 0.09s 26.75r 68 69 spectral-norm 5500 70 gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r 71 gccgo -O2 spectral-norm.go 12.20u 0.00s 12.23r 72 gc spectral-norm 50.23u 0.00s 50.36r 73 gc_B spectral-norm 49.69u 0.01s 49.83r 74 gc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2 75 [using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r] 76 77 August 6, 2009 78 79 k-nucleotide 5000000 80 # string maps are slower than glib string maps 81 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74r 82 gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r 83 gc k-nucleotide 16.08u 0.06s 16.50r 84 gc_B k-nucleotide 17.32u 0.02s 17.37r 85 86 mandelbrot 5500 87 # floating point code generator should use more registers 88 gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r 89 gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r 90 gc mandelbrot 74.32u 0.00s 74.35r 91 gc_B mandelbrot 74.28u 0.01s 74.31r 92 93 meteor 2100 94 # we don't know 95 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r 96 gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r 97 gc meteor-contest 0.24u 0.00s 0.26r 98 gc_B meteor-contest 0.23u 0.00s 0.24r 99 100 pidigits 10000 101 # bignum is slower than gmp 102 gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r 103 gc pidigits 77.69u 0.14s 78.18r 104 gc_B pidigits 74.26u 0.18s 75.41r 105 gc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignum 106 107 August 7 2009 108 109 # New gc does better division by powers of 2. 
Significant improvements: 110 111 spectral-norm 5500 112 # floating point code generator should use more registers; possibly inline evalA 113 gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r 114 gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r 115 gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster 116 gc_B spectral-norm 23.71u 0.01s 23.72r # ditto 117 gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. note: 4x faster (on r70, idle) 118 119 k-nucleotide 1000000 120 # string maps are slower than glib string maps 121 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r 122 gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r 123 gc k-nucleotide 15.97u 0.03s 16.04r 124 gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version 125 126 pidigits 10000 127 # bignum is slower than gmp 128 gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r 129 gc pidigits 71.24u 0.04s 71.28r # 8.5% faster 130 gc_B pidigits 71.25u 0.03s 71.29r # 4% faster 131 132 threadring 50000000 133 gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r 134 gccgo -O2 threadring.go 90.33u 459.95s 448.03r 135 gc threadring 33.11u 0.00s 33.14r 136 GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r 137 # change wait code to do <-make(chan int) instead of time.Sleep 138 gc threadring 28.41u 0.01s 29.35r 139 GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r 140 141 chameneos 6000000 142 gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r 143 gc chameneosredux 20.19u 0.01s 20.23r 144 145 Aug 10 2009 146 147 # new 6g with better fp registers, fast div and mod of integers 148 # complete set of timings listed. 
significant changes marked *** 149 150 fasta -n 25000000 151 # probably I/O library inefficiencies 152 gcc -O2 fasta.c 5.96u 0.00s 5.97r 153 gc fasta 10.59u 0.01s 10.61r 154 gc_B fasta 9.92u 0.02s 9.95r 155 156 reverse-complement < output-of-fasta-25000000 157 # we don't know - memory cache behavior? 158 gcc -O2 reverse-complement.c 1.96u 1.56s 16.23r 159 gccgo -O2 reverse-complement.go 6.41u 0.62s 7.05r 160 gc reverse-complement 6.46u 0.70s 7.17r 161 gc_B reverse-complement 6.22u 0.72s 6.95r 162 163 nbody 50000000 164 # math.Sqrt needs to be in assembly; inlining is probably the other 50% 165 gcc -O2 nbody.c 21.26u 0.01s 21.28r 166 gccgo -O2 nbody.go 116.68u 0.07s 116.80r 167 gc nbody 86.64u 0.01s 86.68r # -14% 168 gc_B nbody 85.72u 0.02s 85.77r # *** -17% 169 170 binary-tree 15 # too slow to use 20 171 # memory allocation and garbage collection 172 gcc -O2 binary-tree.c -lm 0.87u 0.00s 0.87r 173 gccgo -O2 binary-tree.go 1.61u 0.47s 2.09r 174 gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.01r 175 gc binary-tree 9.11u 0.01s 9.13r # *** -5% 176 gc binary-tree-freelist 0.47u 0.01s 0.48r 177 178 fannkuch 12 179 # bounds checking is half the difference 180 # rest might be registerization 181 gcc -O2 fannkuch.c 59.92u 0.00s 59.94r 182 gccgo -O2 fannkuch.go 65.54u 0.00s 65.58r 183 gc fannkuch 123.98u 0.01s 124.04r 184 gc_B fannkuch 90.75u 0.00s 90.78r 185 186 regex-dna 100000 187 # regexp code is slow on trivial regexp 188 gcc -O2 regex-dna.c -lpcre 0.91u 0.00s 0.92r 189 gc regex-dna 27.25u 0.02s 27.28r 190 gc_B regex-dna 29.51u 0.03s 29.55r 191 192 spectral-norm 5500 193 # possibly inline evalA 194 gcc -O2 spectral-norm.c -lm 11.57u 0.00s 11.57r 195 gccgo -O2 spectral-norm.go 12.07u 0.01s 12.08r 196 gc spectral-norm 23.99u 0.00s 24.00r 197 gc_B spectral-norm 23.73u 0.00s 23.75r 198 199 k-nucleotide 1000000 200 # string maps are slower than glib string maps 201 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.63u 0.02s 10.69r 202 
gccgo -O2 k-nucleotide.go 23.19u 0.91s 24.12r 203 gc k-nucleotide 16.73u 0.04s 16.78r # *** +5% (but this one seems to vary by more than that) 204 gc_B k-nucleotide 16.46u 0.04s 16.51r # *** +5% 205 206 mandelbrot 16000 207 gcc -O2 mandelbrot.c 56.16u 0.00s 56.16r 208 gccgo -O2 mandelbrot.go 57.41u 0.01s 57.42r 209 gc mandelbrot 64.05u 0.02s 64.08r # *** -14% 210 gc_B mandelbrot 64.10u 0.02s 64.14r # *** -14% 211 212 meteor 2100 213 # we don't know 214 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r 215 gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r 216 gc meteor-contest 0.18u 0.00s 0.20r # *** -25% 217 gc_B meteor-contest 0.17u 0.00s 0.18r # *** -24% 218 219 pidigits 10000 220 # bignum is slower than gmp 221 gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.57r 222 gc pidigits 71.82u 0.04s 71.89r 223 gc_B pidigits 71.84u 0.08s 71.98r 224 225 threadring 50000000 226 gcc -O2 threadring.c -lpthread 30.91u 164.33s 204.57r 227 gccgo -O2 threadring.go 87.12u 460.04s 447.61r 228 gc threadring 38.55u 0.00s 38.56r # *** +16% 229 230 chameneos 6000000 231 gcc -O2 chameneosredux.c -lpthread 17.93u 323.65s 88.47r 232 gc chameneosredux 21.72u 0.00s 21.73r 233 234 August 10 2009 235 236 # In-place versions for some bignum operations. 237 pidigits 10000 238 gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r 239 gc pidigits 55.22u 0.04s 55.29r # *** -23% 240 gc_B pidigits 55.49u 0.02s 55.60r # *** -23% 241 242 September 3 2009 243 244 # New 6g inlines slices, has a few other tweaks. 245 # Complete rerun. Significant changes marked. 246 247 fasta -n 25000000 248 # probably I/O library inefficiencies 249 gcc -O2 fasta.c 5.96u 0.00s 5.96r 250 gc fasta 10.63u 0.02s 10.66r 251 gc_B fasta 9.92u 0.01s 9.94r 252 253 reverse-complement < output-of-fasta-25000000 254 # we don't know - memory cache behavior? 
255 gcc -O2 reverse-complement.c 1.92u 0.33s 2.93r 256 gccgo -O2 reverse-complement.go 6.76u 0.72s 7.58r # +5% 257 gc reverse-complement 6.59u 0.70s 7.29r # +2% 258 gc_B reverse-complement 5.57u 0.80s 6.37r # -10% 259 260 nbody 50000000 261 # math.Sqrt needs to be in assembly; inlining is probably the other 50% 262 # also loop alignment appears to be critical 263 gcc -O2 nbody.c 21.28u 0.00s 21.28r 264 gccgo -O2 nbody.go 119.21u 0.00s 119.22r # +2% 265 gc nbody 109.72u 0.00s 109.78r # + 28% ***** 266 gc_B nbody 85.90u 0.00s 85.91r 267 268 binary-tree 15 # too slow to use 20 269 # memory allocation and garbage collection 270 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r 271 gccgo -O2 binary-tree.go 1.88u 0.54s 2.42r # +17% 272 gccgo -O2 binary-tree-freelist.go 0.01u 0.01s 0.02r 273 gc binary-tree 8.94u 0.01s 8.96r # -2% 274 gc binary-tree-freelist 0.47u 0.01s 0.48r 275 276 fannkuch 12 277 # bounds checking is half the difference 278 # rest might be registerization 279 gcc -O2 fannkuch.c 60.12u 0.00s 60.12r 280 gccgo -O2 fannkuch.go 92.62u 0.00s 92.66r # +41% *** 281 gc fannkuch 123.90u 0.00s 123.92r 282 gc_B fannkuch 89.71u 0.00s 89.74r # -1% 283 284 regex-dna 100000 285 # regexp code is slow on trivial regexp 286 gcc -O2 regex-dna.c -lpcre 0.88u 0.00s 0.88r 287 gc regex-dna 25.77u 0.01s 25.79r # -5% 288 gc_B regex-dna 26.05u 0.02s 26.09r # -12% *** 289 290 spectral-norm 5500 291 # possibly inline evalA 292 gcc -O2 spectral-norm.c -lm 11.51u 0.00s 11.51r 293 gccgo -O2 spectral-norm.go 11.95u 0.00s 11.96r 294 gc spectral-norm 24.23u 0.00s 24.23r 295 gc_B spectral-norm 23.83u 0.00s 23.84r 296 297 k-nucleotide 1000000 298 # string maps are slower than glib string maps 299 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.68u 0.04s 10.72r 300 gccgo -O2 k-nucleotide.go 23.03u 0.88s 23.92r 301 gc k-nucleotide 15.79u 0.05s 15.85r # -5% (but this one seems to vary by more than that) 302 gc_B k-nucleotide 17.88u 0.05s 17.95r # +8% (ditto) 
303 304 mandelbrot 16000 305 gcc -O2 mandelbrot.c 56.17u 0.02s 56.20r 306 gccgo -O2 mandelbrot.go 56.74u 0.02s 56.79r # -1% 307 gc mandelbrot 63.31u 0.01s 63.35r # -1% 308 gc_B mandelbrot 63.29u 0.00s 63.31r # -1% 309 310 meteor 2100 311 # we don't know 312 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r 313 gccgo -O2 meteor-contest.go 0.11u 0.00s 0.12r 314 gc meteor-contest 0.18u 0.00s 0.19r 315 gc_B meteor-contest 0.17u 0.00s 0.18r 316 317 pidigits 10000 318 # bignum is slower than gmp 319 gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r 320 gc pidigits 55.87u 0.03s 55.91r 321 gc_B pidigits 55.93u 0.03s 55.99r 322 323 # these tests are compared using real time, since they run multiple processors 324 # accuracy probably low 325 threadring 50000000 326 gcc -O2 threadring.c -lpthread 26.31u 164.69s 199.92r # -2% 327 gccgo -O2 threadring.go 87.90u 487.26s 472.81r # +6% 328 gc threadring 28.89u 0.00s 28.90r # -25% *** 329 330 chameneos 6000000 331 gcc -O2 chameneosredux.c -lpthread 16.41u 296.91s 81.17r # -8% 332 gc chameneosredux 19.97u 0.00s 19.97r # -8% 333 334 Sep 22, 2009 335 336 # 6g inlines sliceslice in most cases. 337 338 fasta -n 25000000 339 # probably I/O library inefficiencies 340 gc fasta 10.24u 0.00s 10.25r # -4% 341 gc_B fasta 9.68u 0.01s 9.69r # -3% 342 343 reverse-complement < output-of-fasta-25000000 344 # we don't know - memory cache behavior? 
345 gc reverse-complement 6.67u 0.69s 7.37r # +1% 346 gc_B reverse-complement 6.00u 0.64s 6.65r # +7% 347 348 nbody -n 50000000 349 # math.Sqrt needs to be in assembly; inlining is probably the other 50% 350 # also loop alignment appears to be critical 351 gc nbody 86.27u 0.00s 86.29r # -21% 352 gc_B nbody 104.52u 0.00s 104.54r # +22% 353 354 fannkuch 12 355 # bounds checking is half the difference 356 # rest might be registerization 357 gc fannkuch 128.36u 0.00s 128.37r # +4% 358 gc_B fannkuch 89.32u 0.00s 89.34r 359 360 regex-dna 100000 361 # regexp code is slow on trivial regexp 362 gc regex-dna 24.82u 0.01s 24.86r # -4% 363 gc_B regex-dna 24.55u 0.01s 24.57r # -6% 364 365 spectral-norm 5500 366 # possibly inline evalA 367 gc spectral-norm 24.05u 0.00s 24.07r # -1% 368 gc_B spectral-norm 23.60u 0.00s 23.65r # -1% 369 370 k-nucleotide 1000000 371 # string maps are slower than glib string maps 372 gc k-nucleotide 17.84u 0.04s 17.89r # +13% but mysterious variation continues 373 gc_B k-nucleotide 15.56u 0.08s 15.65r # -13% (ditto) 374 375 mandelbrot 16000 376 gc mandelbrot 64.08u 0.01s 64.11r # +1% 377 gc_B mandelbrot 64.04u 0.00s 64.05r # +1% 378 379 pidigits 10000 380 # bignum is slower than gmp 381 gc pidigits 58.68u 0.02s 58.72r # +5% 382 gc_B pidigits 58.86u 0.05s 58.99r # +5% 383 384 # these tests are compared using real time, since they run multiple processors 385 # accuracy probably low 386 threadring 50000000 387 gc threadring 32.70u 0.02s 32.77r # +13% 388 389 chameneos 6000000 390 gc chameneosredux 26.62u 0.00s 26.63r # +13% 391 392 Sep 24, 2009 393 394 # Sqrt now in assembler for 6g. 395 nbody -n 50000000 396 # remember, at least for 6g, alignment of loops may be important 397 gcc -O2 nbody.c 21.24u 0.00s 21.25r 398 gccgo -O2 nbody.go 121.03u 0.00s 121.04r 399 gc nbody 30.26u 0.00s 30.27r # -65% *** 400 gc_B nbody 30.20u 0.02s 30.22r # -72% *** 401 402 Nov 13 2009 403 404 # fix bug in regexp; take performance hit. good regexps will come in time. 
405 regex-dna 100000 406 gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.94r 407 gc regex-dna 29.78u 0.03s 29.83r 408 gc_B regex-dna 32.63u 0.03s 32.74r 409 410 Nov 24 2009 411 412 # Roger Peppe's rewrite of the benchmark 413 chameneos 6000000 414 gcc -O2 chameneosredux.c -lpthread 18.00u 303.29s 83.64r 415 gc chameneosredux 12.10u 0.00s 12.10r # 2.22X faster 416 417 Jan 6, 2010 418 419 # Long-overdue update. All numbers included in this complete run. 420 # Some programs (e.g. reverse-complement) rewritten for speed. 421 # Regular expressions much faster in common cases (although still far behind PCRE) 422 # Bignum stuff improved 423 # Better (but sometimes slower) locking in channels. 424 425 fasta -n 25000000 426 gcc -O2 fasta.c 5.99u 0.01s 6.00r 427 gc fasta 9.11u 0.00s 9.12r # -11% 428 gc_B fasta 8.60u 0.00s 8.62r # +12% ?? 429 430 reverse-complement < output-of-fasta-25000000 431 gcc -O2 reverse-complement.c 2.00u 0.80s 9.54r 432 # gccgo -O2 reverse-complement.go 4.57u 0.35s 4.94r # 33% faster 433 gc reverse-complement 2.01u 0.38s 2.40r # 3.3X faster 434 gc_B reverse-complement 1.88u 0.36s 2.24r # 3.2X faster 435 GOGC=off 436 gc reverse-complement 2.01u 0.35s 2.37r 437 gc_B reverse-complement 1.86u 0.32s 2.19r 438 439 nbody -n 50000000 440 gcc -O2 nbody.c 21.28u 0.00s 21.31r 441 gccgo -O2 nbody.go 80.02u 0.00s 80.05r # 33% faster 442 gc nbody 30.13u 0.00s 30.13r 443 gc_B nbody 29.89u 0.01s 29.91r 444 445 binary-tree 15 # too slow to use 20 446 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r 447 gccgo -O2 binary-tree.go 4.82u 0.41s 5.24r # 2.5X slower 448 gc binary-tree 7.23u 0.01s 7.25r # # -19% 449 gc binary-tree-freelist 0.43u 0.00s 0.44r # -9% 450 451 fannkuch 12 452 gcc -O2 fannkuch.c 60.17u 0.00s 60.17r 453 gccgo -O2 fannkuch.go 78.47u 0.01s 78.49r 454 gc fannkuch 128.86u 0.00s 128.96r 455 gc_B fannkuch 90.17u 0.00s 90.21r 456 457 regex-dna 100000 458 gcc -O2 regex-dna.c -lpcre 0.90u 0.00s 0.92r 459 gc regex-dna 9.48u 0.01s 9.50r # 3.1X faster 460 gc_B regex-dna 
9.08u 0.00s 9.10r # 3.6X faster 461 462 spectral-norm 5500 463 gcc -O2 spectral-norm.c -lm 11.48u 0.00s 11.48r 464 gccgo -O2 spectral-norm.go 11.68u 0.00s 11.70r 465 gc spectral-norm 23.98u 0.00s 23.99r 466 gc_B spectral-norm 23.68u 0.00s 23.69r 467 468 k-nucleotide 1000000 469 gcc -O2 k-nucleotide.c 10.85u 0.04s 10.90r 470 gccgo -O2 k-nucleotide.go 25.26u 0.87s 26.14r 471 gc k-nucleotide 15.28u 0.06s 15.37r # restored; mysterious variation continues 472 gc_B k-nucleotide 15.97u 0.03s 16.00r 473 474 mandelbrot 16000 475 gcc -O2 mandelbrot.c 56.12u 0.01s 56.15r 476 gccgo -O2 mandelbrot.go 56.86u 0.01s 56.89r 477 gc mandelbrot 66.05u 0.00s 66.07r # -3% 478 gc_B mandelbrot 66.06u 0.00s 66.07r # -3% 479 480 meteor 2100 481 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r 482 gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r 483 gc meteor-contest 0.17u 0.00s 0.17r 484 gc_B meteor-contest 0.15u 0.00s 0.16r 485 486 pidigits 10000 487 gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.59r 488 gc pidigits 38.27u 0.02s 38.30r # 1.5X faster 489 gc_B pidigits 38.27u 0.02s 38.31r # 1.5X faster 490 491 threadring 50000000 492 gcc -O2 threadring.c 37.11u 170.59s 212.75r 493 gccgo -O2 threadring.go 89.67u 447.56s 442.55r # -6.5% 494 gc threadring 36.08u 0.04s 36.15r # +10% 495 496 chameneos 6000000 497 gcc -O2 chameneosredux.c -lpthread 19.02u 331.08s 90.79r 498 gc chameneosredux 12.54u 0.00s 12.55r 499 500 Oct 19, 2010 501 502 # Another long-overdue update. Some of the code is new; parallel versions 503 # of some are added. A few significant improvements. 504 505 fasta -n 25000000 506 gcc -O2 fasta.c 4.92u 0.00s 4.93r 507 gccgo -O2 fasta.go 3.31u 0.00s 3.34r # new code 508 gc fasta 3.68u 0.00s 3.69r # 2.5X faster with no code 509 gc_B fasta 3.68u 0.00s 3.69r # 2.3X faster with no code 510 511 reverse-complement < output-of-fasta-25000000 512 gcc -O2 reverse-complement.c 1.93u 0.81s 11.24r 513 gccgo -O2 reverse-complement.go 1.58u 0.43s 2.04r # first run with new code? 
514 gc reverse-complement 1.84u 0.34s 2.20r # 10% faster 515 gc_B reverse-complement 1.85u 0.32s 2.18r 516 517 nbody -n 50000000 518 gcc -O2 nbody.c 21.35u 0.00s 21.36r 519 gccgo -O2 nbody.go 21.62u 0.00s 21.66r # 3.7X faster - why?? 520 gc nbody 29.78u 0.00s 29.79r 521 gc_B nbody 29.72u 0.00s 29.72r 522 523 binary-tree 15 # too slow to use 20 524 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.88r 525 gccgo -O2 binary-tree.go 4.05u 0.02s 4.08r # 28% faster 526 gccgo -O2 binary-tree-freelist 0.34u 0.08s 0.34r 527 gc binary-tree 5.94u 0.00s 5.95r # 20% faster 528 gc binary-tree-freelist 0.50u 0.01s 0.54r 529 530 fannkuch 12 531 gcc -O2 fannkuch.c 60.45u 0.00s 60.45r 532 gccgo -O2 fannkuch.go 64.64u 0.00s 64.64r 533 gccgo -O2 fannkuch-parallel.go 115.63u 0.00s 31.58r 534 gc fannkuch 126.52u 0.04s 126.68r 535 gc fannkuch-parallel 238.82u 0.10s 65.93r # GOMAXPROCS=4 536 gc_B fannkuch 88.99u 0.00s 89.02r 537 538 regex-dna 100000 539 gcc -O2 regex-dna.c -lpcre 0.89u 0.00s 0.89r 540 gc regex-dna 8.99u 0.02s 9.03r 541 gc regex-dna-parallel 8.94u 0.02s 3.68r # GOMAXPROCS=4 542 gc_B regex-dna 9.12u 0.00s 9.14r 543 544 spectral-norm 5500 545 gcc -O2 spectral-norm.c -lm 11.55u 0.00s 11.57r 546 gccgo -O2 spectral-norm.go 11.73u 0.00s 11.75r 547 gc spectral-norm 23.74u 0.00s 23.79r 548 gc_B spectral-norm 24.49u 0.02s 24.54r 549 550 k-nucleotide 1000000 551 gcc -O2 k-nucleotide.c 11.44u 0.06s 11.50r 552 gccgo -O2 k-nucleotide.go 8.65u 0.04s 8.71r 553 gccgo -O2 k-nucleotide-parallel.go 8.75u 0.03s 2.97r # set GOMAXPROCS=4 554 gc k-nucleotide 14.92u 0.05s 15.01r 555 gc k-nucleotide-parallel 16.96u 0.06s 6.53r # set GOMAXPROCS=4 556 gc_B k-nucleotide 15.97u 0.03s 16.08r 557 558 mandelbrot 16000 559 gcc -O2 mandelbrot.c 56.32u 0.00s 56.35r 560 gccgo -O2 mandelbrot.go 55.62u 0.02s 55.77r 561 gc mandelbrot 64.85u 0.01s 64.94r 562 gc_B mandelbrot 65.02u 0.01s 65.14r 563 564 meteor 2100 565 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r 566 gccgo -O2 meteor-contest.go 0.10u 0.00s 0.11r 567 gc 
meteor-contest 0.17u 0.00s 0.18r 568 gc_B meteor-contest 0.16u 0.00s 0.16r 569 570 pidigits 10000 571 gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.59r 572 gccgo -O2 pidigits.go 14.06u 0.01s 14.09r # first run? 573 gc pidigits 8.47u 0.05s 8.55r # 4.5X faster due to package big 574 gc_B pidigits 8.33u 0.01s 8.36r # 4.5X faster due to package big 575 576 threadring 50000000 577 gcc -O2 threadring.c 28.18u 153.19s 186.47r 578 gccgo -O2 threadring.go 110.10u 516.48s 515.25r 579 gc threadring 40.39u 0.00s 40.40r 580 581 chameneos 6000000 582 gcc -O2 chameneosredux.c -lpthread 18.20u 301.55s 83.10r 583 gccgo -O2 chameneosredux.go 52.22u 324.54s 201.21r 584 gc chameneosredux 13.52u 0.00s 13.54r 585 586 Dec 14, 2010 587 588 # Improved regex code (same algorithm) gets ~30%. 589 590 regex-dna 100000 591 gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r 592 gc regex-dna 6.80u 0.00s 6.81r 593 gc regex-dna-parallel 6.82u 0.01s 2.75r 594 gc_B regex-dna 6.69u 0.02s 6.70r 595 596 Feb 15, 2011 597 598 # Improved GC, still single-threaded but more efficient 599 600 fasta -n 25000000 601 gcc -O2 fasta.c 3.40u 0.00s 3.40r 602 gccgo -O2 fasta.go 3.51u 0.00s 3.50r 603 gc fasta 3.66u 0.01s 3.66r 604 gc_B fasta 3.66u 0.00s 3.66r 605 606 reverse-complement < output-of-fasta-25000000 607 gcc -O2 reverse-complement.c 1.86u 1.29s 4.93r 608 gccgo -O2 reverse-complement.go 2.18u 0.41s 2.60r 609 gc reverse-complement 1.67u 0.48s 2.15r 610 gc_B reverse-complement 1.71u 0.45s 2.15r 611 612 nbody -n 50000000 613 gcc -O2 -lm nbody.c 21.64u 0.00s 21.64r 614 gccgo -O2 nbody.go 21.46u 0.00s 21.45r 615 gc nbody 29.07u 0.00s 29.06r 616 gc_B nbody 31.61u 0.00s 31.61r 617 618 binary-tree 15 # too slow to use 20 619 gcc -O2 binary-tree.c -lm 0.88u 0.00s 0.87r 620 gccgo -O2 binary-tree.go 2.74u 0.07s 2.81r 621 gccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00r 622 gc binary-tree 4.22u 0.02s 4.24r 623 gc binary-tree-freelist 0.54u 0.02s 0.55r 624 625 fannkuch 12 626 gcc -O2 fannkuch.c 57.64u 0.00s 57.64r 627 gccgo -O2 
fannkuch.go 65.79u 0.00s 65.82r 628 gccgo -O2 fannkuch-parallel.go 160.91u 0.02s 43.90r 629 gc fannkuch 126.36u 0.03s 126.53r 630 gc fannkuch-parallel 175.23u 0.04s 45.49r 631 gc_B fannkuch 89.23u 0.00s 89.24r 632 633 regex-dna 100000 634 gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.80r 635 gccgo -O2 regex-dna.go 12.38u 0.10s 12.52r 636 gccgo -O2 regex-dna-parallel.go 43.96u 4.64s 15.11r 637 gc regex-dna 7.03u 0.01s 7.05r 638 gc regex-dna-parallel 6.85u 0.05s 2.70r 639 gc_B regex-dna 6.87u 0.02s 6.89r 640 641 spectral-norm 5500 642 gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r 643 gccgo -O2 spectral-norm.go 11.79u 0.00s 11.79r 644 gc spectral-norm 24.00u 0.02s 24.05r 645 gc_B spectral-norm 24.59u 0.01s 24.59r 646 647 k-nucleotide 1000000 648 gcc -O2 k-nucleotide.c 9.75u 0.07s 9.82r 649 gccgo -O2 k-nucleotide.go 8.92u 0.06s 8.98r 650 gccgo -O2 k-nucleotide-parallel.go 8.40u 0.04s 2.76r 651 gc k-nucleotide 17.01u 0.03s 17.04r 652 gc k-nucleotide-parallel 16.51u 0.08s 6.21r 653 gc_B k-nucleotide 16.94u 0.08s 17.02r 654 655 mandelbrot 16000 656 gcc -O2 mandelbrot.c 54.60u 0.00s 54.66r 657 gccgo -O2 mandelbrot.go 59.38u 0.00s 59.41r 658 gc mandelbrot 64.93u 0.04s 65.08r 659 gc_B mandelbrot 64.85u 0.03s 64.92r 660 661 meteor 2098 662 gcc -O2 meteor-contest.c 0.10u 0.01s 0.10r 663 gccgo -O2 meteor-contest.go 0.11u 0.00s 0.11r 664 gc meteor-contest 0.18u 0.00s 0.17r 665 gc_B meteor-contest 0.17u 0.00s 0.16r 666 667 pidigits 10000 668 gcc -O2 pidigits.c -lgmp 2.24u 0.00s 2.23r 669 gccgo -O2 pidigits.go 14.05u 0.00s 14.06r 670 gc pidigits 6.34u 0.05s 6.38r 671 gc_B pidigits 6.37u 0.02s 6.38r 672 673 threadring 50000000 674 gcc -O2 threadring.c 30.50u 258.05s 325.72r 675 gccgo -O2 threadring.go 92.87u 748.39s 728.46r 676 gc threadring 38.03u 0.01s 38.04r 677 678 # Apr 15, 2011 679 # Move to new machine, Intel Xeon E5520 @ 2.27GHz. 
680 # (Was Opteron(tm) Processor 8214 HE) 681 682 fasta -n 25000000 683 OLD: 684 gcc -O2 fasta.c 3.39u 0.04s 3.42r 685 gccgo -O2 fasta.go 3.52u 0.00s 3.52r 686 gc fasta 3.63u 0.04s 3.67r 687 gc_B fasta 3.66u 0.00s 3.66r 688 NEW: 689 gcc -O2 fasta.c 1.45u 0.02s 1.47r 690 gccgo -O2 fasta.go 1.51u 0.01s 1.51r 691 gc fasta 2.04u 0.00s 2.04r 692 gc_B fasta 2.05u 0.00s 2.04r 693 694 reverse-complement < output-of-fasta-25000000 695 OLD: 696 gcc -O2 reverse-complement.c 1.87u 1.51s 7.02r 697 gccgo -O2 reverse-complement.go 1.56u 0.54s 3.37r 698 gc reverse-complement 1.73u 0.36s 2.08r 699 gc_B reverse-complement 1.75u 0.37s 2.12r 700 NEW: 701 gcc -O2 reverse-complement.c 1.20u 0.47s 12.96r 702 gccgo -O2 reverse-complement.go 0.88u 0.14s 1.01r 703 gc reverse-complement 1.13u 0.17s 1.30r 704 gc_B reverse-complement 1.11u 0.09s 1.20r 705 706 nbody -n 50000000 707 OLD: 708 gcc -O2 -lm nbody.c 21.90u 0.00s 21.92r 709 gccgo -O2 nbody.go 23.12u 0.03s 23.19r 710 gc nbody 29.07u 0.00s 29.07r 711 gc_B nbody 31.84u 0.00s 31.85r 712 NEW: 713 gcc -O2 -lm nbody.c 13.01u 0.00s 13.03r 714 gccgo -O2 nbody.go 13.35u 0.00s 13.37r 715 gc nbody 21.78u 0.00s 21.82r 716 gc_B nbody 21.72u 0.00s 21.76r 717 718 binary-tree 15 # too slow to use 20 719 OLD: 720 gcc -O2 binary-tree.c -lm 0.83u 0.02s 0.84r 721 gccgo -O2 binary-tree.go 2.61u 0.02s 2.62r 722 gccgo -O2 binary-tree-freelist.go 0.32u 0.01s 0.32r 723 gc binary-tree 3.93u 0.04s 3.97r 724 gc binary-tree-freelist 0.47u 0.03s 0.50r 725 NEW: 726 gcc -O2 binary-tree.c -lm 0.60u 0.00s 0.59r 727 gccgo -O2 binary-tree.go 1.53u 0.00s 1.52r 728 gccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00r 729 gc binary-tree 1.93u 0.02s 1.95r 730 gc binary-tree-freelist 0.32u 0.01s 0.32r 731 732 fannkuch 12 733 OLD: 734 gcc -O2 fannkuch.c 57.64u 0.00s 57.64r 735 gccgo -O2 fannkuch.go 65.56u 0.01s 65.65r 736 gccgo -O2 fannkuch-parallel.go 179.12u 0.00s 49.82r 737 gc fannkuch 126.39u 0.00s 126.39r 738 gc fannkuch-parallel 172.49u 0.02s 45.44r 739 gc_B fannkuch 
89.30u 0.00s 89.28r 740 NEW: 741 gcc -O2 fannkuch.c 45.17u 0.00s 45.26r 742 gccgo -O2 fannkuch.go 53.63u 0.00s 53.73r 743 gccgo -O2 fannkuch-parallel.go 216.72u 0.00s 58.42r 744 gc fannkuch 108.21u 0.00s 108.44r 745 gc fannkuch-parallel 227.20u 0.00s 57.27r 746 gc_B fannkuch 56.14u 0.00s 56.26r 747 748 regex-dna 100000 749 OLD: 750 gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r 751 gccgo -O2 regex-dna.go 10.15u 0.02s 10.23r 752 gccgo -O2 regex-dna-parallel.go 33.81u 3.22s 11.62r 753 gc regex-dna 6.52u 0.04s 6.56r 754 gc regex-dna-parallel 6.84u 0.03s 2.70r 755 gc_B regex-dna 6.83u 0.01s 6.84r 756 NEW: 757 gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.47r 758 gccgo -O2 regex-dna.go 6.00u 0.00s 6.00r 759 gccgo -O2 regex-dna-parallel.go 44.54u 1.57s 6.51r 760 gc regex-dna 5.41u 0.01s 5.42r 761 gc regex-dna-parallel 5.62u 0.01s 2.20r 762 gc_B regex-dna 5.50u 0.00s 5.50r 763 764 spectral-norm 5500 765 OLD: 766 gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r 767 gccgo -O2 spectral-norm.go 11.56u 0.00s 11.55r 768 gc spectral-norm 23.98u 0.00s 24.00r 769 gc_B spectral-norm 24.62u 0.00s 24.65r 770 NEW: 771 gcc -O2 spectral-norm.c -lm 15.79u 0.00s 15.82r 772 gccgo -O2 spectral-norm.go 15.32u 0.00s 15.35r 773 gc spectral-norm 19.62u 0.01s 19.67r 774 gc_B spectral-norm 19.62u 0.00s 19.66r 775 776 k-nucleotide 1000000 777 OLD: 778 gcc -O2 k-nucleotide.c 9.82u 0.06s 9.87r 779 gccgo -O2 k-nucleotide.go 8.30u 0.02s 8.32r 780 gccgo -O2 k-nucleotide-parallel.go 8.84u 0.05s 3.02r 781 gc k-nucleotide 15.38u 0.07s 15.44r 782 gc k-nucleotide-parallel 16.40u 0.03s 5.93r 783 gc_B k-nucleotide 15.19u 0.05s 15.23r 784 NEW: 785 gcc -O2 k-nucleotide.c 4.88u 0.03s 4.92r 786 gccgo -O2 k-nucleotide.go 5.94u 0.01s 5.96r 787 gccgo -O2 k-nucleotide-parallel.go 6.44u 0.03s 1.47r 788 gc k-nucleotide 9.61u 0.01s 9.63r 789 gc k-nucleotide-parallel 9.70u 0.00s 3.39r 790 gc_B k-nucleotide 9.19u 0.03s 9.23r 791 792 mandelbrot 16000 793 OLD: 794 gcc -O2 mandelbrot.c 54.54u 0.00s 54.56r 795 gccgo -O2 
mandelbrot.go 59.63u 0.03s 59.67r 796 gc mandelbrot 64.82u 0.00s 64.83r 797 gc_B mandelbrot 64.84u 0.00s 64.91r 798 NEW: 799 gcc -O2 mandelbrot.c 36.07u 0.01s 36.15r 800 gccgo -O2 mandelbrot.go 43.57u 0.00s 43.66r 801 gc mandelbrot 60.66u 0.00s 60.79r 802 gc_B mandelbrot 60.90u 0.00s 61.03r 803 804 meteor 2098 805 OLD: 806 gcc -O2 meteor-contest.c 0.11u 0.00s 0.10r 807 gccgo -O2 meteor-contest.go 0.10u 0.01s 0.10r 808 gc meteor-contest 0.18u 0.00s 0.17r 809 gc_B meteor-contest 0.17u 0.00s 0.16r 810 NEW: 811 gcc -O2 meteor-contest.c 0.10u 0.00s 0.09r 812 gccgo -O2 meteor-contest.go 0.10u 0.00s 0.09r 813 gc meteor-contest 0.14u 0.00s 0.14r 814 gc_B meteor-contest 0.13u 0.00s 0.13r 815 816 pidigits 10000 817 OLD: 818 gcc -O2 pidigits.c -lgmp 2.22u 0.00s 2.21r 819 gccgo -O2 pidigits.go 13.39u 0.00s 13.40r 820 gc pidigits 6.42u 0.04s 6.45r 821 gc_B pidigits 6.45u 0.02s 6.47r 822 NEW: 823 gcc -O2 pidigits.c -lgmp 2.27u 0.00s 2.29r 824 gccgo -O2 pidigits.go 9.21u 0.00s 9.22r 825 gc pidigits 3.60u 0.00s 3.60r 826 gc_B pidigits 3.56u 0.02s 3.58r 827 828 threadring 50000000 829 OLD: 830 gcc -O2 threadring.c -lpthread 34.51u 267.95s 336.12r 831 gccgo -O2 threadring.go 103.51u 588.57s 627.16r 832 gc threadring 54.68u 0.00s 54.73r 833 NEW: 834 gcc -O2 threadring.c 32.00u 259.39s 369.74r 835 gccgo -O2 threadring.go 133.06u 546.02s 595.33r 836 gc threadring 16.75u 0.02s 16.80r 837 838 chameneos 6000000 839 OLD: 840 gcc -O2 chameneosredux.c -lpthread 12.65u 31.02s 13.33r 841 gccgo -O2 chameneosredux.go 47.04u 302.84s 252.29r 842 gc chameneosredux 14.14u 0.00s 14.14r 843 NEW: 844 gcc -O2 chameneosredux.c -lpthread 8.05u 63.43s 11.16r 845 gccgo -O2 chameneosredux.go 82.95u 304.37s 207.64r 846 gc chameneosredux 9.42u 0.00s 9.43r 847 848 # May 13, 2011 849 # after gc update to inline append when possible - 35% faster 850 851 regex-dna 100000 852 gc regex-dna 3.94u 0.00s 3.95r 853 gc regex-dna-parallel 4.15u 0.01s 1.63r 854 gc_B regex-dna 4.01u 0.01s 4.02r 855 856 # Aug 4, 2011 857 # 
After various updates to locking code and some runtime changes. 858 # Slowdowns believed due to slower (but more correct) memmove. 859 860 fannkuch 12 861 gccgo -O2 fannkuch.go 51.59u 0.00s 51.69r # -4% 862 gccgo -O2 fannkuch-parallel.go 253.17u 0.00s 64.67r # -11% 863 gc fannkuch 103.14u 0.00s 103.36r # -5% 864 gc fannkuch-parallel 189.63u 0.00s 49.37r # +9% 865 gc_B fannkuch 49.19u 0.00s 49.29r # -14% 866 867 regex-dna 100000 868 gc regex-dna 3.78u 0.00s 3.78r # -43% 869 gc regex-dna-parallel 3.84u 0.02s 1.48r # -49% 870 gc_B regex-dna 3.62u 0.00s 3.63r # -52% 871 872 k-nucleotide 1000000 873 gc k-nucleotide 12.23u 0.02s 12.27r # +27% 874 gc k-nucleotide-parallel 12.76u 0.02s 4.37r # +29% 875 gc_B k-nucleotide 12.18u 0.01s 12.21r # +33% 876 877 threadring 50000000 878 gc threadring 17.49u 0.00s 17.53r # +4% 879 880 chameneos 6000000 881 gc chameneosredux 7.61u 0.00s 7.63r # -24% 882 883 Aug 9, 2011 884 # After custom algorithms for 1- 2- 4- 8-byte scalars. 885 886 fannkuch 12 887 gc fannkuch-parallel 157.17u 0.00s 41.08r # -17% 888 889 k-nucleotide 1000000 890 gc k-nucleotide 8.72u 0.03s 8.76r # -39% 891 gc k-nucleotide-parallel 8.79u 0.01s 3.14r # -39% 892 gc_B k-nucleotide 8.65u 0.03s 8.69r # -39% 893 894 pidigits 10000 895 gc pidigits 3.71u 0.02s 3.73r # +4% 896 gc_B pidigits 3.73u 0.00s 3.73r # +4% 897 898 threadring 50000000 899 gc threadring 14.51u 0.00s 14.54r # -17% 900 901 chameneos 6000000 902 gc chameneosredux 7.41u 0.00s 7.42r # -3% 903 904 # A complete run at the Go 1 release. 905 # Significant changes: 906 # - gccgo is now enabled for all tests (goroutines are cheap enough) 907 # - threadring and chameneos are 14% faster, probably due to runtime changes 908 # - regex-dna 36% faster 909 # - fannkuch-parallel (only) slowed down 40% 910 # - gccgo on binary-tree-freelist is still optimized to nothing 911 # Other changes are modest. 
912 913 fasta -n 25000000 914 gcc -O2 fasta.c 1.45u 0.02s 1.48r 915 gccgo -O2 fasta.go 1.46u 0.00s 1.47r 916 gc fasta 1.99u 0.01s 2.00r 917 gc_B fasta 1.99u 0.01s 2.01r 918 919 reverse-complement < output-of-fasta-25000000 920 gcc -O2 reverse-complement.c 0.95u 0.48s 4.99r 921 gccgo -O2 reverse-complement.go 0.93u 0.16s 1.09r 922 gc reverse-complement 1.20u 0.19s 1.39r 923 gc_B reverse-complement 1.04u 0.16s 1.20r 924 925 nbody -n 50000000 926 gcc -O2 -lm nbody.c 13.02u 0.00s 13.05r 927 gccgo -O2 nbody.go 14.46u 0.00s 14.49r 928 gc nbody 21.79u 0.00s 21.84r 929 gc_B nbody 21.74u 0.00s 21.79r 930 931 binary-tree 15 # too slow to use 20 932 gcc -O2 binary-tree.c -lm 0.60u 0.01s 0.61r 933 gccgo -O2 binary-tree.go 1.30u 0.01s 1.32r 934 gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.00r 935 gc binary-tree 1.84u 0.01s 1.86r 936 gc binary-tree-freelist 0.33u 0.00s 0.33r 937 938 fannkuch 12 939 gcc -O2 fannkuch.c 45.24u 0.00s 45.34r 940 gccgo -O2 fannkuch.go 59.76u 0.01s 59.90r 941 gccgo -O2 fannkuch-parallel.go 218.20u 0.01s 61.60r 942 gc fannkuch 103.92u 0.00s 104.16r 943 gc fannkuch-parallel 221.61u 0.00s 60.49r 944 gc_B fannkuch 53.17u 0.00s 53.30r 945 946 regex-dna 100000 947 gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.48r 948 gccgo -O2 regex-dna.go 6.52u 0.00s 6.54r 949 gccgo -O2 regex-dna-parallel.go 14.40u 0.73s 4.35r 950 gc regex-dna 2.63u 0.02s 2.66r # -36% 951 gc regex-dna-parallel 2.87u 0.01s 1.11r 952 gc_B regex-dna 2.65u 0.00s 2.66r 953 954 spectral-norm 5500 955 gcc -O2 spectral-norm.c -lm 15.78u 0.00s 15.82r 956 gccgo -O2 spectral-norm.go 15.79u 0.00s 15.83r 957 gc spectral-norm 19.76u 0.00s 19.80r 958 gc_B spectral-norm 19.73u 0.01s 19.78r 959 960 k-nucleotide 1000000 961 gcc -O2 k-nucleotide.c 5.59u 0.03s 5.63r 962 gccgo -O2 k-nucleotide.go 4.09u 0.03s 4.13r 963 gccgo -O2 k-nucleotide-parallel.go 4.50u 0.06s 1.63r 964 gc k-nucleotide 9.23u 0.02s 9.27r 965 gc k-nucleotide-parallel 9.87u 0.03s 3.55r 966 gc_B k-nucleotide 9.20u 0.00s 9.22r 967 968 mandelbrot 
16000 969 gcc -O2 mandelbrot.c 36.09u 0.00s 36.18r 970 gccgo -O2 mandelbrot.go 41.69u 0.01s 41.80r 971 gc mandelbrot 60.91u 0.02s 61.07r 972 gc_B mandelbrot 60.90u 0.00s 61.04r 973 974 meteor 2098 975 gcc -O2 meteor-contest.c 0.09u 0.00s 0.09r 976 gccgo -O2 meteor-contest.go 0.09u 0.00s 0.09r 977 gc meteor-contest 0.14u 0.00s 0.15r 978 gc_B meteor-contest 0.14u 0.00s 0.14r 979 980 pidigits 10000 981 gcc -O2 pidigits.c -lgmp 2.27u 0.00s 2.27r 982 gccgo -O2 pidigits.go 8.65u 0.00s 8.67r 983 gc pidigits 3.70u 0.04s 3.75r 984 gc_B pidigits 3.72u 0.02s 3.75r 985 986 threadring 50000000 987 gcc -O2 threadring.c 40.91u 369.85s 323.31r 988 gccgo -O2 threadring.go 26.97u 30.82s 57.93r 989 gc threadring 12.81u 0.01s 12.85r # -13% 990 991 chameneos 6000000 992 gcc -O2 chameneosredux.c -lpthread 9.44u 72.90s 12.65r 993 gccgo -O2 chameneosredux.go 7.73u 7.53s 15.30r 994 gc chameneosredux 6.51u 0.00s 6.53r # -14% 995 996 # After http://codereview.appspot.com/6248049, moving panicindex 997 # calls out of line (putting the likely code into a single path and shortening 998 # loops).
Significant changes since the last run (note: some are slower for 999 # unrelated and as yet undiagnosed reasons): 1000 1001 nbody -n 50000000 1002 gc nbody 19.10u 0.01s 19.19r # -12% 1003 gc_B nbody 19.19u 0.00s 19.23r # -12% 1004 1005 binary-tree 15 # too slow to use 20 1006 gc binary-tree 1.49u 0.01s 1.51r # -19% 1007 1008 fannkuch 12 1009 gc fannkuch 60.79u 0.00s 60.92r # -41% 1010 gc fannkuch-parallel 183.51u 0.01s 51.75r # -14% 1011 gc_B fannkuch 51.68u 0.00s 51.79r # -3% 1012 1013 k-nucleotide 1000000 1014 gc k-nucleotide 9.74u 0.04s 9.80r # +6% 1015 gc k-nucleotide-parallel 9.89u 0.05s 3.59r # +1% 1016 gc_B k-nucleotide 9.39u 0.02s 9.43r # +2% 1017 1018 mandelbrot (much slower, due to unrelated http://codereview.appspot.com/6209077) 1019 gc mandelbrot 100.98u 0.00s 101.20r # +65% 1020 gc_B mandelbrot 100.90u 0.01s 101.17r # +65% 1021 1022 meteor 2098 1023 gc meteor-contest 0.13u 0.00s 0.13r # -13% 1024 gc_B meteor-contest 0.13u 0.00s 0.13r # -7% 1025 1026 # May 30, 2012. 1027 # After http://codereview.appspot.com/6261051, restoring old code generated 1028 # for floating-point constants. Mandelbrot is back to its previous numbers. 1029 1030 mandelbrot 16000 1031 gcc -O2 mandelbrot.c 36.07u 0.00s 36.16r 1032 gccgo -O2 mandelbrot.go 41.72u 0.01s 41.90r 1033 gc mandelbrot 60.62u 0.00s 60.76r 1034 gc_B mandelbrot 60.68u 0.00s 60.82r 1035 1036 # May 30, 2012. 1037 # After http://codereview.appspot.com/6248068, better FP code 1038 # by avoiding MOVSD between registers. 1039 # Plus some other timing changes that have crept in from other speedups, 1040 # from garbage collection to Printf. 
1041 1042 fasta -n 25000000 1043 gc fasta 1.76u 0.00s 1.76r # -12% 1044 gc_B fasta 1.71u 0.00s 1.72r # -12% 1045 1046 nbody -n 50000000 1047 gc nbody 17.56u 0.00s 17.60r # -8% 1048 gc_B nbody 17.30u 0.00s 17.34r # -10% 1049 1050 fannkuch 12 1051 gc fannkuch-parallel 155.92u 0.01s 44.05r # -15% 1052 1053 k-nucleotide 1000000 1054 gc k-nucleotide 9.22u 0.01s 9.26r # -5% 1055 gc k-nucleotide-parallel 9.23u 0.03s 3.26r # -9% 1056 gc_B k-nucleotide 9.22u 0.03s 9.28r # -2% 1057 1058 mandelbrot 16000 1059 gc mandelbrot 44.80u 0.00s 44.90r # -27% 1060 gc_B mandelbrot 44.81u 0.00s 44.92r # -26% 1061 1062 pidigits 10000 1063 gc pidigits 3.51u 0.00s 3.52r # -6% 1064 gc_B pidigits 3.51u 0.00s 3.52r # -6% 1065 1066 # Aug 28, 2012 1067 # After some assembler work in package big. 1068 pidigits 10000 1069 gc pidigits 2.85u 0.02s 2.88r # -22% 1070 gc_B pidigits 2.88u 0.01s 2.90r # -21% 1071 1072 # Sep 26, 2012 1073 # 64-bit ints, plus significantly better floating-point code. 1074 # Interesting details: 1075 # Generally something in the 0-10% slower range, some (binary tree) more 1076 # Floating-point noticeably faster: 1077 # nbody -25% 1078 # mandelbrot -37% relative to Go 1. 
1079 # Other: 1080 # regex-dna +47% 1081 fasta -n 25000000 1082 gcc -O2 fasta.c 1.43u 0.03s 1.46r 1083 gccgo -O2 fasta.go 1.47u 0.00s 1.47r 1084 gc fasta 1.78u 0.01s 1.80r 1085 gc_B fasta 1.76u 0.00s 1.76r 1086 1087 reverse-complement < output-of-fasta-25000000 1088 gcc -O2 reverse-complement.c 1.14u 0.39s 11.19r 1089 gccgo -O2 reverse-complement.go 0.91u 0.17s 1.09r 1090 gc reverse-complement 1.12u 0.18s 1.31r 1091 gc_B reverse-complement 1.12u 0.15s 1.28r 1092 1093 nbody -n 50000000 1094 gcc -O2 nbody.c -lm 13.02u 0.00s 13.05r 1095 gccgo -O2 nbody.go 13.90u 0.00s 13.93r 1096 gc nbody 17.05u 0.00s 17.09r 1097 gc_B nbody 16.30u 0.00s 16.34r 1098 1099 binary-tree 15 # too slow to use 20 1100 gcc -O2 binary-tree.c -lm 0.61u 0.00s 0.61r 1101 gccgo -O2 binary-tree.go 1.24u 0.04s 1.29r 1102 gccgo -O2 binary-tree-freelist.go 0.21u 0.01s 0.22r 1103 gc binary-tree 1.93u 0.02s 1.96r 1104 gc binary-tree-freelist 0.32u 0.00s 0.33r 1105 1106 fannkuch 12 1107 gcc -O2 fannkuch.c 45.19u 0.00s 45.29r 1108 gccgo -O2 fannkuch.go 60.32u 0.00s 60.45r 1109 gccgo -O2 fannkuch-parallel.go 185.59u 0.00s 59.49r 1110 gc fannkuch 72.14u 0.00s 72.30r 1111 gc fannkuch-parallel 172.54u 0.00s 43.59r 1112 gc_B fannkuch 53.55u 0.00s 53.67r 1113 1114 regex-dna 100000 1115 gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.47r 1116 gccgo -O2 regex-dna.go 6.49u 0.05s 6.56r 1117 gccgo -O2 regex-dna-parallel.go 14.60u 0.67s 4.42r 1118 gc regex-dna 3.91u 0.00s 3.92r 1119 gc regex-dna-parallel 4.01u 0.03s 1.56r 1120 gc_B regex-dna 3.91u 0.00s 3.92r 1121 1122 spectral-norm 5500 1123 gcc -O2 spectral-norm.c -lm 15.85u 0.00s 15.89r 1124 gccgo -O2 spectral-norm.go 15.86u 0.00s 15.89r 1125 gc spectral-norm 19.72u 0.00s 19.76r 1126 gc_B spectral-norm 19.68u 0.01s 19.74r 1127 1128 k-nucleotide 1000000 1129 gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include -lglib-2.0 4.90u 0.01s 4.93r 1130 gccgo -O2 k-nucleotide.go 4.78u 0.01s 4.80r 1131 gccgo -O2 k-nucleotide-parallel.go 6.49u 0.02s 2.18r 1132 gc 
k-nucleotide 9.05u 0.02s 9.09r 1133 gc k-nucleotide-parallel 9.27u 0.01s 3.29r 1134 gc_B k-nucleotide 8.95u 0.03s 9.00r 1135 1136 mandelbrot 16000 1137 gcc -O2 mandelbrot.c 36.11u 0.00s 36.19r 1138 gccgo -O2 mandelbrot.go 43.67u 0.00s 43.77r 1139 gc mandelbrot 38.57u 0.00s 38.66r 1140 gc_B mandelbrot 38.59u 0.00s 38.68r 1141 1142 meteor 2098 1143 gcc -O2 meteor-contest.c 0.09u 0.00s 0.09r 1144 gccgo -O2 meteor-contest.go 0.09u 0.00s 0.09r 1145 gc meteor-contest 0.13u 0.00s 0.14r 1146 gc_B meteor-contest 0.12u 0.00s 0.13r 1147 1148 pidigits 10000 1149 gcc -O2 pidigits.c -lgmp 2.26u 0.00s 2.27r 1150 gccgo -O2 pidigits.go 9.05u 0.00s 9.07r 1151 gc pidigits 2.88u 0.02s 2.90r 1152 gc_B pidigits 2.89u 0.00s 2.90r 1153 1154 threadring 50000000 1155 gcc -O2 threadring.c -lpthread 37.30u 327.81s 289.28r 1156 gccgo -O2 threadring.go 42.83u 26.15s 69.14r 1157 gc threadring 13.00u 0.00s 13.03r 1158 1159 chameneos 6000000 1160 gcc -O2 chameneosredux.c -lpthread 8.80u 71.67s 12.19r 1161 gccgo -O2 chameneosredux.go 11.28u 6.68s 18.00r 1162 gc chameneosredux 6.94u 0.00s 6.96r 1163 1164 # May 23, 2013 1165 # Go 1.1, which includes precise GC, new scheduler, faster maps. 1166 # 20%-ish speedups across many benchmarks. 
1167 # gccgo showing significant improvement (even though it's not yet up to Go 1.1) 1168 # 1169 # Standouts: 1170 # fannkuch, regex-dna, k-nucleotide, threadring, chameneos 1171 1172 fasta -n 25000000 1173 gcc -m64 -O2 fasta.c 1.54u 0.01s 1.55r 1174 gccgo -O2 fasta.go 1.42u 0.00s 1.43r 1175 gc fasta 1.50u 0.01s 1.52r # -16% 1176 gc_B fasta 1.46u 0.00s 1.46r # -17% 1177 1178 reverse-complement < output-of-fasta-25000000 1179 gcc -m64 -O2 reverse-complement.c 0.87u 0.37s 4.36r 1180 gccgo -O2 reverse-complement.go 0.77u 0.15s 0.93r # -15% 1181 gc reverse-complement 0.99u 0.12s 1.12r # -15% 1182 gc_B reverse-complement 0.85u 0.17s 1.02r # -21% 1183 1184 nbody -n 50000000 1185 gcc -m64 -O2 nbody.c -lm 13.50u 0.00s 13.53r 1186 gccgo -O2 nbody.go 13.98u 0.01s 14.02r 1187 gc nbody 16.63u 0.01s 16.67r 1188 gc_B nbody 15.74u 0.00s 15.76r 1189 1190 binary-tree 15 # too slow to use 20 1191 gcc -m64 -O2 binary-tree.c -lm 0.61u 0.00s 0.61r 1192 gccgo -O2 binary-tree.go 1.11u 0.01s 1.12r # -13% 1193 gccgo -O2 binary-tree-freelist.go 0.22u 0.01s 0.23r 1194 gc binary-tree 1.83u 0.02s 1.83r # -7% 1195 gc binary-tree-freelist 0.32u 0.00s 0.32r 1196 1197 fannkuch 12 1198 gcc -m64 -O2 fannkuch.c 45.56u 0.00s 45.67r 1199 gccgo -O2 fannkuch.go 57.71u 0.00s 57.85r # -4% 1200 gccgo -O2 fannkuch-parallel.go 146.31u 0.00s 37.50r # -37% 1201 gc fannkuch 70.06u 0.03s 70.17r # -3% 1202 gc fannkuch-parallel 131.88u 0.06s 33.59r # -23% 1203 gc_B fannkuch 45.55u 0.02s 45.63r # -15% 1204 1205 regex-dna 100000 1206 gcc -m64 -O2 regex-dna.c -lpcre 0.44u 0.01s 0.45r 1207 gccgo -O2 regex-dna.go 5.59u 0.00s 5.61r # -14% 1208 gccgo -O2 regex-dna-parallel.go 10.85u 0.30s 3.34r # -24% 1209 gc regex-dna 2.23u 0.01s 2.25r # -43% 1210 gc regex-dna-parallel 2.35u 0.00s 0.93r # -40% 1211 gc_B regex-dna 2.24u 0.01s 2.25r # -43% 1212 1213 spectral-norm 5500 1214 gcc -m64 -O2 spectral-norm.c -lm 14.84u 0.00s 14.88r 1215 gccgo -O2 spectral-norm.go 15.33u 0.00s 15.37r 1216 gc spectral-norm 16.75u 0.02s 16.79r # -15%
1217 gc_B spectral-norm 16.77u 0.01s 16.79r # -15% 1218 1219 k-nucleotide 1000000 1220 gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 4.50u 0.00s 4.52r 1221 gccgo -O2 k-nucleotide.go 3.72u 0.04s 3.77r # -21% 1222 gccgo -O2 k-nucleotide-parallel.go 3.88u 0.03s 1.42r # -35% 1223 gc k-nucleotide 6.32u 0.01s 6.33r # -31% 1224 gc k-nucleotide-parallel 6.47u 0.05s 2.13r # -33% 1225 gc_B k-nucleotide 6.45u 0.01s 6.47r # -28% 1226 1227 mandelbrot 16000 1228 gcc -m64 -O2 mandelbrot.c 36.03u 0.00s 36.11r 1229 gccgo -O2 mandelbrot.go 37.61u 0.00s 37.74r # -14% 1230 gc mandelbrot 38.19u 0.05s 38.29r 1231 gc_B mandelbrot 38.19u 0.03s 38.26r 1232 1233 meteor 2098 1234 gcc -m64 -O2 meteor-contest.c 0.08u 0.00s 0.08r 1235 gccgo -O2 meteor-contest.go 0.09u 0.01s 0.10r 1236 gc meteor-contest 0.12u 0.00s 0.12r # -15%, although perhaps just noise 1237 gc_B meteor-contest 0.11u 0.00s 0.12r # -8%, although perhaps just noise 1238 1239 pidigits 10000 1240 gcc -m64 -O2 pidigits.c -lgmp 2.27u 0.00s 2.28r 1241 gccgo -O2 pidigits.go 8.95u 0.02s 8.99r 1242 gc pidigits 2.88u 0.14s 2.91r 1243 gc_B pidigits 2.92u 0.10s 2.91r 1244 1245 threadring 50000000 1246 gcc -m64 -O2 threadring.c -lpthread 14.75u 167.88s 212.23r 1247 gccgo -O2 threadring.go 36.72u 12.08s 48.91r # -29% 1248 gc threadring 10.93u 0.01s 10.95r # -16% 1249 1250 chameneos 6000000 1251 gcc -m64 -O2 chameneosredux.c -lpthread 8.89u 56.62s 9.75r 1252 gccgo -O2 chameneosredux.go 9.48u 2.48s 11.99r # -33% 1253 gc chameneosredux 5.80u 0.00s 5.81r # -16% 1254 1255