Home | History | Annotate | Download | only in testing
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package testing
      6 
      7 import (
      8 	"flag"
      9 	"fmt"
     10 	"internal/race"
     11 	"os"
     12 	"runtime"
     13 	"sync"
     14 	"sync/atomic"
     15 	"time"
     16 )
     17 
     18 var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
     19 var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`")
     20 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
     21 
     22 // Global lock to ensure only one benchmark runs at a time.
     23 var benchmarkLock sync.Mutex
     24 
     25 // Used for every benchmark for measuring memory.
     26 var memStats runtime.MemStats
     27 
     28 // An internal type but exported because it is cross-package; part of the implementation
     29 // of the "go test" command.
     30 type InternalBenchmark struct {
     31 	Name string
     32 	F    func(b *B)
     33 }
     34 
     35 // B is a type passed to Benchmark functions to manage benchmark
     36 // timing and to specify the number of iterations to run.
     37 //
     38 // A benchmark ends when its Benchmark function returns or calls any of the methods
     39 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
     40 // only from the goroutine running the Benchmark function.
     41 // The other reporting methods, such as the variations of Log and Error,
     42 // may be called simultaneously from multiple goroutines.
     43 //
     44 // Like in tests, benchmark logs are accumulated during execution
     45 // and dumped to standard error when done. Unlike in tests, benchmark logs
     46 // are always printed, so as not to hide output whose existence may be
     47 // affecting benchmark results.
     48 type B struct {
     49 	common
     50 	context          *benchContext
     51 	N                int
     52 	previousN        int           // number of iterations in the previous run
     53 	previousDuration time.Duration // total duration of the previous run
     54 	benchFunc        func(b *B)
     55 	benchTime        time.Duration
     56 	bytes            int64
     57 	missingBytes     bool // one of the subbenchmarks does not have bytes set.
     58 	timerOn          bool
     59 	showAllocResult  bool
     60 	result           BenchmarkResult
     61 	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
     62 	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
     63 	startAllocs uint64
     64 	startBytes  uint64
     65 	// The net total of this test after being run.
     66 	netAllocs uint64
     67 	netBytes  uint64
     68 }
     69 
     70 // StartTimer starts timing a test. This function is called automatically
     71 // before a benchmark starts, but it can also used to resume timing after
     72 // a call to StopTimer.
     73 func (b *B) StartTimer() {
     74 	if !b.timerOn {
     75 		runtime.ReadMemStats(&memStats)
     76 		b.startAllocs = memStats.Mallocs
     77 		b.startBytes = memStats.TotalAlloc
     78 		b.start = time.Now()
     79 		b.timerOn = true
     80 	}
     81 }
     82 
     83 // StopTimer stops timing a test. This can be used to pause the timer
     84 // while performing complex initialization that you don't
     85 // want to measure.
     86 func (b *B) StopTimer() {
     87 	if b.timerOn {
     88 		b.duration += time.Now().Sub(b.start)
     89 		runtime.ReadMemStats(&memStats)
     90 		b.netAllocs += memStats.Mallocs - b.startAllocs
     91 		b.netBytes += memStats.TotalAlloc - b.startBytes
     92 		b.timerOn = false
     93 	}
     94 }
     95 
     96 // ResetTimer zeros the elapsed benchmark time and memory allocation counters.
     97 // It does not affect whether the timer is running.
     98 func (b *B) ResetTimer() {
     99 	if b.timerOn {
    100 		runtime.ReadMemStats(&memStats)
    101 		b.startAllocs = memStats.Mallocs
    102 		b.startBytes = memStats.TotalAlloc
    103 		b.start = time.Now()
    104 	}
    105 	b.duration = 0
    106 	b.netAllocs = 0
    107 	b.netBytes = 0
    108 }
    109 
    110 // SetBytes records the number of bytes processed in a single operation.
    111 // If this is called, the benchmark will report ns/op and MB/s.
    112 func (b *B) SetBytes(n int64) { b.bytes = n }
    113 
    114 // ReportAllocs enables malloc statistics for this benchmark.
    115 // It is equivalent to setting -test.benchmem, but it only affects the
    116 // benchmark function that calls ReportAllocs.
    117 func (b *B) ReportAllocs() {
    118 	b.showAllocResult = true
    119 }
    120 
    121 func (b *B) nsPerOp() int64 {
    122 	if b.N <= 0 {
    123 		return 0
    124 	}
    125 	return b.duration.Nanoseconds() / int64(b.N)
    126 }
    127 
    128 // runN runs a single benchmark for the specified number of iterations.
    129 func (b *B) runN(n int) {
    130 	benchmarkLock.Lock()
    131 	defer benchmarkLock.Unlock()
    132 	// Try to get a comparable environment for each run
    133 	// by clearing garbage from previous runs.
    134 	runtime.GC()
    135 	b.raceErrors = -race.Errors()
    136 	b.N = n
    137 	b.parallelism = 1
    138 	b.ResetTimer()
    139 	b.StartTimer()
    140 	b.benchFunc(b)
    141 	b.StopTimer()
    142 	b.previousN = n
    143 	b.previousDuration = b.duration
    144 	b.raceErrors += race.Errors()
    145 	if b.raceErrors > 0 {
    146 		b.Errorf("race detected during execution of benchmark")
    147 	}
    148 }
    149 
    150 func min(x, y int) int {
    151 	if x > y {
    152 		return y
    153 	}
    154 	return x
    155 }
    156 
    157 func max(x, y int) int {
    158 	if x < y {
    159 		return y
    160 	}
    161 	return x
    162 }
    163 
    164 // roundDown10 rounds a number down to the nearest power of 10.
    165 func roundDown10(n int) int {
    166 	var tens = 0
    167 	// tens = floor(log_10(n))
    168 	for n >= 10 {
    169 		n = n / 10
    170 		tens++
    171 	}
    172 	// result = 10^tens
    173 	result := 1
    174 	for i := 0; i < tens; i++ {
    175 		result *= 10
    176 	}
    177 	return result
    178 }
    179 
    180 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX].
    181 func roundUp(n int) int {
    182 	base := roundDown10(n)
    183 	switch {
    184 	case n <= base:
    185 		return base
    186 	case n <= (2 * base):
    187 		return 2 * base
    188 	case n <= (3 * base):
    189 		return 3 * base
    190 	case n <= (5 * base):
    191 		return 5 * base
    192 	default:
    193 		return 10 * base
    194 	}
    195 }
    196 
    197 // run1 runs the first iteration of benchFunc. It returns whether more
    198 // iterations of this benchmarks should be run.
    199 func (b *B) run1() bool {
    200 	if ctx := b.context; ctx != nil {
    201 		// Extend maxLen, if needed.
    202 		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
    203 			ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
    204 		}
    205 	}
    206 	go func() {
    207 		// Signal that we're done whether we return normally
    208 		// or by FailNow's runtime.Goexit.
    209 		defer func() {
    210 			b.signal <- true
    211 		}()
    212 
    213 		b.runN(1)
    214 	}()
    215 	<-b.signal
    216 	if b.failed {
    217 		fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
    218 		return false
    219 	}
    220 	// Only print the output if we know we are not going to proceed.
    221 	// Otherwise it is printed in processBench.
    222 	if atomic.LoadInt32(&b.hasSub) != 0 || b.finished {
    223 		tag := "BENCH"
    224 		if b.skipped {
    225 			tag = "SKIP"
    226 		}
    227 		if b.chatty && (len(b.output) > 0 || b.finished) {
    228 			b.trimOutput()
    229 			fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
    230 		}
    231 		return false
    232 	}
    233 	return true
    234 }
    235 
    236 // run executes the benchmark in a separate goroutine, including all of its
    237 // subbenchmarks. b must not have subbenchmarks.
    238 func (b *B) run() BenchmarkResult {
    239 	if b.context != nil {
    240 		// Running go test --test.bench
    241 		b.context.processBench(b) // Must call doBench.
    242 	} else {
    243 		// Running func Benchmark.
    244 		b.doBench()
    245 	}
    246 	return b.result
    247 }
    248 
    249 func (b *B) doBench() BenchmarkResult {
    250 	go b.launch()
    251 	<-b.signal
    252 	return b.result
    253 }
    254 
    255 // launch launches the benchmark function. It gradually increases the number
    256 // of benchmark iterations until the benchmark runs for the requested benchtime.
    257 // launch is run by the doBench function as a separate goroutine.
    258 // run1 must have been called on b.
    259 func (b *B) launch() {
    260 	// Signal that we're done whether we return normally
    261 	// or by FailNow's runtime.Goexit.
    262 	defer func() {
    263 		b.signal <- true
    264 	}()
    265 
    266 	// Run the benchmark for at least the specified amount of time.
    267 	d := b.benchTime
    268 	for n := 1; !b.failed && b.duration < d && n < 1e9; {
    269 		last := n
    270 		// Predict required iterations.
    271 		n = int(d.Nanoseconds())
    272 		if nsop := b.nsPerOp(); nsop != 0 {
    273 			n /= int(nsop)
    274 		}
    275 		// Run more iterations than we think we'll need (1.2x).
    276 		// Don't grow too fast in case we had timing errors previously.
    277 		// Be sure to run at least one more than last time.
    278 		n = max(min(n+n/5, 100*last), last+1)
    279 		// Round up to something easy to read.
    280 		n = roundUp(n)
    281 		b.runN(n)
    282 	}
    283 	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes}
    284 }
    285 
    286 // The results of a benchmark run.
    287 type BenchmarkResult struct {
    288 	N         int           // The number of iterations.
    289 	T         time.Duration // The total time taken.
    290 	Bytes     int64         // Bytes processed in one iteration.
    291 	MemAllocs uint64        // The total number of memory allocations.
    292 	MemBytes  uint64        // The total number of bytes allocated.
    293 }
    294 
    295 func (r BenchmarkResult) NsPerOp() int64 {
    296 	if r.N <= 0 {
    297 		return 0
    298 	}
    299 	return r.T.Nanoseconds() / int64(r.N)
    300 }
    301 
    302 func (r BenchmarkResult) mbPerSec() float64 {
    303 	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
    304 		return 0
    305 	}
    306 	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
    307 }
    308 
    309 func (r BenchmarkResult) AllocsPerOp() int64 {
    310 	if r.N <= 0 {
    311 		return 0
    312 	}
    313 	return int64(r.MemAllocs) / int64(r.N)
    314 }
    315 
    316 func (r BenchmarkResult) AllocedBytesPerOp() int64 {
    317 	if r.N <= 0 {
    318 		return 0
    319 	}
    320 	return int64(r.MemBytes) / int64(r.N)
    321 }
    322 
    323 func (r BenchmarkResult) String() string {
    324 	mbs := r.mbPerSec()
    325 	mb := ""
    326 	if mbs != 0 {
    327 		mb = fmt.Sprintf("\t%7.2f MB/s", mbs)
    328 	}
    329 	nsop := r.NsPerOp()
    330 	ns := fmt.Sprintf("%10d ns/op", nsop)
    331 	if r.N > 0 && nsop < 100 {
    332 		// The format specifiers here make sure that
    333 		// the ones digits line up for all three possible formats.
    334 		if nsop < 10 {
    335 			ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
    336 		} else {
    337 			ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
    338 		}
    339 	}
    340 	return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
    341 }
    342 
    343 func (r BenchmarkResult) MemString() string {
    344 	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
    345 		r.AllocedBytesPerOp(), r.AllocsPerOp())
    346 }
    347 
    348 // benchmarkName returns full name of benchmark including procs suffix.
    349 func benchmarkName(name string, n int) string {
    350 	if n != 1 {
    351 		return fmt.Sprintf("%s-%d", name, n)
    352 	}
    353 	return name
    354 }
    355 
    356 type benchContext struct {
    357 	match *matcher
    358 
    359 	maxLen int // The largest recorded benchmark name.
    360 	extLen int // Maximum extension length.
    361 }
    362 
    363 // An internal function but exported because it is cross-package; part of the implementation
    364 // of the "go test" command.
    365 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
    366 	runBenchmarks(matchString, benchmarks)
    367 }
    368 
    369 func runBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
    370 	// If no flag was specified, don't run benchmarks.
    371 	if len(*matchBenchmarks) == 0 {
    372 		return true
    373 	}
    374 	// Collect matching benchmarks and determine longest name.
    375 	maxprocs := 1
    376 	for _, procs := range cpuList {
    377 		if procs > maxprocs {
    378 			maxprocs = procs
    379 		}
    380 	}
    381 	ctx := &benchContext{
    382 		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench"),
    383 		extLen: len(benchmarkName("", maxprocs)),
    384 	}
    385 	var bs []InternalBenchmark
    386 	for _, Benchmark := range benchmarks {
    387 		if _, matched := ctx.match.fullName(nil, Benchmark.Name); matched {
    388 			bs = append(bs, Benchmark)
    389 			benchName := benchmarkName(Benchmark.Name, maxprocs)
    390 			if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
    391 				ctx.maxLen = l
    392 			}
    393 		}
    394 	}
    395 	main := &B{
    396 		common: common{
    397 			name:   "Main",
    398 			w:      os.Stdout,
    399 			chatty: *chatty,
    400 		},
    401 		benchFunc: func(b *B) {
    402 			for _, Benchmark := range bs {
    403 				b.Run(Benchmark.Name, Benchmark.F)
    404 			}
    405 		},
    406 		benchTime: *benchTime,
    407 		context:   ctx,
    408 	}
    409 	main.runN(1)
    410 	return !main.failed
    411 }
    412 
    413 // processBench runs bench b for the configured CPU counts and prints the results.
    414 func (ctx *benchContext) processBench(b *B) {
    415 	for i, procs := range cpuList {
    416 		runtime.GOMAXPROCS(procs)
    417 		benchName := benchmarkName(b.name, procs)
    418 		fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
    419 		// Recompute the running time for all but the first iteration.
    420 		if i > 0 {
    421 			b = &B{
    422 				common: common{
    423 					signal: make(chan bool),
    424 					name:   b.name,
    425 					w:      b.w,
    426 					chatty: b.chatty,
    427 				},
    428 				benchFunc: b.benchFunc,
    429 				benchTime: b.benchTime,
    430 			}
    431 			b.run1()
    432 		}
    433 		r := b.doBench()
    434 		if b.failed {
    435 			// The output could be very long here, but probably isn't.
    436 			// We print it all, regardless, because we don't want to trim the reason
    437 			// the benchmark failed.
    438 			fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
    439 			continue
    440 		}
    441 		results := r.String()
    442 		if *benchmarkMemory || b.showAllocResult {
    443 			results += "\t" + r.MemString()
    444 		}
    445 		fmt.Fprintln(b.w, results)
    446 		// Unlike with tests, we ignore the -chatty flag and always print output for
    447 		// benchmarks since the output generation time will skew the results.
    448 		if len(b.output) > 0 {
    449 			b.trimOutput()
    450 			fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
    451 		}
    452 		if p := runtime.GOMAXPROCS(-1); p != procs {
    453 			fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
    454 		}
    455 	}
    456 }
    457 
    458 // Run benchmarks f as a subbenchmark with the given name. It reports
    459 // whether there were any failures.
    460 //
    461 // A subbenchmark is like any other benchmark. A benchmark that calls Run at
    462 // least once will not be measured itself and will be called once with N=1.
    463 //
    464 // Run may be called simultaneously from multiple goroutines, but all such
    465 // calls must happen before the outer benchmark function for b returns.
    466 func (b *B) Run(name string, f func(b *B)) bool {
    467 	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
    468 	// Release the lock and acquire it on exit to ensure locks stay paired.
    469 	atomic.StoreInt32(&b.hasSub, 1)
    470 	benchmarkLock.Unlock()
    471 	defer benchmarkLock.Lock()
    472 
    473 	benchName, ok := b.name, true
    474 	if b.context != nil {
    475 		benchName, ok = b.context.match.fullName(&b.common, name)
    476 	}
    477 	if !ok {
    478 		return true
    479 	}
    480 	sub := &B{
    481 		common: common{
    482 			signal: make(chan bool),
    483 			name:   benchName,
    484 			parent: &b.common,
    485 			level:  b.level + 1,
    486 			w:      b.w,
    487 			chatty: b.chatty,
    488 		},
    489 		benchFunc: f,
    490 		benchTime: b.benchTime,
    491 		context:   b.context,
    492 	}
    493 	if sub.run1() {
    494 		sub.run()
    495 	}
    496 	b.add(sub.result)
    497 	return !sub.failed
    498 }
    499 
    500 // add simulates running benchmarks in sequence in a single iteration. It is
    501 // used to give some meaningful results in case func Benchmark is used in
    502 // combination with Run.
    503 func (b *B) add(other BenchmarkResult) {
    504 	r := &b.result
    505 	// The aggregated BenchmarkResults resemble running all subbenchmarks as
    506 	// in sequence in a single benchmark.
    507 	r.N = 1
    508 	r.T += time.Duration(other.NsPerOp())
    509 	if other.Bytes == 0 {
    510 		// Summing Bytes is meaningless in aggregate if not all subbenchmarks
    511 		// set it.
    512 		b.missingBytes = true
    513 		r.Bytes = 0
    514 	}
    515 	if !b.missingBytes {
    516 		r.Bytes += other.Bytes
    517 	}
    518 	r.MemAllocs += uint64(other.AllocsPerOp())
    519 	r.MemBytes += uint64(other.AllocedBytesPerOp())
    520 }
    521 
    522 // trimOutput shortens the output from a benchmark, which can be very long.
    523 func (b *B) trimOutput() {
    524 	// The output is likely to appear multiple times because the benchmark
    525 	// is run multiple times, but at least it will be seen. This is not a big deal
    526 	// because benchmarks rarely print, but just in case, we trim it if it's too long.
    527 	const maxNewlines = 10
    528 	for nlCount, j := 0, 0; j < len(b.output); j++ {
    529 		if b.output[j] == '\n' {
    530 			nlCount++
    531 			if nlCount >= maxNewlines {
    532 				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
    533 				break
    534 			}
    535 		}
    536 	}
    537 }
    538 
    539 // A PB is used by RunParallel for running parallel benchmarks.
    540 type PB struct {
    541 	globalN *uint64 // shared between all worker goroutines iteration counter
    542 	grain   uint64  // acquire that many iterations from globalN at once
    543 	cache   uint64  // local cache of acquired iterations
    544 	bN      uint64  // total number of iterations to execute (b.N)
    545 }
    546 
    547 // Next reports whether there are more iterations to execute.
    548 func (pb *PB) Next() bool {
    549 	if pb.cache == 0 {
    550 		n := atomic.AddUint64(pb.globalN, pb.grain)
    551 		if n <= pb.bN {
    552 			pb.cache = pb.grain
    553 		} else if n < pb.bN+pb.grain {
    554 			pb.cache = pb.bN + pb.grain - n
    555 		} else {
    556 			return false
    557 		}
    558 	}
    559 	pb.cache--
    560 	return true
    561 }
    562 
    563 // RunParallel runs a benchmark in parallel.
    564 // It creates multiple goroutines and distributes b.N iterations among them.
    565 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
    566 // non-CPU-bound benchmarks, call SetParallelism before RunParallel.
    567 // RunParallel is usually used with the go test -cpu flag.
    568 //
    569 // The body function will be run in each goroutine. It should set up any
    570 // goroutine-local state and then iterate until pb.Next returns false.
    571 // It should not use the StartTimer, StopTimer, or ResetTimer functions,
    572 // because they have global effect. It should also not call Run.
    573 func (b *B) RunParallel(body func(*PB)) {
    574 	if b.N == 0 {
    575 		return // Nothing to do when probing.
    576 	}
    577 	// Calculate grain size as number of iterations that take ~100s.
    578 	// 100s is enough to amortize the overhead and provide sufficient
    579 	// dynamic load balancing.
    580 	grain := uint64(0)
    581 	if b.previousN > 0 && b.previousDuration > 0 {
    582 		grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
    583 	}
    584 	if grain < 1 {
    585 		grain = 1
    586 	}
    587 	// We expect the inner loop and function call to take at least 10ns,
    588 	// so do not do more than 100s/10ns=1e4 iterations.
    589 	if grain > 1e4 {
    590 		grain = 1e4
    591 	}
    592 
    593 	n := uint64(0)
    594 	numProcs := b.parallelism * runtime.GOMAXPROCS(0)
    595 	var wg sync.WaitGroup
    596 	wg.Add(numProcs)
    597 	for p := 0; p < numProcs; p++ {
    598 		go func() {
    599 			defer wg.Done()
    600 			pb := &PB{
    601 				globalN: &n,
    602 				grain:   grain,
    603 				bN:      uint64(b.N),
    604 			}
    605 			body(pb)
    606 		}()
    607 	}
    608 	wg.Wait()
    609 	if n <= uint64(b.N) && !b.Failed() {
    610 		b.Fatal("RunParallel: body exited without pb.Next() == false")
    611 	}
    612 }
    613 
    614 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
    615 // There is usually no need to call SetParallelism for CPU-bound benchmarks.
    616 // If p is less than 1, this call will have no effect.
    617 func (b *B) SetParallelism(p int) {
    618 	if p >= 1 {
    619 		b.parallelism = p
    620 	}
    621 }
    622 
    623 // Benchmark benchmarks a single function. Useful for creating
    624 // custom benchmarks that do not use the "go test" command.
    625 //
    626 // If f calls Run, the result will be an estimate of running all its
    627 // subbenchmarks that don't call Run in sequence in a single benchmark.
    628 func Benchmark(f func(b *B)) BenchmarkResult {
    629 	b := &B{
    630 		common: common{
    631 			signal: make(chan bool),
    632 			w:      discard{},
    633 		},
    634 		benchFunc: f,
    635 		benchTime: *benchTime,
    636 	}
    637 	if !b.run1() {
    638 		return BenchmarkResult{}
    639 	}
    640 	return b.run()
    641 }
    642 
    643 type discard struct{}
    644 
    645 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }
    646