// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// https://github.com/google/pprof.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the cpuProfile.handoff field. If handoff == 0, the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p.wait). If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half, and then flips the
// number back to zero. Setting the high bit in handoff means that profiling
// is over, and the goroutine is now in charge of flushing the data left in
// the hash table to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need atomic operations.
// They are needed, however, so that the log closer can safely set the high
// bit to indicate "EOF" even while the goroutine still "owns" handoff.
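//
// As a rough sketch of that protocol (illustrative only; the real signal
// handler code below must also avoid allocation and locks):
//
//	// Signal-handler side: publish the filled half if the writer is idle.
//	if atomic.Cas(&p.handoff, 0, uint32(p.nlog)) {
//		notewakeup(&p.wait)     // writer may be sleeping on p.wait
//		p.toggle = 1 - p.toggle // start filling the other half
//	}
//
//	// Writer side: consume the published half, then hand it back.
//	n := p.handoff // non-zero: number of entries handed off
//	process(p.log[p.wtoggle][:n])
//	atomic.Cas(&p.handoff, n, 0) // release; handler may publish again
//
// where process stands in for handing the data to the io.Writer.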

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

const (
	numBuckets      = 1 << 10
	logSize         = 1 << 17
	assoc           = 4
	maxCPUProfStack = 64
)
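
// Together these constants fix the profiler's memory footprint: a hash
// table of numBuckets*assoc = 4096 recent traces, and two log halves of
// logSize/2 = 65536 uintptr words each.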

type cpuprofEntry struct {
	count uintptr
	depth int
	stack [maxCPUProfStack]uintptr
}

//go:notinheap
type cpuProfile struct {
	on     bool    // profiling is on
	wait   note    // goroutine waits here
	count  uintptr // tick count
	evicts uintptr // eviction count
	lost   uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	hash [numBuckets]struct {
		entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log     [2][logSize / 2]uintptr
	nlog    int
	toggle  int32
	handoff uint32

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle  uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent  bool // special end-of-data record sent; => flushing
}

var (
	cpuprofLock mutex
	cpuprof     *cpuProfile

	eod = [3]uintptr{0, 1, 0}
)
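
// eod is the end-of-data marker returned once the profile is complete:
// a pseudo-record with count 0, depth 1, and a single zero PC, which
// terminates the binary profile stream.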

func setcpuprofilerate(hz int32) {
	systemstack(func() {
		setcpuprofilerate_m(hz)
	})
}

// lostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
func lostProfileData() {}

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
		if cpuprof == nil {
			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
			if cpuprof == nil {
				print("runtime: cpu profiling cannot allocate memory\n")
				unlock(&cpuprofLock)
				return
			}
		}
		if cpuprof.on || cpuprof.handoff != 0 {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprofLock)
			return
		}

		cpuprof.on = true
		// pprof binary header format.
		// https://github.com/gperftools/gperftools/blob/master/src/profiledata.cc#L119
		p := &cpuprof.log[0]
		p[0] = 0                 // count for header
		p[1] = 3                 // depth for header
		p[2] = 0                 // version number
		p[3] = uintptr(1e6 / hz) // period (microseconds)
		p[4] = 0
		cpuprof.nlog = 5
		cpuprof.toggle = 0
		cpuprof.wholding = false
		cpuprof.wtoggle = 0
		cpuprof.flushing = false
		cpuprof.eodSent = false
		noteclear(&cpuprof.wait)

		setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
		setcpuprofilerate(0)
		cpuprof.on = false

		// add is no longer running, so getprofile owns the entire log.
		// Set the high bit in cpuprof.handoff to tell getprofile.
		for {
			n := cpuprof.handoff
			if n&0x80000000 != 0 {
				print("runtime: setcpuprofile(off) twice\n")
			}
			if atomic.Cas(&cpuprof.handoff, n, n|0x80000000) {
				if n == 0 {
					// We did the 0 -> non-zero transition,
					// so we must wake getprofile.
					notewakeup(&cpuprof.wait)
				}
				break
			}
		}
	}
	unlock(&cpuprofLock)
}
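
// For reference, a consumer of this legacy binary format sees the five
// header words written above roughly as follows (a sketch; the names are
// illustrative only):
//
//	hdr := words[:5]
//	_ = hdr[0]       // 0: sample count for the header pseudo-record
//	_ = hdr[1]       // 3: number of words remaining in the header
//	_ = hdr[2]       // 0: format version
//	period := hdr[3] // sampling period in microseconds
//	_ = hdr[4]       // 0: padding
//
// followed by sample records and, finally, the eod marker defined above.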

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
//go:nowritebarrierrec
func (p *cpuProfile) add(pc []uintptr) {
	p.addWithFlushlog(pc, p.flushlog)
}

// addWithFlushlog implements add and addNonGo.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It may be called by a signal handler with no g or m.
// It is allowed to call evict, passing the flushlog parameter.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addWithFlushlog(pc []uintptr, flushlog func() bool) {
	if len(pc) > maxCPUProfStack {
		pc = pc[:maxCPUProfStack]
	}

	// Compute hash.
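	// This is a rotate-and-add hash: each iteration rotates h left by
	// one byte and mixes in 41*x. It is cheap and allocation-free,
	// which matters because this code runs in the signal handler.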
	h := uintptr(0)
	for _, x := range pc {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += x * 41
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
Assoc:
	for i := range b.entry {
		e := &b.entry[i]
		if e.depth != len(pc) {
			continue
		}
		for j := range pc {
			if e.stack[j] != pc[j] {
				continue Assoc
			}
		}
		e.count++
		return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
		if e == nil || b.entry[i].count < e.count {
			e = &b.entry[i]
		}
	}
	if e.count > 0 {
		if !p.evict(e, flushlog) {
			// Could not evict entry. Record lost stack.
			p.lost++
			return
		}
		p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = len(pc)
	e.count = 1
	copy(e.stack[:], pc)
}

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block, and it may be called with no g or m.
// It is safe to call flushlog. evict returns true if the entry was
// copied to the log, false if there was no room available.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) evict(e *cpuprofEntry, flushlog func() bool) bool {
	d := e.depth
	nslot := d + 2
	log := &p.log[p.toggle]
	if p.nlog+nslot > len(log) {
		if !flushlog() {
			return false
		}
		log = &p.log[p.toggle]
	}

	q := p.nlog
	log[q] = e.count
	q++
	log[q] = uintptr(d)
	q++
	copy(log[q:], e.stack[:d])
	q += d
	p.nlog = q
	e.count = 0
	return true
}
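
// Each record that evict appends to the log has the layout
//
//	count, depth, pc[0], ..., pc[depth-1]
//
// so a consumer can walk a log half of n words with a sketch like this
// (handle is a stand-in, not a real function):
//
//	for i := 0; i < n; {
//		count, depth := log[i], int(log[i+1])
//		handle(count, log[i+2:i+2+depth]) // one sample
//		i += 2 + depth
//	}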

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, which is called from add, which is called
// from the signal handler, so it cannot allocate memory or block. It can
// try to swap logs with the writing goroutine, as explained in the comment
// at the top of this file.
//go:nowritebarrierrec
func (p *cpuProfile) flushlog() bool {
	if !atomic.Cas(&p.handoff, 0, uint32(p.nlog)) {
		return false
	}
	notewakeup(&p.wait)

	p.toggle = 1 - p.toggle
	log := &p.log[p.toggle]
	q := 0
	if p.lost > 0 {
		lostPC := funcPC(lostProfileData)
		log[0] = p.lost
		log[1] = 1
		log[2] = lostPC
		q = 3
		p.lost = 0
	}
	p.nlog = q
	return true
}

// addNonGo is like add, but runs on a non-Go thread.
// It can't do anything that might need a g or an m.
// With this entry point, we don't try to flush the log when evicting an
// old entry. Instead, we just drop the stack trace if we're out of space.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(pc []uintptr) {
	p.addWithFlushlog(pc, func() bool { return false })
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
func (p *cpuProfile) getprofile() []byte {
	if p == nil {
		return nil
	}

	if p.wholding {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for {
			n := p.handoff
			if n == 0 {
				print("runtime: phase error during cpu profile handoff\n")
				return nil
			}
			if n&0x80000000 != 0 {
				p.wtoggle = 1 - p.wtoggle
				p.wholding = false
				p.flushing = true
				goto Flush
			}
			if atomic.Cas(&p.handoff, n, 0) {
				break
			}
		}
		p.wtoggle = 1 - p.wtoggle
		p.wholding = false
	}

	if p.flushing {
		goto Flush
	}

	if !p.on && p.handoff == 0 {
		return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
		print("runtime: phase error during cpu profile wait\n")
		return nil
	case n == 0x80000000:
		p.flushing = true
		goto Flush
	default:
		n &^= 0x80000000

		// Return new log to caller.
		p.wholding = true

		return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
Flush:
	for i := range p.hash {
		b := &p.hash[i]
		for j := range b.entry {
			e := &b.entry[j]
			if e.count > 0 && !p.evict(e, p.flushlog) {
				// Filled the log. Stop the loop and return what we've got.
				break Flush
			}
		}
	}

	// Return pending log data.
	if p.nlog > 0 {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		n := p.nlog
		p.nlog = 0
		return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p.eodSent = true
		return uintptrBytes(eod[:])
	}

	// Finally done. Clean up and return nil.
	p.flushing = false
	if !atomic.Cas(&p.handoff, p.handoff, 0) {
		print("runtime: profile flush racing with something\n")
	}
	return nil
}

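// uintptrBytes reinterprets a []uintptr as a []byte over the same memory:
// it rewrites the slice header so the byte slice aliases the words, with
// length and capacity scaled by the word size. No data is copied, so the
// bytes are in host byte order and stay valid only as long as the backing
// log does.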
func uintptrBytes(p []uintptr) (ret []byte) {
	pp := (*slice)(unsafe.Pointer(&p))
	rp := (*slice)(unsafe.Pointer(&ret))

	rp.array = pp.array
	rp.len = pp.len * int(unsafe.Sizeof(p[0]))
	rp.cap = rp.len

	return
}

// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
// blocking until data is available. If profiling is turned off and all the profile
// data accumulated while it was on has been returned, CPUProfile returns nil.
// The caller must save the returned data before calling CPUProfile again.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// CPUProfile directly.
func CPUProfile() []byte {
	return cpuprof.getprofile()
}
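
// Callers are expected to drain CPUProfile in a loop until it returns nil,
// along these lines (a sketch of typical use, not code from runtime/pprof):
//
//	for {
//		data := runtime.CPUProfile()
//		if data == nil {
//			break
//		}
//		w.Write(data) // w is some io.Writer; consume data before the next call
//	}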

//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
}
    455