// Copyright 2011 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces.  Most clock ticks
// hit in the cache.  In the event of a cache miss, an entry must be
// evicted from the hash table and copied to a log that will eventually be
// written as profile data.  The google-perftools code flushes the
// log itself from within the signal handler.  This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler.  Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half.  When the signal handler fills its half, it
// offers to swap with the goroutine.  If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the cpuProfile.handoff field.  If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notetsleepg(&p.wait, -1).  If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
// to wake the goroutine.  The value indicates the number of entries in the
// log half being handed off.  The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero.  Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations.  The operations are needed, however,
// so that the log closer (SetCPUProfileRate(0)) can set the high bit to
// indicate "EOF" safely even while the goroutine normally "owns" handoff.
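//
// Schematically, the handoff word encodes the states described above:
//
//	handoff == 0                  goroutine idle; signal handler may fill and hand off a half
//	handoff == n, 0 < n < 1<<31   a log half holding n entries is handed to the goroutine
//	handoff&0x80000000 != 0       profiling is over; goroutine flushes the hash table and drains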

package runtime

import "unsafe"

const (
	numBuckets      = 1 << 10
	logSize         = 1 << 17
	assoc           = 4
	maxCPUProfStack = 64
)

type cpuprofEntry struct {
	count uintptr
	depth int
	stack [maxCPUProfStack]uintptr
}

type cpuProfile struct {
	on     bool    // profiling is on
	wait   note    // goroutine waits here
	count  uintptr // tick count
	evicts uintptr // eviction count
	lost   uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	hash [numBuckets]struct {
		entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log     [2][logSize / 2]uintptr
	nlog    int
	toggle  int32
	handoff uint32

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle  uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent  bool // special end-of-data record sent; => flushing
}

var (
	cpuprofLock mutex
	cpuprof     *cpuProfile

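	// eod is the end-of-data marker: one final record with count 0,
	// depth 1, and a single zero PC, returned once after the profile
	// has been fully drained (see eodSent).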
	eod = [3]uintptr{0, 1, 0}
)

func setcpuprofilerate(hz int32) {
	systemstack(func() {
		setcpuprofilerate_m(hz)
	})
}

// lostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
func lostProfileData() {}

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
		if cpuprof == nil {
			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
			if cpuprof == nil {
				print("runtime: cpu profiling cannot allocate memory\n")
				unlock(&cpuprofLock)
				return
			}
		}
		if cpuprof.on || cpuprof.handoff != 0 {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprofLock)
			return
		}

		cpuprof.on = true
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		p := &cpuprof.log[0]
		p[0] = 0                 // count for header
		p[1] = 3                 // depth for header
		p[2] = 0                 // version number
		p[3] = uintptr(1e6 / hz) // period (microseconds)
		p[4] = 0
		cpuprof.nlog = 5
		cpuprof.toggle = 0
		cpuprof.wholding = false
		cpuprof.wtoggle = 0
		cpuprof.flushing = false
		cpuprof.eodSent = false
		noteclear(&cpuprof.wait)

		setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
		setcpuprofilerate(0)
		cpuprof.on = false

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in cpuprof.handoff to tell getprofile.
		for {
			n := cpuprof.handoff
			if n&0x80000000 != 0 {
				print("runtime: setcpuprofile(off) twice\n")
			}
			if cas(&cpuprof.handoff, n, n|0x80000000) {
				if n == 0 {
					// we did the transition from 0 -> nonzero so we wake getprofile
					notewakeup(&cpuprof.wait)
				}
				break
			}
		}
	}
	unlock(&cpuprofLock)
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.  It is allowed to call evict.
func (p *cpuProfile) add(pc []uintptr) {
	if len(pc) > maxCPUProfStack {
		pc = pc[:maxCPUProfStack]
	}

	// Compute hash.
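	// (Rotate h left by one byte, then mix in each PC scaled by 41.)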
	h := uintptr(0)
	for _, x := range pc {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += x * 41
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
Assoc:
	for i := range b.entry {
		e := &b.entry[i]
		if e.depth != len(pc) {
			continue
		}
		for j := range pc {
			if e.stack[j] != pc[j] {
				continue Assoc
			}
		}
		e.count++
		return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
		if e == nil || b.entry[i].count < e.count {
			e = &b.entry[i]
		}
	}
	if e.count > 0 {
		if !p.evict(e) {
			// Could not evict entry.  Record lost stack.
			p.lost++
			return
		}
		p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = len(pc)
	e.count = 1
	copy(e.stack[:], pc)
}

// evict copies the given entry's data into the log, so that
// the entry can be reused.  evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block.  It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
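// Each record written by evict occupies d+2 slots in the log:
//
//	log[q]            e.count
//	log[q+1]          d (stack depth)
//	log[q+2 : q+2+d]  the stack PCs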
func (p *cpuProfile) evict(e *cpuprofEntry) bool {
	d := e.depth
	nslot := d + 2
	log := &p.log[p.toggle]
	if p.nlog+nslot > len(log) {
		if !p.flushlog() {
			return false
		}
		log = &p.log[p.toggle]
	}

	q := p.nlog
	log[q] = e.count
	q++
	log[q] = uintptr(d)
	q++
	copy(log[q:], e.stack[:d])
	q += d
	p.nlog = q
	e.count = 0
	return true
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block.  It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
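// If samples were dropped (p.lost > 0), flushlog starts the fresh half
// with a synthetic record charging the lost count to lostProfileData,
// so the loss is visible in the resulting profile.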
func (p *cpuProfile) flushlog() bool {
	if !cas(&p.handoff, 0, uint32(p.nlog)) {
		return false
	}
	notewakeup(&p.wait)

	p.toggle = 1 - p.toggle
	log := &p.log[p.toggle]
	q := 0
	if p.lost > 0 {
		lostPC := funcPC(lostProfileData)
		log[0] = p.lost
		log[1] = 1
		log[2] = lostPC
		q = 3
		p.lost = 0
	}
	p.nlog = q
	return true
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte.  It is called from the writing goroutine.
func (p *cpuProfile) getprofile() []byte {
	if p == nil {
		return nil
	}

	if p.wholding {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
		for {
			n := p.handoff
			if n == 0 {
				print("runtime: phase error during cpu profile handoff\n")
				return nil
			}
			if n&0x80000000 != 0 {
				p.wtoggle = 1 - p.wtoggle
				p.wholding = false
				p.flushing = true
				goto Flush
			}
			if cas(&p.handoff, n, 0) {
				break
			}
		}
		p.wtoggle = 1 - p.wtoggle
		p.wholding = false
	}

	if p.flushing {
		goto Flush
	}

	if !p.on && p.handoff == 0 {
		return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
		print("runtime: phase error during cpu profile wait\n")
		return nil
	case n == 0x80000000:
		p.flushing = true
		goto Flush
	default:
		n &^= 0x80000000

		// Return new log to caller.
		p.wholding = true

		return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called.  We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
Flush:
	for i := range p.hash {
		b := &p.hash[i]
		for j := range b.entry {
			e := &b.entry[j]
			if e.count > 0 && !p.evict(e) {
				// Filled the log.  Stop the loop and return what we've got.
				break Flush
			}
		}
	}

	// Return pending log data.
	if p.nlog > 0 {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		n := p.nlog
		p.nlog = 0
		return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p.eodSent = true
		return uintptrBytes(eod[:])
	}

	// Finally done.  Clean up and return nil.
	p.flushing = false
	if !cas(&p.handoff, p.handoff, 0) {
		print("runtime: profile flush racing with something\n")
	}
	return nil
}

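// uintptrBytes reinterprets a []uintptr as a []byte covering the same
// memory, without copying, by rewriting the slice header's length and
// capacity in place.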
func uintptrBytes(p []uintptr) (ret []byte) {
	pp := (*slice)(unsafe.Pointer(&p))
	rp := (*slice)(unsafe.Pointer(&ret))

	rp.array = pp.array
	rp.len = pp.len * int(unsafe.Sizeof(p[0]))
	rp.cap = rp.len

	return
}

// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
// blocking until data is available.  If profiling is turned off and all the profile
// data accumulated while it was on has been returned, CPUProfile returns nil.
// The caller must save the returned data before calling CPUProfile again.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// CPUProfile directly.
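//
// A rough sketch of the writer loop that drives CPUProfile (essentially
// what runtime/pprof runs on the caller's behalf; w stands for the
// caller's io.Writer):
//
//	for {
//		data := runtime.CPUProfile()
//		if data == nil {
//			break
//		}
//		w.Write(data)
//	}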
func CPUProfile() []byte {
	return cpuprof.getprofile()
}

//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
}
    425