Home | History | Annotate | Download | only in runtime
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // CPU profiling.
      6 //
      7 // The signal handler for the profiling clock tick adds a new stack trace
      8 // to a log of recent traces. The log is read by a user goroutine that
      9 // turns it into formatted profile data. If the reader does not keep up
     10 // with the log, those writes will be recorded as a count of lost records.
     11 // The actual profile buffer is in profbuf.go.
     12 
     13 package runtime
     14 
     15 import (
     16 	"runtime/internal/atomic"
     17 	"runtime/internal/sys"
     18 	"unsafe"
     19 )
     20 
// maxCPUProfStack is not referenced in this part of the file.
// NOTE(review): presumably the maximum number of stack frames recorded
// for a single CPU profile sample — confirm against its users.
const maxCPUProfStack = 64
     22 
// cpuProfile holds the state of the CPU profiler: whether profiling is
// on, the buffer that samples are logged to, and a fixed-size staging
// area for stacks captured on non-Go-created threads.
type cpuProfile struct {
	// lock is held by SetCPUProfileRate and runtime_pprof_readProfile
	// while they read or update on and log.
	lock mutex
	on   bool     // profiling is on
	log  *profBuf // profile events written here

	// extra holds extra stacks accumulated in addNonGo
	// corresponding to profiling signals arriving on
	// non-Go-created threads. Those stacks are written
	// to log the next time a normal Go thread gets the
	// signal handler.
	// Assuming the stacks are 2 words each (we don't get
	// a full traceback from those threads), plus one word
	// size for framing, 100 Hz profiling would generate
	// 300 words per second.
	// Hopefully a normal Go thread will get the profiling
	// signal at least once every few seconds.
	//
	// Each queued record is a length word (1 + number of PCs,
	// i.e. counting itself) followed by the PCs; numExtra is the
	// number of words of extra currently in use.
	extra     [1000]uintptr
	numExtra  int
	lostExtra uint64 // count of stacks dropped by addNonGo because extra was full
}
     43 
// cpuprof is the single package-level CPU profiler state that
// SetCPUProfileRate, the add* methods, and runtime_pprof_readProfile
// in this file operate on.
var cpuprof cpuProfile
     45 
     46 // SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
     47 // If hz <= 0, SetCPUProfileRate turns off profiling.
     48 // If the profiler is on, the rate cannot be changed without first turning it off.
     49 //
     50 // Most clients should use the runtime/pprof package or
     51 // the testing package's -test.cpuprofile flag instead of calling
     52 // SetCPUProfileRate directly.
     53 func SetCPUProfileRate(hz int) {
     54 	// Clamp hz to something reasonable.
     55 	if hz < 0 {
     56 		hz = 0
     57 	}
     58 	if hz > 1000000 {
     59 		hz = 1000000
     60 	}
     61 
     62 	lock(&cpuprof.lock)
     63 	if hz > 0 {
     64 		if cpuprof.on || cpuprof.log != nil {
     65 			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
     66 			unlock(&cpuprof.lock)
     67 			return
     68 		}
     69 
     70 		cpuprof.on = true
     71 		cpuprof.log = newProfBuf(1, 1<<17, 1<<14)
     72 		hdr := [1]uint64{uint64(hz)}
     73 		cpuprof.log.write(nil, nanotime(), hdr[:], nil)
     74 		setcpuprofilerate(int32(hz))
     75 	} else if cpuprof.on {
     76 		setcpuprofilerate(0)
     77 		cpuprof.on = false
     78 		cpuprof.addExtra()
     79 		cpuprof.log.close()
     80 	}
     81 	unlock(&cpuprof.lock)
     82 }
     83 
// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.
//
// gp supplies the tag pointer (&gp.labels) stored with the sample and
// stk is the stack recorded for it. Nothing is written when prof.hz
// is zero.
//go:nowritebarrierrec
func (p *cpuProfile) add(gp *g, stk []uintptr) {
	// Simple cas-lock to coordinate with setcpuprofilerate.
	// Spin, yielding between attempts, until this caller is the one
	// that flips prof.signalLock from 0 to 1.
	for !atomic.Cas(&prof.signalLock, 0, 1) {
		osyield()
	}

	if prof.hz != 0 { // implies cpuprof.log != nil
		// Flush stacks and the lost-sample count queued by addNonGo
		// before logging this sample.
		if p.numExtra > 0 || p.lostExtra > 0 {
			p.addExtra()
		}
		// Header word 1: a single sample. (addExtra puts the number
		// of lost samples in the same header slot.)
		hdr := [1]uint64{1}
		// Note: write "knows" that the argument is &gp.labels,
		// because otherwise its write barrier behavior may not
		// be correct. See the long comment there before
		// changing the argument here.
		cpuprof.log.write(&gp.labels, nanotime(), hdr[:], stk)
	}

	// Release the cas-lock.
	atomic.Store(&prof.signalLock, 0)
}
    110 
// addNonGo adds the non-Go stack trace to the profile.
// It is called from a non-Go thread, so we cannot use much stack at all,
// nor do anything that needs a g or an m.
// In particular, we can't call cpuprof.log.write.
// Instead, we copy the stack into cpuprof.extra,
// which will be drained the next time a Go thread
// gets the signal handling event.
//
// If stk does not fit in the remaining space of cpuprof.extra, the
// stack is dropped and cpuprof.lostExtra is incremented instead.
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(stk []uintptr) {
	// Simple cas-lock to coordinate with SetCPUProfileRate.
	// (Other calls to add or addNonGo should be blocked out
	// by the fact that only one SIGPROF can be handled by the
	// process at a time. If not, this lock will serialize those too.)
	for !atomic.Cas(&prof.signalLock, 0, 1) {
		osyield()
	}

	// A record needs one length word plus len(stk) PC words. The
	// comparison is strict, so a record that would fill extra exactly
	// is counted as lost rather than stored.
	if cpuprof.numExtra+1+len(stk) < len(cpuprof.extra) {
		i := cpuprof.numExtra
		// The length word counts itself; addExtra relies on this
		// to step from one record to the next.
		cpuprof.extra[i] = uintptr(1 + len(stk))
		copy(cpuprof.extra[i+1:], stk)
		cpuprof.numExtra += 1 + len(stk)
	} else {
		cpuprof.lostExtra++
	}

	// Release the cas-lock.
	atomic.Store(&prof.signalLock, 0)
}
    140 
    141 // addExtra adds the "extra" profiling events,
    142 // queued by addNonGo, to the profile log.
    143 // addExtra is called either from a signal handler on a Go thread
    144 // or from an ordinary goroutine; either way it can use stack
    145 // and has a g. The world may be stopped, though.
    146 func (p *cpuProfile) addExtra() {
    147 	// Copy accumulated non-Go profile events.
    148 	hdr := [1]uint64{1}
    149 	for i := 0; i < p.numExtra; {
    150 		p.log.write(nil, 0, hdr[:], p.extra[i+1:i+int(p.extra[i])])
    151 		i += int(p.extra[i])
    152 	}
    153 	p.numExtra = 0
    154 
    155 	// Report any lost events.
    156 	if p.lostExtra > 0 {
    157 		hdr := [1]uint64{p.lostExtra}
    158 		lostStk := [2]uintptr{
    159 			funcPC(_LostExternalCode) + sys.PCQuantum,
    160 			funcPC(_ExternalCode) + sys.PCQuantum,
    161 		}
    162 		cpuprof.log.write(nil, 0, hdr[:], lostStk[:])
    163 		p.lostExtra = 0
    164 	}
    165 }
    166 
    167 func (p *cpuProfile) addLostAtomic64(count uint64) {
    168 	hdr := [1]uint64{count}
    169 	lostStk := [2]uintptr{
    170 		funcPC(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
    171 		funcPC(_System) + sys.PCQuantum,
    172 	}
    173 	cpuprof.log.write(nil, 0, hdr[:], lostStk[:])
    174 }
    175 
    176 // CPUProfile panics.
    177 // It formerly provided raw access to chunks of
    178 // a pprof-format profile generated by the runtime.
    179 // The details of generating that format have changed,
    180 // so this functionality has been removed.
    181 //
    182 // Deprecated: use the runtime/pprof package,
    183 // or the handlers in the net/http/pprof package,
    184 // or the testing package's -test.cpuprofile flag instead.
    185 func CPUProfile() []byte {
    186 	panic("CPUProfile no longer available")
    187 }
    188 
    189 //go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
    190 func runtime_pprof_runtime_cyclesPerSecond() int64 {
    191 	return tickspersecond()
    192 }
    193 
    194 // readProfile, provided to runtime/pprof, returns the next chunk of
    195 // binary CPU profiling stack trace data, blocking until data is available.
    196 // If profiling is turned off and all the profile data accumulated while it was
    197 // on has been returned, readProfile returns eof=true.
    198 // The caller must save the returned data and tags before calling readProfile again.
    199 //
    200 //go:linkname runtime_pprof_readProfile runtime/pprof.readProfile
    201 func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) {
    202 	lock(&cpuprof.lock)
    203 	log := cpuprof.log
    204 	unlock(&cpuprof.lock)
    205 	data, tags, eof := log.read(profBufBlocking)
    206 	if len(data) == 0 && eof {
    207 		lock(&cpuprof.lock)
    208 		cpuprof.log = nil
    209 		unlock(&cpuprof.lock)
    210 	}
    211 	return data, tags, eof
    212 }
    213