// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p.wait). If the signal handler
// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
// back to zero. Setting the high bit in handoff means that the profiling is over,
// and the goroutine is now in charge of flushing the data left in the hash table
// to the log and returning that data.
//
// The handoff field is manipulated using atomic operations.
// For the most part, the manipulation of handoff is orderly: if handoff == 0
// then the signal handler owns it and can change it to non-zero.
// If handoff != 0 then the goroutine owns it and can change it to zero.
// If that were the end of the story then we would not need to manipulate
// handoff using atomic operations. The operations are needed, however,
// in order to let the log closer set the high bit to indicate "EOF" safely
// in the situation when normally the goroutine "owns" handoff.
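//
// Roughly, the states of handoff described above are:
//
//	handoff == 0               the goroutine is not using either log half and
//	                           is waiting (or about to wait) on p.wait
//	0 < handoff < 0x80000000   number of entries in the log half just handed
//	                           to the goroutine
//	handoff&0x80000000 != 0    profiling is over; the goroutine flushes the
//	                           hash table to the log and then clears handoff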

package runtime

import "unsafe"

const (
	numBuckets      = 1 << 10
	logSize         = 1 << 17
	assoc           = 4
	maxCPUProfStack = 64
)

type cpuprofEntry struct {
	count uintptr
	depth int
	stack [maxCPUProfStack]uintptr
}

type cpuProfile struct {
	on     bool    // profiling is on
	wait   note    // goroutine waits here
	count  uintptr // tick count
	evicts uintptr // eviction count
	lost   uintptr // lost ticks that need to be logged

	// Active recent stack traces.
	hash [numBuckets]struct {
		entry [assoc]cpuprofEntry
	}

	// Log of traces evicted from hash.
	// Signal handler has filled log[toggle][:nlog].
	// Goroutine is writing log[1-toggle][:handoff].
	log     [2][logSize / 2]uintptr
	nlog    int
	toggle  int32
	handoff uint32

	// Writer state.
	// Writer maintains its own toggle to avoid races
	// looking at signal handler's toggle.
	wtoggle  uint32
	wholding bool // holding & need to release a log half
	flushing bool // flushing hash table - profile is over
	eodSent  bool // special end-of-data record sent; => flushing
}

var (
	cpuprofLock mutex
	cpuprof     *cpuProfile

	eod = [3]uintptr{0, 1, 0}
)

func setcpuprofilerate(hz int32) {
	systemstack(func() {
		setcpuprofilerate_m(hz)
	})
}

// lostProfileData is a no-op function used in profiles
// to mark the number of profiling stack traces that were
// discarded due to slow data writers.
func lostProfileData() {}

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprofLock)
	if hz > 0 {
		if cpuprof == nil {
			cpuprof = (*cpuProfile)(sysAlloc(unsafe.Sizeof(cpuProfile{}), &memstats.other_sys))
			if cpuprof == nil {
				print("runtime: cpu profiling cannot allocate memory\n")
				unlock(&cpuprofLock)
				return
			}
		}
		if cpuprof.on || cpuprof.handoff != 0 {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprofLock)
			return
		}

		cpuprof.on = true
		// pprof binary header format.
		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
		p := &cpuprof.log[0]
		p[0] = 0                 // count for header
		p[1] = 3                 // depth for header
		p[2] = 0                 // version number
		p[3] = uintptr(1e6 / hz) // period (microseconds)
		p[4] = 0
		cpuprof.nlog = 5
		cpuprof.toggle = 0
		cpuprof.wholding = false
		cpuprof.wtoggle = 0
		cpuprof.flushing = false
		cpuprof.eodSent = false
		noteclear(&cpuprof.wait)

		setcpuprofilerate(int32(hz))
	} else if cpuprof != nil && cpuprof.on {
		setcpuprofilerate(0)
		cpuprof.on = false

		// Now add is not running anymore, and getprofile owns the entire log.
		// Set the high bit in cpuprof.handoff to tell getprofile.
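		// Use a CAS loop so that any count the signal handler has already
		// stored in handoff is preserved when the high bit is OR'd in.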
		for {
			n := cpuprof.handoff
			if n&0x80000000 != 0 {
				print("runtime: setcpuprofile(off) twice\n")
			}
			if cas(&cpuprof.handoff, n, n|0x80000000) {
				if n == 0 {
					// we did the transition from 0 -> nonzero so we wake getprofile
					notewakeup(&cpuprof.wait)
				}
				break
			}
		}
	}
	unlock(&cpuprofLock)
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
func (p *cpuProfile) add(pc []uintptr) {
	if len(pc) > maxCPUProfStack {
		pc = pc[:maxCPUProfStack]
	}

	// Compute hash.
	h := uintptr(0)
	for _, x := range pc {
		h = h<<8 | (h >> (8 * (unsafe.Sizeof(h) - 1)))
		h += x * 41
	}
	p.count++

	// Add to entry count if already present in table.
	b := &p.hash[h%numBuckets]
Assoc:
	for i := range b.entry {
		e := &b.entry[i]
		if e.depth != len(pc) {
			continue
		}
		for j := range pc {
			if e.stack[j] != pc[j] {
				continue Assoc
			}
		}
		e.count++
		return
	}

	// Evict entry with smallest count.
	var e *cpuprofEntry
	for i := range b.entry {
		if e == nil || b.entry[i].count < e.count {
			e = &b.entry[i]
		}
	}
	if e.count > 0 {
		if !p.evict(e) {
			// Could not evict entry. Record lost stack.
			p.lost++
			return
		}
		p.evicts++
	}

	// Reuse the newly evicted entry.
	e.depth = len(pc)
	e.count = 1
	copy(e.stack[:], pc)
}

// evict copies the given entry's data into the log, so that
// the entry can be reused. evict is called from add, which
// is called from the profiling signal handler, so it must not
// allocate memory or block. It is safe to call flushlog.
// evict returns true if the entry was copied to the log,
// false if there was no room available.
func (p *cpuProfile) evict(e *cpuprofEntry) bool {
	d := e.depth
	nslot := d + 2
	log := &p.log[p.toggle]
	if p.nlog+nslot > len(log) {
		if !p.flushlog() {
			return false
		}
		log = &p.log[p.toggle]
	}

	q := p.nlog
	log[q] = e.count
	q++
	log[q] = uintptr(d)
	q++
	copy(log[q:], e.stack[:d])
	q += d
	p.nlog = q
	e.count = 0
	return true
}

// flushlog tries to flush the current log and switch to the other one.
// flushlog is called from evict, called from add, called from the signal handler,
// so it cannot allocate memory or block. It can try to swap logs with
// the writing goroutine, as explained in the comment at the top of this file.
func (p *cpuProfile) flushlog() bool {
	if !cas(&p.handoff, 0, uint32(p.nlog)) {
		return false
	}
	notewakeup(&p.wait)

	p.toggle = 1 - p.toggle
	log := &p.log[p.toggle]
	q := 0
	if p.lost > 0 {
		lostPC := funcPC(lostProfileData)
		log[0] = p.lost
		log[1] = 1
		log[2] = lostPC
		q = 3
		p.lost = 0
	}
	p.nlog = q
	return true
}

// getprofile blocks until the next block of profiling data is available
// and returns it as a []byte. It is called from the writing goroutine.
func (p *cpuProfile) getprofile() []byte {
	if p == nil {
		return nil
	}

	if p.wholding {
		// Release previous log to signal handling side.
		// Loop because we are racing against SetCPUProfileRate(0).
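		// If SetCPUProfileRate(0) has already set the high bit, switch into
		// flush mode instead of clearing handoff and losing the "EOF" bit.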
		for {
			n := p.handoff
			if n == 0 {
				print("runtime: phase error during cpu profile handoff\n")
				return nil
			}
			if n&0x80000000 != 0 {
				p.wtoggle = 1 - p.wtoggle
				p.wholding = false
				p.flushing = true
				goto Flush
			}
			if cas(&p.handoff, n, 0) {
				break
			}
		}
		p.wtoggle = 1 - p.wtoggle
		p.wholding = false
	}

	if p.flushing {
		goto Flush
	}

	if !p.on && p.handoff == 0 {
		return nil
	}

	// Wait for new log.
	notetsleepg(&p.wait, -1)
	noteclear(&p.wait)

	switch n := p.handoff; {
	case n == 0:
		print("runtime: phase error during cpu profile wait\n")
		return nil
	case n == 0x80000000:
		p.flushing = true
		goto Flush
	default:
		n &^= 0x80000000

		// Return new log to caller.
		p.wholding = true

		return uintptrBytes(p.log[p.wtoggle][:n])
	}

	// In flush mode.
	// Add is no longer being called. We own the log.
	// Also, p.handoff is non-zero, so flushlog will return false.
	// Evict the hash table into the log and return it.
Flush:
	for i := range p.hash {
		b := &p.hash[i]
		for j := range b.entry {
			e := &b.entry[j]
			if e.count > 0 && !p.evict(e) {
				// Filled the log. Stop the loop and return what we've got.
				break Flush
			}
		}
	}

	// Return pending log data.
	if p.nlog > 0 {
		// Note that we're using toggle now, not wtoggle,
		// because we're working on the log directly.
		n := p.nlog
		p.nlog = 0
		return uintptrBytes(p.log[p.toggle][:n])
	}

	// Made it through the table without finding anything to log.
	if !p.eodSent {
		// We may not have space to append this to the partial log buf,
		// so we always return a new slice for the end-of-data marker.
		p.eodSent = true
		return uintptrBytes(eod[:])
	}

	// Finally done. Clean up and return nil.
	p.flushing = false
	if !cas(&p.handoff, p.handoff, 0) {
		print("runtime: profile flush racing with something\n")
	}
	return nil
}

func uintptrBytes(p []uintptr) (ret []byte) {
	pp := (*slice)(unsafe.Pointer(&p))
	rp := (*slice)(unsafe.Pointer(&ret))

	rp.array = pp.array
	rp.len = pp.len * int(unsafe.Sizeof(p[0]))
	rp.cap = rp.len

	return
}

// CPUProfile returns the next chunk of binary CPU profiling stack trace data,
// blocking until data is available. If profiling is turned off and all the profile
// data accumulated while it was on has been returned, CPUProfile returns nil.
// The caller must save the returned data before calling CPUProfile again.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.cpuprofile flag instead of calling
// CPUProfile directly.
func CPUProfile() []byte {
	return cpuprof.getprofile()
}

//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return tickspersecond()
}
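
// A minimal sketch (not part of this file) of how a writer goroutine can
// drain the profile through CPUProfile above; runtime/pprof does essentially
// this, with w standing in for an arbitrary io.Writer:
//
//	for {
//		data := runtime.CPUProfile()
//		if data == nil {
//			break // profiling is off and all buffered data has been returned
//		}
//		w.Write(data)
//	}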