1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package pprof writes runtime profiling data in the format expected 6 // by the pprof visualization tool. 7 // For more information about pprof, see 8 // http://code.google.com/p/google-perftools/. 9 package pprof 10 11 import ( 12 "bufio" 13 "bytes" 14 "fmt" 15 "io" 16 "runtime" 17 "sort" 18 "strings" 19 "sync" 20 "text/tabwriter" 21 ) 22 23 // BUG(rsc): Profiles are incomplete and inaccurate on NetBSD and OS X. 24 // See https://golang.org/issue/6047 for details. 25 26 // A Profile is a collection of stack traces showing the call sequences 27 // that led to instances of a particular event, such as allocation. 28 // Packages can create and maintain their own profiles; the most common 29 // use is for tracking resources that must be explicitly closed, such as files 30 // or network connections. 31 // 32 // A Profile's methods can be called from multiple goroutines simultaneously. 33 // 34 // Each Profile has a unique name. A few profiles are predefined: 35 // 36 // goroutine - stack traces of all current goroutines 37 // heap - a sampling of all heap allocations 38 // threadcreate - stack traces that led to the creation of new OS threads 39 // block - stack traces that led to blocking on synchronization primitives 40 // 41 // These predefined profiles maintain themselves and panic on an explicit 42 // Add or Remove method call. 43 // 44 // The heap profile reports statistics as of the most recently completed 45 // garbage collection; it elides more recent allocation to avoid skewing 46 // the profile away from live data and toward garbage. 47 // If there has been no garbage collection at all, the heap profile reports 48 // all known allocations. This exception helps mainly in programs running 49 // without garbage collection enabled, usually for debugging purposes. 50 // 51 // The CPU profile is not available as a Profile. It has a special API, 52 // the StartCPUProfile and StopCPUProfile functions, because it streams 53 // output to a writer during profiling. 54 // 55 type Profile struct { 56 name string 57 mu sync.Mutex 58 m map[interface{}][]uintptr 59 count func() int 60 write func(io.Writer, int) error 61 } 62 63 // profiles records all registered profiles. 64 var profiles struct { 65 mu sync.Mutex 66 m map[string]*Profile 67 } 68 69 var goroutineProfile = &Profile{ 70 name: "goroutine", 71 count: countGoroutine, 72 write: writeGoroutine, 73 } 74 75 var threadcreateProfile = &Profile{ 76 name: "threadcreate", 77 count: countThreadCreate, 78 write: writeThreadCreate, 79 } 80 81 var heapProfile = &Profile{ 82 name: "heap", 83 count: countHeap, 84 write: writeHeap, 85 } 86 87 var blockProfile = &Profile{ 88 name: "block", 89 count: countBlock, 90 write: writeBlock, 91 } 92 93 func lockProfiles() { 94 profiles.mu.Lock() 95 if profiles.m == nil { 96 // Initial built-in profiles. 97 profiles.m = map[string]*Profile{ 98 "goroutine": goroutineProfile, 99 "threadcreate": threadcreateProfile, 100 "heap": heapProfile, 101 "block": blockProfile, 102 } 103 } 104 } 105 106 func unlockProfiles() { 107 profiles.mu.Unlock() 108 } 109 110 // NewProfile creates a new profile with the given name. 111 // If a profile with that name already exists, NewProfile panics. 112 // The convention is to use a 'import/path.' prefix to create 113 // separate name spaces for each package. 114 func NewProfile(name string) *Profile { 115 lockProfiles() 116 defer unlockProfiles() 117 if name == "" { 118 panic("pprof: NewProfile with empty name") 119 } 120 if profiles.m[name] != nil { 121 panic("pprof: NewProfile name already in use: " + name) 122 } 123 p := &Profile{ 124 name: name, 125 m: map[interface{}][]uintptr{}, 126 } 127 profiles.m[name] = p 128 return p 129 } 130 131 // Lookup returns the profile with the given name, or nil if no such profile exists. 132 func Lookup(name string) *Profile { 133 lockProfiles() 134 defer unlockProfiles() 135 return profiles.m[name] 136 } 137 138 // Profiles returns a slice of all the known profiles, sorted by name. 139 func Profiles() []*Profile { 140 lockProfiles() 141 defer unlockProfiles() 142 143 var all []*Profile 144 for _, p := range profiles.m { 145 all = append(all, p) 146 } 147 148 sort.Sort(byName(all)) 149 return all 150 } 151 152 type byName []*Profile 153 154 func (x byName) Len() int { return len(x) } 155 func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 156 func (x byName) Less(i, j int) bool { return x[i].name < x[j].name } 157 158 // Name returns this profile's name, which can be passed to Lookup to reobtain the profile. 159 func (p *Profile) Name() string { 160 return p.name 161 } 162 163 // Count returns the number of execution stacks currently in the profile. 164 func (p *Profile) Count() int { 165 p.mu.Lock() 166 defer p.mu.Unlock() 167 if p.count != nil { 168 return p.count() 169 } 170 return len(p.m) 171 } 172 173 // Add adds the current execution stack to the profile, associated with value. 174 // Add stores value in an internal map, so value must be suitable for use as 175 // a map key and will not be garbage collected until the corresponding 176 // call to Remove. Add panics if the profile already contains a stack for value. 177 // 178 // The skip parameter has the same meaning as runtime.Caller's skip 179 // and controls where the stack trace begins. Passing skip=0 begins the 180 // trace in the function calling Add. For example, given this 181 // execution stack: 182 // 183 // Add 184 // called from rpc.NewClient 185 // called from mypkg.Run 186 // called from main.main 187 // 188 // Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient. 189 // Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run. 190 // 191 func (p *Profile) Add(value interface{}, skip int) { 192 if p.name == "" { 193 panic("pprof: use of uninitialized Profile") 194 } 195 if p.write != nil { 196 panic("pprof: Add called on built-in Profile " + p.name) 197 } 198 199 stk := make([]uintptr, 32) 200 n := runtime.Callers(skip+1, stk[:]) 201 202 p.mu.Lock() 203 defer p.mu.Unlock() 204 if p.m[value] != nil { 205 panic("pprof: Profile.Add of duplicate value") 206 } 207 p.m[value] = stk[:n] 208 } 209 210 // Remove removes the execution stack associated with value from the profile. 211 // It is a no-op if the value is not in the profile. 212 func (p *Profile) Remove(value interface{}) { 213 p.mu.Lock() 214 defer p.mu.Unlock() 215 delete(p.m, value) 216 } 217 218 // WriteTo writes a pprof-formatted snapshot of the profile to w. 219 // If a write to w returns an error, WriteTo returns that error. 220 // Otherwise, WriteTo returns nil. 221 // 222 // The debug parameter enables additional output. 223 // Passing debug=0 prints only the hexadecimal addresses that pprof needs. 224 // Passing debug=1 adds comments translating addresses to function names 225 // and line numbers, so that a programmer can read the profile without tools. 226 // 227 // The predefined profiles may assign meaning to other debug values; 228 // for example, when printing the "goroutine" profile, debug=2 means to 229 // print the goroutine stacks in the same form that a Go program uses 230 // when dying due to an unrecovered panic. 231 func (p *Profile) WriteTo(w io.Writer, debug int) error { 232 if p.name == "" { 233 panic("pprof: use of zero Profile") 234 } 235 if p.write != nil { 236 return p.write(w, debug) 237 } 238 239 // Obtain consistent snapshot under lock; then process without lock. 240 var all [][]uintptr 241 p.mu.Lock() 242 for _, stk := range p.m { 243 all = append(all, stk) 244 } 245 p.mu.Unlock() 246 247 // Map order is non-deterministic; make output deterministic. 248 sort.Sort(stackProfile(all)) 249 250 return printCountProfile(w, debug, p.name, stackProfile(all)) 251 } 252 253 type stackProfile [][]uintptr 254 255 func (x stackProfile) Len() int { return len(x) } 256 func (x stackProfile) Stack(i int) []uintptr { return x[i] } 257 func (x stackProfile) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 258 func (x stackProfile) Less(i, j int) bool { 259 t, u := x[i], x[j] 260 for k := 0; k < len(t) && k < len(u); k++ { 261 if t[k] != u[k] { 262 return t[k] < u[k] 263 } 264 } 265 return len(t) < len(u) 266 } 267 268 // A countProfile is a set of stack traces to be printed as counts 269 // grouped by stack trace. There are multiple implementations: 270 // all that matters is that we can find out how many traces there are 271 // and obtain each trace in turn. 272 type countProfile interface { 273 Len() int 274 Stack(i int) []uintptr 275 } 276 277 // printCountProfile prints a countProfile at the specified debug level. 278 func printCountProfile(w io.Writer, debug int, name string, p countProfile) error { 279 b := bufio.NewWriter(w) 280 var tw *tabwriter.Writer 281 w = b 282 if debug > 0 { 283 tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) 284 w = tw 285 } 286 287 fmt.Fprintf(w, "%s profile: total %d\n", name, p.Len()) 288 289 // Build count of each stack. 290 var buf bytes.Buffer 291 key := func(stk []uintptr) string { 292 buf.Reset() 293 fmt.Fprintf(&buf, "@") 294 for _, pc := range stk { 295 fmt.Fprintf(&buf, " %#x", pc) 296 } 297 return buf.String() 298 } 299 m := map[string]int{} 300 n := p.Len() 301 for i := 0; i < n; i++ { 302 m[key(p.Stack(i))]++ 303 } 304 305 // Print stacks, listing count on first occurrence of a unique stack. 306 for i := 0; i < n; i++ { 307 stk := p.Stack(i) 308 s := key(stk) 309 if count := m[s]; count != 0 { 310 fmt.Fprintf(w, "%d %s\n", count, s) 311 if debug > 0 { 312 printStackRecord(w, stk, false) 313 } 314 delete(m, s) 315 } 316 } 317 318 if tw != nil { 319 tw.Flush() 320 } 321 return b.Flush() 322 } 323 324 // printStackRecord prints the function + source line information 325 // for a single stack trace. 326 func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) { 327 show := allFrames 328 wasPanic := false 329 for i, pc := range stk { 330 f := runtime.FuncForPC(pc) 331 if f == nil { 332 show = true 333 fmt.Fprintf(w, "#\t%#x\n", pc) 334 wasPanic = false 335 } else { 336 tracepc := pc 337 // Back up to call instruction. 338 if i > 0 && pc > f.Entry() && !wasPanic { 339 if runtime.GOARCH == "386" || runtime.GOARCH == "amd64" { 340 tracepc-- 341 } else { 342 tracepc -= 4 // arm, etc 343 } 344 } 345 file, line := f.FileLine(tracepc) 346 name := f.Name() 347 // Hide runtime.goexit and any runtime functions at the beginning. 348 // This is useful mainly for allocation traces. 349 wasPanic = name == "runtime.panic" 350 if name == "runtime.goexit" || !show && strings.HasPrefix(name, "runtime.") { 351 continue 352 } 353 show = true 354 fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", pc, name, pc-f.Entry(), file, line) 355 } 356 } 357 if !show { 358 // We didn't print anything; do it again, 359 // and this time include runtime functions. 360 printStackRecord(w, stk, true) 361 return 362 } 363 fmt.Fprintf(w, "\n") 364 } 365 366 // Interface to system profiles. 367 368 type byInUseBytes []runtime.MemProfileRecord 369 370 func (x byInUseBytes) Len() int { return len(x) } 371 func (x byInUseBytes) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 372 func (x byInUseBytes) Less(i, j int) bool { return x[i].InUseBytes() > x[j].InUseBytes() } 373 374 // WriteHeapProfile is shorthand for Lookup("heap").WriteTo(w, 0). 375 // It is preserved for backwards compatibility. 376 func WriteHeapProfile(w io.Writer) error { 377 return writeHeap(w, 0) 378 } 379 380 // countHeap returns the number of records in the heap profile. 381 func countHeap() int { 382 n, _ := runtime.MemProfile(nil, true) 383 return n 384 } 385 386 // writeHeap writes the current runtime heap profile to w. 387 func writeHeap(w io.Writer, debug int) error { 388 // Find out how many records there are (MemProfile(nil, true)), 389 // allocate that many records, and get the data. 390 // There's a racemore records might be added between 391 // the two callsso allocate a few extra records for safety 392 // and also try again if we're very unlucky. 393 // The loop should only execute one iteration in the common case. 394 var p []runtime.MemProfileRecord 395 n, ok := runtime.MemProfile(nil, true) 396 for { 397 // Allocate room for a slightly bigger profile, 398 // in case a few more entries have been added 399 // since the call to MemProfile. 400 p = make([]runtime.MemProfileRecord, n+50) 401 n, ok = runtime.MemProfile(p, true) 402 if ok { 403 p = p[0:n] 404 break 405 } 406 // Profile grew; try again. 407 } 408 409 sort.Sort(byInUseBytes(p)) 410 411 b := bufio.NewWriter(w) 412 var tw *tabwriter.Writer 413 w = b 414 if debug > 0 { 415 tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) 416 w = tw 417 } 418 419 var total runtime.MemProfileRecord 420 for i := range p { 421 r := &p[i] 422 total.AllocBytes += r.AllocBytes 423 total.AllocObjects += r.AllocObjects 424 total.FreeBytes += r.FreeBytes 425 total.FreeObjects += r.FreeObjects 426 } 427 428 // Technically the rate is MemProfileRate not 2*MemProfileRate, 429 // but early versions of the C++ heap profiler reported 2*MemProfileRate, 430 // so that's what pprof has come to expect. 431 fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n", 432 total.InUseObjects(), total.InUseBytes(), 433 total.AllocObjects, total.AllocBytes, 434 2*runtime.MemProfileRate) 435 436 for i := range p { 437 r := &p[i] 438 fmt.Fprintf(w, "%d: %d [%d: %d] @", 439 r.InUseObjects(), r.InUseBytes(), 440 r.AllocObjects, r.AllocBytes) 441 for _, pc := range r.Stack() { 442 fmt.Fprintf(w, " %#x", pc) 443 } 444 fmt.Fprintf(w, "\n") 445 if debug > 0 { 446 printStackRecord(w, r.Stack(), false) 447 } 448 } 449 450 // Print memstats information too. 451 // Pprof will ignore, but useful for people 452 s := new(runtime.MemStats) 453 runtime.ReadMemStats(s) 454 fmt.Fprintf(w, "\n# runtime.MemStats\n") 455 fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc) 456 fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc) 457 fmt.Fprintf(w, "# Sys = %d\n", s.Sys) 458 fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups) 459 fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs) 460 fmt.Fprintf(w, "# Frees = %d\n", s.Frees) 461 462 fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc) 463 fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys) 464 fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle) 465 fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse) 466 fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased) 467 fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects) 468 469 fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys) 470 fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys) 471 fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys) 472 fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys) 473 474 fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC) 475 fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs) 476 fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC) 477 fmt.Fprintf(w, "# EnableGC = %v\n", s.EnableGC) 478 fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC) 479 480 if tw != nil { 481 tw.Flush() 482 } 483 return b.Flush() 484 } 485 486 // countThreadCreate returns the size of the current ThreadCreateProfile. 487 func countThreadCreate() int { 488 n, _ := runtime.ThreadCreateProfile(nil) 489 return n 490 } 491 492 // writeThreadCreate writes the current runtime ThreadCreateProfile to w. 493 func writeThreadCreate(w io.Writer, debug int) error { 494 return writeRuntimeProfile(w, debug, "threadcreate", runtime.ThreadCreateProfile) 495 } 496 497 // countGoroutine returns the number of goroutines. 498 func countGoroutine() int { 499 return runtime.NumGoroutine() 500 } 501 502 // writeGoroutine writes the current runtime GoroutineProfile to w. 503 func writeGoroutine(w io.Writer, debug int) error { 504 if debug >= 2 { 505 return writeGoroutineStacks(w) 506 } 507 return writeRuntimeProfile(w, debug, "goroutine", runtime.GoroutineProfile) 508 } 509 510 func writeGoroutineStacks(w io.Writer) error { 511 // We don't know how big the buffer needs to be to collect 512 // all the goroutines. Start with 1 MB and try a few times, doubling each time. 513 // Give up and use a truncated trace if 64 MB is not enough. 514 buf := make([]byte, 1<<20) 515 for i := 0; ; i++ { 516 n := runtime.Stack(buf, true) 517 if n < len(buf) { 518 buf = buf[:n] 519 break 520 } 521 if len(buf) >= 64<<20 { 522 // Filled 64 MB - stop there. 523 break 524 } 525 buf = make([]byte, 2*len(buf)) 526 } 527 _, err := w.Write(buf) 528 return err 529 } 530 531 func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]runtime.StackRecord) (int, bool)) error { 532 // Find out how many records there are (fetch(nil)), 533 // allocate that many records, and get the data. 534 // There's a racemore records might be added between 535 // the two callsso allocate a few extra records for safety 536 // and also try again if we're very unlucky. 537 // The loop should only execute one iteration in the common case. 538 var p []runtime.StackRecord 539 n, ok := fetch(nil) 540 for { 541 // Allocate room for a slightly bigger profile, 542 // in case a few more entries have been added 543 // since the call to ThreadProfile. 544 p = make([]runtime.StackRecord, n+10) 545 n, ok = fetch(p) 546 if ok { 547 p = p[0:n] 548 break 549 } 550 // Profile grew; try again. 551 } 552 553 return printCountProfile(w, debug, name, runtimeProfile(p)) 554 } 555 556 type runtimeProfile []runtime.StackRecord 557 558 func (p runtimeProfile) Len() int { return len(p) } 559 func (p runtimeProfile) Stack(i int) []uintptr { return p[i].Stack() } 560 561 var cpu struct { 562 sync.Mutex 563 profiling bool 564 done chan bool 565 } 566 567 // StartCPUProfile enables CPU profiling for the current process. 568 // While profiling, the profile will be buffered and written to w. 569 // StartCPUProfile returns an error if profiling is already enabled. 570 func StartCPUProfile(w io.Writer) error { 571 // The runtime routines allow a variable profiling rate, 572 // but in practice operating systems cannot trigger signals 573 // at more than about 500 Hz, and our processing of the 574 // signal is not cheap (mostly getting the stack trace). 575 // 100 Hz is a reasonable choice: it is frequent enough to 576 // produce useful data, rare enough not to bog down the 577 // system, and a nice round number to make it easy to 578 // convert sample counts to seconds. Instead of requiring 579 // each client to specify the frequency, we hard code it. 580 const hz = 100 581 582 cpu.Lock() 583 defer cpu.Unlock() 584 if cpu.done == nil { 585 cpu.done = make(chan bool) 586 } 587 // Double-check. 588 if cpu.profiling { 589 return fmt.Errorf("cpu profiling already in use") 590 } 591 cpu.profiling = true 592 runtime.SetCPUProfileRate(hz) 593 go profileWriter(w) 594 return nil 595 } 596 597 func profileWriter(w io.Writer) { 598 for { 599 data := runtime.CPUProfile() 600 if data == nil { 601 break 602 } 603 w.Write(data) 604 } 605 cpu.done <- true 606 } 607 608 // StopCPUProfile stops the current CPU profile, if any. 609 // StopCPUProfile only returns after all the writes for the 610 // profile have completed. 611 func StopCPUProfile() { 612 cpu.Lock() 613 defer cpu.Unlock() 614 615 if !cpu.profiling { 616 return 617 } 618 cpu.profiling = false 619 runtime.SetCPUProfileRate(0) 620 <-cpu.done 621 } 622 623 type byCycles []runtime.BlockProfileRecord 624 625 func (x byCycles) Len() int { return len(x) } 626 func (x byCycles) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 627 func (x byCycles) Less(i, j int) bool { return x[i].Cycles > x[j].Cycles } 628 629 // countBlock returns the number of records in the blocking profile. 630 func countBlock() int { 631 n, _ := runtime.BlockProfile(nil) 632 return n 633 } 634 635 // writeBlock writes the current blocking profile to w. 636 func writeBlock(w io.Writer, debug int) error { 637 var p []runtime.BlockProfileRecord 638 n, ok := runtime.BlockProfile(nil) 639 for { 640 p = make([]runtime.BlockProfileRecord, n+50) 641 n, ok = runtime.BlockProfile(p) 642 if ok { 643 p = p[:n] 644 break 645 } 646 } 647 648 sort.Sort(byCycles(p)) 649 650 b := bufio.NewWriter(w) 651 var tw *tabwriter.Writer 652 w = b 653 if debug > 0 { 654 tw = tabwriter.NewWriter(w, 1, 8, 1, '\t', 0) 655 w = tw 656 } 657 658 fmt.Fprintf(w, "--- contention:\n") 659 fmt.Fprintf(w, "cycles/second=%v\n", runtime_cyclesPerSecond()) 660 for i := range p { 661 r := &p[i] 662 fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count) 663 for _, pc := range r.Stack() { 664 fmt.Fprintf(w, " %#x", pc) 665 } 666 fmt.Fprint(w, "\n") 667 if debug > 0 { 668 printStackRecord(w, r.Stack(), true) 669 } 670 } 671 672 if tw != nil { 673 tw.Flush() 674 } 675 return b.Flush() 676 } 677 678 func runtime_cyclesPerSecond() int64 679