1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // This file implements parsers to convert legacy profiles into the 16 // profile.proto format. 17 18 package profile 19 20 import ( 21 "bufio" 22 "bytes" 23 "fmt" 24 "io" 25 "math" 26 "regexp" 27 "strconv" 28 "strings" 29 ) 30 31 var ( 32 countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`) 33 countRE = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`) 34 35 heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`) 36 heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`) 37 38 contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`) 39 40 hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`) 41 42 growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`) 43 44 fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`) 45 46 threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`) 47 threadStartRE = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`) 48 49 // Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools. 50 // Recommended format: 51 // Start End object file name offset(optional) linker build id 52 // 0x40000-0x80000 /path/to/binary (@FF00) abc123456 53 spaceDigits = `\s+[[:digit:]]+` 54 hexPair = `\s+[[:xdigit:]]+:[[:xdigit:]]+` 55 oSpace = `\s*` 56 // Capturing expressions. 57 cHex = `(?:0x)?([[:xdigit:]]+)` 58 cHexRange = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?` 59 cSpaceString = `(?:\s+(\S+))?` 60 cSpaceHex = `(?:\s+([[:xdigit:]]+))?` 61 cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?` 62 cPerm = `(?:\s+([-rwxp]+))?` 63 64 procMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString) 65 briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex) 66 67 // Regular expression to parse log data, of the form: 68 // ... file:line] msg... 69 logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`) 70 ) 71 72 func isSpaceOrComment(line string) bool { 73 trimmed := strings.TrimSpace(line) 74 return len(trimmed) == 0 || trimmed[0] == '#' 75 } 76 77 // parseGoCount parses a Go count profile (e.g., threadcreate or 78 // goroutine) and returns a new Profile. 79 func parseGoCount(b []byte) (*Profile, error) { 80 s := bufio.NewScanner(bytes.NewBuffer(b)) 81 // Skip comments at the beginning of the file. 82 for s.Scan() && isSpaceOrComment(s.Text()) { 83 } 84 if err := s.Err(); err != nil { 85 return nil, err 86 } 87 m := countStartRE.FindStringSubmatch(s.Text()) 88 if m == nil { 89 return nil, errUnrecognized 90 } 91 profileType := m[1] 92 p := &Profile{ 93 PeriodType: &ValueType{Type: profileType, Unit: "count"}, 94 Period: 1, 95 SampleType: []*ValueType{{Type: profileType, Unit: "count"}}, 96 } 97 locations := make(map[uint64]*Location) 98 for s.Scan() { 99 line := s.Text() 100 if isSpaceOrComment(line) { 101 continue 102 } 103 if strings.HasPrefix(line, "---") { 104 break 105 } 106 m := countRE.FindStringSubmatch(line) 107 if m == nil { 108 return nil, errMalformed 109 } 110 n, err := strconv.ParseInt(m[1], 0, 64) 111 if err != nil { 112 return nil, errMalformed 113 } 114 fields := strings.Fields(m[2]) 115 locs := make([]*Location, 0, len(fields)) 116 for _, stk := range fields { 117 addr, err := strconv.ParseUint(stk, 0, 64) 118 if err != nil { 119 return nil, errMalformed 120 } 121 // Adjust all frames by -1 to land on top of the call instruction. 122 addr-- 123 loc := locations[addr] 124 if loc == nil { 125 loc = &Location{ 126 Address: addr, 127 } 128 locations[addr] = loc 129 p.Location = append(p.Location, loc) 130 } 131 locs = append(locs, loc) 132 } 133 p.Sample = append(p.Sample, &Sample{ 134 Location: locs, 135 Value: []int64{n}, 136 }) 137 } 138 if err := s.Err(); err != nil { 139 return nil, err 140 } 141 142 if err := parseAdditionalSections(s, p); err != nil { 143 return nil, err 144 } 145 return p, nil 146 } 147 148 // remapLocationIDs ensures there is a location for each address 149 // referenced by a sample, and remaps the samples to point to the new 150 // location ids. 151 func (p *Profile) remapLocationIDs() { 152 seen := make(map[*Location]bool, len(p.Location)) 153 var locs []*Location 154 155 for _, s := range p.Sample { 156 for _, l := range s.Location { 157 if seen[l] { 158 continue 159 } 160 l.ID = uint64(len(locs) + 1) 161 locs = append(locs, l) 162 seen[l] = true 163 } 164 } 165 p.Location = locs 166 } 167 168 func (p *Profile) remapFunctionIDs() { 169 seen := make(map[*Function]bool, len(p.Function)) 170 var fns []*Function 171 172 for _, l := range p.Location { 173 for _, ln := range l.Line { 174 fn := ln.Function 175 if fn == nil || seen[fn] { 176 continue 177 } 178 fn.ID = uint64(len(fns) + 1) 179 fns = append(fns, fn) 180 seen[fn] = true 181 } 182 } 183 p.Function = fns 184 } 185 186 // remapMappingIDs matches location addresses with existing mappings 187 // and updates them appropriately. This is O(N*M), if this ever shows 188 // up as a bottleneck, evaluate sorting the mappings and doing a 189 // binary search, which would make it O(N*log(M)). 190 func (p *Profile) remapMappingIDs() { 191 // Some profile handlers will incorrectly set regions for the main 192 // executable if its section is remapped. Fix them through heuristics. 193 194 if len(p.Mapping) > 0 { 195 // Remove the initial mapping if named '/anon_hugepage' and has a 196 // consecutive adjacent mapping. 197 if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") { 198 if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start { 199 p.Mapping = p.Mapping[1:] 200 } 201 } 202 } 203 204 // Subtract the offset from the start of the main mapping if it 205 // ends up at a recognizable start address. 206 if len(p.Mapping) > 0 { 207 const expectedStart = 0x400000 208 if m := p.Mapping[0]; m.Start-m.Offset == expectedStart { 209 m.Start = expectedStart 210 m.Offset = 0 211 } 212 } 213 214 // Associate each location with an address to the corresponding 215 // mapping. Create fake mapping if a suitable one isn't found. 216 var fake *Mapping 217 nextLocation: 218 for _, l := range p.Location { 219 a := l.Address 220 if l.Mapping != nil || a == 0 { 221 continue 222 } 223 for _, m := range p.Mapping { 224 if m.Start <= a && a < m.Limit { 225 l.Mapping = m 226 continue nextLocation 227 } 228 } 229 // Work around legacy handlers failing to encode the first 230 // part of mappings split into adjacent ranges. 231 for _, m := range p.Mapping { 232 if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start { 233 m.Start -= m.Offset 234 m.Offset = 0 235 l.Mapping = m 236 continue nextLocation 237 } 238 } 239 // If there is still no mapping, create a fake one. 240 // This is important for the Go legacy handler, which produced 241 // no mappings. 242 if fake == nil { 243 fake = &Mapping{ 244 ID: 1, 245 Limit: ^uint64(0), 246 } 247 p.Mapping = append(p.Mapping, fake) 248 } 249 l.Mapping = fake 250 } 251 252 // Reset all mapping IDs. 253 for i, m := range p.Mapping { 254 m.ID = uint64(i + 1) 255 } 256 } 257 258 var cpuInts = []func([]byte) (uint64, []byte){ 259 get32l, 260 get32b, 261 get64l, 262 get64b, 263 } 264 265 func get32l(b []byte) (uint64, []byte) { 266 if len(b) < 4 { 267 return 0, nil 268 } 269 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:] 270 } 271 272 func get32b(b []byte) (uint64, []byte) { 273 if len(b) < 4 { 274 return 0, nil 275 } 276 return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:] 277 } 278 279 func get64l(b []byte) (uint64, []byte) { 280 if len(b) < 8 { 281 return 0, nil 282 } 283 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:] 284 } 285 286 func get64b(b []byte) (uint64, []byte) { 287 if len(b) < 8 { 288 return 0, nil 289 } 290 return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:] 291 } 292 293 // parseCPU parses a profilez legacy profile and returns a newly 294 // populated Profile. 295 // 296 // The general format for profilez samples is a sequence of words in 297 // binary format. The first words are a header with the following data: 298 // 1st word -- 0 299 // 2nd word -- 3 300 // 3rd word -- 0 if a c++ application, 1 if a java application. 301 // 4th word -- Sampling period (in microseconds). 302 // 5th word -- Padding. 303 func parseCPU(b []byte) (*Profile, error) { 304 var parse func([]byte) (uint64, []byte) 305 var n1, n2, n3, n4, n5 uint64 306 for _, parse = range cpuInts { 307 var tmp []byte 308 n1, tmp = parse(b) 309 n2, tmp = parse(tmp) 310 n3, tmp = parse(tmp) 311 n4, tmp = parse(tmp) 312 n5, tmp = parse(tmp) 313 314 if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 { 315 b = tmp 316 return cpuProfile(b, int64(n4), parse) 317 } 318 if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 { 319 b = tmp 320 return javaCPUProfile(b, int64(n4), parse) 321 } 322 } 323 return nil, errUnrecognized 324 } 325 326 // cpuProfile returns a new Profile from C++ profilez data. 327 // b is the profile bytes after the header, period is the profiling 328 // period, and parse is a function to parse 8-byte chunks from the 329 // profile in its native endianness. 330 func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) { 331 p := &Profile{ 332 Period: period * 1000, 333 PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"}, 334 SampleType: []*ValueType{ 335 {Type: "samples", Unit: "count"}, 336 {Type: "cpu", Unit: "nanoseconds"}, 337 }, 338 } 339 var err error 340 if b, _, err = parseCPUSamples(b, parse, true, p); err != nil { 341 return nil, err 342 } 343 344 // If *most* samples have the same second-to-the-bottom frame, it 345 // strongly suggests that it is an uninteresting artifact of 346 // measurement -- a stack frame pushed by the signal handler. The 347 // bottom frame is always correct as it is picked up from the signal 348 // structure, not the stack. Check if this is the case and if so, 349 // remove. 350 351 // Remove up to two frames. 352 maxiter := 2 353 // Allow one different sample for this many samples with the same 354 // second-to-last frame. 355 similarSamples := 32 356 margin := len(p.Sample) / similarSamples 357 358 for iter := 0; iter < maxiter; iter++ { 359 addr1 := make(map[uint64]int) 360 for _, s := range p.Sample { 361 if len(s.Location) > 1 { 362 a := s.Location[1].Address 363 addr1[a] = addr1[a] + 1 364 } 365 } 366 367 for id1, count := range addr1 { 368 if count >= len(p.Sample)-margin { 369 // Found uninteresting frame, strip it out from all samples 370 for _, s := range p.Sample { 371 if len(s.Location) > 1 && s.Location[1].Address == id1 { 372 s.Location = append(s.Location[:1], s.Location[2:]...) 373 } 374 } 375 break 376 } 377 } 378 } 379 380 if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil { 381 return nil, err 382 } 383 384 cleanupDuplicateLocations(p) 385 return p, nil 386 } 387 388 func cleanupDuplicateLocations(p *Profile) { 389 // The profile handler may duplicate the leaf frame, because it gets 390 // its address both from stack unwinding and from the signal 391 // context. Detect this and delete the duplicate, which has been 392 // adjusted by -1. The leaf address should not be adjusted as it is 393 // not a call. 394 for _, s := range p.Sample { 395 if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 { 396 s.Location = append(s.Location[:1], s.Location[2:]...) 397 } 398 } 399 } 400 401 // parseCPUSamples parses a collection of profilez samples from a 402 // profile. 403 // 404 // profilez samples are a repeated sequence of stack frames of the 405 // form: 406 // 1st word -- The number of times this stack was encountered. 407 // 2nd word -- The size of the stack (StackSize). 408 // 3rd word -- The first address on the stack. 409 // ... 410 // StackSize + 2 -- The last address on the stack 411 // The last stack trace is of the form: 412 // 1st word -- 0 413 // 2nd word -- 1 414 // 3rd word -- 0 415 // 416 // Addresses from stack traces may point to the next instruction after 417 // each call. Optionally adjust by -1 to land somewhere on the actual 418 // call (except for the leaf, which is not a call). 419 func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) { 420 locs := make(map[uint64]*Location) 421 for len(b) > 0 { 422 var count, nstk uint64 423 count, b = parse(b) 424 nstk, b = parse(b) 425 if b == nil || nstk > uint64(len(b)/4) { 426 return nil, nil, errUnrecognized 427 } 428 var sloc []*Location 429 addrs := make([]uint64, nstk) 430 for i := 0; i < int(nstk); i++ { 431 addrs[i], b = parse(b) 432 } 433 434 if count == 0 && nstk == 1 && addrs[0] == 0 { 435 // End of data marker 436 break 437 } 438 for i, addr := range addrs { 439 if adjust && i > 0 { 440 addr-- 441 } 442 loc := locs[addr] 443 if loc == nil { 444 loc = &Location{ 445 Address: addr, 446 } 447 locs[addr] = loc 448 p.Location = append(p.Location, loc) 449 } 450 sloc = append(sloc, loc) 451 } 452 p.Sample = append(p.Sample, 453 &Sample{ 454 Value: []int64{int64(count), int64(count) * p.Period}, 455 Location: sloc, 456 }) 457 } 458 // Reached the end without finding the EOD marker. 459 return b, locs, nil 460 } 461 462 // parseHeap parses a heapz legacy or a growthz profile and 463 // returns a newly populated Profile. 464 func parseHeap(b []byte) (p *Profile, err error) { 465 s := bufio.NewScanner(bytes.NewBuffer(b)) 466 if !s.Scan() { 467 if err := s.Err(); err != nil { 468 return nil, err 469 } 470 return nil, errUnrecognized 471 } 472 p = &Profile{} 473 474 sampling := "" 475 hasAlloc := false 476 477 line := s.Text() 478 p.PeriodType = &ValueType{Type: "space", Unit: "bytes"} 479 if header := heapHeaderRE.FindStringSubmatch(line); header != nil { 480 sampling, p.Period, hasAlloc, err = parseHeapHeader(line) 481 if err != nil { 482 return nil, err 483 } 484 } else if header = growthHeaderRE.FindStringSubmatch(line); header != nil { 485 p.Period = 1 486 } else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil { 487 p.Period = 1 488 } else { 489 return nil, errUnrecognized 490 } 491 492 if hasAlloc { 493 // Put alloc before inuse so that default pprof selection 494 // will prefer inuse_space. 495 p.SampleType = []*ValueType{ 496 {Type: "alloc_objects", Unit: "count"}, 497 {Type: "alloc_space", Unit: "bytes"}, 498 {Type: "inuse_objects", Unit: "count"}, 499 {Type: "inuse_space", Unit: "bytes"}, 500 } 501 } else { 502 p.SampleType = []*ValueType{ 503 {Type: "objects", Unit: "count"}, 504 {Type: "space", Unit: "bytes"}, 505 } 506 } 507 508 locs := make(map[uint64]*Location) 509 for s.Scan() { 510 line := strings.TrimSpace(s.Text()) 511 512 if isSpaceOrComment(line) { 513 continue 514 } 515 516 if isMemoryMapSentinel(line) { 517 break 518 } 519 520 value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc) 521 if err != nil { 522 return nil, err 523 } 524 525 var sloc []*Location 526 for _, addr := range addrs { 527 // Addresses from stack traces point to the next instruction after 528 // each call. Adjust by -1 to land somewhere on the actual call. 529 addr-- 530 loc := locs[addr] 531 if locs[addr] == nil { 532 loc = &Location{ 533 Address: addr, 534 } 535 p.Location = append(p.Location, loc) 536 locs[addr] = loc 537 } 538 sloc = append(sloc, loc) 539 } 540 541 p.Sample = append(p.Sample, &Sample{ 542 Value: value, 543 Location: sloc, 544 NumLabel: map[string][]int64{"bytes": {blocksize}}, 545 }) 546 } 547 if err := s.Err(); err != nil { 548 return nil, err 549 } 550 if err := parseAdditionalSections(s, p); err != nil { 551 return nil, err 552 } 553 return p, nil 554 } 555 556 func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) { 557 header := heapHeaderRE.FindStringSubmatch(line) 558 if header == nil { 559 return "", 0, false, errUnrecognized 560 } 561 562 if len(header[6]) > 0 { 563 if period, err = strconv.ParseInt(header[6], 10, 64); err != nil { 564 return "", 0, false, errUnrecognized 565 } 566 } 567 568 if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") { 569 hasAlloc = true 570 } 571 572 switch header[5] { 573 case "heapz_v2", "heap_v2": 574 return "v2", period, hasAlloc, nil 575 case "heapprofile": 576 return "", 1, hasAlloc, nil 577 case "heap": 578 return "v2", period / 2, hasAlloc, nil 579 default: 580 return "", 0, false, errUnrecognized 581 } 582 } 583 584 // parseHeapSample parses a single row from a heap profile into a new Sample. 585 func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) { 586 sampleData := heapSampleRE.FindStringSubmatch(line) 587 if len(sampleData) != 6 { 588 return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData)) 589 } 590 591 // This is a local-scoped helper function to avoid needing to pass 592 // around rate, sampling and many return parameters. 593 addValues := func(countString, sizeString string, label string) error { 594 count, err := strconv.ParseInt(countString, 10, 64) 595 if err != nil { 596 return fmt.Errorf("malformed sample: %s: %v", line, err) 597 } 598 size, err := strconv.ParseInt(sizeString, 10, 64) 599 if err != nil { 600 return fmt.Errorf("malformed sample: %s: %v", line, err) 601 } 602 if count == 0 && size != 0 { 603 return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size) 604 } 605 if count != 0 { 606 blocksize = size / count 607 if sampling == "v2" { 608 count, size = scaleHeapSample(count, size, rate) 609 } 610 } 611 value = append(value, count, size) 612 return nil 613 } 614 615 if includeAlloc { 616 if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil { 617 return nil, 0, nil, err 618 } 619 } 620 621 if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil { 622 return nil, 0, nil, err 623 } 624 625 addrs, err = parseHexAddresses(sampleData[5]) 626 if err != nil { 627 return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err) 628 } 629 630 return value, blocksize, addrs, nil 631 } 632 633 // parseHexAddresses extracts hex numbers from a string, attempts to convert 634 // each to an unsigned 64-bit number and returns the resulting numbers as a 635 // slice, or an error if the string contains hex numbers which are too large to 636 // handle (which means a malformed profile). 637 func parseHexAddresses(s string) ([]uint64, error) { 638 hexStrings := hexNumberRE.FindAllString(s, -1) 639 var addrs []uint64 640 for _, s := range hexStrings { 641 if addr, err := strconv.ParseUint(s, 0, 64); err == nil { 642 addrs = append(addrs, addr) 643 } else { 644 return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s) 645 } 646 } 647 return addrs, nil 648 } 649 650 // scaleHeapSample adjusts the data from a heapz Sample to 651 // account for its probability of appearing in the collected 652 // data. heapz profiles are a sampling of the memory allocations 653 // requests in a program. We estimate the unsampled value by dividing 654 // each collected sample by its probability of appearing in the 655 // profile. heapz v2 profiles rely on a poisson process to determine 656 // which samples to collect, based on the desired average collection 657 // rate R. The probability of a sample of size S to appear in that 658 // profile is 1-exp(-S/R). 659 func scaleHeapSample(count, size, rate int64) (int64, int64) { 660 if count == 0 || size == 0 { 661 return 0, 0 662 } 663 664 if rate <= 1 { 665 // if rate==1 all samples were collected so no adjustment is needed. 666 // if rate<1 treat as unknown and skip scaling. 667 return count, size 668 } 669 670 avgSize := float64(size) / float64(count) 671 scale := 1 / (1 - math.Exp(-avgSize/float64(rate))) 672 673 return int64(float64(count) * scale), int64(float64(size) * scale) 674 } 675 676 // parseContention parses a mutex or contention profile. There are 2 cases: 677 // "--- contentionz " for legacy C++ profiles (and backwards compatibility) 678 // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime. 679 func parseContention(b []byte) (*Profile, error) { 680 s := bufio.NewScanner(bytes.NewBuffer(b)) 681 if !s.Scan() { 682 if err := s.Err(); err != nil { 683 return nil, err 684 } 685 return nil, errUnrecognized 686 } 687 688 switch l := s.Text(); { 689 case strings.HasPrefix(l, "--- contentionz "): 690 case strings.HasPrefix(l, "--- mutex:"): 691 case strings.HasPrefix(l, "--- contention:"): 692 default: 693 return nil, errUnrecognized 694 } 695 696 p := &Profile{ 697 PeriodType: &ValueType{Type: "contentions", Unit: "count"}, 698 Period: 1, 699 SampleType: []*ValueType{ 700 {Type: "contentions", Unit: "count"}, 701 {Type: "delay", Unit: "nanoseconds"}, 702 }, 703 } 704 705 var cpuHz int64 706 // Parse text of the form "attribute = value" before the samples. 707 const delimiter = "=" 708 for s.Scan() { 709 line := s.Text() 710 if line = strings.TrimSpace(line); isSpaceOrComment(line) { 711 continue 712 } 713 if strings.HasPrefix(line, "---") { 714 break 715 } 716 attr := strings.SplitN(line, delimiter, 2) 717 if len(attr) != 2 { 718 break 719 } 720 key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]) 721 var err error 722 switch key { 723 case "cycles/second": 724 if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil { 725 return nil, errUnrecognized 726 } 727 case "sampling period": 728 if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil { 729 return nil, errUnrecognized 730 } 731 case "ms since reset": 732 ms, err := strconv.ParseInt(val, 0, 64) 733 if err != nil { 734 return nil, errUnrecognized 735 } 736 p.DurationNanos = ms * 1000 * 1000 737 case "format": 738 // CPP contentionz profiles don't have format. 739 return nil, errUnrecognized 740 case "resolution": 741 // CPP contentionz profiles don't have resolution. 742 return nil, errUnrecognized 743 case "discarded samples": 744 default: 745 return nil, errUnrecognized 746 } 747 } 748 if err := s.Err(); err != nil { 749 return nil, err 750 } 751 752 locs := make(map[uint64]*Location) 753 for { 754 line := strings.TrimSpace(s.Text()) 755 if strings.HasPrefix(line, "---") { 756 break 757 } 758 if !isSpaceOrComment(line) { 759 value, addrs, err := parseContentionSample(line, p.Period, cpuHz) 760 if err != nil { 761 return nil, err 762 } 763 var sloc []*Location 764 for _, addr := range addrs { 765 // Addresses from stack traces point to the next instruction after 766 // each call. Adjust by -1 to land somewhere on the actual call. 767 addr-- 768 loc := locs[addr] 769 if locs[addr] == nil { 770 loc = &Location{ 771 Address: addr, 772 } 773 p.Location = append(p.Location, loc) 774 locs[addr] = loc 775 } 776 sloc = append(sloc, loc) 777 } 778 p.Sample = append(p.Sample, &Sample{ 779 Value: value, 780 Location: sloc, 781 }) 782 } 783 if !s.Scan() { 784 break 785 } 786 } 787 if err := s.Err(); err != nil { 788 return nil, err 789 } 790 791 if err := parseAdditionalSections(s, p); err != nil { 792 return nil, err 793 } 794 795 return p, nil 796 } 797 798 // parseContentionSample parses a single row from a contention profile 799 // into a new Sample. 800 func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) { 801 sampleData := contentionSampleRE.FindStringSubmatch(line) 802 if sampleData == nil { 803 return nil, nil, errUnrecognized 804 } 805 806 v1, err := strconv.ParseInt(sampleData[1], 10, 64) 807 if err != nil { 808 return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err) 809 } 810 v2, err := strconv.ParseInt(sampleData[2], 10, 64) 811 if err != nil { 812 return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err) 813 } 814 815 // Unsample values if period and cpuHz are available. 816 // - Delays are scaled to cycles and then to nanoseconds. 817 // - Contentions are scaled to cycles. 818 if period > 0 { 819 if cpuHz > 0 { 820 cpuGHz := float64(cpuHz) / 1e9 821 v1 = int64(float64(v1) * float64(period) / cpuGHz) 822 } 823 v2 = v2 * period 824 } 825 826 value = []int64{v2, v1} 827 addrs, err = parseHexAddresses(sampleData[3]) 828 if err != nil { 829 return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err) 830 } 831 832 return value, addrs, nil 833 } 834 835 // parseThread parses a Threadz profile and returns a new Profile. 836 func parseThread(b []byte) (*Profile, error) { 837 s := bufio.NewScanner(bytes.NewBuffer(b)) 838 // Skip past comments and empty lines seeking a real header. 839 for s.Scan() && isSpaceOrComment(s.Text()) { 840 } 841 842 line := s.Text() 843 if m := threadzStartRE.FindStringSubmatch(line); m != nil { 844 // Advance over initial comments until first stack trace. 845 for s.Scan() { 846 if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") { 847 break 848 } 849 } 850 } else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { 851 return nil, errUnrecognized 852 } 853 854 p := &Profile{ 855 SampleType: []*ValueType{{Type: "thread", Unit: "count"}}, 856 PeriodType: &ValueType{Type: "thread", Unit: "count"}, 857 Period: 1, 858 } 859 860 locs := make(map[uint64]*Location) 861 // Recognize each thread and populate profile samples. 862 for !isMemoryMapSentinel(line) { 863 if strings.HasPrefix(line, "---- no stack trace for") { 864 line = "" 865 break 866 } 867 if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 { 868 return nil, errUnrecognized 869 } 870 871 var addrs []uint64 872 var err error 873 line, addrs, err = parseThreadSample(s) 874 if err != nil { 875 return nil, err 876 } 877 if len(addrs) == 0 { 878 // We got a --same as previous threads--. Bump counters. 879 if len(p.Sample) > 0 { 880 s := p.Sample[len(p.Sample)-1] 881 s.Value[0]++ 882 } 883 continue 884 } 885 886 var sloc []*Location 887 for i, addr := range addrs { 888 // Addresses from stack traces point to the next instruction after 889 // each call. Adjust by -1 to land somewhere on the actual call 890 // (except for the leaf, which is not a call). 891 if i > 0 { 892 addr-- 893 } 894 loc := locs[addr] 895 if locs[addr] == nil { 896 loc = &Location{ 897 Address: addr, 898 } 899 p.Location = append(p.Location, loc) 900 locs[addr] = loc 901 } 902 sloc = append(sloc, loc) 903 } 904 905 p.Sample = append(p.Sample, &Sample{ 906 Value: []int64{1}, 907 Location: sloc, 908 }) 909 } 910 911 if err := parseAdditionalSections(s, p); err != nil { 912 return nil, err 913 } 914 915 cleanupDuplicateLocations(p) 916 return p, nil 917 } 918 919 // parseThreadSample parses a symbolized or unsymbolized stack trace. 920 // Returns the first line after the traceback, the sample (or nil if 921 // it hits a 'same-as-previous' marker) and an error. 922 func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) { 923 var line string 924 sameAsPrevious := false 925 for s.Scan() { 926 line = strings.TrimSpace(s.Text()) 927 if line == "" { 928 continue 929 } 930 931 if strings.HasPrefix(line, "---") { 932 break 933 } 934 if strings.Contains(line, "same as previous thread") { 935 sameAsPrevious = true 936 continue 937 } 938 939 curAddrs, err := parseHexAddresses(line) 940 if err != nil { 941 return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err) 942 } 943 addrs = append(addrs, curAddrs...) 944 } 945 if err := s.Err(); err != nil { 946 return "", nil, err 947 } 948 if sameAsPrevious { 949 return line, nil, nil 950 } 951 return line, addrs, nil 952 } 953 954 // parseAdditionalSections parses any additional sections in the 955 // profile, ignoring any unrecognized sections. 956 func parseAdditionalSections(s *bufio.Scanner, p *Profile) error { 957 for !isMemoryMapSentinel(s.Text()) && s.Scan() { 958 } 959 if err := s.Err(); err != nil { 960 return err 961 } 962 return p.ParseMemoryMapFromScanner(s) 963 } 964 965 // ParseProcMaps parses a memory map in the format of /proc/self/maps. 966 // ParseMemoryMap should be called after setting on a profile to 967 // associate locations to the corresponding mapping based on their 968 // address. 969 func ParseProcMaps(rd io.Reader) ([]*Mapping, error) { 970 s := bufio.NewScanner(rd) 971 return parseProcMapsFromScanner(s) 972 } 973 974 func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) { 975 var mapping []*Mapping 976 977 var attrs []string 978 const delimiter = "=" 979 r := strings.NewReplacer() 980 for s.Scan() { 981 line := r.Replace(removeLoggingInfo(s.Text())) 982 m, err := parseMappingEntry(line) 983 if err != nil { 984 if err == errUnrecognized { 985 // Recognize assignments of the form: attr=value, and replace 986 // $attr with value on subsequent mappings. 987 if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 { 988 attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])) 989 r = strings.NewReplacer(attrs...) 990 } 991 // Ignore any unrecognized entries 992 continue 993 } 994 return nil, err 995 } 996 if m == nil { 997 continue 998 } 999 mapping = append(mapping, m) 1000 } 1001 if err := s.Err(); err != nil { 1002 return nil, err 1003 } 1004 return mapping, nil 1005 } 1006 1007 // removeLoggingInfo detects and removes log prefix entries generated 1008 // by the glog package. If no logging prefix is detected, the string 1009 // is returned unmodified. 1010 func removeLoggingInfo(line string) string { 1011 if match := logInfoRE.FindStringIndex(line); match != nil { 1012 return line[match[1]:] 1013 } 1014 return line 1015 } 1016 1017 // ParseMemoryMap parses a memory map in the format of 1018 // /proc/self/maps, and overrides the mappings in the current profile. 1019 // It renumbers the samples and locations in the profile correspondingly. 1020 func (p *Profile) ParseMemoryMap(rd io.Reader) error { 1021 return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd)) 1022 } 1023 1024 // ParseMemoryMapFromScanner parses a memory map in the format of 1025 // /proc/self/maps or a variety of legacy format, and overrides the 1026 // mappings in the current profile. It renumbers the samples and 1027 // locations in the profile correspondingly. 1028 func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error { 1029 mapping, err := parseProcMapsFromScanner(s) 1030 if err != nil { 1031 return err 1032 } 1033 p.Mapping = append(p.Mapping, mapping...) 1034 p.massageMappings() 1035 p.remapLocationIDs() 1036 p.remapFunctionIDs() 1037 p.remapMappingIDs() 1038 return nil 1039 } 1040 1041 func parseMappingEntry(l string) (*Mapping, error) { 1042 var start, end, perm, file, offset, buildID string 1043 if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 { 1044 start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5] 1045 } else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 { 1046 start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6] 1047 } else { 1048 return nil, errUnrecognized 1049 } 1050 1051 var err error 1052 mapping := &Mapping{ 1053 File: file, 1054 BuildID: buildID, 1055 } 1056 if perm != "" && !strings.Contains(perm, "x") { 1057 // Skip non-executable entries. 1058 return nil, nil 1059 } 1060 if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil { 1061 return nil, errUnrecognized 1062 } 1063 if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil { 1064 return nil, errUnrecognized 1065 } 1066 if offset != "" { 1067 if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil { 1068 return nil, errUnrecognized 1069 } 1070 } 1071 return mapping, nil 1072 } 1073 1074 var memoryMapSentinels = []string{ 1075 "--- Memory map: ---", 1076 "MAPPED_LIBRARIES:", 1077 } 1078 1079 // isMemoryMapSentinel returns true if the string contains one of the 1080 // known sentinels for memory map information. 1081 func isMemoryMapSentinel(line string) bool { 1082 for _, s := range memoryMapSentinels { 1083 if strings.Contains(line, s) { 1084 return true 1085 } 1086 } 1087 return false 1088 } 1089 1090 func (p *Profile) addLegacyFrameInfo() { 1091 switch { 1092 case isProfileType(p, heapzSampleTypes): 1093 p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr 1094 case isProfileType(p, contentionzSampleTypes): 1095 p.DropFrames, p.KeepFrames = lockRxStr, "" 1096 default: 1097 p.DropFrames, p.KeepFrames = cpuProfilerRxStr, "" 1098 } 1099 } 1100 1101 var heapzSampleTypes = [][]string{ 1102 {"allocations", "size"}, // early Go pprof profiles 1103 {"objects", "space"}, 1104 {"inuse_objects", "inuse_space"}, 1105 {"alloc_objects", "alloc_space"}, 1106 } 1107 var contentionzSampleTypes = [][]string{ 1108 {"contentions", "delay"}, 1109 } 1110 1111 func isProfileType(p *Profile, types [][]string) bool { 1112 st := p.SampleType 1113 nextType: 1114 for _, t := range types { 1115 if len(st) != len(t) { 1116 continue 1117 } 1118 1119 for i := range st { 1120 if st[i].Type != t[i] { 1121 continue nextType 1122 } 1123 } 1124 return true 1125 } 1126 return false 1127 } 1128 1129 var allocRxStr = strings.Join([]string{ 1130 // POSIX entry points. 1131 `calloc`, 1132 `cfree`, 1133 `malloc`, 1134 `free`, 1135 `memalign`, 1136 `do_memalign`, 1137 `(__)?posix_memalign`, 1138 `pvalloc`, 1139 `valloc`, 1140 `realloc`, 1141 1142 // TC malloc. 1143 `tcmalloc::.*`, 1144 `tc_calloc`, 1145 `tc_cfree`, 1146 `tc_malloc`, 1147 `tc_free`, 1148 `tc_memalign`, 1149 `tc_posix_memalign`, 1150 `tc_pvalloc`, 1151 `tc_valloc`, 1152 `tc_realloc`, 1153 `tc_new`, 1154 `tc_delete`, 1155 `tc_newarray`, 1156 `tc_deletearray`, 1157 `tc_new_nothrow`, 1158 `tc_newarray_nothrow`, 1159 1160 // Memory-allocation routines on OS X. 1161 `malloc_zone_malloc`, 1162 `malloc_zone_calloc`, 1163 `malloc_zone_valloc`, 1164 `malloc_zone_realloc`, 1165 `malloc_zone_memalign`, 1166 `malloc_zone_free`, 1167 1168 // Go runtime 1169 `runtime\..*`, 1170 1171 // Other misc. memory allocation routines 1172 `BaseArena::.*`, 1173 `(::)?do_malloc_no_errno`, 1174 `(::)?do_malloc_pages`, 1175 `(::)?do_malloc`, 1176 `DoSampledAllocation`, 1177 `MallocedMemBlock::MallocedMemBlock`, 1178 `_M_allocate`, 1179 `__builtin_(vec_)?delete`, 1180 `__builtin_(vec_)?new`, 1181 `__gnu_cxx::new_allocator::allocate`, 1182 `__libc_malloc`, 1183 `__malloc_alloc_template::allocate`, 1184 `allocate`, 1185 `cpp_alloc`, 1186 `operator new(\[\])?`, 1187 `simple_alloc::allocate`, 1188 }, `|`) 1189 1190 var allocSkipRxStr = strings.Join([]string{ 1191 // Preserve Go runtime frames that appear in the middle/bottom of 1192 // the stack. 1193 `runtime\.panic`, 1194 `runtime\.reflectcall`, 1195 `runtime\.call[0-9]*`, 1196 }, `|`) 1197 1198 var cpuProfilerRxStr = strings.Join([]string{ 1199 `ProfileData::Add`, 1200 `ProfileData::prof_handler`, 1201 `CpuProfiler::prof_handler`, 1202 `__pthread_sighandler`, 1203 `__restore`, 1204 }, `|`) 1205 1206 var lockRxStr = strings.Join([]string{ 1207 `RecordLockProfileData`, 1208 `(base::)?RecordLockProfileData.*`, 1209 `(base::)?SubmitMutexProfileData.*`, 1210 `(base::)?SubmitSpinLockProfileData.*`, 1211 `(base::Mutex::)?AwaitCommon.*`, 1212 `(base::Mutex::)?Unlock.*`, 1213 `(base::Mutex::)?UnlockSlow.*`, 1214 `(base::Mutex::)?ReaderUnlock.*`, 1215 `(base::MutexLock::)?~MutexLock.*`, 1216 `(Mutex::)?AwaitCommon.*`, 1217 `(Mutex::)?Unlock.*`, 1218 `(Mutex::)?UnlockSlow.*`, 1219 `(Mutex::)?ReaderUnlock.*`, 1220 `(MutexLock::)?~MutexLock.*`, 1221 `(SpinLock::)?Unlock.*`, 1222 `(SpinLock::)?SlowUnlock.*`, 1223 `(SpinLockHolder::)?~SpinLockHolder.*`, 1224 }, `|`) 1225