Home | History | Annotate | Download | only in profile
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // This file implements parsers to convert legacy profiles into the
      6 // profile.proto format.
      7 
      8 package profile
      9 
     10 import (
     11 	"bufio"
     12 	"bytes"
     13 	"fmt"
     14 	"io"
     15 	"math"
     16 	"regexp"
     17 	"strconv"
     18 	"strings"
     19 )
     20 
     21 var (
     22 	countStartRE = regexp.MustCompile(`\A(\w+) profile: total \d+\n\z`)
     23 	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\n\z`)
     24 
     25 	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
     26 	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)
     27 
     28 	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)
     29 
     30 	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)
     31 
     32 	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz`)
     33 
     34 	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz`)
     35 
     36 	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
     37 	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)
     38 
     39 	procMapsRE = regexp.MustCompile(`([[:xdigit:]]+)-([[:xdigit:]]+)\s+([-rwxp]+)\s+([[:xdigit:]]+)\s+([[:xdigit:]]+):([[:xdigit:]]+)\s+([[:digit:]]+)\s*(\S+)?`)
     40 
     41 	briefMapsRE = regexp.MustCompile(`\s*([[:xdigit:]]+)-([[:xdigit:]]+):\s*(\S+)(\s.*@)?([[:xdigit:]]+)?`)
     42 
     43 	// LegacyHeapAllocated instructs the heapz parsers to use the
     44 	// allocated memory stats instead of the default in-use memory. Note
     45 	// that tcmalloc doesn't provide all allocated memory, only in-use
     46 	// stats.
     47 	LegacyHeapAllocated bool
     48 )
     49 
     50 func isSpaceOrComment(line string) bool {
     51 	trimmed := strings.TrimSpace(line)
     52 	return len(trimmed) == 0 || trimmed[0] == '#'
     53 }
     54 
     55 // parseGoCount parses a Go count profile (e.g., threadcreate or
     56 // goroutine) and returns a new Profile.
     57 func parseGoCount(b []byte) (*Profile, error) {
     58 	r := bytes.NewBuffer(b)
     59 
     60 	var line string
     61 	var err error
     62 	for {
     63 		// Skip past comments and empty lines seeking a real header.
     64 		line, err = r.ReadString('\n')
     65 		if err != nil {
     66 			return nil, err
     67 		}
     68 		if !isSpaceOrComment(line) {
     69 			break
     70 		}
     71 	}
     72 
     73 	m := countStartRE.FindStringSubmatch(line)
     74 	if m == nil {
     75 		return nil, errUnrecognized
     76 	}
     77 	profileType := m[1]
     78 	p := &Profile{
     79 		PeriodType: &ValueType{Type: profileType, Unit: "count"},
     80 		Period:     1,
     81 		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
     82 	}
     83 	locations := make(map[uint64]*Location)
     84 	for {
     85 		line, err = r.ReadString('\n')
     86 		if err != nil {
     87 			if err == io.EOF {
     88 				break
     89 			}
     90 			return nil, err
     91 		}
     92 		if isSpaceOrComment(line) {
     93 			continue
     94 		}
     95 		if strings.HasPrefix(line, "---") {
     96 			break
     97 		}
     98 		m := countRE.FindStringSubmatch(line)
     99 		if m == nil {
    100 			return nil, errMalformed
    101 		}
    102 		n, err := strconv.ParseInt(m[1], 0, 64)
    103 		if err != nil {
    104 			return nil, errMalformed
    105 		}
    106 		fields := strings.Fields(m[2])
    107 		locs := make([]*Location, 0, len(fields))
    108 		for _, stk := range fields {
    109 			addr, err := strconv.ParseUint(stk, 0, 64)
    110 			if err != nil {
    111 				return nil, errMalformed
    112 			}
    113 			// Adjust all frames by -1 to land on the call instruction.
    114 			addr--
    115 			loc := locations[addr]
    116 			if loc == nil {
    117 				loc = &Location{
    118 					Address: addr,
    119 				}
    120 				locations[addr] = loc
    121 				p.Location = append(p.Location, loc)
    122 			}
    123 			locs = append(locs, loc)
    124 		}
    125 		p.Sample = append(p.Sample, &Sample{
    126 			Location: locs,
    127 			Value:    []int64{n},
    128 		})
    129 	}
    130 
    131 	if err = parseAdditionalSections(strings.TrimSpace(line), r, p); err != nil {
    132 		return nil, err
    133 	}
    134 	return p, nil
    135 }
    136 
    137 // remapLocationIDs ensures there is a location for each address
    138 // referenced by a sample, and remaps the samples to point to the new
    139 // location ids.
    140 func (p *Profile) remapLocationIDs() {
    141 	seen := make(map[*Location]bool, len(p.Location))
    142 	var locs []*Location
    143 
    144 	for _, s := range p.Sample {
    145 		for _, l := range s.Location {
    146 			if seen[l] {
    147 				continue
    148 			}
    149 			l.ID = uint64(len(locs) + 1)
    150 			locs = append(locs, l)
    151 			seen[l] = true
    152 		}
    153 	}
    154 	p.Location = locs
    155 }
    156 
    157 func (p *Profile) remapFunctionIDs() {
    158 	seen := make(map[*Function]bool, len(p.Function))
    159 	var fns []*Function
    160 
    161 	for _, l := range p.Location {
    162 		for _, ln := range l.Line {
    163 			fn := ln.Function
    164 			if fn == nil || seen[fn] {
    165 				continue
    166 			}
    167 			fn.ID = uint64(len(fns) + 1)
    168 			fns = append(fns, fn)
    169 			seen[fn] = true
    170 		}
    171 	}
    172 	p.Function = fns
    173 }
    174 
    175 // remapMappingIDs matches location addresses with existing mappings
    176 // and updates them appropriately. This is O(N*M), if this ever shows
    177 // up as a bottleneck, evaluate sorting the mappings and doing a
    178 // binary search, which would make it O(N*log(M)).
    179 func (p *Profile) remapMappingIDs() {
    180 	if len(p.Mapping) == 0 {
    181 		return
    182 	}
    183 
    184 	// Some profile handlers will incorrectly set regions for the main
    185 	// executable if its section is remapped. Fix them through heuristics.
    186 
    187 	// Remove the initial mapping if named '/anon_hugepage' and has a
    188 	// consecutive adjacent mapping.
    189 	if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
    190 		if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
    191 			p.Mapping = p.Mapping[1:]
    192 		}
    193 	}
    194 
    195 	// Subtract the offset from the start of the main mapping if it
    196 	// ends up at a recognizable start address.
    197 	const expectedStart = 0x400000
    198 	if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
    199 		m.Start = expectedStart
    200 		m.Offset = 0
    201 	}
    202 
    203 	for _, l := range p.Location {
    204 		if a := l.Address; a != 0 {
    205 			for _, m := range p.Mapping {
    206 				if m.Start <= a && a < m.Limit {
    207 					l.Mapping = m
    208 					break
    209 				}
    210 			}
    211 		}
    212 	}
    213 
    214 	// Reset all mapping IDs.
    215 	for i, m := range p.Mapping {
    216 		m.ID = uint64(i + 1)
    217 	}
    218 }
    219 
    220 var cpuInts = []func([]byte) (uint64, []byte){
    221 	get32l,
    222 	get32b,
    223 	get64l,
    224 	get64b,
    225 }
    226 
    227 func get32l(b []byte) (uint64, []byte) {
    228 	if len(b) < 4 {
    229 		return 0, nil
    230 	}
    231 	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
    232 }
    233 
    234 func get32b(b []byte) (uint64, []byte) {
    235 	if len(b) < 4 {
    236 		return 0, nil
    237 	}
    238 	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
    239 }
    240 
    241 func get64l(b []byte) (uint64, []byte) {
    242 	if len(b) < 8 {
    243 		return 0, nil
    244 	}
    245 	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
    246 }
    247 
    248 func get64b(b []byte) (uint64, []byte) {
    249 	if len(b) < 8 {
    250 		return 0, nil
    251 	}
    252 	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
    253 }
    254 
    255 // ParseTracebacks parses a set of tracebacks and returns a newly
    256 // populated profile. It will accept any text file and generate a
    257 // Profile out of it with any hex addresses it can identify, including
    258 // a process map if it can recognize one. Each sample will include a
    259 // tag "source" with the addresses recognized in string format.
    260 func ParseTracebacks(b []byte) (*Profile, error) {
    261 	r := bytes.NewBuffer(b)
    262 
    263 	p := &Profile{
    264 		PeriodType: &ValueType{Type: "trace", Unit: "count"},
    265 		Period:     1,
    266 		SampleType: []*ValueType{
    267 			{Type: "trace", Unit: "count"},
    268 		},
    269 	}
    270 
    271 	var sources []string
    272 	var sloc []*Location
    273 
    274 	locs := make(map[uint64]*Location)
    275 	for {
    276 		l, err := r.ReadString('\n')
    277 		if err != nil {
    278 			if err != io.EOF {
    279 				return nil, err
    280 			}
    281 			if l == "" {
    282 				break
    283 			}
    284 		}
    285 		if sectionTrigger(l) == memoryMapSection {
    286 			break
    287 		}
    288 		if s, addrs := extractHexAddresses(l); len(s) > 0 {
    289 			for _, addr := range addrs {
    290 				// Addresses from stack traces point to the next instruction after
    291 				// each call. Adjust by -1 to land somewhere on the actual call.
    292 				addr--
    293 				loc := locs[addr]
    294 				if locs[addr] == nil {
    295 					loc = &Location{
    296 						Address: addr,
    297 					}
    298 					p.Location = append(p.Location, loc)
    299 					locs[addr] = loc
    300 				}
    301 				sloc = append(sloc, loc)
    302 			}
    303 
    304 			sources = append(sources, s...)
    305 		} else {
    306 			if len(sources) > 0 || len(sloc) > 0 {
    307 				addTracebackSample(sloc, sources, p)
    308 				sloc, sources = nil, nil
    309 			}
    310 		}
    311 	}
    312 
    313 	// Add final sample to save any leftover data.
    314 	if len(sources) > 0 || len(sloc) > 0 {
    315 		addTracebackSample(sloc, sources, p)
    316 	}
    317 
    318 	if err := p.ParseMemoryMap(r); err != nil {
    319 		return nil, err
    320 	}
    321 	return p, nil
    322 }
    323 
    324 func addTracebackSample(l []*Location, s []string, p *Profile) {
    325 	p.Sample = append(p.Sample,
    326 		&Sample{
    327 			Value:    []int64{1},
    328 			Location: l,
    329 			Label:    map[string][]string{"source": s},
    330 		})
    331 }
    332 
    333 // parseCPU parses a profilez legacy profile and returns a newly
    334 // populated Profile.
    335 //
    336 // The general format for profilez samples is a sequence of words in
    337 // binary format. The first words are a header with the following data:
    338 //   1st word -- 0
    339 //   2nd word -- 3
    340 //   3rd word -- 0 if a c++ application, 1 if a java application.
    341 //   4th word -- Sampling period (in microseconds).
    342 //   5th word -- Padding.
    343 func parseCPU(b []byte) (*Profile, error) {
    344 	var parse func([]byte) (uint64, []byte)
    345 	var n1, n2, n3, n4, n5 uint64
    346 	for _, parse = range cpuInts {
    347 		var tmp []byte
    348 		n1, tmp = parse(b)
    349 		n2, tmp = parse(tmp)
    350 		n3, tmp = parse(tmp)
    351 		n4, tmp = parse(tmp)
    352 		n5, tmp = parse(tmp)
    353 
    354 		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
    355 			b = tmp
    356 			return cpuProfile(b, int64(n4), parse)
    357 		}
    358 	}
    359 	return nil, errUnrecognized
    360 }
    361 
    362 // cpuProfile returns a new Profile from C++ profilez data.
    363 // b is the profile bytes after the header, period is the profiling
    364 // period, and parse is a function to parse 8-byte chunks from the
    365 // profile in its native endianness.
    366 func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
    367 	p := &Profile{
    368 		Period:     period * 1000,
    369 		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
    370 		SampleType: []*ValueType{
    371 			{Type: "samples", Unit: "count"},
    372 			{Type: "cpu", Unit: "nanoseconds"},
    373 		},
    374 	}
    375 	var err error
    376 	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
    377 		return nil, err
    378 	}
    379 
    380 	// If all samples have the same second-to-the-bottom frame, it
    381 	// strongly suggests that it is an uninteresting artifact of
    382 	// measurement -- a stack frame pushed by the signal handler. The
    383 	// bottom frame is always correct as it is picked up from the signal
    384 	// structure, not the stack. Check if this is the case and if so,
    385 	// remove.
    386 	if len(p.Sample) > 1 && len(p.Sample[0].Location) > 1 {
    387 		allSame := true
    388 		id1 := p.Sample[0].Location[1].Address
    389 		for _, s := range p.Sample {
    390 			if len(s.Location) < 2 || id1 != s.Location[1].Address {
    391 				allSame = false
    392 				break
    393 			}
    394 		}
    395 		if allSame {
    396 			for _, s := range p.Sample {
    397 				s.Location = append(s.Location[:1], s.Location[2:]...)
    398 			}
    399 		}
    400 	}
    401 
    402 	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
    403 		return nil, err
    404 	}
    405 	return p, nil
    406 }
    407 
    408 // parseCPUSamples parses a collection of profilez samples from a
    409 // profile.
    410 //
    411 // profilez samples are a repeated sequence of stack frames of the
    412 // form:
    413 //    1st word -- The number of times this stack was encountered.
    414 //    2nd word -- The size of the stack (StackSize).
    415 //    3rd word -- The first address on the stack.
    416 //    ...
    417 //    StackSize + 2 -- The last address on the stack
    418 // The last stack trace is of the form:
    419 //   1st word -- 0
    420 //   2nd word -- 1
    421 //   3rd word -- 0
    422 //
    423 // Addresses from stack traces may point to the next instruction after
    424 // each call. Optionally adjust by -1 to land somewhere on the actual
    425 // call (except for the leaf, which is not a call).
    426 func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
    427 	locs := make(map[uint64]*Location)
    428 	for len(b) > 0 {
    429 		var count, nstk uint64
    430 		count, b = parse(b)
    431 		nstk, b = parse(b)
    432 		if b == nil || nstk > uint64(len(b)/4) {
    433 			return nil, nil, errUnrecognized
    434 		}
    435 		var sloc []*Location
    436 		addrs := make([]uint64, nstk)
    437 		for i := 0; i < int(nstk); i++ {
    438 			addrs[i], b = parse(b)
    439 		}
    440 
    441 		if count == 0 && nstk == 1 && addrs[0] == 0 {
    442 			// End of data marker
    443 			break
    444 		}
    445 		for i, addr := range addrs {
    446 			if adjust && i > 0 {
    447 				addr--
    448 			}
    449 			loc := locs[addr]
    450 			if loc == nil {
    451 				loc = &Location{
    452 					Address: addr,
    453 				}
    454 				locs[addr] = loc
    455 				p.Location = append(p.Location, loc)
    456 			}
    457 			sloc = append(sloc, loc)
    458 		}
    459 		p.Sample = append(p.Sample,
    460 			&Sample{
    461 				Value:    []int64{int64(count), int64(count) * p.Period},
    462 				Location: sloc,
    463 			})
    464 	}
    465 	// Reached the end without finding the EOD marker.
    466 	return b, locs, nil
    467 }
    468 
    469 // parseHeap parses a heapz legacy or a growthz profile and
    470 // returns a newly populated Profile.
    471 func parseHeap(b []byte) (p *Profile, err error) {
    472 	r := bytes.NewBuffer(b)
    473 	l, err := r.ReadString('\n')
    474 	if err != nil {
    475 		return nil, errUnrecognized
    476 	}
    477 
    478 	sampling := ""
    479 
    480 	if header := heapHeaderRE.FindStringSubmatch(l); header != nil {
    481 		p = &Profile{
    482 			SampleType: []*ValueType{
    483 				{Type: "objects", Unit: "count"},
    484 				{Type: "space", Unit: "bytes"},
    485 			},
    486 			PeriodType: &ValueType{Type: "objects", Unit: "bytes"},
    487 		}
    488 
    489 		var period int64
    490 		if len(header[6]) > 0 {
    491 			if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
    492 				return nil, errUnrecognized
    493 			}
    494 		}
    495 
    496 		switch header[5] {
    497 		case "heapz_v2", "heap_v2":
    498 			sampling, p.Period = "v2", period
    499 		case "heapprofile":
    500 			sampling, p.Period = "", 1
    501 		case "heap":
    502 			sampling, p.Period = "v2", period/2
    503 		default:
    504 			return nil, errUnrecognized
    505 		}
    506 	} else if header = growthHeaderRE.FindStringSubmatch(l); header != nil {
    507 		p = &Profile{
    508 			SampleType: []*ValueType{
    509 				{Type: "objects", Unit: "count"},
    510 				{Type: "space", Unit: "bytes"},
    511 			},
    512 			PeriodType: &ValueType{Type: "heapgrowth", Unit: "count"},
    513 			Period:     1,
    514 		}
    515 	} else if header = fragmentationHeaderRE.FindStringSubmatch(l); header != nil {
    516 		p = &Profile{
    517 			SampleType: []*ValueType{
    518 				{Type: "objects", Unit: "count"},
    519 				{Type: "space", Unit: "bytes"},
    520 			},
    521 			PeriodType: &ValueType{Type: "allocations", Unit: "count"},
    522 			Period:     1,
    523 		}
    524 	} else {
    525 		return nil, errUnrecognized
    526 	}
    527 
    528 	if LegacyHeapAllocated {
    529 		for _, st := range p.SampleType {
    530 			st.Type = "alloc_" + st.Type
    531 		}
    532 	} else {
    533 		for _, st := range p.SampleType {
    534 			st.Type = "inuse_" + st.Type
    535 		}
    536 	}
    537 
    538 	locs := make(map[uint64]*Location)
    539 	for {
    540 		l, err = r.ReadString('\n')
    541 		if err != nil {
    542 			if err != io.EOF {
    543 				return nil, err
    544 			}
    545 
    546 			if l == "" {
    547 				break
    548 			}
    549 		}
    550 
    551 		if isSpaceOrComment(l) {
    552 			continue
    553 		}
    554 		l = strings.TrimSpace(l)
    555 
    556 		if sectionTrigger(l) != unrecognizedSection {
    557 			break
    558 		}
    559 
    560 		value, blocksize, addrs, err := parseHeapSample(l, p.Period, sampling)
    561 		if err != nil {
    562 			return nil, err
    563 		}
    564 		var sloc []*Location
    565 		for _, addr := range addrs {
    566 			// Addresses from stack traces point to the next instruction after
    567 			// each call. Adjust by -1 to land somewhere on the actual call.
    568 			addr--
    569 			loc := locs[addr]
    570 			if locs[addr] == nil {
    571 				loc = &Location{
    572 					Address: addr,
    573 				}
    574 				p.Location = append(p.Location, loc)
    575 				locs[addr] = loc
    576 			}
    577 			sloc = append(sloc, loc)
    578 		}
    579 
    580 		p.Sample = append(p.Sample, &Sample{
    581 			Value:    value,
    582 			Location: sloc,
    583 			NumLabel: map[string][]int64{"bytes": {blocksize}},
    584 		})
    585 	}
    586 
    587 	if err = parseAdditionalSections(l, r, p); err != nil {
    588 		return nil, err
    589 	}
    590 	return p, nil
    591 }
    592 
    593 // parseHeapSample parses a single row from a heap profile into a new Sample.
    594 func parseHeapSample(line string, rate int64, sampling string) (value []int64, blocksize int64, addrs []uint64, err error) {
    595 	sampleData := heapSampleRE.FindStringSubmatch(line)
    596 	if len(sampleData) != 6 {
    597 		return value, blocksize, addrs, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
    598 	}
    599 
    600 	// Use first two values by default; tcmalloc sampling generates the
    601 	// same value for both, only the older heap-profile collect separate
    602 	// stats for in-use and allocated objects.
    603 	valueIndex := 1
    604 	if LegacyHeapAllocated {
    605 		valueIndex = 3
    606 	}
    607 
    608 	var v1, v2 int64
    609 	if v1, err = strconv.ParseInt(sampleData[valueIndex], 10, 64); err != nil {
    610 		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
    611 	}
    612 	if v2, err = strconv.ParseInt(sampleData[valueIndex+1], 10, 64); err != nil {
    613 		return value, blocksize, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
    614 	}
    615 
    616 	if v1 == 0 {
    617 		if v2 != 0 {
    618 			return value, blocksize, addrs, fmt.Errorf("allocation count was 0 but allocation bytes was %d", v2)
    619 		}
    620 	} else {
    621 		blocksize = v2 / v1
    622 		if sampling == "v2" {
    623 			v1, v2 = scaleHeapSample(v1, v2, rate)
    624 		}
    625 	}
    626 
    627 	value = []int64{v1, v2}
    628 	addrs = parseHexAddresses(sampleData[5])
    629 
    630 	return value, blocksize, addrs, nil
    631 }
    632 
    633 // extractHexAddresses extracts hex numbers from a string and returns
    634 // them, together with their numeric value, in a slice.
    635 func extractHexAddresses(s string) ([]string, []uint64) {
    636 	hexStrings := hexNumberRE.FindAllString(s, -1)
    637 	var ids []uint64
    638 	for _, s := range hexStrings {
    639 		if id, err := strconv.ParseUint(s, 0, 64); err == nil {
    640 			ids = append(ids, id)
    641 		} else {
    642 			// Do not expect any parsing failures due to the regexp matching.
    643 			panic("failed to parse hex value:" + s)
    644 		}
    645 	}
    646 	return hexStrings, ids
    647 }
    648 
    649 // parseHexAddresses parses hex numbers from a string and returns them
    650 // in a slice.
    651 func parseHexAddresses(s string) []uint64 {
    652 	_, ids := extractHexAddresses(s)
    653 	return ids
    654 }
    655 
    656 // scaleHeapSample adjusts the data from a heapz Sample to
    657 // account for its probability of appearing in the collected
    658 // data. heapz profiles are a sampling of the memory allocations
    659 // requests in a program. We estimate the unsampled value by dividing
    660 // each collected sample by its probability of appearing in the
    661 // profile. heapz v2 profiles rely on a poisson process to determine
    662 // which samples to collect, based on the desired average collection
    663 // rate R. The probability of a sample of size S to appear in that
    664 // profile is 1-exp(-S/R).
    665 func scaleHeapSample(count, size, rate int64) (int64, int64) {
    666 	if count == 0 || size == 0 {
    667 		return 0, 0
    668 	}
    669 
    670 	if rate <= 1 {
    671 		// if rate==1 all samples were collected so no adjustment is needed.
    672 		// if rate<1 treat as unknown and skip scaling.
    673 		return count, size
    674 	}
    675 
    676 	avgSize := float64(size) / float64(count)
    677 	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
    678 
    679 	return int64(float64(count) * scale), int64(float64(size) * scale)
    680 }
    681 
    682 // parseContention parses a mutex or contention profile. There are 2 cases:
    683 // "--- contentionz " for legacy C++ profiles (and backwards compatibility)
    684 // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
    685 // This code converts the text output from runtime into a *Profile. (In the future
    686 // the runtime might write a serialized Profile directly making this unnecessary.)
    687 func parseContention(b []byte) (*Profile, error) {
    688 	r := bytes.NewBuffer(b)
    689 	var l string
    690 	var err error
    691 	for {
    692 		// Skip past comments and empty lines seeking a real header.
    693 		l, err = r.ReadString('\n')
    694 		if err != nil {
    695 			return nil, err
    696 		}
    697 		if !isSpaceOrComment(l) {
    698 			break
    699 		}
    700 	}
    701 
    702 	if strings.HasPrefix(l, "--- contentionz ") {
    703 		return parseCppContention(r)
    704 	} else if strings.HasPrefix(l, "--- mutex:") {
    705 		return parseCppContention(r)
    706 	} else if strings.HasPrefix(l, "--- contention:") {
    707 		return parseCppContention(r)
    708 	}
    709 	return nil, errUnrecognized
    710 }
    711 
    712 // parseCppContention parses the output from synchronization_profiling.cc
    713 // for backward compatibility, and the compatible (non-debug) block profile
    714 // output from the Go runtime.
    715 func parseCppContention(r *bytes.Buffer) (*Profile, error) {
    716 	p := &Profile{
    717 		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
    718 		Period:     1,
    719 		SampleType: []*ValueType{
    720 			{Type: "contentions", Unit: "count"},
    721 			{Type: "delay", Unit: "nanoseconds"},
    722 		},
    723 	}
    724 
    725 	var cpuHz int64
    726 	var l string
    727 	var err error
    728 	// Parse text of the form "attribute = value" before the samples.
    729 	const delimiter = "="
    730 	for {
    731 		l, err = r.ReadString('\n')
    732 		if err != nil {
    733 			if err != io.EOF {
    734 				return nil, err
    735 			}
    736 
    737 			if l == "" {
    738 				break
    739 			}
    740 		}
    741 		if isSpaceOrComment(l) {
    742 			continue
    743 		}
    744 
    745 		if l = strings.TrimSpace(l); l == "" {
    746 			continue
    747 		}
    748 
    749 		if strings.HasPrefix(l, "---") {
    750 			break
    751 		}
    752 
    753 		attr := strings.SplitN(l, delimiter, 2)
    754 		if len(attr) != 2 {
    755 			break
    756 		}
    757 		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
    758 		var err error
    759 		switch key {
    760 		case "cycles/second":
    761 			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
    762 				return nil, errUnrecognized
    763 			}
    764 		case "sampling period":
    765 			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
    766 				return nil, errUnrecognized
    767 			}
    768 		case "ms since reset":
    769 			ms, err := strconv.ParseInt(val, 0, 64)
    770 			if err != nil {
    771 				return nil, errUnrecognized
    772 			}
    773 			p.DurationNanos = ms * 1000 * 1000
    774 		case "format":
    775 			// CPP contentionz profiles don't have format.
    776 			return nil, errUnrecognized
    777 		case "resolution":
    778 			// CPP contentionz profiles don't have resolution.
    779 			return nil, errUnrecognized
    780 		case "discarded samples":
    781 		default:
    782 			return nil, errUnrecognized
    783 		}
    784 	}
    785 
    786 	locs := make(map[uint64]*Location)
    787 	for {
    788 		if !isSpaceOrComment(l) {
    789 			if l = strings.TrimSpace(l); strings.HasPrefix(l, "---") {
    790 				break
    791 			}
    792 			value, addrs, err := parseContentionSample(l, p.Period, cpuHz)
    793 			if err != nil {
    794 				return nil, err
    795 			}
    796 			var sloc []*Location
    797 			for _, addr := range addrs {
    798 				// Addresses from stack traces point to the next instruction after
    799 				// each call. Adjust by -1 to land somewhere on the actual call.
    800 				addr--
    801 				loc := locs[addr]
    802 				if locs[addr] == nil {
    803 					loc = &Location{
    804 						Address: addr,
    805 					}
    806 					p.Location = append(p.Location, loc)
    807 					locs[addr] = loc
    808 				}
    809 				sloc = append(sloc, loc)
    810 			}
    811 			p.Sample = append(p.Sample, &Sample{
    812 				Value:    value,
    813 				Location: sloc,
    814 			})
    815 		}
    816 
    817 		if l, err = r.ReadString('\n'); err != nil {
    818 			if err != io.EOF {
    819 				return nil, err
    820 			}
    821 			if l == "" {
    822 				break
    823 			}
    824 		}
    825 	}
    826 
    827 	if err = parseAdditionalSections(l, r, p); err != nil {
    828 		return nil, err
    829 	}
    830 
    831 	return p, nil
    832 }
    833 
    834 // parseContentionSample parses a single row from a contention profile
    835 // into a new Sample.
    836 func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
    837 	sampleData := contentionSampleRE.FindStringSubmatch(line)
    838 	if sampleData == nil {
    839 		return value, addrs, errUnrecognized
    840 	}
    841 
    842 	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
    843 	if err != nil {
    844 		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
    845 	}
    846 	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
    847 	if err != nil {
    848 		return value, addrs, fmt.Errorf("malformed sample: %s: %v", line, err)
    849 	}
    850 
    851 	// Unsample values if period and cpuHz are available.
    852 	// - Delays are scaled to cycles and then to nanoseconds.
    853 	// - Contentions are scaled to cycles.
    854 	if period > 0 {
    855 		if cpuHz > 0 {
    856 			cpuGHz := float64(cpuHz) / 1e9
    857 			v1 = int64(float64(v1) * float64(period) / cpuGHz)
    858 		}
    859 		v2 = v2 * period
    860 	}
    861 
    862 	value = []int64{v2, v1}
    863 	addrs = parseHexAddresses(sampleData[3])
    864 
    865 	return value, addrs, nil
    866 }
    867 
    868 // parseThread parses a Threadz profile and returns a new Profile.
    869 func parseThread(b []byte) (*Profile, error) {
    870 	r := bytes.NewBuffer(b)
    871 
    872 	var line string
    873 	var err error
    874 	for {
    875 		// Skip past comments and empty lines seeking a real header.
    876 		line, err = r.ReadString('\n')
    877 		if err != nil {
    878 			return nil, err
    879 		}
    880 		if !isSpaceOrComment(line) {
    881 			break
    882 		}
    883 	}
    884 
    885 	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
    886 		// Advance over initial comments until first stack trace.
    887 		for {
    888 			line, err = r.ReadString('\n')
    889 			if err != nil {
    890 				if err != io.EOF {
    891 					return nil, err
    892 				}
    893 
    894 				if line == "" {
    895 					break
    896 				}
    897 			}
    898 			if sectionTrigger(line) != unrecognizedSection || line[0] == '-' {
    899 				break
    900 			}
    901 		}
    902 	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
    903 		return nil, errUnrecognized
    904 	}
    905 
    906 	p := &Profile{
    907 		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
    908 		PeriodType: &ValueType{Type: "thread", Unit: "count"},
    909 		Period:     1,
    910 	}
    911 
    912 	locs := make(map[uint64]*Location)
    913 	// Recognize each thread and populate profile samples.
    914 	for sectionTrigger(line) == unrecognizedSection {
    915 		if strings.HasPrefix(line, "---- no stack trace for") {
    916 			line = ""
    917 			break
    918 		}
    919 		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
    920 			return nil, errUnrecognized
    921 		}
    922 
    923 		var addrs []uint64
    924 		line, addrs, err = parseThreadSample(r)
    925 		if err != nil {
    926 			return nil, errUnrecognized
    927 		}
    928 		if len(addrs) == 0 {
    929 			// We got a --same as previous threads--. Bump counters.
    930 			if len(p.Sample) > 0 {
    931 				s := p.Sample[len(p.Sample)-1]
    932 				s.Value[0]++
    933 			}
    934 			continue
    935 		}
    936 
    937 		var sloc []*Location
    938 		for _, addr := range addrs {
    939 			// Addresses from stack traces point to the next instruction after
    940 			// each call. Adjust by -1 to land somewhere on the actual call.
    941 			addr--
    942 			loc := locs[addr]
    943 			if locs[addr] == nil {
    944 				loc = &Location{
    945 					Address: addr,
    946 				}
    947 				p.Location = append(p.Location, loc)
    948 				locs[addr] = loc
    949 			}
    950 			sloc = append(sloc, loc)
    951 		}
    952 
    953 		p.Sample = append(p.Sample, &Sample{
    954 			Value:    []int64{1},
    955 			Location: sloc,
    956 		})
    957 	}
    958 
    959 	if err = parseAdditionalSections(line, r, p); err != nil {
    960 		return nil, err
    961 	}
    962 
    963 	return p, nil
    964 }
    965 
    966 // parseThreadSample parses a symbolized or unsymbolized stack trace.
    967 // Returns the first line after the traceback, the sample (or nil if
    968 // it hits a 'same-as-previous' marker) and an error.
    969 func parseThreadSample(b *bytes.Buffer) (nextl string, addrs []uint64, err error) {
    970 	var l string
    971 	sameAsPrevious := false
    972 	for {
    973 		if l, err = b.ReadString('\n'); err != nil {
    974 			if err != io.EOF {
    975 				return "", nil, err
    976 			}
    977 			if l == "" {
    978 				break
    979 			}
    980 		}
    981 		if l = strings.TrimSpace(l); l == "" {
    982 			continue
    983 		}
    984 
    985 		if strings.HasPrefix(l, "---") {
    986 			break
    987 		}
    988 		if strings.Contains(l, "same as previous thread") {
    989 			sameAsPrevious = true
    990 			continue
    991 		}
    992 
    993 		addrs = append(addrs, parseHexAddresses(l)...)
    994 	}
    995 
    996 	if sameAsPrevious {
    997 		return l, nil, nil
    998 	}
    999 	return l, addrs, nil
   1000 }
   1001 
   1002 // parseAdditionalSections parses any additional sections in the
   1003 // profile, ignoring any unrecognized sections.
   1004 func parseAdditionalSections(l string, b *bytes.Buffer, p *Profile) (err error) {
   1005 	for {
   1006 		if sectionTrigger(l) == memoryMapSection {
   1007 			break
   1008 		}
   1009 		// Ignore any unrecognized sections.
   1010 		if l, err := b.ReadString('\n'); err != nil {
   1011 			if err != io.EOF {
   1012 				return err
   1013 			}
   1014 			if l == "" {
   1015 				break
   1016 			}
   1017 		}
   1018 	}
   1019 	return p.ParseMemoryMap(b)
   1020 }
   1021 
   1022 // ParseMemoryMap parses a memory map in the format of
   1023 // /proc/self/maps, and overrides the mappings in the current profile.
   1024 // It renumbers the samples and locations in the profile correspondingly.
   1025 func (p *Profile) ParseMemoryMap(rd io.Reader) error {
   1026 	b := bufio.NewReader(rd)
   1027 
   1028 	var attrs []string
   1029 	var r *strings.Replacer
   1030 	const delimiter = "="
   1031 	for {
   1032 		l, err := b.ReadString('\n')
   1033 		if err != nil {
   1034 			if err != io.EOF {
   1035 				return err
   1036 			}
   1037 			if l == "" {
   1038 				break
   1039 			}
   1040 		}
   1041 		if l = strings.TrimSpace(l); l == "" {
   1042 			continue
   1043 		}
   1044 
   1045 		if r != nil {
   1046 			l = r.Replace(l)
   1047 		}
   1048 		m, err := parseMappingEntry(l)
   1049 		if err != nil {
   1050 			if err == errUnrecognized {
   1051 				// Recognize assignments of the form: attr=value, and replace
   1052 				// $attr with value on subsequent mappings.
   1053 				if attr := strings.SplitN(l, delimiter, 2); len(attr) == 2 {
   1054 					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
   1055 					r = strings.NewReplacer(attrs...)
   1056 				}
   1057 				// Ignore any unrecognized entries
   1058 				continue
   1059 			}
   1060 			return err
   1061 		}
   1062 		if m == nil || (m.File == "" && len(p.Mapping) != 0) {
   1063 			// In some cases the first entry may include the address range
   1064 			// but not the name of the file. It should be followed by
   1065 			// another entry with the name.
   1066 			continue
   1067 		}
   1068 		if len(p.Mapping) == 1 && p.Mapping[0].File == "" {
   1069 			// Update the name if this is the entry following that empty one.
   1070 			p.Mapping[0].File = m.File
   1071 			continue
   1072 		}
   1073 		p.Mapping = append(p.Mapping, m)
   1074 	}
   1075 	p.remapLocationIDs()
   1076 	p.remapFunctionIDs()
   1077 	p.remapMappingIDs()
   1078 	return nil
   1079 }
   1080 
   1081 func parseMappingEntry(l string) (*Mapping, error) {
   1082 	mapping := &Mapping{}
   1083 	var err error
   1084 	if me := procMapsRE.FindStringSubmatch(l); len(me) == 9 {
   1085 		if !strings.Contains(me[3], "x") {
   1086 			// Skip non-executable entries.
   1087 			return nil, nil
   1088 		}
   1089 		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
   1090 			return nil, errUnrecognized
   1091 		}
   1092 		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
   1093 			return nil, errUnrecognized
   1094 		}
   1095 		if me[4] != "" {
   1096 			if mapping.Offset, err = strconv.ParseUint(me[4], 16, 64); err != nil {
   1097 				return nil, errUnrecognized
   1098 			}
   1099 		}
   1100 		mapping.File = me[8]
   1101 		return mapping, nil
   1102 	}
   1103 
   1104 	if me := briefMapsRE.FindStringSubmatch(l); len(me) == 6 {
   1105 		if mapping.Start, err = strconv.ParseUint(me[1], 16, 64); err != nil {
   1106 			return nil, errUnrecognized
   1107 		}
   1108 		if mapping.Limit, err = strconv.ParseUint(me[2], 16, 64); err != nil {
   1109 			return nil, errUnrecognized
   1110 		}
   1111 		mapping.File = me[3]
   1112 		if me[5] != "" {
   1113 			if mapping.Offset, err = strconv.ParseUint(me[5], 16, 64); err != nil {
   1114 				return nil, errUnrecognized
   1115 			}
   1116 		}
   1117 		return mapping, nil
   1118 	}
   1119 
   1120 	return nil, errUnrecognized
   1121 }
   1122 
   1123 type sectionType int
   1124 
   1125 const (
   1126 	unrecognizedSection sectionType = iota
   1127 	memoryMapSection
   1128 )
   1129 
   1130 var memoryMapTriggers = []string{
   1131 	"--- Memory map: ---",
   1132 	"MAPPED_LIBRARIES:",
   1133 }
   1134 
   1135 func sectionTrigger(line string) sectionType {
   1136 	for _, trigger := range memoryMapTriggers {
   1137 		if strings.Contains(line, trigger) {
   1138 			return memoryMapSection
   1139 		}
   1140 	}
   1141 	return unrecognizedSection
   1142 }
   1143 
   1144 func (p *Profile) addLegacyFrameInfo() {
   1145 	switch {
   1146 	case isProfileType(p, heapzSampleTypes) ||
   1147 		isProfileType(p, heapzInUseSampleTypes) ||
   1148 		isProfileType(p, heapzAllocSampleTypes):
   1149 		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
   1150 	case isProfileType(p, contentionzSampleTypes):
   1151 		p.DropFrames, p.KeepFrames = lockRxStr, ""
   1152 	default:
   1153 		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
   1154 	}
   1155 }
   1156 
   1157 var heapzSampleTypes = []string{"allocations", "size"} // early Go pprof profiles
   1158 var heapzInUseSampleTypes = []string{"inuse_objects", "inuse_space"}
   1159 var heapzAllocSampleTypes = []string{"alloc_objects", "alloc_space"}
   1160 var contentionzSampleTypes = []string{"contentions", "delay"}
   1161 
   1162 func isProfileType(p *Profile, t []string) bool {
   1163 	st := p.SampleType
   1164 	if len(st) != len(t) {
   1165 		return false
   1166 	}
   1167 
   1168 	for i := range st {
   1169 		if st[i].Type != t[i] {
   1170 			return false
   1171 		}
   1172 	}
   1173 	return true
   1174 }
   1175 
   1176 var allocRxStr = strings.Join([]string{
   1177 	// POSIX entry points.
   1178 	`calloc`,
   1179 	`cfree`,
   1180 	`malloc`,
   1181 	`free`,
   1182 	`memalign`,
   1183 	`do_memalign`,
   1184 	`(__)?posix_memalign`,
   1185 	`pvalloc`,
   1186 	`valloc`,
   1187 	`realloc`,
   1188 
   1189 	// TC malloc.
   1190 	`tcmalloc::.*`,
   1191 	`tc_calloc`,
   1192 	`tc_cfree`,
   1193 	`tc_malloc`,
   1194 	`tc_free`,
   1195 	`tc_memalign`,
   1196 	`tc_posix_memalign`,
   1197 	`tc_pvalloc`,
   1198 	`tc_valloc`,
   1199 	`tc_realloc`,
   1200 	`tc_new`,
   1201 	`tc_delete`,
   1202 	`tc_newarray`,
   1203 	`tc_deletearray`,
   1204 	`tc_new_nothrow`,
   1205 	`tc_newarray_nothrow`,
   1206 
   1207 	// Memory-allocation routines on OS X.
   1208 	`malloc_zone_malloc`,
   1209 	`malloc_zone_calloc`,
   1210 	`malloc_zone_valloc`,
   1211 	`malloc_zone_realloc`,
   1212 	`malloc_zone_memalign`,
   1213 	`malloc_zone_free`,
   1214 
   1215 	// Go runtime
   1216 	`runtime\..*`,
   1217 
   1218 	// Other misc. memory allocation routines
   1219 	`BaseArena::.*`,
   1220 	`(::)?do_malloc_no_errno`,
   1221 	`(::)?do_malloc_pages`,
   1222 	`(::)?do_malloc`,
   1223 	`DoSampledAllocation`,
   1224 	`MallocedMemBlock::MallocedMemBlock`,
   1225 	`_M_allocate`,
   1226 	`__builtin_(vec_)?delete`,
   1227 	`__builtin_(vec_)?new`,
   1228 	`__gnu_cxx::new_allocator::allocate`,
   1229 	`__libc_malloc`,
   1230 	`__malloc_alloc_template::allocate`,
   1231 	`allocate`,
   1232 	`cpp_alloc`,
   1233 	`operator new(\[\])?`,
   1234 	`simple_alloc::allocate`,
   1235 }, `|`)
   1236 
   1237 var allocSkipRxStr = strings.Join([]string{
   1238 	// Preserve Go runtime frames that appear in the middle/bottom of
   1239 	// the stack.
   1240 	`runtime\.panic`,
   1241 	`runtime\.reflectcall`,
   1242 	`runtime\.call[0-9]*`,
   1243 }, `|`)
   1244 
   1245 var cpuProfilerRxStr = strings.Join([]string{
   1246 	`ProfileData::Add`,
   1247 	`ProfileData::prof_handler`,
   1248 	`CpuProfiler::prof_handler`,
   1249 	`__pthread_sighandler`,
   1250 	`__restore`,
   1251 }, `|`)
   1252 
   1253 var lockRxStr = strings.Join([]string{
   1254 	`RecordLockProfileData`,
   1255 	`(base::)?RecordLockProfileData.*`,
   1256 	`(base::)?SubmitMutexProfileData.*`,
   1257 	`(base::)?SubmitSpinLockProfileData.*`,
   1258 	`(Mutex::)?AwaitCommon.*`,
   1259 	`(Mutex::)?Unlock.*`,
   1260 	`(Mutex::)?UnlockSlow.*`,
   1261 	`(Mutex::)?ReaderUnlock.*`,
   1262 	`(MutexLock::)?~MutexLock.*`,
   1263 	`(SpinLock::)?Unlock.*`,
   1264 	`(SpinLock::)?SlowUnlock.*`,
   1265 	`(SpinLockHolder::)?~SpinLockHolder.*`,
   1266 }, `|`)
   1267