Home | History | Annotate | Download | only in profile
      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // This file implements parsers to convert legacy profiles into the
     16 // profile.proto format.
     17 
     18 package profile
     19 
     20 import (
     21 	"bufio"
     22 	"bytes"
     23 	"fmt"
     24 	"io"
     25 	"math"
     26 	"regexp"
     27 	"strconv"
     28 	"strings"
     29 )
     30 
var (
	// countStartRE matches the header line of a Go count profile
	// ("<type> profile: total <n>"); group 1 is the profile type.
	// countRE matches one sample line of such a profile: group 1 is the
	// decimal count and group 2 the space-separated list of 0x-prefixed
	// addresses. Both are anchored to the whole line.
	countStartRE = regexp.MustCompile(`\A(\S+) profile: total \d+\z`)
	countRE      = regexp.MustCompile(`\A(\d+) @(( 0x[0-9a-f]+)+)\z`)

	// heapHeaderRE matches a heap profile header. Groups 1-4 are the four
	// integers of the "a: b [c: d]" summary, group 5 is the format name
	// (starting with "heap") and group 6 an optional trailing number that
	// parseHeapHeader reads as the sampling period.
	// heapSampleRE matches a heap sample line with the same "a: b [c: d]"
	// shape (the first two values may be negative); group 5 holds the
	// stack addresses.
	heapHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] *@ *(heap[_a-z0-9]*)/?(\d*)`)
	heapSampleRE = regexp.MustCompile(`(-?\d+): *(-?\d+) *\[ *(\d+): *(\d+) *] @([ x0-9a-f]*)`)

	// contentionSampleRE matches a contention sample line: two unsigned
	// integers followed by "@" and the stack addresses (group 3).
	contentionSampleRE = regexp.MustCompile(`(\d+) *(\d+) @([ x0-9a-f]*)`)

	// hexNumberRE matches a single 0x-prefixed lowercase hexadecimal number.
	hexNumberRE = regexp.MustCompile(`0x[0-9a-f]+`)

	// growthHeaderRE matches the header of a growth ("growthz") profile.
	growthHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ growthz?`)

	// fragmentationHeaderRE matches the header of a fragmentation
	// ("fragmentationz") profile.
	fragmentationHeaderRE = regexp.MustCompile(`heap profile: *(\d+): *(\d+) *\[ *(\d+): *(\d+) *\] @ fragmentationz?`)

	// threadzStartRE matches the "--- threadz <n> ---" banner and
	// threadStartRE the banner that introduces one thread's stack; its
	// groups are the hex thread identifier, the name and a trailing number.
	threadzStartRE = regexp.MustCompile(`--- threadz \d+ ---`)
	threadStartRE  = regexp.MustCompile(`--- Thread ([[:xdigit:]]+) \(name: (.*)/(\d+)\) stack: ---`)

	// Regular expressions to parse process mappings. Support the format used by Linux /proc/.../maps and other tools.
	// Recommended format:
	// Start   End     object file name     offset(optional)   linker build id
	// 0x40000-0x80000 /path/to/binary      (@FF00)            abc123456
	//
	// The following three fragments are non-capturing building blocks.
	spaceDigits = `\s+[[:digit:]]+`
	hexPair     = `\s+[[:xdigit:]]+:[[:xdigit:]]+`
	oSpace      = `\s*`
	// Capturing expressions. Each contributes one capture group, except
	// cHexRange, which contributes two (range start and range end).
	cHex           = `(?:0x)?([[:xdigit:]]+)`
	cHexRange      = `\s*` + cHex + `[\s-]?` + oSpace + cHex + `:?`
	cSpaceString   = `(?:\s+(\S+))?`
	cSpaceHex      = `(?:\s+([[:xdigit:]]+))?`
	cSpaceAtOffset = `(?:\s+\(@([[:xdigit:]]+)\))?`
	cPerm          = `(?:\s+([-rwxp]+))?`

	// procMapsRE matches a full /proc/.../maps style line (range,
	// permissions, offset, a hex pair, a decimal number, file name).
	// briefMapsRE matches the shorter recommended format shown above
	// (range, permissions, file name, optional "(@offset)", build id).
	procMapsRE  = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceHex + hexPair + spaceDigits + cSpaceString)
	briefMapsRE = regexp.MustCompile(`^` + cHexRange + cPerm + cSpaceString + cSpaceAtOffset + cSpaceHex)

	// Regular expression to parse log data, of the form:
	// ... file:line] msg...
	logInfoRE = regexp.MustCompile(`^[^\[\]]+:[0-9]+]\s`)
)
     71 
     72 func isSpaceOrComment(line string) bool {
     73 	trimmed := strings.TrimSpace(line)
     74 	return len(trimmed) == 0 || trimmed[0] == '#'
     75 }
     76 
     77 // parseGoCount parses a Go count profile (e.g., threadcreate or
     78 // goroutine) and returns a new Profile.
     79 func parseGoCount(b []byte) (*Profile, error) {
     80 	s := bufio.NewScanner(bytes.NewBuffer(b))
     81 	// Skip comments at the beginning of the file.
     82 	for s.Scan() && isSpaceOrComment(s.Text()) {
     83 	}
     84 	if err := s.Err(); err != nil {
     85 		return nil, err
     86 	}
     87 	m := countStartRE.FindStringSubmatch(s.Text())
     88 	if m == nil {
     89 		return nil, errUnrecognized
     90 	}
     91 	profileType := m[1]
     92 	p := &Profile{
     93 		PeriodType: &ValueType{Type: profileType, Unit: "count"},
     94 		Period:     1,
     95 		SampleType: []*ValueType{{Type: profileType, Unit: "count"}},
     96 	}
     97 	locations := make(map[uint64]*Location)
     98 	for s.Scan() {
     99 		line := s.Text()
    100 		if isSpaceOrComment(line) {
    101 			continue
    102 		}
    103 		if strings.HasPrefix(line, "---") {
    104 			break
    105 		}
    106 		m := countRE.FindStringSubmatch(line)
    107 		if m == nil {
    108 			return nil, errMalformed
    109 		}
    110 		n, err := strconv.ParseInt(m[1], 0, 64)
    111 		if err != nil {
    112 			return nil, errMalformed
    113 		}
    114 		fields := strings.Fields(m[2])
    115 		locs := make([]*Location, 0, len(fields))
    116 		for _, stk := range fields {
    117 			addr, err := strconv.ParseUint(stk, 0, 64)
    118 			if err != nil {
    119 				return nil, errMalformed
    120 			}
    121 			// Adjust all frames by -1 to land on top of the call instruction.
    122 			addr--
    123 			loc := locations[addr]
    124 			if loc == nil {
    125 				loc = &Location{
    126 					Address: addr,
    127 				}
    128 				locations[addr] = loc
    129 				p.Location = append(p.Location, loc)
    130 			}
    131 			locs = append(locs, loc)
    132 		}
    133 		p.Sample = append(p.Sample, &Sample{
    134 			Location: locs,
    135 			Value:    []int64{n},
    136 		})
    137 	}
    138 	if err := s.Err(); err != nil {
    139 		return nil, err
    140 	}
    141 
    142 	if err := parseAdditionalSections(s, p); err != nil {
    143 		return nil, err
    144 	}
    145 	return p, nil
    146 }
    147 
    148 // remapLocationIDs ensures there is a location for each address
    149 // referenced by a sample, and remaps the samples to point to the new
    150 // location ids.
    151 func (p *Profile) remapLocationIDs() {
    152 	seen := make(map[*Location]bool, len(p.Location))
    153 	var locs []*Location
    154 
    155 	for _, s := range p.Sample {
    156 		for _, l := range s.Location {
    157 			if seen[l] {
    158 				continue
    159 			}
    160 			l.ID = uint64(len(locs) + 1)
    161 			locs = append(locs, l)
    162 			seen[l] = true
    163 		}
    164 	}
    165 	p.Location = locs
    166 }
    167 
    168 func (p *Profile) remapFunctionIDs() {
    169 	seen := make(map[*Function]bool, len(p.Function))
    170 	var fns []*Function
    171 
    172 	for _, l := range p.Location {
    173 		for _, ln := range l.Line {
    174 			fn := ln.Function
    175 			if fn == nil || seen[fn] {
    176 				continue
    177 			}
    178 			fn.ID = uint64(len(fns) + 1)
    179 			fns = append(fns, fn)
    180 			seen[fn] = true
    181 		}
    182 	}
    183 	p.Function = fns
    184 }
    185 
    186 // remapMappingIDs matches location addresses with existing mappings
    187 // and updates them appropriately. This is O(N*M), if this ever shows
    188 // up as a bottleneck, evaluate sorting the mappings and doing a
    189 // binary search, which would make it O(N*log(M)).
    190 func (p *Profile) remapMappingIDs() {
    191 	// Some profile handlers will incorrectly set regions for the main
    192 	// executable if its section is remapped. Fix them through heuristics.
    193 
    194 	if len(p.Mapping) > 0 {
    195 		// Remove the initial mapping if named '/anon_hugepage' and has a
    196 		// consecutive adjacent mapping.
    197 		if m := p.Mapping[0]; strings.HasPrefix(m.File, "/anon_hugepage") {
    198 			if len(p.Mapping) > 1 && m.Limit == p.Mapping[1].Start {
    199 				p.Mapping = p.Mapping[1:]
    200 			}
    201 		}
    202 	}
    203 
    204 	// Subtract the offset from the start of the main mapping if it
    205 	// ends up at a recognizable start address.
    206 	if len(p.Mapping) > 0 {
    207 		const expectedStart = 0x400000
    208 		if m := p.Mapping[0]; m.Start-m.Offset == expectedStart {
    209 			m.Start = expectedStart
    210 			m.Offset = 0
    211 		}
    212 	}
    213 
    214 	// Associate each location with an address to the corresponding
    215 	// mapping. Create fake mapping if a suitable one isn't found.
    216 	var fake *Mapping
    217 nextLocation:
    218 	for _, l := range p.Location {
    219 		a := l.Address
    220 		if l.Mapping != nil || a == 0 {
    221 			continue
    222 		}
    223 		for _, m := range p.Mapping {
    224 			if m.Start <= a && a < m.Limit {
    225 				l.Mapping = m
    226 				continue nextLocation
    227 			}
    228 		}
    229 		// Work around legacy handlers failing to encode the first
    230 		// part of mappings split into adjacent ranges.
    231 		for _, m := range p.Mapping {
    232 			if m.Offset != 0 && m.Start-m.Offset <= a && a < m.Start {
    233 				m.Start -= m.Offset
    234 				m.Offset = 0
    235 				l.Mapping = m
    236 				continue nextLocation
    237 			}
    238 		}
    239 		// If there is still no mapping, create a fake one.
    240 		// This is important for the Go legacy handler, which produced
    241 		// no mappings.
    242 		if fake == nil {
    243 			fake = &Mapping{
    244 				ID:    1,
    245 				Limit: ^uint64(0),
    246 			}
    247 			p.Mapping = append(p.Mapping, fake)
    248 		}
    249 		l.Mapping = fake
    250 	}
    251 
    252 	// Reset all mapping IDs.
    253 	for i, m := range p.Mapping {
    254 		m.ID = uint64(i + 1)
    255 	}
    256 }
    257 
// cpuInts lists the word decoders that parseCPU tries, in order, when
// probing a profile header: 32-bit little-endian, 32-bit big-endian,
// 64-bit little-endian and 64-bit big-endian. Each decoder returns the
// decoded word and the remaining bytes, or (0, nil) when the input is
// too short.
var cpuInts = []func([]byte) (uint64, []byte){
	get32l,
	get32b,
	get64l,
	get64b,
}
    264 
    265 func get32l(b []byte) (uint64, []byte) {
    266 	if len(b) < 4 {
    267 		return 0, nil
    268 	}
    269 	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24, b[4:]
    270 }
    271 
    272 func get32b(b []byte) (uint64, []byte) {
    273 	if len(b) < 4 {
    274 		return 0, nil
    275 	}
    276 	return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24, b[4:]
    277 }
    278 
    279 func get64l(b []byte) (uint64, []byte) {
    280 	if len(b) < 8 {
    281 		return 0, nil
    282 	}
    283 	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56, b[8:]
    284 }
    285 
    286 func get64b(b []byte) (uint64, []byte) {
    287 	if len(b) < 8 {
    288 		return 0, nil
    289 	}
    290 	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56, b[8:]
    291 }
    292 
    293 // parseCPU parses a profilez legacy profile and returns a newly
    294 // populated Profile.
    295 //
    296 // The general format for profilez samples is a sequence of words in
    297 // binary format. The first words are a header with the following data:
    298 //   1st word -- 0
    299 //   2nd word -- 3
    300 //   3rd word -- 0 if a c++ application, 1 if a java application.
    301 //   4th word -- Sampling period (in microseconds).
    302 //   5th word -- Padding.
    303 func parseCPU(b []byte) (*Profile, error) {
    304 	var parse func([]byte) (uint64, []byte)
    305 	var n1, n2, n3, n4, n5 uint64
    306 	for _, parse = range cpuInts {
    307 		var tmp []byte
    308 		n1, tmp = parse(b)
    309 		n2, tmp = parse(tmp)
    310 		n3, tmp = parse(tmp)
    311 		n4, tmp = parse(tmp)
    312 		n5, tmp = parse(tmp)
    313 
    314 		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 0 && n4 > 0 && n5 == 0 {
    315 			b = tmp
    316 			return cpuProfile(b, int64(n4), parse)
    317 		}
    318 		if tmp != nil && n1 == 0 && n2 == 3 && n3 == 1 && n4 > 0 && n5 == 0 {
    319 			b = tmp
    320 			return javaCPUProfile(b, int64(n4), parse)
    321 		}
    322 	}
    323 	return nil, errUnrecognized
    324 }
    325 
    326 // cpuProfile returns a new Profile from C++ profilez data.
    327 // b is the profile bytes after the header, period is the profiling
    328 // period, and parse is a function to parse 8-byte chunks from the
    329 // profile in its native endianness.
    330 func cpuProfile(b []byte, period int64, parse func(b []byte) (uint64, []byte)) (*Profile, error) {
    331 	p := &Profile{
    332 		Period:     period * 1000,
    333 		PeriodType: &ValueType{Type: "cpu", Unit: "nanoseconds"},
    334 		SampleType: []*ValueType{
    335 			{Type: "samples", Unit: "count"},
    336 			{Type: "cpu", Unit: "nanoseconds"},
    337 		},
    338 	}
    339 	var err error
    340 	if b, _, err = parseCPUSamples(b, parse, true, p); err != nil {
    341 		return nil, err
    342 	}
    343 
    344 	// If *most* samples have the same second-to-the-bottom frame, it
    345 	// strongly suggests that it is an uninteresting artifact of
    346 	// measurement -- a stack frame pushed by the signal handler. The
    347 	// bottom frame is always correct as it is picked up from the signal
    348 	// structure, not the stack. Check if this is the case and if so,
    349 	// remove.
    350 
    351 	// Remove up to two frames.
    352 	maxiter := 2
    353 	// Allow one different sample for this many samples with the same
    354 	// second-to-last frame.
    355 	similarSamples := 32
    356 	margin := len(p.Sample) / similarSamples
    357 
    358 	for iter := 0; iter < maxiter; iter++ {
    359 		addr1 := make(map[uint64]int)
    360 		for _, s := range p.Sample {
    361 			if len(s.Location) > 1 {
    362 				a := s.Location[1].Address
    363 				addr1[a] = addr1[a] + 1
    364 			}
    365 		}
    366 
    367 		for id1, count := range addr1 {
    368 			if count >= len(p.Sample)-margin {
    369 				// Found uninteresting frame, strip it out from all samples
    370 				for _, s := range p.Sample {
    371 					if len(s.Location) > 1 && s.Location[1].Address == id1 {
    372 						s.Location = append(s.Location[:1], s.Location[2:]...)
    373 					}
    374 				}
    375 				break
    376 			}
    377 		}
    378 	}
    379 
    380 	if err := p.ParseMemoryMap(bytes.NewBuffer(b)); err != nil {
    381 		return nil, err
    382 	}
    383 
    384 	cleanupDuplicateLocations(p)
    385 	return p, nil
    386 }
    387 
    388 func cleanupDuplicateLocations(p *Profile) {
    389 	// The profile handler may duplicate the leaf frame, because it gets
    390 	// its address both from stack unwinding and from the signal
    391 	// context. Detect this and delete the duplicate, which has been
    392 	// adjusted by -1. The leaf address should not be adjusted as it is
    393 	// not a call.
    394 	for _, s := range p.Sample {
    395 		if len(s.Location) > 1 && s.Location[0].Address == s.Location[1].Address+1 {
    396 			s.Location = append(s.Location[:1], s.Location[2:]...)
    397 		}
    398 	}
    399 }
    400 
    401 // parseCPUSamples parses a collection of profilez samples from a
    402 // profile.
    403 //
    404 // profilez samples are a repeated sequence of stack frames of the
    405 // form:
    406 //    1st word -- The number of times this stack was encountered.
    407 //    2nd word -- The size of the stack (StackSize).
    408 //    3rd word -- The first address on the stack.
    409 //    ...
    410 //    StackSize + 2 -- The last address on the stack
    411 // The last stack trace is of the form:
    412 //   1st word -- 0
    413 //   2nd word -- 1
    414 //   3rd word -- 0
    415 //
    416 // Addresses from stack traces may point to the next instruction after
    417 // each call. Optionally adjust by -1 to land somewhere on the actual
    418 // call (except for the leaf, which is not a call).
    419 func parseCPUSamples(b []byte, parse func(b []byte) (uint64, []byte), adjust bool, p *Profile) ([]byte, map[uint64]*Location, error) {
    420 	locs := make(map[uint64]*Location)
    421 	for len(b) > 0 {
    422 		var count, nstk uint64
    423 		count, b = parse(b)
    424 		nstk, b = parse(b)
    425 		if b == nil || nstk > uint64(len(b)/4) {
    426 			return nil, nil, errUnrecognized
    427 		}
    428 		var sloc []*Location
    429 		addrs := make([]uint64, nstk)
    430 		for i := 0; i < int(nstk); i++ {
    431 			addrs[i], b = parse(b)
    432 		}
    433 
    434 		if count == 0 && nstk == 1 && addrs[0] == 0 {
    435 			// End of data marker
    436 			break
    437 		}
    438 		for i, addr := range addrs {
    439 			if adjust && i > 0 {
    440 				addr--
    441 			}
    442 			loc := locs[addr]
    443 			if loc == nil {
    444 				loc = &Location{
    445 					Address: addr,
    446 				}
    447 				locs[addr] = loc
    448 				p.Location = append(p.Location, loc)
    449 			}
    450 			sloc = append(sloc, loc)
    451 		}
    452 		p.Sample = append(p.Sample,
    453 			&Sample{
    454 				Value:    []int64{int64(count), int64(count) * p.Period},
    455 				Location: sloc,
    456 			})
    457 	}
    458 	// Reached the end without finding the EOD marker.
    459 	return b, locs, nil
    460 }
    461 
    462 // parseHeap parses a heapz legacy or a growthz profile and
    463 // returns a newly populated Profile.
    464 func parseHeap(b []byte) (p *Profile, err error) {
    465 	s := bufio.NewScanner(bytes.NewBuffer(b))
    466 	if !s.Scan() {
    467 		if err := s.Err(); err != nil {
    468 			return nil, err
    469 		}
    470 		return nil, errUnrecognized
    471 	}
    472 	p = &Profile{}
    473 
    474 	sampling := ""
    475 	hasAlloc := false
    476 
    477 	line := s.Text()
    478 	p.PeriodType = &ValueType{Type: "space", Unit: "bytes"}
    479 	if header := heapHeaderRE.FindStringSubmatch(line); header != nil {
    480 		sampling, p.Period, hasAlloc, err = parseHeapHeader(line)
    481 		if err != nil {
    482 			return nil, err
    483 		}
    484 	} else if header = growthHeaderRE.FindStringSubmatch(line); header != nil {
    485 		p.Period = 1
    486 	} else if header = fragmentationHeaderRE.FindStringSubmatch(line); header != nil {
    487 		p.Period = 1
    488 	} else {
    489 		return nil, errUnrecognized
    490 	}
    491 
    492 	if hasAlloc {
    493 		// Put alloc before inuse so that default pprof selection
    494 		// will prefer inuse_space.
    495 		p.SampleType = []*ValueType{
    496 			{Type: "alloc_objects", Unit: "count"},
    497 			{Type: "alloc_space", Unit: "bytes"},
    498 			{Type: "inuse_objects", Unit: "count"},
    499 			{Type: "inuse_space", Unit: "bytes"},
    500 		}
    501 	} else {
    502 		p.SampleType = []*ValueType{
    503 			{Type: "objects", Unit: "count"},
    504 			{Type: "space", Unit: "bytes"},
    505 		}
    506 	}
    507 
    508 	locs := make(map[uint64]*Location)
    509 	for s.Scan() {
    510 		line := strings.TrimSpace(s.Text())
    511 
    512 		if isSpaceOrComment(line) {
    513 			continue
    514 		}
    515 
    516 		if isMemoryMapSentinel(line) {
    517 			break
    518 		}
    519 
    520 		value, blocksize, addrs, err := parseHeapSample(line, p.Period, sampling, hasAlloc)
    521 		if err != nil {
    522 			return nil, err
    523 		}
    524 
    525 		var sloc []*Location
    526 		for _, addr := range addrs {
    527 			// Addresses from stack traces point to the next instruction after
    528 			// each call. Adjust by -1 to land somewhere on the actual call.
    529 			addr--
    530 			loc := locs[addr]
    531 			if locs[addr] == nil {
    532 				loc = &Location{
    533 					Address: addr,
    534 				}
    535 				p.Location = append(p.Location, loc)
    536 				locs[addr] = loc
    537 			}
    538 			sloc = append(sloc, loc)
    539 		}
    540 
    541 		p.Sample = append(p.Sample, &Sample{
    542 			Value:    value,
    543 			Location: sloc,
    544 			NumLabel: map[string][]int64{"bytes": {blocksize}},
    545 		})
    546 	}
    547 	if err := s.Err(); err != nil {
    548 		return nil, err
    549 	}
    550 	if err := parseAdditionalSections(s, p); err != nil {
    551 		return nil, err
    552 	}
    553 	return p, nil
    554 }
    555 
    556 func parseHeapHeader(line string) (sampling string, period int64, hasAlloc bool, err error) {
    557 	header := heapHeaderRE.FindStringSubmatch(line)
    558 	if header == nil {
    559 		return "", 0, false, errUnrecognized
    560 	}
    561 
    562 	if len(header[6]) > 0 {
    563 		if period, err = strconv.ParseInt(header[6], 10, 64); err != nil {
    564 			return "", 0, false, errUnrecognized
    565 		}
    566 	}
    567 
    568 	if (header[3] != header[1] && header[3] != "0") || (header[4] != header[2] && header[4] != "0") {
    569 		hasAlloc = true
    570 	}
    571 
    572 	switch header[5] {
    573 	case "heapz_v2", "heap_v2":
    574 		return "v2", period, hasAlloc, nil
    575 	case "heapprofile":
    576 		return "", 1, hasAlloc, nil
    577 	case "heap":
    578 		return "v2", period / 2, hasAlloc, nil
    579 	default:
    580 		return "", 0, false, errUnrecognized
    581 	}
    582 }
    583 
    584 // parseHeapSample parses a single row from a heap profile into a new Sample.
    585 func parseHeapSample(line string, rate int64, sampling string, includeAlloc bool) (value []int64, blocksize int64, addrs []uint64, err error) {
    586 	sampleData := heapSampleRE.FindStringSubmatch(line)
    587 	if len(sampleData) != 6 {
    588 		return nil, 0, nil, fmt.Errorf("unexpected number of sample values: got %d, want 6", len(sampleData))
    589 	}
    590 
    591 	// This is a local-scoped helper function to avoid needing to pass
    592 	// around rate, sampling and many return parameters.
    593 	addValues := func(countString, sizeString string, label string) error {
    594 		count, err := strconv.ParseInt(countString, 10, 64)
    595 		if err != nil {
    596 			return fmt.Errorf("malformed sample: %s: %v", line, err)
    597 		}
    598 		size, err := strconv.ParseInt(sizeString, 10, 64)
    599 		if err != nil {
    600 			return fmt.Errorf("malformed sample: %s: %v", line, err)
    601 		}
    602 		if count == 0 && size != 0 {
    603 			return fmt.Errorf("%s count was 0 but %s bytes was %d", label, label, size)
    604 		}
    605 		if count != 0 {
    606 			blocksize = size / count
    607 			if sampling == "v2" {
    608 				count, size = scaleHeapSample(count, size, rate)
    609 			}
    610 		}
    611 		value = append(value, count, size)
    612 		return nil
    613 	}
    614 
    615 	if includeAlloc {
    616 		if err := addValues(sampleData[3], sampleData[4], "allocation"); err != nil {
    617 			return nil, 0, nil, err
    618 		}
    619 	}
    620 
    621 	if err := addValues(sampleData[1], sampleData[2], "inuse"); err != nil {
    622 		return nil, 0, nil, err
    623 	}
    624 
    625 	addrs, err = parseHexAddresses(sampleData[5])
    626 	if err != nil {
    627 		return nil, 0, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
    628 	}
    629 
    630 	return value, blocksize, addrs, nil
    631 }
    632 
    633 // parseHexAddresses extracts hex numbers from a string, attempts to convert
    634 // each to an unsigned 64-bit number and returns the resulting numbers as a
    635 // slice, or an error if the string contains hex numbers which are too large to
    636 // handle (which means a malformed profile).
    637 func parseHexAddresses(s string) ([]uint64, error) {
    638 	hexStrings := hexNumberRE.FindAllString(s, -1)
    639 	var addrs []uint64
    640 	for _, s := range hexStrings {
    641 		if addr, err := strconv.ParseUint(s, 0, 64); err == nil {
    642 			addrs = append(addrs, addr)
    643 		} else {
    644 			return nil, fmt.Errorf("failed to parse as hex 64-bit number: %s", s)
    645 		}
    646 	}
    647 	return addrs, nil
    648 }
    649 
    650 // scaleHeapSample adjusts the data from a heapz Sample to
    651 // account for its probability of appearing in the collected
    652 // data. heapz profiles are a sampling of the memory allocations
    653 // requests in a program. We estimate the unsampled value by dividing
    654 // each collected sample by its probability of appearing in the
    655 // profile. heapz v2 profiles rely on a poisson process to determine
    656 // which samples to collect, based on the desired average collection
    657 // rate R. The probability of a sample of size S to appear in that
    658 // profile is 1-exp(-S/R).
    659 func scaleHeapSample(count, size, rate int64) (int64, int64) {
    660 	if count == 0 || size == 0 {
    661 		return 0, 0
    662 	}
    663 
    664 	if rate <= 1 {
    665 		// if rate==1 all samples were collected so no adjustment is needed.
    666 		// if rate<1 treat as unknown and skip scaling.
    667 		return count, size
    668 	}
    669 
    670 	avgSize := float64(size) / float64(count)
    671 	scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
    672 
    673 	return int64(float64(count) * scale), int64(float64(size) * scale)
    674 }
    675 
    676 // parseContention parses a mutex or contention profile. There are 2 cases:
    677 // "--- contentionz " for legacy C++ profiles (and backwards compatibility)
    678 // "--- mutex:" or "--- contention:" for profiles generated by the Go runtime.
    679 func parseContention(b []byte) (*Profile, error) {
    680 	s := bufio.NewScanner(bytes.NewBuffer(b))
    681 	if !s.Scan() {
    682 		if err := s.Err(); err != nil {
    683 			return nil, err
    684 		}
    685 		return nil, errUnrecognized
    686 	}
    687 
    688 	switch l := s.Text(); {
    689 	case strings.HasPrefix(l, "--- contentionz "):
    690 	case strings.HasPrefix(l, "--- mutex:"):
    691 	case strings.HasPrefix(l, "--- contention:"):
    692 	default:
    693 		return nil, errUnrecognized
    694 	}
    695 
    696 	p := &Profile{
    697 		PeriodType: &ValueType{Type: "contentions", Unit: "count"},
    698 		Period:     1,
    699 		SampleType: []*ValueType{
    700 			{Type: "contentions", Unit: "count"},
    701 			{Type: "delay", Unit: "nanoseconds"},
    702 		},
    703 	}
    704 
    705 	var cpuHz int64
    706 	// Parse text of the form "attribute = value" before the samples.
    707 	const delimiter = "="
    708 	for s.Scan() {
    709 		line := s.Text()
    710 		if line = strings.TrimSpace(line); isSpaceOrComment(line) {
    711 			continue
    712 		}
    713 		if strings.HasPrefix(line, "---") {
    714 			break
    715 		}
    716 		attr := strings.SplitN(line, delimiter, 2)
    717 		if len(attr) != 2 {
    718 			break
    719 		}
    720 		key, val := strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1])
    721 		var err error
    722 		switch key {
    723 		case "cycles/second":
    724 			if cpuHz, err = strconv.ParseInt(val, 0, 64); err != nil {
    725 				return nil, errUnrecognized
    726 			}
    727 		case "sampling period":
    728 			if p.Period, err = strconv.ParseInt(val, 0, 64); err != nil {
    729 				return nil, errUnrecognized
    730 			}
    731 		case "ms since reset":
    732 			ms, err := strconv.ParseInt(val, 0, 64)
    733 			if err != nil {
    734 				return nil, errUnrecognized
    735 			}
    736 			p.DurationNanos = ms * 1000 * 1000
    737 		case "format":
    738 			// CPP contentionz profiles don't have format.
    739 			return nil, errUnrecognized
    740 		case "resolution":
    741 			// CPP contentionz profiles don't have resolution.
    742 			return nil, errUnrecognized
    743 		case "discarded samples":
    744 		default:
    745 			return nil, errUnrecognized
    746 		}
    747 	}
    748 	if err := s.Err(); err != nil {
    749 		return nil, err
    750 	}
    751 
    752 	locs := make(map[uint64]*Location)
    753 	for {
    754 		line := strings.TrimSpace(s.Text())
    755 		if strings.HasPrefix(line, "---") {
    756 			break
    757 		}
    758 		if !isSpaceOrComment(line) {
    759 			value, addrs, err := parseContentionSample(line, p.Period, cpuHz)
    760 			if err != nil {
    761 				return nil, err
    762 			}
    763 			var sloc []*Location
    764 			for _, addr := range addrs {
    765 				// Addresses from stack traces point to the next instruction after
    766 				// each call. Adjust by -1 to land somewhere on the actual call.
    767 				addr--
    768 				loc := locs[addr]
    769 				if locs[addr] == nil {
    770 					loc = &Location{
    771 						Address: addr,
    772 					}
    773 					p.Location = append(p.Location, loc)
    774 					locs[addr] = loc
    775 				}
    776 				sloc = append(sloc, loc)
    777 			}
    778 			p.Sample = append(p.Sample, &Sample{
    779 				Value:    value,
    780 				Location: sloc,
    781 			})
    782 		}
    783 		if !s.Scan() {
    784 			break
    785 		}
    786 	}
    787 	if err := s.Err(); err != nil {
    788 		return nil, err
    789 	}
    790 
    791 	if err := parseAdditionalSections(s, p); err != nil {
    792 		return nil, err
    793 	}
    794 
    795 	return p, nil
    796 }
    797 
    798 // parseContentionSample parses a single row from a contention profile
    799 // into a new Sample.
    800 func parseContentionSample(line string, period, cpuHz int64) (value []int64, addrs []uint64, err error) {
    801 	sampleData := contentionSampleRE.FindStringSubmatch(line)
    802 	if sampleData == nil {
    803 		return nil, nil, errUnrecognized
    804 	}
    805 
    806 	v1, err := strconv.ParseInt(sampleData[1], 10, 64)
    807 	if err != nil {
    808 		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
    809 	}
    810 	v2, err := strconv.ParseInt(sampleData[2], 10, 64)
    811 	if err != nil {
    812 		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
    813 	}
    814 
    815 	// Unsample values if period and cpuHz are available.
    816 	// - Delays are scaled to cycles and then to nanoseconds.
    817 	// - Contentions are scaled to cycles.
    818 	if period > 0 {
    819 		if cpuHz > 0 {
    820 			cpuGHz := float64(cpuHz) / 1e9
    821 			v1 = int64(float64(v1) * float64(period) / cpuGHz)
    822 		}
    823 		v2 = v2 * period
    824 	}
    825 
    826 	value = []int64{v2, v1}
    827 	addrs, err = parseHexAddresses(sampleData[3])
    828 	if err != nil {
    829 		return nil, nil, fmt.Errorf("malformed sample: %s: %v", line, err)
    830 	}
    831 
    832 	return value, addrs, nil
    833 }
    834 
    835 // parseThread parses a Threadz profile and returns a new Profile.
    836 func parseThread(b []byte) (*Profile, error) {
    837 	s := bufio.NewScanner(bytes.NewBuffer(b))
    838 	// Skip past comments and empty lines seeking a real header.
    839 	for s.Scan() && isSpaceOrComment(s.Text()) {
    840 	}
    841 
    842 	line := s.Text()
    843 	if m := threadzStartRE.FindStringSubmatch(line); m != nil {
    844 		// Advance over initial comments until first stack trace.
    845 		for s.Scan() {
    846 			if line = s.Text(); isMemoryMapSentinel(line) || strings.HasPrefix(line, "-") {
    847 				break
    848 			}
    849 		}
    850 	} else if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
    851 		return nil, errUnrecognized
    852 	}
    853 
    854 	p := &Profile{
    855 		SampleType: []*ValueType{{Type: "thread", Unit: "count"}},
    856 		PeriodType: &ValueType{Type: "thread", Unit: "count"},
    857 		Period:     1,
    858 	}
    859 
    860 	locs := make(map[uint64]*Location)
    861 	// Recognize each thread and populate profile samples.
    862 	for !isMemoryMapSentinel(line) {
    863 		if strings.HasPrefix(line, "---- no stack trace for") {
    864 			line = ""
    865 			break
    866 		}
    867 		if t := threadStartRE.FindStringSubmatch(line); len(t) != 4 {
    868 			return nil, errUnrecognized
    869 		}
    870 
    871 		var addrs []uint64
    872 		var err error
    873 		line, addrs, err = parseThreadSample(s)
    874 		if err != nil {
    875 			return nil, err
    876 		}
    877 		if len(addrs) == 0 {
    878 			// We got a --same as previous threads--. Bump counters.
    879 			if len(p.Sample) > 0 {
    880 				s := p.Sample[len(p.Sample)-1]
    881 				s.Value[0]++
    882 			}
    883 			continue
    884 		}
    885 
    886 		var sloc []*Location
    887 		for i, addr := range addrs {
    888 			// Addresses from stack traces point to the next instruction after
    889 			// each call. Adjust by -1 to land somewhere on the actual call
    890 			// (except for the leaf, which is not a call).
    891 			if i > 0 {
    892 				addr--
    893 			}
    894 			loc := locs[addr]
    895 			if locs[addr] == nil {
    896 				loc = &Location{
    897 					Address: addr,
    898 				}
    899 				p.Location = append(p.Location, loc)
    900 				locs[addr] = loc
    901 			}
    902 			sloc = append(sloc, loc)
    903 		}
    904 
    905 		p.Sample = append(p.Sample, &Sample{
    906 			Value:    []int64{1},
    907 			Location: sloc,
    908 		})
    909 	}
    910 
    911 	if err := parseAdditionalSections(s, p); err != nil {
    912 		return nil, err
    913 	}
    914 
    915 	cleanupDuplicateLocations(p)
    916 	return p, nil
    917 }
    918 
    919 // parseThreadSample parses a symbolized or unsymbolized stack trace.
    920 // Returns the first line after the traceback, the sample (or nil if
    921 // it hits a 'same-as-previous' marker) and an error.
    922 func parseThreadSample(s *bufio.Scanner) (nextl string, addrs []uint64, err error) {
    923 	var line string
    924 	sameAsPrevious := false
    925 	for s.Scan() {
    926 		line = strings.TrimSpace(s.Text())
    927 		if line == "" {
    928 			continue
    929 		}
    930 
    931 		if strings.HasPrefix(line, "---") {
    932 			break
    933 		}
    934 		if strings.Contains(line, "same as previous thread") {
    935 			sameAsPrevious = true
    936 			continue
    937 		}
    938 
    939 		curAddrs, err := parseHexAddresses(line)
    940 		if err != nil {
    941 			return "", nil, fmt.Errorf("malformed sample: %s: %v", line, err)
    942 		}
    943 		addrs = append(addrs, curAddrs...)
    944 	}
    945 	if err := s.Err(); err != nil {
    946 		return "", nil, err
    947 	}
    948 	if sameAsPrevious {
    949 		return line, nil, nil
    950 	}
    951 	return line, addrs, nil
    952 }
    953 
    954 // parseAdditionalSections parses any additional sections in the
    955 // profile, ignoring any unrecognized sections.
    956 func parseAdditionalSections(s *bufio.Scanner, p *Profile) error {
    957 	for !isMemoryMapSentinel(s.Text()) && s.Scan() {
    958 	}
    959 	if err := s.Err(); err != nil {
    960 		return err
    961 	}
    962 	return p.ParseMemoryMapFromScanner(s)
    963 }
    964 
    965 // ParseProcMaps parses a memory map in the format of /proc/self/maps.
    966 // ParseMemoryMap should be called after setting on a profile to
    967 // associate locations to the corresponding mapping based on their
    968 // address.
    969 func ParseProcMaps(rd io.Reader) ([]*Mapping, error) {
    970 	s := bufio.NewScanner(rd)
    971 	return parseProcMapsFromScanner(s)
    972 }
    973 
    974 func parseProcMapsFromScanner(s *bufio.Scanner) ([]*Mapping, error) {
    975 	var mapping []*Mapping
    976 
    977 	var attrs []string
    978 	const delimiter = "="
    979 	r := strings.NewReplacer()
    980 	for s.Scan() {
    981 		line := r.Replace(removeLoggingInfo(s.Text()))
    982 		m, err := parseMappingEntry(line)
    983 		if err != nil {
    984 			if err == errUnrecognized {
    985 				// Recognize assignments of the form: attr=value, and replace
    986 				// $attr with value on subsequent mappings.
    987 				if attr := strings.SplitN(line, delimiter, 2); len(attr) == 2 {
    988 					attrs = append(attrs, "$"+strings.TrimSpace(attr[0]), strings.TrimSpace(attr[1]))
    989 					r = strings.NewReplacer(attrs...)
    990 				}
    991 				// Ignore any unrecognized entries
    992 				continue
    993 			}
    994 			return nil, err
    995 		}
    996 		if m == nil {
    997 			continue
    998 		}
    999 		mapping = append(mapping, m)
   1000 	}
   1001 	if err := s.Err(); err != nil {
   1002 		return nil, err
   1003 	}
   1004 	return mapping, nil
   1005 }
   1006 
   1007 // removeLoggingInfo detects and removes log prefix entries generated
   1008 // by the glog package. If no logging prefix is detected, the string
   1009 // is returned unmodified.
   1010 func removeLoggingInfo(line string) string {
   1011 	if match := logInfoRE.FindStringIndex(line); match != nil {
   1012 		return line[match[1]:]
   1013 	}
   1014 	return line
   1015 }
   1016 
   1017 // ParseMemoryMap parses a memory map in the format of
   1018 // /proc/self/maps, and overrides the mappings in the current profile.
   1019 // It renumbers the samples and locations in the profile correspondingly.
   1020 func (p *Profile) ParseMemoryMap(rd io.Reader) error {
   1021 	return p.ParseMemoryMapFromScanner(bufio.NewScanner(rd))
   1022 }
   1023 
   1024 // ParseMemoryMapFromScanner parses a memory map in the format of
   1025 // /proc/self/maps or a variety of legacy format, and overrides the
   1026 // mappings in the current profile.  It renumbers the samples and
   1027 // locations in the profile correspondingly.
   1028 func (p *Profile) ParseMemoryMapFromScanner(s *bufio.Scanner) error {
   1029 	mapping, err := parseProcMapsFromScanner(s)
   1030 	if err != nil {
   1031 		return err
   1032 	}
   1033 	p.Mapping = append(p.Mapping, mapping...)
   1034 	p.massageMappings()
   1035 	p.remapLocationIDs()
   1036 	p.remapFunctionIDs()
   1037 	p.remapMappingIDs()
   1038 	return nil
   1039 }
   1040 
   1041 func parseMappingEntry(l string) (*Mapping, error) {
   1042 	var start, end, perm, file, offset, buildID string
   1043 	if me := procMapsRE.FindStringSubmatch(l); len(me) == 6 {
   1044 		start, end, perm, offset, file = me[1], me[2], me[3], me[4], me[5]
   1045 	} else if me := briefMapsRE.FindStringSubmatch(l); len(me) == 7 {
   1046 		start, end, perm, file, offset, buildID = me[1], me[2], me[3], me[4], me[5], me[6]
   1047 	} else {
   1048 		return nil, errUnrecognized
   1049 	}
   1050 
   1051 	var err error
   1052 	mapping := &Mapping{
   1053 		File:    file,
   1054 		BuildID: buildID,
   1055 	}
   1056 	if perm != "" && !strings.Contains(perm, "x") {
   1057 		// Skip non-executable entries.
   1058 		return nil, nil
   1059 	}
   1060 	if mapping.Start, err = strconv.ParseUint(start, 16, 64); err != nil {
   1061 		return nil, errUnrecognized
   1062 	}
   1063 	if mapping.Limit, err = strconv.ParseUint(end, 16, 64); err != nil {
   1064 		return nil, errUnrecognized
   1065 	}
   1066 	if offset != "" {
   1067 		if mapping.Offset, err = strconv.ParseUint(offset, 16, 64); err != nil {
   1068 			return nil, errUnrecognized
   1069 		}
   1070 	}
   1071 	return mapping, nil
   1072 }
   1073 
   1074 var memoryMapSentinels = []string{
   1075 	"--- Memory map: ---",
   1076 	"MAPPED_LIBRARIES:",
   1077 }
   1078 
   1079 // isMemoryMapSentinel returns true if the string contains one of the
   1080 // known sentinels for memory map information.
   1081 func isMemoryMapSentinel(line string) bool {
   1082 	for _, s := range memoryMapSentinels {
   1083 		if strings.Contains(line, s) {
   1084 			return true
   1085 		}
   1086 	}
   1087 	return false
   1088 }
   1089 
   1090 func (p *Profile) addLegacyFrameInfo() {
   1091 	switch {
   1092 	case isProfileType(p, heapzSampleTypes):
   1093 		p.DropFrames, p.KeepFrames = allocRxStr, allocSkipRxStr
   1094 	case isProfileType(p, contentionzSampleTypes):
   1095 		p.DropFrames, p.KeepFrames = lockRxStr, ""
   1096 	default:
   1097 		p.DropFrames, p.KeepFrames = cpuProfilerRxStr, ""
   1098 	}
   1099 }
   1100 
   1101 var heapzSampleTypes = [][]string{
   1102 	{"allocations", "size"}, // early Go pprof profiles
   1103 	{"objects", "space"},
   1104 	{"inuse_objects", "inuse_space"},
   1105 	{"alloc_objects", "alloc_space"},
   1106 }
   1107 var contentionzSampleTypes = [][]string{
   1108 	{"contentions", "delay"},
   1109 }
   1110 
   1111 func isProfileType(p *Profile, types [][]string) bool {
   1112 	st := p.SampleType
   1113 nextType:
   1114 	for _, t := range types {
   1115 		if len(st) != len(t) {
   1116 			continue
   1117 		}
   1118 
   1119 		for i := range st {
   1120 			if st[i].Type != t[i] {
   1121 				continue nextType
   1122 			}
   1123 		}
   1124 		return true
   1125 	}
   1126 	return false
   1127 }
   1128 
   1129 var allocRxStr = strings.Join([]string{
   1130 	// POSIX entry points.
   1131 	`calloc`,
   1132 	`cfree`,
   1133 	`malloc`,
   1134 	`free`,
   1135 	`memalign`,
   1136 	`do_memalign`,
   1137 	`(__)?posix_memalign`,
   1138 	`pvalloc`,
   1139 	`valloc`,
   1140 	`realloc`,
   1141 
   1142 	// TC malloc.
   1143 	`tcmalloc::.*`,
   1144 	`tc_calloc`,
   1145 	`tc_cfree`,
   1146 	`tc_malloc`,
   1147 	`tc_free`,
   1148 	`tc_memalign`,
   1149 	`tc_posix_memalign`,
   1150 	`tc_pvalloc`,
   1151 	`tc_valloc`,
   1152 	`tc_realloc`,
   1153 	`tc_new`,
   1154 	`tc_delete`,
   1155 	`tc_newarray`,
   1156 	`tc_deletearray`,
   1157 	`tc_new_nothrow`,
   1158 	`tc_newarray_nothrow`,
   1159 
   1160 	// Memory-allocation routines on OS X.
   1161 	`malloc_zone_malloc`,
   1162 	`malloc_zone_calloc`,
   1163 	`malloc_zone_valloc`,
   1164 	`malloc_zone_realloc`,
   1165 	`malloc_zone_memalign`,
   1166 	`malloc_zone_free`,
   1167 
   1168 	// Go runtime
   1169 	`runtime\..*`,
   1170 
   1171 	// Other misc. memory allocation routines
   1172 	`BaseArena::.*`,
   1173 	`(::)?do_malloc_no_errno`,
   1174 	`(::)?do_malloc_pages`,
   1175 	`(::)?do_malloc`,
   1176 	`DoSampledAllocation`,
   1177 	`MallocedMemBlock::MallocedMemBlock`,
   1178 	`_M_allocate`,
   1179 	`__builtin_(vec_)?delete`,
   1180 	`__builtin_(vec_)?new`,
   1181 	`__gnu_cxx::new_allocator::allocate`,
   1182 	`__libc_malloc`,
   1183 	`__malloc_alloc_template::allocate`,
   1184 	`allocate`,
   1185 	`cpp_alloc`,
   1186 	`operator new(\[\])?`,
   1187 	`simple_alloc::allocate`,
   1188 }, `|`)
   1189 
   1190 var allocSkipRxStr = strings.Join([]string{
   1191 	// Preserve Go runtime frames that appear in the middle/bottom of
   1192 	// the stack.
   1193 	`runtime\.panic`,
   1194 	`runtime\.reflectcall`,
   1195 	`runtime\.call[0-9]*`,
   1196 }, `|`)
   1197 
   1198 var cpuProfilerRxStr = strings.Join([]string{
   1199 	`ProfileData::Add`,
   1200 	`ProfileData::prof_handler`,
   1201 	`CpuProfiler::prof_handler`,
   1202 	`__pthread_sighandler`,
   1203 	`__restore`,
   1204 }, `|`)
   1205 
   1206 var lockRxStr = strings.Join([]string{
   1207 	`RecordLockProfileData`,
   1208 	`(base::)?RecordLockProfileData.*`,
   1209 	`(base::)?SubmitMutexProfileData.*`,
   1210 	`(base::)?SubmitSpinLockProfileData.*`,
   1211 	`(base::Mutex::)?AwaitCommon.*`,
   1212 	`(base::Mutex::)?Unlock.*`,
   1213 	`(base::Mutex::)?UnlockSlow.*`,
   1214 	`(base::Mutex::)?ReaderUnlock.*`,
   1215 	`(base::MutexLock::)?~MutexLock.*`,
   1216 	`(Mutex::)?AwaitCommon.*`,
   1217 	`(Mutex::)?Unlock.*`,
   1218 	`(Mutex::)?UnlockSlow.*`,
   1219 	`(Mutex::)?ReaderUnlock.*`,
   1220 	`(MutexLock::)?~MutexLock.*`,
   1221 	`(SpinLock::)?Unlock.*`,
   1222 	`(SpinLock::)?SlowUnlock.*`,
   1223 	`(SpinLockHolder::)?~SpinLockHolder.*`,
   1224 }, `|`)
   1225