1 #!/usr/bin/env python 2 # 3 # Copyright (C) 2017 The Android Open Source Project 4 # 5 # Licensed under the Apache License, Version 2.0 (the "License"); 6 # you may not use this file except in compliance with the License. 7 # You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 # 17 18 """pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be 19 used by pprof. 20 21 Example: 22 python app_profiler.py 23 python pprof_proto_generator.py 24 pprof -text pprof.profile 25 """ 26 27 from __future__ import print_function 28 import argparse 29 import os 30 import os.path 31 import re 32 import shutil 33 import sys 34 import time 35 36 from annotate import Addr2Line 37 from simpleperf_report_lib import * 38 from utils import * 39 40 try: 41 import google.protobuf 42 except: 43 log_exit('google.protobuf module is missing. Please install it first.') 44 45 import profile_pb2 46 47 def load_pprof_profile(filename): 48 profile = profile_pb2.Profile() 49 with open(filename, "rb") as f: 50 profile.ParseFromString(f.read()) 51 return profile 52 53 54 def store_pprof_profile(filename, profile): 55 with open(filename, 'wb') as f: 56 f.write(profile.SerializeToString()) 57 58 59 class PprofProfilePrinter(object): 60 61 def __init__(self, profile): 62 self.profile = profile 63 self.string_table = profile.string_table 64 65 def show(self): 66 p = self.profile 67 sub_space = ' ' 68 print('Profile {') 69 print('%d sample_types' % len(p.sample_type)) 70 for i in range(len(p.sample_type)): 71 print('sample_type[%d] = ' % i, end='') 72 self.show_value_type(p.sample_type[i]) 73 print('%d samples' % len(p.sample)) 74 for i in range(len(p.sample)): 75 print('sample[%d]:' % i) 76 self.show_sample(p.sample[i], sub_space) 77 print('%d mappings' % len(p.mapping)) 78 for i in range(len(p.mapping)): 79 print('mapping[%d]:' % i) 80 self.show_mapping(p.mapping[i], sub_space) 81 print('%d locations' % len(p.location)) 82 for i in range(len(p.location)): 83 print('location[%d]:' % i) 84 self.show_location(p.location[i], sub_space) 85 for i in range(len(p.function)): 86 print('function[%d]:' % i) 87 self.show_function(p.function[i], sub_space) 88 print('%d strings' % len(p.string_table)) 89 for i in range(len(p.string_table)): 90 print('string[%d]: %s' % (i, p.string_table[i])) 91 print('drop_frames: %s' % self.string(p.drop_frames)) 92 print('keep_frames: %s' % self.string(p.keep_frames)) 93 print('time_nanos: %u' % p.time_nanos) 94 print('duration_nanos: %u' % p.duration_nanos) 95 print('period_type: ', end='') 96 self.show_value_type(p.period_type) 97 print('period: %u' % p.period) 98 for i in range(len(p.comment)): 99 print('comment[%d] = %s' % (i, self.string(p.comment[i]))) 100 print('default_sample_type: %d' % p.default_sample_type) 101 print('} // Profile') 102 print() 103 104 def show_value_type(self, value_type, space=''): 105 print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' % 106 (space, value_type.type, value_type.unit, 107 self.string(value_type.type), self.string(value_type.unit))) 108 109 def show_sample(self, sample, space=''): 110 sub_space = space + ' ' 111 for i in range(len(sample.location_id)): 112 print('%slocation_id[%d]: id %d' % (space, i, sample.location_id[i])) 113 self.show_location_id(sample.location_id[i], sub_space) 114 for i in range(len(sample.value)): 115 print('%svalue[%d] = %d' % (space, i, sample.value[i])) 116 for i in range(len(sample.label)): 117 print('%slabel[%d] = ', (space, i)) 118 119 def show_location_id(self, location_id, space=''): 120 location = self.profile.location[location_id - 1] 121 self.show_location(location, space) 122 123 def show_location(self, location, space=''): 124 sub_space = space + ' ' 125 print('%sid: %d' % (space, location.id)) 126 print('%smapping_id: %d' % (space, location.mapping_id)) 127 self.show_mapping_id(location.mapping_id, sub_space) 128 print('%saddress: %x' % (space, location.address)) 129 for i in range(len(location.line)): 130 print('%sline[%d]:' % (space, i)) 131 self.show_line(location.line[i], sub_space) 132 133 def show_mapping_id(self, mapping_id, space=''): 134 mapping = self.profile.mapping[mapping_id - 1] 135 self.show_mapping(mapping, space) 136 137 def show_mapping(self, mapping, space=''): 138 print('%sid: %d' % (space, mapping.id)) 139 print('%smemory_start: %x' % (space, mapping.memory_start)) 140 print('%smemory_limit: %x' % (space, mapping.memory_limit)) 141 print('%sfile_offset: %x' % (space, mapping.file_offset)) 142 print('%sfilename: %s(%d)' % (space, self.string(mapping.filename), 143 mapping.filename)) 144 print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id), 145 mapping.build_id)) 146 print('%shas_functions: %s' % (space, mapping.has_functions)) 147 print('%shas_filenames: %s' % (space, mapping.has_filenames)) 148 print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers)) 149 print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames)) 150 151 def show_line(self, line, space=''): 152 sub_space = space + ' ' 153 print('%sfunction_id: %d' % (space, line.function_id)) 154 self.show_function_id(line.function_id, sub_space) 155 print('%sline: %d' % (space, line.line)) 156 157 def show_function_id(self, function_id, space=''): 158 function = self.profile.function[function_id - 1] 159 self.show_function(function, space) 160 161 def show_function(self, function, space=''): 162 print('%sid: %d' % (space, function.id)) 163 print('%sname: %s' % (space, self.string(function.name))) 164 print('%ssystem_name: %s' % (space, self.string(function.system_name))) 165 print('%sfilename: %s' % (space, self.string(function.filename))) 166 print('%sstart_line: %d' % (space, function.start_line)) 167 168 def show_label(self, label, space=''): 169 print('%sLabel(%s =', space, self.string(label.key), end='') 170 if label.HasField('str'): 171 print('%s)' % self.get_string(label.str)) 172 else: 173 print('%d)' % label.num) 174 175 def string(self, id): 176 return self.string_table[id] 177 178 179 class Sample(object): 180 181 def __init__(self): 182 self.location_ids = [] 183 self.values = {} 184 185 def add_location_id(self, location_id): 186 self.location_ids.append(location_id) 187 188 def add_value(self, id, value): 189 self.values[id] = self.values.get(id, 0) + value 190 191 def add_values(self, values): 192 for id in values.keys(): 193 value = values[id] 194 self.add_value(id, value) 195 196 @property 197 def key(self): 198 return tuple(self.location_ids) 199 200 201 class Location(object): 202 203 def __init__(self, mapping_id, address, vaddr_in_dso): 204 self.id = -1 # unset 205 self.mapping_id = mapping_id 206 self.address = address 207 self.vaddr_in_dso = vaddr_in_dso 208 self.lines = [] 209 210 @property 211 def key(self): 212 return (self.mapping_id, self.address) 213 214 215 class Line(object): 216 217 def __init__(self): 218 self.function_id = 0 219 self.line = 0 220 221 222 class Mapping(object): 223 224 def __init__(self, start, end, pgoff, filename_id, build_id_id): 225 self.id = -1 # unset 226 self.memory_start = start 227 self.memory_limit = end 228 self.file_offset = pgoff 229 self.filename_id = filename_id 230 self.build_id_id = build_id_id 231 232 @property 233 def key(self): 234 return ( 235 self.memory_start, 236 self.memory_limit, 237 self.file_offset, 238 self.filename_id, 239 self.build_id_id) 240 241 242 class Function(object): 243 244 def __init__(self, name_id, dso_name_id, vaddr_in_dso): 245 self.id = -1 # unset 246 self.name_id = name_id 247 self.dso_name_id = dso_name_id 248 self.vaddr_in_dso = vaddr_in_dso 249 self.source_filename_id = 0 250 self.start_line = 0 251 252 @property 253 def key(self): 254 return (self.name_id, self.dso_name_id) 255 256 257 class PprofProfileGenerator(object): 258 259 def __init__(self, config): 260 self.config = config 261 self.lib = ReportLib() 262 263 config['binary_cache_dir'] = 'binary_cache' 264 if not os.path.isdir(config['binary_cache_dir']): 265 config['binary_cache_dir'] = None 266 else: 267 self.lib.SetSymfs(config['binary_cache_dir']) 268 if config.get('record_file'): 269 self.lib.SetRecordFile(config['record_file']) 270 kallsyms = 'binary_cache/kallsyms' 271 if os.path.isfile(kallsyms): 272 self.lib.SetKallsymsFile(kallsyms) 273 self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None 274 if config.get('pid_filters'): 275 self.pid_filter = {int(x) for x in config['pid_filters']} 276 else: 277 self.pid_filter = None 278 if config.get('tid_filters'): 279 self.tid_filter = {int(x) for x in config['tid_filters']} 280 else: 281 self.tid_filter = None 282 self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None 283 284 def gen(self): 285 self.profile = profile_pb2.Profile() 286 self.profile.string_table.append('') 287 self.string_table = {} 288 self.sample_types = {} 289 self.sample_map = {} 290 self.sample_list = [] 291 self.location_map = {} 292 self.location_list = [] 293 self.mapping_map = {} 294 self.mapping_list = [] 295 self.function_map = {} 296 self.function_list = [] 297 298 # 1. Process all samples in perf.data, aggregate samples. 299 while True: 300 report_sample = self.lib.GetNextSample() 301 if report_sample is None: 302 self.lib.Close() 303 break 304 event = self.lib.GetEventOfCurrentSample() 305 symbol = self.lib.GetSymbolOfCurrentSample() 306 callchain = self.lib.GetCallChainOfCurrentSample() 307 308 if not self._filter_report_sample(report_sample): 309 continue 310 311 sample_type_id = self.get_sample_type_id(event.name) 312 sample = Sample() 313 sample.add_value(sample_type_id, 1) 314 sample.add_value(sample_type_id + 1, report_sample.period) 315 if self._filter_symbol(symbol): 316 location_id = self.get_location_id(symbol.vaddr_in_file, symbol) 317 sample.add_location_id(location_id) 318 for i in range(callchain.nr): 319 entry = callchain.entries[i] 320 if self._filter_symbol(symbol): 321 location_id = self.get_location_id(entry.ip, entry.symbol) 322 sample.add_location_id(location_id) 323 if sample.location_ids: 324 self.add_sample(sample) 325 326 # 2. Generate line info for locations and functions. 327 self.gen_source_lines() 328 329 # 3. Produce samples/locations/functions in profile 330 for sample in self.sample_list: 331 self.gen_profile_sample(sample) 332 for mapping in self.mapping_list: 333 self.gen_profile_mapping(mapping) 334 for location in self.location_list: 335 self.gen_profile_location(location) 336 for function in self.function_list: 337 self.gen_profile_function(function) 338 339 return self.profile 340 341 def _filter_report_sample(self, sample): 342 """Return true if the sample can be used.""" 343 if self.comm_filter: 344 if sample.thread_comm not in self.comm_filter: 345 return False 346 if self.pid_filter: 347 if sample.pid not in self.pid_filter: 348 return False 349 if self.tid_filter: 350 if sample.tid not in self.tid_filter: 351 return False 352 return True 353 354 def _filter_symbol(self, symbol): 355 if not self.dso_filter or symbol.dso_name in self.dso_filter: 356 return True 357 return False 358 359 def get_string_id(self, str): 360 if len(str) == 0: 361 return 0 362 id = self.string_table.get(str) 363 if id is not None: 364 return id 365 id = len(self.string_table) + 1 366 self.string_table[str] = id 367 self.profile.string_table.append(str) 368 return id 369 370 def get_string(self, string_id): 371 return self.profile.string_table[string_id] 372 373 def get_sample_type_id(self, name): 374 id = self.sample_types.get(name) 375 if id is not None: 376 return id 377 id = len(self.profile.sample_type) 378 sample_type = self.profile.sample_type.add() 379 sample_type.type = self.get_string_id('event_' + name + '_samples') 380 sample_type.unit = self.get_string_id('count') 381 sample_type = self.profile.sample_type.add() 382 sample_type.type = self.get_string_id('event_' + name + '_count') 383 sample_type.unit = self.get_string_id('count') 384 self.sample_types[name] = id 385 return id 386 387 def get_location_id(self, ip, symbol): 388 mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name) 389 location = Location(mapping_id, ip, symbol.vaddr_in_file) 390 function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name, 391 symbol.symbol_addr) 392 if function_id: 393 # Add Line only when it has a valid function id, see http://b/36988814. 394 # Default line info only contains the function name 395 line = Line() 396 line.function_id = function_id 397 location.lines.append(line) 398 399 exist_location = self.location_map.get(location.key) 400 if exist_location: 401 return exist_location.id 402 # location_id starts from 1 403 location.id = len(self.location_list) + 1 404 self.location_list.append(location) 405 self.location_map[location.key] = location 406 return location.id 407 408 def get_mapping_id(self, report_mapping, filename): 409 filename_id = self.get_string_id(filename) 410 build_id = self.lib.GetBuildIdForPath(filename) 411 if build_id and build_id[0:2] == "0x": 412 build_id = build_id[2:] 413 build_id_id = self.get_string_id(build_id) 414 mapping = Mapping(report_mapping.start, report_mapping.end, 415 report_mapping.pgoff, filename_id, build_id_id) 416 exist_mapping = self.mapping_map.get(mapping.key) 417 if exist_mapping: 418 return exist_mapping.id 419 # mapping_id starts from 1 420 mapping.id = len(self.mapping_list) + 1 421 self.mapping_list.append(mapping) 422 self.mapping_map[mapping.key] = mapping 423 return mapping.id 424 425 def get_mapping(self, mapping_id): 426 return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None 427 428 def get_function_id(self, name, dso_name, vaddr_in_file): 429 if name == 'unknown': 430 return 0 431 function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file) 432 exist_function = self.function_map.get(function.key) 433 if exist_function: 434 return exist_function.id 435 # function_id starts from 1 436 function.id = len(self.function_list) + 1 437 self.function_list.append(function) 438 self.function_map[function.key] = function 439 return function.id 440 441 def get_function(self, function_id): 442 return self.function_list[function_id - 1] if function_id > 0 else None 443 444 def add_sample(self, sample): 445 exist_sample = self.sample_map.get(sample.key) 446 if exist_sample: 447 exist_sample.add_values(sample.values) 448 else: 449 self.sample_list.append(sample) 450 self.sample_map[sample.key] = sample 451 452 def gen_source_lines(self): 453 # 1. Create Addr2line instance 454 if not self.config.get('binary_cache_dir'): 455 log_info("Can't generate line information because binary_cache is missing.") 456 return 457 if not self.config['addr2line_path'] or not is_executable_available( 458 self.config['addr2line_path']): 459 if not find_tool_path('addr2line'): 460 log_info("Can't generate line information because can't find addr2line.") 461 return 462 463 addr2line = Addr2Line(self.config['addr2line_path'], self.config['binary_cache_dir']) 464 465 # 2. Put all needed addresses to it. 466 for location in self.location_list: 467 mapping = self.get_mapping(location.mapping_id) 468 dso_name = self.get_string(mapping.filename_id) 469 addr2line.add_addr(dso_name, location.vaddr_in_dso) 470 for function in self.function_list: 471 dso_name = self.get_string(function.dso_name_id) 472 addr2line.add_addr(dso_name, function.vaddr_in_dso) 473 474 # 3. Generate source lines. 475 addr2line.convert_addrs_to_lines() 476 477 # 4. Annotate locations and functions. 478 for location in self.location_list: 479 mapping = self.get_mapping(location.mapping_id) 480 dso_name = self.get_string(mapping.filename_id) 481 sources = addr2line.get_sources(dso_name, location.vaddr_in_dso) 482 source_id = 0 483 for source in sources: 484 if source.file and source.function and source.line: 485 function_id = self.get_function_id(source.function, dso_name, 0) 486 if function_id == 0: 487 continue 488 if source_id == 0: 489 # Clear default line info 490 location.lines = [] 491 location.lines.append(self.add_line(source, dso_name, function_id)) 492 source_id += 1 493 494 for function in self.function_list: 495 dso_name = self.get_string(function.dso_name_id) 496 if function.vaddr_in_dso: 497 sources = addr2line.get_sources(dso_name, function.vaddr_in_dso) 498 source = sources[0] if sources else None 499 if source and source.file: 500 function.source_filename_id = self.get_string_id(source.file) 501 if source.line: 502 function.start_line = source.line 503 504 def add_line(self, source, dso_name, function_id): 505 line = Line() 506 function = self.get_function(function_id) 507 function.source_filename_id = self.get_string_id(source.file) 508 line.function_id = function_id 509 line.line = source.line 510 return line 511 512 def gen_profile_sample(self, sample): 513 profile_sample = self.profile.sample.add() 514 profile_sample.location_id.extend(sample.location_ids) 515 sample_type_count = len(self.sample_types) * 2 516 values = [0] * sample_type_count 517 for id in sample.values.keys(): 518 values[id] = sample.values[id] 519 profile_sample.value.extend(values) 520 521 def gen_profile_mapping(self, mapping): 522 profile_mapping = self.profile.mapping.add() 523 profile_mapping.id = mapping.id 524 profile_mapping.memory_start = mapping.memory_start 525 profile_mapping.memory_limit = mapping.memory_limit 526 profile_mapping.file_offset = mapping.file_offset 527 profile_mapping.filename = mapping.filename_id 528 profile_mapping.build_id = mapping.build_id_id 529 profile_mapping.has_filenames = True 530 profile_mapping.has_functions = True 531 if self.config.get('binary_cache_dir'): 532 profile_mapping.has_line_numbers = True 533 profile_mapping.has_inline_frames = True 534 else: 535 profile_mapping.has_line_numbers = False 536 profile_mapping.has_inline_frames = False 537 538 def gen_profile_location(self, location): 539 profile_location = self.profile.location.add() 540 profile_location.id = location.id 541 profile_location.mapping_id = location.mapping_id 542 profile_location.address = location.address 543 for i in range(len(location.lines)): 544 line = profile_location.line.add() 545 line.function_id = location.lines[i].function_id 546 line.line = location.lines[i].line 547 548 def gen_profile_function(self, function): 549 profile_function = self.profile.function.add() 550 profile_function.id = function.id 551 profile_function.name = function.name_id 552 profile_function.system_name = function.name_id 553 profile_function.filename = function.source_filename_id 554 profile_function.start_line = function.start_line 555 556 557 def main(): 558 parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.') 559 parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.') 560 parser.add_argument('-i', '--perf_data_path', default='perf.data', help= 561 """The path of profiling data.""") 562 parser.add_argument('-o', '--output_file', default='pprof.profile', help= 563 """The path of generated pprof profile data.""") 564 parser.add_argument('--comm', nargs='+', action='append', help= 565 """Use samples only in threads with selected names.""") 566 parser.add_argument('--pid', nargs='+', action='append', help= 567 """Use samples only in processes with selected process ids.""") 568 parser.add_argument('--tid', nargs='+', action='append', help= 569 """Use samples only in threads with selected thread ids.""") 570 parser.add_argument('--dso', nargs='+', action='append', help= 571 """Use samples only in selected binaries.""") 572 parser.add_argument('--addr2line', help= 573 """Set the path of addr2line.""") 574 575 args = parser.parse_args() 576 if args.show: 577 show_file = args.show[0] if args.show[0] else 'pprof.profile' 578 profile = load_pprof_profile(show_file) 579 printer = PprofProfilePrinter(profile) 580 printer.show() 581 return 582 583 config = {} 584 config['perf_data_path'] = args.perf_data_path 585 config['output_file'] = args.output_file 586 config['comm_filters'] = flatten_arg_list(args.comm) 587 config['pid_filters'] = flatten_arg_list(args.pid) 588 config['tid_filters'] = flatten_arg_list(args.tid) 589 config['dso_filters'] = flatten_arg_list(args.dso) 590 config['addr2line_path'] = args.addr2line 591 generator = PprofProfileGenerator(config) 592 profile = generator.gen() 593 store_pprof_profile(config['output_file'], profile) 594 595 596 if __name__ == '__main__': 597 main() 598