Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (C) 2017 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 #
     17 
     18 """pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
     19     used by pprof.
     20 
     21   Example:
     22     python app_profiler.py
     23     python pprof_proto_generator.py
     24     pprof -text pprof.profile
     25 """
     26 
     27 from __future__ import print_function
     28 import argparse
     29 import os
     30 import os.path
     31 import re
     32 import shutil
     33 import sys
     34 import time
     35 
     36 from annotate import Addr2Line
     37 from simpleperf_report_lib import *
     38 from utils import *
     39 
     40 try:
     41     import google.protobuf
     42 except:
     43     log_exit('google.protobuf module is missing. Please install it first.')
     44 
     45 import profile_pb2
     46 
     47 def load_pprof_profile(filename):
     48     profile = profile_pb2.Profile()
     49     with open(filename, "rb") as f:
     50         profile.ParseFromString(f.read())
     51     return profile
     52 
     53 
     54 def store_pprof_profile(filename, profile):
     55     with open(filename, 'wb') as f:
     56         f.write(profile.SerializeToString())
     57 
     58 
     59 class PprofProfilePrinter(object):
     60 
     61     def __init__(self, profile):
     62         self.profile = profile
     63         self.string_table = profile.string_table
     64 
     65     def show(self):
     66         p = self.profile
     67         sub_space = '  '
     68         print('Profile {')
     69         print('%d sample_types' % len(p.sample_type))
     70         for i in range(len(p.sample_type)):
     71             print('sample_type[%d] = ' % i, end='')
     72             self.show_value_type(p.sample_type[i])
     73         print('%d samples' % len(p.sample))
     74         for i in range(len(p.sample)):
     75             print('sample[%d]:' % i)
     76             self.show_sample(p.sample[i], sub_space)
     77         print('%d mappings' % len(p.mapping))
     78         for i in range(len(p.mapping)):
     79             print('mapping[%d]:' % i)
     80             self.show_mapping(p.mapping[i], sub_space)
     81         print('%d locations' % len(p.location))
     82         for i in range(len(p.location)):
     83             print('location[%d]:' % i)
     84             self.show_location(p.location[i], sub_space)
     85         for i in range(len(p.function)):
     86             print('function[%d]:' % i)
     87             self.show_function(p.function[i], sub_space)
     88         print('%d strings' % len(p.string_table))
     89         for i in range(len(p.string_table)):
     90             print('string[%d]: %s' % (i, p.string_table[i]))
     91         print('drop_frames: %s' % self.string(p.drop_frames))
     92         print('keep_frames: %s' % self.string(p.keep_frames))
     93         print('time_nanos: %u' % p.time_nanos)
     94         print('duration_nanos: %u' % p.duration_nanos)
     95         print('period_type: ', end='')
     96         self.show_value_type(p.period_type)
     97         print('period: %u' % p.period)
     98         for i in range(len(p.comment)):
     99             print('comment[%d] = %s' % (i, self.string(p.comment[i])))
    100         print('default_sample_type: %d' % p.default_sample_type)
    101         print('} // Profile')
    102         print()
    103 
    104     def show_value_type(self, value_type, space=''):
    105         print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
    106               (space, value_type.type, value_type.unit,
    107                self.string(value_type.type), self.string(value_type.unit)))
    108 
    109     def show_sample(self, sample, space=''):
    110         sub_space = space + '  '
    111         for i in range(len(sample.location_id)):
    112             print('%slocation_id[%d]: id %d' % (space, i, sample.location_id[i]))
    113             self.show_location_id(sample.location_id[i], sub_space)
    114         for i in range(len(sample.value)):
    115             print('%svalue[%d] = %d' % (space, i, sample.value[i]))
    116         for i in range(len(sample.label)):
    117             print('%slabel[%d] = ', (space, i))
    118 
    119     def show_location_id(self, location_id, space=''):
    120         location = self.profile.location[location_id - 1]
    121         self.show_location(location, space)
    122 
    123     def show_location(self, location, space=''):
    124         sub_space = space + '  '
    125         print('%sid: %d' % (space, location.id))
    126         print('%smapping_id: %d' % (space, location.mapping_id))
    127         self.show_mapping_id(location.mapping_id, sub_space)
    128         print('%saddress: %x' % (space, location.address))
    129         for i in range(len(location.line)):
    130             print('%sline[%d]:' % (space, i))
    131             self.show_line(location.line[i], sub_space)
    132 
    133     def show_mapping_id(self, mapping_id, space=''):
    134         mapping = self.profile.mapping[mapping_id - 1]
    135         self.show_mapping(mapping, space)
    136 
    137     def show_mapping(self, mapping, space=''):
    138         print('%sid: %d' % (space, mapping.id))
    139         print('%smemory_start: %x' % (space, mapping.memory_start))
    140         print('%smemory_limit: %x' % (space, mapping.memory_limit))
    141         print('%sfile_offset: %x' % (space, mapping.file_offset))
    142         print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
    143                                       mapping.filename))
    144         print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
    145                                       mapping.build_id))
    146         print('%shas_functions: %s' % (space, mapping.has_functions))
    147         print('%shas_filenames: %s' % (space, mapping.has_filenames))
    148         print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
    149         print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))
    150 
    151     def show_line(self, line, space=''):
    152         sub_space = space + '  '
    153         print('%sfunction_id: %d' % (space, line.function_id))
    154         self.show_function_id(line.function_id, sub_space)
    155         print('%sline: %d' % (space, line.line))
    156 
    157     def show_function_id(self, function_id, space=''):
    158         function = self.profile.function[function_id - 1]
    159         self.show_function(function, space)
    160 
    161     def show_function(self, function, space=''):
    162         print('%sid: %d' % (space, function.id))
    163         print('%sname: %s' % (space, self.string(function.name)))
    164         print('%ssystem_name: %s' % (space, self.string(function.system_name)))
    165         print('%sfilename: %s' % (space, self.string(function.filename)))
    166         print('%sstart_line: %d' % (space, function.start_line))
    167 
    168     def show_label(self, label, space=''):
    169         print('%sLabel(%s =', space, self.string(label.key), end='')
    170         if label.HasField('str'):
    171             print('%s)' % self.get_string(label.str))
    172         else:
    173             print('%d)' % label.num)
    174 
    175     def string(self, id):
    176         return self.string_table[id]
    177 
    178 
    179 class Sample(object):
    180 
    181     def __init__(self):
    182         self.location_ids = []
    183         self.values = {}
    184 
    185     def add_location_id(self, location_id):
    186         self.location_ids.append(location_id)
    187 
    188     def add_value(self, id, value):
    189         self.values[id] = self.values.get(id, 0) + value
    190 
    191     def add_values(self, values):
    192         for id in values.keys():
    193             value = values[id]
    194             self.add_value(id, value)
    195 
    196     @property
    197     def key(self):
    198         return tuple(self.location_ids)
    199 
    200 
    201 class Location(object):
    202 
    203     def __init__(self, mapping_id, address, vaddr_in_dso):
    204         self.id = -1  # unset
    205         self.mapping_id = mapping_id
    206         self.address = address
    207         self.vaddr_in_dso = vaddr_in_dso
    208         self.lines = []
    209 
    210     @property
    211     def key(self):
    212         return (self.mapping_id, self.address)
    213 
    214 
    215 class Line(object):
    216 
    217     def __init__(self):
    218         self.function_id = 0
    219         self.line = 0
    220 
    221 
    222 class Mapping(object):
    223 
    224     def __init__(self, start, end, pgoff, filename_id, build_id_id):
    225         self.id = -1  # unset
    226         self.memory_start = start
    227         self.memory_limit = end
    228         self.file_offset = pgoff
    229         self.filename_id = filename_id
    230         self.build_id_id = build_id_id
    231 
    232     @property
    233     def key(self):
    234         return (
    235             self.memory_start,
    236             self.memory_limit,
    237             self.file_offset,
    238             self.filename_id,
    239             self.build_id_id)
    240 
    241 
    242 class Function(object):
    243 
    244     def __init__(self, name_id, dso_name_id, vaddr_in_dso):
    245         self.id = -1  # unset
    246         self.name_id = name_id
    247         self.dso_name_id = dso_name_id
    248         self.vaddr_in_dso = vaddr_in_dso
    249         self.source_filename_id = 0
    250         self.start_line = 0
    251 
    252     @property
    253     def key(self):
    254         return (self.name_id, self.dso_name_id)
    255 
    256 
    257 class PprofProfileGenerator(object):
    258 
    259     def __init__(self, config):
    260         self.config = config
    261         self.lib = ReportLib()
    262 
    263         config['binary_cache_dir'] = 'binary_cache'
    264         if not os.path.isdir(config['binary_cache_dir']):
    265             config['binary_cache_dir'] = None
    266         else:
    267             self.lib.SetSymfs(config['binary_cache_dir'])
    268         if config.get('record_file'):
    269             self.lib.SetRecordFile(config['record_file'])
    270         kallsyms = 'binary_cache/kallsyms'
    271         if os.path.isfile(kallsyms):
    272             self.lib.SetKallsymsFile(kallsyms)
    273         self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
    274         if config.get('pid_filters'):
    275             self.pid_filter = {int(x) for x in config['pid_filters']}
    276         else:
    277             self.pid_filter = None
    278         if config.get('tid_filters'):
    279             self.tid_filter = {int(x) for x in config['tid_filters']}
    280         else:
    281             self.tid_filter = None
    282         self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
    283 
    284     def gen(self):
    285         self.profile = profile_pb2.Profile()
    286         self.profile.string_table.append('')
    287         self.string_table = {}
    288         self.sample_types = {}
    289         self.sample_map = {}
    290         self.sample_list = []
    291         self.location_map = {}
    292         self.location_list = []
    293         self.mapping_map = {}
    294         self.mapping_list = []
    295         self.function_map = {}
    296         self.function_list = []
    297 
    298         # 1. Process all samples in perf.data, aggregate samples.
    299         while True:
    300             report_sample = self.lib.GetNextSample()
    301             if report_sample is None:
    302                 self.lib.Close()
    303                 break
    304             event = self.lib.GetEventOfCurrentSample()
    305             symbol = self.lib.GetSymbolOfCurrentSample()
    306             callchain = self.lib.GetCallChainOfCurrentSample()
    307 
    308             if not self._filter_report_sample(report_sample):
    309                 continue
    310 
    311             sample_type_id = self.get_sample_type_id(event.name)
    312             sample = Sample()
    313             sample.add_value(sample_type_id, 1)
    314             sample.add_value(sample_type_id + 1, report_sample.period)
    315             if self._filter_symbol(symbol):
    316                 location_id = self.get_location_id(symbol.vaddr_in_file, symbol)
    317                 sample.add_location_id(location_id)
    318             for i in range(callchain.nr):
    319                 entry = callchain.entries[i]
    320                 if self._filter_symbol(symbol):
    321                     location_id = self.get_location_id(entry.ip, entry.symbol)
    322                     sample.add_location_id(location_id)
    323             if sample.location_ids:
    324                 self.add_sample(sample)
    325 
    326         # 2. Generate line info for locations and functions.
    327         self.gen_source_lines()
    328 
    329         # 3. Produce samples/locations/functions in profile
    330         for sample in self.sample_list:
    331             self.gen_profile_sample(sample)
    332         for mapping in self.mapping_list:
    333             self.gen_profile_mapping(mapping)
    334         for location in self.location_list:
    335             self.gen_profile_location(location)
    336         for function in self.function_list:
    337             self.gen_profile_function(function)
    338 
    339         return self.profile
    340 
    341     def _filter_report_sample(self, sample):
    342         """Return true if the sample can be used."""
    343         if self.comm_filter:
    344             if sample.thread_comm not in self.comm_filter:
    345                 return False
    346             if self.pid_filter:
    347                 if sample.pid not in self.pid_filter:
    348                     return False
    349             if self.tid_filter:
    350                 if sample.tid not in self.tid_filter:
    351                     return False
    352         return True
    353 
    354     def _filter_symbol(self, symbol):
    355         if not self.dso_filter or symbol.dso_name in self.dso_filter:
    356             return True
    357         return False
    358 
    359     def get_string_id(self, str):
    360         if len(str) == 0:
    361             return 0
    362         id = self.string_table.get(str)
    363         if id is not None:
    364             return id
    365         id = len(self.string_table) + 1
    366         self.string_table[str] = id
    367         self.profile.string_table.append(str)
    368         return id
    369 
    370     def get_string(self, string_id):
    371         return self.profile.string_table[string_id]
    372 
    373     def get_sample_type_id(self, name):
    374         id = self.sample_types.get(name)
    375         if id is not None:
    376             return id
    377         id = len(self.profile.sample_type)
    378         sample_type = self.profile.sample_type.add()
    379         sample_type.type = self.get_string_id('event_' + name + '_samples')
    380         sample_type.unit = self.get_string_id('count')
    381         sample_type = self.profile.sample_type.add()
    382         sample_type.type = self.get_string_id('event_' + name + '_count')
    383         sample_type.unit = self.get_string_id('count')
    384         self.sample_types[name] = id
    385         return id
    386 
    387     def get_location_id(self, ip, symbol):
    388         mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name)
    389         location = Location(mapping_id, ip, symbol.vaddr_in_file)
    390         function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name,
    391                                            symbol.symbol_addr)
    392         if function_id:
    393             # Add Line only when it has a valid function id, see http://b/36988814.
    394             # Default line info only contains the function name
    395             line = Line()
    396             line.function_id = function_id
    397             location.lines.append(line)
    398 
    399         exist_location = self.location_map.get(location.key)
    400         if exist_location:
    401             return exist_location.id
    402         # location_id starts from 1
    403         location.id = len(self.location_list) + 1
    404         self.location_list.append(location)
    405         self.location_map[location.key] = location
    406         return location.id
    407 
    408     def get_mapping_id(self, report_mapping, filename):
    409         filename_id = self.get_string_id(filename)
    410         build_id = self.lib.GetBuildIdForPath(filename)
    411         if build_id and build_id[0:2] == "0x":
    412             build_id = build_id[2:]
    413         build_id_id = self.get_string_id(build_id)
    414         mapping = Mapping(report_mapping.start, report_mapping.end,
    415                           report_mapping.pgoff, filename_id, build_id_id)
    416         exist_mapping = self.mapping_map.get(mapping.key)
    417         if exist_mapping:
    418             return exist_mapping.id
    419         # mapping_id starts from 1
    420         mapping.id = len(self.mapping_list) + 1
    421         self.mapping_list.append(mapping)
    422         self.mapping_map[mapping.key] = mapping
    423         return mapping.id
    424 
    425     def get_mapping(self, mapping_id):
    426         return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None
    427 
    428     def get_function_id(self, name, dso_name, vaddr_in_file):
    429         if name == 'unknown':
    430             return 0
    431         function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
    432         exist_function = self.function_map.get(function.key)
    433         if exist_function:
    434             return exist_function.id
    435         # function_id starts from 1
    436         function.id = len(self.function_list) + 1
    437         self.function_list.append(function)
    438         self.function_map[function.key] = function
    439         return function.id
    440 
    441     def get_function(self, function_id):
    442         return self.function_list[function_id - 1] if function_id > 0 else None
    443 
    444     def add_sample(self, sample):
    445         exist_sample = self.sample_map.get(sample.key)
    446         if exist_sample:
    447             exist_sample.add_values(sample.values)
    448         else:
    449             self.sample_list.append(sample)
    450             self.sample_map[sample.key] = sample
    451 
    452     def gen_source_lines(self):
    453         # 1. Create Addr2line instance
    454         if not self.config.get('binary_cache_dir'):
    455             log_info("Can't generate line information because binary_cache is missing.")
    456             return
    457         if not self.config['addr2line_path'] or not is_executable_available(
    458             self.config['addr2line_path']):
    459             if not find_tool_path('addr2line'):
    460                 log_info("Can't generate line information because can't find addr2line.")
    461                 return
    462 
    463         addr2line = Addr2Line(self.config['addr2line_path'], self.config['binary_cache_dir'])
    464 
    465         # 2. Put all needed addresses to it.
    466         for location in self.location_list:
    467             mapping = self.get_mapping(location.mapping_id)
    468             dso_name = self.get_string(mapping.filename_id)
    469             addr2line.add_addr(dso_name, location.vaddr_in_dso)
    470         for function in self.function_list:
    471             dso_name = self.get_string(function.dso_name_id)
    472             addr2line.add_addr(dso_name, function.vaddr_in_dso)
    473 
    474         # 3. Generate source lines.
    475         addr2line.convert_addrs_to_lines()
    476 
    477         # 4. Annotate locations and functions.
    478         for location in self.location_list:
    479             mapping = self.get_mapping(location.mapping_id)
    480             dso_name = self.get_string(mapping.filename_id)
    481             sources = addr2line.get_sources(dso_name, location.vaddr_in_dso)
    482             source_id = 0
    483             for source in sources:
    484                 if source.file and source.function and source.line:
    485                     function_id = self.get_function_id(source.function, dso_name, 0)
    486                     if function_id == 0:
    487                         continue
    488                     if source_id == 0:
    489                         # Clear default line info
    490                         location.lines = []
    491                     location.lines.append(self.add_line(source, dso_name, function_id))
    492                     source_id += 1
    493 
    494         for function in self.function_list:
    495             dso_name = self.get_string(function.dso_name_id)
    496             if function.vaddr_in_dso:
    497                 sources = addr2line.get_sources(dso_name, function.vaddr_in_dso)
    498                 source = sources[0] if sources else None
    499                 if source and source.file:
    500                     function.source_filename_id = self.get_string_id(source.file)
    501                     if source.line:
    502                         function.start_line = source.line
    503 
    504     def add_line(self, source, dso_name, function_id):
    505         line = Line()
    506         function = self.get_function(function_id)
    507         function.source_filename_id = self.get_string_id(source.file)
    508         line.function_id = function_id
    509         line.line = source.line
    510         return line
    511 
    512     def gen_profile_sample(self, sample):
    513         profile_sample = self.profile.sample.add()
    514         profile_sample.location_id.extend(sample.location_ids)
    515         sample_type_count = len(self.sample_types) * 2
    516         values = [0] * sample_type_count
    517         for id in sample.values.keys():
    518             values[id] = sample.values[id]
    519         profile_sample.value.extend(values)
    520 
    521     def gen_profile_mapping(self, mapping):
    522         profile_mapping = self.profile.mapping.add()
    523         profile_mapping.id = mapping.id
    524         profile_mapping.memory_start = mapping.memory_start
    525         profile_mapping.memory_limit = mapping.memory_limit
    526         profile_mapping.file_offset = mapping.file_offset
    527         profile_mapping.filename = mapping.filename_id
    528         profile_mapping.build_id = mapping.build_id_id
    529         profile_mapping.has_filenames = True
    530         profile_mapping.has_functions = True
    531         if self.config.get('binary_cache_dir'):
    532             profile_mapping.has_line_numbers = True
    533             profile_mapping.has_inline_frames = True
    534         else:
    535             profile_mapping.has_line_numbers = False
    536             profile_mapping.has_inline_frames = False
    537 
    538     def gen_profile_location(self, location):
    539         profile_location = self.profile.location.add()
    540         profile_location.id = location.id
    541         profile_location.mapping_id = location.mapping_id
    542         profile_location.address = location.address
    543         for i in range(len(location.lines)):
    544             line = profile_location.line.add()
    545             line.function_id = location.lines[i].function_id
    546             line.line = location.lines[i].line
    547 
    548     def gen_profile_function(self, function):
    549         profile_function = self.profile.function.add()
    550         profile_function.id = function.id
    551         profile_function.name = function.name_id
    552         profile_function.system_name = function.name_id
    553         profile_function.filename = function.source_filename_id
    554         profile_function.start_line = function.start_line
    555 
    556 
    557 def main():
    558     parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    559     parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    560     parser.add_argument('-i', '--perf_data_path', default='perf.data', help=
    561 """The path of profiling data.""")
    562     parser.add_argument('-o', '--output_file', default='pprof.profile', help=
    563 """The path of generated pprof profile data.""")
    564     parser.add_argument('--comm', nargs='+', action='append', help=
    565 """Use samples only in threads with selected names.""")
    566     parser.add_argument('--pid', nargs='+', action='append', help=
    567 """Use samples only in processes with selected process ids.""")
    568     parser.add_argument('--tid', nargs='+', action='append', help=
    569 """Use samples only in threads with selected thread ids.""")
    570     parser.add_argument('--dso', nargs='+', action='append', help=
    571 """Use samples only in selected binaries.""")
    572     parser.add_argument('--addr2line', help=
    573 """Set the path of addr2line.""")
    574 
    575     args = parser.parse_args()
    576     if args.show:
    577         show_file = args.show[0] if args.show[0] else 'pprof.profile'
    578         profile = load_pprof_profile(show_file)
    579         printer = PprofProfilePrinter(profile)
    580         printer.show()
    581         return
    582 
    583     config = {}
    584     config['perf_data_path'] = args.perf_data_path
    585     config['output_file'] = args.output_file
    586     config['comm_filters'] = flatten_arg_list(args.comm)
    587     config['pid_filters'] = flatten_arg_list(args.pid)
    588     config['tid_filters'] = flatten_arg_list(args.tid)
    589     config['dso_filters'] = flatten_arg_list(args.dso)
    590     config['addr2line_path'] = args.addr2line
    591     generator = PprofProfileGenerator(config)
    592     profile = generator.gen()
    593     store_pprof_profile(config['output_file'], profile)
    594 
    595 
    596 if __name__ == '__main__':
    597     main()
    598