Home | History | Annotate | Download | only in cros_utils
      1 #!/usr/bin/env python2
      2 # Copyright 2012 Google Inc. All Rights Reserved.
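"""Compare perf report files and print the differences as tables.

Each report file named on the command line is split into its "# Events:"
sections. The event count of every section and the counts of the top
functions of every section are then printed side by side, one column per
report.
"""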
      7 
      8 from __future__ import print_function
      9 
     10 __author__ = 'asharif (at] google.com (Ahmad Sharif)'
     11 
     12 import argparse
     13 import re
     14 import sys
     15 
     16 import misc
     17 import tabulator
     18 
# Key under which GetPerfDictFromReport stores, for each section, the number
# of functions whose percentage is above 1.
ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
# Key under which GetPerfDictFromReport stores, for each section, the sum of
# all function counts.
TOTAL_EVENTS = 'Total_events_of_this_profile'
     21 
     22 
     23 def GetPerfDictFromReport(report_file):
     24   output = {}
     25   perf_report = PerfReport(report_file)
     26   for k, v in perf_report.sections.items():
     27     if k not in output:
     28       output[k] = {}
     29     output[k][ROWS_TO_SHOW] = 0
     30     output[k][TOTAL_EVENTS] = 0
     31     for function in v.functions:
     32       out_key = '%s' % (function.name)
     33       output[k][out_key] = function.count
     34       output[k][TOTAL_EVENTS] += function.count
     35       if function.percent > 1:
     36         output[k][ROWS_TO_SHOW] += 1
     37   return output
     38 
     39 
     40 def _SortDictionaryByValue(d):
     41   l = [(k, v) for (k, v) in d.iteritems()]
     42 
     43   def GetFloat(x):
     44     if misc.IsFloat(x):
     45       return float(x)
     46     else:
     47       return x
     48 
     49   sorted_l = sorted(l, key=lambda x: GetFloat(x[1]))
     50   sorted_l.reverse()
     51   return [f[0] for f in sorted_l]
     52 
     53 
     54 class Tabulator(object):
     55   """Make tables."""
     56 
     57   def __init__(self, all_dicts):
     58     self._all_dicts = all_dicts
     59 
     60   def PrintTable(self):
     61     for dicts in self._all_dicts:
     62       self.PrintTableHelper(dicts)
     63 
     64   def PrintTableHelper(self, dicts):
     65     """Transfrom dicts to tables."""
     66     fields = {}
     67     for d in dicts:
     68       for f in d.keys():
     69         if f not in fields:
     70           fields[f] = d[f]
     71         else:
     72           fields[f] = max(fields[f], d[f])
     73     table = []
     74     header = ['name']
     75     for i in range(len(dicts)):
     76       header.append(i)
     77 
     78     table.append(header)
     79 
     80     sorted_fields = _SortDictionaryByValue(fields)
     81 
     82     for f in sorted_fields:
     83       row = [f]
     84       for d in dicts:
     85         if f in d:
     86           row.append(d[f])
     87         else:
     88           row.append('0')
     89       table.append(row)
     90 
     91     print(tabulator.GetSimpleTable(table))
     92 
     93 
     94 class Function(object):
     95   """Function for formatting."""
     96 
     97   def __init__(self):
     98     self.count = 0
     99     self.name = ''
    100     self.percent = 0
    101 
    102 
    103 class Section(object):
    104   """Section formatting."""
    105 
    106   def __init__(self, contents):
    107     self.name = ''
    108     self.raw_contents = contents
    109     self._ParseSection()
    110 
    111   def _ParseSection(self):
    112     matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
    113     assert len(matches) <= 1, 'More than one event found in 1 section'
    114     if not matches:
    115       return
    116     match = matches[0]
    117     self.name = match[1]
    118     self.count = misc.UnitToNumber(match[0])
    119 
    120     self.functions = []
    121     for line in self.raw_contents.splitlines():
    122       if not line.strip():
    123         continue
    124       if '%' not in line:
    125         continue
    126       if not line.startswith('#'):
    127         fields = [f for f in line.split(' ') if f]
    128         function = Function()
    129         function.percent = float(fields[0].strip('%'))
    130         function.count = int(fields[1])
    131         function.name = ' '.join(fields[2:])
    132         self.functions.append(function)
    133 
    134 
    135 class PerfReport(object):
    136   """Get report from raw report."""
    137 
    138   def __init__(self, perf_file):
    139     self.perf_file = perf_file
    140     self._ReadFile()
    141     self.sections = {}
    142     self.metadata = {}
    143     self._section_contents = []
    144     self._section_header = ''
    145     self._SplitSections()
    146     self._ParseSections()
    147     self._ParseSectionHeader()
    148 
    149   def _ParseSectionHeader(self):
    150     """Parse a header of a perf report file."""
    151     # The "captured on" field is inaccurate - this actually refers to when the
    152     # report was generated, not when the data was captured.
    153     for line in self._section_header.splitlines():
    154       line = line[2:]
    155       if ':' in line:
    156         key, val = line.strip().split(':', 1)
    157         key = key.strip()
    158         val = val.strip()
    159         self.metadata[key] = val
    160 
    161   def _ReadFile(self):
    162     self._perf_contents = open(self.perf_file).read()
    163 
    164   def _ParseSections(self):
    165     self.event_counts = {}
    166     self.sections = {}
    167     for section_content in self._section_contents:
    168       section = Section(section_content)
    169       section.name = self._GetHumanReadableName(section.name)
    170       self.sections[section.name] = section
    171 
    172   # TODO(asharif): Do this better.
    173   def _GetHumanReadableName(self, section_name):
    174     if not 'raw' in section_name:
    175       return section_name
    176     raw_number = section_name.strip().split(' ')[-1]
    177     for line in self._section_header.splitlines():
    178       if raw_number in line:
    179         name = line.strip().split(' ')[5]
    180         return name
    181 
    182   def _SplitSections(self):
    183     self._section_contents = []
    184     indices = [m.start() for m in re.finditer('# Events:', self._perf_contents)]
    185     indices.append(len(self._perf_contents))
    186     for i in range(len(indices) - 1):
    187       section_content = self._perf_contents[indices[i]:indices[i + 1]]
    188       self._section_contents.append(section_content)
    189     self._section_header = ''
    190     if indices:
    191       self._section_header = self._perf_contents[0:indices[0]]
    192 
    193 
    194 class PerfDiffer(object):
    195   """Perf differ class."""
    196 
    197   def __init__(self, reports, num_symbols, common_only):
    198     self._reports = reports
    199     self._num_symbols = num_symbols
    200     self._common_only = common_only
    201     self._common_function_names = {}
    202 
    203   def DoDiff(self):
    204     """The function that does the diff."""
    205     section_names = self._FindAllSections()
    206 
    207     filename_dicts = []
    208     summary_dicts = []
    209     for report in self._reports:
    210       d = {}
    211       filename_dicts.append({'file': report.perf_file})
    212       for section_name in section_names:
    213         if section_name in report.sections:
    214           d[section_name] = report.sections[section_name].count
    215       summary_dicts.append(d)
    216 
    217     all_dicts = [filename_dicts, summary_dicts]
    218 
    219     for section_name in section_names:
    220       function_names = self._GetTopFunctions(section_name, self._num_symbols)
    221       self._FindCommonFunctions(section_name)
    222       dicts = []
    223       for report in self._reports:
    224         d = {}
    225         if section_name in report.sections:
    226           section = report.sections[section_name]
    227 
    228           # Get a common scaling factor for this report.
    229           common_scaling_factor = self._GetCommonScalingFactor(section)
    230 
    231           for function in section.functions:
    232             if function.name in function_names:
    233               key = '%s %s' % (section.name, function.name)
    234               d[key] = function.count
    235               # Compute a factor to scale the function count by in common_only
    236               # mode.
    237               if self._common_only and (
    238                   function.name in self._common_function_names[section.name]):
    239                 d[key + ' scaled'] = common_scaling_factor * function.count
    240         dicts.append(d)
    241 
    242       all_dicts.append(dicts)
    243 
    244     mytabulator = Tabulator(all_dicts)
    245     mytabulator.PrintTable()
    246 
    247   def _FindAllSections(self):
    248     sections = {}
    249     for report in self._reports:
    250       for section in report.sections.values():
    251         if section.name not in sections:
    252           sections[section.name] = section.count
    253         else:
    254           sections[section.name] = max(sections[section.name], section.count)
    255     return _SortDictionaryByValue(sections)
    256 
    257   def _GetCommonScalingFactor(self, section):
    258     unique_count = self._GetCount(
    259         section, lambda x: x in self._common_function_names[section.name])
    260     return 100.0 / unique_count
    261 
    262   def _GetCount(self, section, filter_fun=None):
    263     total_count = 0
    264     for function in section.functions:
    265       if not filter_fun or filter_fun(function.name):
    266         total_count += int(function.count)
    267     return total_count
    268 
    269   def _FindCommonFunctions(self, section_name):
    270     function_names_list = []
    271     for report in self._reports:
    272       if section_name in report.sections:
    273         section = report.sections[section_name]
    274         function_names = [f.name for f in section.functions]
    275         function_names_list.append(function_names)
    276 
    277     self._common_function_names[section_name] = (
    278         reduce(set.intersection, map(set, function_names_list)))
    279 
    280   def _GetTopFunctions(self, section_name, num_functions):
    281     all_functions = {}
    282     for report in self._reports:
    283       if section_name in report.sections:
    284         section = report.sections[section_name]
    285         for f in section.functions[:num_functions]:
    286           if f.name in all_functions:
    287             all_functions[f.name] = max(all_functions[f.name], f.count)
    288           else:
    289             all_functions[f.name] = f.count
    290     # FIXME(asharif): Don't really need to sort these...
    291     return _SortDictionaryByValue(all_functions)
    292 
    293   def _GetFunctionsDict(self, section, function_names):
    294     d = {}
    295     for function in section.functions:
    296       if function.name in function_names:
    297         d[function.name] = function.count
    298     return d
    299 
    300 
    301 def Main(argv):
    302   """The entry of the main."""
    303   parser = argparse.ArgumentParser()
    304   parser.add_argument('-n',
    305                       '--num_symbols',
    306                       dest='num_symbols',
    307                       default='5',
    308                       help='The number of symbols to show.')
    309   parser.add_argument('-c',
    310                       '--common_only',
    311                       dest='common_only',
    312                       action='store_true',
    313                       default=False,
    314                       help='Diff common symbols only.')
    315 
    316   options, args = parser.parse_known_args(argv)
    317 
    318   try:
    319     reports = []
    320     for report in args[1:]:
    321       report = PerfReport(report)
    322       reports.append(report)
    323     pd = PerfDiffer(reports, int(options.num_symbols), options.common_only)
    324     pd.DoDiff()
    325   finally:
    326     pass
    327 
    328   return 0
    329 
    330 
# Run the diff when this file is executed as a script; the value returned by
# Main becomes the process exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv))
    333