Home | History | Annotate | Download | only in histograms
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Scans the Chromium source for histograms that are absent from histograms.xml.
      6 
      7 This is a heuristic scan, so a clean run of this script does not guarantee that
      8 all histograms in the Chromium source are properly mapped.  Notably, field
      9 trials are entirely ignored by this script.
     10 
     11 """
     12 
     13 import commands
     14 import extract_histograms
     15 import hashlib
     16 import logging
     17 import optparse
     18 import os
     19 import re
     20 import sys
     21 
     22 
# Matches the seam between two adjacent C string literals.  Group 1 captures
# the first literal's opening quotation mark and contents; the rest of the match
# is its closing quotation mark, any whitespace, and the opening quotation mark
# of the next literal.  Replacing a match with group 1 fuses the two literals.
# NOTE(review): escaped quotation marks (\") inside a literal are not
# special-cased by this pattern.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)
# Recognizes an expression that is nothing but a 'kFooBar'-style constant
# identifier, optionally preceded by a single 'Namespace::' qualifier.  The
# pattern is anchored at the end by '$'; callers anchor the start by using
# match().
CONSTANT_REGEX = re.compile(r"""
    (\w*::)?  # Optional namespace
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)
# Finds invocations of the UMA_HISTOGRAM* macros and captures, as group 1, the
# raw text of the first macro parameter: everything after the opening
# parenthesis (and any whitespace) up to the first comma or closing parenthesis.
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,)]*)       # Capture the first parameter to the macro
    [,)]           # Match the comma/paren that delineates the first parameter
    """, re.VERBOSE)
     44 
     45 
     46 class DirectoryNotFoundException(Exception):
     47   """Base class to distinguish locally defined exceptions from standard ones."""
     48   def __init__(self, msg):
     49     self.msg = msg
     50 
     51   def __str__(self):
     52     return self.msg
     53 
     54 
     55 def findDefaultRoot():
     56   """Find the root of the chromium repo, in case the script is run from the
     57   histograms dir.
     58 
     59   Returns:
     60     string: path to the src dir of the repo.
     61 
     62   Raises:
     63     DirectoryNotFoundException if the target directory cannot be found.
     64   """
     65   path = os.getcwd()
     66   while path:
     67     head, tail = os.path.split(path)
     68     if tail == 'src':
     69       return path
     70     if path == head:
     71       break
     72     path = head
     73   raise DirectoryNotFoundException('Could not find src/ dir')
     74 
     75 
     76 def collapseAdjacentCStrings(string):
     77   """Collapses any adjacent C strings into a single string.
     78 
     79   Useful to re-combine strings that were split across multiple lines to satisfy
     80   the 80-col restriction.
     81 
     82   Args:
     83     string: The string to recombine, e.g. '"Foo"\n    "bar"'
     84 
     85   Returns:
     86     The collapsed string, e.g. "Foobar" for an input of '"Foo"\n    "bar"'
     87   """
     88   while True:
     89     collapsed = ADJACENT_C_STRING_REGEX.sub(r'\1', string, count=1)
     90     if collapsed == string:
     91       return collapsed
     92 
     93     string = collapsed
     94 
     95 
     96 def logNonLiteralHistogram(filename, histogram):
     97   """Logs a statement warning about a non-literal histogram name found in the
     98   Chromium source.
     99 
    100   Filters out known acceptable exceptions.
    101 
    102   Args:
    103     filename: The filename for the file containing the histogram, e.g.
    104               'chrome/browser/memory_details.cc'
    105     histogram: The expression that evaluates to the name of the histogram, e.g.
    106                '"FakeHistogram" + variant'
    107 
    108   Returns:
    109     None
    110   """
    111   # Ignore histogram macros, which typically contain backslashes so that they
    112   # can be formatted across lines.
    113   if '\\' in histogram:
    114     return
    115 
    116   # Ignore histogram names that have been pulled out into C++ constants.
    117   if CONSTANT_REGEX.match(histogram):
    118     return
    119 
    120   # TODO(isherman): This is still a little noisy... needs further filtering to
    121   # reduce the noise.
    122   logging.warning('%s contains non-literal histogram name <%s>', filename,
    123                   histogram)
    124 
    125 
    126 def readChromiumHistograms():
    127   """Searches the Chromium source for all histogram names.
    128 
    129   Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
    130   names that might vary during a single run of the app.
    131 
    132   Returns:
    133     A set cotaining any found literal histogram names.
    134   """
    135   logging.info('Scanning Chromium source for histograms...')
    136 
    137   # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
    138   # Examples:
    139   #   'path/to/foo.cc:420:  UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
    140   #   'path/to/bar.cc:632:  UMA_HISTOGRAM_ENUMERATION('
    141   locations = commands.getoutput('git gs UMA_HISTOGRAM').split('\n')
    142   filenames = set([location.split(':')[0] for location in locations])
    143 
    144   histograms = set()
    145   for filename in filenames:
    146     contents = ''
    147     with open(filename, 'r') as f:
    148       contents = f.read()
    149 
    150     matches = set(HISTOGRAM_REGEX.findall(contents))
    151     for histogram in matches:
    152       histogram = collapseAdjacentCStrings(histogram)
    153 
    154       # Must begin and end with a quotation mark.
    155       if histogram[0] != '"' or histogram[-1] != '"':
    156         logNonLiteralHistogram(filename, histogram)
    157         continue
    158 
    159       # Must not include any quotation marks other than at the beginning or end.
    160       histogram_stripped = histogram.strip('"')
    161       if '"' in histogram_stripped:
    162         logNonLiteralHistogram(filename, histogram)
    163         continue
    164 
    165       histograms.add(histogram_stripped)
    166 
    167   return histograms
    168 
    169 
    170 def readXmlHistograms(histograms_file_location):
    171   """Parses all histogram names from histograms.xml.
    172 
    173   Returns:
    174     A set cotaining the parsed histogram names.
    175   """
    176   logging.info('Reading histograms from %s...' % histograms_file_location)
    177   histograms = extract_histograms.ExtractHistograms(histograms_file_location)
    178   return set(extract_histograms.ExtractNames(histograms))
    179 
    180 
    181 def hashHistogramName(name):
    182   """Computes the hash of a histogram name.
    183 
    184   Args:
    185     name: The string to hash (a histogram name).
    186 
    187   Returns:
    188     Histogram hash as a string representing a hex number (with leading 0x).
    189   """
    190   return '0x' + hashlib.md5(name).hexdigest()[:16]
    191 
    192 
    193 def main():
    194   # Find default paths.
    195   default_root = findDefaultRoot()
    196   default_histograms_path = os.path.join(
    197       default_root, 'tools/metrics/histograms/histograms.xml')
    198   default_extra_histograms_path = os.path.join(
    199       default_root, 'tools/histograms/histograms.xml')
    200 
    201   # Parse command line options
    202   parser = optparse.OptionParser()
    203   parser.add_option(
    204     '--root-directory', dest='root_directory', default=default_root,
    205     help='scan within DIRECTORY for histograms [optional, defaults to "%s"]' %
    206         default_root,
    207     metavar='DIRECTORY')
    208   parser.add_option(
    209     '--histograms-file', dest='histograms_file_location',
    210     default=default_histograms_path,
    211     help='read histogram definitions from FILE (relative to --root-directory) '
    212          '[optional, defaults to "%s"]' % default_histograms_path,
    213     metavar='FILE')
    214   parser.add_option(
    215     '--exrta_histograms-file', dest='extra_histograms_file_location',
    216     default=default_extra_histograms_path,
    217     help='read additional histogram definitions from FILE (relative to '
    218          '--root-directory) [optional, defaults to "%s"]' %
    219          default_extra_histograms_path,
    220     metavar='FILE')
    221 
    222   (options, args) = parser.parse_args()
    223   if args:
    224     parser.print_help()
    225     sys.exit(1)
    226 
    227   logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    228 
    229   try:
    230     os.chdir(options.root_directory)
    231   except EnvironmentError as e:
    232     logging.error("Could not change to root directory: %s", e)
    233     sys.exit(1)
    234   chromium_histograms = readChromiumHistograms()
    235   xml_histograms = readXmlHistograms(options.histograms_file_location)
    236   unmapped_histograms = chromium_histograms - xml_histograms
    237 
    238   if os.path.isfile(options.extra_histograms_file_location):
    239     xml_histograms2 = readXmlHistograms(options.extra_histograms_file_location)
    240     unmapped_histograms -= xml_histograms2
    241   else:
    242     logging.warning('No such file: %s', options.extra_histograms_file_location)
    243 
    244   if len(unmapped_histograms):
    245     logging.info('')
    246     logging.info('')
    247     logging.info('Histograms in Chromium but not in XML files:')
    248     logging.info('-------------------------------------------------')
    249     for histogram in sorted(unmapped_histograms):
    250       logging.info('  %s - %s', histogram, hashHistogramName(histogram))
    251   else:
    252     logging.info('Success!  No unmapped histograms found.')
    253 
    254 
# Run the scan only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
    257