Home | History | Annotate | Download | only in histograms
      1 # Copyright 2013 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Scans the Chromium source for histograms that are absent from histograms.xml.
      6 
      7 This is a heuristic scan, so a clean run of this script does not guarantee that
      8 all histograms in the Chromium source are properly mapped.  Notably, field
      9 trials are entirely ignored by this script.
     10 
     11 """
     12 
     13 import commands
     14 import extract_histograms
     15 import logging
     16 import optparse
     17 import os
     18 import re
     19 import sys
     20 
     21 
# Matches the seam between two adjacent C string literals.  Group 1 holds the
# opening quotation mark and the contents of the first literal, so replacing a
# match with r'\1' drops the closing quote, the intervening whitespace and the
# next opening quote -- splicing the two literals into one.  Note that [^"]*
# gives no special treatment to escaped quotes (\") inside a literal.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)
# Recognizes an expression that is nothing but a 'k'-prefixed constant
# identifier, optionally qualified by a single 'name::' prefix.  It is applied
# with .match(), which anchors at the start of the string, while the trailing
# '$' anchors at the end.
CONSTANT_REGEX = re.compile(r"""
    (\w*::)?  # Optional namespace
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)
# Finds invocations of the UMA_HISTOGRAM* macros and captures everything
# between the opening parenthesis (after any leading whitespace) and the first
# comma.  The capture is the raw source text of the first argument; it may be a
# string literal, several adjacent literals, or an arbitrary expression.
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,]*)        # Capture the first parameter to the macro
    ,              # Match the comma that delineates the first parameter
    """, re.VERBOSE)
     43 
     44 
     45 class DirectoryNotFoundException(Exception):
     46   """Base class to distinguish locally defined exceptions from standard ones."""
     47   def __init__(self, msg):
     48     self.msg = msg
     49 
     50   def __str__(self):
     51     return self.msg
     52 
     53 
     54 def changeWorkingDirectory(target_directory):
     55   """Changes the working directory to the given |target_directory|, which
     56   defaults to the root of the Chromium checkout.
     57 
     58   Returns:
     59     None
     60 
     61   Raises:
     62     DirectoryNotFoundException if the target directory cannot be found.
     63   """
     64   working_directory = os.getcwd()
     65   pos = working_directory.find(target_directory)
     66   if pos < 0:
     67     raise DirectoryNotFoundException('Could not find root directory "' +
     68                                      target_directory + '".  ' +
     69                                      'Please run this script within your ' +
     70                                      'Chromium checkout.')
     71 
     72   os.chdir(working_directory[:pos + len(target_directory)])
     73 
     74 
     75 def collapseAdjacentCStrings(string):
     76   """Collapses any adjacent C strings into a single string.
     77 
     78   Useful to re-combine strings that were split across multiple lines to satisfy
     79   the 80-col restriction.
     80 
     81   Args:
     82     string: The string to recombine, e.g. '"Foo"\n    "bar"'
     83 
     84   Returns:
     85     The collapsed string, e.g. "Foobar" for an input of '"Foo"\n    "bar"'
     86   """
     87   while True:
     88     collapsed = ADJACENT_C_STRING_REGEX.sub(r'\1', string, count=1)
     89     if collapsed == string:
     90       return collapsed
     91 
     92     string = collapsed
     93 
     94 
     95 def logNonLiteralHistogram(filename, histogram):
     96   """Logs a statement warning about a non-literal histogram name found in the
     97   Chromium source.
     98 
     99   Filters out known acceptable exceptions.
    100 
    101   Args:
    102     filename: The filename for the file containing the histogram, e.g.
    103               'chrome/browser/memory_details.cc'
    104     histogram: The expression that evaluates to the name of the histogram, e.g.
    105                '"FakeHistogram" + variant'
    106 
    107   Returns:
    108     None
    109   """
    110   # Ignore histogram macros, which typically contain backslashes so that they
    111   # can be formatted across lines.
    112   if '\\' in histogram:
    113     return
    114 
    115   # Field trials are unique within a session, so are effectively constants.
    116   if histogram.startswith('base::FieldTrial::MakeName'):
    117     return
    118 
    119   # Ignore histogram names that have been pulled out into C++ constants.
    120   if CONSTANT_REGEX.match(histogram):
    121     return
    122 
    123   # TODO(isherman): This is still a little noisy... needs further filtering to
    124   # reduce the noise.
    125   logging.warning('%s contains non-literal histogram name <%s>', filename,
    126                   histogram)
    127 
    128 
    129 def readChromiumHistograms():
    130   """Searches the Chromium source for all histogram names.
    131 
    132   Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
    133   names that might vary during a single run of the app.
    134 
    135   Returns:
    136     A set cotaining any found literal histogram names.
    137   """
    138   logging.info('Scanning Chromium source for histograms...')
    139 
    140   # Use git grep to find all invocations of the UMA_HISTOGRAM_* macros.
    141   # Examples:
    142   #   'path/to/foo.cc:420:  UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
    143   #   'path/to/bar.cc:632:  UMA_HISTOGRAM_ENUMERATION('
    144   locations = commands.getoutput('git gs UMA_HISTOGRAM').split('\n')
    145   filenames = set([location.split(':')[0] for location in locations])
    146 
    147   histograms = set()
    148   for filename in filenames:
    149     contents = ''
    150     with open(filename, 'r') as f:
    151       contents = f.read()
    152 
    153     matches = set(HISTOGRAM_REGEX.findall(contents))
    154     for histogram in matches:
    155       histogram = collapseAdjacentCStrings(histogram)
    156 
    157       # Must begin and end with a quotation mark.
    158       if histogram[0] != '"' or histogram[-1] != '"':
    159         logNonLiteralHistogram(filename, histogram)
    160         continue
    161 
    162       # Must not include any quotation marks other than at the beginning or end.
    163       histogram_stripped = histogram.strip('"')
    164       if '"' in histogram_stripped:
    165         logNonLiteralHistogram(filename, histogram)
    166         continue
    167 
    168       histograms.add(histogram_stripped)
    169 
    170   return histograms
    171 
    172 
    173 def readXmlHistograms(histograms_file_location):
    174   """Parses all histogram names from histograms.xml.
    175 
    176   Returns:
    177     A set cotaining the parsed histogram names.
    178   """
    179   logging.info('Reading histograms from %s...' % histograms_file_location)
    180   histograms = extract_histograms.ExtractHistograms(histograms_file_location)
    181   return set(extract_histograms.ExtractNames(histograms))
    182 
    183 
    184 def main():
    185   # Parse command line options
    186   parser = optparse.OptionParser()
    187   parser.add_option(
    188     '--root-directory', dest='root_directory', default='src',
    189     help='scan within DIRECTORY for histograms [optional, defaults to "src/"]',
    190     metavar='DIRECTORY')
    191   parser.add_option(
    192     '--histograms-file', dest='histograms_file_location',
    193     default='tools/metrics/histograms/histograms.xml',
    194     help='read histogram definitions from FILE (relative to --root-directory) '
    195          '[optional, defaults to "tools/histograms/histograms.xml"]',
    196     metavar='FILE')
    197 
    198   (options, args) = parser.parse_args()
    199   if args:
    200     parser.print_help()
    201     sys.exit(1)
    202 
    203   logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    204 
    205   try:
    206     changeWorkingDirectory(options.root_directory)
    207   except DirectoryNotFoundException as e:
    208     logging.error(e)
    209     sys.exit(1)
    210   chromium_histograms = readChromiumHistograms()
    211   xml_histograms = readXmlHistograms(options.histograms_file_location)
    212 
    213   unmapped_histograms = sorted(chromium_histograms - xml_histograms)
    214   if len(unmapped_histograms):
    215     logging.info('')
    216     logging.info('')
    217     logging.info('Histograms in Chromium but not in %s:' %
    218                  options.histograms_file_location)
    219     logging.info('-------------------------------------------------')
    220     for histogram in unmapped_histograms:
    221       logging.info('  %s', histogram)
    222   else:
    223     logging.info('Success!  No unmapped histograms found.')
    224 
    225 
    226 if __name__ == '__main__':
    227   main()
    228