# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Scans the Chromium source for histograms that are absent from histograms.xml.

This is a heuristic scan, so a clean run of this script does not guarantee that
all histograms in the Chromium source are properly mapped. Notably, field
trials are entirely ignored by this script.
"""

import logging
import optparse
import os
import re
import subprocess
import sys

import extract_histograms


# Matches the seam between two adjacent C string literals (closing quote,
# optional whitespace, opening quote) and captures the first literal's opening
# quote and contents so that the seam can be dropped.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)

# Matches an expression that is nothing but a 'kConstantName' identifier,
# optionally qualified by a single namespace.
CONSTANT_REGEX = re.compile(r"""
    (\w*::)?  # Optional namespace
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)

# Matches an invocation of a UMA_HISTOGRAM* macro and captures the text of its
# first argument (everything up to the first comma).
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,]*)        # Capture the first parameter to the macro
    ,              # Match the comma that delineates the first parameter
    """, re.VERBOSE)


class DirectoryNotFoundException(Exception):
  """Base class to distinguish locally defined exceptions from standard ones.

  Attributes:
    msg: The human-readable description of the failure; also returned by str().
  """

  def __init__(self, msg):
    # Initialize the base class too, so that |args| and repr() carry the
    # message instead of being empty.
    super(DirectoryNotFoundException, self).__init__(msg)
    self.msg = msg

  def __str__(self):
    return self.msg


def _findDirectoryInPath(path, directory):
  """Finds the first occurrence of |directory| in |path| that lines up with
  whole path components.

  A plain substring search would also match inside a longer component, e.g.
  'src' inside '/home/srcery/chromium/src'; such matches are skipped.

  Args:
    path: The path to search, e.g. '/home/user/chromium/src/tools'
    directory: The directory name to look for, e.g. 'src'

  Returns:
    The index in |path| at which |directory| starts, or -1 if there is no
    occurrence that is bounded by path separators (or the ends of |path|).
  """
  pos = path.find(directory)
  while pos >= 0:
    end = pos + len(directory)
    starts_at_boundary = (pos == 0 or path[pos - 1] == os.sep or
                          directory.startswith(os.sep))
    ends_at_boundary = (end == len(path) or path[end] == os.sep or
                        directory.endswith(os.sep))
    if starts_at_boundary and ends_at_boundary:
      return pos
    pos = path.find(directory, pos + 1)
  return -1


def changeWorkingDirectory(target_directory):
  """Changes the working directory to the outermost ancestor of the current
  working directory (or the current directory itself) that is named
  |target_directory|.

  Args:
    target_directory: The name of the directory to change into, e.g. 'src'

  Returns:
    None

  Raises:
    DirectoryNotFoundException if the target directory cannot be found.
  """
  working_directory = os.getcwd()
  pos = _findDirectoryInPath(working_directory, target_directory)
  if pos < 0:
    raise DirectoryNotFoundException(
        'Could not find root directory "%s". Please run this script within '
        'your Chromium checkout.' % target_directory)

  # Truncate the current path right after the matched directory name.
  os.chdir(working_directory[:pos + len(target_directory)])


def collapseAdjacentCStrings(string):
  """Collapses any adjacent C strings into a single string.

  Useful to re-combine strings that were split across multiple lines to satisfy
  the 80-col restriction.

  Args:
    string: The string to recombine, e.g. '"Foo"\n "bar"'

  Returns:
    The collapsed string, e.g. "Foobar" for an input of '"Foo"\n "bar"'
  """
  # Merge one seam at a time and rescan from the start: the literal produced by
  # a merge may itself be adjacent to the next literal, which a single
  # left-to-right pass over the whole string would miss.
  while True:
    string, num_merged = ADJACENT_C_STRING_REGEX.subn(r'\1', string, count=1)
    if not num_merged:
      return string


def logNonLiteralHistogram(filename, histogram):
  """Logs a statement warning about a non-literal histogram name found in the
  Chromium source.

  Filters out known acceptable exceptions.

  Args:
    filename: The filename for the file containing the histogram, e.g.
              'chrome/browser/memory_details.cc'
    histogram: The expression that evaluates to the name of the histogram, e.g.
               '"FakeHistogram" + variant'

  Returns:
    None
  """
  # Ignore histogram macros, which typically contain backslashes so that they
  # can be formatted across lines.
  if '\\' in histogram:
    return

  # Field trials are unique within a session, so are effectively constants.
  if histogram.startswith('base::FieldTrial::MakeName'):
    return

  # Ignore histogram names that have been pulled out into C++ constants.
  if CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering to
  # reduce the noise.
  logging.warning('%s contains non-literal histogram name <%s>', filename,
                  histogram)


def _findFilesWithHistogramMacros():
  """Lists the files that mention UMA_HISTOGRAM according to `git gs`.

  NOTE(review): `git gs` is not a built-in git command; presumably it is a
  grep helper/alias available in the developer's environment -- confirm.

  Returns:
    A set of filenames, taken from the text before the first ':' of each output
    line. Example output lines:
      'path/to/foo.cc:420: UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
      'path/to/bar.cc:632: UMA_HISTOGRAM_ENUMERATION('
  """
  # subprocess replaces the deprecated `commands` module. The exit status is
  # deliberately not checked, matching the previous getoutput() behavior.
  process = subprocess.Popen(['git', 'gs', 'UMA_HISTOGRAM'],
                             stdout=subprocess.PIPE, universal_newlines=True)
  output = process.communicate()[0]

  filenames = set(location.split(':')[0] for location in output.splitlines())
  # Blank lines (or no output at all) must not turn into an attempt to open a
  # file named ''.
  filenames.discard('')
  return filenames


def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  Returns:
    A set containing any found literal histogram names.
  """
  logging.info('Scanning Chromium source for histograms...')

  histograms = set()
  for filename in _findFilesWithHistogramMacros():
    with open(filename, 'r') as f:
      contents = f.read()

    for histogram in set(HISTOGRAM_REGEX.findall(contents)):
      # The capture runs up to the comma, so it may carry trailing whitespace
      # (e.g. '"Foo" ,'); strip it so that such names still count as literals.
      histogram = collapseAdjacentCStrings(histogram).strip()

      # Must begin and end with a quotation mark. The length check also covers
      # an empty capture and a lone quotation mark.
      if len(histogram) < 2 or histogram[0] != '"' or histogram[-1] != '"':
        logNonLiteralHistogram(filename, histogram)
        continue

      # Must not include any quotation marks other than at the beginning or end.
      histogram_stripped = histogram.strip('"')
      if '"' in histogram_stripped:
        logNonLiteralHistogram(filename, histogram)
        continue

      histograms.add(histogram_stripped)

  return histograms


def readXmlHistograms(histograms_file_location):
  """Parses all histogram names from histograms.xml.

  Args:
    histograms_file_location: The path of the XML file to read, as passed to
                              extract_histograms.ExtractHistograms().

  Returns:
    A set containing the parsed histogram names.
  """
  logging.info('Reading histograms from %s...', histograms_file_location)
  histograms = extract_histograms.ExtractHistograms(histograms_file_location)
  return set(extract_histograms.ExtractNames(histograms))


def main():
  """Compares the histograms used in the source against histograms.xml and
  logs every name that is used but not defined.

  Exits with status 1 if positional arguments are given or if the root
  directory cannot be found from the current working directory.
  """
  # Parse command line options
  parser = optparse.OptionParser()
  parser.add_option(
      '--root-directory', dest='root_directory', default='src',
      help='scan within DIRECTORY for histograms [optional, defaults to "src/"]',
      metavar='DIRECTORY')
  # optparse expands %default itself, so the help text cannot drift away from
  # the actual default value.
  parser.add_option(
      '--histograms-file', dest='histograms_file_location',
      default='tools/metrics/histograms/histograms.xml',
      help='read histogram definitions from FILE (relative to --root-directory) '
           '[optional, defaults to "%default"]',
      metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  try:
    changeWorkingDirectory(options.root_directory)
  except DirectoryNotFoundException as e:
    logging.error(e)
    sys.exit(1)

  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)

  unmapped_histograms = sorted(chromium_histograms - xml_histograms)
  if not unmapped_histograms:
    logging.info('Success! No unmapped histograms found.')
    return

  logging.info('')
  logging.info('')
  logging.info('Histograms in Chromium but not in %s:',
               options.histograms_file_location)
  logging.info('-------------------------------------------------')
  for histogram in unmapped_histograms:
    logging.info(' %s', histogram)


if __name__ == '__main__':
  main()