# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Scans the Chromium source for histograms that are absent from histograms.xml.

This is a heuristic scan, so a clean run of this script does not guarantee that
all histograms in the Chromium source are properly mapped.  Notably, field
trials are entirely ignored by this script.

"""

import extract_histograms
import hashlib
import logging
import optparse
import os
import re
import subprocess
import sys


# Matches the seam between two adjacent C string literals, capturing the first
# literal without its closing quote so that the two can be fused together.
ADJACENT_C_STRING_REGEX = re.compile(r"""
    ("      # Opening quotation mark
    [^"]*)  # Literal string contents
    "       # Closing quotation mark
    \s*     # Any number of spaces
    "       # Another opening quotation mark
    """, re.VERBOSE)
# Matches an expression that consists solely of a C++ constant identifier.
CONSTANT_REGEX = re.compile(r"""
    (\w*::)?  # Optional namespace
    k[A-Z]    # Match a constant identifier: 'k' followed by an uppercase letter
    \w*       # Match the rest of the constant identifier
    $         # Make sure there's only the identifier, nothing else
    """, re.VERBOSE)
# Matches an invocation of a UMA_HISTOGRAM* macro, capturing its first argument.
HISTOGRAM_REGEX = re.compile(r"""
    UMA_HISTOGRAM  # Match the shared prefix for standard UMA histogram macros
    \w*            # Match the rest of the macro name, e.g. '_ENUMERATION'
    \(             # Match the opening parenthesis for the macro
    \s*            # Match any whitespace -- especially, any newlines
    ([^,)]*)       # Capture the first parameter to the macro
    [,)]           # Match the comma/paren that delineates the first parameter
    """, re.VERBOSE)


class DirectoryNotFoundException(Exception):
  """Raised when the Chromium src/ directory cannot be located.

  Attributes:
    msg: Human-readable description of the failure; also returned by str().
  """
  def __init__(self, msg):
    # Initialise the base class too, so that `args` and repr() carry the
    # message rather than being empty.
    Exception.__init__(self, msg)
    self.msg = msg

  def __str__(self):
    return self.msg


def findDefaultRoot():
  """Find the root of the chromium repo, in case the script is run from the
  histograms dir.

  Walks upwards from the current working directory until a path component
  named 'src' is found.

  Returns:
    string: path to the src dir of the repo.

  Raises:
    DirectoryNotFoundException if the target directory cannot be found.
  """
  path = os.getcwd()
  while path:
    head, tail = os.path.split(path)
    if tail == 'src':
      return path
    # os.path.split() returns its input unchanged as `head` once the
    # filesystem root is reached; stop there instead of looping forever.
    if path == head:
      break
    path = head
  raise DirectoryNotFoundException('Could not find src/ dir')


def collapseAdjacentCStrings(string):
  """Collapses any adjacent C strings into a single string.

  Useful to re-combine strings that were split across multiple lines to satisfy
  the 80-col restriction.

  Args:
    string: The string to recombine, e.g. '"Foo"\\n  "bar"'

  Returns:
    The collapsed string, e.g. "Foobar" for an input of '"Foo"\\n  "bar"'
  """
  # Each substitution consumes the opening quote of the following literal, so
  # one global pass cannot merge three or more literals in a row.  Merge one
  # seam at a time until the string stops changing.
  while True:
    collapsed = ADJACENT_C_STRING_REGEX.sub(r'\1', string, count=1)
    if collapsed == string:
      return collapsed

    string = collapsed


def logNonLiteralHistogram(filename, histogram):
  """Logs a statement warning about a non-literal histogram name found in the
  Chromium source.

  Filters out known acceptable exceptions.

  Args:
    filename: The filename for the file containing the histogram, e.g.
              'chrome/browser/memory_details.cc'
    histogram: The expression that evaluates to the name of the histogram, e.g.
               '"FakeHistogram" + variant'

  Returns:
    None
  """
  # Ignore histogram macros, which typically contain backslashes so that they
  # can be formatted across lines.
  if '\\' in histogram:
    return

  # Ignore histogram names that have been pulled out into C++ constants.
  if CONSTANT_REGEX.match(histogram):
    return

  # TODO(isherman): This is still a little noisy... needs further filtering to
  # reduce the noise.
  logging.warning('%s contains non-literal histogram name <%s>', filename,
                  histogram)


def _listFilesWithHistogramMacros():
  """Returns the set of files that mention UMA_HISTOGRAM, per 'git gs'."""
  # Examples of the output lines:
  #   'path/to/foo.cc:420:  UMA_HISTOGRAM_COUNTS_100("FooGroup.FooName",'
  #   'path/to/bar.cc:632:  UMA_HISTOGRAM_ENUMERATION('
  #
  # Only stdout is captured, so that diagnostics git prints to stderr are not
  # mistaken for file names.  universal_newlines=True yields text rather than
  # bytes on Python 3.
  process = subprocess.Popen(['git', 'gs', 'UMA_HISTOGRAM'],
                             stdout=subprocess.PIPE, universal_newlines=True)
  output, _ = process.communicate()
  # grep-style commands exit with 1 when nothing matched; anything else
  # non-zero indicates a real failure worth surfacing.
  if process.returncode not in (0, 1):
    logging.warning('"git gs" exited with status %d', process.returncode)

  # Skip blank lines: empty output would otherwise produce the file name ''.
  return set(location.split(':')[0]
             for location in output.splitlines() if location)


def _extractLiteralHistogramNames(filename, contents):
  """Returns the set of literal histogram names used in |contents|.

  Macro invocations whose first argument is not a single string literal are
  reported through logNonLiteralHistogram() and otherwise skipped.

  Args:
    filename: Name of the file |contents| was read from; used for logging only.
    contents: The full text of the source file.
  """
  names = set()
  for histogram in set(HISTOGRAM_REGEX.findall(contents)):
    # Strip surrounding whitespace so that '"Foo" ,' or a literal followed by a
    # newline is still recognised as a literal.
    histogram = collapseAdjacentCStrings(histogram).strip()

    # Must begin and end with a quotation mark.  The length check also guards
    # against an empty first argument, e.g. 'UMA_HISTOGRAM_FOO()'.
    if (len(histogram) < 2 or
        not (histogram.startswith('"') and histogram.endswith('"'))):
      logNonLiteralHistogram(filename, histogram)
      continue

    # Must not include any quotation marks other than at the beginning or end.
    histogram_stripped = histogram.strip('"')
    if '"' in histogram_stripped:
      logNonLiteralHistogram(filename, histogram)
      continue

    names.add(histogram_stripped)

  return names


def readChromiumHistograms():
  """Searches the Chromium source for all histogram names.

  Also prints warnings for any invocations of the UMA_HISTOGRAM_* macros with
  names that might vary during a single run of the app.

  Must be run with the current working directory inside the git checkout,
  since it shells out to 'git gs' and opens the relative paths it reports.

  Returns:
    A set containing any found literal histogram names.
  """
  logging.info('Scanning Chromium source for histograms...')

  histograms = set()
  for filename in _listFilesWithHistogramMacros():
    with open(filename, 'r') as f:
      contents = f.read()
    histograms.update(_extractLiteralHistogramNames(filename, contents))

  return histograms


def readXmlHistograms(histograms_file_location):
  """Parses all histogram names from histograms.xml.

  Args:
    histograms_file_location: Path of the XML file to parse.

  Returns:
    A set containing the parsed histogram names.
  """
  logging.info('Reading histograms from %s...', histograms_file_location)
  histograms = extract_histograms.ExtractHistograms(histograms_file_location)
  return set(extract_histograms.ExtractNames(histograms))


def hashHistogramName(name):
  """Computes the hash of a histogram name.

  Args:
    name: The string to hash (a histogram name).

  Returns:
    Histogram hash as a string representing a hex number (with leading 0x).
  """
  # hashlib only accepts bytes on Python 3; on Python 2 a plain str already is
  # bytes and passes through untouched.
  if not isinstance(name, bytes):
    name = name.encode('utf-8')
  return '0x' + hashlib.md5(name).hexdigest()[:16]


def main():
  """Reports histograms used in the source but missing from the XML files.

  Exits with status 1 on unexpected positional arguments or when the root
  directory cannot be entered.
  """
  # Find default paths.
  default_root = findDefaultRoot()
  default_histograms_path = os.path.join(
      default_root, 'tools/metrics/histograms/histograms.xml')
  default_extra_histograms_path = os.path.join(
      default_root, 'tools/histograms/histograms.xml')

  # Parse command line options
  parser = optparse.OptionParser()
  parser.add_option(
    '--root-directory', dest='root_directory', default=default_root,
    help='scan within DIRECTORY for histograms [optional, defaults to "%s"]' %
        default_root,
    metavar='DIRECTORY')
  parser.add_option(
    '--histograms-file', dest='histograms_file_location',
    default=default_histograms_path,
    help='read histogram definitions from FILE (relative to --root-directory) '
         '[optional, defaults to "%s"]' % default_histograms_path,
    metavar='FILE')
  # '--exrta_histograms-file' is a historical misspelling; it is kept as an
  # alias so that existing invocations continue to work.
  parser.add_option(
    '--extra-histograms-file', '--exrta_histograms-file',
    dest='extra_histograms_file_location',
    default=default_extra_histograms_path,
    help='read additional histogram definitions from FILE (relative to '
         '--root-directory) [optional, defaults to "%s"]' %
         default_extra_histograms_path,
    metavar='FILE')

  (options, args) = parser.parse_args()
  if args:
    parser.print_help()
    sys.exit(1)

  logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

  try:
    os.chdir(options.root_directory)
  except EnvironmentError as e:
    logging.error("Could not change to root directory: %s", e)
    sys.exit(1)
  chromium_histograms = readChromiumHistograms()
  xml_histograms = readXmlHistograms(options.histograms_file_location)
  unmapped_histograms = chromium_histograms - xml_histograms

  # The extra histograms file is optional; only warn when it is absent.
  if os.path.isfile(options.extra_histograms_file_location):
    xml_histograms2 = readXmlHistograms(options.extra_histograms_file_location)
    unmapped_histograms -= xml_histograms2
  else:
    logging.warning('No such file: %s', options.extra_histograms_file_location)

  if unmapped_histograms:
    logging.info('')
    logging.info('')
    logging.info('Histograms in Chromium but not in XML files:')
    logging.info('-------------------------------------------------')
    for histogram in sorted(unmapped_histograms):
      logging.info('  %s - %s', histogram, hashHistogramName(histogram))
  else:
    logging.info('Success!  No unmapped histograms found.')


if __name__ == '__main__':
  main()