#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib
import CmpRuns

# Information about analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
#        determining the source file name
class SingleRunInfo:
    """Describes where one analyzer run lives on disk.

    Stores the output directory (path), the source-path prefix to strip
    (root) and the verbose log setting exactly as given.
    """

    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        self.root = root
        self.verboseLog = verboseLog


class AnalysisDiagnostic:
    """One diagnostic entry of a report, plus the report that owns it.

    data is the raw per-diagnostic mapping, report is the owning
    AnalysisReport and htmlReport is the HTML file name (or None).
    """

    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        """Return the source file name with the run's root prefix removed."""
        prefix = self._report.run.root
        name = self._report.files[self._loc['file']]
        # Only strip when the name really starts with the configured root.
        return name[len(prefix):] if name.startswith(prefix) else name

    def getLine(self):
        """Return the 'line' entry of the diagnostic location."""
        return self._loc['line']

    def getColumn(self):
        """Return the 'col' entry of the diagnostic location."""
        return self._loc['col']

    def getCategory(self):
        """Return the 'category' entry of the raw data."""
        return self._data['category']

    def getDescription(self):
        """Return the 'description' entry of the raw data."""
        return self._data['description']

    def getIssueIdentifier(self):
        """Return the key used to match this diagnostic across runs.

        The key is "<file>+", followed by "<issue_context>+" and the
        stringified issue_hash when those entries are present.
        """
        pieces = [self.getFileName(), "+"]
        if 'issue_context' in self._data:
            pieces.extend([self._data['issue_context'], "+"])
        if 'issue_hash' in self._data:
            pieces.append(str(self._data['issue_hash']))
        return "".join(pieces)

    def getReport(self):
        """Return the HTML report path, or " " when there is none."""
        if self._htmlReport is not None:
            return os.path.join(self._report.run.path, self._htmlReport)
        return " "

    def getReadableName(self):
        """Return "file:line:col, category: description"."""
        fields = (self.getFileName(), self.getLine(), self.getColumn(),
                  self.getCategory(), self.getDescription())
        return '%s:%d:%d, %s: %s' % fields

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        """Return the underlying raw data mapping."""
        return self._data


class multidict:
    """A mapping in which assigning to a key appends to that key's list."""

    def __init__(self, elts=()):
        self.data = {}
        for name, item in elts:
            self[name] = item

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # First assignment creates the list; later ones append to it.
        self.data.setdefault(key, []).append(value)

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)


class CmpOptions:
    """Options bundle: per-run root prefixes and the verbose log setting."""

    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog


class AnalysisReport:
    """One loaded report: its run, its file table and its diagnostics."""

    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []


class AnalysisRun:
    """All reports and diagnostics gathered for one SingleRunInfo."""

    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        """Return the recorded clang version (None until one is set)."""
        return self.clang_version


# Backward compatibility API.
def loadResults(path, opts, root="", deleteEmpty=True):
    """Load one analyzer run using the older (path, opts, root) signature.

    Wraps the arguments in a SingleRunInfo (taking the verbose log setting
    from opts.verboseLog) and forwards to loadResultsFromSingleRun.
    """
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
                                    deleteEmpty)


def _readPlist(path):
    """Parse the plist file at path and return its top-level object.

    plistlib.readPlist was removed in Python 3.9, so plistlib.load is used
    whenever it exists; readPlist remains the fallback for interpreters
    that do not provide load.
    """
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as plistFile:
            return plistlib.load(plistFile)
    return plistlib.readPlist(path)


# Load results of the analyzes from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    """Build an AnalysisRun from every plist report found under info.path.

    The directory tree is walked recursively and each file whose name ends
    with 'plist' is parsed. Reports with an empty 'files' list are skipped,
    and removed from disk when deleteEmpty is true.

    Returns the populated AnalysisRun.

    Raises AssertionError when a diagnostic does not carry exactly one HTML
    file, or when a report has top-level keys other than 'clang_version',
    'files' and 'diagnostics'. Raises KeyError when 'files' or
    'diagnostics' is missing.
    """
    path = info.path
    run = AnalysisRun(info)

    for (dirpath, dirnames, filenames) in os.walk(path):
        for f in filenames:
            if not f.endswith('plist'):
                continue

            p = os.path.join(dirpath, f)
            data = _readPlist(p)

            # We want to retrieve the clang version even if there are no
            # reports. Assume that all reports were created using the same
            # clang version (this is always true and is more efficient).
            if 'clang_version' in data:
                version = data.pop('clang_version')
                if run.clang_version is None:
                    run.clang_version = version

            # Ignore/delete empty reports.
            if not data['files']:
                if deleteEmpty:
                    os.remove(p)
                continue

            rawDiagnostics = data.pop('diagnostics')

            # Extract the HTML reports, if they exist. The first diagnostic
            # decides for the whole report; a report without diagnostics
            # simply has no HTML files instead of raising IndexError.
            if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
                htmlFiles = []
                for d in rawDiagnostics:
                    # FIXME: Why is this named files, when does it have
                    # multiple files?
                    assert len(d['HTMLDiagnostics_files']) == 1
                    htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
            else:
                htmlFiles = [None] * len(rawDiagnostics)

            report = AnalysisReport(run, data.pop('files'))
            diagnostics = [AnalysisDiagnostic(d, report, h)
                           for d, h in zip(rawDiagnostics, htmlFiles)]

            # Every known key has been consumed; anything left is unexpected.
            assert not data

            report.diagnostics.extend(diagnostics)
            run.reports.append(report)
            run.diagnostics.extend(diagnostics)

    return run


def cmpAnalysisDiagnostic(d):
    """Return the sort key of a diagnostic: its issue identifier."""
    return d.getIssueIdentifier()


def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first in the result, followed by the diagnostics only
    present in A and then those only present in B.
    """

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=cmpAnalysisDiagnostic)
    eltsB = sorted(B.diagnostics, key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        # Both lists are sorted ascending, so the popped elements are the
        # largest remaining identifiers of each run.
        a = eltsA.pop()
        b = eltsB.pop()
        idA = a.getIssueIdentifier()
        idB = b.getIssueIdentifier()
        if idA == idB:
            res.append((a, b, 0))
        elif idA > idB:
            # Nothing left in B can equal a; keep b for the next round.
            eltsB.append(b)
            neqA.append(a)
        else:
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    # to bin the diagnostics, print them in a normalized form (based solely on
    # the structure of the diagnostic), compute the diff, then use that as the
    # basis for matching. This has the nice property that we don't depend in any
    # way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res


def _writeLogLine(log, text):
    """Write text followed by a newline to log; do nothing if log is None."""
    if log is not None:
        log.write(text + "\n")


def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    """Print the differences between the runs stored in dirA and dirB.

    One ADDED / REMOVED / CHANGED line is printed per differing diagnostic,
    followed by the totals. When opts.verboseLog is set, matching tuple-style
    lines are also written to that file.

    The log is opened in text mode because only text is written to it, and
    it is always closed, even if reporting fails part-way.

    Returns the number of differences found.
    """
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    auxLog = open(opts.verboseLog, "w") if opts.verboseLog else None

    try:
        diff = compareResults(resultsA, resultsB)
        foundDiffs = 0
        for a, b, confidence in diff:
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                foundDiffs += 1
                _writeLogLine(auxLog,
                              "('ADDED', %r, %r)" % (b.getReadableName(),
                                                     b.getReport()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                foundDiffs += 1
                _writeLogLine(auxLog,
                              "('REMOVED', %r, %r)" % (a.getReadableName(),
                                                       a.getReport()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                foundDiffs += 1
                _writeLogLine(auxLog,
                              "('CHANGED', %r, %r, %r, %r)"
                              % (a.getReadableName(),
                                 b.getReadableName(),
                                 a.getReport(),
                                 b.getReport()))
            # Otherwise the pair matched exactly (confidence 0): no output.

        TotalReports = len(resultsB.diagnostics)
        print("TOTAL REPORTS: %r" % TotalReports)
        print("TOTAL DIFFERENCES: %r" % foundDiffs)
        _writeLogLine(auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports)
        _writeLogLine(auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs)
    finally:
        if auxLog is not None:
            auxLog.close()

    return foundDiffs


def main():
    """Parse the command line and print the diff of the two given runs."""
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    # Exactly two positional arguments are required: the two run directories.
    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)


if __name__ == '__main__':
    main()