#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases, for
     regression testing.

  2. For use by end users who want to integrate regular static analyzer testing
     into a buildbot like environment.
"""

import os
import plistlib

#

class multidict:
    """Mapping in which every key holds the list of values assigned to it.

    Assigning to a key appends to that key's list instead of replacing it;
    reading a key returns the whole list.
    """

    def __init__(self, elts=()):
        # elts is an iterable of (key, value) pairs; repeated keys accumulate.
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        # Number of distinct keys, not the number of stored values.
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

#

class AnalysisReport:
    """One loaded report: its owning run and the report's 'files' entry."""

    def __init__(self, run, files):
        self.run = run
        self.files = files


class AnalysisDiagnostic:
    """A single diagnostic together with its report and optional HTML file."""

    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        """Return the diagnostic location formatted as 'file:line:col'."""
        loc = self.data['location']
        # loc['file'] is used as an index into the report's file table.
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        """Return the raw contents of the HTML report for this diagnostic.

        When the diagnostic has no HTML report a fixed placeholder string is
        returned instead of file contents.
        """
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        reportPath = os.path.join(self.report.run.path, self.htmlReport)
        # Use a context manager so the handle is released promptly rather
        # than whenever the garbage collector gets to it.
        with open(reportPath, "rb") as f:
            return f.read()


class AnalysisRun:
    """The reports and diagnostics loaded from one results directory."""

    def __init__(self, path, opts):
        self.path = path
        self.reports = []
        self.diagnostics = []
        self.opts = opts

    def getSourceName(self, path):
        """Return path with the opts.root prefix removed, if it has one."""
        if path.startswith(self.opts.root):
            return path[len(self.opts.root):]
        return path


def _readPlist(path):
    """Parse the plist file at path with whichever plistlib API exists."""
    # plistlib.readPlist() was removed in Python 3.9; plistlib.load() is its
    # replacement but is missing from older interpreters.
    if hasattr(plistlib, 'load'):
        with open(path, 'rb') as f:
            return plistlib.load(f)
    return plistlib.readPlist(path)


def loadResults(path, opts):
    """Load every 'report*plist' file found directly in path.

    Returns an AnalysisRun holding one AnalysisReport per non-empty report
    file and one AnalysisDiagnostic per diagnostic entry.

    Raises AssertionError if a diagnostic lists anything other than exactly
    one HTML file, or if a report has top-level keys other than 'files' and
    'diagnostics'.
    """
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if (not f.startswith('report') or
            not f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        data = _readPlist(p)

        # Ignore empty reports.
        if not data['files']:
            continue

        rawDiagnostics = data.pop('diagnostics')

        # Extract the HTML reports, if they exist. The first diagnostic
        # decides for the whole report; the emptiness check keeps a report
        # with files but no diagnostics from raising IndexError.
        if rawDiagnostics and 'HTMLDiagnostics_files' in rawDiagnostics[0]:
            htmlFiles = []
            for d in rawDiagnostics:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(rawDiagnostics)

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(rawDiagnostics, htmlFiles)]

        # Everything in the report must have been consumed above.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run


def _orderingKey(value):
    """Return a totally ordered stand-in for a (nested) plist value.

    Dictionaries cannot be ordered on Python 3, so each value is turned into
    a type-tagged structure: values of different kinds are separated by the
    leading tag and never compared with each other. Equal values produce
    equal keys. Dictionary keys are assumed to be mutually comparable (plist
    keys are strings).
    """
    if isinstance(value, dict):
        return (0, sorted((k, _orderingKey(v)) for k, v in value.items()))
    if isinstance(value, list):
        return (1, [_orderingKey(v) for v in value])
    if isinstance(value, (int, float)):
        return (2, value)
    return (3, type(value).__name__, repr(value))


def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).

    Matched pairs come first, followed by the diagnostics found only in A and
    then those found only in B.
    """

    res = []

    # Quickly eliminate equal elements. Both runs are sorted by the same key
    # and then merged from the largest element down. Only the key is used
    # for ordering so the diagnostic objects are never compared themselves.
    neqA = []
    neqB = []
    eltsA = [(_orderingKey(d.data), d) for d in A.diagnostics]
    eltsB = [(_orderingKey(d.data), d) for d in B.diagnostics]
    eltsA.sort(key=lambda pair: pair[0])
    eltsB.sort(key=lambda pair: pair[0])
    while eltsA and eltsB:
        entryA = eltsA.pop()
        entryB = eltsB.pop()
        keyA, a = entryA
        keyB, b = entryB
        if keyA == keyB:
            res.append((a, b, 0))
        elif keyA > keyB:
            # Nothing left in B can equal a; retry b against the next a.
            neqA.append(a)
            eltsB.append(entryB)
        else:
            # Nothing left in A can equal b; retry a against the next b.
            neqB.append(b)
            eltsA.append(entryA)
    neqA.extend(d for _, d in eltsA)
    neqB.extend(d for _, d in eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res


def _writeLogLine(log, text):
    """Write text plus a newline to log, a file opened in binary mode."""
    # The log is opened "wb", so encode explicitly; this produces the same
    # bytes as a Python 2 `print >>log` and also works on Python 3.
    log.write((text + "\n").encode("utf-8"))


def main():
    """Command-line entry point: compare the results in two directories.

    Prints one ADDED/REMOVED/CHANGED line per differing diagnostic followed
    by the total number of diagnostics in the second run, and mirrors that
    information (plus the report data) to the --verbose-log file when given.
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", dest="root",
                      help="Prefix to ignore on source files",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    # Load the run results.
    resultsA = loadResults(dirA, opts)
    resultsB = loadResults(dirB, opts)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    try:
        diff = compareResults(resultsA, resultsB)
        for res in diff:
            a, b, confidence = res
            if a is None:
                print("ADDED: %r" % b.getReadableName())
                if auxLog:
                    _writeLogLine(auxLog,
                                  "('ADDED', %r, %r)" % (b.getReadableName(),
                                                         b.getReportData()))
            elif b is None:
                print("REMOVED: %r" % a.getReadableName())
                if auxLog:
                    _writeLogLine(auxLog,
                                  "('REMOVED', %r, %r)" % (a.getReadableName(),
                                                           a.getReportData()))
            elif confidence:
                print("CHANGED: %r to %r" % (a.getReadableName(),
                                             b.getReadableName()))
                if auxLog:
                    _writeLogLine(auxLog,
                                  "('CHANGED', %r, %r, %r, %r)"
                                  % (a.getReadableName(),
                                     b.getReadableName(),
                                     a.getReportData(),
                                     b.getReportData()))
            else:
                # Exact match (confidence 0): nothing to report.
                pass

        print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
        if auxLog:
            _writeLogLine(auxLog, "('TOTAL', %r)" % len(resultsB.diagnostics))
    finally:
        # Make sure the log is flushed and closed even if reporting fails.
        if auxLog:
            auxLog.close()


if __name__ == '__main__':
    main()