Home | History | Annotate | Download | only in analyzer
      1 #!/usr/bin/env python
      2 
      3 """
      4 CmpRuns - A simple tool for comparing two static analyzer runs to determine
      5 which reports have been added, removed, or changed.
      6 
      7 This is designed to support automated testing using the static analyzer, from
      8 two perspectives: 
      9   1. To monitor changes in the static analyzer's reports on real code bases, for
     10      regression testing.
     11 
     12   2. For use by end users who want to integrate regular static analyzer testing
     13      into a buildbot like environment.
     14 """
     15 
     16 import os
     17 import plistlib
     18 
     19 #
     20 
     21 class multidict:
     22     def __init__(self, elts=()):
     23         self.data = {}
     24         for key,value in elts:
     25             self[key] = value
     26     
     27     def __getitem__(self, item):
     28         return self.data[item]
     29     def __setitem__(self, key, value):
     30         if key in self.data:
     31             self.data[key].append(value)
     32         else:
     33             self.data[key] = [value]
     34     def items(self):
     35         return self.data.items()
     36     def values(self):
     37         return self.data.values()
     38     def keys(self):
     39         return self.data.keys()
     40     def __len__(self):
     41         return len(self.data)
     42     def get(self, key, default=None):
     43         return self.data.get(key, default)
     44     
     45 #
     46 
     47 class CmpOptions:
     48     def __init__(self, verboseLog=None, root=""):
     49         self.root = root
     50         self.verboseLog = verboseLog
     51 
     52 class AnalysisReport:
     53     def __init__(self, run, files):
     54         self.run = run
     55         self.files = files
     56 
     57 class AnalysisDiagnostic:
     58     def __init__(self, data, report, htmlReport):
     59         self.data = data
     60         self.report = report
     61         self.htmlReport = htmlReport
     62 
     63     def getReadableName(self):
     64         loc = self.data['location']
     65         filename = self.report.run.getSourceName(self.report.files[loc['file']])
     66         line = loc['line']
     67         column = loc['col']
     68         category = self.data['category']
     69         description = self.data['description']
     70 
     71         # FIXME: Get a report number based on this key, to 'distinguish'
     72         # reports, or something.
     73         
     74         return '%s:%d:%d, %s: %s' % (filename, line, column, category, 
     75                                    description)
     76 
     77     def getReportData(self):
     78         if self.htmlReport is None:
     79             return " "
     80         return os.path.join(self.report.run.path, self.htmlReport)
     81         # We could also dump the report with:
     82         # return open(os.path.join(self.report.run.path,
     83         #                         self.htmlReport), "rb").read() 
     84 
     85 class AnalysisRun:
     86     def __init__(self, path, opts):
     87         self.path = path
     88         self.reports = []
     89         self.diagnostics = []
     90         self.opts = opts
     91 
     92     def getSourceName(self, path):
     93         if path.startswith(self.opts.root):
     94             return path[len(self.opts.root):]
     95         return path
     96 
     97 def loadResults(path, opts, deleteEmpty=True):
     98     run = AnalysisRun(path, opts)
     99 
    100     for f in os.listdir(path):
    101         if (not f.startswith('report') or
    102             not f.endswith('plist')):
    103             continue
    104 
    105         p = os.path.join(path, f)
    106         data = plistlib.readPlist(p)
    107 
    108         # Ignore/delete empty reports.
    109         if not data['files']:
    110             if deleteEmpty == True:
    111                 os.remove(p)
    112             continue
    113 
    114         # Extract the HTML reports, if they exists.
    115         if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
    116             htmlFiles = []
    117             for d in data['diagnostics']:
    118                 # FIXME: Why is this named files, when does it have multiple
    119                 # files?
    120                 assert len(d['HTMLDiagnostics_files']) == 1
    121                 htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
    122         else:
    123             htmlFiles = [None] * len(data['diagnostics'])
    124             
    125         report = AnalysisReport(run, data.pop('files'))
    126         diagnostics = [AnalysisDiagnostic(d, report, h) 
    127                        for d,h in zip(data.pop('diagnostics'),
    128                                       htmlFiles)]
    129 
    130         assert not data
    131 
    132         run.reports.append(report)
    133         run.diagnostics.extend(diagnostics)
    134 
    135     return run
    136 
    137 def compareResults(A, B):
    138     """
    139     compareResults - Generate a relation from diagnostics in run A to
    140     diagnostics in run B.
    141 
    142     The result is the relation as a list of triples (a, b, confidence) where
    143     each element {a,b} is None or an element from the respective run, and
    144     confidence is a measure of the match quality (where 0 indicates equality,
    145     and None is used if either element is None).
    146     """
    147 
    148     res = []
    149 
    150     # Quickly eliminate equal elements.
    151     neqA = []
    152     neqB = []
    153     eltsA = list(A.diagnostics)
    154     eltsB = list(B.diagnostics)
    155     eltsA.sort(key = lambda d: d.data)
    156     eltsB.sort(key = lambda d: d.data)
    157     while eltsA and eltsB:
    158         a = eltsA.pop()
    159         b = eltsB.pop()
    160         if a.data['location'] == b.data['location']:
    161             res.append((a, b, 0))
    162         elif a.data > b.data:
    163             neqA.append(a)
    164             eltsB.append(b)
    165         else:
    166             neqB.append(b)
    167             eltsA.append(a)
    168     neqA.extend(eltsA)
    169     neqB.extend(eltsB)
    170 
    171     # FIXME: Add fuzzy matching. One simple and possible effective idea would be
    172     # to bin the diagnostics, print them in a normalized form (based solely on
    173     # the structure of the diagnostic), compute the diff, then use that as the
    174     # basis for matching. This has the nice property that we don't depend in any
    175     # way on the diagnostic format.
    176 
    177     for a in neqA:
    178         res.append((a, None, None))
    179     for b in neqB:
    180         res.append((None, b, None))
    181 
    182     return res
    183 
    184 def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
    185     # Load the run results.
    186     resultsA = loadResults(dirA, opts, deleteEmpty)
    187     resultsB = loadResults(dirB, opts, deleteEmpty)
    188     
    189     # Open the verbose log, if given.
    190     if opts.verboseLog:
    191         auxLog = open(opts.verboseLog, "wb")
    192     else:
    193         auxLog = None
    194 
    195     diff = compareResults(resultsA, resultsB)
    196     foundDiffs = 0
    197     for res in diff:
    198         a,b,confidence = res
    199         if a is None:
    200             print "ADDED: %r" % b.getReadableName()
    201             foundDiffs += 1
    202             if auxLog:
    203                 print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
    204                                                         b.getReportData()))
    205         elif b is None:
    206             print "REMOVED: %r" % a.getReadableName()
    207             foundDiffs += 1
    208             if auxLog:
    209                 print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
    210                                                           a.getReportData()))
    211         elif confidence:
    212             print "CHANGED: %r to %r" % (a.getReadableName(),
    213                                          b.getReadableName())
    214             foundDiffs += 1
    215             if auxLog:
    216                 print >>auxLog, ("('CHANGED', %r, %r, %r, %r)" 
    217                                  % (a.getReadableName(),
    218                                     b.getReadableName(),
    219                                     a.getReportData(),
    220                                     b.getReportData()))
    221         else:
    222             pass
    223 
    224     TotalReports = len(resultsB.diagnostics)
    225     print "TOTAL REPORTS: %r" % TotalReports
    226     print "TOTAL DIFFERENCES: %r" % foundDiffs
    227     if auxLog:
    228         print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
    229         print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs
    230         
    231     return foundDiffs    
    232 
    233 def main():
    234     from optparse import OptionParser
    235     parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    236     parser.add_option("", "--root", dest="root",
    237                       help="Prefix to ignore on source files",
    238                       action="store", type=str, default="")
    239     parser.add_option("", "--verbose-log", dest="verboseLog",
    240                       help="Write additional information to LOG [default=None]",
    241                       action="store", type=str, default=None,
    242                       metavar="LOG")
    243     (opts, args) = parser.parse_args()
    244 
    245     if len(args) != 2:
    246         parser.error("invalid number of arguments")
    247 
    248     dirA,dirB = args
    249 
    250     cmpScanBuildResults(dirA, dirB, opts)    
    251 
# Allow the module to double as a command-line script.
if __name__ == '__main__':
    main()
    254