#!/usr/bin/env python3

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:
  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.
"""

import os
import plistlib

#

class multidict:
    """Dictionary-like map from each key to the list of values set for it."""

    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]

    def __setitem__(self, key, value):
        # Accumulate values assigned to the same key instead of overwriting.
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    def __len__(self):
        return len(self.data)

    def get(self, key, default=None):
        return self.data.get(key, default)

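# Illustrative use of multidict (not exercised in this script): repeated
# assignment to one key accumulates values in a list instead of replacing:
#
#   m = multidict()
#   m['key'] = 1
#   m['key'] = 2
#   m['key']        # -> [1, 2]
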
#

class AnalysisReport:
    def __init__(self, run, files):
        self.run = run
        self.files = files

class AnalysisDiagnostic:
    def __init__(self, data, report, htmlReport):
        self.data = data
        self.report = report
        self.htmlReport = htmlReport

    def getReadableName(self):
        loc = self.data['location']
        filename = self.report.run.getSourceName(self.report.files[loc['file']])
        line = loc['line']
        column = loc['col']

        # FIXME: Get a report number based on this key, to 'distinguish'
        # reports, or something.

        return '%s:%d:%d' % (filename, line, column)

    def getReportData(self):
        if self.htmlReport is None:
            return "This diagnostic does not have any report data."

        # Use a context manager so the report file is closed promptly, and
        # read it as text so both branches return a str.
        with open(os.path.join(self.report.run.path, self.htmlReport)) as f:
            return f.read()

class AnalysisRun:
    def __init__(self, path, opts):
        self.path = path
        self.reports = []
        self.diagnostics = []
        self.opts = opts

    def getSourceName(self, path):
        # Strip the user-supplied --root prefix, if present.
        if path.startswith(self.opts.root):
            return path[len(self.opts.root):]
        return path

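# loadResults scans a results directory for report*.plist files. Judging from
# the keys consumed below, each plist is expected to look roughly like this
# (a sketch inferred from the code, not an authoritative schema):
#
#   { 'files': [... source file paths ...],
#     'diagnostics': [ { 'location': { 'file': ..., 'line': ..., 'col': ... },
#                        'HTMLDiagnostics_files': [...],   # optional
#                        ... },
#                      ... ] }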
def loadResults(path, opts):
    run = AnalysisRun(path, opts)

    for f in os.listdir(path):
        if (not f.startswith('report') or
            not f.endswith('plist')):
            continue

        p = os.path.join(path, f)
        # plistlib.readPlist was removed in Python 3.9; use plistlib.load.
        with open(p, 'rb') as fp:
            data = plistlib.load(fp)

        # Ignore empty reports.
        if not data['files']:
            continue

        # Extract the HTML reports, if they exist.
        if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
            htmlFiles = []
            for d in data['diagnostics']:
                # FIXME: Why is this named files, when does it have multiple
                # files?
                assert len(d['HTMLDiagnostics_files']) == 1
                htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
        else:
            htmlFiles = [None] * len(data['diagnostics'])

        report = AnalysisReport(run, data.pop('files'))
        diagnostics = [AnalysisDiagnostic(d, report, h)
                       for d, h in zip(data.pop('diagnostics'),
                                       htmlFiles)]

        # All plist keys should have been consumed by now.
        assert not data

        run.reports.append(report)
        run.diagnostics.extend(diagnostics)

    return run

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Dictionaries are not orderable in Python 3, so sort and compare
    # diagnostics by a canonical string rendering of their data instead.
    def key(d):
        return str(sorted(d.data.items()))

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = sorted(A.diagnostics, key=key)
    eltsB = sorted(B.diagnostics, key=key)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        if a.data == b.data:
            res.append((a, b, 0))
        elif key(a) > key(b):
            neqA.append(a)
            eltsB.append(b)
        else:
            neqB.append(b)
            eltsA.append(a)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res

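# Illustrative example of the relation computed by compareResults, with
# hypothetical diagnostics: comparing run A = {X, Y} against run B = {Y, Z}
# yields
#
#   [(Y_a, Y_b, 0),       # identical report found in both runs
#    (X, None, None),     # only in run A (reported as REMOVED below)
#    (None, Z, None)]     # only in run B (reported as ADDED below)
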
def main():
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--root", dest="root",
                      help="Prefix to ignore on source files",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    # Load the run results.
    resultsA = loadResults(dirA, opts)
    resultsB = loadResults(dirB, opts)

    # Open the verbose log, if given. Open it in text mode, since it is
    # written to with print().
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "w")
    else:
        auxLog = None

    diff = compareResults(resultsA, resultsB)
    for res in diff:
        a, b, confidence = res
        if a is None:
            print("ADDED: %r" % b.getReadableName())
            if auxLog:
                print("('ADDED', %r, %r)" % (b.getReadableName(),
                                             b.getReportData()), file=auxLog)
        elif b is None:
            print("REMOVED: %r" % a.getReadableName())
            if auxLog:
                print("('REMOVED', %r, %r)" % (a.getReadableName(),
                                               a.getReportData()), file=auxLog)
        elif confidence:
            print("CHANGED: %r to %r" % (a.getReadableName(),
                                         b.getReadableName()))
            if auxLog:
                print("('CHANGED', %r, %r, %r, %r)"
                      % (a.getReadableName(),
                         b.getReadableName(),
                         a.getReportData(),
                         b.getReportData()), file=auxLog)
        else:
            # Reports that matched exactly (confidence 0) are uninteresting.
            pass

    print("TOTAL REPORTS: %r" % len(resultsB.diagnostics))
    if auxLog:
        print("('TOTAL', %r)" % len(resultsB.diagnostics), file=auxLog)
        auxLog.close()

if __name__ == '__main__':
    main()