Home | History | Annotate | Download | only in analyzer
      1 #!/usr/bin/env python
      2 
      3 """
      4 Static Analyzer qualification infrastructure.
      5 
      6 The goal is to test the analyzer against different projects, check for failures,
      7 compare results, and measure performance.
      8 
      9 Repository Directory will contain sources of the projects as well as the 
     10 information on how to build them and the expected output. 
     11 Repository Directory structure:
     12    - ProjectMap file
     13    - Historical Performance Data
     14    - Project Dir1
     15      - ReferenceOutput
     16    - Project Dir2
     17      - ReferenceOutput
     18    ..
     19 
     20 To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository, to minimize network
     traffic.)
     23    - Build all projects, until error. Produce logs to report errors.
     24    - Compare results.  
     25 
     26 The files which should be kept around for failure investigations: 
     27    RepositoryCopy/Project DirI/ScanBuildResults
     28    RepositoryCopy/Project DirI/run_static_analyzer.log      
     29    
     30 Assumptions (TODO: shouldn't need to assume these.):   
     31    The script is being run from the Repository Directory.
     32    The compiler for scan-build and scan-build are in the PATH.
     33    export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
     34 
For more logging, set the following environment variables:
     36    zaks:TI zaks$ export CCC_ANALYZER_LOG=1
     37    zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
     38 """
     39 import CmpRuns
     40 
     41 import os
     42 import csv
     43 import sys
     44 import glob
     45 import shutil
     46 import time
     47 import plistlib
     48 from subprocess import check_call, CalledProcessError
     49 
# Project map stores info about all the "registered" projects.
ProjectMapFile = "projectMap.csv"

# Names of the project specific scripts.
# The script that needs to be executed before the build can start.
CleanupScript = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BuildScript = "run_static_analyzer.cmd"

# Name of the folder, inside the results directory, that holds the logs.
LogFolderName = "Logs"
# The build log file name.
BuildLogName = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
# At most NumOfFailuresInSummary failure logs are copied into the summary.
NumOfFailuresInSummary = 10
FailuresSummaryFileName = "failures.txt"
# Summary of the result diffs.
DiffsSummaryFileName = "diffs.txt"

# The scan-build result directory.
SBOutputDirName = "ScanBuildResults"
# Prefix put in front of SBOutputDirName when producing reference results.
SBOutputDirReferencePrefix = "Ref"

# The list of checkers used during analysis.
# Note that the list contains one experimental entry
# (experimental.security.taint) next to the non-experimental packages.
Checkers="experimental.security.taint,core,deadcode,cplusplus,security,unix,osx,cocoa"

# When set to 1, extra progress details (such as the commands being executed)
# are printed.
Verbose = 1

# True while reference results are being (re)generated; set by testAll().
IsReferenceBuild = False
     80 
     81 # Make sure we flush the output after every print statement.
     82 class flushfile(object):
     83     def __init__(self, f):
     84         self.f = f
     85     def write(self, x):
     86         self.f.write(x)
     87         self.f.flush()
     88 
# Route everything written to stdout through the wrapper so that each write
# is flushed immediately.
sys.stdout = flushfile(sys.stdout)
     90 
     91 def getProjectMapPath():
     92     ProjectMapPath = os.path.join(os.path.abspath(os.curdir), 
     93                                   ProjectMapFile)
     94     if not os.path.exists(ProjectMapPath):
     95         print "Error: Cannot find the Project Map file " + ProjectMapPath +\
     96                 "\nRunning script for the wrong directory?"
     97         sys.exit(-1)  
     98     return ProjectMapPath         
     99 
    100 def getProjectDir(ID):
    101     return os.path.join(os.path.abspath(os.curdir), ID)        
    102 
    103 def getSBOutputDirName() :
    104     if IsReferenceBuild == True :
    105         return SBOutputDirReferencePrefix + SBOutputDirName
    106     else :
    107         return SBOutputDirName
    108 
    109 # Run pre-processing script if any.
    110 def runCleanupScript(Dir, PBuildLogFile):
    111     ScriptPath = os.path.join(Dir, CleanupScript)
    112     if os.path.exists(ScriptPath):
    113         try:
    114             if Verbose == 1:        
    115                 print "  Executing: %s" % (ScriptPath,)
    116             check_call("chmod +x %s" % ScriptPath, cwd = Dir, 
    117                                               stderr=PBuildLogFile,
    118                                               stdout=PBuildLogFile, 
    119                                               shell=True)    
    120             check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
    121                                               stdout=PBuildLogFile, 
    122                                               shell=True)
    123         except:
    124             print "Error: The pre-processing step failed. See ", \
    125                   PBuildLogFile.name, " for details."
    126             sys.exit(-1)
    127 
    128 # Build the project with scan-build by reading in the commands and 
    129 # prefixing them with the scan-build options.
    130 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    131     BuildScriptPath = os.path.join(Dir, BuildScript)
    132     if not os.path.exists(BuildScriptPath):
    133         print "Error: build script is not defined: %s" % BuildScriptPath
    134         sys.exit(-1)       
    135     SBOptions = "-plist-html -o " + SBOutputDir + " "
    136     SBOptions += "-enable-checker " + Checkers + " "  
    137     try:
    138         SBCommandFile = open(BuildScriptPath, "r")
    139         SBPrefix = "scan-build " + SBOptions + " "
    140         for Command in SBCommandFile:
    141             SBCommand = SBPrefix + Command
    142             if Verbose == 1:        
    143                 print "  Executing: %s" % (SBCommand,)
    144             check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
    145                                              stdout=PBuildLogFile, 
    146                                              shell=True)
    147     except:
    148         print "Error: scan-build failed. See ",PBuildLogFile.name,\
    149               " for details."
    150         raise
    151 
    152 def hasNoExtension(FileName):
    153     (Root, Ext) = os.path.splitext(FileName)
    154     if ((Ext == "")) :
    155         return True
    156     return False
    157 
    158 def isValidSingleInputFile(FileName):
    159     (Root, Ext) = os.path.splitext(FileName)
    160     if ((Ext == ".i") | (Ext == ".ii") | 
    161         (Ext == ".c") | (Ext == ".cpp") | 
    162         (Ext == ".m") | (Ext == "")) :
    163         return True
    164     return False
    165 
    166 # Run analysis on a set of preprocessed files.
    167 def runAnalyzePreprocessed(Dir, SBOutputDir):
    168     if os.path.exists(os.path.join(Dir, BuildScript)):
    169         print "Error: The preprocessed files project should not contain %s" % \
    170                BuildScript
    171         raise Exception()       
    172 
    173     CmdPrefix = "clang -cc1 -analyze -analyzer-output=plist -w "
    174     CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "   
    175     
    176     PlistPath = os.path.join(Dir, SBOutputDir, "date")
    177     FailPath = os.path.join(PlistPath, "failures");
    178     os.makedirs(FailPath);
    179  
    180     for FullFileName in glob.glob(Dir + "/*"):
    181         FileName = os.path.basename(FullFileName)
    182         Failed = False
    183         
    184         # Only run the analyzes on supported files.
    185         if (hasNoExtension(FileName)):
    186             continue
    187         if (isValidSingleInputFile(FileName) == False):
    188             print "Error: Invalid single input file %s." % (FullFileName,)
    189             raise Exception()
    190         
    191         # Build and call the analyzer command.
    192         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
    193         Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
    194         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
    195         try:
    196             if Verbose == 1:        
    197                 print "  Executing: %s" % (Command,)
    198             check_call(Command, cwd = Dir, stderr=LogFile,
    199                                            stdout=LogFile, 
    200                                            shell=True)
    201         except CalledProcessError, e:
    202             print "Error: Analyzes of %s failed. See %s for details." \
    203                   "Error code %d." % \
    204                    (FullFileName, LogFile.name, e.returncode)
    205             Failed = True       
    206         finally:
    207             LogFile.close()            
    208         
    209         # If command did not fail, erase the log file.
    210         if Failed == False:
    211             os.remove(LogFile.name);
    212 
    213 def buildProject(Dir, SBOutputDir, IsScanBuild):
    214     TBegin = time.time() 
    215 
    216     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    217     print "Log file: %s" % (BuildLogPath,) 
    218     print "Output directory: %s" %(SBOutputDir, )
    219     
    220     # Clean up the log file.
    221     if (os.path.exists(BuildLogPath)) :
    222         RmCommand = "rm " + BuildLogPath
    223         if Verbose == 1:
    224             print "  Executing: %s" % (RmCommand,)
    225         check_call(RmCommand, shell=True)
    226     
    227     # Clean up scan build results.
    228     if (os.path.exists(SBOutputDir)) :
    229         RmCommand = "rm -r " + SBOutputDir
    230         if Verbose == 1: 
    231             print "  Executing: %s" % (RmCommand,)
    232             check_call(RmCommand, shell=True)
    233     assert(not os.path.exists(SBOutputDir))
    234     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
    235         
    236     # Open the log file.
    237     PBuildLogFile = open(BuildLogPath, "wb+")
    238     
    239     # Build and analyze the project.
    240     try:
    241         runCleanupScript(Dir, PBuildLogFile)
    242         
    243         if IsScanBuild:
    244             runScanBuild(Dir, SBOutputDir, PBuildLogFile)
    245         else:
    246             runAnalyzePreprocessed(Dir, SBOutputDir)
    247         
    248         if IsReferenceBuild :
    249             runCleanupScript(Dir, PBuildLogFile)
    250            
    251     finally:
    252         PBuildLogFile.close()
    253         
    254     print "Build complete (time: %.2f). See the log for more details: %s" % \
    255            ((time.time()-TBegin), BuildLogPath) 
    256        
    257 # A plist file is created for each call to the analyzer(each source file).
    258 # We are only interested on the once that have bug reports, so delete the rest.        
    259 def CleanUpEmptyPlists(SBOutputDir):
    260     for F in glob.glob(SBOutputDir + "/*/*.plist"):
    261         P = os.path.join(SBOutputDir, F)
    262         
    263         Data = plistlib.readPlist(P)
    264         # Delete empty reports.
    265         if not Data['files']:
    266             os.remove(P)
    267             continue
    268 
    269 # Given the scan-build output directory, checks if the build failed 
    270 # (by searching for the failures directories). If there are failures, it 
    271 # creates a summary file in the output directory.         
    272 def checkBuild(SBOutputDir):
    273     # Check if there are failures.
    274     Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
    275     TotalFailed = len(Failures);
    276     if TotalFailed == 0:
    277         CleanUpEmptyPlists(SBOutputDir)
    278         Plists = glob.glob(SBOutputDir + "/*/*.plist")
    279         print "Number of bug reports (non empty plist files) produced: %d" %\
    280            len(Plists)
    281         return;
    282     
    283     # Create summary file to display when the build fails.
    284     SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
    285     if (Verbose > 0):
    286         print "  Creating the failures summary file %s" % (SummaryPath,)
    287     
    288     SummaryLog = open(SummaryPath, "w+")
    289     try:
    290         SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
    291         if TotalFailed > NumOfFailuresInSummary:
    292             SummaryLog.write("See the first %d below.\n" 
    293                                                    % (NumOfFailuresInSummary,))
    294         # TODO: Add a line "See the results folder for more."
    295     
    296         FailuresCopied = NumOfFailuresInSummary
    297         Idx = 0
    298         for FailLogPathI in glob.glob(SBOutputDir + "/*/failures/*.stderr.txt"):
    299             if Idx >= NumOfFailuresInSummary:
    300                 break;
    301             Idx += 1 
    302             SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
    303             FailLogI = open(FailLogPathI, "r");
    304             try: 
    305                 shutil.copyfileobj(FailLogI, SummaryLog);
    306             finally:
    307                 FailLogI.close()
    308     finally:
    309         SummaryLog.close()
    310     
    311     print "Error: analysis failed. See ", SummaryPath
    312     sys.exit(-1)       
    313 
    314 # Auxiliary object to discard stdout.
    315 class Discarder(object):
    316     def write(self, text):
    317         pass # do nothing
    318 
    319 # Compare the warnings produced by scan-build.
    320 def runCmpResults(Dir):   
    321     TBegin = time.time() 
    322 
    323     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    324     NewDir = os.path.join(Dir, SBOutputDirName)
    325     
    326     # We have to go one level down the directory tree.
    327     RefList = glob.glob(RefDir + "/*") 
    328     NewList = glob.glob(NewDir + "/*")
    329     
    330     # Log folders are also located in the results dir, so ignore them. 
    331     RefList.remove(os.path.join(RefDir, LogFolderName))
    332     NewList.remove(os.path.join(NewDir, LogFolderName))
    333     
    334     if len(RefList) == 0 or len(NewList) == 0:
    335         return False
    336     assert(len(RefList) == len(NewList))
    337 
    338     # There might be more then one folder underneath - one per each scan-build 
    339     # command (Ex: one for configure and one for make).
    340     if (len(RefList) > 1):
    341         # Assume that the corresponding folders have the same names.
    342         RefList.sort()
    343         NewList.sort()
    344     
    345     # Iterate and find the differences.
    346     NumDiffs = 0
    347     PairList = zip(RefList, NewList)    
    348     for P in PairList:    
    349         RefDir = P[0] 
    350         NewDir = P[1]
    351     
    352         assert(RefDir != NewDir) 
    353         if Verbose == 1:        
    354             print "  Comparing Results: %s %s" % (RefDir, NewDir)
    355     
    356         DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
    357         Opts = CmpRuns.CmpOptions(DiffsPath)
    358         # Discard everything coming out of stdout (CmpRun produces a lot of them).
    359         OLD_STDOUT = sys.stdout
    360         sys.stdout = Discarder()
    361         # Scan the results, delete empty plist files.
    362         NumDiffs = CmpRuns.cmpScanBuildResults(RefDir, NewDir, Opts, False)
    363         sys.stdout = OLD_STDOUT
    364         if (NumDiffs > 0) :
    365             print "Warning: %r differences in diagnostics. See %s" % \
    366                   (NumDiffs, DiffsPath,)
    367                     
    368     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 
    369     return (NumDiffs > 0)
    370     
    371 def updateSVN(Mode, ProjectsMap):
    372     try:
    373         ProjectsMap.seek(0)    
    374         for I in csv.reader(ProjectsMap):
    375             ProjName = I[0] 
    376             Path = os.path.join(ProjName, getSBOutputDirName())
    377     
    378             if Mode == "delete":
    379                 Command = "svn delete %s" % (Path,)
    380             else:
    381                 Command = "svn add %s" % (Path,)
    382 
    383             if Verbose == 1:        
    384                 print "  Executing: %s" % (Command,)
    385                 check_call(Command, shell=True)    
    386     
    387         if Mode == "delete":
    388             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
    389                             "reference results.\""     
    390         else:
    391             CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
    392                             "reference results.\""
    393         if Verbose == 1:        
    394             print "  Executing: %s" % (CommitCommand,)
    395             check_call(CommitCommand, shell=True)    
    396     except:
    397         print "Error: SVN update failed."
    398         sys.exit(-1)
    399         
    400 def testProject(ID, IsScanBuild, Dir=None):
    401     print " \n\n--- Building project %s" % (ID,)
    402 
    403     TBegin = time.time() 
    404 
    405     if Dir is None :
    406         Dir = getProjectDir(ID)        
    407     if Verbose == 1:        
    408         print "  Build directory: %s." % (Dir,)
    409     
    410     # Set the build results directory.
    411     RelOutputDir = getSBOutputDirName()
    412     SBOutputDir = os.path.join(Dir, RelOutputDir)
    413                 
    414     buildProject(Dir, SBOutputDir, IsScanBuild)    
    415 
    416     checkBuild(SBOutputDir)
    417     
    418     if IsReferenceBuild == False:
    419         runCmpResults(Dir)
    420         
    421     print "Completed tests for project %s (time: %.2f)." % \
    422           (ID, (time.time()-TBegin))
    423     
    424 def testAll(InIsReferenceBuild = False, UpdateSVN = False):
    425     global IsReferenceBuild
    426     IsReferenceBuild = InIsReferenceBuild
    427 
    428     PMapFile = open(getProjectMapPath(), "rb")
    429     try:        
    430         # Validate the input.
    431         for I in csv.reader(PMapFile):
    432             if (len(I) != 2) :
    433                 print "Error: Rows in the ProjectMapFile should have 3 entries."
    434                 raise Exception()
    435             if (not ((I[1] == "1") | (I[1] == "0"))):
    436                 print "Error: Second entry in the ProjectMapFile should be 0 or 1."
    437                 raise Exception()              
    438 
    439         # When we are regenerating the reference results, we might need to 
    440         # update svn. Remove reference results from SVN.
    441         if UpdateSVN == True:
    442             assert(InIsReferenceBuild == True);
    443             updateSVN("delete",  PMapFile);
    444             
    445         # Test the projects.
    446         PMapFile.seek(0)    
    447         for I in csv.reader(PMapFile):
    448             testProject(I[0], int(I[1]))
    449 
    450         # Add reference results to SVN.
    451         if UpdateSVN == True:
    452             updateSVN("add",  PMapFile);
    453 
    454     except:
    455         print "Error occurred. Premature termination."
    456         raise                            
    457     finally:
    458         PMapFile.close()    
    459             
    460 if __name__ == '__main__':
    461     IsReference = False
    462     UpdateSVN = False
    463     if len(sys.argv) >= 2:
    464         if sys.argv[1] == "-r":
    465             IsReference = True
    466         elif sys.argv[1] == "-rs":
    467             IsReference = True
    468             UpdateSVN = True
    469         else:     
    470           print >> sys.stderr, 'Usage: ', sys.argv[0],\
    471                              '[-r|-rs]' \
    472                              'Use -r to regenerate reference output' \
    473                              'Use -rs to regenerate reference output and update svn'
    474 
    475     testAll(IsReference, UpdateSVN)
    476