Home | History | Annotate | Download | only in analyzer
      1 #!/usr/bin/env python
      2 
      3 """
      4 Static Analyzer qualification infrastructure.
      5 
      6 The goal is to test the analyzer against different projects, check for failures,
      7 compare results, and measure performance.
      8 
      9 Repository Directory will contain sources of the projects as well as the 
     10 information on how to build them and the expected output. 
     11 Repository Directory structure:
     12    - ProjectMap file
     13    - Historical Performance Data
     14    - Project Dir1
     15      - ReferenceOutput
     16    - Project Dir2
     17      - ReferenceOutput
     18    ..
     19 
     20 To test the build of the analyzer one would:
     21    - Make a copy of the Repository Directory. (TODO: Prefer to ensure that
     22      the build directory does not pollute the repository, to minimize network traffic.)
     23    - Build all projects, until error. Produce logs to report errors.
     24    - Compare results.  
     25 
     26 The files which should be kept around for failure investigations: 
     27    RepositoryCopy/Project DirI/ScanBuildResults
     28    RepositoryCopy/Project DirI/run_static_analyzer.log      
     29    
     30 Assumptions (TODO: shouldn't need to assume these.):   
     31    The script is being run from the Repository Directory.
     32    The compiler for scan-build and scan-build are in the PATH.
     33    export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
     34 
     35 For more logging, set the  env variables:
     36    zaks:TI zaks$ export CCC_ANALYZER_LOG=1
     37    zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
     38 """
     39 import CmpRuns
     40 
     41 import os
     42 import csv
     43 import sys
     44 import glob
     45 import math
     46 import shutil
     47 import time
     48 import plistlib
     49 from subprocess import check_call, CalledProcessError
     50 
     51 #------------------------------------------------------------------------------
     52 # Helper functions.
     53 #------------------------------------------------------------------------------
     54 
     55 def detectCPUs():
     56     """
     57     Detects the number of CPUs on a system. Cribbed from pp.
     58     """
     59     # Linux, Unix and MacOS:
     60     if hasattr(os, "sysconf"):
     61         if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
     62             # Linux & Unix:
     63             ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
     64             if isinstance(ncpus, int) and ncpus > 0:
     65                 return ncpus
     66         else: # OSX:
     67             return int(capture(['sysctl', '-n', 'hw.ncpu']))
     68     # Windows:
     69     if os.environ.has_key("NUMBER_OF_PROCESSORS"):
     70         ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
     71         if ncpus > 0:
     72             return ncpus
     73     return 1 # Default
     74 
     75 def which(command, paths = None):
     76    """which(command, [paths]) - Look up the given command in the paths string
     77    (or the PATH environment variable, if unspecified)."""
     78 
     79    if paths is None:
     80        paths = os.environ.get('PATH','')
     81 
     82    # Check for absolute match first.
     83    if os.path.exists(command):
     84        return command
     85 
     86    # Would be nice if Python had a lib function for this.
     87    if not paths:
     88        paths = os.defpath
     89 
     90    # Get suffixes to search.
     91    # On Cygwin, 'PATHEXT' may exist but it should not be used.
     92    if os.pathsep == ';':
     93        pathext = os.environ.get('PATHEXT', '').split(';')
     94    else:
     95        pathext = ['']
     96 
     97    # Search the paths...
     98    for path in paths.split(os.pathsep):
     99        for ext in pathext:
    100            p = os.path.join(path, command + ext)
    101            if os.path.exists(p):
    102                return p
    103 
    104    return None
    105 
    106 # Make sure we flush the output after every print statement.
    107 class flushfile(object):
    108     def __init__(self, f):
    109         self.f = f
    110     def write(self, x):
    111         self.f.write(x)
    112         self.f.flush()
    113 
# Install the flushing wrapper process-wide so that every write to stdout is
# flushed immediately.
sys.stdout = flushfile(sys.stdout)
    115 
    116 def getProjectMapPath():
    117     ProjectMapPath = os.path.join(os.path.abspath(os.curdir), 
    118                                   ProjectMapFile)
    119     if not os.path.exists(ProjectMapPath):
    120         print "Error: Cannot find the Project Map file " + ProjectMapPath +\
    121                 "\nRunning script for the wrong directory?"
    122         sys.exit(-1)  
    123     return ProjectMapPath         
    124 
    125 def getProjectDir(ID):
    126     return os.path.join(os.path.abspath(os.curdir), ID)        
    127 
    128 def getSBOutputDirName(IsReferenceBuild) :
    129     if IsReferenceBuild == True :
    130         return SBOutputDirReferencePrefix + SBOutputDirName
    131     else :
    132         return SBOutputDirName
    133 
    134 #------------------------------------------------------------------------------
    135 # Configuration setup.
    136 #------------------------------------------------------------------------------
    137 
    138 # Find Clang for static analysis.
    139 Clang = which("clang", os.environ['PATH'])
    140 if not Clang:
    141     print "Error: cannot find 'clang' in PATH"
    142     sys.exit(-1)
    143 
    144 # Number of jobs.
    145 Jobs = int(math.ceil(detectCPUs() * 0.75))
    146 
    147 # Project map stores info about all the "registered" projects.
    148 ProjectMapFile = "projectMap.csv"
    149 
    150 # Names of the project specific scripts.
    151 # The script that needs to be executed before the build can start.
    152 CleanupScript = "cleanup_run_static_analyzer.sh"
    153 # This is a file containing commands for scan-build.  
    154 BuildScript = "run_static_analyzer.cmd"
    155 
    156 # The log file name.
    157 LogFolderName = "Logs"
    158 BuildLogName = "run_static_analyzer.log"
    159 # Summary file - contains the summary of the failures. Ex: This info can be be  
    160 # displayed when buildbot detects a build failure.
    161 NumOfFailuresInSummary = 10
    162 FailuresSummaryFileName = "failures.txt"
    163 # Summary of the result diffs.
    164 DiffsSummaryFileName = "diffs.txt"
    165 
    166 # The scan-build result directory.
    167 SBOutputDirName = "ScanBuildResults"
    168 SBOutputDirReferencePrefix = "Ref"
    169 
    170 # The list of checkers used during analyzes.
    171 # Currently, consists of all the non experimental checkers.
    172 Checkers="alpha.unix.SimpleStream,alpha.security.taint,core,deadcode,security,unix,osx"
    173 
    174 Verbose = 1
    175 
    176 #------------------------------------------------------------------------------
    177 # Test harness logic.
    178 #------------------------------------------------------------------------------
    179 
    180 # Run pre-processing script if any.
    181 def runCleanupScript(Dir, PBuildLogFile):
    182     ScriptPath = os.path.join(Dir, CleanupScript)
    183     if os.path.exists(ScriptPath):
    184         try:
    185             if Verbose == 1:        
    186                 print "  Executing: %s" % (ScriptPath,)
    187             check_call("chmod +x %s" % ScriptPath, cwd = Dir, 
    188                                               stderr=PBuildLogFile,
    189                                               stdout=PBuildLogFile, 
    190                                               shell=True)    
    191             check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
    192                                               stdout=PBuildLogFile, 
    193                                               shell=True)
    194         except:
    195             print "Error: The pre-processing step failed. See ", \
    196                   PBuildLogFile.name, " for details."
    197             sys.exit(-1)
    198 
    199 # Build the project with scan-build by reading in the commands and 
    200 # prefixing them with the scan-build options.
    201 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    202     BuildScriptPath = os.path.join(Dir, BuildScript)
    203     if not os.path.exists(BuildScriptPath):
    204         print "Error: build script is not defined: %s" % BuildScriptPath
    205         sys.exit(-1)
    206     SBOptions = "--use-analyzer " + Clang + " "
    207     SBOptions += "-plist-html -o " + SBOutputDir + " "
    208     SBOptions += "-enable-checker " + Checkers + " "  
    209     SBOptions += "--keep-empty "
    210     try:
    211         SBCommandFile = open(BuildScriptPath, "r")
    212         SBPrefix = "scan-build " + SBOptions + " "
    213         for Command in SBCommandFile:
    214             # If using 'make', auto imply a -jX argument
    215             # to speed up analysis.  xcodebuild will
    216             # automatically use the maximum number of cores.
    217             if (Command.startswith("make ") or Command == "make") and \
    218                 "-j" not in Command:
    219                 Command += " -j%d" % Jobs
    220             SBCommand = SBPrefix + Command
    221             if Verbose == 1:        
    222                 print "  Executing: %s" % (SBCommand,)
    223             check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
    224                                              stdout=PBuildLogFile, 
    225                                              shell=True)
    226     except:
    227         print "Error: scan-build failed. See ",PBuildLogFile.name,\
    228               " for details."
    229         raise
    230 
    231 def hasNoExtension(FileName):
    232     (Root, Ext) = os.path.splitext(FileName)
    233     if ((Ext == "")) :
    234         return True
    235     return False
    236 
    237 def isValidSingleInputFile(FileName):
    238     (Root, Ext) = os.path.splitext(FileName)
    239     if ((Ext == ".i") | (Ext == ".ii") | 
    240         (Ext == ".c") | (Ext == ".cpp") | 
    241         (Ext == ".m") | (Ext == "")) :
    242         return True
    243     return False
    244    
    245 # Run analysis on a set of preprocessed files.
    246 def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
    247     if os.path.exists(os.path.join(Dir, BuildScript)):
    248         print "Error: The preprocessed files project should not contain %s" % \
    249                BuildScript
    250         raise Exception()       
    251 
    252     CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
    253     CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "   
    254     
    255     if (Mode == 2) :
    256         CmdPrefix += "-std=c++11 " 
    257     
    258     PlistPath = os.path.join(Dir, SBOutputDir, "date")
    259     FailPath = os.path.join(PlistPath, "failures");
    260     os.makedirs(FailPath);
    261  
    262     for FullFileName in glob.glob(Dir + "/*"):
    263         FileName = os.path.basename(FullFileName)
    264         Failed = False
    265         
    266         # Only run the analyzes on supported files.
    267         if (hasNoExtension(FileName)):
    268             continue
    269         if (isValidSingleInputFile(FileName) == False):
    270             print "Error: Invalid single input file %s." % (FullFileName,)
    271             raise Exception()
    272         
    273         # Build and call the analyzer command.
    274         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
    275         Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
    276         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
    277         try:
    278             if Verbose == 1:        
    279                 print "  Executing: %s" % (Command,)
    280             check_call(Command, cwd = Dir, stderr=LogFile,
    281                                            stdout=LogFile, 
    282                                            shell=True)
    283         except CalledProcessError, e:
    284             print "Error: Analyzes of %s failed. See %s for details." \
    285                   "Error code %d." % \
    286                    (FullFileName, LogFile.name, e.returncode)
    287             Failed = True       
    288         finally:
    289             LogFile.close()            
    290         
    291         # If command did not fail, erase the log file.
    292         if Failed == False:
    293             os.remove(LogFile.name);
    294 
    295 def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
    296     TBegin = time.time() 
    297 
    298     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    299     print "Log file: %s" % (BuildLogPath,) 
    300     print "Output directory: %s" %(SBOutputDir, )
    301     
    302     # Clean up the log file.
    303     if (os.path.exists(BuildLogPath)) :
    304         RmCommand = "rm " + BuildLogPath
    305         if Verbose == 1:
    306             print "  Executing: %s" % (RmCommand,)
    307         check_call(RmCommand, shell=True)
    308     
    309     # Clean up scan build results.
    310     if (os.path.exists(SBOutputDir)) :
    311         RmCommand = "rm -r " + SBOutputDir
    312         if Verbose == 1: 
    313             print "  Executing: %s" % (RmCommand,)
    314             check_call(RmCommand, shell=True)
    315     assert(not os.path.exists(SBOutputDir))
    316     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
    317         
    318     # Open the log file.
    319     PBuildLogFile = open(BuildLogPath, "wb+")
    320     
    321     # Build and analyze the project.
    322     try:
    323         runCleanupScript(Dir, PBuildLogFile)
    324         
    325         if (ProjectBuildMode == 1):
    326             runScanBuild(Dir, SBOutputDir, PBuildLogFile)
    327         else:
    328             runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)
    329         
    330         if IsReferenceBuild :
    331             runCleanupScript(Dir, PBuildLogFile)
    332            
    333     finally:
    334         PBuildLogFile.close()
    335         
    336     print "Build complete (time: %.2f). See the log for more details: %s" % \
    337            ((time.time()-TBegin), BuildLogPath) 
    338        
    339 # A plist file is created for each call to the analyzer(each source file).
    340 # We are only interested on the once that have bug reports, so delete the rest.        
    341 def CleanUpEmptyPlists(SBOutputDir):
    342     for F in glob.glob(SBOutputDir + "/*/*.plist"):
    343         P = os.path.join(SBOutputDir, F)
    344         
    345         Data = plistlib.readPlist(P)
    346         # Delete empty reports.
    347         if not Data['files']:
    348             os.remove(P)
    349             continue
    350 
    351 # Given the scan-build output directory, checks if the build failed 
    352 # (by searching for the failures directories). If there are failures, it 
    353 # creates a summary file in the output directory.         
    354 def checkBuild(SBOutputDir):
    355     # Check if there are failures.
    356     Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
    357     TotalFailed = len(Failures);
    358     if TotalFailed == 0:
    359         CleanUpEmptyPlists(SBOutputDir)
    360         Plists = glob.glob(SBOutputDir + "/*/*.plist")
    361         print "Number of bug reports (non empty plist files) produced: %d" %\
    362            len(Plists)
    363         return;
    364     
    365     # Create summary file to display when the build fails.
    366     SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
    367     if (Verbose > 0):
    368         print "  Creating the failures summary file %s" % (SummaryPath,)
    369     
    370     SummaryLog = open(SummaryPath, "w+")
    371     try:
    372         SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
    373         if TotalFailed > NumOfFailuresInSummary:
    374             SummaryLog.write("See the first %d below.\n" 
    375                                                    % (NumOfFailuresInSummary,))
    376         # TODO: Add a line "See the results folder for more."
    377     
    378         FailuresCopied = NumOfFailuresInSummary
    379         Idx = 0
    380         for FailLogPathI in Failures:
    381             if Idx >= NumOfFailuresInSummary:
    382                 break;
    383             Idx += 1 
    384             SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
    385             FailLogI = open(FailLogPathI, "r");
    386             try: 
    387                 shutil.copyfileobj(FailLogI, SummaryLog);
    388             finally:
    389                 FailLogI.close()
    390     finally:
    391         SummaryLog.close()
    392     
    393     print "Error: analysis failed. See ", SummaryPath
    394     sys.exit(-1)       
    395 
    396 # Auxiliary object to discard stdout.
    397 class Discarder(object):
    398     def write(self, text):
    399         pass # do nothing
    400 
    401 # Compare the warnings produced by scan-build.
    402 def runCmpResults(Dir):   
    403     TBegin = time.time() 
    404 
    405     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    406     NewDir = os.path.join(Dir, SBOutputDirName)
    407     
    408     # We have to go one level down the directory tree.
    409     RefList = glob.glob(RefDir + "/*") 
    410     NewList = glob.glob(NewDir + "/*")
    411     
    412     # Log folders are also located in the results dir, so ignore them. 
    413     RefList.remove(os.path.join(RefDir, LogFolderName))
    414     NewList.remove(os.path.join(NewDir, LogFolderName))
    415     
    416     if len(RefList) == 0 or len(NewList) == 0:
    417         return False
    418     assert(len(RefList) == len(NewList))
    419 
    420     # There might be more then one folder underneath - one per each scan-build 
    421     # command (Ex: one for configure and one for make).
    422     if (len(RefList) > 1):
    423         # Assume that the corresponding folders have the same names.
    424         RefList.sort()
    425         NewList.sort()
    426     
    427     # Iterate and find the differences.
    428     NumDiffs = 0
    429     PairList = zip(RefList, NewList)    
    430     for P in PairList:    
    431         RefDir = P[0] 
    432         NewDir = P[1]
    433     
    434         assert(RefDir != NewDir) 
    435         if Verbose == 1:        
    436             print "  Comparing Results: %s %s" % (RefDir, NewDir)
    437     
    438         DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
    439         Opts = CmpRuns.CmpOptions(DiffsPath)
    440         # Discard everything coming out of stdout (CmpRun produces a lot of them).
    441         OLD_STDOUT = sys.stdout
    442         sys.stdout = Discarder()
    443         # Scan the results, delete empty plist files.
    444         NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
    445         sys.stdout = OLD_STDOUT
    446         if (NumDiffs > 0) :
    447             print "Warning: %r differences in diagnostics. See %s" % \
    448                   (NumDiffs, DiffsPath,)
    449                     
    450     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 
    451     return (NumDiffs > 0)
    452     
    453 def updateSVN(Mode, ProjectsMap):
    454     try:
    455         ProjectsMap.seek(0)    
    456         for I in csv.reader(ProjectsMap):
    457             ProjName = I[0] 
    458             Path = os.path.join(ProjName, getSBOutputDirName(True))
    459     
    460             if Mode == "delete":
    461                 Command = "svn delete %s" % (Path,)
    462             else:
    463                 Command = "svn add %s" % (Path,)
    464 
    465             if Verbose == 1:        
    466                 print "  Executing: %s" % (Command,)
    467             check_call(Command, shell=True)    
    468     
    469         if Mode == "delete":
    470             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
    471                             "reference results.\""     
    472         else:
    473             CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
    474                             "reference results.\""
    475         if Verbose == 1:        
    476             print "  Executing: %s" % (CommitCommand,)
    477         check_call(CommitCommand, shell=True)    
    478     except:
    479         print "Error: SVN update failed."
    480         sys.exit(-1)
    481         
    482 def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
    483     print " \n\n--- Building project %s" % (ID,)
    484 
    485     TBegin = time.time() 
    486 
    487     if Dir is None :
    488         Dir = getProjectDir(ID)        
    489     if Verbose == 1:        
    490         print "  Build directory: %s." % (Dir,)
    491     
    492     # Set the build results directory.
    493     RelOutputDir = getSBOutputDirName(IsReferenceBuild)
    494     SBOutputDir = os.path.join(Dir, RelOutputDir)
    495                 
    496     buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)
    497 
    498     checkBuild(SBOutputDir)
    499     
    500     if IsReferenceBuild == False:
    501         runCmpResults(Dir)
    502         
    503     print "Completed tests for project %s (time: %.2f)." % \
    504           (ID, (time.time()-TBegin))
    505     
    506 def testAll(IsReferenceBuild = False, UpdateSVN = False):
    507     PMapFile = open(getProjectMapPath(), "rb")
    508     try:        
    509         # Validate the input.
    510         for I in csv.reader(PMapFile):
    511             if (len(I) != 2) :
    512                 print "Error: Rows in the ProjectMapFile should have 3 entries."
    513                 raise Exception()
    514             if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
    515                 print "Error: Second entry in the ProjectMapFile should be 0" \
    516                       " (single file), 1 (project), or 2(single file c++11)."
    517                 raise Exception()              
    518 
    519         # When we are regenerating the reference results, we might need to 
    520         # update svn. Remove reference results from SVN.
    521         if UpdateSVN == True:
    522             assert(IsReferenceBuild == True);
    523             updateSVN("delete",  PMapFile);
    524             
    525         # Test the projects.
    526         PMapFile.seek(0)    
    527         for I in csv.reader(PMapFile):
    528             testProject(I[0], int(I[1]), IsReferenceBuild)
    529 
    530         # Add reference results to SVN.
    531         if UpdateSVN == True:
    532             updateSVN("add",  PMapFile);
    533 
    534     except:
    535         print "Error occurred. Premature termination."
    536         raise                            
    537     finally:
    538         PMapFile.close()    
    539             
    540 if __name__ == '__main__':
    541     IsReference = False
    542     UpdateSVN = False
    543     if len(sys.argv) >= 2:
    544         if sys.argv[1] == "-r":
    545             IsReference = True
    546         elif sys.argv[1] == "-rs":
    547             IsReference = True
    548             UpdateSVN = True
    549         else:     
    550           print >> sys.stderr, 'Usage: ', sys.argv[0],\
    551                              '[-r|-rs]' \
    552                              'Use -r to regenerate reference output' \
    553                              'Use -rs to regenerate reference output and update svn'
    554 
    555     testAll(IsReference, UpdateSVN)
    556