Home | History | Annotate | Download | only in analyzer
      1 #!/usr/bin/env python
      2 
      3 """
      4 Static Analyzer qualification infrastructure.
      5 
      6 The goal is to test the analyzer against different projects, check for failures,
      7 compare results, and measure performance.
      8 
      9 Repository Directory will contain sources of the projects as well as the 
     10 information on how to build them and the expected output. 
     11 Repository Directory structure:
     12    - ProjectMap file
     13    - Historical Performance Data
     14    - Project Dir1
     15      - ReferenceOutput
     16    - Project Dir2
     17      - ReferenceOutput
     18    ..
     19 
     20 To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository, to minimize network
     traffic).
     23    - Build all projects, until error. Produce logs to report errors.
     24    - Compare results.  
     25 
     26 The files which should be kept around for failure investigations: 
     27    RepositoryCopy/Project DirI/ScanBuildResults
     28    RepositoryCopy/Project DirI/run_static_analyzer.log      
     29    
     30 Assumptions (TODO: shouldn't need to assume these.):   
     31    The script is being run from the Repository Directory.
     32    The compiler for scan-build and scan-build are in the PATH.
     33    export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
     34 
     35 For more logging, set the  env variables:
     36    zaks:TI zaks$ export CCC_ANALYZER_LOG=1
     37    zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
     38 """
     39 import CmpRuns
     40 
     41 import os
     42 import csv
     43 import sys
     44 import glob
     45 import math
     46 import shutil
     47 import time
     48 import plistlib
     49 from subprocess import check_call, CalledProcessError
     50 
     51 #------------------------------------------------------------------------------
     52 # Helper functions.
     53 #------------------------------------------------------------------------------
     54 
     55 def detectCPUs():
     56     """
     57     Detects the number of CPUs on a system. Cribbed from pp.
     58     """
     59     # Linux, Unix and MacOS:
     60     if hasattr(os, "sysconf"):
     61         if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
     62             # Linux & Unix:
     63             ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
     64             if isinstance(ncpus, int) and ncpus > 0:
     65                 return ncpus
     66         else: # OSX:
     67             return int(capture(['sysctl', '-n', 'hw.ncpu']))
     68     # Windows:
     69     if os.environ.has_key("NUMBER_OF_PROCESSORS"):
     70         ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
     71         if ncpus > 0:
     72             return ncpus
     73     return 1 # Default
     74 
     75 def which(command, paths = None):
     76    """which(command, [paths]) - Look up the given command in the paths string
     77    (or the PATH environment variable, if unspecified)."""
     78 
     79    if paths is None:
     80        paths = os.environ.get('PATH','')
     81 
     82    # Check for absolute match first.
     83    if os.path.exists(command):
     84        return command
     85 
     86    # Would be nice if Python had a lib function for this.
     87    if not paths:
     88        paths = os.defpath
     89 
     90    # Get suffixes to search.
     91    # On Cygwin, 'PATHEXT' may exist but it should not be used.
     92    if os.pathsep == ';':
     93        pathext = os.environ.get('PATHEXT', '').split(';')
     94    else:
     95        pathext = ['']
     96 
     97    # Search the paths...
     98    for path in paths.split(os.pathsep):
     99        for ext in pathext:
    100            p = os.path.join(path, command + ext)
    101            if os.path.exists(p):
    102                return p
    103 
    104    return None
    105 
    106 # Make sure we flush the output after every print statement.
    107 class flushfile(object):
    108     def __init__(self, f):
    109         self.f = f
    110     def write(self, x):
    111         self.f.write(x)
    112         self.f.flush()
    113 
# Install the auto-flushing wrapper as the process-wide stdout, so everything
# written through sys.stdout from here on is flushed after each write.
sys.stdout = flushfile(sys.stdout)
    115 
    116 def getProjectMapPath():
    117     ProjectMapPath = os.path.join(os.path.abspath(os.curdir), 
    118                                   ProjectMapFile)
    119     if not os.path.exists(ProjectMapPath):
    120         print "Error: Cannot find the Project Map file " + ProjectMapPath +\
    121                 "\nRunning script for the wrong directory?"
    122         sys.exit(-1)  
    123     return ProjectMapPath         
    124 
    125 def getProjectDir(ID):
    126     return os.path.join(os.path.abspath(os.curdir), ID)        
    127 
    128 def getSBOutputDirName(IsReferenceBuild) :
    129     if IsReferenceBuild == True :
    130         return SBOutputDirReferencePrefix + SBOutputDirName
    131     else :
    132         return SBOutputDirName
    133 
    134 #------------------------------------------------------------------------------
    135 # Configuration setup.
    136 #------------------------------------------------------------------------------
    137 
    138 # Find Clang for static analysis.
    139 Clang = which("clang", os.environ['PATH'])
    140 if not Clang:
    141     print "Error: cannot find 'clang' in PATH"
    142     sys.exit(-1)
    143 
    144 # Number of jobs.
    145 Jobs = int(math.ceil(detectCPUs() * 0.75))
    146 
    147 # Project map stores info about all the "registered" projects.
    148 ProjectMapFile = "projectMap.csv"
    149 
    150 # Names of the project specific scripts.
    151 # The script that needs to be executed before the build can start.
    152 CleanupScript = "cleanup_run_static_analyzer.sh"
    153 # This is a file containing commands for scan-build.  
    154 BuildScript = "run_static_analyzer.cmd"
    155 
    156 # The log file name.
    157 LogFolderName = "Logs"
    158 BuildLogName = "run_static_analyzer.log"
    159 # Summary file - contains the summary of the failures. Ex: This info can be be  
    160 # displayed when buildbot detects a build failure.
    161 NumOfFailuresInSummary = 10
    162 FailuresSummaryFileName = "failures.txt"
    163 # Summary of the result diffs.
    164 DiffsSummaryFileName = "diffs.txt"
    165 
    166 # The scan-build result directory.
    167 SBOutputDirName = "ScanBuildResults"
    168 SBOutputDirReferencePrefix = "Ref"
    169 
    170 # The list of checkers used during analyzes.
    171 # Currently, consists of all the non-experimental checkers, plus a few alpha
    172 # checkers we don't want to regress on.
    173 Checkers="alpha.unix.SimpleStream,alpha.security.taint,alpha.cplusplus.NewDeleteLeaks,core,cplusplus,deadcode,security,unix,osx"
    174 
    175 Verbose = 1
    176 
    177 #------------------------------------------------------------------------------
    178 # Test harness logic.
    179 #------------------------------------------------------------------------------
    180 
    181 # Run pre-processing script if any.
    182 def runCleanupScript(Dir, PBuildLogFile):
    183     ScriptPath = os.path.join(Dir, CleanupScript)
    184     if os.path.exists(ScriptPath):
    185         try:
    186             if Verbose == 1:        
    187                 print "  Executing: %s" % (ScriptPath,)
    188             check_call("chmod +x %s" % ScriptPath, cwd = Dir, 
    189                                               stderr=PBuildLogFile,
    190                                               stdout=PBuildLogFile, 
    191                                               shell=True)    
    192             check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
    193                                               stdout=PBuildLogFile, 
    194                                               shell=True)
    195         except:
    196             print "Error: The pre-processing step failed. See ", \
    197                   PBuildLogFile.name, " for details."
    198             sys.exit(-1)
    199 
    200 # Build the project with scan-build by reading in the commands and 
    201 # prefixing them with the scan-build options.
    202 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    203     BuildScriptPath = os.path.join(Dir, BuildScript)
    204     if not os.path.exists(BuildScriptPath):
    205         print "Error: build script is not defined: %s" % BuildScriptPath
    206         sys.exit(-1)
    207     SBOptions = "--use-analyzer " + Clang + " "
    208     SBOptions += "-plist-html -o " + SBOutputDir + " "
    209     SBOptions += "-enable-checker " + Checkers + " "  
    210     SBOptions += "--keep-empty "
    211     # Always use ccc-analyze to ensure that we can locate the failures 
    212     # directory.
    213     SBOptions += "--override-compiler "
    214     try:
    215         SBCommandFile = open(BuildScriptPath, "r")
    216         SBPrefix = "scan-build " + SBOptions + " "
    217         for Command in SBCommandFile:
    218             Command = Command.strip()
    219             # If using 'make', auto imply a -jX argument
    220             # to speed up analysis.  xcodebuild will
    221             # automatically use the maximum number of cores.
    222             if (Command.startswith("make ") or Command == "make") and \
    223                 "-j" not in Command:
    224                 Command += " -j%d" % Jobs
    225             SBCommand = SBPrefix + Command
    226             if Verbose == 1:        
    227                 print "  Executing: %s" % (SBCommand,)
    228             check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
    229                                              stdout=PBuildLogFile, 
    230                                              shell=True)
    231     except:
    232         print "Error: scan-build failed. See ",PBuildLogFile.name,\
    233               " for details."
    234         raise
    235 
    236 def hasNoExtension(FileName):
    237     (Root, Ext) = os.path.splitext(FileName)
    238     if ((Ext == "")) :
    239         return True
    240     return False
    241 
    242 def isValidSingleInputFile(FileName):
    243     (Root, Ext) = os.path.splitext(FileName)
    244     if ((Ext == ".i") | (Ext == ".ii") | 
    245         (Ext == ".c") | (Ext == ".cpp") | 
    246         (Ext == ".m") | (Ext == "")) :
    247         return True
    248     return False
    249    
    250 # Run analysis on a set of preprocessed files.
    251 def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
    252     if os.path.exists(os.path.join(Dir, BuildScript)):
    253         print "Error: The preprocessed files project should not contain %s" % \
    254                BuildScript
    255         raise Exception()       
    256 
    257     CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
    258     CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "   
    259     
    260     if (Mode == 2) :
    261         CmdPrefix += "-std=c++11 " 
    262     
    263     PlistPath = os.path.join(Dir, SBOutputDir, "date")
    264     FailPath = os.path.join(PlistPath, "failures");
    265     os.makedirs(FailPath);
    266  
    267     for FullFileName in glob.glob(Dir + "/*"):
    268         FileName = os.path.basename(FullFileName)
    269         Failed = False
    270         
    271         # Only run the analyzes on supported files.
    272         if (hasNoExtension(FileName)):
    273             continue
    274         if (isValidSingleInputFile(FileName) == False):
    275             print "Error: Invalid single input file %s." % (FullFileName,)
    276             raise Exception()
    277         
    278         # Build and call the analyzer command.
    279         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
    280         Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
    281         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
    282         try:
    283             if Verbose == 1:        
    284                 print "  Executing: %s" % (Command,)
    285             check_call(Command, cwd = Dir, stderr=LogFile,
    286                                            stdout=LogFile, 
    287                                            shell=True)
    288         except CalledProcessError, e:
    289             print "Error: Analyzes of %s failed. See %s for details." \
    290                   "Error code %d." % \
    291                    (FullFileName, LogFile.name, e.returncode)
    292             Failed = True       
    293         finally:
    294             LogFile.close()            
    295         
    296         # If command did not fail, erase the log file.
    297         if Failed == False:
    298             os.remove(LogFile.name);
    299 
    300 def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
    301     TBegin = time.time() 
    302 
    303     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    304     print "Log file: %s" % (BuildLogPath,) 
    305     print "Output directory: %s" %(SBOutputDir, )
    306     
    307     # Clean up the log file.
    308     if (os.path.exists(BuildLogPath)) :
    309         RmCommand = "rm " + BuildLogPath
    310         if Verbose == 1:
    311             print "  Executing: %s" % (RmCommand,)
    312         check_call(RmCommand, shell=True)
    313     
    314     # Clean up scan build results.
    315     if (os.path.exists(SBOutputDir)) :
    316         RmCommand = "rm -r " + SBOutputDir
    317         if Verbose == 1: 
    318             print "  Executing: %s" % (RmCommand,)
    319             check_call(RmCommand, shell=True)
    320     assert(not os.path.exists(SBOutputDir))
    321     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
    322         
    323     # Open the log file.
    324     PBuildLogFile = open(BuildLogPath, "wb+")
    325     
    326     # Build and analyze the project.
    327     try:
    328         runCleanupScript(Dir, PBuildLogFile)
    329         
    330         if (ProjectBuildMode == 1):
    331             runScanBuild(Dir, SBOutputDir, PBuildLogFile)
    332         else:
    333             runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)
    334         
    335         if IsReferenceBuild :
    336             runCleanupScript(Dir, PBuildLogFile)
    337            
    338     finally:
    339         PBuildLogFile.close()
    340         
    341     print "Build complete (time: %.2f). See the log for more details: %s" % \
    342            ((time.time()-TBegin), BuildLogPath) 
    343        
    344 # A plist file is created for each call to the analyzer(each source file).
    345 # We are only interested on the once that have bug reports, so delete the rest.        
    346 def CleanUpEmptyPlists(SBOutputDir):
    347     for F in glob.glob(SBOutputDir + "/*/*.plist"):
    348         P = os.path.join(SBOutputDir, F)
    349         
    350         Data = plistlib.readPlist(P)
    351         # Delete empty reports.
    352         if not Data['files']:
    353             os.remove(P)
    354             continue
    355 
    356 # Given the scan-build output directory, checks if the build failed 
    357 # (by searching for the failures directories). If there are failures, it 
    358 # creates a summary file in the output directory.         
    359 def checkBuild(SBOutputDir):
    360     # Check if there are failures.
    361     Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
    362     TotalFailed = len(Failures);
    363     if TotalFailed == 0:
    364         CleanUpEmptyPlists(SBOutputDir)
    365         Plists = glob.glob(SBOutputDir + "/*/*.plist")
    366         print "Number of bug reports (non-empty plist files) produced: %d" %\
    367            len(Plists)
    368         return;
    369     
    370     # Create summary file to display when the build fails.
    371     SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
    372     if (Verbose > 0):
    373         print "  Creating the failures summary file %s" % (SummaryPath,)
    374     
    375     SummaryLog = open(SummaryPath, "w+")
    376     try:
    377         SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
    378         if TotalFailed > NumOfFailuresInSummary:
    379             SummaryLog.write("See the first %d below.\n" 
    380                                                    % (NumOfFailuresInSummary,))
    381         # TODO: Add a line "See the results folder for more."
    382     
    383         FailuresCopied = NumOfFailuresInSummary
    384         Idx = 0
    385         for FailLogPathI in Failures:
    386             if Idx >= NumOfFailuresInSummary:
    387                 break;
    388             Idx += 1 
    389             SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
    390             FailLogI = open(FailLogPathI, "r");
    391             try: 
    392                 shutil.copyfileobj(FailLogI, SummaryLog);
    393             finally:
    394                 FailLogI.close()
    395     finally:
    396         SummaryLog.close()
    397     
    398     print "Error: analysis failed. See ", SummaryPath
    399     sys.exit(-1)       
    400 
    401 # Auxiliary object to discard stdout.
    402 class Discarder(object):
    403     def write(self, text):
    404         pass # do nothing
    405 
    406 # Compare the warnings produced by scan-build.
    407 def runCmpResults(Dir):   
    408     TBegin = time.time() 
    409 
    410     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    411     NewDir = os.path.join(Dir, SBOutputDirName)
    412     
    413     # We have to go one level down the directory tree.
    414     RefList = glob.glob(RefDir + "/*") 
    415     NewList = glob.glob(NewDir + "/*")
    416     
    417     # Log folders are also located in the results dir, so ignore them.
    418     RefLogDir = os.path.join(RefDir, LogFolderName)
    419     if RefLogDir in RefList:
    420         RefList.remove(RefLogDir)
    421     NewList.remove(os.path.join(NewDir, LogFolderName))
    422     
    423     if len(RefList) == 0 or len(NewList) == 0:
    424         return False
    425     assert(len(RefList) == len(NewList))
    426 
    427     # There might be more then one folder underneath - one per each scan-build 
    428     # command (Ex: one for configure and one for make).
    429     if (len(RefList) > 1):
    430         # Assume that the corresponding folders have the same names.
    431         RefList.sort()
    432         NewList.sort()
    433     
    434     # Iterate and find the differences.
    435     NumDiffs = 0
    436     PairList = zip(RefList, NewList)    
    437     for P in PairList:    
    438         RefDir = P[0] 
    439         NewDir = P[1]
    440     
    441         assert(RefDir != NewDir) 
    442         if Verbose == 1:        
    443             print "  Comparing Results: %s %s" % (RefDir, NewDir)
    444     
    445         DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
    446         Opts = CmpRuns.CmpOptions(DiffsPath)
    447         # Discard everything coming out of stdout (CmpRun produces a lot of them).
    448         OLD_STDOUT = sys.stdout
    449         sys.stdout = Discarder()
    450         # Scan the results, delete empty plist files.
    451         NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
    452         sys.stdout = OLD_STDOUT
    453         if (NumDiffs > 0) :
    454             print "Warning: %r differences in diagnostics. See %s" % \
    455                   (NumDiffs, DiffsPath,)
    456                     
    457     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 
    458     return (NumDiffs > 0)
    459     
    460 def updateSVN(Mode, ProjectsMap):
    461     try:
    462         ProjectsMap.seek(0)    
    463         for I in csv.reader(ProjectsMap):
    464             ProjName = I[0] 
    465             Path = os.path.join(ProjName, getSBOutputDirName(True))
    466     
    467             if Mode == "delete":
    468                 Command = "svn delete %s" % (Path,)
    469             else:
    470                 Command = "svn add %s" % (Path,)
    471 
    472             if Verbose == 1:        
    473                 print "  Executing: %s" % (Command,)
    474             check_call(Command, shell=True)    
    475     
    476         if Mode == "delete":
    477             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
    478                             "reference results.\""     
    479         else:
    480             CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
    481                             "reference results.\""
    482         if Verbose == 1:        
    483             print "  Executing: %s" % (CommitCommand,)
    484         check_call(CommitCommand, shell=True)    
    485     except:
    486         print "Error: SVN update failed."
    487         sys.exit(-1)
    488         
    489 def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
    490     print " \n\n--- Building project %s" % (ID,)
    491 
    492     TBegin = time.time() 
    493 
    494     if Dir is None :
    495         Dir = getProjectDir(ID)        
    496     if Verbose == 1:        
    497         print "  Build directory: %s." % (Dir,)
    498     
    499     # Set the build results directory.
    500     RelOutputDir = getSBOutputDirName(IsReferenceBuild)
    501     SBOutputDir = os.path.join(Dir, RelOutputDir)
    502                 
    503     buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)
    504 
    505     checkBuild(SBOutputDir)
    506     
    507     if IsReferenceBuild == False:
    508         runCmpResults(Dir)
    509         
    510     print "Completed tests for project %s (time: %.2f)." % \
    511           (ID, (time.time()-TBegin))
    512     
    513 def testAll(IsReferenceBuild = False, UpdateSVN = False):
    514     PMapFile = open(getProjectMapPath(), "rb")
    515     try:        
    516         # Validate the input.
    517         for I in csv.reader(PMapFile):
    518             if (len(I) != 2) :
    519                 print "Error: Rows in the ProjectMapFile should have 3 entries."
    520                 raise Exception()
    521             if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
    522                 print "Error: Second entry in the ProjectMapFile should be 0" \
    523                       " (single file), 1 (project), or 2(single file c++11)."
    524                 raise Exception()              
    525 
    526         # When we are regenerating the reference results, we might need to 
    527         # update svn. Remove reference results from SVN.
    528         if UpdateSVN == True:
    529             assert(IsReferenceBuild == True);
    530             updateSVN("delete",  PMapFile);
    531             
    532         # Test the projects.
    533         PMapFile.seek(0)    
    534         for I in csv.reader(PMapFile):
    535             testProject(I[0], int(I[1]), IsReferenceBuild)
    536 
    537         # Add reference results to SVN.
    538         if UpdateSVN == True:
    539             updateSVN("add",  PMapFile);
    540 
    541     except:
    542         print "Error occurred. Premature termination."
    543         raise                            
    544     finally:
    545         PMapFile.close()    
    546             
    547 if __name__ == '__main__':
    548     IsReference = False
    549     UpdateSVN = False
    550     if len(sys.argv) >= 2:
    551         if sys.argv[1] == "-r":
    552             IsReference = True
    553         elif sys.argv[1] == "-rs":
    554             IsReference = True
    555             UpdateSVN = True
    556         else:     
    557           print >> sys.stderr, 'Usage: ', sys.argv[0],\
    558                              '[-r|-rs]' \
    559                              'Use -r to regenerate reference output' \
    560                              'Use -rs to regenerate reference output and update svn'
    561 
    562     testAll(IsReference, UpdateSVN)
    563