Home | History | Annotate | Download | only in analyzer
      1 #!/usr/bin/env python
      2 
      3 """
      4 Static Analyzer qualification infrastructure.
      5 
      6 The goal is to test the analyzer against different projects, check for failures,
      7 compare results, and measure performance.
      8 
      9 Repository Directory will contain sources of the projects as well as the 
     10 information on how to build them and the expected output. 
     11 Repository Directory structure:
     12    - ProjectMap file
     13    - Historical Performance Data
     14    - Project Dir1
     15      - ReferenceOutput
     16    - Project Dir2
     17      - ReferenceOutput
     18    ..
     19 
     20 To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that 
     the build directory does not pollute the repository, to minimize network
     traffic.)
     23    - Build all projects, until error. Produce logs to report errors.
     24    - Compare results.  
     25 
     26 The files which should be kept around for failure investigations: 
     27    RepositoryCopy/Project DirI/ScanBuildResults
     28    RepositoryCopy/Project DirI/run_static_analyzer.log      
     29    
     30 Assumptions (TODO: shouldn't need to assume these.):   
     31    The script is being run from the Repository Directory.
     32    The compiler for scan-build and scan-build are in the PATH.
     33    export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
     34 
For more logging, set the following environment variables:
     36    zaks:TI zaks$ export CCC_ANALYZER_LOG=1
     37    zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
     38 """
     39 import CmpRuns
     40 
     41 import os
     42 import csv
     43 import sys
     44 import glob
     45 import math
     46 import shutil
     47 import time
     48 import plistlib
     49 from subprocess import check_call, CalledProcessError
     50 
     51 #------------------------------------------------------------------------------
     52 # Helper functions.
     53 #------------------------------------------------------------------------------
     54 
     55 def detectCPUs():
     56     """
     57     Detects the number of CPUs on a system. Cribbed from pp.
     58     """
     59     # Linux, Unix and MacOS:
     60     if hasattr(os, "sysconf"):
     61         if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
     62             # Linux & Unix:
     63             ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
     64             if isinstance(ncpus, int) and ncpus > 0:
     65                 return ncpus
     66         else: # OSX:
     67             return int(capture(['sysctl', '-n', 'hw.ncpu']))
     68     # Windows:
     69     if os.environ.has_key("NUMBER_OF_PROCESSORS"):
     70         ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
     71         if ncpus > 0:
     72             return ncpus
     73     return 1 # Default
     74 
     75 def which(command, paths = None):
     76    """which(command, [paths]) - Look up the given command in the paths string
     77    (or the PATH environment variable, if unspecified)."""
     78 
     79    if paths is None:
     80        paths = os.environ.get('PATH','')
     81 
     82    # Check for absolute match first.
     83    if os.path.exists(command):
     84        return command
     85 
     86    # Would be nice if Python had a lib function for this.
     87    if not paths:
     88        paths = os.defpath
     89 
     90    # Get suffixes to search.
     91    # On Cygwin, 'PATHEXT' may exist but it should not be used.
     92    if os.pathsep == ';':
     93        pathext = os.environ.get('PATHEXT', '').split(';')
     94    else:
     95        pathext = ['']
     96 
     97    # Search the paths...
     98    for path in paths.split(os.pathsep):
     99        for ext in pathext:
    100            p = os.path.join(path, command + ext)
    101            if os.path.exists(p):
    102                return p
    103 
    104    return None
    105 
    106 # Make sure we flush the output after every print statement.
    107 class flushfile(object):
    108     def __init__(self, f):
    109         self.f = f
    110     def write(self, x):
    111         self.f.write(x)
    112         self.f.flush()
    113 
# From here on, everything written to sys.stdout goes through flushfile, so
# the script's own messages are flushed immediately after each write.
sys.stdout = flushfile(sys.stdout)
    115 
    116 def getProjectMapPath():
    117     ProjectMapPath = os.path.join(os.path.abspath(os.curdir), 
    118                                   ProjectMapFile)
    119     if not os.path.exists(ProjectMapPath):
    120         print "Error: Cannot find the Project Map file " + ProjectMapPath +\
    121                 "\nRunning script for the wrong directory?"
    122         sys.exit(-1)  
    123     return ProjectMapPath         
    124 
    125 def getProjectDir(ID):
    126     return os.path.join(os.path.abspath(os.curdir), ID)        
    127 
    128 def getSBOutputDirName(IsReferenceBuild) :
    129     if IsReferenceBuild == True :
    130         return SBOutputDirReferencePrefix + SBOutputDirName
    131     else :
    132         return SBOutputDirName
    133 
    134 #------------------------------------------------------------------------------
    135 # Configuration setup.
    136 #------------------------------------------------------------------------------
    137 
# Find Clang for static analysis. The lookup is done over the PATH
# environment variable; the script exits with status -1 if clang is missing.
Clang = which("clang", os.environ['PATH'])
if not Clang:
    print "Error: cannot find 'clang' in PATH"
    sys.exit(-1)

# Number of jobs: 75% of the detected CPUs, rounded up. runScanBuild passes
# it to 'make' commands as -jN.
Jobs = int(math.ceil(detectCPUs() * 0.75))

# Project map stores info about all the "registered" projects.
ProjectMapFile = "projectMap.csv"

# Names of the project specific scripts.
# The script that needs to be executed before the build can start.
CleanupScript = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.  
BuildScript = "run_static_analyzer.cmd"

# The log folder (created inside the scan-build output directory) and the
# name of the build log file stored in it.
LogFolderName = "Logs"
BuildLogName = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
# At most NumOfFailuresInSummary failure logs are copied into the summary.
NumOfFailuresInSummary = 10
FailuresSummaryFileName = "failures.txt"
# Summary of the result diffs.
DiffsSummaryFileName = "diffs.txt"

# The scan-build result directory. Reference results use the same name with
# SBOutputDirReferencePrefix prepended.
SBOutputDirName = "ScanBuildResults"
SBOutputDirReferencePrefix = "Ref"

# The list of checkers used during analysis.
# Currently, consists of all the non experimental checkers, plus a few alpha
# checkers we don't want to regress on.
Checkers="alpha.unix.SimpleStream,alpha.security.taint,alpha.cplusplus.NewDeleteLeaks,core,cplusplus,deadcode,security,unix,osx"

# When set to 1, the harness prints the commands it executes.
Verbose = 1
    176 
    177 #------------------------------------------------------------------------------
    178 # Test harness logic.
    179 #------------------------------------------------------------------------------
    180 
    181 # Run pre-processing script if any.
    182 def runCleanupScript(Dir, PBuildLogFile):
    183     ScriptPath = os.path.join(Dir, CleanupScript)
    184     if os.path.exists(ScriptPath):
    185         try:
    186             if Verbose == 1:        
    187                 print "  Executing: %s" % (ScriptPath,)
    188             check_call("chmod +x %s" % ScriptPath, cwd = Dir, 
    189                                               stderr=PBuildLogFile,
    190                                               stdout=PBuildLogFile, 
    191                                               shell=True)    
    192             check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
    193                                               stdout=PBuildLogFile, 
    194                                               shell=True)
    195         except:
    196             print "Error: The pre-processing step failed. See ", \
    197                   PBuildLogFile.name, " for details."
    198             sys.exit(-1)
    199 
    200 # Build the project with scan-build by reading in the commands and 
    201 # prefixing them with the scan-build options.
    202 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    203     BuildScriptPath = os.path.join(Dir, BuildScript)
    204     if not os.path.exists(BuildScriptPath):
    205         print "Error: build script is not defined: %s" % BuildScriptPath
    206         sys.exit(-1)
    207     SBOptions = "--use-analyzer " + Clang + " "
    208     SBOptions += "-plist-html -o " + SBOutputDir + " "
    209     SBOptions += "-enable-checker " + Checkers + " "  
    210     SBOptions += "--keep-empty "
    211     # Always use ccc-analyze to ensure that we can locate the failures 
    212     # directory.
    213     SBOptions += "--override-compiler "
    214     try:
    215         SBCommandFile = open(BuildScriptPath, "r")
    216         SBPrefix = "scan-build " + SBOptions + " "
    217         for Command in SBCommandFile:
    218             # If using 'make', auto imply a -jX argument
    219             # to speed up analysis.  xcodebuild will
    220             # automatically use the maximum number of cores.
    221             if (Command.startswith("make ") or Command == "make") and \
    222                 "-j" not in Command:
    223                 Command += " -j%d" % Jobs
    224             SBCommand = SBPrefix + Command
    225             if Verbose == 1:        
    226                 print "  Executing: %s" % (SBCommand,)
    227             check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
    228                                              stdout=PBuildLogFile, 
    229                                              shell=True)
    230     except:
    231         print "Error: scan-build failed. See ",PBuildLogFile.name,\
    232               " for details."
    233         raise
    234 
    235 def hasNoExtension(FileName):
    236     (Root, Ext) = os.path.splitext(FileName)
    237     if ((Ext == "")) :
    238         return True
    239     return False
    240 
    241 def isValidSingleInputFile(FileName):
    242     (Root, Ext) = os.path.splitext(FileName)
    243     if ((Ext == ".i") | (Ext == ".ii") | 
    244         (Ext == ".c") | (Ext == ".cpp") | 
    245         (Ext == ".m") | (Ext == "")) :
    246         return True
    247     return False
    248    
    249 # Run analysis on a set of preprocessed files.
    250 def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
    251     if os.path.exists(os.path.join(Dir, BuildScript)):
    252         print "Error: The preprocessed files project should not contain %s" % \
    253                BuildScript
    254         raise Exception()       
    255 
    256     CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
    257     CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "   
    258     
    259     if (Mode == 2) :
    260         CmdPrefix += "-std=c++11 " 
    261     
    262     PlistPath = os.path.join(Dir, SBOutputDir, "date")
    263     FailPath = os.path.join(PlistPath, "failures");
    264     os.makedirs(FailPath);
    265  
    266     for FullFileName in glob.glob(Dir + "/*"):
    267         FileName = os.path.basename(FullFileName)
    268         Failed = False
    269         
    270         # Only run the analyzes on supported files.
    271         if (hasNoExtension(FileName)):
    272             continue
    273         if (isValidSingleInputFile(FileName) == False):
    274             print "Error: Invalid single input file %s." % (FullFileName,)
    275             raise Exception()
    276         
    277         # Build and call the analyzer command.
    278         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
    279         Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
    280         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
    281         try:
    282             if Verbose == 1:        
    283                 print "  Executing: %s" % (Command,)
    284             check_call(Command, cwd = Dir, stderr=LogFile,
    285                                            stdout=LogFile, 
    286                                            shell=True)
    287         except CalledProcessError, e:
    288             print "Error: Analyzes of %s failed. See %s for details." \
    289                   "Error code %d." % \
    290                    (FullFileName, LogFile.name, e.returncode)
    291             Failed = True       
    292         finally:
    293             LogFile.close()            
    294         
    295         # If command did not fail, erase the log file.
    296         if Failed == False:
    297             os.remove(LogFile.name);
    298 
    299 def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
    300     TBegin = time.time() 
    301 
    302     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    303     print "Log file: %s" % (BuildLogPath,) 
    304     print "Output directory: %s" %(SBOutputDir, )
    305     
    306     # Clean up the log file.
    307     if (os.path.exists(BuildLogPath)) :
    308         RmCommand = "rm " + BuildLogPath
    309         if Verbose == 1:
    310             print "  Executing: %s" % (RmCommand,)
    311         check_call(RmCommand, shell=True)
    312     
    313     # Clean up scan build results.
    314     if (os.path.exists(SBOutputDir)) :
    315         RmCommand = "rm -r " + SBOutputDir
    316         if Verbose == 1: 
    317             print "  Executing: %s" % (RmCommand,)
    318             check_call(RmCommand, shell=True)
    319     assert(not os.path.exists(SBOutputDir))
    320     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
    321         
    322     # Open the log file.
    323     PBuildLogFile = open(BuildLogPath, "wb+")
    324     
    325     # Build and analyze the project.
    326     try:
    327         runCleanupScript(Dir, PBuildLogFile)
    328         
    329         if (ProjectBuildMode == 1):
    330             runScanBuild(Dir, SBOutputDir, PBuildLogFile)
    331         else:
    332             runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)
    333         
    334         if IsReferenceBuild :
    335             runCleanupScript(Dir, PBuildLogFile)
    336            
    337     finally:
    338         PBuildLogFile.close()
    339         
    340     print "Build complete (time: %.2f). See the log for more details: %s" % \
    341            ((time.time()-TBegin), BuildLogPath) 
    342        
    343 # A plist file is created for each call to the analyzer(each source file).
    344 # We are only interested on the once that have bug reports, so delete the rest.        
    345 def CleanUpEmptyPlists(SBOutputDir):
    346     for F in glob.glob(SBOutputDir + "/*/*.plist"):
    347         P = os.path.join(SBOutputDir, F)
    348         
    349         Data = plistlib.readPlist(P)
    350         # Delete empty reports.
    351         if not Data['files']:
    352             os.remove(P)
    353             continue
    354 
    355 # Given the scan-build output directory, checks if the build failed 
    356 # (by searching for the failures directories). If there are failures, it 
    357 # creates a summary file in the output directory.         
    358 def checkBuild(SBOutputDir):
    359     # Check if there are failures.
    360     Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
    361     TotalFailed = len(Failures);
    362     if TotalFailed == 0:
    363         CleanUpEmptyPlists(SBOutputDir)
    364         Plists = glob.glob(SBOutputDir + "/*/*.plist")
    365         print "Number of bug reports (non empty plist files) produced: %d" %\
    366            len(Plists)
    367         return;
    368     
    369     # Create summary file to display when the build fails.
    370     SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
    371     if (Verbose > 0):
    372         print "  Creating the failures summary file %s" % (SummaryPath,)
    373     
    374     SummaryLog = open(SummaryPath, "w+")
    375     try:
    376         SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
    377         if TotalFailed > NumOfFailuresInSummary:
    378             SummaryLog.write("See the first %d below.\n" 
    379                                                    % (NumOfFailuresInSummary,))
    380         # TODO: Add a line "See the results folder for more."
    381     
    382         FailuresCopied = NumOfFailuresInSummary
    383         Idx = 0
    384         for FailLogPathI in Failures:
    385             if Idx >= NumOfFailuresInSummary:
    386                 break;
    387             Idx += 1 
    388             SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
    389             FailLogI = open(FailLogPathI, "r");
    390             try: 
    391                 shutil.copyfileobj(FailLogI, SummaryLog);
    392             finally:
    393                 FailLogI.close()
    394     finally:
    395         SummaryLog.close()
    396     
    397     print "Error: analysis failed. See ", SummaryPath
    398     sys.exit(-1)       
    399 
    400 # Auxiliary object to discard stdout.
    401 class Discarder(object):
    402     def write(self, text):
    403         pass # do nothing
    404 
    405 # Compare the warnings produced by scan-build.
    406 def runCmpResults(Dir):   
    407     TBegin = time.time() 
    408 
    409     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    410     NewDir = os.path.join(Dir, SBOutputDirName)
    411     
    412     # We have to go one level down the directory tree.
    413     RefList = glob.glob(RefDir + "/*") 
    414     NewList = glob.glob(NewDir + "/*")
    415     
    416     # Log folders are also located in the results dir, so ignore them.
    417     RefLogDir = os.path.join(RefDir, LogFolderName)
    418     if RefLogDir in RefList:
    419         RefList.remove(RefLogDir)
    420     NewList.remove(os.path.join(NewDir, LogFolderName))
    421     
    422     if len(RefList) == 0 or len(NewList) == 0:
    423         return False
    424     assert(len(RefList) == len(NewList))
    425 
    426     # There might be more then one folder underneath - one per each scan-build 
    427     # command (Ex: one for configure and one for make).
    428     if (len(RefList) > 1):
    429         # Assume that the corresponding folders have the same names.
    430         RefList.sort()
    431         NewList.sort()
    432     
    433     # Iterate and find the differences.
    434     NumDiffs = 0
    435     PairList = zip(RefList, NewList)    
    436     for P in PairList:    
    437         RefDir = P[0] 
    438         NewDir = P[1]
    439     
    440         assert(RefDir != NewDir) 
    441         if Verbose == 1:        
    442             print "  Comparing Results: %s %s" % (RefDir, NewDir)
    443     
    444         DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
    445         Opts = CmpRuns.CmpOptions(DiffsPath)
    446         # Discard everything coming out of stdout (CmpRun produces a lot of them).
    447         OLD_STDOUT = sys.stdout
    448         sys.stdout = Discarder()
    449         # Scan the results, delete empty plist files.
    450         NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
    451         sys.stdout = OLD_STDOUT
    452         if (NumDiffs > 0) :
    453             print "Warning: %r differences in diagnostics. See %s" % \
    454                   (NumDiffs, DiffsPath,)
    455                     
    456     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 
    457     return (NumDiffs > 0)
    458     
    459 def updateSVN(Mode, ProjectsMap):
    460     try:
    461         ProjectsMap.seek(0)    
    462         for I in csv.reader(ProjectsMap):
    463             ProjName = I[0] 
    464             Path = os.path.join(ProjName, getSBOutputDirName(True))
    465     
    466             if Mode == "delete":
    467                 Command = "svn delete %s" % (Path,)
    468             else:
    469                 Command = "svn add %s" % (Path,)
    470 
    471             if Verbose == 1:        
    472                 print "  Executing: %s" % (Command,)
    473             check_call(Command, shell=True)    
    474     
    475         if Mode == "delete":
    476             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
    477                             "reference results.\""     
    478         else:
    479             CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
    480                             "reference results.\""
    481         if Verbose == 1:        
    482             print "  Executing: %s" % (CommitCommand,)
    483         check_call(CommitCommand, shell=True)    
    484     except:
    485         print "Error: SVN update failed."
    486         sys.exit(-1)
    487         
    488 def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
    489     print " \n\n--- Building project %s" % (ID,)
    490 
    491     TBegin = time.time() 
    492 
    493     if Dir is None :
    494         Dir = getProjectDir(ID)        
    495     if Verbose == 1:        
    496         print "  Build directory: %s." % (Dir,)
    497     
    498     # Set the build results directory.
    499     RelOutputDir = getSBOutputDirName(IsReferenceBuild)
    500     SBOutputDir = os.path.join(Dir, RelOutputDir)
    501                 
    502     buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)
    503 
    504     checkBuild(SBOutputDir)
    505     
    506     if IsReferenceBuild == False:
    507         runCmpResults(Dir)
    508         
    509     print "Completed tests for project %s (time: %.2f)." % \
    510           (ID, (time.time()-TBegin))
    511     
    512 def testAll(IsReferenceBuild = False, UpdateSVN = False):
    513     PMapFile = open(getProjectMapPath(), "rb")
    514     try:        
    515         # Validate the input.
    516         for I in csv.reader(PMapFile):
    517             if (len(I) != 2) :
    518                 print "Error: Rows in the ProjectMapFile should have 3 entries."
    519                 raise Exception()
    520             if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
    521                 print "Error: Second entry in the ProjectMapFile should be 0" \
    522                       " (single file), 1 (project), or 2(single file c++11)."
    523                 raise Exception()              
    524 
    525         # When we are regenerating the reference results, we might need to 
    526         # update svn. Remove reference results from SVN.
    527         if UpdateSVN == True:
    528             assert(IsReferenceBuild == True);
    529             updateSVN("delete",  PMapFile);
    530             
    531         # Test the projects.
    532         PMapFile.seek(0)    
    533         for I in csv.reader(PMapFile):
    534             testProject(I[0], int(I[1]), IsReferenceBuild)
    535 
    536         # Add reference results to SVN.
    537         if UpdateSVN == True:
    538             updateSVN("add",  PMapFile);
    539 
    540     except:
    541         print "Error occurred. Premature termination."
    542         raise                            
    543     finally:
    544         PMapFile.close()    
    545             
    546 if __name__ == '__main__':
    547     IsReference = False
    548     UpdateSVN = False
    549     if len(sys.argv) >= 2:
    550         if sys.argv[1] == "-r":
    551             IsReference = True
    552         elif sys.argv[1] == "-rs":
    553             IsReference = True
    554             UpdateSVN = True
    555         else:     
    556           print >> sys.stderr, 'Usage: ', sys.argv[0],\
    557                              '[-r|-rs]' \
    558                              'Use -r to regenerate reference output' \
    559                              'Use -rs to regenerate reference output and update svn'
    560 
    561     testAll(IsReference, UpdateSVN)
    562