Home | History | Annotate | Download | only in analyzer
      1 #!/usr/bin/env python
      2 
      3 """
      4 Static Analyzer qualification infrastructure.
      5 
      6 The goal is to test the analyzer against different projects, check for failures,
      7 compare results, and measure performance.
      8 
      9 Repository Directory will contain sources of the projects as well as the 
     10 information on how to build them and the expected output. 
     11 Repository Directory structure:
     12    - ProjectMap file
     13    - Historical Performance Data
     14    - Project Dir1
     15      - ReferenceOutput
     16    - Project Dir2
     17      - ReferenceOutput
     18    ..
     19 
     20 To test the build of the analyzer one would:
     21    - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that 
     22      the build directory does not pollute the repository, to minimize network traffic).
     23    - Build all projects, until error. Produce logs to report errors.
     24    - Compare results.  
     25 
     26 The files which should be kept around for failure investigations: 
     27    RepositoryCopy/Project DirI/ScanBuildResults
     28    RepositoryCopy/Project DirI/run_static_analyzer.log      
     29    
     30 Assumptions (TODO: shouldn't need to assume these.):   
     31    The script is being run from the Repository Directory.
     32    The compiler for scan-build and scan-build are in the PATH.
     33    export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
     34 
     35 For more logging, set the  env variables:
     36    zaks:TI zaks$ export CCC_ANALYZER_LOG=1
     37    zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
     38 """
     39 import CmpRuns
     40 
     41 import os
     42 import csv
     43 import sys
     44 import glob
     45 import math
     46 import shutil
     47 import time
     48 import plistlib
     49 from subprocess import check_call, CalledProcessError
     50 
     51 #------------------------------------------------------------------------------
     52 # Helper functions.
     53 #------------------------------------------------------------------------------
     54 
     55 def detectCPUs():
     56     """
     57     Detects the number of CPUs on a system. Cribbed from pp.
     58     """
     59     # Linux, Unix and MacOS:
     60     if hasattr(os, "sysconf"):
     61         if os.sysconf_names.has_key("SC_NPROCESSORS_ONLN"):
     62             # Linux & Unix:
     63             ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
     64             if isinstance(ncpus, int) and ncpus > 0:
     65                 return ncpus
     66         else: # OSX:
     67             return int(capture(['sysctl', '-n', 'hw.ncpu']))
     68     # Windows:
     69     if os.environ.has_key("NUMBER_OF_PROCESSORS"):
     70         ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
     71         if ncpus > 0:
     72             return ncpus
     73     return 1 # Default
     74 
     75 def which(command, paths = None):
     76    """which(command, [paths]) - Look up the given command in the paths string
     77    (or the PATH environment variable, if unspecified)."""
     78 
     79    if paths is None:
     80        paths = os.environ.get('PATH','')
     81 
     82    # Check for absolute match first.
     83    if os.path.exists(command):
     84        return command
     85 
     86    # Would be nice if Python had a lib function for this.
     87    if not paths:
     88        paths = os.defpath
     89 
     90    # Get suffixes to search.
     91    # On Cygwin, 'PATHEXT' may exist but it should not be used.
     92    if os.pathsep == ';':
     93        pathext = os.environ.get('PATHEXT', '').split(';')
     94    else:
     95        pathext = ['']
     96 
     97    # Search the paths...
     98    for path in paths.split(os.pathsep):
     99        for ext in pathext:
    100            p = os.path.join(path, command + ext)
    101            if os.path.exists(p):
    102                return p
    103 
    104    return None
    105 
    106 # Make sure we flush the output after every print statement.
    107 class flushfile(object):
    108     def __init__(self, f):
    109         self.f = f
    110     def write(self, x):
    111         self.f.write(x)
    112         self.f.flush()
    113 
# Replace the process-wide stdout with the flushing wrapper, so that every
# write to sys.stdout from here on is followed by a flush.
sys.stdout = flushfile(sys.stdout)
    115 
    116 def getProjectMapPath():
    117     ProjectMapPath = os.path.join(os.path.abspath(os.curdir), 
    118                                   ProjectMapFile)
    119     if not os.path.exists(ProjectMapPath):
    120         print "Error: Cannot find the Project Map file " + ProjectMapPath +\
    121                 "\nRunning script for the wrong directory?"
    122         sys.exit(-1)  
    123     return ProjectMapPath         
    124 
    125 def getProjectDir(ID):
    126     return os.path.join(os.path.abspath(os.curdir), ID)        
    127 
    128 def getSBOutputDirName(IsReferenceBuild) :
    129     if IsReferenceBuild == True :
    130         return SBOutputDirReferencePrefix + SBOutputDirName
    131     else :
    132         return SBOutputDirName
    133 
    134 #------------------------------------------------------------------------------
    135 # Configuration setup.
    136 #------------------------------------------------------------------------------
    137 
    138 # Find Clang for static analysis.
    139 Clang = which("clang", os.environ['PATH'])
    140 if not Clang:
    141     print "Error: cannot find 'clang' in PATH"
    142     sys.exit(-1)
    143 
    144 # Number of jobs.
    145 Jobs = math.ceil(detectCPUs() * 0.75)
    146 
    147 # Project map stores info about all the "registered" projects.
    148 ProjectMapFile = "projectMap.csv"
    149 
    150 # Names of the project specific scripts.
    151 # The script that needs to be executed before the build can start.
    152 CleanupScript = "cleanup_run_static_analyzer.sh"
    153 # This is a file containing commands for scan-build.  
    154 BuildScript = "run_static_analyzer.cmd"
    155 
    156 # The log file name.
    157 LogFolderName = "Logs"
    158 BuildLogName = "run_static_analyzer.log"
    159 # Summary file - contains the summary of the failures. Ex: This info can be be  
    160 # displayed when buildbot detects a build failure.
    161 NumOfFailuresInSummary = 10
    162 FailuresSummaryFileName = "failures.txt"
    163 # Summary of the result diffs.
    164 DiffsSummaryFileName = "diffs.txt"
    165 
    166 # The scan-build result directory.
    167 SBOutputDirName = "ScanBuildResults"
    168 SBOutputDirReferencePrefix = "Ref"
    169 
    170 # The list of checkers used during analyzes.
    171 # Currently, consists of all the non experimental checkers.
    172 Checkers="alpha.security.taint,core,deadcode,security,unix,osx"
    173 
    174 Verbose = 1
    175 
    176 #------------------------------------------------------------------------------
    177 # Test harness logic.
    178 #------------------------------------------------------------------------------
    179 
    180 # Run pre-processing script if any.
    181 def runCleanupScript(Dir, PBuildLogFile):
    182     ScriptPath = os.path.join(Dir, CleanupScript)
    183     if os.path.exists(ScriptPath):
    184         try:
    185             if Verbose == 1:        
    186                 print "  Executing: %s" % (ScriptPath,)
    187             check_call("chmod +x %s" % ScriptPath, cwd = Dir, 
    188                                               stderr=PBuildLogFile,
    189                                               stdout=PBuildLogFile, 
    190                                               shell=True)    
    191             check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
    192                                               stdout=PBuildLogFile, 
    193                                               shell=True)
    194         except:
    195             print "Error: The pre-processing step failed. See ", \
    196                   PBuildLogFile.name, " for details."
    197             sys.exit(-1)
    198 
    199 # Build the project with scan-build by reading in the commands and 
    200 # prefixing them with the scan-build options.
    201 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    202     BuildScriptPath = os.path.join(Dir, BuildScript)
    203     if not os.path.exists(BuildScriptPath):
    204         print "Error: build script is not defined: %s" % BuildScriptPath
    205         sys.exit(-1)
    206     SBOptions = "--use-analyzer " + Clang + " "
    207     SBOptions += "-plist-html -o " + SBOutputDir + " "
    208     SBOptions += "-enable-checker " + Checkers + " "  
    209     try:
    210         SBCommandFile = open(BuildScriptPath, "r")
    211         SBPrefix = "scan-build " + SBOptions + " "
    212         for Command in SBCommandFile:
    213             # If using 'make', auto imply a -jX argument
    214             # to speed up analysis.  xcodebuild will
    215             # automatically use the maximum number of cores.
    216             if Command.startswith("make "):
    217                 Command += "-j" + Jobs
    218             SBCommand = SBPrefix + Command
    219             if Verbose == 1:        
    220                 print "  Executing: %s" % (SBCommand,)
    221             check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
    222                                              stdout=PBuildLogFile, 
    223                                              shell=True)
    224     except:
    225         print "Error: scan-build failed. See ",PBuildLogFile.name,\
    226               " for details."
    227         raise
    228 
    229 def hasNoExtension(FileName):
    230     (Root, Ext) = os.path.splitext(FileName)
    231     if ((Ext == "")) :
    232         return True
    233     return False
    234 
    235 def isValidSingleInputFile(FileName):
    236     (Root, Ext) = os.path.splitext(FileName)
    237     if ((Ext == ".i") | (Ext == ".ii") | 
    238         (Ext == ".c") | (Ext == ".cpp") | 
    239         (Ext == ".m") | (Ext == "")) :
    240         return True
    241     return False
    242    
    243 # Run analysis on a set of preprocessed files.
    244 def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
    245     if os.path.exists(os.path.join(Dir, BuildScript)):
    246         print "Error: The preprocessed files project should not contain %s" % \
    247                BuildScript
    248         raise Exception()       
    249 
    250     CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
    251     CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "   
    252     
    253     if (Mode == 2) :
    254         CmdPrefix += "-std=c++11 " 
    255     
    256     PlistPath = os.path.join(Dir, SBOutputDir, "date")
    257     FailPath = os.path.join(PlistPath, "failures");
    258     os.makedirs(FailPath);
    259  
    260     for FullFileName in glob.glob(Dir + "/*"):
    261         FileName = os.path.basename(FullFileName)
    262         Failed = False
    263         
    264         # Only run the analyzes on supported files.
    265         if (hasNoExtension(FileName)):
    266             continue
    267         if (isValidSingleInputFile(FileName) == False):
    268             print "Error: Invalid single input file %s." % (FullFileName,)
    269             raise Exception()
    270         
    271         # Build and call the analyzer command.
    272         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
    273         Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
    274         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
    275         try:
    276             if Verbose == 1:        
    277                 print "  Executing: %s" % (Command,)
    278             check_call(Command, cwd = Dir, stderr=LogFile,
    279                                            stdout=LogFile, 
    280                                            shell=True)
    281         except CalledProcessError, e:
    282             print "Error: Analyzes of %s failed. See %s for details." \
    283                   "Error code %d." % \
    284                    (FullFileName, LogFile.name, e.returncode)
    285             Failed = True       
    286         finally:
    287             LogFile.close()            
    288         
    289         # If command did not fail, erase the log file.
    290         if Failed == False:
    291             os.remove(LogFile.name);
    292 
    293 def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
    294     TBegin = time.time() 
    295 
    296     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    297     print "Log file: %s" % (BuildLogPath,) 
    298     print "Output directory: %s" %(SBOutputDir, )
    299     
    300     # Clean up the log file.
    301     if (os.path.exists(BuildLogPath)) :
    302         RmCommand = "rm " + BuildLogPath
    303         if Verbose == 1:
    304             print "  Executing: %s" % (RmCommand,)
    305         check_call(RmCommand, shell=True)
    306     
    307     # Clean up scan build results.
    308     if (os.path.exists(SBOutputDir)) :
    309         RmCommand = "rm -r " + SBOutputDir
    310         if Verbose == 1: 
    311             print "  Executing: %s" % (RmCommand,)
    312             check_call(RmCommand, shell=True)
    313     assert(not os.path.exists(SBOutputDir))
    314     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
    315         
    316     # Open the log file.
    317     PBuildLogFile = open(BuildLogPath, "wb+")
    318     
    319     # Build and analyze the project.
    320     try:
    321         runCleanupScript(Dir, PBuildLogFile)
    322         
    323         if (ProjectBuildMode == 1):
    324             runScanBuild(Dir, SBOutputDir, PBuildLogFile)
    325         else:
    326             runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)
    327         
    328         if IsReferenceBuild :
    329             runCleanupScript(Dir, PBuildLogFile)
    330            
    331     finally:
    332         PBuildLogFile.close()
    333         
    334     print "Build complete (time: %.2f). See the log for more details: %s" % \
    335            ((time.time()-TBegin), BuildLogPath) 
    336        
    337 # A plist file is created for each call to the analyzer(each source file).
    338 # We are only interested on the once that have bug reports, so delete the rest.        
    339 def CleanUpEmptyPlists(SBOutputDir):
    340     for F in glob.glob(SBOutputDir + "/*/*.plist"):
    341         P = os.path.join(SBOutputDir, F)
    342         
    343         Data = plistlib.readPlist(P)
    344         # Delete empty reports.
    345         if not Data['files']:
    346             os.remove(P)
    347             continue
    348 
    349 # Given the scan-build output directory, checks if the build failed 
    350 # (by searching for the failures directories). If there are failures, it 
    351 # creates a summary file in the output directory.         
    352 def checkBuild(SBOutputDir):
    353     # Check if there are failures.
    354     Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
    355     TotalFailed = len(Failures);
    356     if TotalFailed == 0:
    357         CleanUpEmptyPlists(SBOutputDir)
    358         Plists = glob.glob(SBOutputDir + "/*/*.plist")
    359         print "Number of bug reports (non empty plist files) produced: %d" %\
    360            len(Plists)
    361         return;
    362     
    363     # Create summary file to display when the build fails.
    364     SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
    365     if (Verbose > 0):
    366         print "  Creating the failures summary file %s" % (SummaryPath,)
    367     
    368     SummaryLog = open(SummaryPath, "w+")
    369     try:
    370         SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
    371         if TotalFailed > NumOfFailuresInSummary:
    372             SummaryLog.write("See the first %d below.\n" 
    373                                                    % (NumOfFailuresInSummary,))
    374         # TODO: Add a line "See the results folder for more."
    375     
    376         FailuresCopied = NumOfFailuresInSummary
    377         Idx = 0
    378         for FailLogPathI in Failures:
    379             if Idx >= NumOfFailuresInSummary:
    380                 break;
    381             Idx += 1 
    382             SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
    383             FailLogI = open(FailLogPathI, "r");
    384             try: 
    385                 shutil.copyfileobj(FailLogI, SummaryLog);
    386             finally:
    387                 FailLogI.close()
    388     finally:
    389         SummaryLog.close()
    390     
    391     print "Error: analysis failed. See ", SummaryPath
    392     sys.exit(-1)       
    393 
    394 # Auxiliary object to discard stdout.
    395 class Discarder(object):
    396     def write(self, text):
    397         pass # do nothing
    398 
    399 # Compare the warnings produced by scan-build.
    400 def runCmpResults(Dir):   
    401     TBegin = time.time() 
    402 
    403     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    404     NewDir = os.path.join(Dir, SBOutputDirName)
    405     
    406     # We have to go one level down the directory tree.
    407     RefList = glob.glob(RefDir + "/*") 
    408     NewList = glob.glob(NewDir + "/*")
    409     
    410     # Log folders are also located in the results dir, so ignore them. 
    411     RefList.remove(os.path.join(RefDir, LogFolderName))
    412     NewList.remove(os.path.join(NewDir, LogFolderName))
    413     
    414     if len(RefList) == 0 or len(NewList) == 0:
    415         return False
    416     assert(len(RefList) == len(NewList))
    417 
    418     # There might be more then one folder underneath - one per each scan-build 
    419     # command (Ex: one for configure and one for make).
    420     if (len(RefList) > 1):
    421         # Assume that the corresponding folders have the same names.
    422         RefList.sort()
    423         NewList.sort()
    424     
    425     # Iterate and find the differences.
    426     NumDiffs = 0
    427     PairList = zip(RefList, NewList)    
    428     for P in PairList:    
    429         RefDir = P[0] 
    430         NewDir = P[1]
    431     
    432         assert(RefDir != NewDir) 
    433         if Verbose == 1:        
    434             print "  Comparing Results: %s %s" % (RefDir, NewDir)
    435     
    436         DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
    437         Opts = CmpRuns.CmpOptions(DiffsPath)
    438         # Discard everything coming out of stdout (CmpRun produces a lot of them).
    439         OLD_STDOUT = sys.stdout
    440         sys.stdout = Discarder()
    441         # Scan the results, delete empty plist files.
    442         NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
    443         sys.stdout = OLD_STDOUT
    444         if (NumDiffs > 0) :
    445             print "Warning: %r differences in diagnostics. See %s" % \
    446                   (NumDiffs, DiffsPath,)
    447                     
    448     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 
    449     return (NumDiffs > 0)
    450     
    451 def updateSVN(Mode, ProjectsMap):
    452     try:
    453         ProjectsMap.seek(0)    
    454         for I in csv.reader(ProjectsMap):
    455             ProjName = I[0] 
    456             Path = os.path.join(ProjName, getSBOutputDirName(True))
    457     
    458             if Mode == "delete":
    459                 Command = "svn delete %s" % (Path,)
    460             else:
    461                 Command = "svn add %s" % (Path,)
    462 
    463             if Verbose == 1:        
    464                 print "  Executing: %s" % (Command,)
    465             check_call(Command, shell=True)    
    466     
    467         if Mode == "delete":
    468             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
    469                             "reference results.\""     
    470         else:
    471             CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
    472                             "reference results.\""
    473         if Verbose == 1:        
    474             print "  Executing: %s" % (CommitCommand,)
    475         check_call(CommitCommand, shell=True)    
    476     except:
    477         print "Error: SVN update failed."
    478         sys.exit(-1)
    479         
    480 def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
    481     print " \n\n--- Building project %s" % (ID,)
    482 
    483     TBegin = time.time() 
    484 
    485     if Dir is None :
    486         Dir = getProjectDir(ID)        
    487     if Verbose == 1:        
    488         print "  Build directory: %s." % (Dir,)
    489     
    490     # Set the build results directory.
    491     RelOutputDir = getSBOutputDirName(IsReferenceBuild)
    492     SBOutputDir = os.path.join(Dir, RelOutputDir)
    493                 
    494     buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)
    495 
    496     checkBuild(SBOutputDir)
    497     
    498     if IsReferenceBuild == False:
    499         runCmpResults(Dir)
    500         
    501     print "Completed tests for project %s (time: %.2f)." % \
    502           (ID, (time.time()-TBegin))
    503     
    504 def testAll(IsReferenceBuild = False, UpdateSVN = False):
    505     PMapFile = open(getProjectMapPath(), "rb")
    506     try:        
    507         # Validate the input.
    508         for I in csv.reader(PMapFile):
    509             if (len(I) != 2) :
    510                 print "Error: Rows in the ProjectMapFile should have 3 entries."
    511                 raise Exception()
    512             if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))):
    513                 print "Error: Second entry in the ProjectMapFile should be 0" \
    514                       " (single file), 1 (project), or 2(single file c++11)."
    515                 raise Exception()              
    516 
    517         # When we are regenerating the reference results, we might need to 
    518         # update svn. Remove reference results from SVN.
    519         if UpdateSVN == True:
    520             assert(IsReferenceBuild == True);
    521             updateSVN("delete",  PMapFile);
    522             
    523         # Test the projects.
    524         PMapFile.seek(0)    
    525         for I in csv.reader(PMapFile):
    526             testProject(I[0], int(I[1]), IsReferenceBuild)
    527 
    528         # Add reference results to SVN.
    529         if UpdateSVN == True:
    530             updateSVN("add",  PMapFile);
    531 
    532     except:
    533         print "Error occurred. Premature termination."
    534         raise                            
    535     finally:
    536         PMapFile.close()    
    537             
    538 if __name__ == '__main__':
    539     IsReference = False
    540     UpdateSVN = False
    541     if len(sys.argv) >= 2:
    542         if sys.argv[1] == "-r":
    543             IsReference = True
    544         elif sys.argv[1] == "-rs":
    545             IsReference = True
    546             UpdateSVN = True
    547         else:     
    548           print >> sys.stderr, 'Usage: ', sys.argv[0],\
    549                              '[-r|-rs]' \
    550                              'Use -r to regenerate reference output' \
    551                              'Use -rs to regenerate reference output and update svn'
    552 
    553     testAll(IsReference, UpdateSVN)
    554