#!/usr/bin/env python

"""
Static Analyzer qualification infrastructure.

The goal is to test the analyzer against different projects, check for failures,
compare results, and measure performance.

Repository Directory will contain sources of the projects as well as the
information on how to build them and the expected output.
Repository Directory structure:
   - ProjectMap file
   - Historical Performance Data
   - Project Dir1
     - ReferenceOutput
   - Project Dir2
     - ReferenceOutput
   ..

To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository to min network traffic).
   - Build all projects, until error. Produce logs to report errors.
   - Compare results.

The files which should be kept around for failure investigations:
   RepositoryCopy/Project DirI/ScanBuildResults
   RepositoryCopy/Project DirI/run_static_analyzer.log

Assumptions (TODO: shouldn't need to assume these.):
   The script is being run from the Repository Directory.
   The compiler for scan-build and scan-build are in the PATH.
   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH

For more logging, set the env variables:
   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
"""
import CmpRuns

import os
import csv
import sys
import glob
import math
import shutil
import time
import plistlib
from subprocess import check_call, check_output, CalledProcessError

#------------------------------------------------------------------------------
# Helper functions.
#------------------------------------------------------------------------------

def detectCPUs():
    """
    Detects the number of CPUs on a system. Cribbed from pp.

    Returns the value of sysconf(SC_NPROCESSORS_ONLN) when available, the
    output of `sysctl -n hw.ncpu` when sysconf exists without that name, the
    NUMBER_OF_PROCESSORS environment variable otherwise, and 1 as a last
    resort.
    """
    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else: # OSX:
            # The original called an undefined capture() helper here;
            # check_output runs the command and returns what it printed.
            return int(check_output(['sysctl', '-n', 'hw.ncpu']))
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        if ncpus > 0:
            return ncpus
    return 1 # Default

def which(command, paths = None):
    """which(command, [paths]) - Look up the given command in the paths string
    (or the PATH environment variable, if unspecified).

    Returns the first existing candidate path, or None when the command is
    not found."""

    if paths is None:
        paths = os.environ.get('PATH','')

    # Check for absolute match first.
    if os.path.exists(command):
        return command

    # Would be nice if Python had a lib function for this.
    if not paths:
        paths = os.defpath

    # Get suffixes to search.
    # On Cygwin, 'PATHEXT' may exist but it should not be used.
    if os.pathsep == ';':
        pathext = os.environ.get('PATHEXT', '').split(';')
    else:
        pathext = ['']

    # Search the paths...
    for path in paths.split(os.pathsep):
        for ext in pathext:
            p = os.path.join(path, command + ext)
            if os.path.exists(p):
                return p

    return None

# Make sure we flush the output after every print statement.
class flushfile(object):
    """Wrapper around a file object that flushes after every write."""
    def __init__(self, f):
        self.f = f
    def write(self, x):
        self.f.write(x)
        self.f.flush()
    def flush(self):
        self.f.flush()
    def __getattr__(self, name):
        # Only reached for attributes the wrapper does not define itself;
        # forward them (encoding, isatty, fileno, ...) to the wrapped file.
        return getattr(self.f, name)

sys.stdout = flushfile(sys.stdout)

def getProjectMapPath():
    """Return the absolute path of the project map file in the current
    directory; print an error and exit with -1 if it does not exist."""
    ProjectMapPath = os.path.join(os.path.abspath(os.curdir),
                                  ProjectMapFile)
    if not os.path.exists(ProjectMapPath):
        print("Error: Cannot find the Project Map file " + ProjectMapPath +
              "\nRunning script for the wrong directory?")
        sys.exit(-1)
    return ProjectMapPath

def getProjectDir(ID):
    """Return the absolute path of the directory named ID under the current
    directory."""
    return os.path.join(os.path.abspath(os.curdir), ID)

def getSBOutputDirName(IsReferenceBuild) :
    """Return the scan-build output directory name, carrying the reference
    prefix when IsReferenceBuild is set."""
    if IsReferenceBuild :
        return SBOutputDirReferencePrefix + SBOutputDirName
    return SBOutputDirName

#------------------------------------------------------------------------------
# Configuration setup.
#------------------------------------------------------------------------------

# Find Clang for static analysis.
# which() falls back to its own defaults when PATH is unset, so avoid the
# KeyError a direct os.environ['PATH'] lookup would raise.
Clang = which("clang", os.environ.get('PATH'))
if not Clang:
    print("Error: cannot find 'clang' in PATH")
    sys.exit(-1)

# Number of jobs.
# math.ceil returns a float on Python 2; keep the job count an integer.
Jobs = int(math.ceil(detectCPUs() * 0.75))

# Project map stores info about all the "registered" projects.
ProjectMapFile = "projectMap.csv"

# Names of the project specific scripts.
# The script that needs to be executed before the build can start.
CleanupScript = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BuildScript = "run_static_analyzer.cmd"

# The log file name.
LogFolderName = "Logs"
BuildLogName = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
NumOfFailuresInSummary = 10
FailuresSummaryFileName = "failures.txt"
# Summary of the result diffs.
DiffsSummaryFileName = "diffs.txt"

# The scan-build result directory.
SBOutputDirName = "ScanBuildResults"
SBOutputDirReferencePrefix = "Ref"

# The list of checkers used during analysis.
# Currently, consists of all the non experimental checkers.
Checkers="alpha.security.taint,core,deadcode,security,unix,osx"

Verbose = 1

#------------------------------------------------------------------------------
# Test harness logic.
#------------------------------------------------------------------------------

# Run pre-processing script if any.
def runCleanupScript(Dir, PBuildLogFile):
    """Make the project's cleanup script executable and run it from Dir.

    Does nothing when Dir contains no cleanup script. The output of both
    commands goes to PBuildLogFile. On any failure an error is printed and
    the process exits with -1.
    """
    ScriptPath = os.path.join(Dir, CleanupScript)
    if not os.path.exists(ScriptPath):
        return
    try:
        if Verbose == 1:
            print(" Executing: %s" % (ScriptPath,))
        check_call("chmod +x %s" % ScriptPath, cwd = Dir,
                                               stderr=PBuildLogFile,
                                               stdout=PBuildLogFile,
                                               shell=True)
        check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
                                          stdout=PBuildLogFile,
                                          shell=True)
    except Exception:
        print("Error: The pre-processing step failed. See  %s  for details." %
              (PBuildLogFile.name,))
        sys.exit(-1)

# Build the project with scan-build by reading in the commands and
# prefixing them with the scan-build options.
def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
    """Run every command of the project's build script under scan-build.

    Each non-blank line of the build script is prefixed with the scan-build
    options and executed through the shell with Dir as working directory;
    output goes to PBuildLogFile. Exits with -1 if the build script is
    missing; any failure while running the commands is reported and
    re-raised.
    """
    BuildScriptPath = os.path.join(Dir, BuildScript)
    if not os.path.exists(BuildScriptPath):
        print("Error: build script is not defined: %s" % BuildScriptPath)
        sys.exit(-1)
    SBOptions = "--use-analyzer " + Clang + " "
    SBOptions += "-plist-html -o " + SBOutputDir + " "
    SBOptions += "-enable-checker " + Checkers + " "
    SBPrefix = "scan-build " + SBOptions + " "
    try:
        with open(BuildScriptPath, "r") as SBCommandFile:
            for Line in SBCommandFile:
                # Drop the trailing newline: anything appended after it would
                # be run by the shell as a separate command.
                Command = Line.strip()
                if not Command:
                    continue
                # If using 'make', auto imply a -jX argument
                # to speed up analysis.  xcodebuild will
                # automatically use the maximum number of cores.
                if Command == "make" or Command.startswith("make "):
                    # %d accepts both int and integral float job counts.
                    Command += " -j%d" % Jobs
                SBCommand = SBPrefix + Command
                if Verbose == 1:
                    print(" Executing: %s" % (SBCommand,))
                check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
                                                 stdout=PBuildLogFile,
                                                 shell=True)
    except:
        # Deliberately broad: the error is only reported here and re-raised.
        print("Error: scan-build failed. See  %s  for details." %
              (PBuildLogFile.name,))
        raise

def hasNoExtension(FileName):
    """Return True when FileName has no extension."""
    return os.path.splitext(FileName)[1] == ""

def isValidSingleInputFile(FileName):
    """Return True when FileName has an extension accepted for single-file
    analysis (.i, .ii, .c, .cpp, .m) or no extension at all."""
    Ext = os.path.splitext(FileName)[1]
    return Ext in (".i", ".ii", ".c", ".cpp", ".m", "")

# Run analysis on a set of preprocessed files.
def runAnalyzePreprocessed(Dir, SBOutputDir, Mode):
    """Analyze every file with an extension found directly in Dir.

    One plist per input is written under <SBOutputDir>/date, and the
    analyzer output of a failing input is kept in
    <SBOutputDir>/date/failures/<file>.stderr.txt. Mode 2 adds -std=c++11.
    Raises Exception when Dir contains a build script or a file with an
    unsupported extension.
    """
    if os.path.exists(os.path.join(Dir, BuildScript)):
        print("Error: The preprocessed files project should not contain %s" %
              BuildScript)
        raise Exception()

    CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w "
    CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "

    if (Mode == 2) :
        CmdPrefix += "-std=c++11 "

    PlistPath = os.path.join(Dir, SBOutputDir, "date")
    FailPath = os.path.join(PlistPath, "failures")
    os.makedirs(FailPath)

    for FullFileName in glob.glob(Dir + "/*"):
        FileName = os.path.basename(FullFileName)

        # Only run the analysis on supported files.
        if hasNoExtension(FileName):
            continue
        if not isValidSingleInputFile(FileName):
            print("Error: Invalid single input file %s." % (FullFileName,))
            raise Exception()

        # Build and call the analyzer command.
        OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
        Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
        LogPath = os.path.join(FailPath, FileName + ".stderr.txt")
        if Verbose == 1:
            print(" Executing: %s" % (Command,))
        Failed = False
        with open(LogPath, "w+b") as LogFile:
            try:
                check_call(Command, cwd = Dir, stderr=LogFile,
                                               stdout=LogFile,
                                               shell=True)
            except CalledProcessError as e:
                print("Error: Analyzes of %s failed. See %s for details. "
                      "Error code %d." %
                      (FullFileName, LogFile.name, e.returncode))
                Failed = True

        # If command did not fail, erase the log file.
        if not Failed:
            os.remove(LogPath)

def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild):
    """Build and analyze one project, logging into SBOutputDir.

    Any previous SBOutputDir is deleted first. ProjectBuildMode 1 runs the
    build script under scan-build; any other mode analyzes the preprocessed
    files found in Dir. For reference builds the cleanup script is run once
    more after the analysis.
    """
    TBegin = time.time()

    BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
    print("Log file: %s" % (BuildLogPath,))
    print("Output directory: %s" %(SBOutputDir, ))

    # Clean up scan build results. The build log lives inside this directory,
    # so removing the tree removes the old log as well. shutil is used rather
    # than a shell `rm` so paths with spaces or shell metacharacters work.
    if os.path.exists(SBOutputDir):
        if Verbose == 1:
            print(" Removing: %s" % (SBOutputDir,))
        shutil.rmtree(SBOutputDir)
    os.makedirs(os.path.join(SBOutputDir, LogFolderName))

    # Open the log file.
    PBuildLogFile = open(BuildLogPath, "wb+")

    # Build and analyze the project.
    try:
        runCleanupScript(Dir, PBuildLogFile)

        if (ProjectBuildMode == 1):
            runScanBuild(Dir, SBOutputDir, PBuildLogFile)
        else:
            runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode)

        if IsReferenceBuild :
            runCleanupScript(Dir, PBuildLogFile)

    finally:
        PBuildLogFile.close()

    print("Build complete (time: %.2f). See the log for more details: %s" %
          ((time.time()-TBegin), BuildLogPath))

def _readPlist(Path):
    """Parse the plist file at Path with whichever plistlib API exists."""
    # plistlib.readPlist was removed in Python 3.9; plistlib.load is its
    # replacement and is absent from Python 2.
    if hasattr(plistlib, "load"):
        with open(Path, "rb") as PlistFile:
            return plistlib.load(PlistFile)
    return plistlib.readPlist(Path)

# A plist file is created for each call to the analyzer(each source file).
# We are only interested in the ones that have bug reports, so delete the rest.
def CleanUpEmptyPlists(SBOutputDir):
    """Delete every <SBOutputDir>/*/*.plist whose 'files' entry is empty."""
    # glob already returns paths that include SBOutputDir; joining it on
    # again would duplicate the prefix whenever SBOutputDir is relative.
    for P in glob.glob(SBOutputDir + "/*/*.plist"):
        Data = _readPlist(P)
        # Delete empty reports.
        if not Data['files']:
            os.remove(P)

# Given the scan-build output directory, checks if the build failed
# (by searching for the failures directories). If there are failures, it
# creates a summary file in the output directory.
def checkBuild(SBOutputDir):
    """Report the outcome of a build from the contents of SBOutputDir.

    Without failure logs, the empty plists are removed and the number of
    remaining ones is printed. Otherwise the first NumOfFailuresInSummary
    failure logs are copied into the summary file and the process exits
    with -1.
    """
    # Check if there are failures.
    # Sorted so that the logs picked for the summary are deterministic.
    Failures = sorted(glob.glob(SBOutputDir + "/*/failures/*.stderr.txt"))
    TotalFailed = len(Failures)
    if TotalFailed == 0:
        CleanUpEmptyPlists(SBOutputDir)
        Plists = glob.glob(SBOutputDir + "/*/*.plist")
        print("Number of bug reports (non empty plist files) produced: %d" %
              len(Plists))
        return

    # Create summary file to display when the build fails.
    SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
    if (Verbose > 0):
        print(" Creating the failures summary file %s" % (SummaryPath,))

    with open(SummaryPath, "w+") as SummaryLog:
        SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
        if TotalFailed > NumOfFailuresInSummary:
            SummaryLog.write("See the first %d below.\n"
                                                   % (NumOfFailuresInSummary,))
        # TODO: Add a line "See the results folder for more."

        Reported = Failures[:NumOfFailuresInSummary]
        for Idx, FailLogPathI in enumerate(Reported, 1):
            SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,))
            with open(FailLogPathI, "r") as FailLogI:
                shutil.copyfileobj(FailLogI, SummaryLog)

    print("Error: analysis failed. See  %s" % (SummaryPath,))
    sys.exit(-1)

# Auxiliary object to discard stdout.
class Discarder(object):
    """File-like sink that ignores everything written to it."""
    def write(self, text):
        pass # do nothing
    def flush(self):
        pass # nothing is buffered

# Compare the warnings produced by scan-build.
def runCmpResults(Dir):
    """Compare the new scan-build results of a project with its reference.

    The sub-folders of the reference and new result directories under Dir
    (ignoring the log folders) are paired in sorted order and each pair is
    handed to CmpRuns. Returns True when any pair reported differences, and
    False when there are none or when either side has nothing to compare.
    """
    TBegin = time.time()

    RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
    NewDir = os.path.join(Dir, SBOutputDirName)

    # We have to go one level down the directory tree.
    # Log folders are also located in the results dir, so ignore them.
    # Filtering (rather than list.remove) tolerates a missing log folder.
    RefLogDir = os.path.join(RefDir, LogFolderName)
    NewLogDir = os.path.join(NewDir, LogFolderName)
    # There might be more than one folder underneath - one per each scan-build
    # command (Ex: one for configure and one for make).
    # Assume that the corresponding folders have the same names, so sorting
    # both lists lines them up.
    RefList = sorted(P for P in glob.glob(RefDir + "/*") if P != RefLogDir)
    NewList = sorted(P for P in glob.glob(NewDir + "/*") if P != NewLogDir)

    if len(RefList) == 0 or len(NewList) == 0:
        return False
    assert(len(RefList) == len(NewList))

    # Iterate and find the differences.
    TotalDiffs = 0
    for RefResults, NewResults in zip(RefList, NewList):
        assert(RefResults != NewResults)
        if Verbose == 1:
            print(" Comparing Results: %s %s" % (RefResults, NewResults))

        DiffsPath = os.path.join(NewResults, DiffsSummaryFileName)
        Opts = CmpRuns.CmpOptions(DiffsPath)
        # Discard everything coming out of stdout (CmpRun produces a lot of them).
        OLD_STDOUT = sys.stdout
        sys.stdout = Discarder()
        try:
            # NOTE(review): the trailing False presumably tells CmpRuns not to
            # delete empty plist files - confirm against CmpRuns.
            NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefResults, NewResults,
                                                        Opts, False)
        finally:
            # Restore stdout even when the comparison raises; otherwise every
            # later message, including error reports, would be swallowed.
            sys.stdout = OLD_STDOUT
        if (NumDiffs > 0) :
            print("Warning: %r differences in diagnostics. See %s" %
                  (NumDiffs, DiffsPath,))
            # Accumulate so that differences in any pair affect the result,
            # not only those in the last one.
            TotalDiffs += NumDiffs

    print("Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin))
    return (TotalDiffs > 0)

def updateSVN(Mode, ProjectsMap):
    """Add or delete the reference results of every project in svn, then
    commit.

    Mode "delete" runs `svn delete`; any other value runs `svn add`.
    ProjectsMap is the open project map file; it is rewound before being
    read. On any failure an error is printed and the process exits with -1.
    """
    try:
        ProjectsMap.seek(0)
        for I in csv.reader(ProjectsMap):
            ProjName = I[0]
            Path = os.path.join(ProjName, getSBOutputDirName(True))

            if Mode == "delete":
                Command = "svn delete %s" % (Path,)
            else:
                Command = "svn add %s" % (Path,)

            if Verbose == 1:
                print(" Executing: %s" % (Command,))
            check_call(Command, shell=True)

        if Mode == "delete":
            CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
                            "reference results.\""
        else:
            CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
                            "reference results.\""
        if Verbose == 1:
            print(" Executing: %s" % (CommitCommand,))
        check_call(CommitCommand, shell=True)
    except Exception:
        print("Error: SVN update failed.")
        sys.exit(-1)

def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None):
    """Build one project, check the build and, unless this is a reference
    build, compare the results with the reference.

    Dir defaults to the directory named ID under the current directory.
    """
    print(" \n\n--- Building project %s" % (ID,))

    TBegin = time.time()

    if Dir is None :
        Dir = getProjectDir(ID)
    if Verbose == 1:
        print(" Build directory: %s." % (Dir,))

    # Set the build results directory.
    RelOutputDir = getSBOutputDirName(IsReferenceBuild)
    SBOutputDir = os.path.join(Dir, RelOutputDir)

    buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild)

    checkBuild(SBOutputDir)

    if not IsReferenceBuild:
        runCmpResults(Dir)

    print("Completed tests for project %s (time: %.2f)." %
          (ID, (time.time()-TBegin)))

def testAll(IsReferenceBuild = False, UpdateSVN = False):
    """Validate the project map and test every project listed in it.

    With UpdateSVN set (only valid for reference builds) the old reference
    results are removed from svn before the run and the new ones are added
    after it. Any error is reported and re-raised.
    """
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] >= 3:
        PMapFile = open(getProjectMapPath(), "r", newline="")
    else:
        PMapFile = open(getProjectMapPath(), "rb")
    try:
        # Validate the input.
        for I in csv.reader(PMapFile):
            if (len(I) != 2) :
                print("Error: Rows in the ProjectMapFile should have 2 entries.")
                raise Exception()
            if I[1] not in ("0", "1", "2"):
                print("Error: Second entry in the ProjectMapFile should be 0"
                      " (single file), 1 (project), or 2(single file c++11).")
                raise Exception()

        # When we are regenerating the reference results, we might need to
        # update svn. Remove reference results from SVN.
        if UpdateSVN == True:
            assert(IsReferenceBuild == True)
            updateSVN("delete", PMapFile)

        # Test the projects.
        PMapFile.seek(0)
        for I in csv.reader(PMapFile):
            testProject(I[0], int(I[1]), IsReferenceBuild)

        # Add reference results to SVN.
        if UpdateSVN == True:
            updateSVN("add", PMapFile)

    except:
        # Deliberately broad so that sys.exit() calls made while testing are
        # reported too; the exception is always re-raised.
        print("Error occurred. Premature termination.")
        raise
    finally:
        PMapFile.close()

if __name__ == '__main__':
    IsReference = False
    UpdateSVN = False
    if len(sys.argv) >= 2:
        if sys.argv[1] == "-r":
            IsReference = True
        elif sys.argv[1] == "-rs":
            IsReference = True
            UpdateSVN = True
        else:
            sys.stderr.write(
                "Usage: %s [-r|-rs]\n"
                "  Use -r to regenerate reference output\n"
                "  Use -rs to regenerate reference output and update svn\n"
                % (sys.argv[0],))
            # An unrecognized option must not start a full test run.
            sys.exit(-1)

    testAll(IsReference, UpdateSVN)