#!/usr/bin/env python

"""
Static Analyzer qualification infrastructure.

The goal is to test the analyzer against different projects, check for
failures, compare results, and measure performance.

Repository Directory will contain sources of the projects as well as the
information on how to build them and the expected output.
Repository Directory structure:
   - ProjectMap file
   - Historical Performance Data
   - Project Dir1
     - ReferenceOutput
   - Project Dir2
     - ReferenceOutput
   ..

To test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure
     that the build directory does not pollute the repository to minimize
     network traffic).
   - Build all projects, until error. Produce logs to report errors.
   - Compare results.

The files which should be kept around for failure investigations:
   RepositoryCopy/Project DirI/ScanBuildResults
   RepositoryCopy/Project DirI/run_static_analyzer.log

Assumptions (TODO: shouldn't need to assume these.):
   The script is being run from the Repository Directory.
   The compiler for scan-build and scan-build are in the PATH.
   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH

For more logging, set the env variables:
   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
"""
import CmpRuns

import os
import csv
import sys
import glob
import math
import shutil
import time
import plistlib
from subprocess import check_call, CalledProcessError

#------------------------------------------------------------------------------
# Helper functions.
#------------------------------------------------------------------------------

def detectCPUs():
    """
    Detects the number of CPUs on a system. Cribbed from pp.

    Returns a positive integer; falls back to 1 when the count cannot be
    determined by any of the probes below.
    """
    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else: # OSX:
            # Imported locally: check_output is the only new dependency and
            # is needed on this fallback path alone.
            from subprocess import check_output
            try:
                ncpus = int(check_output(['sysctl', '-n', 'hw.ncpu']))
                if ncpus > 0:
                    return ncpus
            except (OSError, ValueError, CalledProcessError):
                # sysctl is missing or printed something unexpected; try the
                # remaining probes instead of aborting the whole run.
                pass
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        try:
            ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        except ValueError:
            ncpus = 0
        if ncpus > 0:
            return ncpus
    return 1 # Default

def which(command, paths = None):
    """which(command, [paths]) - Look up the given command in the paths string
    (or the PATH environment variable, if unspecified).

    Returns the first existing candidate path, or None if nothing matches."""

    if paths is None:
        paths = os.environ.get('PATH','')

    # Check for absolute match first.
    if os.path.exists(command):
        return command

    # Would be nice if Python had a lib function for this.
    if not paths:
        paths = os.defpath

    # Get suffixes to search.
    # On Cygwin, 'PATHEXT' may exist but it should not be used.
    if os.pathsep == ';':
        pathext = os.environ.get('PATHEXT', '').split(';')
    else:
        pathext = ['']

    # Search the paths...
    for path in paths.split(os.pathsep):
        for ext in pathext:
            p = os.path.join(path, command + ext)
            if os.path.exists(p):
                return p

    return None

# Make sure we flush the output after every print statement.
class flushfile(object):
    """File-like wrapper that flushes the wrapped stream after every write."""
    def __init__(self, f):
        self.f = f
    def write(self, x):
        self.f.write(x)
        self.f.flush()
    def __getattr__(self, name):
        # Delegate everything else (flush, fileno, isatty, encoding, ...) to
        # the wrapped stream so the wrapper can stand in for sys.stdout.
        return getattr(self.f, name)

sys.stdout = flushfile(sys.stdout)

def getProjectMapPath():
    """Return the absolute path of the project map file in the current
    directory; print an error and exit with -1 if it does not exist."""
    ProjectMapPath = os.path.join(os.path.abspath(os.curdir),
                                  ProjectMapFile)
    if not os.path.exists(ProjectMapPath):
        print("Error: Cannot find the Project Map file " + ProjectMapPath +
              "\nRunning script for the wrong directory?")
        sys.exit(-1)
    return ProjectMapPath

def getProjectDir(ID):
    """Return the absolute path of project ID under the current directory."""
    return os.path.join(os.path.abspath(os.curdir), ID)

def getSBOutputDirName(IsReferenceBuild) :
    """Return the scan-build output directory name; reference builds get the
    reference prefix prepended."""
    if IsReferenceBuild :
        return SBOutputDirReferencePrefix + SBOutputDirName
    return SBOutputDirName

#------------------------------------------------------------------------------
# Configuration setup.
#------------------------------------------------------------------------------

# Find Clang for static analysis.
# which() falls back to the PATH environment variable (or os.defpath) on its
# own, so an unset PATH yields the error below instead of a KeyError.
Clang = which("clang")
if not Clang:
    print("Error: cannot find 'clang' in PATH")
    sys.exit(-1)

# Number of jobs.
Jobs = int(math.ceil(detectCPUs() * 0.75))

# Project map stores info about all the "registered" projects.
ProjectMapFile = "projectMap.csv"

# Names of the project specific scripts.
# The script that needs to be executed before the build can start.
CleanupScript = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BuildScript = "run_static_analyzer.cmd"

# The log file name.
LogFolderName = "Logs"
BuildLogName = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
NumOfFailuresInSummary = 10
FailuresSummaryFileName = "failures.txt"
# Summary of the result diffs.
DiffsSummaryFileName = "diffs.txt"

# The scan-build result directory.
SBOutputDirName = "ScanBuildResults"
SBOutputDirReferencePrefix = "Ref"

# The list of checkers used during analysis.
# Currently, consists of all the non experimental checkers, plus a few alpha
# checkers we don't want to regress on.
173 Checkers="alpha.unix.SimpleStream,alpha.security.taint,alpha.cplusplus.NewDeleteLeaks,core,cplusplus,deadcode,security,unix,osx" 174 175 Verbose = 1 176 177 #------------------------------------------------------------------------------ 178 # Test harness logic. 179 #------------------------------------------------------------------------------ 180 181 # Run pre-processing script if any. 182 def runCleanupScript(Dir, PBuildLogFile): 183 ScriptPath = os.path.join(Dir, CleanupScript) 184 if os.path.exists(ScriptPath): 185 try: 186 if Verbose == 1: 187 print " Executing: %s" % (ScriptPath,) 188 check_call("chmod +x %s" % ScriptPath, cwd = Dir, 189 stderr=PBuildLogFile, 190 stdout=PBuildLogFile, 191 shell=True) 192 check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile, 193 stdout=PBuildLogFile, 194 shell=True) 195 except: 196 print "Error: The pre-processing step failed. See ", \ 197 PBuildLogFile.name, " for details." 198 sys.exit(-1) 199 200 # Build the project with scan-build by reading in the commands and 201 # prefixing them with the scan-build options. 202 def runScanBuild(Dir, SBOutputDir, PBuildLogFile): 203 BuildScriptPath = os.path.join(Dir, BuildScript) 204 if not os.path.exists(BuildScriptPath): 205 print "Error: build script is not defined: %s" % BuildScriptPath 206 sys.exit(-1) 207 SBOptions = "--use-analyzer " + Clang + " " 208 SBOptions += "-plist-html -o " + SBOutputDir + " " 209 SBOptions += "-enable-checker " + Checkers + " " 210 SBOptions += "--keep-empty " 211 # Always use ccc-analyze to ensure that we can locate the failures 212 # directory. 213 SBOptions += "--override-compiler " 214 try: 215 SBCommandFile = open(BuildScriptPath, "r") 216 SBPrefix = "scan-build " + SBOptions + " " 217 for Command in SBCommandFile: 218 # If using 'make', auto imply a -jX argument 219 # to speed up analysis. xcodebuild will 220 # automatically use the maximum number of cores. 
221 if (Command.startswith("make ") or Command == "make") and \ 222 "-j" not in Command: 223 Command += " -j%d" % Jobs 224 SBCommand = SBPrefix + Command 225 if Verbose == 1: 226 print " Executing: %s" % (SBCommand,) 227 check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile, 228 stdout=PBuildLogFile, 229 shell=True) 230 except: 231 print "Error: scan-build failed. See ",PBuildLogFile.name,\ 232 " for details." 233 raise 234 235 def hasNoExtension(FileName): 236 (Root, Ext) = os.path.splitext(FileName) 237 if ((Ext == "")) : 238 return True 239 return False 240 241 def isValidSingleInputFile(FileName): 242 (Root, Ext) = os.path.splitext(FileName) 243 if ((Ext == ".i") | (Ext == ".ii") | 244 (Ext == ".c") | (Ext == ".cpp") | 245 (Ext == ".m") | (Ext == "")) : 246 return True 247 return False 248 249 # Run analysis on a set of preprocessed files. 250 def runAnalyzePreprocessed(Dir, SBOutputDir, Mode): 251 if os.path.exists(os.path.join(Dir, BuildScript)): 252 print "Error: The preprocessed files project should not contain %s" % \ 253 BuildScript 254 raise Exception() 255 256 CmdPrefix = Clang + " -cc1 -analyze -analyzer-output=plist -w " 257 CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks " 258 259 if (Mode == 2) : 260 CmdPrefix += "-std=c++11 " 261 262 PlistPath = os.path.join(Dir, SBOutputDir, "date") 263 FailPath = os.path.join(PlistPath, "failures"); 264 os.makedirs(FailPath); 265 266 for FullFileName in glob.glob(Dir + "/*"): 267 FileName = os.path.basename(FullFileName) 268 Failed = False 269 270 # Only run the analyzes on supported files. 271 if (hasNoExtension(FileName)): 272 continue 273 if (isValidSingleInputFile(FileName) == False): 274 print "Error: Invalid single input file %s." % (FullFileName,) 275 raise Exception() 276 277 # Build and call the analyzer command. 
278 OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist " 279 Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName) 280 LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b") 281 try: 282 if Verbose == 1: 283 print " Executing: %s" % (Command,) 284 check_call(Command, cwd = Dir, stderr=LogFile, 285 stdout=LogFile, 286 shell=True) 287 except CalledProcessError, e: 288 print "Error: Analyzes of %s failed. See %s for details." \ 289 "Error code %d." % \ 290 (FullFileName, LogFile.name, e.returncode) 291 Failed = True 292 finally: 293 LogFile.close() 294 295 # If command did not fail, erase the log file. 296 if Failed == False: 297 os.remove(LogFile.name); 298 299 def buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild): 300 TBegin = time.time() 301 302 BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName) 303 print "Log file: %s" % (BuildLogPath,) 304 print "Output directory: %s" %(SBOutputDir, ) 305 306 # Clean up the log file. 307 if (os.path.exists(BuildLogPath)) : 308 RmCommand = "rm " + BuildLogPath 309 if Verbose == 1: 310 print " Executing: %s" % (RmCommand,) 311 check_call(RmCommand, shell=True) 312 313 # Clean up scan build results. 314 if (os.path.exists(SBOutputDir)) : 315 RmCommand = "rm -r " + SBOutputDir 316 if Verbose == 1: 317 print " Executing: %s" % (RmCommand,) 318 check_call(RmCommand, shell=True) 319 assert(not os.path.exists(SBOutputDir)) 320 os.makedirs(os.path.join(SBOutputDir, LogFolderName)) 321 322 # Open the log file. 323 PBuildLogFile = open(BuildLogPath, "wb+") 324 325 # Build and analyze the project. 326 try: 327 runCleanupScript(Dir, PBuildLogFile) 328 329 if (ProjectBuildMode == 1): 330 runScanBuild(Dir, SBOutputDir, PBuildLogFile) 331 else: 332 runAnalyzePreprocessed(Dir, SBOutputDir, ProjectBuildMode) 333 334 if IsReferenceBuild : 335 runCleanupScript(Dir, PBuildLogFile) 336 337 finally: 338 PBuildLogFile.close() 339 340 print "Build complete (time: %.2f). 
See the log for more details: %s" % \ 341 ((time.time()-TBegin), BuildLogPath) 342 343 # A plist file is created for each call to the analyzer(each source file). 344 # We are only interested on the once that have bug reports, so delete the rest. 345 def CleanUpEmptyPlists(SBOutputDir): 346 for F in glob.glob(SBOutputDir + "/*/*.plist"): 347 P = os.path.join(SBOutputDir, F) 348 349 Data = plistlib.readPlist(P) 350 # Delete empty reports. 351 if not Data['files']: 352 os.remove(P) 353 continue 354 355 # Given the scan-build output directory, checks if the build failed 356 # (by searching for the failures directories). If there are failures, it 357 # creates a summary file in the output directory. 358 def checkBuild(SBOutputDir): 359 # Check if there are failures. 360 Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt") 361 TotalFailed = len(Failures); 362 if TotalFailed == 0: 363 CleanUpEmptyPlists(SBOutputDir) 364 Plists = glob.glob(SBOutputDir + "/*/*.plist") 365 print "Number of bug reports (non empty plist files) produced: %d" %\ 366 len(Plists) 367 return; 368 369 # Create summary file to display when the build fails. 370 SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName) 371 if (Verbose > 0): 372 print " Creating the failures summary file %s" % (SummaryPath,) 373 374 SummaryLog = open(SummaryPath, "w+") 375 try: 376 SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,)) 377 if TotalFailed > NumOfFailuresInSummary: 378 SummaryLog.write("See the first %d below.\n" 379 % (NumOfFailuresInSummary,)) 380 # TODO: Add a line "See the results folder for more." 
381 382 FailuresCopied = NumOfFailuresInSummary 383 Idx = 0 384 for FailLogPathI in Failures: 385 if Idx >= NumOfFailuresInSummary: 386 break; 387 Idx += 1 388 SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,)); 389 FailLogI = open(FailLogPathI, "r"); 390 try: 391 shutil.copyfileobj(FailLogI, SummaryLog); 392 finally: 393 FailLogI.close() 394 finally: 395 SummaryLog.close() 396 397 print "Error: analysis failed. See ", SummaryPath 398 sys.exit(-1) 399 400 # Auxiliary object to discard stdout. 401 class Discarder(object): 402 def write(self, text): 403 pass # do nothing 404 405 # Compare the warnings produced by scan-build. 406 def runCmpResults(Dir): 407 TBegin = time.time() 408 409 RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName) 410 NewDir = os.path.join(Dir, SBOutputDirName) 411 412 # We have to go one level down the directory tree. 413 RefList = glob.glob(RefDir + "/*") 414 NewList = glob.glob(NewDir + "/*") 415 416 # Log folders are also located in the results dir, so ignore them. 417 RefLogDir = os.path.join(RefDir, LogFolderName) 418 if RefLogDir in RefList: 419 RefList.remove(RefLogDir) 420 NewList.remove(os.path.join(NewDir, LogFolderName)) 421 422 if len(RefList) == 0 or len(NewList) == 0: 423 return False 424 assert(len(RefList) == len(NewList)) 425 426 # There might be more then one folder underneath - one per each scan-build 427 # command (Ex: one for configure and one for make). 428 if (len(RefList) > 1): 429 # Assume that the corresponding folders have the same names. 430 RefList.sort() 431 NewList.sort() 432 433 # Iterate and find the differences. 
434 NumDiffs = 0 435 PairList = zip(RefList, NewList) 436 for P in PairList: 437 RefDir = P[0] 438 NewDir = P[1] 439 440 assert(RefDir != NewDir) 441 if Verbose == 1: 442 print " Comparing Results: %s %s" % (RefDir, NewDir) 443 444 DiffsPath = os.path.join(NewDir, DiffsSummaryFileName) 445 Opts = CmpRuns.CmpOptions(DiffsPath) 446 # Discard everything coming out of stdout (CmpRun produces a lot of them). 447 OLD_STDOUT = sys.stdout 448 sys.stdout = Discarder() 449 # Scan the results, delete empty plist files. 450 NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False) 451 sys.stdout = OLD_STDOUT 452 if (NumDiffs > 0) : 453 print "Warning: %r differences in diagnostics. See %s" % \ 454 (NumDiffs, DiffsPath,) 455 456 print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin) 457 return (NumDiffs > 0) 458 459 def updateSVN(Mode, ProjectsMap): 460 try: 461 ProjectsMap.seek(0) 462 for I in csv.reader(ProjectsMap): 463 ProjName = I[0] 464 Path = os.path.join(ProjName, getSBOutputDirName(True)) 465 466 if Mode == "delete": 467 Command = "svn delete %s" % (Path,) 468 else: 469 Command = "svn add %s" % (Path,) 470 471 if Verbose == 1: 472 print " Executing: %s" % (Command,) 473 check_call(Command, shell=True) 474 475 if Mode == "delete": 476 CommitCommand = "svn commit -m \"[analyzer tests] Remove " \ 477 "reference results.\"" 478 else: 479 CommitCommand = "svn commit -m \"[analyzer tests] Add new " \ 480 "reference results.\"" 481 if Verbose == 1: 482 print " Executing: %s" % (CommitCommand,) 483 check_call(CommitCommand, shell=True) 484 except: 485 print "Error: SVN update failed." 486 sys.exit(-1) 487 488 def testProject(ID, ProjectBuildMode, IsReferenceBuild=False, Dir=None): 489 print " \n\n--- Building project %s" % (ID,) 490 491 TBegin = time.time() 492 493 if Dir is None : 494 Dir = getProjectDir(ID) 495 if Verbose == 1: 496 print " Build directory: %s." % (Dir,) 497 498 # Set the build results directory. 
499 RelOutputDir = getSBOutputDirName(IsReferenceBuild) 500 SBOutputDir = os.path.join(Dir, RelOutputDir) 501 502 buildProject(Dir, SBOutputDir, ProjectBuildMode, IsReferenceBuild) 503 504 checkBuild(SBOutputDir) 505 506 if IsReferenceBuild == False: 507 runCmpResults(Dir) 508 509 print "Completed tests for project %s (time: %.2f)." % \ 510 (ID, (time.time()-TBegin)) 511 512 def testAll(IsReferenceBuild = False, UpdateSVN = False): 513 PMapFile = open(getProjectMapPath(), "rb") 514 try: 515 # Validate the input. 516 for I in csv.reader(PMapFile): 517 if (len(I) != 2) : 518 print "Error: Rows in the ProjectMapFile should have 3 entries." 519 raise Exception() 520 if (not ((I[1] == "0") | (I[1] == "1") | (I[1] == "2"))): 521 print "Error: Second entry in the ProjectMapFile should be 0" \ 522 " (single file), 1 (project), or 2(single file c++11)." 523 raise Exception() 524 525 # When we are regenerating the reference results, we might need to 526 # update svn. Remove reference results from SVN. 527 if UpdateSVN == True: 528 assert(IsReferenceBuild == True); 529 updateSVN("delete", PMapFile); 530 531 # Test the projects. 532 PMapFile.seek(0) 533 for I in csv.reader(PMapFile): 534 testProject(I[0], int(I[1]), IsReferenceBuild) 535 536 # Add reference results to SVN. 537 if UpdateSVN == True: 538 updateSVN("add", PMapFile); 539 540 except: 541 print "Error occurred. Premature termination." 542 raise 543 finally: 544 PMapFile.close() 545 546 if __name__ == '__main__': 547 IsReference = False 548 UpdateSVN = False 549 if len(sys.argv) >= 2: 550 if sys.argv[1] == "-r": 551 IsReference = True 552 elif sys.argv[1] == "-rs": 553 IsReference = True 554 UpdateSVN = True 555 else: 556 print >> sys.stderr, 'Usage: ', sys.argv[0],\ 557 '[-r|-rs]' \ 558 'Use -r to regenerate reference output' \ 559 'Use -rs to regenerate reference output and update svn' 560 561 testAll(IsReference, UpdateSVN) 562