utils/analyzer/SATestBuild.py

   1 #!/usr/bin/env python
   2
   3 """
   4 Static Analyzer qualification infrastructure.
   5
   6 The goal is to test the analyzer against different projects, check for failures,
   7 compare results, and measure performance.
   8
   9 Repository Directory will contain sources of the projects as well as the
  10 information on how to build them and the expected output.
  11 Repository Directory structure:
  12    - ProjectMap file
  13    - Historical Performance Data
  14    - Project Dir1
  15      - ReferenceOutput
  16    - Project Dir2
  17      - ReferenceOutput
  18    ..
  19
  20 To test the build of the analyzer one would:
  21    - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
  22      the build directory does not pollute the repository to min network traffic).
  23    - Build all projects, until error. Produce logs to report errors.
  24    - Compare results.
  25
  26 The files which should be kept around for failure investigations:
  27    RepositoryCopy/Project DirI/ScanBuildResults
  28    RepositoryCopy/Project DirI/run_static_analyzer.log
  29
  30 Assumptions (TODO: shouldn't need to assume these.):
  31    The script is being run from the Repository Directory.
  32    The compiler for scan-build and scan-build are in the PATH.
  33    export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
  34
  35 For more logging, set the  env variables:
  36    zaks:TI zaks$ export CCC_ANALYZER_LOG=1
  37    zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
  38 """
  39 import CmpRuns
  40
  41 import os
  42 import csv
  43 import sys
  44 import glob
  45 import shutil
  46 import time
  47 import plistlib
  48 from subprocess import check_call, CalledProcessError
  49
# Project map stores info about all the "registered" projects.
# It is looked up by this name in the current working directory.
ProjectMapFile = "projectMap.csv"

# Names of the project specific scripts.
# The script that needs to be executed before the build can start.
CleanupScript = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build (one command per line).
BuildScript = "run_static_analyzer.cmd"

# The log folder (created inside the scan-build output directory) and the
# name of the build log file stored in it.
LogFolderName = "Logs"
BuildLogName = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
# At most NumOfFailuresInSummary failure logs are copied into the summary.
NumOfFailuresInSummary = 10
FailuresSummaryFileName = "failures.txt"
# Summary of the result diffs.
DiffsSummaryFileName = "diffs.txt"

# The scan-build result directory. Reference results use the same name with
# SBOutputDirReferencePrefix prepended.
SBOutputDirName = "ScanBuildResults"
SBOutputDirReferencePrefix = "Ref"

# The comma-separated list of checkers used during analysis.
# Currently consists of the non-experimental checker packages plus
# experimental.security.taint.
Checkers="experimental.security.taint,core,deadcode,security,unix,osx"

# Verbosity level; 1 makes the script echo the commands it executes.
Verbose = 1
  78
  79 # Make sure we flush the output after every print statement.
  80 class flushfile(object):
  81     def __init__(self, f):
  82         self.f = f
  83     def write(self, x):
  84         self.f.write(x)
  85         self.f.flush()
  86
# Replace the process-wide stdout with the flushing wrapper so that every
# later write to sys.stdout is flushed right away.
sys.stdout = flushfile(sys.stdout)
  88
  89 def getProjectMapPath():
  90     ProjectMapPath = os.path.join(os.path.abspath(os.curdir),
  91                                   ProjectMapFile)
  92     if not os.path.exists(ProjectMapPath):
  93         print "Error: Cannot find the Project Map file " + ProjectMapPath +\
  94                 "\nRunning script for the wrong directory?"
  95         sys.exit(-1)
  96     return ProjectMapPath
  97
  98 def getProjectDir(ID):
  99     return os.path.join(os.path.abspath(os.curdir), ID)
 100
 101 def getSBOutputDirName(IsReferenceBuild) :
 102     if IsReferenceBuild == True :
 103         return SBOutputDirReferencePrefix + SBOutputDirName
 104     else :
 105         return SBOutputDirName
 106
 107 # Run pre-processing script if any.
 108 def runCleanupScript(Dir, PBuildLogFile):
 109     ScriptPath = os.path.join(Dir, CleanupScript)
 110     if os.path.exists(ScriptPath):
 111         try:
 112             if Verbose == 1:
 113                 print "  Executing: %s" % (ScriptPath,)
 114             check_call("chmod +x %s" % ScriptPath, cwd = Dir,
 115                                               stderr=PBuildLogFile,
 116                                               stdout=PBuildLogFile,
 117                                               shell=True)
 118             check_call(ScriptPath, cwd = Dir, stderr=PBuildLogFile,
 119                                               stdout=PBuildLogFile,
 120                                               shell=True)
 121         except:
 122             print "Error: The pre-processing step failed. See ", \
 123                   PBuildLogFile.name, " for details."
 124             sys.exit(-1)
 125
 126 # Build the project with scan-build by reading in the commands and
 127 # prefixing them with the scan-build options.
 128 def runScanBuild(Dir, SBOutputDir, PBuildLogFile):
 129     BuildScriptPath = os.path.join(Dir, BuildScript)
 130     if not os.path.exists(BuildScriptPath):
 131         print "Error: build script is not defined: %s" % BuildScriptPath
 132         sys.exit(-1)
 133     SBOptions = "-plist-html -o " + SBOutputDir + " "
 134     SBOptions += "-enable-checker " + Checkers + " "
 135     try:
 136         SBCommandFile = open(BuildScriptPath, "r")
 137         SBPrefix = "scan-build " + SBOptions + " "
 138         for Command in SBCommandFile:
 139             SBCommand = SBPrefix + Command
 140             if Verbose == 1:
 141                 print "  Executing: %s" % (SBCommand,)
 142             check_call(SBCommand, cwd = Dir, stderr=PBuildLogFile,
 143                                              stdout=PBuildLogFile,
 144                                              shell=True)
 145     except:
 146         print "Error: scan-build failed. See ",PBuildLogFile.name,\
 147               " for details."
 148         raise
 149
 150 def hasNoExtension(FileName):
 151     (Root, Ext) = os.path.splitext(FileName)
 152     if ((Ext == "")) :
 153         return True
 154     return False
 155
 156 def isValidSingleInputFile(FileName):
 157     (Root, Ext) = os.path.splitext(FileName)
 158     if ((Ext == ".i") | (Ext == ".ii") |
 159         (Ext == ".c") | (Ext == ".cpp") |
 160         (Ext == ".m") | (Ext == "")) :
 161         return True
 162     return False
 163
 164 # Run analysis on a set of preprocessed files.
 165 def runAnalyzePreprocessed(Dir, SBOutputDir):
 166     if os.path.exists(os.path.join(Dir, BuildScript)):
 167         print "Error: The preprocessed files project should not contain %s" % \
 168                BuildScript
 169         raise Exception()
 170
 171     CmdPrefix = "clang -cc1 -analyze -analyzer-output=plist -w "
 172     CmdPrefix += "-analyzer-checker=" + Checkers +" -fcxx-exceptions -fblocks "
 173
 174     PlistPath = os.path.join(Dir, SBOutputDir, "date")
 175     FailPath = os.path.join(PlistPath, "failures");
 176     os.makedirs(FailPath);
 177
 178     for FullFileName in glob.glob(Dir + "/*"):
 179         FileName = os.path.basename(FullFileName)
 180         Failed = False
 181
 182         # Only run the analyzes on supported files.
 183         if (hasNoExtension(FileName)):
 184             continue
 185         if (isValidSingleInputFile(FileName) == False):
 186             print "Error: Invalid single input file %s." % (FullFileName,)
 187             raise Exception()
 188
 189         # Build and call the analyzer command.
 190         OutputOption = "-o " + os.path.join(PlistPath, FileName) + ".plist "
 191         Command = CmdPrefix + OutputOption + os.path.join(Dir, FileName)
 192         LogFile = open(os.path.join(FailPath, FileName + ".stderr.txt"), "w+b")
 193         try:
 194             if Verbose == 1:
 195                 print "  Executing: %s" % (Command,)
 196             check_call(Command, cwd = Dir, stderr=LogFile,
 197                                            stdout=LogFile,
 198                                            shell=True)
 199         except CalledProcessError, e:
 200             print "Error: Analyzes of %s failed. See %s for details." \
 201                   "Error code %d." % \
 202                    (FullFileName, LogFile.name, e.returncode)
 203             Failed = True
 204         finally:
 205             LogFile.close()
 206
 207         # If command did not fail, erase the log file.
 208         if Failed == False:
 209             os.remove(LogFile.name);
 210
 211 def buildProject(Dir, SBOutputDir, IsScanBuild, IsReferenceBuild):
 212     TBegin = time.time()
 213
 214     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
 215     print "Log file: %s" % (BuildLogPath,)
 216     print "Output directory: %s" %(SBOutputDir, )
 217
 218     # Clean up the log file.
 219     if (os.path.exists(BuildLogPath)) :
 220         RmCommand = "rm " + BuildLogPath
 221         if Verbose == 1:
 222             print "  Executing: %s" % (RmCommand,)
 223         check_call(RmCommand, shell=True)
 224
 225     # Clean up scan build results.
 226     if (os.path.exists(SBOutputDir)) :
 227         RmCommand = "rm -r " + SBOutputDir
 228         if Verbose == 1:
 229             print "  Executing: %s" % (RmCommand,)
 230             check_call(RmCommand, shell=True)
 231     assert(not os.path.exists(SBOutputDir))
 232     os.makedirs(os.path.join(SBOutputDir, LogFolderName))
 233
 234     # Open the log file.
 235     PBuildLogFile = open(BuildLogPath, "wb+")
 236
 237     # Build and analyze the project.
 238     try:
 239         runCleanupScript(Dir, PBuildLogFile)
 240
 241         if IsScanBuild:
 242             runScanBuild(Dir, SBOutputDir, PBuildLogFile)
 243         else:
 244             runAnalyzePreprocessed(Dir, SBOutputDir)
 245
 246         if IsReferenceBuild :
 247             runCleanupScript(Dir, PBuildLogFile)
 248
 249     finally:
 250         PBuildLogFile.close()
 251
 252     print "Build complete (time: %.2f). See the log for more details: %s" % \
 253            ((time.time()-TBegin), BuildLogPath)
 254
 255 # A plist file is created for each call to the analyzer(each source file).
 256 # We are only interested on the once that have bug reports, so delete the rest.
 257 def CleanUpEmptyPlists(SBOutputDir):
 258     for F in glob.glob(SBOutputDir + "/*/*.plist"):
 259         P = os.path.join(SBOutputDir, F)
 260
 261         Data = plistlib.readPlist(P)
 262         # Delete empty reports.
 263         if not Data['files']:
 264             os.remove(P)
 265             continue
 266
 267 # Given the scan-build output directory, checks if the build failed
 268 # (by searching for the failures directories). If there are failures, it
 269 # creates a summary file in the output directory.
 270 def checkBuild(SBOutputDir):
 271     # Check if there are failures.
 272     Failures = glob.glob(SBOutputDir + "/*/failures/*.stderr.txt")
 273     TotalFailed = len(Failures);
 274     if TotalFailed == 0:
 275         CleanUpEmptyPlists(SBOutputDir)
 276         Plists = glob.glob(SBOutputDir + "/*/*.plist")
 277         print "Number of bug reports (non empty plist files) produced: %d" %\
 278            len(Plists)
 279         return;
 280
 281     # Create summary file to display when the build fails.
 282     SummaryPath = os.path.join(SBOutputDir, LogFolderName, FailuresSummaryFileName)
 283     if (Verbose > 0):
 284         print "  Creating the failures summary file %s" % (SummaryPath,)
 285
 286     SummaryLog = open(SummaryPath, "w+")
 287     try:
 288         SummaryLog.write("Total of %d failures discovered.\n" % (TotalFailed,))
 289         if TotalFailed > NumOfFailuresInSummary:
 290             SummaryLog.write("See the first %d below.\n"
 291                                                    % (NumOfFailuresInSummary,))
 292         # TODO: Add a line "See the results folder for more."
 293
 294         FailuresCopied = NumOfFailuresInSummary
 295         Idx = 0
 296         for FailLogPathI in Failures:
 297             if Idx >= NumOfFailuresInSummary:
 298                 break;
 299             Idx += 1
 300             SummaryLog.write("\n-- Error #%d -----------\n" % (Idx,));
 301             FailLogI = open(FailLogPathI, "r");
 302             try:
 303                 shutil.copyfileobj(FailLogI, SummaryLog);
 304             finally:
 305                 FailLogI.close()
 306     finally:
 307         SummaryLog.close()
 308
 309     print "Error: analysis failed. See ", SummaryPath
 310     sys.exit(-1)
 311
 312 # Auxiliary object to discard stdout.
 313 class Discarder(object):
 314     def write(self, text):
 315         pass # do nothing
 316
 317 # Compare the warnings produced by scan-build.
 318 def runCmpResults(Dir):
 319     TBegin = time.time()
 320
 321     RefDir = os.path.join(Dir, SBOutputDirReferencePrefix + SBOutputDirName)
 322     NewDir = os.path.join(Dir, SBOutputDirName)
 323
 324     # We have to go one level down the directory tree.
 325     RefList = glob.glob(RefDir + "/*")
 326     NewList = glob.glob(NewDir + "/*")
 327
 328     # Log folders are also located in the results dir, so ignore them.
 329     RefList.remove(os.path.join(RefDir, LogFolderName))
 330     NewList.remove(os.path.join(NewDir, LogFolderName))
 331
 332     if len(RefList) == 0 or len(NewList) == 0:
 333         return False
 334     assert(len(RefList) == len(NewList))
 335
 336     # There might be more then one folder underneath - one per each scan-build
 337     # command (Ex: one for configure and one for make).
 338     if (len(RefList) > 1):
 339         # Assume that the corresponding folders have the same names.
 340         RefList.sort()
 341         NewList.sort()
 342
 343     # Iterate and find the differences.
 344     NumDiffs = 0
 345     PairList = zip(RefList, NewList)
 346     for P in PairList:
 347         RefDir = P[0]
 348         NewDir = P[1]
 349
 350         assert(RefDir != NewDir)
 351         if Verbose == 1:
 352             print "  Comparing Results: %s %s" % (RefDir, NewDir)
 353
 354         DiffsPath = os.path.join(NewDir, DiffsSummaryFileName)
 355         Opts = CmpRuns.CmpOptions(DiffsPath)
 356         # Discard everything coming out of stdout (CmpRun produces a lot of them).
 357         OLD_STDOUT = sys.stdout
 358         sys.stdout = Discarder()
 359         # Scan the results, delete empty plist files.
 360         NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
 361         sys.stdout = OLD_STDOUT
 362         if (NumDiffs > 0) :
 363             print "Warning: %r differences in diagnostics. See %s" % \
 364                   (NumDiffs, DiffsPath,)
 365
 366     print "Diagnostic comparison complete (time: %.2f)." % (time.time()-TBegin)
 367     return (NumDiffs > 0)
 368
 369 def updateSVN(Mode, ProjectsMap):
 370     try:
 371         ProjectsMap.seek(0)
 372         for I in csv.reader(ProjectsMap):
 373             ProjName = I[0]
 374             Path = os.path.join(ProjName, getSBOutputDirName(True))
 375
 376             if Mode == "delete":
 377                 Command = "svn delete %s" % (Path,)
 378             else:
 379                 Command = "svn add %s" % (Path,)
 380
 381             if Verbose == 1:
 382                 print "  Executing: %s" % (Command,)
 383             check_call(Command, shell=True)
 384
 385         if Mode == "delete":
 386             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
 387                             "reference results.\""
 388         else:
 389             CommitCommand = "svn commit -m \"[analyzer tests] Add new " \
 390                             "reference results.\""
 391         if Verbose == 1:
 392             print "  Executing: %s" % (CommitCommand,)
 393         check_call(CommitCommand, shell=True)
 394     except:
 395         print "Error: SVN update failed."
 396         sys.exit(-1)
 397
 398 def testProject(ID, IsScanBuild, IsReferenceBuild=False, Dir=None):
 399     print " \n\n--- Building project %s" % (ID,)
 400
 401     TBegin = time.time()
 402
 403     if Dir is None :
 404         Dir = getProjectDir(ID)
 405     if Verbose == 1:
 406         print "  Build directory: %s." % (Dir,)
 407
 408     # Set the build results directory.
 409     RelOutputDir = getSBOutputDirName(IsReferenceBuild)
 410     SBOutputDir = os.path.join(Dir, RelOutputDir)
 411
 412     buildProject(Dir, SBOutputDir, IsScanBuild, IsReferenceBuild)
 413
 414     checkBuild(SBOutputDir)
 415
 416     if IsReferenceBuild == False:
 417         runCmpResults(Dir)
 418
 419     print "Completed tests for project %s (time: %.2f)." % \
 420           (ID, (time.time()-TBegin))
 421
 422 def testAll(IsReferenceBuild = False, UpdateSVN = False):
 423     PMapFile = open(getProjectMapPath(), "rb")
 424     try:
 425         # Validate the input.
 426         for I in csv.reader(PMapFile):
 427             if (len(I) != 2) :
 428                 print "Error: Rows in the ProjectMapFile should have 3 entries."
 429                 raise Exception()
 430             if (not ((I[1] == "1") | (I[1] == "0"))):
 431                 print "Error: Second entry in the ProjectMapFile should be 0 or 1."
 432                 raise Exception()
 433
 434         # When we are regenerating the reference results, we might need to
 435         # update svn. Remove reference results from SVN.
 436         if UpdateSVN == True:
 437             assert(IsReferenceBuild == True);
 438             updateSVN("delete",  PMapFile);
 439
 440         # Test the projects.
 441         PMapFile.seek(0)
 442         for I in csv.reader(PMapFile):
 443             testProject(I[0], int(I[1]), IsReferenceBuild)
 444
 445         # Add reference results to SVN.
 446         if UpdateSVN == True:
 447             updateSVN("add",  PMapFile);
 448
 449     except:
 450         print "Error occurred. Premature termination."
 451         raise
 452     finally:
 453         PMapFile.close()
 454
 455 if __name__ == '__main__':
 456     IsReference = False
 457     UpdateSVN = False
 458     if len(sys.argv) >= 2:
 459         if sys.argv[1] == "-r":
 460             IsReference = True
 461         elif sys.argv[1] == "-rs":
 462             IsReference = True
 463             UpdateSVN = True
 464         else:
 465           print >> sys.stderr, 'Usage: ', sys.argv[0],\
 466                              '[-r|-rs]' \
 467                              'Use -r to regenerate reference output' \
 468                              'Use -rs to regenerate reference output and update svn'
 469
 470     testAll(IsReference, UpdateSVN)