5 files changed, 240 insertions, 70 deletions
diff --git a/utils/analyzer/CmpRuns.py b/utils/analyzer/CmpRuns.py
index e68c45df..f2961cf 100755
--- a/utils/analyzer/CmpRuns.py
+++ b/utils/analyzer/CmpRuns.py
@@ -11,12 +11,79 @@ two perspectives:
 
   2. For use by end users who want to integrate regular static analyzer testing
      into a buildbot like environment.
+
+Usage:
+
+    # Load the results of both runs, to obtain lists of the corresponding
+    # AnalysisDiagnostic objects.
+    #
+    # root - the name of the root directory, which will be disregarded when
+    # determining the source file name
+    #
+    resultsA = loadResults(dirA, opts, root, deleteEmpty)
+    resultsB = loadResults(dirB, opts, root, deleteEmpty)
+
+    # Generate a relation from diagnostics in run A to diagnostics in run B
+    # to obtain a list of triples (a, b, confidence).
+    diff = compareResults(resultsA, resultsB)
+
 """
 
 import os
 import plistlib
 
 #
+class AnalysisDiagnostic:
+    """One diagnostic entry belonging to a report of an analysis run."""
+
+    def __init__(self, data, report, htmlReport):
+        # data: the diagnostic's dictionary (must contain 'location');
+        # report: the owning report; htmlReport: HTML file name, or None.
+        self._data = data
+        self._loc = self._data['location']
+        self._report = report
+        self._htmlReport = htmlReport
+
+    def getFileName(self):
+        """Return the source file name as reported by run.getSourceName."""
+        fullPath = self._report.files[self._loc['file']]
+        return self._report.run.getSourceName(fullPath)
+
+    def getLine(self):
+        """Return the 'line' entry of the diagnostic's location."""
+        return self._loc['line']
+
+    def getColumn(self):
+        """Return the 'col' entry of the diagnostic's location."""
+        return self._loc['col']
+
+    def getCategory(self):
+        """Return the diagnostic's 'category' entry."""
+        return self._data['category']
+
+    def getDescription(self):
+        """Return the diagnostic's 'description' entry."""
+        return self._data['description']
+
+    def getIssueIdentifier(self):
+        """Return the string key used to sort and match diagnostics."""
+        data = self._data
+        ctx = data['issue_context'] + ":" if 'issue_context' in data else ''
+        hsh = str(data['issue_hash']) + ":" if 'issue_hash' in data else ''
+        # A ":" always precedes the file name, even when ctx and hsh are empty.
+        return ctx + hsh + ":" + self.getFileName()
+
+    def getReport(self):
+        """Return the HTML report path, or " " when there is no HTML report."""
+        if self._htmlReport is None:
+            return " "
+        return os.path.join(self._report.run.path, self._htmlReport)
+
+    def getReadableName(self):
+        """Return 'file:line:col, category: description' for display."""
+        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
+                                     self.getColumn(), self.getCategory(),
+                                     self.getDescription())
 
 class multidict:
     def __init__(self, elts=()):
@@ -45,8 +100,9 @@ class multidict:
 #
 
 class CmpOptions:
-    def __init__(self, verboseLog=None, root=""):
-        self.root = root
+    def __init__(self, verboseLog=None, rootA="", rootB=""):
+        self.rootA = rootA
+        self.rootB = rootB
         self.verboseLog = verboseLog
 
 class AnalysisReport:
@@ -54,49 +110,22 @@ class AnalysisReport:
         self.run = run
         self.files = files
 
-class AnalysisDiagnostic:
-    def __init__(self, data, report, htmlReport):
-        self.data = data
-        self.report = report
-        self.htmlReport = htmlReport
-
-    def getReadableName(self):
-        loc = self.data['location']
-        filename = self.report.run.getSourceName(self.report.files[loc['file']])
-        line = loc['line']
-        column = loc['col']
-        category = self.data['category']
-        description = self.data['description']
-
-        # FIXME: Get a report number based on this key, to 'distinguish'
-        # reports, or something.
-        
-        return '%s:%d:%d, %s: %s' % (filename, line, column, category, 
-                                   description)
-
-    def getReportData(self):
-        if self.htmlReport is None:
-            return " "
-        return os.path.join(self.report.run.path, self.htmlReport)
-        # We could also dump the report with:
-        # return open(os.path.join(self.report.run.path,
-        #                         self.htmlReport), "rb").read() 
-
 class AnalysisRun:
-    def __init__(self, path, opts):
+    def __init__(self, path, root, opts):
         self.path = path
+        self.root = root
         self.reports = []
         self.diagnostics = []
         self.opts = opts
 
     def getSourceName(self, path):
-        if path.startswith(self.opts.root):
-            return path[len(self.opts.root):]
+        if path.startswith(self.root):
+            return path[len(self.root):]
         return path
 
-def loadResults(path, opts, deleteEmpty=True):
-    run = AnalysisRun(path, opts)
-
+def loadResults(path, opts, root = "", deleteEmpty=True):
+    run = AnalysisRun(path, root, opts)
+    
     for f in os.listdir(path):
         if (not f.startswith('report') or
             not f.endswith('plist')):
@@ -134,6 +163,9 @@ def loadResults(path, opts, deleteEmpty=True):
 
     return run
 
+def cmpAnalysisDiagnostic(d) :
+    return d.getIssueIdentifier()
+
 def compareResults(A, B):
     """
     compareResults - Generate a relation from diagnostics in run A to
@@ -152,14 +184,16 @@ def compareResults(A, B):
     neqB = []
     eltsA = list(A.diagnostics)
     eltsB = list(B.diagnostics)
-    eltsA.sort(key = lambda d: d.data)
-    eltsB.sort(key = lambda d: d.data)
+    eltsA.sort(key = cmpAnalysisDiagnostic)
+    eltsB.sort(key = cmpAnalysisDiagnostic)
     while eltsA and eltsB:
         a = eltsA.pop()
         b = eltsB.pop()
-        if a.data['location'] == b.data['location']:
+        # Both lists are sorted by issue identifier, so unmatched elements
+        # must be ordered by that same key for the merge walk to be correct.
+        if (a.getIssueIdentifier() == b.getIssueIdentifier()) :
             res.append((a, b, 0))
-        elif a.data > b.data:
+        elif a.getIssueIdentifier() > b.getIssueIdentifier():
             neqA.append(a)
             eltsB.append(b)
         else:
@@ -181,10 +213,10 @@ def compareResults(A, B):
 
     return res
 
-def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
+def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
     # Load the run results.
-    resultsA = loadResults(dirA, opts, deleteEmpty)
-    resultsB = loadResults(dirB, opts, deleteEmpty)
+    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
+    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)
     
     # Open the verbose log, if given.
     if opts.verboseLog:
@@ -201,13 +233,13 @@ def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
             foundDiffs += 1
             if auxLog:
                 print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
-                                                        b.getReportData()))
+                                                        b.getReport()))
         elif b is None:
             print "REMOVED: %r" % a.getReadableName()
             foundDiffs += 1
             if auxLog:
                 print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
-                                                          a.getReportData()))
+                                                          a.getReport()))
         elif confidence:
             print "CHANGED: %r to %r" % (a.getReadableName(),
                                          b.getReadableName())
@@ -216,8 +248,8 @@ def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
                 print >>auxLog, ("('CHANGED', %r, %r, %r, %r)" 
                                  % (a.getReadableName(),
                                     b.getReadableName(),
-                                    a.getReportData(),
-                                    b.getReportData()))
+                                    a.getReport(),
+                                    b.getReport()))
         else:
             pass
 
@@ -233,8 +265,11 @@ def cmpScanBuildResults(dirA, dirB, opts, deleteEmpty=True):
 def main():
     from optparse import OptionParser
     parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
-    parser.add_option("", "--root", dest="root",
-                      help="Prefix to ignore on source files",
+    parser.add_option("", "--rootA", dest="rootA",
+                      help="Prefix to ignore on source files for directory A",
+                      action="store", type=str, default="")
+    parser.add_option("", "--rootB", dest="rootB",
+                      help="Prefix to ignore on source files for directory B",
                       action="store", type=str, default="")
     parser.add_option("", "--verbose-log", dest="verboseLog",
                       help="Write additional information to LOG [default=None]",
@@ -247,7 +282,7 @@ def main():
 
     dirA,dirB = args
 
-    cmpScanBuildResults(dirA, dirB, opts)    
+    dumpScanBuildResultsDiff(dirA, dirB, opts)    
 
 if __name__ == '__main__':
     main()
diff --git a/utils/analyzer/SATestAdd.py b/utils/analyzer/SATestAdd.py
index ce64bc8..2d32533 100644
--- a/utils/analyzer/SATestAdd.py
+++ b/utils/analyzer/SATestAdd.py
@@ -41,7 +41,7 @@ def addNewProject(ID, IsScanBuild) :
         sys.exit(-1)
         
     # Build the project.
-    SATestBuild.testProject(ID, True, IsScanBuild, Dir)
+    SATestBuild.testProject(ID, IsScanBuild, IsReferenceBuild=True, Dir=Dir)
 
     # Add the project ID to the project map.
     ProjectMapPath = os.path.join(CurDir, SATestBuild.ProjectMapFile)
diff --git a/utils/analyzer/SATestBuild.py b/utils/analyzer/SATestBuild.py
index 3fccb9a..fd4bc8a 100644
--- a/utils/analyzer/SATestBuild.py
+++ b/utils/analyzer/SATestBuild.py
@@ -72,12 +72,10 @@ SBOutputDirReferencePrefix = "Ref"
 
 # The list of checkers used during analyzes.
 # Currently, consists of all the non experimental checkers.
-Checkers="experimental.security.taint,core,deadcode,cplusplus,security,unix,osx,cocoa"
+Checkers="experimental.security.taint,core,deadcode,security,unix,osx"
 
 Verbose = 1
 
-IsReferenceBuild = False
-
 # Make sure we flush the output after every print statement.
 class flushfile(object):
     def __init__(self, f):
@@ -100,7 +98,7 @@ def getProjectMapPath():
 def getProjectDir(ID):
     return os.path.join(os.path.abspath(os.curdir), ID)        
 
-def getSBOutputDirName() :
+def getSBOutputDirName(IsReferenceBuild) :
     if IsReferenceBuild == True :
         return SBOutputDirReferencePrefix + SBOutputDirName
     else :
@@ -210,7 +208,7 @@ def runAnalyzePreprocessed(Dir, SBOutputDir):
         if Failed == False:
             os.remove(LogFile.name);
 
-def buildProject(Dir, SBOutputDir, IsScanBuild):
+def buildProject(Dir, SBOutputDir, IsScanBuild, IsReferenceBuild):
     TBegin = time.time() 
 
     BuildLogPath = os.path.join(SBOutputDir, LogFolderName, BuildLogName)
@@ -295,7 +293,7 @@ def checkBuild(SBOutputDir):
     
         FailuresCopied = NumOfFailuresInSummary
         Idx = 0
-        for FailLogPathI in glob.glob(SBOutputDir + "/*/failures/*.stderr.txt"):
+        for FailLogPathI in Failures:
             if Idx >= NumOfFailuresInSummary:
                 break;
             Idx += 1 
@@ -359,7 +357,7 @@ def runCmpResults(Dir):
         OLD_STDOUT = sys.stdout
         sys.stdout = Discarder()
         # Scan the results, delete empty plist files.
-        NumDiffs = CmpRuns.cmpScanBuildResults(RefDir, NewDir, Opts, False)
+        NumDiffs = CmpRuns.dumpScanBuildResultsDiff(RefDir, NewDir, Opts, False)
         sys.stdout = OLD_STDOUT
         if (NumDiffs > 0) :
             print "Warning: %r differences in diagnostics. See %s" % \
@@ -373,7 +371,7 @@ def updateSVN(Mode, ProjectsMap):
         ProjectsMap.seek(0)    
         for I in csv.reader(ProjectsMap):
             ProjName = I[0] 
-            Path = os.path.join(ProjName, getSBOutputDirName())
+            Path = os.path.join(ProjName, getSBOutputDirName(True))
     
             if Mode == "delete":
                 Command = "svn delete %s" % (Path,)
@@ -382,7 +380,7 @@ def updateSVN(Mode, ProjectsMap):
 
             if Verbose == 1:        
                 print "  Executing: %s" % (Command,)
-                check_call(Command, shell=True)    
+            check_call(Command, shell=True)    
     
         if Mode == "delete":
             CommitCommand = "svn commit -m \"[analyzer tests] Remove " \
@@ -392,12 +390,12 @@ def updateSVN(Mode, ProjectsMap):
                             "reference results.\""
         if Verbose == 1:        
             print "  Executing: %s" % (CommitCommand,)
-            check_call(CommitCommand, shell=True)    
+        check_call(CommitCommand, shell=True)    
     except:
         print "Error: SVN update failed."
         sys.exit(-1)
         
-def testProject(ID, IsScanBuild, Dir=None):
+def testProject(ID, IsScanBuild, IsReferenceBuild=False, Dir=None):
     print " \n\n--- Building project %s" % (ID,)
 
     TBegin = time.time() 
@@ -408,10 +406,10 @@ def testProject(ID, IsScanBuild, Dir=None):
         print "  Build directory: %s." % (Dir,)
     
     # Set the build results directory.
-    RelOutputDir = getSBOutputDirName()
+    RelOutputDir = getSBOutputDirName(IsReferenceBuild)
     SBOutputDir = os.path.join(Dir, RelOutputDir)
                 
-    buildProject(Dir, SBOutputDir, IsScanBuild)    
+    buildProject(Dir, SBOutputDir, IsScanBuild, IsReferenceBuild)
 
     checkBuild(SBOutputDir)
     
@@ -421,10 +419,7 @@ def testProject(ID, IsScanBuild, Dir=None):
     print "Completed tests for project %s (time: %.2f)." % \
           (ID, (time.time()-TBegin))
     
-def testAll(InIsReferenceBuild = False, UpdateSVN = False):
-    global IsReferenceBuild
-    IsReferenceBuild = InIsReferenceBuild
-
+def testAll(IsReferenceBuild = False, UpdateSVN = False):
     PMapFile = open(getProjectMapPath(), "rb")
     try:        
         # Validate the input.
@@ -439,13 +434,13 @@ def testAll(InIsReferenceBuild = False, UpdateSVN = False):
         # When we are regenerating the reference results, we might need to 
         # update svn. Remove reference results from SVN.
         if UpdateSVN == True:
-            assert(InIsReferenceBuild == True);
+            assert(IsReferenceBuild == True);
             updateSVN("delete",  PMapFile);
             
         # Test the projects.
         PMapFile.seek(0)    
         for I in csv.reader(PMapFile):
-            testProject(I[0], int(I[1]))
+            testProject(I[0], int(I[1]), IsReferenceBuild)
 
         # Add reference results to SVN.
         if UpdateSVN == True:
diff --git a/utils/analyzer/SumTimerInfo.py b/utils/analyzer/SumTimerInfo.py
new file mode 100644
index 0000000..a6731bb
--- /dev/null
+++ b/utils/analyzer/SumTimerInfo.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+"""
+Script to summarize statistics in the scan-build output.
+
+Statistics are enabled by passing '-internal-stats' option to scan-build 
+(or '-analyzer-stats' to the analyzer).
+
+"""
+
+import string
+from operator import itemgetter
+import sys
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print >> sys.stderr, 'Usage: ', sys.argv[0],\
+                             'scan_build_output_file'
+        sys.exit(-1)
+    # Accumulate the statistics found on each line of the scan-build output.
+    f = open(sys.argv[1], 'r')
+    Time = 0.0
+    TotalTime = 0.0
+    MaxTime = 0.0
+    Warnings = 0
+    Count = 0
+    FunctionsAnalyzed = 0
+    ReachableBlocks = 0
+    ReachedMaxSteps = 0
+    NumSteps = 0
+    MaxCFGSize = 0
+    Mode = 1  # NOTE(review): never set to another value, so Mode checks always pass
+    for line in f:
+        if ("Miscellaneous Ungrouped Timers" in line) :
+          Mode = 1
+        if (("Analyzer Total Time" in line) and (Mode == 1)) :
+          TUTime = float(line.split()[6])
+          Time = Time + TUTime
+          Count = Count + 1
+          # Keep the largest single "Analyzer Total Time" value seen.
+          MaxTime = max(MaxTime, TUTime)
+        if ((("warning generated." in line) or ("warnings generated." in line)) and Mode == 1) :
+          s = line.split()
+          Warnings = Warnings + int(s[0])
+        if (("The # of functions analysed (as top level)." in line) and (Mode == 1)) :
+          s = line.split()
+          FunctionsAnalyzed = FunctionsAnalyzed + int(s[0])
+        if (("The % of reachable basic blocks" in line) and (Mode == 1)) :
+          s = line.split()
+          ReachableBlocks = ReachableBlocks + int(s[0])
+        if (("The # of times we reached the max number of steps." in line) and (Mode == 1)) :
+          s = line.split()
+          ReachedMaxSteps = ReachedMaxSteps + int(s[0])
+        if (("The maximum number of basic blocks in a function" in line) and (Mode == 1)) :
+          s = line.split()
+          if (MaxCFGSize < int(s[0])) :
+            MaxCFGSize = int(s[0])
+        if (("The # of steps executed." in line) and (Mode == 1)) :
+          s = line.split()
+          NumSteps = NumSteps + int(s[0])
+        if ((")  Total" in line) and (Mode == 1)) :
+          s = line.split()
+          TotalTime = TotalTime + float(s[6])
+    f.close()
+    print "TU Count %d" % (Count)
+    print "Time %f" % (Time)
+    print "Warnings %d" % (Warnings)
+    print "Functions Analyzed %d" % (FunctionsAnalyzed)
+    print "Reachable Blocks %d" % (ReachableBlocks)
+    print "Reached Max Steps %d" % (ReachedMaxSteps)
+    print "Number of Steps %d" % (NumSteps)
+    print "MaxTime %f" % (MaxTime)
+    print "TotalTime %f" % (TotalTime)
+    print "Max CFG Size %d" % (MaxCFGSize)
+    
+\ No newline at end of file
diff --git a/utils/analyzer/reducer.pl b/utils/analyzer/reducer.pl
new file mode 100755
index 0000000..872f61b
--- /dev/null
+++ b/utils/analyzer/reducer.pl
@@ -0,0 +1,65 @@
+#!/usr/bin/perl -w
+use strict;
+use File::Temp qw/ tempdir /;
+my $prog = "reducer";
+
+die "$prog <code file> <error string> [optional command]\n" if ($#ARGV < 0);
+my $file = shift @ARGV;
+die "$prog: [error] cannot read file $file\n" if (! -r $file);
+
+my $magic = shift @ARGV;
+die "$prog: [error] no error string specified\n" if (! defined $magic);
+
+# Create a backup of the file (base name only, so paths like dir/f.c work).
+my $dir = tempdir( CLEANUP => 1 );
+print "$prog: created temporary directory '$dir'\n";
+my $srcFile = "$dir/" . ($file =~ m{([^/]+)\z})[0];
+system("cp", $file, $srcFile) == 0 or die "$prog: [error] cannot copy $file\n";
+
+# Create the script.
+my $scriptFile = "$dir/script";
+open(OUT, ">$scriptFile") or die "$prog: cannot create '$scriptFile'\n";
+my $reduceOut = "$dir/reduceOut";
+
+my $command;
+if (scalar(@ARGV) > 0) { $command = \@ARGV; }
+else {
+  my $compiler = "clang";
+  $command = [$compiler, "-fsyntax-only", "-Wfatal-errors", "-Wno-deprecated-declarations", "-Wimplicit-function-declaration"];
+}
+push @$command, $srcFile;
+my $commandStr = "@$command";
+
+print OUT <<ENDTEXT;
+#!/usr/bin/perl -w
+use strict;
+my \$BAD = 1;
+my \$GOOD = 0;
+`rm -f $reduceOut`;
+my \$command = "$commandStr > $reduceOut 2>&1";
+system(\$command);
+open(IN, "$reduceOut") or exit(\$BAD);
+my \$found = 0;
+while(<IN>) {
+  if (/$magic/) { exit \$GOOD; }
+}
+exit \$BAD;
+ENDTEXT
+close(OUT);
+`chmod +x $scriptFile`;
+
+print "$prog: starting reduction\n";
+sub multidelta($) {
+    my ($level) = @_;
+    system("multidelta -level=$level $scriptFile $srcFile");
+}
+
+for (my $i = 1 ; $i <= 5; $i++) {
+  foreach my $level (0,0,1,1,2,2,10) {
+    multidelta($level);
+  }
+}
+
+# Copy the final file; list-form system() avoids shell splitting of the names.
+system("cp", $srcFile, "$file.reduced") == 0 or die "$prog: [error] cannot create '$file.reduced'\n";
+print "$prog: generated '$file.reduced'\n";