4 CmpRuns - A simple tool for comparing two static analyzer runs to determine
5 which reports have been added, removed, or changed.
7 This is designed to support automated testing using the static analyzer, from two perspectives:
9 1. To monitor changes in the static analyzer's reports on real code bases,
10 for regression testing.
12 2. For use by end users who want to integrate regular static analyzer testing
13 into a buildbot like environment.
17 # Load the results of both runs, to obtain lists of the corresponding
18 # AnalysisDiagnostic objects.
20 resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
21 resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)
23 # Generate a relation from diagnostics in run A to diagnostics in run B
24 # to obtain a list of pairs (a, b), where either side may be None.
25 diff = compareResults(resultsA, resultsB, opts)
29 from collections import defaultdict
32 from optparse import OptionParser
39 STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL)
43 Color for terminal highlight.
46 GREEN = '\x1b[6;30;42m'
49 # Information about analysis run:
50 # path - the analysis output directory
51 # root - the name of the root directory, which will be disregarded when
52 # determining the source file name
54 def __init__(self, path, root="", verboseLog=None):
56 self.root = root.rstrip("/\\")
57 self.verboseLog = verboseLog
60 class AnalysisDiagnostic:
61 def __init__(self, data, report, htmlReport):
63 self._loc = self._data['location']
65 self._htmlReport = htmlReport
66 self._reportSize = len(self._data['path'])
68 def getFileName(self):
69 root = self._report.run.root
70 fileName = self._report.files[self._loc['file']]
71 if fileName.startswith(root) and len(root) > 0:
72 return fileName[len(root) + 1:]
76 return self._loc['line']
79 return self._loc['col']
def getPathLength(self):
    """Return the stored path length of this diagnostic.

    The value is the instance's ``_reportSize`` attribute, which the
    constructor sets to the length of the diagnostic's 'path' list.
    """
    path_length = self._reportSize
    return path_length
def getCategory(self):
    """Return the 'category' entry of this diagnostic's raw data.

    Raises KeyError if the underlying data has no 'category' key.
    """
    raw = self._data
    return raw['category']
def getDescription(self):
    """Return the 'description' entry of this diagnostic's raw data.

    Raises KeyError if the underlying data has no 'description' key.
    """
    raw = self._data
    return raw['description']
90 def getIssueIdentifier(self):
91 id = self.getFileName() + "+"
92 if 'issue_context' in self._data:
93 id += self._data['issue_context'] + "+"
94 if 'issue_hash_content_of_line_in_context' in self._data:
95 id += str(self._data['issue_hash_content_of_line_in_context'])
99 if self._htmlReport is None:
101 return os.path.join(self._report.run.path, self._htmlReport)
103 def getReadableName(self):
104 if 'issue_context' in self._data:
105 funcnamePostfix = "#" + self._data['issue_context']
108 return '%s%s:%d:%d, %s: %s' % (self.getFileName(),
111 self.getColumn(), self.getCategory(),
112 self.getDescription())
114 # Note, the data format is not an API and may change from one analyzer
115 # version to another.
116 def getRawData(self):
120 class AnalysisReport:
121 def __init__(self, run, files):
124 self.diagnostics = []
128 def __init__(self, info):
129 self.path = info.path
130 self.root = info.root
133 # Cumulative list of all diagnostics from all the reports.
134 self.diagnostics = []
135 self.clang_version = None
def getClangVersion(self):
    """Return the clang version recorded for this run.

    The attribute starts out as None and is filled in from the first
    report that carries a 'clang_version' entry.
    """
    version = self.clang_version
    return version
141 def readSingleFile(self, p, deleteEmpty):
142 data = plistlib.readPlist(p)
143 if 'statistics' in data:
144 self.stats.append(json.loads(data['statistics']))
145 data.pop('statistics')
147 # We want to retrieve the clang version even if there are no
148 # reports. Assume that all reports were created using the same
149 # clang version (this is always true and is more efficient).
150 if 'clang_version' in data:
151 if self.clang_version is None:
152 self.clang_version = data.pop('clang_version')
154 data.pop('clang_version')
156 # Ignore/delete empty reports.
157 if not data['files']:
162 # Extract the HTML reports, if they exist.
163 if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
165 for d in data['diagnostics']:
166 # FIXME: Why is this named files, when does it have multiple
168 assert len(d['HTMLDiagnostics_files']) == 1
169 htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
171 htmlFiles = [None] * len(data['diagnostics'])
173 report = AnalysisReport(self, data.pop('files'))
174 diagnostics = [AnalysisDiagnostic(d, report, h)
175 for d, h in zip(data.pop('diagnostics'), htmlFiles)]
179 report.diagnostics.extend(diagnostics)
180 self.reports.append(report)
181 self.diagnostics.extend(diagnostics)
184 def loadResults(path, opts, root="", deleteEmpty=True):
186 Backwards compatibility API.
188 return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
192 def loadResultsFromSingleRun(info, deleteEmpty=True):
194 # Load results of the analyses from a given output folder.
195 # - info is the SingleRunInfo object
196 # - deleteEmpty specifies if the empty plist files should be deleted
200 run = AnalysisRun(info)
202 if os.path.isfile(path):
203 run.readSingleFile(path, deleteEmpty)
205 for (dirpath, dirnames, filenames) in os.walk(path):
207 if (not f.endswith('plist')):
209 p = os.path.join(dirpath, f)
210 run.readSingleFile(p, deleteEmpty)
def cmpAnalysisDiagnostic(d):
    """Sort key for a diagnostic: its issue identifier.

    `d` is any object exposing getIssueIdentifier(); the identifier is
    returned unchanged so that diagnostics can be ordered by it.
    """
    issue_id = d.getIssueIdentifier()
    return issue_id
219 def compareResults(A, B, opts):
221 compareResults - Generate a relation from diagnostics in run A to
222 diagnostics in run B.
224 The result is the relation as a list of pairs (a, b) where
225 each element {a,b} is None or a matching element from the respective run
230 # Map size_before -> size_after
231 path_difference_data = []
233 # Quickly eliminate equal elements.
236 eltsA = list(A.diagnostics)
237 eltsB = list(B.diagnostics)
238 eltsA.sort(key=cmpAnalysisDiagnostic)
239 eltsB.sort(key=cmpAnalysisDiagnostic)
240 while eltsA and eltsB:
243 if (a.getIssueIdentifier() == b.getIssueIdentifier()):
244 if a.getPathLength() != b.getPathLength():
245 if opts.relative_path_histogram:
246 path_difference_data.append(
247 float(a.getPathLength()) / b.getPathLength())
248 elif opts.relative_log_path_histogram:
249 path_difference_data.append(
250 log(float(a.getPathLength()) / b.getPathLength()))
251 elif opts.absolute_path_histogram:
252 path_difference_data.append(
253 a.getPathLength() - b.getPathLength())
256 elif a.getIssueIdentifier() > b.getIssueIdentifier():
265 # FIXME: Add fuzzy matching. One simple and possibly effective idea would
266 # be to bin the diagnostics, print them in a normalized form (based solely
267 # on the structure of the diagnostic), compute the diff, then use that as
268 # the basis for matching. This has the nice property that we don't depend
269 # in any way on the diagnostic format.
272 res.append((a, None))
274 res.append((None, b))
276 if opts.relative_log_path_histogram or opts.relative_path_histogram or \
277 opts.absolute_path_histogram:
278 from matplotlib import pyplot
279 pyplot.hist(path_difference_data, bins=100)
284 def deriveStats(results):
285 # Assume all keys are the same in each statistics bucket.
286 combined_data = defaultdict(list)
287 for stat in results.stats:
288 for key, value in stat.iteritems():
289 combined_data[key].append(value)
291 for key, values in combined_data.iteritems():
292 combined_stats[str(key)] = {
295 "mean": sum(values) / len(values),
296 "median": sorted(values)[len(values) / 2],
299 return combined_stats
302 def compareStats(resultsA, resultsB):
303 statsA = deriveStats(resultsA)
304 statsB = deriveStats(resultsB)
305 keys = sorted(statsA.keys())
308 for kkey in statsA[key]:
309 valA = float(statsA[key][kkey])
310 valB = float(statsB[key][kkey])
311 report = "%.3f -> %.3f" % (valA, valB)
312 # Only apply highlighting when writing to TTY and it's not Windows
313 if sys.stdout.isatty() and os.name != 'nt':
315 ratio = (valB - valA) / valB
317 report = Colors.GREEN + report + Colors.CLEAR
319 report = Colors.RED + report + Colors.CLEAR
320 print "\t %s %s" % (kkey, report)
322 def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True,
324 # Load the run results.
325 resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
326 resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)
328 compareStats(resultsA, resultsB)
332 # Open the verbose log, if given.
334 auxLog = open(opts.verboseLog, "wb")
338 diff = compareResults(resultsA, resultsB, opts)
345 Stdout.write("ADDED: %r\n" % b.getReadableName())
349 auxLog.write("('ADDED', %r, %r)\n" % (b.getReadableName(),
352 Stdout.write("REMOVED: %r\n" % a.getReadableName())
356 auxLog.write("('REMOVED', %r, %r)\n" % (a.getReadableName(),
361 TotalReports = len(resultsB.diagnostics)
362 Stdout.write("TOTAL REPORTS: %r\n" % TotalReports)
363 Stdout.write("TOTAL ADDED: %r\n" % totalAdded)
364 Stdout.write("TOTAL REMOVED: %r\n" % totalRemoved)
366 auxLog.write("('TOTAL NEW REPORTS', %r)\n" % TotalReports)
367 auxLog.write("('TOTAL DIFFERENCES', %r)\n" % foundDiffs)
370 return foundDiffs, len(resultsA.diagnostics), len(resultsB.diagnostics)
372 def generate_option_parser():
373 parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
374 parser.add_option("", "--rootA", dest="rootA",
375 help="Prefix to ignore on source files for directory A",
376 action="store", type=str, default="")
377 parser.add_option("", "--rootB", dest="rootB",
378 help="Prefix to ignore on source files for directory B",
379 action="store", type=str, default="")
380 parser.add_option("", "--verbose-log", dest="verboseLog",
381 help="Write additional information to LOG \
383 action="store", type=str, default=None,
385 parser.add_option("--relative-path-differences-histogram",
386 action="store_true", dest="relative_path_histogram",
388 help="Show histogram of relative paths differences. \
389 Requires matplotlib")
390 parser.add_option("--relative-log-path-differences-histogram",
391 action="store_true", dest="relative_log_path_histogram",
393 help="Show histogram of log relative paths differences. \
394 Requires matplotlib")
395 parser.add_option("--absolute-path-differences-histogram",
396 action="store_true", dest="absolute_path_histogram",
398 help="Show histogram of absolute paths differences. \
399 Requires matplotlib")
400 parser.add_option("--stats-only", action="store_true", dest="stats_only",
401 default=False, help="Only show statistics on reports")
406 parser = generate_option_parser()
407 (opts, args) = parser.parse_args()
410 parser.error("invalid number of arguments")
414 dumpScanBuildResultsDiff(dirA, dirB, opts)
417 if __name__ == '__main__':