tools/scan-build-py/libscanbuild/report.py

   1 # -*- coding: utf-8 -*-
   2 #                     The LLVM Compiler Infrastructure
   3 #
   4 # This file is distributed under the University of Illinois Open Source
   5 # License. See LICENSE.TXT for details.
   6 """ This module is responsible to generate 'index.html' for the report.
   7
   8 The input for this step is the output directory, where individual reports
   9 could be found. It parses those reports and generates 'index.html'. """
  10
  11 import re
  12 import os
  13 import os.path
  14 import sys
  15 import shutil
  16 import itertools
  17 import plistlib
  18 import glob
  19 import json
  20 import logging
  21 import datetime
  22 from libscanbuild import duplicate_check
  23 from libscanbuild.clang import get_version
  24
  25 __all__ = ['document']
  26
  27
  28 def document(args):
  29     """ Generates cover report and returns the number of bugs/crashes. """
  30
  31     html_reports_available = args.output_format in {'html', 'plist-html'}
  32
  33     logging.debug('count crashes and bugs')
  34     crash_count = sum(1 for _ in read_crashes(args.output))
  35     bug_counter = create_counters()
  36     for bug in read_bugs(args.output, html_reports_available):
  37         bug_counter(bug)
  38     result = crash_count + bug_counter.total
  39
  40     if html_reports_available and result:
  41         use_cdb = os.path.exists(args.cdb)
  42
  43         logging.debug('generate index.html file')
  44         # common prefix for source files to have sorter path
  45         prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
  46         # assemble the cover from multiple fragments
  47         fragments = []
  48         try:
  49             if bug_counter.total:
  50                 fragments.append(bug_summary(args.output, bug_counter))
  51                 fragments.append(bug_report(args.output, prefix))
  52             if crash_count:
  53                 fragments.append(crash_report(args.output, prefix))
  54             assemble_cover(args, prefix, fragments)
  55             # copy additional files to the report
  56             copy_resource_files(args.output)
  57             if use_cdb:
  58                 shutil.copy(args.cdb, args.output)
  59         finally:
  60             for fragment in fragments:
  61                 os.remove(fragment)
  62     return result
  63
  64
  65 def assemble_cover(args, prefix, fragments):
  66     """ Put together the fragments into a final report. """
  67
  68     import getpass
  69     import socket
  70
  71     if args.html_title is None:
  72         args.html_title = os.path.basename(prefix) + ' - analyzer results'
  73
  74     with open(os.path.join(args.output, 'index.html'), 'w') as handle:
  75         indent = 0
  76         handle.write(reindent("""
  77         |<!DOCTYPE html>
  78         |<html>
  79         |  <head>
  80         |    <title>{html_title}</title>
  81         |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
  82         |    <script type='text/javascript' src="sorttable.js"></script>
  83         |    <script type='text/javascript' src='selectable.js'></script>
  84         |  </head>""", indent).format(html_title=args.html_title))
  85         handle.write(comment('SUMMARYENDHEAD'))
  86         handle.write(reindent("""
  87         |  <body>
  88         |    <h1>{html_title}</h1>
  89         |    <table>
  90         |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
  91         |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
  92         |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
  93         |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
  94         |      <tr><th>Date:</th><td>{date}</td></tr>
  95         |    </table>""", indent).format(html_title=args.html_title,
  96                                          user_name=getpass.getuser(),
  97                                          host_name=socket.gethostname(),
  98                                          current_dir=prefix,
  99                                          cmd_args=' '.join(sys.argv),
 100                                          clang_version=get_version(args.clang),
 101                                          date=datetime.datetime.today(
 102                                          ).strftime('%c')))
 103         for fragment in fragments:
 104             # copy the content of fragments
 105             with open(fragment, 'r') as input_handle:
 106                 shutil.copyfileobj(input_handle, handle)
 107         handle.write(reindent("""
 108         |  </body>
 109         |</html>""", indent))
 110
 111
 112 def bug_summary(output_dir, bug_counter):
 113     """ Bug summary is a HTML table to give a better overview of the bugs. """
 114
 115     name = os.path.join(output_dir, 'summary.html.fragment')
 116     with open(name, 'w') as handle:
 117         indent = 4
 118         handle.write(reindent("""
 119         |<h2>Bug Summary</h2>
 120         |<table>
 121         |  <thead>
 122         |    <tr>
 123         |      <td>Bug Type</td>
 124         |      <td>Quantity</td>
 125         |      <td class="sorttable_nosort">Display?</td>
 126         |    </tr>
 127         |  </thead>
 128         |  <tbody>""", indent))
 129         handle.write(reindent("""
 130         |    <tr style="font-weight:bold">
 131         |      <td class="SUMM_DESC">All Bugs</td>
 132         |      <td class="Q">{0}</td>
 133         |      <td>
 134         |        <center>
 135         |          <input checked type="checkbox" id="AllBugsCheck"
 136         |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
 137         |        </center>
 138         |      </td>
 139         |    </tr>""", indent).format(bug_counter.total))
 140         for category, types in bug_counter.categories.items():
 141             handle.write(reindent("""
 142         |    <tr>
 143         |      <th>{0}</th><th colspan=2></th>
 144         |    </tr>""", indent).format(category))
 145             for bug_type in types.values():
 146                 handle.write(reindent("""
 147         |    <tr>
 148         |      <td class="SUMM_DESC">{bug_type}</td>
 149         |      <td class="Q">{bug_count}</td>
 150         |      <td>
 151         |        <center>
 152         |          <input checked type="checkbox"
 153         |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
 154         |        </center>
 155         |      </td>
 156         |    </tr>""", indent).format(**bug_type))
 157         handle.write(reindent("""
 158         |  </tbody>
 159         |</table>""", indent))
 160         handle.write(comment('SUMMARYBUGEND'))
 161     return name
 162
 163
 164 def bug_report(output_dir, prefix):
 165     """ Creates a fragment from the analyzer reports. """
 166
 167     pretty = prettify_bug(prefix, output_dir)
 168     bugs = (pretty(bug) for bug in read_bugs(output_dir, True))
 169
 170     name = os.path.join(output_dir, 'bugs.html.fragment')
 171     with open(name, 'w') as handle:
 172         indent = 4
 173         handle.write(reindent("""
 174         |<h2>Reports</h2>
 175         |<table class="sortable" style="table-layout:automatic">
 176         |  <thead>
 177         |    <tr>
 178         |      <td>Bug Group</td>
 179         |      <td class="sorttable_sorted">
 180         |        Bug Type
 181         |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
 182         |      </td>
 183         |      <td>File</td>
 184         |      <td>Function/Method</td>
 185         |      <td class="Q">Line</td>
 186         |      <td class="Q">Path Length</td>
 187         |      <td class="sorttable_nosort"></td>
 188         |    </tr>
 189         |  </thead>
 190         |  <tbody>""", indent))
 191         handle.write(comment('REPORTBUGCOL'))
 192         for current in bugs:
 193             handle.write(reindent("""
 194         |    <tr class="{bug_type_class}">
 195         |      <td class="DESC">{bug_category}</td>
 196         |      <td class="DESC">{bug_type}</td>
 197         |      <td>{bug_file}</td>
 198         |      <td class="DESC">{bug_function}</td>
 199         |      <td class="Q">{bug_line}</td>
 200         |      <td class="Q">{bug_path_length}</td>
 201         |      <td><a href="{report_file}#EndPath">View Report</a></td>
 202         |    </tr>""", indent).format(**current))
 203             handle.write(comment('REPORTBUG', {'id': current['report_file']}))
 204         handle.write(reindent("""
 205         |  </tbody>
 206         |</table>""", indent))
 207         handle.write(comment('REPORTBUGEND'))
 208     return name
 209
 210
 211 def crash_report(output_dir, prefix):
 212     """ Creates a fragment from the compiler crashes. """
 213
 214     pretty = prettify_crash(prefix, output_dir)
 215     crashes = (pretty(crash) for crash in read_crashes(output_dir))
 216
 217     name = os.path.join(output_dir, 'crashes.html.fragment')
 218     with open(name, 'w') as handle:
 219         indent = 4
 220         handle.write(reindent("""
 221         |<h2>Analyzer Failures</h2>
 222         |<p>The analyzer had problems processing the following files:</p>
 223         |<table>
 224         |  <thead>
 225         |    <tr>
 226         |      <td>Problem</td>
 227         |      <td>Source File</td>
 228         |      <td>Preprocessed File</td>
 229         |      <td>STDERR Output</td>
 230         |    </tr>
 231         |  </thead>
 232         |  <tbody>""", indent))
 233         for current in crashes:
 234             handle.write(reindent("""
 235         |    <tr>
 236         |      <td>{problem}</td>
 237         |      <td>{source}</td>
 238         |      <td><a href="{file}">preprocessor output</a></td>
 239         |      <td><a href="{stderr}">analyzer std err</a></td>
 240         |    </tr>""", indent).format(**current))
 241             handle.write(comment('REPORTPROBLEM', current))
 242         handle.write(reindent("""
 243         |  </tbody>
 244         |</table>""", indent))
 245         handle.write(comment('REPORTCRASHES'))
 246     return name
 247
 248
 249 def read_crashes(output_dir):
 250     """ Generate a unique sequence of crashes from given output directory. """
 251
 252     return (parse_crash(filename)
 253             for filename in glob.iglob(os.path.join(output_dir, 'failures',
 254                                                     '*.info.txt')))
 255
 256
 257 def read_bugs(output_dir, html):
 258     """ Generate a unique sequence of bugs from given output directory.
 259
 260     Duplicates can be in a project if the same module was compiled multiple
 261     times with different compiler options. These would be better to show in
 262     the final report (cover) only once. """
 263
 264     parser = parse_bug_html if html else parse_bug_plist
 265     pattern = '*.html' if html else '*.plist'
 266
 267     duplicate = duplicate_check(
 268         lambda bug: '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug))
 269
 270     bugs = itertools.chain.from_iterable(
 271         # parser creates a bug generator not the bug itself
 272         parser(filename)
 273         for filename in glob.iglob(os.path.join(output_dir, pattern)))
 274
 275     return (bug for bug in bugs if not duplicate(bug))
 276
 277
 278 def parse_bug_plist(filename):
 279     """ Returns the generator of bugs from a single .plist file. """
 280
 281     content = plistlib.readPlist(filename)
 282     files = content.get('files')
 283     for bug in content.get('diagnostics', []):
 284         if len(files) <= int(bug['location']['file']):
 285             logging.warning('Parsing bug from "%s" failed', filename)
 286             continue
 287
 288         yield {
 289             'result': filename,
 290             'bug_type': bug['type'],
 291             'bug_category': bug['category'],
 292             'bug_line': int(bug['location']['line']),
 293             'bug_path_length': int(bug['location']['col']),
 294             'bug_file': files[int(bug['location']['file'])]
 295         }
 296
 297
 298 def parse_bug_html(filename):
 299     """ Parse out the bug information from HTML output. """
 300
 301     patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
 302                 re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
 303                 re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
 304                 re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
 305                 re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
 306                 re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
 307                 re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
 308     endsign = re.compile(r'<!-- BUGMETAEND -->')
 309
 310     bug = {
 311         'report_file': filename,
 312         'bug_function': 'n/a',  # compatibility with < clang-3.5
 313         'bug_category': 'Other',
 314         'bug_line': 0,
 315         'bug_path_length': 1
 316     }
 317
 318     with open(filename) as handler:
 319         for line in handler.readlines():
 320             # do not read the file further
 321             if endsign.match(line):
 322                 break
 323             # search for the right lines
 324             for regex in patterns:
 325                 match = regex.match(line.strip())
 326                 if match:
 327                     bug.update(match.groupdict())
 328                     break
 329
 330     encode_value(bug, 'bug_line', int)
 331     encode_value(bug, 'bug_path_length', int)
 332
 333     yield bug
 334
 335
 336 def parse_crash(filename):
 337     """ Parse out the crash information from the report file. """
 338
 339     match = re.match(r'(.*)\.info\.txt', filename)
 340     name = match.group(1) if match else None
 341     with open(filename, mode='rb') as handler:
 342         # this is a workaround to fix windows read '\r\n' as new lines.
 343         lines = [line.decode().rstrip() for line in handler.readlines()]
 344         return {
 345             'source': lines[0],
 346             'problem': lines[1],
 347             'file': name,
 348             'info': name + '.info.txt',
 349             'stderr': name + '.stderr.txt'
 350         }
 351
 352
 353 def category_type_name(bug):
 354     """ Create a new bug attribute from bug by category and type.
 355
 356     The result will be used as CSS class selector in the final report. """
 357
 358     def smash(key):
 359         """ Make value ready to be HTML attribute value. """
 360
 361         return bug.get(key, '').lower().replace(' ', '_').replace("'", '')
 362
 363     return escape('bt_' + smash('bug_category') + '_' + smash('bug_type'))
 364
 365
 366 def create_counters():
 367     """ Create counters for bug statistics.
 368
 369     Two entries are maintained: 'total' is an integer, represents the
 370     number of bugs. The 'categories' is a two level categorisation of bug
 371     counters. The first level is 'bug category' the second is 'bug type'.
 372     Each entry in this classification is a dictionary of 'count', 'type'
 373     and 'label'. """
 374
 375     def predicate(bug):
 376         bug_category = bug['bug_category']
 377         bug_type = bug['bug_type']
 378         current_category = predicate.categories.get(bug_category, dict())
 379         current_type = current_category.get(bug_type, {
 380             'bug_type': bug_type,
 381             'bug_type_class': category_type_name(bug),
 382             'bug_count': 0
 383         })
 384         current_type.update({'bug_count': current_type['bug_count'] + 1})
 385         current_category.update({bug_type: current_type})
 386         predicate.categories.update({bug_category: current_category})
 387         predicate.total += 1
 388
 389     predicate.total = 0
 390     predicate.categories = dict()
 391     return predicate
 392
 393
 394 def prettify_bug(prefix, output_dir):
 395     def predicate(bug):
 396         """ Make safe this values to embed into HTML. """
 397
 398         bug['bug_type_class'] = category_type_name(bug)
 399
 400         encode_value(bug, 'bug_file', lambda x: escape(chop(prefix, x)))
 401         encode_value(bug, 'bug_category', escape)
 402         encode_value(bug, 'bug_type', escape)
 403         encode_value(bug, 'report_file', lambda x: escape(chop(output_dir, x)))
 404         return bug
 405
 406     return predicate
 407
 408
 409 def prettify_crash(prefix, output_dir):
 410     def predicate(crash):
 411         """ Make safe this values to embed into HTML. """
 412
 413         encode_value(crash, 'source', lambda x: escape(chop(prefix, x)))
 414         encode_value(crash, 'problem', escape)
 415         encode_value(crash, 'file', lambda x: escape(chop(output_dir, x)))
 416         encode_value(crash, 'info', lambda x: escape(chop(output_dir, x)))
 417         encode_value(crash, 'stderr', lambda x: escape(chop(output_dir, x)))
 418         return crash
 419
 420     return predicate
 421
 422
 423 def copy_resource_files(output_dir):
 424     """ Copy the javascript and css files to the report directory. """
 425
 426     this_dir = os.path.dirname(os.path.realpath(__file__))
 427     for resource in os.listdir(os.path.join(this_dir, 'resources')):
 428         shutil.copy(os.path.join(this_dir, 'resources', resource), output_dir)
 429
 430
 431 def encode_value(container, key, encode):
 432     """ Run 'encode' on 'container[key]' value and update it. """
 433
 434     if key in container:
 435         value = encode(container[key])
 436         container.update({key: value})
 437
 438
 439 def chop(prefix, filename):
 440     """ Create 'filename' from '/prefix/filename' """
 441
 442     return filename if not len(prefix) else os.path.relpath(filename, prefix)
 443
 444
 445 def escape(text):
 446     """ Paranoid HTML escape method. (Python version independent) """
 447
 448     escape_table = {
 449         '&': '&amp;',
 450         '"': '&quot;',
 451         "'": '&apos;',
 452         '>': '&gt;',
 453         '<': '&lt;'
 454     }
 455     return ''.join(escape_table.get(c, c) for c in text)
 456
 457
 458 def reindent(text, indent):
 459     """ Utility function to format html output and keep indentation. """
 460
 461     result = ''
 462     for line in text.splitlines():
 463         if len(line.strip()):
 464             result += ' ' * indent + line.split('|')[1] + os.linesep
 465     return result
 466
 467
 468 def comment(name, opts=dict()):
 469     """ Utility function to format meta information as comment. """
 470
 471     attributes = ''
 472     for key, value in opts.items():
 473         attributes += ' {0}="{1}"'.format(key, value)
 474
 475     return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)
 476
 477
 478 def commonprefix_from(filename):
 479     """ Create file prefix from a compilation database entries. """
 480
 481     with open(filename, 'r') as handle:
 482         return commonprefix(item['file'] for item in json.load(handle))
 483
 484
 485 def commonprefix(files):
 486     """ Fixed version of os.path.commonprefix.
 487
 488     :param files: list of file names.
 489     :return: the longest path prefix that is a prefix of all files. """
 490     result = None
 491     for current in files:
 492         if result is not None:
 493             result = os.path.commonprefix([result, current])
 494         else:
 495             result = current
 496
 497     if result is None:
 498         return ''
 499     elif not os.path.isdir(result):
 500         return os.path.dirname(result)
 501     else:
 502         return os.path.abspath(result)