tools/scan-build-py/libscanbuild/analyze.py

   1 # -*- coding: utf-8 -*-
   2 #                     The LLVM Compiler Infrastructure
   3 #
   4 # This file is distributed under the University of Illinois Open Source
   5 # License. See LICENSE.TXT for details.
   6 """ This module implements the 'scan-build' command API.
   7
   8 To run the static analyzer against a build is done in multiple steps:
   9
  10  -- Intercept: capture the compilation command during the build,
  11  -- Analyze:   run the analyzer against the captured commands,
  12  -- Report:    create a cover report from the analyzer outputs.  """
  13
  14 import re
  15 import os
  16 import os.path
  17 import json
  18 import logging
  19 import multiprocessing
  20 import tempfile
  21 import functools
  22 import subprocess
  23 import contextlib
  24 import datetime
  25
  26 from libscanbuild import command_entry_point, compiler_wrapper, \
  27     wrapper_environment, run_build, run_command
  28 from libscanbuild.arguments import parse_args_for_scan_build, \
  29     parse_args_for_analyze_build
  30 from libscanbuild.intercept import capture
  31 from libscanbuild.report import document
  32 from libscanbuild.compilation import split_command, classify_source, \
  33     compiler_language
  34 from libscanbuild.clang import get_version, get_arguments
  35 from libscanbuild.shell import decode
  36
  37 __all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']
  38
  39 COMPILER_WRAPPER_CC = 'analyze-cc'
  40 COMPILER_WRAPPER_CXX = 'analyze-c++'
  41
  42
  43 @command_entry_point
  44 def scan_build():
  45     """ Entry point for scan-build command. """
  46
  47     args = parse_args_for_scan_build()
  48     # will re-assign the report directory as new output
  49     with report_directory(args.output, args.keep_empty) as args.output:
  50         # Run against a build command. there are cases, when analyzer run
  51         # is not required. But we need to set up everything for the
  52         # wrappers, because 'configure' needs to capture the CC/CXX values
  53         # for the Makefile.
  54         if args.intercept_first:
  55             # Run build command with intercept module.
  56             exit_code = capture(args)
  57             # Run the analyzer against the captured commands.
  58             if need_analyzer(args.build):
  59                 run_analyzer_parallel(args)
  60         else:
  61             # Run build command and analyzer with compiler wrappers.
  62             environment = setup_environment(args)
  63             exit_code = run_build(args.build, env=environment)
  64         # Cover report generation and bug counting.
  65         number_of_bugs = document(args)
  66         # Set exit status as it was requested.
  67         return number_of_bugs if args.status_bugs else exit_code
  68
  69
  70 @command_entry_point
  71 def analyze_build():
  72     """ Entry point for analyze-build command. """
  73
  74     args = parse_args_for_analyze_build()
  75     # will re-assign the report directory as new output
  76     with report_directory(args.output, args.keep_empty) as args.output:
  77         # Run the analyzer against a compilation db.
  78         run_analyzer_parallel(args)
  79         # Cover report generation and bug counting.
  80         number_of_bugs = document(args)
  81         # Set exit status as it was requested.
  82         return number_of_bugs if args.status_bugs else 0
  83
  84
  85 def need_analyzer(args):
  86     """ Check the intent of the build command.
  87
  88     When static analyzer run against project configure step, it should be
  89     silent and no need to run the analyzer or generate report.
  90
  91     To run `scan-build` against the configure step might be neccessary,
  92     when compiler wrappers are used. That's the moment when build setup
  93     check the compiler and capture the location for the build process. """
  94
  95     return len(args) and not re.search('configure|autogen', args[0])
  96
  97
  98 def run_analyzer_parallel(args):
  99     """ Runs the analyzer against the given compilation database. """
 100
 101     def exclude(filename):
 102         """ Return true when any excluded directory prefix the filename. """
 103         return any(re.match(r'^' + directory, filename)
 104                    for directory in args.excludes)
 105
 106     consts = {
 107         'clang': args.clang,
 108         'output_dir': args.output,
 109         'output_format': args.output_format,
 110         'output_failures': args.output_failures,
 111         'direct_args': analyzer_params(args),
 112         'force_debug': args.force_debug
 113     }
 114
 115     logging.debug('run analyzer against compilation database')
 116     with open(args.cdb, 'r') as handle:
 117         generator = (dict(cmd, **consts)
 118                      for cmd in json.load(handle) if not exclude(cmd['file']))
 119         # when verbose output requested execute sequentially
 120         pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
 121         for current in pool.imap_unordered(run, generator):
 122             if current is not None:
 123                 # display error message from the static analyzer
 124                 for line in current['error_output']:
 125                     logging.info(line.rstrip())
 126         pool.close()
 127         pool.join()
 128
 129
 130 def setup_environment(args):
 131     """ Set up environment for build command to interpose compiler wrapper. """
 132
 133     environment = dict(os.environ)
 134     environment.update(wrapper_environment(args))
 135     environment.update({
 136         'CC': COMPILER_WRAPPER_CC,
 137         'CXX': COMPILER_WRAPPER_CXX,
 138         'ANALYZE_BUILD_CLANG': args.clang if need_analyzer(args.build) else '',
 139         'ANALYZE_BUILD_REPORT_DIR': args.output,
 140         'ANALYZE_BUILD_REPORT_FORMAT': args.output_format,
 141         'ANALYZE_BUILD_REPORT_FAILURES': 'yes' if args.output_failures else '',
 142         'ANALYZE_BUILD_PARAMETERS': ' '.join(analyzer_params(args)),
 143         'ANALYZE_BUILD_FORCE_DEBUG': 'yes' if args.force_debug else ''
 144     })
 145     return environment
 146
 147
 148 @command_entry_point
 149 def analyze_compiler_wrapper():
 150     """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers. """
 151
 152     return compiler_wrapper(analyze_compiler_wrapper_impl)
 153
 154
 155 def analyze_compiler_wrapper_impl(result, execution):
 156     """ Implements analyzer compiler wrapper functionality. """
 157
 158     # don't run analyzer when compilation fails. or when it's not requested.
 159     if result or not os.getenv('ANALYZE_BUILD_CLANG'):
 160         return
 161
 162     # check is it a compilation?
 163     compilation = split_command(execution.cmd)
 164     if compilation is None:
 165         return
 166     # collect the needed parameters from environment, crash when missing
 167     parameters = {
 168         'clang': os.getenv('ANALYZE_BUILD_CLANG'),
 169         'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
 170         'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
 171         'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
 172         'direct_args': os.getenv('ANALYZE_BUILD_PARAMETERS',
 173                                  '').split(' '),
 174         'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
 175         'directory': execution.cwd,
 176         'command': [execution.cmd[0], '-c'] + compilation.flags
 177     }
 178     # call static analyzer against the compilation
 179     for source in compilation.files:
 180         parameters.update({'file': source})
 181         logging.debug('analyzer parameters %s', parameters)
 182         current = run(parameters)
 183         # display error message from the static analyzer
 184         if current is not None:
 185             for line in current['error_output']:
 186                 logging.info(line.rstrip())
 187
 188
 189 @contextlib.contextmanager
 190 def report_directory(hint, keep):
 191     """ Responsible for the report directory.
 192
 193     hint -- could specify the parent directory of the output directory.
 194     keep -- a boolean value to keep or delete the empty report directory. """
 195
 196     stamp_format = 'scan-build-%Y-%m-%d-%H-%M-%S-%f-'
 197     stamp = datetime.datetime.now().strftime(stamp_format)
 198     parent_dir = os.path.abspath(hint)
 199     if not os.path.exists(parent_dir):
 200         os.makedirs(parent_dir)
 201     name = tempfile.mkdtemp(prefix=stamp, dir=parent_dir)
 202
 203     logging.info('Report directory created: %s', name)
 204
 205     try:
 206         yield name
 207     finally:
 208         if os.listdir(name):
 209             msg = "Run 'scan-view %s' to examine bug reports."
 210             keep = True
 211         else:
 212             if keep:
 213                 msg = "Report directory '%s' contains no report, but kept."
 214             else:
 215                 msg = "Removing directory '%s' because it contains no report."
 216         logging.warning(msg, name)
 217
 218         if not keep:
 219             os.rmdir(name)
 220
 221
 222 def analyzer_params(args):
 223     """ A group of command line arguments can mapped to command
 224     line arguments of the analyzer. This method generates those. """
 225
 226     def prefix_with(constant, pieces):
 227         """ From a sequence create another sequence where every second element
 228         is from the original sequence and the odd elements are the prefix.
 229
 230         eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """
 231
 232         return [elem for piece in pieces for elem in [constant, piece]]
 233
 234     result = []
 235
 236     if args.store_model:
 237         result.append('-analyzer-store={0}'.format(args.store_model))
 238     if args.constraints_model:
 239         result.append('-analyzer-constraints={0}'.format(
 240             args.constraints_model))
 241     if args.internal_stats:
 242         result.append('-analyzer-stats')
 243     if args.analyze_headers:
 244         result.append('-analyzer-opt-analyze-headers')
 245     if args.stats:
 246         result.append('-analyzer-checker=debug.Stats')
 247     if args.maxloop:
 248         result.extend(['-analyzer-max-loop', str(args.maxloop)])
 249     if args.output_format:
 250         result.append('-analyzer-output={0}'.format(args.output_format))
 251     if args.analyzer_config:
 252         result.append(args.analyzer_config)
 253     if args.verbose >= 4:
 254         result.append('-analyzer-display-progress')
 255     if args.plugins:
 256         result.extend(prefix_with('-load', args.plugins))
 257     if args.enable_checker:
 258         checkers = ','.join(args.enable_checker)
 259         result.extend(['-analyzer-checker', checkers])
 260     if args.disable_checker:
 261         checkers = ','.join(args.disable_checker)
 262         result.extend(['-analyzer-disable-checker', checkers])
 263     if os.getenv('UBIVIZ'):
 264         result.append('-analyzer-viz-egraph-ubigraph')
 265
 266     return prefix_with('-Xclang', result)
 267
 268
 269 def require(required):
 270     """ Decorator for checking the required values in state.
 271
 272     It checks the required attributes in the passed state and stop when
 273     any of those is missing. """
 274
 275     def decorator(function):
 276         @functools.wraps(function)
 277         def wrapper(*args, **kwargs):
 278             for key in required:
 279                 if key not in args[0]:
 280                     raise KeyError('{0} not passed to {1}'.format(
 281                         key, function.__name__))
 282
 283             return function(*args, **kwargs)
 284
 285         return wrapper
 286
 287     return decorator
 288
 289
 290 @require(['command',  # entry from compilation database
 291           'directory',  # entry from compilation database
 292           'file',  # entry from compilation database
 293           'clang',  # clang executable name (and path)
 294           'direct_args',  # arguments from command line
 295           'force_debug',  # kill non debug macros
 296           'output_dir',  # where generated report files shall go
 297           'output_format',  # it's 'plist' or 'html' or both
 298           'output_failures'])  # generate crash reports or not
 299 def run(opts):
 300     """ Entry point to run (or not) static analyzer against a single entry
 301     of the compilation database.
 302
 303     This complex task is decomposed into smaller methods which are calling
 304     each other in chain. If the analyzis is not possibe the given method
 305     just return and break the chain.
 306
 307     The passed parameter is a python dictionary. Each method first check
 308     that the needed parameters received. (This is done by the 'require'
 309     decorator. It's like an 'assert' to check the contract between the
 310     caller and the called method.) """
 311
 312     try:
 313         command = opts.pop('command')
 314         command = command if isinstance(command, list) else decode(command)
 315         logging.debug("Run analyzer against '%s'", command)
 316         opts.update(classify_parameters(command))
 317
 318         return arch_check(opts)
 319     except Exception:
 320         logging.error("Problem occured during analyzis.", exc_info=1)
 321         return None
 322
 323
 324 @require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
 325           'error_output', 'exit_code'])
 326 def report_failure(opts):
 327     """ Create report when analyzer failed.
 328
 329     The major report is the preprocessor output. The output filename generated
 330     randomly. The compiler output also captured into '.stderr.txt' file.
 331     And some more execution context also saved into '.info.txt' file. """
 332
 333     def extension():
 334         """ Generate preprocessor file extension. """
 335
 336         mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
 337         return mapping.get(opts['language'], '.i')
 338
 339     def destination():
 340         """ Creates failures directory if not exits yet. """
 341
 342         failures_dir = os.path.join(opts['output_dir'], 'failures')
 343         if not os.path.isdir(failures_dir):
 344             os.makedirs(failures_dir)
 345         return failures_dir
 346
 347     # Classify error type: when Clang terminated by a signal it's a 'Crash'.
 348     # (python subprocess Popen.returncode is negative when child terminated
 349     # by signal.) Everything else is 'Other Error'.
 350     error = 'crash' if opts['exit_code'] < 0 else 'other_error'
 351     # Create preprocessor output file name. (This is blindly following the
 352     # Perl implementation.)
 353     (handle, name) = tempfile.mkstemp(suffix=extension(),
 354                                       prefix='clang_' + error + '_',
 355                                       dir=destination())
 356     os.close(handle)
 357     # Execute Clang again, but run the syntax check only.
 358     cwd = opts['directory']
 359     cmd = get_arguments(
 360         [opts['clang'], '-fsyntax-only', '-E'
 361          ] + opts['flags'] + [opts['file'], '-o', name], cwd)
 362     run_command(cmd, cwd=cwd)
 363     # write general information about the crash
 364     with open(name + '.info.txt', 'w') as handle:
 365         handle.write(opts['file'] + os.linesep)
 366         handle.write(error.title().replace('_', ' ') + os.linesep)
 367         handle.write(' '.join(cmd) + os.linesep)
 368         handle.write(' '.join(os.uname()) + os.linesep)
 369         handle.write(get_version(opts['clang']))
 370         handle.close()
 371     # write the captured output too
 372     with open(name + '.stderr.txt', 'w') as handle:
 373         handle.writelines(opts['error_output'])
 374         handle.close()
 375
 376
 377 @require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
 378           'output_format'])
 379 def run_analyzer(opts, continuation=report_failure):
 380     """ It assembles the analysis command line and executes it. Capture the
 381     output of the analysis and returns with it. If failure reports are
 382     requested, it calls the continuation to generate it. """
 383
 384     def target():
 385         """ Creates output file name for reports. """
 386         if opts['output_format'] in {'plist', 'plist-html'}:
 387             (handle, name) = tempfile.mkstemp(prefix='report-',
 388                                               suffix='.plist',
 389                                               dir=opts['output_dir'])
 390             os.close(handle)
 391             return name
 392         return opts['output_dir']
 393
 394     try:
 395         cwd = opts['directory']
 396         cmd = get_arguments([opts['clang'], '--analyze'] +
 397                             opts['direct_args'] + opts['flags'] +
 398                             [opts['file'], '-o', target()],
 399                             cwd)
 400         output = run_command(cmd, cwd=cwd)
 401         return {'error_output': output, 'exit_code': 0}
 402     except subprocess.CalledProcessError as ex:
 403         result = {'error_output': ex.output, 'exit_code': ex.returncode}
 404         if opts.get('output_failures', False):
 405             opts.update(result)
 406             continuation(opts)
 407         return result
 408
 409
 410 @require(['flags', 'force_debug'])
 411 def filter_debug_flags(opts, continuation=run_analyzer):
 412     """ Filter out nondebug macros when requested. """
 413
 414     if opts.pop('force_debug'):
 415         # lazy implementation just append an undefine macro at the end
 416         opts.update({'flags': opts['flags'] + ['-UNDEBUG']})
 417
 418     return continuation(opts)
 419
 420
 421 @require(['language', 'compiler', 'file', 'flags'])
 422 def language_check(opts, continuation=filter_debug_flags):
 423     """ Find out the language from command line parameters or file name
 424     extension. The decision also influenced by the compiler invocation. """
 425
 426     accepted = frozenset({
 427         'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
 428         'c++-cpp-output', 'objective-c-cpp-output'
 429     })
 430
 431     # language can be given as a parameter...
 432     language = opts.pop('language')
 433     compiler = opts.pop('compiler')
 434     # ... or find out from source file extension
 435     if language is None and compiler is not None:
 436         language = classify_source(opts['file'], compiler == 'c')
 437
 438     if language is None:
 439         logging.debug('skip analysis, language not known')
 440         return None
 441     elif language not in accepted:
 442         logging.debug('skip analysis, language not supported')
 443         return None
 444     else:
 445         logging.debug('analysis, language: %s', language)
 446         opts.update({'language': language,
 447                      'flags': ['-x', language] + opts['flags']})
 448         return continuation(opts)
 449
 450
 451 @require(['arch_list', 'flags'])
 452 def arch_check(opts, continuation=language_check):
 453     """ Do run analyzer through one of the given architectures. """
 454
 455     disabled = frozenset({'ppc', 'ppc64'})
 456
 457     received_list = opts.pop('arch_list')
 458     if received_list:
 459         # filter out disabled architectures and -arch switches
 460         filtered_list = [a for a in received_list if a not in disabled]
 461         if filtered_list:
 462             # There should be only one arch given (or the same multiple
 463             # times). If there are multiple arch are given and are not
 464             # the same, those should not change the pre-processing step.
 465             # But that's the only pass we have before run the analyzer.
 466             current = filtered_list.pop()
 467             logging.debug('analysis, on arch: %s', current)
 468
 469             opts.update({'flags': ['-arch', current] + opts['flags']})
 470             return continuation(opts)
 471         else:
 472             logging.debug('skip analysis, found not supported arch')
 473             return None
 474     else:
 475         logging.debug('analysis, on default arch')
 476         return continuation(opts)
 477
# To have good results from the static analyzer certain compiler options
# shall be omitted. The compiler flag filtering only affects the static
# analyzer run.
#
# Keys are the option names. The value is the number of arguments following
# the option which have to be skipped together with it (0 means the option
# stands alone).
IGNORED_FLAGS = {
    '-c': 0,  # compile option will be overwritten
    '-fsyntax-only': 0,  # static analyzer option will be overwritten
    '-o': 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    '-g': 0,
    '-save-temps': 0,
    '-install_name': 1,
    '-exported_symbols_list': 1,
    '-current_version': 1,
    '-compatibility_version': 1,
    '-init': 1,
    '-e': 1,
    '-seg1addr': 1,
    '-bundle_loader': 1,
    '-multiply_defined': 1,
    '-sectorder': 3,
    '--param': 1,
    '--serialize-diagnostics': 1
}
 502
 503
 504 def classify_parameters(command):
 505     """ Prepare compiler flags (filters some and add others) and take out
 506     language (-x) and architecture (-arch) flags for future processing. """
 507
 508     result = {
 509         'flags': [],  # the filtered compiler flags
 510         'arch_list': [],  # list of architecture flags
 511         'language': None,  # compilation language, None, if not specified
 512         'compiler': compiler_language(command)  # 'c' or 'c++'
 513     }
 514
 515     # iterate on the compile options
 516     args = iter(command[1:])
 517     for arg in args:
 518         # take arch flags into a separate basket
 519         if arg == '-arch':
 520             result['arch_list'].append(next(args))
 521         # take language
 522         elif arg == '-x':
 523             result['language'] = next(args)
 524         # parameters which looks source file are not flags
 525         elif re.match(r'^[^-].+', arg) and classify_source(arg):
 526             pass
 527         # ignore some flags
 528         elif arg in IGNORED_FLAGS:
 529             count = IGNORED_FLAGS[arg]
 530             for _ in range(count):
 531                 next(args)
 532         # we don't care about extra warnings, but we should suppress ones
 533         # that we don't want to see.
 534         elif re.match(r'^-W.+', arg) and not re.match(r'^-Wno-.+', arg):
 535             pass
 536         # and consider everything else as compilation flag.
 537         else:
 538             result['flags'].append(arg)
 539
 540     return result