# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.

This implementation uses the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C language and can be found under 'libear' directory.

The 'libear' library captures all child process creation and logs the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """
31 from libear import build_libear, TemporaryDirectory
32 from libscanbuild import command_entry_point, compiler_wrapper, \
33 wrapper_environment, run_command, run_build
34 from libscanbuild import duplicate_check
35 from libscanbuild.compilation import split_command
36 from libscanbuild.arguments import parse_args_for_intercept_build
37 from libscanbuild.shell import encode, decode
# public entry points of this module
__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']
# names of the compiler wrapper executables placed into CC and CXX
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
# extension of the execution report files
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
# platforms where the library preload is not attempted at all
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})
def intercept_build():
    """ Entry point for 'intercept-build' command.

    Parses the command line arguments of the command and starts the build
    interception with them.

    :return: the value returned by 'capture' for the parsed arguments. """

    args = parse_args_for_intercept_build()
    # NOTE(review): the statement which consumes 'args' was lost from the
    # damaged listing; handing them to 'capture' is the only consumer in
    # this module -- confirm against the pristine file.
    return capture(args)
def capture(args):
    """ The entry point of build command interception.

    Runs the build command in an environment prepared for interception,
    reads the execution reports written during the build, converts them
    into compilation database entries and writes those into 'args.cdb'.

    :param args: the parsed command line arguments; the 'build' and 'cdb'
                 attributes and the optional 'append' attribute are read
                 here,
    :return: the value returned by 'run_build' for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements.

        :param commands: iterable of the parsed execution reports,
        :return: generator of the compilation database entries. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            # nothing to merge with
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls (the glob result already carries
        # the directory part of the pattern, no need to join it again)
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(trace_pattern)))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy entries have to be
        # consumed here, while the report files still exist
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
def setup_environment(args, destination):
    """ Sets up the environment for the build command.

    It creates a copy of the current process environment and extends it with
    the variables required by the interception. The exec calls will be logged
    by the 'libear' preloaded library or by the 'wrapper' programs.

    :param args: the parsed command line arguments; the 'override_compiler'
                 attribute and the optional 'cc' attribute are read here,
    :param destination: the directory where the execution reports shall be
                        placed (also handed over to 'build_libear'),
    :return: the environment dictionary for the build command. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the preload library is not built when the compiler wrappers were
    # requested or when the platform can't preload it
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': COMPILER_WRAPPER_CC,
            'CXX': COMPILER_WRAPPER_CXX
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    :return: the value returned by 'compiler_wrapper' when it is driven by
             the interception specific implementation. """

    return compiler_wrapper(intercept_compiler_wrapper_impl)
def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    It does generate execution report into target directory.
    The target directory name is from environment variables.

    Problems are only logged as warnings: the compiler wrapper shall not
    fail because the report could not be written.

    :param _: not used by this implementation,
    :param execution: the execution to report (handed over to
                      'write_exec_trace' unchanged),
    :return: None. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if not target_dir:
        # there is no place to write the report
        logging.warning(message_prefix, 'missing target directory')
        return
    # write current execution info to the pid file
    try:
        target_file_name = str(os.getpid()) + TRACE_FILE_EXTENSION
        target_file = os.path.join(target_dir, target_file_name)
        logging.debug('writing execution report to: %s', target_file)
        write_exec_trace(target_file, execution)
    except (IOError, OSError):
        # NOTE(review): the exception clause was lost from the damaged
        # listing; file system errors are assumed to be the 'io problem'.
        logging.warning(message_prefix, 'io problem')
def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be in sync with the execution report writer in the
    interception library. A report is a list of fields joined by the RS
    separator and closed by the GS separator. The fields are: the id of the
    current process (written twice), the 'wrapper' literal, the working
    directory and the command. Every argument of the command is closed by
    the US separator.

    :param filename: path to the output execution trace file,
    :param entry: the Execution object to append to that file (the 'cmd'
                  and 'cwd' attributes are read). """

    with open(filename, 'ab') as handler:
        # NOTE(review): the assignment of 'pid' was lost from the damaged
        # listing; the id of the current process is assumed (same value as
        # used for the report file name by the compiler wrapper).
        pid = str(os.getpid())
        command = US.join(entry.cmd) + US
        content = RS.join([pid, pid, 'wrapper', entry.cwd, command]) + GS
        handler.write(content.encode('utf-8'))
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: path to the execution trace file,
    :return: generator of dictionaries, one for each report in the file;
             the 'command' value is the list of the command arguments. """

    logging.debug('parse exec trace file: %s', filename)
    # NOTE(review): the report writer of this module encodes the content as
    # UTF-8, while this read relies on the default encoding.
    with open(filename, 'r') as handler:
        content = handler.read()
    # the whole content is in memory, the file need not stay open while
    # the reports are consumed
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        # NOTE(review): the first two keys were lost from the damaged
        # listing; 'pid' and 'ppid' follow the field order of the report
        # writer -- confirm against the pristine file.
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            # every argument is closed by US: drop the empty last piece
            'command': records[4].split(US)[:-1]
        }
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: a parsed execution report; the 'command' and
                       'directory' values are read,
    :return: generator of dictionaries with 'directory', 'command' and
             'file' keys, one for each source file of the command. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    # NOTE(review): presumably 'split_command' gives a false value for the
    # commands which are not compiler calls; those produce no entry.
    if not compilation:
        return
    # the compiler name is normalized, and it is the same for every source
    compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
    for source in compilation.files:
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)
        yield {
            'directory': exec_trace['directory'],
            'command': encode(command),
            'file': abspath(exec_trace['directory'], source)
        }
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # NOTE(review): the error handling was lost from the damaged
        # listing; a 'csrutil' which can't be executed is taken as SIP
        # being not enabled, as the description above says.
        return False
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: a compilation database entry; the 'file', 'directory'
                  and 'command' values are read,
    :return: a string built from the three values, joined by '<>'. """

    # For faster lookup in set filename is reversed
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reversed
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])