1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code rewrites include invocations into their expansions. This gives you
11 // a file with all included files merged into it.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Rewrite/Frontend/Rewriters.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Frontend/PreprocessorOutputOptions.h"
18 #include "clang/Lex/HeaderSearch.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/raw_ostream.h"
23 using namespace clang;
28 class InclusionRewriter : public PPCallbacks {
29 /// Information about which #includes were actually performed,
30 /// created by preprocessor callbacks.
35 SrcMgr::CharacteristicKind FileType;
36 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
39 Preprocessor &PP; ///< Used to find inclusion directives.
40 SourceManager &SM; ///< Used to read and manage source files.
41 raw_ostream &OS; ///< The destination stream for rewritten contents.
42 bool ShowLineMarkers; ///< Show #line markers.
43 bool UseLineDirective; ///< Use of line directives or line markers.
44 typedef std::map<unsigned, FileChange> FileChangeMap;
45 FileChangeMap FileChanges; ///< Tracks which files were included where.
46 /// Used transitively for building up the FileChanges mapping over the
47 /// various \c PPCallbacks callbacks.
48 FileChangeMap::iterator LastInsertedFileChange;
50 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
51 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
53 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
54 SrcMgr::CharacteristicKind FileType,
56 virtual void FileSkipped(const FileEntry &ParentFile,
57 const Token &FilenameTok,
58 SrcMgr::CharacteristicKind FileType);
59 virtual void InclusionDirective(SourceLocation HashLoc,
60 const Token &IncludeTok,
63 CharSourceRange FilenameRange,
64 const FileEntry *File,
66 StringRef RelativePath,
67 const Module *Imported);
68 void WriteLineInfo(const char *Filename, int Line,
69 SrcMgr::CharacteristicKind FileType,
70 StringRef EOL, StringRef Extra = StringRef());
71 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
72 void OutputContentUpTo(const MemoryBuffer &FromFile,
73 unsigned &WriteFrom, unsigned WriteTo,
74 StringRef EOL, int &lines,
75 bool EnsureNewline = false);
76 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
77 const MemoryBuffer &FromFile, StringRef EOL,
78 unsigned &NextToWrite, int &Lines);
79 bool HandleHasInclude(FileID FileId, Lexer &RawLex,
80 const DirectoryLookup *Lookup, Token &Tok,
82 const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
83 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
86 } // end anonymous namespace
88 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
89 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
91 : PP(PP), SM(PP.getSourceManager()), OS(OS),
92 ShowLineMarkers(ShowLineMarkers),
93 LastInsertedFileChange(FileChanges.end()) {
94 // If we're in microsoft mode, use normal #line instead of line markers.
95 UseLineDirective = PP.getLangOpts().MicrosoftExt;
98 /// Write appropriate line information as either #line directives or GNU line
99 /// markers depending on what mode we're in, including the \p Filename and
100 /// \p Line we are located at, using the specified \p EOL line separator, and
101 /// any \p Extra context specifiers in GNU line directives.
102 void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
103 SrcMgr::CharacteristicKind FileType,
104 StringRef EOL, StringRef Extra) {
105 if (!ShowLineMarkers)
107 if (UseLineDirective) {
108 OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
110 // Use GNU linemarkers as described here:
111 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
112 OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
115 if (FileType == SrcMgr::C_System)
116 // "`3' This indicates that the following text comes from a system header
117 // file, so certain warnings should be suppressed."
119 else if (FileType == SrcMgr::C_ExternCSystem)
120 // as above for `3', plus "`4' This indicates that the following text
121 // should be treated as being wrapped in an implicit extern "C" block."
127 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
129 OS << "@import " << Mod->getFullModuleName() << ";"
130 << " /* clang -frewrite-includes: implicit import */" << EOL;
133 /// FileChanged - Whenever the preprocessor enters or exits a #include file
134 /// it invokes this handler.
135 void InclusionRewriter::FileChanged(SourceLocation Loc,
136 FileChangeReason Reason,
137 SrcMgr::CharacteristicKind NewFileType,
139 if (Reason != EnterFile)
141 if (LastInsertedFileChange == FileChanges.end())
142 // We didn't reach this file (e.g. the main file) via an inclusion directive.
144 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
145 LastInsertedFileChange->second.FileType = NewFileType;
146 LastInsertedFileChange = FileChanges.end();
149 /// Called whenever an inclusion is skipped due to canonical header protection
151 void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
152 const Token &/*FilenameTok*/,
153 SrcMgr::CharacteristicKind /*FileType*/) {
154 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
155 "found via an inclusion directive, was skipped");
156 FileChanges.erase(LastInsertedFileChange);
157 LastInsertedFileChange = FileChanges.end();
160 /// This should be called whenever the preprocessor encounters include
161 /// directives. It does not say whether the file has been included, but it
162 /// provides more information about the directive (hash location instead
163 /// of location inside the included file). It is assumed that the matching
164 /// FileChanged() or FileSkipped() is called after this.
165 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
166 const Token &/*IncludeTok*/,
167 StringRef /*FileName*/,
169 CharSourceRange /*FilenameRange*/,
170 const FileEntry * /*File*/,
171 StringRef /*SearchPath*/,
172 StringRef /*RelativePath*/,
173 const Module *Imported) {
174 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
175 "directive was found before the previous one was processed");
176 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
177 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
178 assert(p.second && "Unexpected revisitation of the same include directive");
180 LastInsertedFileChange = p.first;
183 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
184 /// an inclusion directive) in the map of inclusion information, FileChanges.
185 const InclusionRewriter::FileChange *
186 InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
187 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
188 if (I != FileChanges.end())
193 /// Detect the likely line ending style of \p FromFile by examining the first
194 /// newline found within it.
195 static StringRef DetectEOL(const MemoryBuffer &FromFile) {
196 // detect what line endings the file uses, so that added content does not mix
198 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
201 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
203 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
208 /// Writes out bytes from \p FromFile, starting at \p WriteFrom and ending at
210 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
211 unsigned &WriteFrom, unsigned WriteTo,
212 StringRef EOL, int &Line,
213 bool EnsureNewline) {
214 if (WriteTo <= WriteFrom)
216 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
217 // count lines manually, it's faster than getPresumedLoc()
218 Line += std::count(FromFile.getBufferStart() + WriteFrom,
219 FromFile.getBufferStart() + WriteTo, '\n');
221 char LastChar = FromFile.getBufferStart()[WriteTo - 1];
222 if (LastChar != '\n' && LastChar != '\r')
228 /// Print characters from \p FromFile starting at \p NextToWrite up until the
229 /// inclusion directive at \p StartToken, then print out the inclusion
230 /// directive disabled by a #if directive, updating \p NextToWrite
231 /// and \p Line to track the number of source lines visited and the progress
232 /// through the \p FromFile buffer.
233 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
234 const Token &StartToken,
235 const MemoryBuffer &FromFile,
237 unsigned &NextToWrite, int &Line) {
238 OutputContentUpTo(FromFile, NextToWrite,
239 SM.getFileOffset(StartToken.getLocation()), EOL, Line);
240 Token DirectiveToken;
242 DirectiveLex.LexFromRawLexer(DirectiveToken);
243 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
244 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
245 OutputContentUpTo(FromFile, NextToWrite,
246 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
248 OS << "#endif /* expanded by -frewrite-includes */" << EOL;
251 /// Find the next identifier in the pragma directive specified by \p RawToken.
252 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
254 RawLex.LexFromRawLexer(RawToken);
255 if (RawToken.is(tok::raw_identifier))
256 PP.LookUpIdentifierInfo(RawToken);
257 if (RawToken.is(tok::identifier))
258 return RawToken.getIdentifierInfo()->getName();
262 // Expand __has_include and __has_include_next if possible. If there's no
263 // definitive answer return false.
264 bool InclusionRewriter::HandleHasInclude(
265 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
267 // Lex the opening paren.
268 RawLex.LexFromRawLexer(Tok);
269 if (Tok.isNot(tok::l_paren))
272 RawLex.LexFromRawLexer(Tok);
274 SmallString<128> FilenameBuffer;
276 // Since the raw lexer doesn't give us angle_literals we have to parse them
278 // FIXME: What to do if the file name is a macro?
279 if (Tok.is(tok::less)) {
280 RawLex.LexFromRawLexer(Tok);
282 FilenameBuffer += '<';
284 if (Tok.is(tok::eod)) // Sanity check.
287 if (Tok.is(tok::raw_identifier))
288 PP.LookUpIdentifierInfo(Tok);
290 // Get the string piece.
291 SmallVector<char, 128> TmpBuffer;
292 bool Invalid = false;
293 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
297 FilenameBuffer += TmpName;
299 RawLex.LexFromRawLexer(Tok);
300 } while (Tok.isNot(tok::greater));
302 FilenameBuffer += '>';
303 Filename = FilenameBuffer;
305 if (Tok.isNot(tok::string_literal))
308 bool Invalid = false;
309 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
314 // Lex the closing paren.
315 RawLex.LexFromRawLexer(Tok);
316 if (Tok.isNot(tok::r_paren))
319 // Now ask HeaderInfo if it knows about the header.
320 // FIXME: Subframeworks aren't handled here. Do we care?
321 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
322 const DirectoryLookup *CurDir;
323 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
324 Filename, isAngled, 0, CurDir,
325 PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false);
327 FileExists = File != 0;
331 /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
332 /// and including content of included files recursively.
333 bool InclusionRewriter::Process(FileID FileId,
334 SrcMgr::CharacteristicKind FileType)
337 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
338 if (Invalid) // invalid inclusion
340 const char *FileName = FromFile.getBufferIdentifier();
341 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
342 RawLex.SetCommentRetentionState(false);
344 StringRef EOL = DetectEOL(FromFile);
346 // Per the GNU docs: "1" indicates the start of a new file.
347 WriteLineInfo(FileName, 1, FileType, EOL, " 1");
349 if (SM.getFileIDSize(FileId) == 0)
352 // The next byte to be copied from the source file
353 unsigned NextToWrite = 0;
354 int Line = 1; // The current input file line number.
357 RawLex.LexFromRawLexer(RawToken);
359 // TODO: Consider adding a switch that strips possibly unimportant content,
360 // such as comments, to reduce the size of repro files.
361 while (RawToken.isNot(tok::eof)) {
362 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
363 RawLex.setParsingPreprocessorDirective(true);
364 Token HashToken = RawToken;
365 RawLex.LexFromRawLexer(RawToken);
366 if (RawToken.is(tok::raw_identifier))
367 PP.LookUpIdentifierInfo(RawToken);
368 if (RawToken.is(tok::identifier) || RawToken.is(tok::kw_if)) {
369 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
370 case tok::pp_include:
371 case tok::pp_include_next:
372 case tok::pp_import: {
373 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
375 StringRef LineInfoExtra;
376 if (const FileChange *Change = FindFileChangeLocation(
377 HashToken.getLocation())) {
379 WriteImplicitModuleImport(Change->Mod, EOL);
381 // else now include and recursively process the file
382 } else if (Process(Change->Id, Change->FileType)) {
383 // and set lineinfo back to this file, if the nested one was
385 // `2' indicates returning to a file (after having included
387 LineInfoExtra = " 2";
390 // fix up lineinfo (since commented out directive changed line
391 // numbers) for inclusions that were skipped due to header guards
392 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
395 case tok::pp_pragma: {
396 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
397 if (Identifier == "clang" || Identifier == "GCC") {
398 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
399 // keep the directive in, commented out
400 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
402 // update our own type
403 FileType = SM.getFileCharacteristic(RawToken.getLocation());
404 WriteLineInfo(FileName, Line, FileType, EOL);
406 } else if (Identifier == "once") {
407 // keep the directive in, commented out
408 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
410 WriteLineInfo(FileName, Line, FileType, EOL);
416 // Rewrite special builtin macros to avoid pulling in host details.
418 // Walk over the directive.
419 RawLex.LexFromRawLexer(RawToken);
420 if (RawToken.is(tok::raw_identifier))
421 PP.LookUpIdentifierInfo(RawToken);
423 if (RawToken.is(tok::identifier)) {
425 SourceLocation Loc = RawToken.getLocation();
427 // Rewrite __has_include(x)
428 if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
429 if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile))
431 // Rewrite __has_include_next(x)
432 } else if (RawToken.getIdentifierInfo()->isStr(
433 "__has_include_next")) {
434 const DirectoryLookup *Lookup = PP.GetCurDirLookup();
438 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
444 // Replace the macro with (0) or (1), followed by the commented
445 // out macro for reference.
446 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
448 OS << '(' << (int) HasFile << ")/*";
449 OutputContentUpTo(FromFile, NextToWrite,
450 SM.getFileOffset(RawToken.getLocation()) +
451 RawToken.getLength(),
455 } while (RawToken.isNot(tok::eod));
462 RawLex.setParsingPreprocessorDirective(false);
464 RawLex.LexFromRawLexer(RawToken);
466 OutputContentUpTo(FromFile, NextToWrite,
467 SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
468 /*EnsureNewline*/true);
472 /// RewriteIncludesInInput - Implement -frewrite-includes mode.
473 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
474 const PreprocessorOutputOptions &Opts) {
475 SourceManager &SM = PP.getSourceManager();
476 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
477 Opts.ShowLineMarkers);
478 PP.addPPCallbacks(Rewrite);
480 // First let the preprocessor process the entire file and call callbacks.
481 // Callbacks will record which #include's were actually performed.
482 PP.EnterMainSourceFile();
484 // Only preprocessor directives matter here, so disable macro expansion
485 // everywhere else as an optimization.
486 // TODO: It would be even faster if the preprocessor could be switched
487 // to a mode where it would parse only preprocessor directives and comments,
488 // nothing else matters for parsing or processing.
489 PP.SetMacroExpansionOnlyInDirectives();
492 } while (Tok.isNot(tok::eof));
493 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);