1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code rewrites include invocations into their expansions. This gives you
11 // a file with all included files merged into it.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Rewrite/Frontend/Rewriters.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Frontend/PreprocessorOutputOptions.h"
18 #include "clang/Lex/HeaderSearch.h"
19 #include "clang/Lex/Pragma.h"
20 #include "clang/Lex/Preprocessor.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/raw_ostream.h"
24 using namespace clang;
// PPCallbacks subclass behind the include rewriter. Its callbacks record, per
// inclusion directive, whether a file was entered or a module was imported;
// Process() then re-reads each file with a raw lexer and writes the rewritten
// text to OS.
// NOTE(review): the embedded numbering in this listing skips values (for
// example 31 -> 34 below), so some declarations of this class are not visible
// here. Confirm against the complete file before relying on these notes.
29 class InclusionRewriter : public PPCallbacks {
30 /// Information about which #includes were actually performed,
31 /// created by preprocessor callbacks.
// Members and constructor of the IncludedFile record. Its opening line and
// the Id member initialised by the constructor are not visible in this listing.
34 SrcMgr::CharacteristicKind FileType;
35 const DirectoryLookup *DirLookup;
36 IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType,
37 const DirectoryLookup *DirLookup)
38 : Id(Id), FileType(FileType), DirLookup(DirLookup) {}
40 Preprocessor &PP; ///< Used to find inclusion directives.
41 SourceManager &SM; ///< Used to read and manage source files.
42 raw_ostream &OS; ///< The destination stream for rewritten contents.
43 StringRef MainEOL; ///< The line ending marker to use.
44 const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
45 bool ShowLineMarkers; ///< Show #line markers.
46 bool UseLineDirectives; ///< Use of line directives or line markers.
// The three maps below are keyed by SourceLocation::getRawEncoding() of the
// location recorded for the directive (see the insert and find calls in the
// member functions).
47 /// Tracks where inclusions that change the file are found.
48 std::map<unsigned, IncludedFile> FileIncludes;
49 /// Tracks where inclusions that import modules are found.
50 std::map<unsigned, const Module *> ModuleIncludes;
51 /// Tracks where inclusions that enter modules (in a module build) are found.
52 std::map<unsigned, const Module *> ModuleEntryIncludes;
53 /// Used transitively for building up the FileIncludes mapping over the
54 /// various \c PPCallbacks callbacks.
// Set to the hash location by InclusionDirective() and reset to a
// default-constructed location by FileChanged() and FileSkipped().
55 SourceLocation LastInclusionLocation;
57 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
58 bool UseLineDirectives);
// Rewrites the file identified by FileId; calls itself for every inclusion
// that was recorded as having entered a file.
59 void Process(FileID FileId, SrcMgr::CharacteristicKind FileType,
60 const DirectoryLookup *DirLookup);
// Registers the predefines buffer; OutputContentUpTo() and
// CommentOutDirective() compare buffers against this pointer.
61 void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
62 PredefinesBuffer = Buf;
// Replaces MainEOL with the line ending detected in the main file.
64 void detectMainFileEOL();
// Records the module carried by an annot_module_begin token, keyed by the
// raw encoding of the token location.
65 void handleModuleBegin(Token &Tok) {
66 assert(Tok.getKind() == tok::annot_module_begin);
67 ModuleEntryIncludes.insert({Tok.getLocation().getRawEncoding(),
68 (Module *)Tok.getAnnotationValue()});
// PPCallbacks overrides used to fill the maps above.
71 void FileChanged(SourceLocation Loc, FileChangeReason Reason,
72 SrcMgr::CharacteristicKind FileType,
73 FileID PrevFID) override;
74 void FileSkipped(const FileEntry &SkippedFile, const Token &FilenameTok,
75 SrcMgr::CharacteristicKind FileType) override;
76 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
77 StringRef FileName, bool IsAngled,
78 CharSourceRange FilenameRange, const FileEntry *File,
79 StringRef SearchPath, StringRef RelativePath,
80 const Module *Imported,
81 SrcMgr::CharacteristicKind FileType) override;
// Output helpers used while rewriting.
82 void WriteLineInfo(StringRef Filename, int Line,
83 SrcMgr::CharacteristicKind FileType,
84 StringRef Extra = StringRef());
85 void WriteImplicitModuleImport(const Module *Mod);
// NOTE(review): the last parameter line of the next declaration is not
// visible in this listing; the definition shows it as bool EnsureNewline.
86 void OutputContentUpTo(const MemoryBuffer &FromFile,
87 unsigned &WriteFrom, unsigned WriteTo,
88 StringRef EOL, int &lines,
90 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
91 const MemoryBuffer &FromFile, StringRef EOL,
92 unsigned &NextToWrite, int &Lines);
// NOTE(review): the last parameter line of the next declaration is not
// visible in this listing; the definition assigns to a FileExists output.
93 bool HandleHasInclude(FileID FileId, Lexer &RawLex,
94 const DirectoryLookup *Lookup, Token &Tok,
// Lookups into the three maps by directive location.
96 const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;
97 const Module *FindModuleAtLocation(SourceLocation Loc) const;
98 const Module *FindEnteredModule(SourceLocation Loc) const;
99 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
102 } // end anonymous namespace
104 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
/// \p ShowLineMarkers and \p UseLineDirectives are stored as given. The source
/// manager is taken from \p PP, MainEOL starts as a single line feed,
/// PredefinesBuffer starts null and LastInclusionLocation starts
/// default-constructed.
105 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
106 bool ShowLineMarkers,
107 bool UseLineDirectives)
108 : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
109 PredefinesBuffer(nullptr), ShowLineMarkers(ShowLineMarkers),
110 UseLineDirectives(UseLineDirectives),
111 LastInclusionLocation(SourceLocation()) {}
113 /// Write appropriate line information as either #line directives or GNU line
114 /// markers depending on what mode we're in, including the \p Filename and
115 /// \p Line we are located at, and
116 /// any \p Extra context specifiers in GNU line directives.
/// \p Filename is written through raw_ostream::write_escaped in both modes.
/// \p FileType selects the system-header flags described in the comments
/// further down.
/// NOTE(review): several statements of this function (including the ones that
/// write \p Extra, the flags and the line ending) are not visible in this
/// listing.
117 void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,
118 SrcMgr::CharacteristicKind FileType,
// Line markers disabled: presumably nothing is written (the statement guarded
// by this check is not visible in this listing).
120 if (!ShowLineMarkers)
122 if (UseLineDirectives) {
123 OS << "#line" << ' ' << Line << ' ' << '"';
124 OS.write_escaped(Filename);
127 // Use GNU linemarkers as described here:
128 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
129 OS << '#' << ' ' << Line << ' ' << '"';
130 OS.write_escaped(Filename);
134 if (FileType == SrcMgr::C_System)
135 // "`3' This indicates that the following text comes from a system header
136 // file, so certain warnings should be suppressed."
138 else if (FileType == SrcMgr::C_ExternCSystem)
139 // as above for `3', plus "`4' This indicates that the following text
140 // should be treated as being wrapped in an implicit extern "C" block."
/// Writes a pragma line that imports \p Mod by its full module name, followed
/// by a trailing comment marking it as an implicit import made by
/// -frewrite-includes, and terminated with MainEOL.
146 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
147 OS << "#pragma clang module import " << Mod->getFullModuleName(true)
148 << " /* clang -frewrite-includes: implicit import */" << MainEOL;
151 /// FileChanged - Whenever the preprocessor enters or exits a #include file
152 /// it invokes this handler.
/// For a file reached through an inclusion directive, this stores an
/// IncludedFile (file id, \p NewFileType and the current directory lookup of
/// the preprocessor) in FileIncludes under the raw encoding of
/// LastInclusionLocation, then clears LastInclusionLocation.
/// NOTE(review): the statements guarded by the two checks below are not
/// visible in this listing; presumably both return early.
153 void InclusionRewriter::FileChanged(SourceLocation Loc,
154 FileChangeReason Reason,
155 SrcMgr::CharacteristicKind NewFileType,
// Only file-entry events are recorded.
157 if (Reason != EnterFile)
159 if (LastInclusionLocation.isInvalid())
160 // we didn't reach this file (eg: the main file) via an inclusion directive
162 FileID Id = FullSourceLoc(Loc, SM).getFileID();
163 auto P = FileIncludes.insert(
164 std::make_pair(LastInclusionLocation.getRawEncoding(),
165 IncludedFile(Id, NewFileType, PP.GetCurDirLookup())));
// Each directive location is expected to be inserted at most once.
167 assert(P.second && "Unexpected revisitation of the same include directive");
// Consume the pending directive so the next file entry is not attributed to it.
168 LastInclusionLocation = SourceLocation();
171 /// Called whenever an inclusion is skipped due to canonical header protection
/// All three parameters are unused. The handler only checks that an inclusion
/// directive is pending and then clears LastInclusionLocation, so no entry is
/// added to FileIncludes for the skipped directive.
173 void InclusionRewriter::FileSkipped(const FileEntry &/*SkippedFile*/,
174 const Token &/*FilenameTok*/,
175 SrcMgr::CharacteristicKind /*FileType*/) {
176 assert(LastInclusionLocation.isValid() &&
177 "A file, that wasn't found via an inclusion directive, was skipped");
178 LastInclusionLocation = SourceLocation();
181 /// This should be called whenever the preprocessor encounters include
182 /// directives. It does not say whether the file has been included, but it
183 /// provides more information about the directive (hash location instead
184 /// of location inside the included file). It is assumed that the matching
185 /// FileChanged() or FileSkipped() is called after this (or neither is
186 /// called if this #include results in an error or does not textually include
/// Only \p HashLoc and \p Imported are used by the visible statements: one
/// records \p Imported in ModuleIncludes under the raw encoding of \p HashLoc,
/// the other remembers \p HashLoc in LastInclusionLocation.
/// NOTE(review): the condition that selects between those two actions is not
/// visible in this listing.
188 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
189 const Token &/*IncludeTok*/,
190 StringRef /*FileName*/,
192 CharSourceRange /*FilenameRange*/,
193 const FileEntry * /*File*/,
194 StringRef /*SearchPath*/,
195 StringRef /*RelativePath*/,
196 const Module *Imported,
197 SrcMgr::CharacteristicKind FileType){
199 auto P = ModuleIncludes.insert(
200 std::make_pair(HashLoc.getRawEncoding(), Imported));
// Each directive location is expected to be inserted at most once.
202 assert(P.second && "Unexpected revisitation of the same include directive");
// Remembered for the FileChanged() or FileSkipped() call expected to follow.
204 LastInclusionLocation = HashLoc;
207 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
208 /// an inclusion directive) in the map of inclusion information, FileIncludes.
/// The map is searched by the raw encoding of \p Loc.
/// NOTE(review): the return statements are not visible in this listing;
/// callers test the result as a possibly-null pointer.
209 const InclusionRewriter::IncludedFile *
210 InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {
211 const auto I = FileIncludes.find(Loc.getRawEncoding());
212 if (I != FileIncludes.end())
217 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
218 /// an inclusion directive) in the map of module inclusion information.
/// ModuleIncludes is searched by the raw encoding of \p Loc.
/// NOTE(review): the return type line and the return statements are not
/// visible in this listing; callers test the result as a possibly-null pointer.
220 InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {
221 const auto I = ModuleIncludes.find(Loc.getRawEncoding());
222 if (I != ModuleIncludes.end())
227 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
228 /// an inclusion directive) in the map of module entry information.
/// ModuleEntryIncludes is searched by the raw encoding of \p Loc.
/// NOTE(review): the return type line and the return statements are not
/// visible in this listing.
230 InclusionRewriter::FindEnteredModule(SourceLocation Loc) const {
231 const auto I = ModuleEntryIncludes.find(Loc.getRawEncoding());
232 if (I != ModuleEntryIncludes.end())
237 /// Detect the likely line ending style of \p FromFile by examining the first
238 /// newline found within it.
/// NOTE(review): the return statements, and whatever handles a buffer with no
/// newline at all, are not visible in this listing.
239 static StringRef DetectEOL(const MemoryBuffer &FromFile) {
240 // Detect what line endings the file uses, so that added content does not mix
241 // the style. We need to check for "\r\n" first because "\n\r" will match
// strchr scans up to the first NUL byte, so this relies on the buffer being
// null terminated (OutputContentUpTo states that all buffers are).
243 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
// A carriage return directly before the newline, bounds-checked against the
// start of the buffer.
246 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
// A carriage return directly after the newline, bounds-checked against the
// end of the buffer.
248 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
/// Sets MainEOL to the line ending that DetectEOL() finds in the buffer of the
/// main file.
/// NOTE(review): the declaration of Invalid and the check guarding the early
/// return below are not visible in this listing.
253 void InclusionRewriter::detectMainFileEOL() {
255 const MemoryBuffer &FromFile = *SM.getBuffer(SM.getMainFileID(), &Invalid);
258 return; // Should never happen, but whatever.
259 MainEOL = DetectEOL(FromFile);
262 /// Writes out bytes from \p FromFile, starting at \p WriteFrom and ending at
/// \p WriteFrom and \p WriteTo are byte offsets from the start of the buffer.
/// \p LocalEOL is the line ending used by \p FromFile; when it equals MainEOL
/// the text is handled as one piece and \p Line grows by the number of
/// \p LocalEOL occurrences in it, otherwise the text is split on \p LocalEOL
/// and handled line by line. \p EnsureNewline is consulted when the text does
/// not already end with \p LocalEOL.
/// NOTE(review): the statements that write to the stream and that update
/// \p WriteFrom are not visible in this listing.
264 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
265 unsigned &WriteFrom, unsigned WriteTo,
266 StringRef LocalEOL, int &Line,
267 bool EnsureNewline) {
// Empty or inverted range: presumably nothing to do (guarded statement not
// visible in this listing).
268 if (WriteTo <= WriteFrom)
270 if (&FromFile == PredefinesBuffer) {
271 // Ignore the #defines of the predefines buffer.
276 // If we would output half of a line ending, advance one character to output
277 // the whole line ending. All buffers are null terminated, so looking ahead
// Applies only to two-character line endings: the byte just before WriteTo
// matches the first character and the byte at WriteTo matches the second.
279 if (LocalEOL.size() == 2 &&
280 LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
281 LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
284 StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
285 WriteTo - WriteFrom);
// Same line ending as the main file: no per-line rewriting needed.
287 if (MainEOL == LocalEOL) {
289 // count lines manually, it's faster than getPresumedLoc()
290 Line += TextToWrite.count(LocalEOL);
291 if (EnsureNewline && !TextToWrite.endswith(LocalEOL))
294 // Output the file one line at a time, rewriting the line endings as we go.
295 StringRef Rest = TextToWrite;
296 while (!Rest.empty()) {
298 std::tie(LineText, Rest) = Rest.split(LocalEOL);
304 if (TextToWrite.endswith(LocalEOL) || EnsureNewline)
310 /// Print characters from \p FromFile starting at \p NextToWrite up until the
311 /// inclusion directive at \p StartToken, then print out the
312 /// inclusion directive disabled by a #if directive, updating \p NextToWrite
313 /// and \p Line to track the number of source lines visited and the progress
314 /// through the \p FromFile buffer.
/// \p DirectiveLex is advanced token by token until an end-of-directive or
/// end-of-file token is reached; the end of that token marks the end of the
/// text placed between the #if 0 and #endif lines, which are both terminated
/// with MainEOL.
/// NOTE(review): the \p LocalEOL parameter line and a few other statements are
/// not visible in this listing.
315 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
316 const Token &StartToken,
317 const MemoryBuffer &FromFile,
319 unsigned &NextToWrite, int &Line) {
// Flush everything that precedes the directive.
320 OutputContentUpTo(FromFile, NextToWrite,
321 SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
323 Token DirectiveToken;
// Skip to the end of the directive.
325 DirectiveLex.LexFromRawLexer(DirectiveToken);
326 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
327 if (&FromFile == PredefinesBuffer) {
328 // OutputContentUpTo() would not output anything anyway.
331 OS << "#if 0 /* expanded by -frewrite-includes */" << MainEOL;
// Copy the directive itself, forcing a trailing line ending so that the
// #endif starts on its own line.
332 OutputContentUpTo(FromFile, NextToWrite,
333 SM.getFileOffset(DirectiveToken.getLocation()) +
334 DirectiveToken.getLength(),
335 LocalEOL, Line, true);
336 OS << "#endif /* expanded by -frewrite-includes */" << MainEOL;
339 /// Find the next identifier in the pragma directive specified by \p RawToken.
/// Lexes one token from \p RawLex into \p RawToken, resolves a raw identifier
/// through the preprocessor, and returns the identifier name when the token
/// is an identifier.
/// NOTE(review): the value returned for a non-identifier token is not visible
/// in this listing.
340 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
342 RawLex.LexFromRawLexer(RawToken);
// Raw lexing yields raw_identifier tokens; the lookup turns them into
// identifier tokens carrying IdentifierInfo.
343 if (RawToken.is(tok::raw_identifier))
344 PP.LookUpIdentifierInfo(RawToken);
345 if (RawToken.is(tok::identifier))
346 return RawToken.getIdentifierInfo()->getName();
350 // Expand __has_include and __has_include_next if possible. If there's no
351 // definitive answer return false.
// Expects \p RawLex to be positioned just after the builtin name. The tokens
// consumed are an opening paren, either an angle-bracket file name assembled
// from several raw tokens or a single string literal, and a closing paren.
// \p Lookup is passed through to HeaderSearch::LookupFile, and FileExists is
// set to whether that lookup produced a file entry.
// NOTE(review): the final parameter line, the declaration of Filename and the
// early-return statements are not visible in this listing.
352 bool InclusionRewriter::HandleHasInclude(
353 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
355 // Lex the opening paren.
356 RawLex.LexFromRawLexer(Tok);
357 if (Tok.isNot(tok::l_paren))
360 RawLex.LexFromRawLexer(Tok);
362 SmallString<128> FilenameBuffer;
364 // Since the raw lexer doesn't give us angle_literals we have to parse them
366 // FIXME: What to do if the file name is a macro?
367 if (Tok.is(tok::less)) {
368 RawLex.LexFromRawLexer(Tok);
370 FilenameBuffer += '<';
// Append the spelling of every token up to the closing angle bracket.
372 if (Tok.is(tok::eod)) // Sanity check.
375 if (Tok.is(tok::raw_identifier))
376 PP.LookUpIdentifierInfo(Tok);
378 // Get the string piece.
379 SmallVector<char, 128> TmpBuffer;
380 bool Invalid = false;
381 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
385 FilenameBuffer += TmpName;
387 RawLex.LexFromRawLexer(Tok);
388 } while (Tok.isNot(tok::greater));
390 FilenameBuffer += '>';
391 Filename = FilenameBuffer;
// Otherwise the file name must be a single string literal token.
393 if (Tok.isNot(tok::string_literal))
396 bool Invalid = false;
397 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
402 // Lex the closing paren.
403 RawLex.LexFromRawLexer(Tok);
404 if (Tok.isNot(tok::r_paren))
407 // Now ask HeaderInfo if it knows about the header.
408 // FIXME: Subframeworks aren't handled here. Do we care?
409 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
410 const DirectoryLookup *CurDir;
// The file being rewritten is offered as the includer, together with its
// directory.
411 const FileEntry *FileEnt = PP.getSourceManager().getFileEntryForID(FileId);
412 SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 1>
414 Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
415 // FIXME: Why don't we call PP.LookupFile here?
416 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
417 Filename, SourceLocation(), isAngled, Lookup, CurDir, Includers, nullptr,
418 nullptr, nullptr, nullptr, nullptr);
420 FileExists = File != nullptr;
424 /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
425 /// and including content of included files recursively.
/// \p FileType is the characteristic kind reported in line markers for this
/// file; it is updated in place when a system_header pragma is seen.
/// \p DirLookup is forwarded to HandleHasInclude() for __has_include_next.
/// NOTE(review): many lines of this function (case labels, break statements,
/// closing braces and some declarations) are not visible in this listing, so
/// the notes below describe the visible statements only.
426 void InclusionRewriter::Process(FileID FileId,
427 SrcMgr::CharacteristicKind FileType,
428 const DirectoryLookup *DirLookup) {
430 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
431 assert(!Invalid && "Attempting to process invalid inclusion");
432 StringRef FileName = FromFile.getBufferIdentifier();
// Raw lexer over the buffer, with comments not returned as tokens.
433 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
434 RawLex.SetCommentRetentionState(false);
436 StringRef LocalEOL = DetectEOL(FromFile);
438 // Per the GNU docs: "1" indicates entering a new file.
// The main file and the predefines get a marker without that flag.
439 if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
440 WriteLineInfo(FileName, 1, FileType, "");
442 WriteLineInfo(FileName, 1, FileType, " 1");
// Empty file: presumably nothing more to do (guarded statement not visible
// in this listing).
444 if (SM.getFileIDSize(FileId) == 0)
447 // The next byte to be copied from the source file, which may be non-zero if
448 // the lexer handled a BOM.
449 unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
450 assert(SM.getLineNumber(FileId, NextToWrite) == 1);
451 int Line = 1; // The current input file line number.
454 RawLex.LexFromRawLexer(RawToken);
456 // TODO: Consider adding a switch that strips possibly unimportant content,
457 // such as comments, to reduce the size of repro files.
458 while (RawToken.isNot(tok::eof)) {
// A hash at the start of a line begins a preprocessor directive; the token
// after it is resolved to find out which directive it is.
459 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
460 RawLex.setParsingPreprocessorDirective(true);
461 Token HashToken = RawToken;
462 RawLex.LexFromRawLexer(RawToken);
463 if (RawToken.is(tok::raw_identifier))
464 PP.LookUpIdentifierInfo(RawToken);
465 if (RawToken.getIdentifierInfo() != nullptr) {
466 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
467 case tok::pp_include:
468 case tok::pp_include_next:
469 case tok::pp_import: {
// Wrap the directive in #if 0 / #endif, then emit either the module import
// or the recursively processed contents recorded for this hash location.
470 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, NextToWrite,
472 if (FileId != PP.getPredefinesFileID())
473 WriteLineInfo(FileName, Line - 1, FileType, "");
474 StringRef LineInfoExtra;
475 SourceLocation Loc = HashToken.getLocation();
476 if (const Module *Mod = FindModuleAtLocation(Loc))
477 WriteImplicitModuleImport(Mod);
478 else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
479 const Module *Mod = FindEnteredModule(Loc);
481 OS << "#pragma clang module begin "
482 << Mod->getFullModuleName(true) << "\n";
484 // Include and recursively process the file.
485 Process(Inc->Id, Inc->FileType, Inc->DirLookup);
488 OS << "#pragma clang module end /*"
489 << Mod->getFullModuleName(true) << "*/\n";
491 // Add line marker to indicate we're returning from an included
493 LineInfoExtra = " 2";
495 // fix up lineinfo (since commented out directive changed line
496 // numbers) for inclusions that were skipped due to header guards
497 WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
500 case tok::pp_pragma: {
// Two pragmas are commented out: system_header under the clang or GCC
// namespace, and once.
501 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
502 if (Identifier == "clang" || Identifier == "GCC") {
503 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
504 // keep the directive in, commented out
505 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
507 // update our own type
508 FileType = SM.getFileCharacteristic(RawToken.getLocation());
509 WriteLineInfo(FileName, Line, FileType);
511 } else if (Identifier == "once") {
512 // keep the directive in, commented out
513 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
515 WriteLineInfo(FileName, Line, FileType);
// NOTE(review): the case labels introducing the conditional-directive
// handling below are not visible in this listing.
521 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
523 // Rewrite special builtin macros to avoid pulling in host details.
525 // Walk over the directive.
526 RawLex.LexFromRawLexer(RawToken);
527 if (RawToken.is(tok::raw_identifier))
528 PP.LookUpIdentifierInfo(RawToken);
530 if (RawToken.is(tok::identifier)) {
532 SourceLocation Loc = RawToken.getLocation();
534 // Rewrite __has_include(x)
535 if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
536 if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken,
539 // Rewrite __has_include_next(x)
540 } else if (RawToken.getIdentifierInfo()->isStr(
541 "__has_include_next")) {
545 if (!HandleHasInclude(FileId, RawLex, DirLookup, RawToken,
551 // Replace the macro with (0) or (1), followed by the commented
552 // out macro for reference.
553 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
554 LocalEOL, Line, false);
555 OS << '(' << (int) HasFile << ")/*";
556 OutputContentUpTo(FromFile, NextToWrite,
557 SM.getFileOffset(RawToken.getLocation()) +
558 RawToken.getLength(),
559 LocalEOL, Line, false);
562 } while (RawToken.isNot(tok::eod));
564 OutputContentUpTo(FromFile, NextToWrite,
565 SM.getFileOffset(RawToken.getLocation()) +
566 RawToken.getLength(),
567 LocalEOL, Line, /*EnsureNewline=*/ true);
568 WriteLineInfo(FileName, Line, FileType);
574 // We surround every #include by #if 0 to comment it out, but that
575 // changes line numbers. These are fixed up right after that, but
576 // the whole #include could be inside a preprocessor conditional
577 // that is not processed. So it is necessary to fix the line
578 // numbers on the next line after each #else/#endif as well.
579 RawLex.SetKeepWhitespaceMode(true);
581 RawLex.LexFromRawLexer(RawToken);
582 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
583 OutputContentUpTo(FromFile, NextToWrite,
584 SM.getFileOffset(RawToken.getLocation()) +
585 RawToken.getLength(),
586 LocalEOL, Line, /*EnsureNewline=*/ true);
587 WriteLineInfo(FileName, Line, FileType);
588 RawLex.SetKeepWhitespaceMode(false);
// Leave directive mode before lexing ordinary tokens again.
594 RawLex.setParsingPreprocessorDirective(false);
596 RawLex.LexFromRawLexer(RawToken);
// Copy whatever remains of the file, forcing a trailing line ending.
598 OutputContentUpTo(FromFile, NextToWrite,
599 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
600 Line, /*EnsureNewline=*/true);
603 /// RewriteIncludesInInput - Implement -frewrite-includes mode.
/// Creates an InclusionRewriter writing to \p OS, registers it as a callback
/// object on \p PP, lets the preprocessor run over the main file so the
/// callbacks fire, and then rewrites the predefines buffer followed by the
/// main file. \p OS is dereferenced without a null check. Line marker
/// behaviour comes from \p Opts.
/// NOTE(review): the declaration of Tok and the statement that lexes into it
/// are not visible in this listing.
604 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
605 const PreprocessorOutputOptions &Opts) {
606 SourceManager &SM = PP.getSourceManager();
// Allocated with new here; the raw pointer is wrapped in a unique_ptr and
// handed to the preprocessor a few lines below, and is still used through
// Rewrite afterwards.
607 InclusionRewriter *Rewrite = new InclusionRewriter(
608 PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);
609 Rewrite->detectMainFileEOL();
611 PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));
614 // First let the preprocessor process the entire file and call callbacks.
615 // Callbacks will record which #include's were actually performed.
616 PP.EnterMainSourceFile();
618 // Only preprocessor directives matter here, so disable macro expansion
619 // everywhere else as an optimization.
620 // TODO: It would be even faster if the preprocessor could be switched
621 // to a mode where it would parse only preprocessor directives and comments,
622 // nothing else matters for parsing or processing.
623 PP.SetMacroExpansionOnlyInDirectives();
// Module-begin annotation tokens seen on the way to end of file are recorded
// for later use by Process().
626 if (Tok.is(tok::annot_module_begin))
627 Rewrite->handleModuleBegin(Tok);
628 } while (Tok.isNot(tok::eof));
// Registering the predefines buffer makes OutputContentUpTo() treat it
// specially (see the predefines check there).
629 Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
630 Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User, nullptr);
631 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User, nullptr);