1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code rewrites include invocations into their expansions. This gives you
11 // a file with all included files merged into it.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Rewrite/Frontend/Rewriters.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Frontend/PreprocessorOutputOptions.h"
18 #include "clang/Lex/HeaderSearch.h"
19 #include "clang/Lex/Pragma.h"
20 #include "clang/Lex/Preprocessor.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/raw_ostream.h"
24 using namespace clang;
// PPCallbacks client behind -frewrite-includes: the callback overrides below
// record which inclusion directives the preprocessor saw (FileChanges), and
// Process() later re-emits a file using that record.
// NOTE(review): the embedded line numbers of this listing skip values (for
// example 31 -> 36 and 37 -> 40), so parts of this class are not visible here.
29 class InclusionRewriter : public PPCallbacks {
30 /// Information about which #includes were actually performed,
31 /// created by preprocessor callbacks.
36 SrcMgr::CharacteristicKind FileType;
// Stores the directive location and the module passed in; Id and FileType are
// assigned later by FileChanged().
37 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
40 Preprocessor &PP; ///< Used to find inclusion directives.
41 SourceManager &SM; ///< Used to read and manage source files.
42 raw_ostream &OS; ///< The destination stream for rewritten contents.
43 StringRef MainEOL; ///< Line ending for text this rewriter adds; set from the main file by detectMainFileEOL().
44 const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
45 bool ShowLineMarkers; ///< Show #line markers.
46 bool UseLineDirectives; ///< Emit #line directives instead of GNU line markers.
// Keyed by the raw encoding of the directive's hash location (see
// InclusionDirective() and FindFileChangeLocation()).
47 typedef std::map<unsigned, FileChange> FileChangeMap;
48 FileChangeMap FileChanges; ///< Tracks which files were included where.
49 /// Used transitively for building up the FileChanges mapping over the
50 /// various \c PPCallbacks callbacks.
// Equals FileChanges.end() whenever no inclusion directive is pending.
51 FileChangeMap::iterator LastInsertedFileChange;
53 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
54 bool UseLineDirectives);
55 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
56 void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
57 PredefinesBuffer = Buf;
59 void detectMainFileEOL();
// PPCallbacks overrides.
61 void FileChanged(SourceLocation Loc, FileChangeReason Reason,
62 SrcMgr::CharacteristicKind FileType,
63 FileID PrevFID) override;
64 void FileSkipped(const FileEntry &SkippedFile, const Token &FilenameTok,
65 SrcMgr::CharacteristicKind FileType) override;
66 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
67 StringRef FileName, bool IsAngled,
68 CharSourceRange FilenameRange, const FileEntry *File,
69 StringRef SearchPath, StringRef RelativePath,
70 const Module *Imported) override;
// Output helpers used while re-emitting a file.
71 void WriteLineInfo(const char *Filename, int Line,
72 SrcMgr::CharacteristicKind FileType,
73 StringRef Extra = StringRef());
74 void WriteImplicitModuleImport(const Module *Mod);
75 void OutputContentUpTo(const MemoryBuffer &FromFile,
76 unsigned &WriteFrom, unsigned WriteTo,
77 StringRef EOL, int &lines,
79 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
80 const MemoryBuffer &FromFile, StringRef EOL,
81 unsigned &NextToWrite, int &Lines);
82 bool HandleHasInclude(FileID FileId, Lexer &RawLex,
83 const DirectoryLookup *Lookup, Token &Tok,
85 const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
86 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
89 } // end anonymous namespace
91 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
/// The source manager is taken from \p PP, MainEOL starts out as "\n", no
/// predefines buffer is set, and no inclusion directive is pending
/// (LastInsertedFileChange is FileChanges.end()).
92 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
94 bool UseLineDirectives)
95 : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
96 PredefinesBuffer(nullptr), ShowLineMarkers(ShowLineMarkers),
97 UseLineDirectives(UseLineDirectives),
98 LastInsertedFileChange(FileChanges.end()) {}
100 /// Write appropriate line information as either #line directives or GNU line
101 /// markers depending on what mode we're in, including the \p Filename and
102 /// \p Line we are located at, using the specified \p EOL line separator, and
103 /// any \p Extra context specifiers in GNU line directives.
// NOTE(review): the declaration in the class takes Filename, Line, FileType
// and Extra only; there is no EOL parameter, so the \p EOL mention above looks
// stale -- confirm against the full source.
104 void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
105 SrcMgr::CharacteristicKind FileType,
// Guard on the ShowLineMarkers option; the guarded statement is on a listing
// line that is not visible here (presumably an early return).
107 if (!ShowLineMarkers)
109 if (UseLineDirectives) {
// #line form: `#line <Line> "<escaped Filename>`; the rest of the line is
// written on listing lines not visible here.
110 OS << "#line" << ' ' << Line << ' ' << '"';
111 OS.write_escaped(Filename);
114 // Use GNU linemarkers as described here:
115 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
116 OS << '#' << ' ' << Line << ' ' << '"';
117 OS.write_escaped(Filename);
// The file characteristic selects which GNU flag(s) get appended; the
// statements that write them are not visible in this listing.
121 if (FileType == SrcMgr::C_System)
122 // "`3' This indicates that the following text comes from a system header
123 // file, so certain warnings should be suppressed."
125 else if (FileType == SrcMgr::C_ExternCSystem)
126 // as above for `3', plus "`4' This indicates that the following text
127 // should be treated as being wrapped in an implicit extern "C" block."
/// Writes an `@import <full module name>;` line for \p Mod, followed by a
/// marker comment saying it was produced by -frewrite-includes, and terminated
/// with MainEOL. \p Mod is dereferenced unconditionally.
133 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
134 OS << "@import " << Mod->getFullModuleName() << ";"
135 << " /* clang -frewrite-includes: implicit import */" << MainEOL;
138 /// FileChanged - Whenever the preprocessor enters or exits a #include file
139 /// it invokes this handler.
/// For the pending inclusion directive (if any) this records the FileID
/// containing \p Loc and the new file's characteristic kind, then clears the
/// pending marker.
140 void InclusionRewriter::FileChanged(SourceLocation Loc,
141 FileChangeReason Reason,
142 SrcMgr::CharacteristicKind NewFileType,
// The statements guarded by the two checks below are on listing lines that are
// not visible here (presumably early returns).
144 if (Reason != EnterFile)
146 if (LastInsertedFileChange == FileChanges.end())
147 // we didn't reach this file (eg: the main file) via an inclusion directive
149 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
150 LastInsertedFileChange->second.FileType = NewFileType;
// The directive has been matched with its file; nothing is pending any more.
151 LastInsertedFileChange = FileChanges.end();
154 /// Called whenever an inclusion is skipped due to canonical header protection
/// The pending FileChange record is removed from FileChanges and the pending
/// marker is cleared. None of the three parameters is used.
156 void InclusionRewriter::FileSkipped(const FileEntry &/*SkippedFile*/,
157 const Token &/*FilenameTok*/,
158 SrcMgr::CharacteristicKind /*FileType*/) {
159 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
160 "found via an inclusion directive, was skipped");
161 FileChanges.erase(LastInsertedFileChange);
162 LastInsertedFileChange = FileChanges.end();
165 /// This should be called whenever the preprocessor encounters include
166 /// directives. It does not say whether the file has been included, but it
167 /// provides more information about the directive (hash location instead
168 /// of location inside the included file). It is assumed that the matching
169 /// FileChanged() or FileSkipped() is called after this.
/// Only \p HashLoc and \p Imported are used: a FileChange built from them is
/// inserted into FileChanges under the raw encoding of \p HashLoc and becomes
/// the pending record.
170 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
171 const Token &/*IncludeTok*/,
172 StringRef /*FileName*/,
174 CharSourceRange /*FilenameRange*/,
175 const FileEntry * /*File*/,
176 StringRef /*SearchPath*/,
177 StringRef /*RelativePath*/,
178 const Module *Imported) {
179 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
180 "directive was found before the previous one was processed");
181 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
182 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
// insert() reports false in p.second when the key already existed.
183 assert(p.second && "Unexpected revisitation of the same include directive");
// Remember the new record so FileChanged()/FileSkipped() can complete or drop it.
185 LastInsertedFileChange = p.first;
188 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
189 /// an inclusion directive) in the map of inclusion information, FileChanges.
/// The key is the raw encoding of \p Loc, matching how InclusionDirective()
/// inserts records.
// NOTE(review): both return statements are on listing lines that are not
// visible here. Process() tests the result as a pointer in an `if`, so a miss
// presumably yields a null pointer -- confirm.
190 const InclusionRewriter::FileChange *
191 InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
192 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
193 if (I != FileChanges.end())
198 /// Detect the likely line ending style of \p FromFile by examining the first
199 /// newline found within it.
// NOTE(review): the return statements (and any handling of "no newline found")
// are on listing lines that are not visible here, so the value produced for
// each case cannot be confirmed from this view.
200 static StringRef DetectEOL(const MemoryBuffer &FromFile) {
201 // Detect what line endings the file uses, so that added content does not mix
202 // the style. We need to check for "\r\n" first because "\n\r" will match
204 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
// The search above stops at a NUL byte rather than at getBufferEnd(), so it
// depends on the buffer being NUL-terminated.
// Check for a '\r' immediately before the '\n' (the "\r\n" form).
// NOTE(review): when Pos equals getBufferStart(), `Pos - 1` forms a pointer
// before the start of the buffer just to compare it -- worth confirming this
// is acceptable or rewriting as `Pos > getBufferStart()`.
207 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
// Check for a '\r' immediately after the '\n' (the "\n\r" form).
209 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
/// Sets MainEOL to the line ending style DetectEOL() reports for the main
/// file's buffer.
// NOTE(review): the declaration of `Invalid` and the condition guarding the
// early return are on listing lines that are not visible here.
214 void InclusionRewriter::detectMainFileEOL() {
216 const MemoryBuffer &FromFile = *SM.getBuffer(SM.getMainFileID(), &Invalid);
219 return; // Should never happen, but whatever.
220 MainEOL = DetectEOL(FromFile);
223 /// Writes out bytes from \p FromFile, starting at \p WriteFrom and ending at
// \p WriteFrom is taken by reference and \p Line is an in/out line counter
// that is advanced by the number of \p LocalEOL occurrences in the text
// written. \p EnsureNewline asks for the output to end with a line ending.
// NOTE(review): several statements of this function (the writes to OS, the
// update of WriteFrom, and the bodies of the early-exit checks) are on listing
// lines that are not visible here.
225 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
226 unsigned &WriteFrom, unsigned WriteTo,
227 StringRef LocalEOL, int &Line,
228 bool EnsureNewline) {
// Empty or inverted range.
229 if (WriteTo <= WriteFrom)
231 if (&FromFile == PredefinesBuffer) {
232 // Ignore the #defines of the predefines buffer.
237 // If we would output half of a line ending, advance one character to output
238 // the whole line ending. All buffers are null terminated, so looking ahead
// Only two-character line endings can be split: test whether the byte before
// WriteTo and the byte at WriteTo together form LocalEOL.
240 if (LocalEOL.size() == 2 &&
241 LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
242 LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
245 StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
246 WriteTo - WriteFrom);
// Same line ending as the main file: the text needs no line-ending rewrite.
248 if (MainEOL == LocalEOL) {
250 // count lines manually, it's faster than getPresumedLoc()
251 Line += TextToWrite.count(LocalEOL);
252 if (EnsureNewline && !TextToWrite.endswith(LocalEOL))
255 // Output the file one line at a time, rewriting the line endings as we go.
256 StringRef Rest = TextToWrite;
257 while (!Rest.empty()) {
// Peel off the text up to the next LocalEOL; Rest keeps what follows it.
259 std::tie(LineText, Rest) = Rest.split(LocalEOL);
265 if (TextToWrite.endswith(LocalEOL) || EnsureNewline)
271 /// Print characters from \p FromFile starting at \p NextToWrite up until the
272 /// inclusion directive at \p StartToken, then print out the
273 /// inclusion directive disabled by a #if directive, updating \p NextToWrite
274 /// and \p Line to track the number of source lines visited and the progress
275 /// through the \p FromFile buffer.
/// The directive text is wrapped between `#if 0` and `#endif` lines, each
/// carrying an "expanded by -frewrite-includes" comment and terminated with
/// MainEOL.
276 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
277 const Token &StartToken,
278 const MemoryBuffer &FromFile,
280 unsigned &NextToWrite, int &Line) {
// Flush everything that precedes the directive's first token.
281 OutputContentUpTo(FromFile, NextToWrite,
282 SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
284 Token DirectiveToken;
// Consume tokens until the end of the directive (or of the file).
286 DirectiveLex.LexFromRawLexer(DirectiveToken);
287 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
288 if (&FromFile == PredefinesBuffer) {
289 // OutputContentUpTo() would not output anything anyway.
292 OS << "#if 0 /* expanded by -frewrite-includes */" << MainEOL;
// Copy the directive itself, through the end of the last token lexed above,
// forcing a trailing line ending so that #endif starts on its own line.
293 OutputContentUpTo(FromFile, NextToWrite,
294 SM.getFileOffset(DirectiveToken.getLocation()) +
295 DirectiveToken.getLength(),
296 LocalEOL, Line, true);
297 OS << "#endif /* expanded by -frewrite-includes */" << MainEOL;
300 /// Find the next identifier in the pragma directive specified by \p RawToken.
/// Lexes one token from \p RawLex into \p RawToken; a raw identifier is
/// resolved through the preprocessor's identifier lookup, and if the token is
/// then an identifier its name is returned.
// NOTE(review): the return for the non-identifier case is on a listing line
// that is not visible here.
301 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
303 RawLex.LexFromRawLexer(RawToken);
304 if (RawToken.is(tok::raw_identifier))
305 PP.LookUpIdentifierInfo(RawToken);
306 if (RawToken.is(tok::identifier))
307 return RawToken.getIdentifierInfo()->getName();
311 // Expand __has_include and __has_include_next if possible. If there's no
312 // definitive answer return false.
// Expected token shape after the macro name: `(`, then either `<` ... `>` or a
// string literal, then `)`. On success the lookup result is stored in
// FileExists (true when a file was found).
// NOTE(review): the return statements, the declaration of Filename, and the
// rest of the parameter list are on listing lines that are not visible here.
// NOTE(review): \p Lookup does not appear in the visible LookupFile() call
// below (a nullptr is passed instead) -- confirm whether __has_include_next is
// meant to start searching from \p Lookup.
313 bool InclusionRewriter::HandleHasInclude(
314 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
316 // Lex the opening paren.
317 RawLex.LexFromRawLexer(Tok);
318 if (Tok.isNot(tok::l_paren))
321 RawLex.LexFromRawLexer(Tok);
323 SmallString<128> FilenameBuffer;
325 // Since the raw lexer doesn't give us angle_literals we have to parse them
327 // FIXME: What to do if the file name is a macro?
328 if (Tok.is(tok::less)) {
329 RawLex.LexFromRawLexer(Tok);
// Rebuild the `<...>` spelling by concatenating the spelling of every token up
// to the closing `>`.
331 FilenameBuffer += '<';
333 if (Tok.is(tok::eod)) // Sanity check.
336 if (Tok.is(tok::raw_identifier))
337 PP.LookUpIdentifierInfo(Tok);
339 // Get the string piece.
340 SmallVector<char, 128> TmpBuffer;
341 bool Invalid = false;
342 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
346 FilenameBuffer += TmpName;
348 RawLex.LexFromRawLexer(Tok);
349 } while (Tok.isNot(tok::greater));
351 FilenameBuffer += '>';
352 Filename = FilenameBuffer;
// Otherwise the argument has to be a string literal.
354 if (Tok.isNot(tok::string_literal))
357 bool Invalid = false;
358 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
363 // Lex the closing paren.
364 RawLex.LexFromRawLexer(Tok);
365 if (Tok.isNot(tok::r_paren))
368 // Now ask HeaderInfo if it knows about the header.
369 // FIXME: Subframeworks aren't handled here. Do we care?
370 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
371 const DirectoryLookup *CurDir;
// NOTE(review): FileEnt is dereferenced below (FileEnt->getDir()) with no
// visible null check -- confirm getFileEntryForID() cannot return null for the
// FileIds that reach this function.
372 const FileEntry *FileEnt = PP.getSourceManager().getFileEntryForID(FileId);
373 SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 1>
375 Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
376 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
377 Filename, SourceLocation(), isAngled, nullptr, CurDir, Includers, nullptr,
378 nullptr, nullptr, false);
380 FileExists = File != nullptr;
384 /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
385 /// and including content of included files recursively.
/// \p FileType is the characteristic kind reported in the line markers written
/// for this file; a `#pragma clang/GCC system_header` updates it mid-file.
// NOTE(review): this listing omits many lines of the function (return
// statements, closing braces, several declarations and case labels). The
// caller in the include case uses the boolean result to decide whether a
// " 2" line-marker flag is written afterwards.
386 bool InclusionRewriter::Process(FileID FileId,
387 SrcMgr::CharacteristicKind FileType)
390 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
391 if (Invalid) // invalid inclusion
393 const char *FileName = FromFile.getBufferIdentifier();
394 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
395 RawLex.SetCommentRetentionState(false);
397 StringRef LocalEOL = DetectEOL(FromFile);
399 // Per the GNU docs: "1" indicates entering a new file.
// The main file and the predefines buffer get no flag; the visible second call
// passes " 1" (its `else` is on a listing line not visible here).
400 if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
401 WriteLineInfo(FileName, 1, FileType, "");
403 WriteLineInfo(FileName, 1, FileType, " 1");
// Empty file: the guarded statement is not visible here.
405 if (SM.getFileIDSize(FileId) == 0)
408 // The next byte to be copied from the source file, which may be non-zero if
409 // the lexer handled a BOM.
410 unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
411 assert(SM.getLineNumber(FileId, NextToWrite) == 1);
412 int Line = 1; // The current input file line number.
415 RawLex.LexFromRawLexer(RawToken);
417 // TODO: Consider adding a switch that strips possibly unimportant content,
418 // such as comments, to reduce the size of repro files.
419 while (RawToken.isNot(tok::eof)) {
// A '#' at the start of a line begins a directive: put the raw lexer into
// directive-parsing mode (switched off again at listing line 550) and keep the
// hash token so the directive can be located later.
420 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
421 RawLex.setParsingPreprocessorDirective(true);
422 Token HashToken = RawToken;
423 RawLex.LexFromRawLexer(RawToken);
424 if (RawToken.is(tok::raw_identifier))
425 PP.LookUpIdentifierInfo(RawToken);
426 if (RawToken.getIdentifierInfo() != nullptr) {
427 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
428 case tok::pp_include:
429 case tok::pp_include_next:
430 case tok::pp_import: {
431 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, NextToWrite,
433 if (FileId != PP.getPredefinesFileID())
434 WriteLineInfo(FileName, Line - 1, FileType, "");
435 StringRef LineInfoExtra;
// Look up what the preprocessor recorded for this directive's '#'.
436 if (const FileChange *Change = FindFileChangeLocation(
437 HashToken.getLocation())) {
439 WriteImplicitModuleImport(Change->Mod);
441 // else now include and recursively process the file
442 } else if (Process(Change->Id, Change->FileType)) {
443 // and set lineinfo back to this file, if the nested one was
445 // `2' indicates returning to a file (after having included
447 LineInfoExtra = " 2";
450 // fix up lineinfo (since commented out directive changed line
451 // numbers) for inclusions that were skipped due to header guards
452 WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
455 case tok::pp_pragma: {
456 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
457 if (Identifier == "clang" || Identifier == "GCC") {
458 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
459 // keep the directive in, commented out
460 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
462 // update our own type
463 FileType = SM.getFileCharacteristic(RawToken.getLocation());
464 WriteLineInfo(FileName, Line, FileType);
466 } else if (Identifier == "once") {
467 // keep the directive in, commented out
468 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
470 WriteLineInfo(FileName, Line, FileType);
476 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
478 // Rewrite special builtin macros to avoid pulling in host details.
480 // Walk over the directive.
481 RawLex.LexFromRawLexer(RawToken);
482 if (RawToken.is(tok::raw_identifier))
483 PP.LookUpIdentifierInfo(RawToken);
485 if (RawToken.is(tok::identifier)) {
// Remember where the macro name starts so the text before it can be flushed.
487 SourceLocation Loc = RawToken.getLocation();
489 // Rewrite __has_include(x)
490 if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
491 if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken,
494 // Rewrite __has_include_next(x)
495 } else if (RawToken.getIdentifierInfo()->isStr(
496 "__has_include_next")) {
497 const DirectoryLookup *Lookup = PP.GetCurDirLookup();
501 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
507 // Replace the macro with (0) or (1), followed by the commented
508 // out macro for reference.
509 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
510 LocalEOL, Line, false);
511 OS << '(' << (int) HasFile << ")/*";
512 OutputContentUpTo(FromFile, NextToWrite,
513 SM.getFileOffset(RawToken.getLocation()) +
514 RawToken.getLength(),
515 LocalEOL, Line, false);
// NOTE(review): unlike the loop ending at listing line 538, this condition does
// not also test for tok::eof -- presumably tok::eod is always produced first
// while in directive-parsing mode; confirm.
518 } while (RawToken.isNot(tok::eod));
520 OutputContentUpTo(FromFile, NextToWrite,
521 SM.getFileOffset(RawToken.getLocation()) +
522 RawToken.getLength(),
523 LocalEOL, Line, /*EnsureNewline=*/ true);
524 WriteLineInfo(FileName, Line, FileType);
530 // We surround every #include by #if 0 to comment it out, but that
531 // changes line numbers. These are fixed up right after that, but
532 // the whole #include could be inside a preprocessor conditional
533 // that is not processed. So it is necessary to fix the line
534 // numbers on the next line after each #else/#endif as well.
535 RawLex.SetKeepWhitespaceMode(true);
537 RawLex.LexFromRawLexer(RawToken);
538 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
539 OutputContentUpTo(FromFile, NextToWrite,
540 SM.getFileOffset(RawToken.getLocation()) +
541 RawToken.getLength(),
542 LocalEOL, Line, /*EnsureNewline=*/ true);
543 WriteLineInfo(FileName, Line, FileType);
544 RawLex.SetKeepWhitespaceMode(false);
550 RawLex.setParsingPreprocessorDirective(false);
552 RawLex.LexFromRawLexer(RawToken);
// After the token loop: flush whatever remains up to the end of the file,
// making sure the output ends with a line ending.
554 OutputContentUpTo(FromFile, NextToWrite,
555 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
556 Line, /*EnsureNewline=*/true);
560 /// RewriteIncludesInInput - Implement -frewrite-includes mode.
/// Installs an InclusionRewriter as a preprocessor callback, lets \p PP run
/// over the main file so the callbacks record the inclusions, then emits the
/// predefines buffer followed by the main file to \p OS.
// NOTE(review): \p OS is dereferenced without a null check -- presumably
// callers always pass a valid stream; confirm.
561 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
562 const PreprocessorOutputOptions &Opts) {
563 SourceManager &SM = PP.getSourceManager();
564 InclusionRewriter *Rewrite = new InclusionRewriter(
565 PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);
566 Rewrite->detectMainFileEOL();
// Ownership of the rewriter passes to PP here; the raw pointer is still used
// below, so the callback object has to stay alive inside PP for those calls.
568 PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));
571 // First let the preprocessor process the entire file and call callbacks.
572 // Callbacks will record which #include's were actually performed.
573 PP.EnterMainSourceFile();
575 // Only preprocessor directives matter here, so disable macro expansion
576 // everywhere else as an optimization.
577 // TODO: It would be even faster if the preprocessor could be switched
578 // to a mode where it would parse only preprocessor directives and comments,
579 // nothing else matters for parsing or processing.
580 PP.SetMacroExpansionOnlyInDirectives();
// Tail of the loop that drains the preprocessor until end of file; its opening
// lines are not visible in this listing.
583 } while (Tok.isNot(tok::eof));
// Second phase: re-emit the predefines buffer, then the main file, both as
// user (non-system) files.
584 Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
585 Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
586 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);