1 //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code rewrites include invocations into their expansions. This gives you
11 // a file with all included files merged into it.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Rewrite/Frontend/Rewriters.h"
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Frontend/PreprocessorOutputOptions.h"
18 #include "clang/Lex/HeaderSearch.h"
19 #include "clang/Lex/Pragma.h"
20 #include "clang/Lex/Preprocessor.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/raw_ostream.h"
24 using namespace clang;
/// Preprocessor callback client that records what happened at each inclusion
/// directive and declares the helpers used to write the rewritten output.
/// NOTE(review): this listing elides some source lines (the embedded line
/// numbers skip), so parts of the class are not visible here.
29 class InclusionRewriter : public PPCallbacks {
30 /// Information about which #includes were actually performed,
31 /// created by preprocessor callbacks.
36 SrcMgr::CharacteristicKind FileType;
// Only Mod and From are set at construction; FileChanged() later assigns
// Id and FileType on the stored entry.
37 FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
40 Preprocessor &PP; ///< Used to find inclusion directives.
41 SourceManager &SM; ///< Used to read and manage source files.
42 raw_ostream &OS; ///< The destination stream for rewritten contents.
43 const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
44 bool ShowLineMarkers; ///< Show #line markers.
45 bool UseLineDirective; ///< Use of line directives or line markers.
// Keyed by SourceLocation::getRawEncoding() of the directive's hash location
// (that is what InclusionDirective() inserts and FindFileChangeLocation()
// looks up).
46 typedef std::map<unsigned, FileChange> FileChangeMap;
47 FileChangeMap FileChanges; ///< Tracks which files were included where.
48 /// Used transitively for building up the FileChanges mapping over the
49 /// various \c PPCallbacks callbacks.
// FileChanges.end() is used as the "no directive pending" value.
50 FileChangeMap::iterator LastInsertedFileChange;
52 InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
53 bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
54 void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
55 PredefinesBuffer = Buf;
58 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
59 SrcMgr::CharacteristicKind FileType,
61 virtual void FileSkipped(const FileEntry &ParentFile,
62 const Token &FilenameTok,
63 SrcMgr::CharacteristicKind FileType);
64 virtual void InclusionDirective(SourceLocation HashLoc,
65 const Token &IncludeTok,
68 CharSourceRange FilenameRange,
69 const FileEntry *File,
71 StringRef RelativePath,
72 const Module *Imported);
73 void WriteLineInfo(const char *Filename, int Line,
74 SrcMgr::CharacteristicKind FileType,
75 StringRef EOL, StringRef Extra = StringRef());
76 void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
// NOTE(review): the parameter is named `lines` here but `Line` in the
// out-of-line definition; likewise `DirectivesLex` below vs `DirectiveLex`.
77 void OutputContentUpTo(const MemoryBuffer &FromFile,
78 unsigned &WriteFrom, unsigned WriteTo,
79 StringRef EOL, int &lines,
80 bool EnsureNewline = false);
81 void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
82 const MemoryBuffer &FromFile, StringRef EOL,
83 unsigned &NextToWrite, int &Lines);
84 bool HandleHasInclude(FileID FileId, Lexer &RawLex,
85 const DirectoryLookup *Lookup, Token &Tok,
87 const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
88 StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
91 } // end anonymous namespace
93 /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
/// The source manager is taken from \p PP, the predefines buffer starts out
/// null, and no inclusion directive is pending (LastInsertedFileChange is
/// FileChanges.end()).
94 InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
96 : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(0),
97 ShowLineMarkers(ShowLineMarkers),
98 LastInsertedFileChange(FileChanges.end()) {
99 // If we're in microsoft mode, use normal #line instead of line markers.
// Assigned in the body rather than in the initializer list.
100 UseLineDirective = PP.getLangOpts().MicrosoftExt;
103 /// Write appropriate line information as either #line directives or GNU line
104 /// markers depending on what mode we're in, including the \p Filename and
105 /// \p Line we are located at, using the specified \p EOL line separator, and
106 /// any \p Extra context specifiers in GNU line directives.
/// \p FileType is compared against SrcMgr::C_System and
/// SrcMgr::C_ExternCSystem to choose the system-header flags quoted from the
/// GCC documentation below. \p Filename is written with escaping applied.
107 void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
108 SrcMgr::CharacteristicKind FileType,
109 StringRef EOL, StringRef Extra) {
110 if (!ShowLineMarkers)
112 if (UseLineDirective) {
// Directive form: #line <Line> "<escaped Filename>
113 OS << "#line" << ' ' << Line << ' ' << '"';
114 OS.write_escaped(Filename);
117 // Use GNU linemarkers as described here:
118 // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
// Marker form: # <Line> "<escaped Filename>
119 OS << '#' << ' ' << Line << ' ' << '"';
120 OS.write_escaped(Filename);
124 if (FileType == SrcMgr::C_System)
125 // "`3' This indicates that the following text comes from a system header
126 // file, so certain warnings should be suppressed."
128 else if (FileType == SrcMgr::C_ExternCSystem)
129 // as above for `3', plus "`4' This indicates that the following text
130 // should be treated as being wrapped in an implicit extern "C" block."
/// Writes an `@import <full module name>;` statement for \p Mod, followed by
/// a comment marking it as an implicit import produced by -frewrite-includes,
/// and terminates the line with \p EOL.
136 void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
138 OS << "@import " << Mod->getFullModuleName() << ";"
139 << " /* clang -frewrite-includes: implicit import */" << EOL;
142 /// FileChanged - Whenever the preprocessor enters or exits a #include file
143 /// it invokes this handler.
/// Only \c EnterFile events are of interest: they complete the entry that the
/// preceding InclusionDirective() call left pending.
144 void InclusionRewriter::FileChanged(SourceLocation Loc,
145 FileChangeReason Reason,
146 SrcMgr::CharacteristicKind NewFileType,
148 if (Reason != EnterFile)
150 if (LastInsertedFileChange == FileChanges.end())
151 // we didn't reach this file (eg: the main file) via an inclusion directive
// Record which file was entered and its characteristic kind on the pending
// entry, then mark that no directive is pending any more.
153 LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
154 LastInsertedFileChange->second.FileType = NewFileType;
155 LastInsertedFileChange = FileChanges.end();
158 /// Called whenever an inclusion is skipped due to canonical header protection
/// All three parameters are unused; the pending entry recorded by
/// InclusionDirective() is what gets discarded.
160 void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
161 const Token &/*FilenameTok*/,
162 SrcMgr::CharacteristicKind /*FileType*/) {
163 assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
164 "found via an inclusion directive, was skipped");
// Remove the pending entry so that no FileChange is found for this directive
// later, and reset the cursor to "nothing pending".
165 FileChanges.erase(LastInsertedFileChange);
166 LastInsertedFileChange = FileChanges.end();
169 /// This should be called whenever the preprocessor encounters include
170 /// directives. It does not say whether the file has been included, but it
171 /// provides more information about the directive (hash location instead
172 /// of location inside the included file). It is assumed that the matching
173 /// FileChanged() or FileSkipped() is called after this.
/// Only \p HashLoc and \p Imported are used; the remaining parameters are
/// ignored.
174 void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
175 const Token &/*IncludeTok*/,
176 StringRef /*FileName*/,
178 CharSourceRange /*FilenameRange*/,
179 const FileEntry * /*File*/,
180 StringRef /*SearchPath*/,
181 StringRef /*RelativePath*/,
182 const Module *Imported) {
183 assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
184 "directive was found before the previous one was processed");
// The map key is the raw encoding of the '#' location, which is what
// FindFileChangeLocation() looks up later.
185 std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
186 std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
187 assert(p.second && "Unexpected revisitation of the same include directive");
// Remember the new entry so FileChanged()/FileSkipped() can complete or
// discard it.
189 LastInsertedFileChange = p.first;
192 /// Simple lookup for a SourceLocation (specifically one denoting the hash in
193 /// an inclusion directive) in the map of inclusion information, FileChanges.
/// The lookup key is \p Loc's raw encoding, matching the key used on insert.
194 const InclusionRewriter::FileChange *
195 InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
196 FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
197 if (I != FileChanges.end())
202 /// Detect the likely line ending style of \p FromFile by examining the first
203 /// newline found within it.
/// Both neighbours of that newline are inspected for a carriage return: the
/// following byte and the preceding byte, each bounds-checked against the
/// buffer.
204 static StringRef DetectEOL(const MemoryBuffer &FromFile) {
205 // detect what line endings the file uses, so that added content does not mix
// NOTE(review): strchr() stops at the first NUL byte, not at getBufferEnd();
// presumably the buffer is NUL-terminated and has no embedded NULs - confirm.
207 const char *Pos = strchr(FromFile.getBufferStart(), '\n');
210 if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
212 if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
217 /// Writes out bytes from \p FromFile, starting at \p WriteFrom and ending at
/// \p Line is increased by the number of '\n' bytes in the written range.
/// With \p EnsureNewline, the last byte of the range is checked for being a
/// line terminator.
219 void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
220 unsigned &WriteFrom, unsigned WriteTo,
221 StringRef EOL, int &Line,
222 bool EnsureNewline) {
// Empty or inverted range.
223 if (WriteTo <= WriteFrom)
225 if (&FromFile == PredefinesBuffer) {
226 // Ignore the #defines of the predefines buffer.
230 OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
231 // count lines manually, it's faster than getPresumedLoc()
232 Line += std::count(FromFile.getBufferStart() + WriteFrom,
233 FromFile.getBufferStart() + WriteTo, '\n');
// WriteTo > WriteFrom >= 0 here, so WriteTo - 1 is a valid index.
235 char LastChar = FromFile.getBufferStart()[WriteTo - 1];
236 if (LastChar != '\n' && LastChar != '\r')
242 /// Print characters from \p FromFile starting at \p NextToWrite up until the
243 /// inclusion directive at \p StartToken, then print out the
244 /// inclusion directive disabled by a #if directive, updating \p NextToWrite
245 /// and \p Line to track the number of source lines visited and the progress
246 /// through the \p FromFile buffer.
247 void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
248 const Token &StartToken,
249 const MemoryBuffer &FromFile,
251 unsigned &NextToWrite, int &Line) {
// Flush everything that precedes the directive's first token.
252 OutputContentUpTo(FromFile, NextToWrite,
253 SM.getFileOffset(StartToken.getLocation()), EOL, Line);
254 Token DirectiveToken;
// Consume raw tokens until the end of the directive (or end of file).
256 DirectiveLex.LexFromRawLexer(DirectiveToken);
257 } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
// Emit the original directive text wrapped in an #if 0 / #endif pair.
258 OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
259 OutputContentUpTo(FromFile, NextToWrite,
260 SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
262 OS << "#endif /* expanded by -frewrite-includes */" << EOL;
265 /// Find the next identifier in the pragma directive specified by \p RawToken.
/// Lexes one raw token into \p RawToken; a raw identifier is resolved through
/// the preprocessor's identifier table before its name is returned.
266 StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
268 RawLex.LexFromRawLexer(RawToken);
269 if (RawToken.is(tok::raw_identifier))
270 PP.LookUpIdentifierInfo(RawToken);
271 if (RawToken.is(tok::identifier))
272 return RawToken.getIdentifierInfo()->getName();
276 // Expand __has_include and __has_include_next if possible. If there's no
277 // definitive answer return false.
// Expected token shape: '(' then either a "quoted" string literal or a
// '<' ... '>' sequence reassembled from raw tokens, then ')'.
// NOTE(review): in the lines visible here \p Lookup is never passed on - the
// LookupFile call below receives a literal 0 - so __has_include_next appears
// to be resolved the same way as __has_include. Confirm whether intended.
278 bool InclusionRewriter::HandleHasInclude(
279 FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
281 // Lex the opening paren.
282 RawLex.LexFromRawLexer(Tok);
283 if (Tok.isNot(tok::l_paren))
286 RawLex.LexFromRawLexer(Tok);
288 SmallString<128> FilenameBuffer;
290 // Since the raw lexer doesn't give us angle_literals we have to parse them
292 // FIXME: What to do if the file name is a macro?
293 if (Tok.is(tok::less)) {
294 RawLex.LexFromRawLexer(Tok);
296 FilenameBuffer += '<';
298 if (Tok.is(tok::eod)) // Sanity check.
301 if (Tok.is(tok::raw_identifier))
302 PP.LookUpIdentifierInfo(Tok);
304 // Get the string piece.
305 SmallVector<char, 128> TmpBuffer;
306 bool Invalid = false;
307 StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
311 FilenameBuffer += TmpName;
313 RawLex.LexFromRawLexer(Tok);
314 } while (Tok.isNot(tok::greater));
316 FilenameBuffer += '>';
317 Filename = FilenameBuffer;
319 if (Tok.isNot(tok::string_literal))
322 bool Invalid = false;
323 Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
328 // Lex the closing paren.
329 RawLex.LexFromRawLexer(Tok);
330 if (Tok.isNot(tok::r_paren))
333 // Now ask HeaderInfo if it knows about the header.
334 // FIXME: Subframeworks aren't handled here. Do we care?
335 bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
336 const DirectoryLookup *CurDir;
337 const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
338 Filename, isAngled, 0, CurDir,
339 PP.getSourceManager().getFileEntryForID(FileId), 0, 0, 0, false);
// A non-null FileEntry means the header was found.
341 FileExists = File != 0;
345 /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
346 /// and including content of included files recursively.
/// \p FileType is the characteristic kind used for the line markers written
/// for this file; it is updated when a system_header pragma is encountered.
/// The boolean result of a nested call decides whether the caller emits a
/// " 2" (returning-to-file) line marker.
347 bool InclusionRewriter::Process(FileID FileId,
348 SrcMgr::CharacteristicKind FileType)
351 const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
352 if (Invalid) // invalid inclusion
354 const char *FileName = FromFile.getBufferIdentifier();
355 Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
356 RawLex.SetCommentRetentionState(false);
358 StringRef EOL = DetectEOL(FromFile);
360 // Per the GNU docs: "1" indicates the start of a new file.
361 WriteLineInfo(FileName, 1, FileType, EOL, " 1");
363 if (SM.getFileIDSize(FileId) == 0)
366 // The next byte to be copied from the source file
367 unsigned NextToWrite = 0;
368 int Line = 1; // The current input file line number.
370 // Ignore UTF-8 BOM, otherwise it'd end up somewhere else than the start
371 // of the resulting file.
372 if (FromFile.getBuffer().startswith("\xEF\xBB\xBF"))
376 RawLex.LexFromRawLexer(RawToken);
378 // TODO: Consider adding a switch that strips possibly unimportant content,
379 // such as comments, to reduce the size of repro files.
380 while (RawToken.isNot(tok::eof)) {
// Only a '#' at the start of a line begins a preprocessor directive.
381 if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
382 RawLex.setParsingPreprocessorDirective(true);
383 Token HashToken = RawToken;
384 RawLex.LexFromRawLexer(RawToken);
385 if (RawToken.is(tok::raw_identifier))
386 PP.LookUpIdentifierInfo(RawToken);
387 if (RawToken.getIdentifierInfo() != NULL) {
388 switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
389 case tok::pp_include:
390 case tok::pp_include_next:
391 case tok::pp_import: {
392 CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
394 StringRef LineInfoExtra;
// The recorded FileChange is found by the location of the directive's '#'.
395 if (const FileChange *Change = FindFileChangeLocation(
396 HashToken.getLocation())) {
398 WriteImplicitModuleImport(Change->Mod, EOL);
400 // else now include and recursively process the file
401 } else if (Process(Change->Id, Change->FileType)) {
402 // and set lineinfo back to this file, if the nested one was
404 // `2' indicates returning to a file (after having included
406 LineInfoExtra = " 2";
409 // fix up lineinfo (since commented out directive changed line
410 // numbers) for inclusions that were skipped due to header guards
411 WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
414 case tok::pp_pragma: {
415 StringRef Identifier = NextIdentifierName(RawLex, RawToken);
416 if (Identifier == "clang" || Identifier == "GCC") {
417 if (NextIdentifierName(RawLex, RawToken) == "system_header") {
418 // keep the directive in, commented out
419 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
421 // update our own type
422 FileType = SM.getFileCharacteristic(RawToken.getLocation());
423 WriteLineInfo(FileName, Line, FileType, EOL);
425 } else if (Identifier == "once") {
426 // keep the directive in, commented out
427 CommentOutDirective(RawLex, HashToken, FromFile, EOL,
429 WriteLineInfo(FileName, Line, FileType, EOL);
435 bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
437 // Rewrite special builtin macros to avoid pulling in host details.
439 // Walk over the directive.
440 RawLex.LexFromRawLexer(RawToken);
441 if (RawToken.is(tok::raw_identifier))
442 PP.LookUpIdentifierInfo(RawToken);
444 if (RawToken.is(tok::identifier)) {
446 SourceLocation Loc = RawToken.getLocation();
448 // Rewrite __has_include(x)
449 if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
450 if (!HandleHasInclude(FileId, RawLex, 0, RawToken, HasFile))
452 // Rewrite __has_include_next(x)
453 } else if (RawToken.getIdentifierInfo()->isStr(
454 "__has_include_next")) {
455 const DirectoryLookup *Lookup = PP.GetCurDirLookup();
459 if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
465 // Replace the macro with (0) or (1), followed by the commented
466 // out macro for reference.
467 OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
469 OS << '(' << (int) HasFile << ")/*";
470 OutputContentUpTo(FromFile, NextToWrite,
471 SM.getFileOffset(RawToken.getLocation()) +
472 RawToken.getLength(),
476 } while (RawToken.isNot(tok::eod));
478 OutputContentUpTo(FromFile, NextToWrite,
479 SM.getFileOffset(RawToken.getLocation()) +
480 RawToken.getLength(),
481 EOL, Line, /*EnsureNewLine*/ true);
482 WriteLineInfo(FileName, Line, FileType, EOL);
488 // We surround every #include by #if 0 to comment it out, but that
489 // changes line numbers. These are fixed up right after that, but
490 // the whole #include could be inside a preprocessor conditional
491 // that is not processed. So it is necessary to fix the line
492 // numbers on the next line after each #else/#endif as well.
493 RawLex.SetKeepWhitespaceMode(true);
495 RawLex.LexFromRawLexer(RawToken);
496 } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
498 FromFile, NextToWrite,
499 SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
500 EOL, Line, /*EnsureNewLine*/ true);
501 WriteLineInfo(FileName, Line, FileType, EOL);
502 RawLex.SetKeepWhitespaceMode(false);
508 RawLex.setParsingPreprocessorDirective(false);
510 RawLex.LexFromRawLexer(RawToken);
// Copy whatever remains after the last handled directive, up to end of file.
512 OutputContentUpTo(FromFile, NextToWrite,
513 SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
514 /*EnsureNewline*/true);
518 /// RewriteIncludesInInput - Implement -frewrite-includes mode.
/// Works in two passes: the preprocessor first runs over the whole input so
/// that the callbacks record what each inclusion directive did, then the
/// predefines buffer and the main file are rewritten to \p OS.
519 void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
520 const PreprocessorOutputOptions &Opts) {
521 SourceManager &SM = PP.getSourceManager();
// NOTE(review): no delete is visible for this allocation; presumably
// addPPCallbacks() takes ownership of the callback object - confirm.
522 InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
523 Opts.ShowLineMarkers);
524 PP.addPPCallbacks(Rewrite);
525 // Ignore all pragmas, otherwise there will be warnings about unknown pragmas
526 // (because there's nothing to handle them).
527 PP.AddPragmaHandler(new EmptyPragmaHandler());
528 // Ignore also all pragma in all namespaces created
529 // in Preprocessor::RegisterBuiltinPragmas().
530 PP.AddPragmaHandler("GCC", new EmptyPragmaHandler());
531 PP.AddPragmaHandler("clang", new EmptyPragmaHandler());
533 // First let the preprocessor process the entire file and call callbacks.
534 // Callbacks will record which #include's were actually performed.
535 PP.EnterMainSourceFile();
537 // Only preprocessor directives matter here, so disable macro expansion
538 // everywhere else as an optimization.
539 // TODO: It would be even faster if the preprocessor could be switched
540 // to a mode where it would parse only preprocessor directives and comments,
541 // nothing else matters for parsing or processing.
542 PP.SetMacroExpansionOnlyInDirectives();
545 } while (Tok.isNot(tok::eof));
// Register the predefines buffer first so that OutputContentUpTo() can
// recognise it, then rewrite the predefines and the main file in that order.
546 Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
547 Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
548 Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);