1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This code simply runs the preprocessor on the input file and prints out the
11 // result. This is the traditional behavior of the -E option.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Frontend/Utils.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Frontend/PreprocessorOutputOptions.h"
20 #include "clang/Lex/MacroInfo.h"
21 #include "clang/Lex/PPCallbacks.h"
22 #include "clang/Lex/Pragma.h"
23 #include "clang/Lex/Preprocessor.h"
24 #include "clang/Lex/TokenConcatenation.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/Support/ErrorHandling.h"
29 #include "llvm/Support/raw_ostream.h"
31 using namespace clang;
33 /// PrintMacroDefinition - Print a macro definition in a form that will be
34 /// properly accepted back as a definition.
///
/// The output starts with "#define " followed by the name taken from \p II.
/// For function-like macros the parameter names are written next, and the
/// spelling of each body token is obtained from \p PP and streamed to \p OS.
35 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
36 Preprocessor &PP, raw_ostream &OS) {
37 OS << "#define " << II.getName();
39 if (MI.isFunctionLike()) {
41 if (!MI.arg_empty()) {
42 MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
// Visit every parameter except the last one; the last parameter is examined
// after the loop, where its name is compared against __VA_ARGS__.
43 for (; AI+1 != E; ++AI) {
44 OS << (*AI)->getName();
49 if ((*AI)->getName() == "__VA_ARGS__")
52 OS << (*AI)->getName();
55 if (MI.isGNUVarargs())
56 OS << "..."; // #define foo(x...)
61 // GCC always emits a space, even if the macro body is empty. However, we do
62 // not want to emit two spaces if the first token has a leading space.
63 if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
// SpellingBuffer is the scratch buffer handed to getSpelling() for each token.
66 SmallString<128> SpellingBuffer;
67 for (MacroInfo::tokens_iterator I = MI.tokens_begin(), E = MI.tokens_end();
69 if (I->hasLeadingSpace())
72 OS << PP.getSpelling(*I, SpellingBuffer);
76 //===----------------------------------------------------------------------===//
77 // Preprocessed token printer
78 //===----------------------------------------------------------------------===//
/// PPCallbacks implementation used while printing preprocessed output. It
/// records what has been emitted on the current output line, the current
/// filename and file kind, and declares the file-change, inclusion, #ident,
/// pragma and macro hooks that are defined out of line.
81 class PrintPPOutputPPCallbacks : public PPCallbacks {
// Used by AvoidConcat() to decide whether adjacent tokens need separating.
84 TokenConcatenation ConcatInfo;
// Set once a token / a directive has been written on the current output line;
// both are cleared again by startNewLineIfNeeded().
90 bool EmittedTokensOnThisLine;
91 bool EmittedDirectiveOnThisLine;
// Kind and name of the file currently being printed; both are refreshed in
// FileChanged() and consumed by WriteLineInfo().
92 SrcMgr::CharacteristicKind FileType;
93 SmallString<512> CurFilename;
// True when line markers must not be emitted (the "-P mode" mentioned in
// MoveToLine).
95 bool DisableLineMarkers;
// True when "#line N" directives are written instead of "# N" markers.
97 bool UseLineDirective;
// Becomes true after the first EnterFile notification seen by FileChanged().
98 bool IsFirstFileEntered;
// NOTE: despite its name, \p lineMarkers initializes DisableLineMarkers, so a
// true value turns line markers off. \p defines initializes DumpDefines.
100 PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os,
101 bool lineMarkers, bool defines)
102 : PP(pp), SM(PP.getSourceManager()),
103 ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers),
104 DumpDefines(defines) {
// Placeholder filename used until FileChanged() records a real one.
106 CurFilename += "<uninit>";
107 EmittedTokensOnThisLine = false;
108 EmittedDirectiveOnThisLine = false;
109 FileType = SrcMgr::C_User;
111 IsFirstFileEntered = false;
113 // If we're in microsoft mode, use normal #line instead of line markers.
114 UseLineDirective = PP.getLangOpts().MicrosoftExt;
// Accessors for the two "something was emitted on this line" flags.
117 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
118 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
120 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
121 bool hasEmittedDirectiveOnThisLine() const {
122 return EmittedDirectiveOnThisLine;
125 bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true);
127 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
128 SrcMgr::CharacteristicKind FileType,
130 virtual void InclusionDirective(SourceLocation HashLoc,
131 const Token &IncludeTok,
134 CharSourceRange FilenameRange,
135 const FileEntry *File,
136 StringRef SearchPath,
137 StringRef RelativePath,
138 const Module *Imported);
139 virtual void Ident(SourceLocation Loc, const std::string &str);
140 virtual void PragmaCaptured(SourceLocation Loc, StringRef Str);
141 virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
142 const std::string &Str);
143 virtual void PragmaMessage(SourceLocation Loc, StringRef Namespace,
144 PragmaMessageKind Kind, StringRef Str);
145 virtual void PragmaDebug(SourceLocation Loc, StringRef DebugType);
146 virtual void PragmaDiagnosticPush(SourceLocation Loc,
147 StringRef Namespace);
148 virtual void PragmaDiagnosticPop(SourceLocation Loc,
149 StringRef Namespace);
150 virtual void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
151 diag::Mapping Map, StringRef Str);
153 bool HandleFirstTokOnLine(Token &Tok);
155 /// Move to the line of the provided source location. This will
156 /// return true if the output stream required adjustment or if
157 /// the requested location is on the first line.
158 bool MoveToLine(SourceLocation Loc) {
159 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
160 if (PLoc.isInvalid())
// The "|| getLine() == 1" term implements the "first line" clause documented
// above, even when the line-number overload reports no adjustment.
162 return MoveToLine(PLoc.getLine()) || (PLoc.getLine() == 1);
164 bool MoveToLine(unsigned LineNo);
// Thin forwarder to TokenConcatenation::AvoidConcat on ConcatInfo.
166 bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
168 return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
170 void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
171 bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
172 void HandleNewlinesInToken(const char *TokStr, unsigned Len);
174 /// MacroDefined - This hook is called whenever a macro definition is seen.
175 void MacroDefined(const Token &MacroNameTok, const MacroDirective *MD);
177 /// MacroUndefined - This hook is called whenever a macro #undef is seen.
178 void MacroUndefined(const Token &MacroNameTok, const MacroDirective *MD);
180 } // end anonymous namespace
/// WriteLineInfo - Emit line information for \p LineNo together with the
/// current filename (CurFilename) in double quotes. When UseLineDirective is
/// set the text starts with "#line", otherwise with a bare '#'. \p Extra and
/// \p ExtraLen describe additional text handed to OS.write(), and the current
/// FileType is tested for the two system-header kinds afterwards.
182 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
185 startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
187 // Emit #line directives or GNU line markers depending on what mode we're in.
188 if (UseLineDirective) {
189 OS << "#line" << ' ' << LineNo << ' ' << '"';
190 OS.write(CurFilename.data(), CurFilename.size());
193 OS << '#' << ' ' << LineNo << ' ' << '"';
194 OS.write(CurFilename.data(), CurFilename.size());
198 OS.write(Extra, ExtraLen);
200 if (FileType == SrcMgr::C_System)
202 else if (FileType == SrcMgr::C_ExternCSystem)
208 /// MoveToLine - Move the output to the source line specified by the location
209 /// object. We can do this by emitting some number of \n's, or by emitting a
210 /// #line directive. This returns false if already at the specified line, true
211 /// if some newlines were emitted.
212 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
213 // If this line is "close enough" to the original line, just print newlines,
214 // otherwise print a #line directive.
// NOTE(review): LineNo is unsigned; if CurLine is unsigned as well, a target
// line before CurLine wraps to a large difference and therefore takes one of
// the else branches below rather than this one - confirm CurLine's type.
215 if (LineNo-CurLine <= 8) {
216 if (LineNo-CurLine == 1)
218 else if (LineNo == CurLine)
219 return false; // Spelling line moved, but expansion line didn't.
// Eight newline characters: enough for the largest gap accepted by the
// "<= 8" test above; only the first LineNo-CurLine of them are written.
221 const char *NewLines = "\n\n\n\n\n\n\n\n";
222 OS.write(NewLines, LineNo-CurLine);
224 } else if (!DisableLineMarkers) {
225 // Emit a #line or line marker.
226 WriteLineInfo(LineNo, 0, 0);
228 // Okay, we're in -P mode, which turns off line markers. However, we still
229 // need to emit a newline between tokens on different lines.
230 startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
238 PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) {
// Only act when a token or a directive has already been written on the
// current output line; in that case both "emitted" flags are reset.
239 if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
241 EmittedTokensOnThisLine = false;
242 EmittedDirectiveOnThisLine = false;
// Callers pass false here (e.g. WriteLineInfo and MoveToLine) to skip the
// update that this flag guards.
243 if (ShouldUpdateCurrentLine)
251 /// FileChanged - Whenever the preprocessor enters or exits a #include file
252 /// it invokes this handler. Update our conception of the current source
254 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
255 FileChangeReason Reason,
256 SrcMgr::CharacteristicKind NewFileType,
258 // Unless we are exiting a #include, make sure to skip ahead to the line the
259 // #include directive was at.
260 SourceManager &SourceMgr = SM;
// All line/filename information below comes from the presumed location of Loc.
262 PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
263 if (UserLoc.isInvalid())
266 unsigned NewLine = UserLoc.getLine();
268 if (Reason == PPCallbacks::EnterFile) {
269 SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
270 if (IncludeLoc.isValid())
271 MoveToLine(IncludeLoc);
272 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
273 // GCC emits the # directive for this directive on the line AFTER the
274 // directive and emits a bunch of spaces that aren't needed. This is because
275 // otherwise we will emit a line marker for THIS line, which requires an
276 // extra blank line after the directive to avoid making all following lines
277 // off by one. We can do better by simply incrementing NewLine here.
// The presumed filename is appended to CurFilename and passed through
// Lexer::Stringify; the new file kind is remembered for later line markers.
284 CurFilename += UserLoc.getFilename();
285 Lexer::Stringify(CurFilename);
286 FileType = NewFileType;
288 if (DisableLineMarkers) {
289 startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false);
294 WriteLineInfo(CurLine);
298 // Do not emit an enter marker for the main file (which we expect is the first
299 // entered file). This matches gcc, and improves compatibility with some tools
300 // which track the # line markers as a way to determine when the preprocessed
301 // output is in the context of the main file.
302 if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
303 IsFirstFileEntered = true;
// Entering a file: the marker carries the extra text " 1".
308 case PPCallbacks::EnterFile:
309 WriteLineInfo(CurLine, " 1", 2);
// Leaving a file: the marker carries the extra text " 2".
311 case PPCallbacks::ExitFile:
312 WriteLineInfo(CurLine, " 2", 2);
// These two reasons share a marker without any extra text.
314 case PPCallbacks::SystemHeaderPragma:
315 case PPCallbacks::RenameFile:
316 WriteLineInfo(CurLine);
/// InclusionDirective - Prints an "@import <module>;" line for \p Imported,
/// followed by a C comment naming the included \p File, and marks the current
/// output line as containing tokens.
/// NOTE(review): \p Imported and \p File are dereferenced below; confirm both
/// are non-null whenever this output path is taken.
321 void PrintPPOutputPPCallbacks::InclusionDirective(SourceLocation HashLoc,
322 const Token &IncludeTok,
325 CharSourceRange FilenameRange,
326 const FileEntry *File,
327 StringRef SearchPath,
328 StringRef RelativePath,
329 const Module *Imported) {
330 // When preprocessing, turn implicit imports into @imports.
331 // FIXME: This is a stop-gap until a more comprehensive "preprocessing with
332 // modules" solution is introduced.
334 startNewLineIfNeeded();
336 OS << "@import " << Imported->getFullModuleName() << ";"
337 << " /* clang -E: implicit import for \"" << File->getName() << "\" */";
338 EmittedTokensOnThisLine = true;
342 /// Ident - Handle #ident directives when read by the preprocessor.
/// Writes the literal text "#ident " followed by the S.size() bytes of \p S,
/// then records that tokens were emitted on the current output line.
344 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
347 OS.write("#ident ", strlen("#ident "));
348 OS.write(&S[0], S.size());
349 EmittedTokensOnThisLine = true;
/// PragmaCaptured - Re-emits the directive as the text "#pragma captured" on
/// a fresh output line and marks that line as holding a directive.
352 void PrintPPOutputPPCallbacks::PragmaCaptured(SourceLocation Loc,
354 startNewLineIfNeeded();
356 OS << "#pragma captured";
358 setEmittedDirectiveOnThisLine();
361 /// MacroDefined - This hook is called whenever a macro definition is seen.
/// For macros that are printed, the output is moved to the line of the
/// definition, the definition is written with PrintMacroDefinition(), and the
/// current output line is marked as holding a directive.
362 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
363 const MacroDirective *MD) {
364 const MacroInfo *MI = MD->getMacroInfo();
365 // Only print out macro definitions in -dD mode.
367 // Ignore __FILE__ etc.
368 MI->isBuiltinMacro()) return;
370 MoveToLine(MI->getDefinitionLoc());
371 PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
372 setEmittedDirectiveOnThisLine();
/// MacroUndefined - Writes "#undef <name>" for the macro named by
/// \p MacroNameTok at that token's line and marks the output line as holding
/// a directive. Nothing is printed unless DumpDefines is set.
375 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
376 const MacroDirective *MD) {
377 // Only print out #undef directives in -dD mode.
378 if (!DumpDefines) return;
380 MoveToLine(MacroNameTok.getLocation());
381 OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
382 setEmittedDirectiveOnThisLine();
/// PragmaComment - Re-emits a "#pragma comment(<kind>" directive on a fresh
/// output line. The bytes of \p Str are examined one at a time: printable
/// characters other than backslash and double quote take the first branch,
/// everything else is rendered as three octal digits.
385 void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
386 const IdentifierInfo *Kind,
387 const std::string &Str) {
388 startNewLineIfNeeded();
390 OS << "#pragma comment(" << Kind->getName();
395 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
// Work on an unsigned char so the shifts below see a value in 0..255.
396 unsigned char Char = Str[i];
397 if (isPrintable(Char) && Char != '\\' && Char != '"')
399 else // Output anything hard as an octal escape.
// Octal digits from most to least significant: bits 7-6, 5-3 and 2-0.
401 << (char)('0'+ ((Char >> 6) & 7))
402 << (char)('0'+ ((Char >> 3) & 7))
403 << (char)('0'+ ((Char >> 0) & 7));
409 setEmittedDirectiveOnThisLine();
/// PragmaMessage - Re-emits a message-style pragma on a fresh output line.
/// A non-empty Namespace is printed followed by a space, the bytes of Str go
/// through the same printable-or-octal-escape loop used by PragmaComment, and
/// Kind is compared against PMK_Message near the end. The output line is
/// marked as holding a directive.
412 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
414 PragmaMessageKind Kind,
416 startNewLineIfNeeded();
419 if (!Namespace.empty())
420 OS << Namespace << ' ';
433 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
// Work on an unsigned char so the shifts below see a value in 0..255.
434 unsigned char Char = Str[i];
435 if (isPrintable(Char) && Char != '\\' && Char != '"')
437 else // Output anything hard as an octal escape.
// Octal digits from most to least significant: bits 7-6, 5-3 and 2-0.
439 << (char)('0'+ ((Char >> 6) & 7))
440 << (char)('0'+ ((Char >> 3) & 7))
441 << (char)('0'+ ((Char >> 0) & 7));
444 if (Kind == PMK_Message)
446 setEmittedDirectiveOnThisLine();
/// PragmaDebug - Starts a fresh output line if needed, writes the prefix
/// "#pragma clang __debug " and marks the line as holding a directive.
449 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
450 StringRef DebugType) {
451 startNewLineIfNeeded();
454 OS << "#pragma clang __debug ";
457 setEmittedDirectiveOnThisLine();
/// PragmaDiagnosticPush - Re-emits "#pragma <Namespace> diagnostic push" on a
/// fresh output line and marks that line as holding a directive.
460 void PrintPPOutputPPCallbacks::
461 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
462 startNewLineIfNeeded();
464 OS << "#pragma " << Namespace << " diagnostic push";
465 setEmittedDirectiveOnThisLine();
/// PragmaDiagnosticPop - Re-emits "#pragma <Namespace> diagnostic pop" on a
/// fresh output line and marks that line as holding a directive.
468 void PrintPPOutputPPCallbacks::
469 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
470 startNewLineIfNeeded();
472 OS << "#pragma " << Namespace << " diagnostic pop";
473 setEmittedDirectiveOnThisLine();
/// PragmaDiagnostic - Re-emits a "#pragma <Namespace> diagnostic ..." line on
/// a fresh output line. The mapping \p Map is dispatched over the WARNING,
/// ERROR, IGNORE and FATAL cases, after which \p Str is written surrounded by
/// double quotes and the line is marked as holding a directive.
476 void PrintPPOutputPPCallbacks::
477 PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
478 diag::Mapping Map, StringRef Str) {
479 startNewLineIfNeeded();
481 OS << "#pragma " << Namespace << " diagnostic ";
483 case diag::MAP_WARNING:
486 case diag::MAP_ERROR:
489 case diag::MAP_IGNORE:
492 case diag::MAP_FATAL:
// Str is written verbatim between the quotes; no escaping is applied here.
496 OS << " \"" << Str << '"';
497 setEmittedDirectiveOnThisLine();
500 /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
501 /// is called for the first token on each new line. If this really is the start
502 /// of a new logical line, handle it and return true, otherwise return false.
503 /// This may not be the start of a logical line because the "start of line"
504 /// marker is set for spelling lines, not expansion ones.
505 bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
506 // Figure out what line we went to and insert the appropriate number of
507 // newline characters.
508 if (!MoveToLine(Tok.getLocation()))
511 // Print out space characters so that the first token on a line is
512 // indented for easy reading.
// The column is taken from the token's expansion location.
513 unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
515 // This hack prevents stuff like:
517 // HASH define foo bar
518 // From having the # character end up at column 1, which makes it so it
519 // is not handled as a #define next time through the preprocessor if in
520 // -fpreprocessed mode.
521 if (ColNo <= 1 && Tok.is(tok::hash))
524 // Otherwise, indent the appropriate number of spaces.
// The loop body runs ColNo-1 times, once per column preceding the token.
525 for (; ColNo > 1; --ColNo)
/// HandleNewlinesInToken - Scans the Len bytes starting at \p TokStr for line
/// breaks and advances CurLine by the number counted in NumNewlines. Nothing
/// is changed when no line break was counted.
531 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
533 unsigned NumNewlines = 0;
// Len counts the bytes still to be examined; TokStr advances in lock step.
534 for (; Len; --Len, ++TokStr) {
535 if (*TokStr != '\n' &&
541 // If we have \n\r or \r\n, skip both and count as one line.
// The pair must consist of two different line-break characters.
543 (TokStr[1] == '\n' || TokStr[1] == '\r') &&
544 TokStr[0] != TokStr[1])
548 if (NumNewlines == 0) return;
550 CurLine += NumNewlines;
/// UnknownPragmaHandler - PragmaHandler that echoes a pragma to the output.
/// It writes the configured Prefix and then the spelling of each pragma token
/// up to the end-of-directive token, using the stream owned by Callbacks.
555 struct UnknownPragmaHandler : public PragmaHandler {
// Printer whose output stream and line bookkeeping are used by HandlePragma.
557 PrintPPOutputPPCallbacks *Callbacks;
559 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
560 : Prefix(prefix), Callbacks(callbacks) {}
561 virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
563 // Figure out what line we went to and insert the appropriate number of
564 // newline characters.
565 Callbacks->startNewLineIfNeeded();
566 Callbacks->MoveToLine(PragmaTok.getLocation());
567 Callbacks->OS.write(Prefix, strlen(Prefix));
568 // Read and print all of the pragma tokens.
569 while (PragmaTok.isNot(tok::eod)) {
// Tokens with a leading space are separated from the previous output by a
// single space.
570 if (PragmaTok.hasLeadingSpace())
571 Callbacks->OS << ' ';
572 std::string TokSpell = PP.getSpelling(PragmaTok);
573 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
// Fetch the next token without macro expansion.
574 PP.LexUnexpandedToken(PragmaTok);
576 Callbacks->setEmittedDirectiveOnThisLine();
579 } // end anonymous namespace
/// PrintPreprocessedTokens - Token printing loop. Starting with \p Tok, each
/// token is positioned on the correct output line, separated from its
/// predecessor where required, and its spelling is written to the output
/// stream. The loop stops at the end-of-file token.
582 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
583 PrintPPOutputPPCallbacks *Callbacks,
// Comments are dropped only in traditional-cpp mode when the preprocessor is
// not retaining comments.
585 bool DropComments = PP.getLangOpts().TraditionalCPP &&
586 !PP.getCommentRetentionState();
// The two previously printed tokens, consulted by AvoidConcat() below.
589 Token PrevPrevTok, PrevTok;
590 PrevPrevTok.startToken();
591 PrevTok.startToken();
// A directive printed by one of the callbacks ends the current output line
// before the next token is placed.
593 if (Callbacks->hasEmittedDirectiveOnThisLine()) {
594 Callbacks->startNewLineIfNeeded();
595 Callbacks->MoveToLine(Tok.getLocation());
598 // If this token is at the start of a line, emit newlines if needed.
599 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
601 } else if (Tok.hasLeadingSpace() ||
602 // If we haven't emitted a token on this line yet, PrevTok isn't
603 // useful to look at and no concatenation could happen anyway.
604 (Callbacks->hasEmittedTokensOnThisLine() &&
605 // Don't print "-" next to "-", it would form "--".
606 Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
610 if (DropComments && Tok.is(tok::comment)) {
611 // Skip comments. Normally the preprocessor does not generate
612 // tok::comment nodes at all when not keeping comments, but under
613 // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
614 SourceLocation StartLoc = Tok.getLocation();
// Move to the line of the location just past the end of the skipped comment.
615 Callbacks->MoveToLine(StartLoc.getLocWithOffset(Tok.getLength()));
616 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
// Literals that need no cleaning and expose their data are written directly
// from the literal data.
618 } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
619 Tok.getLiteralData()) {
620 OS.write(Tok.getLiteralData(), Tok.getLength());
// Tokens shorter than 256 bytes are spelled via the local Buffer; longer
// ones go through the std::string overload of getSpelling() further down.
621 } else if (Tok.getLength() < 256) {
622 const char *TokPtr = Buffer;
623 unsigned Len = PP.getSpelling(Tok, TokPtr);
624 OS.write(TokPtr, Len);
626 // Tokens that can contain embedded newlines need to adjust our current
628 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
629 Callbacks->HandleNewlinesInToken(TokPtr, Len);
631 std::string S = PP.getSpelling(Tok);
632 OS.write(&S[0], S.size());
634 // Tokens that can contain embedded newlines need to adjust our current
636 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
637 Callbacks->HandleNewlinesInToken(&S[0], S.size());
639 Callbacks->setEmittedTokensOnThisLine();
641 if (Tok.is(tok::eof)) break;
// Shift the token history by one before the next iteration.
643 PrevPrevTok = PrevTok;
// Pairs a macro's identifier with its MacroInfo so that macros can be sorted
// by name before being printed.
649 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
/// MacroIDCompare - qsort-style comparator over id_macro_pair elements. Both
/// arguments point at id_macro_pair objects; the result is that of
/// StringRef::compare applied to the two identifier names.
650 static int MacroIDCompare(const void* a, const void* b) {
651 const id_macro_pair *LHS = static_cast<const id_macro_pair*>(a);
652 const id_macro_pair *RHS = static_cast<const id_macro_pair*>(b);
653 return LHS->first->getName().compare(RHS->first->getName());
/// DoPrintMacros - Implements the macro dump: the main file is lexed to the
/// end-of-file token, every identifier that has a macro definition is
/// collected, the collection is sorted by name with MacroIDCompare, and each
/// non-builtin macro is printed to \p OS with PrintMacroDefinition().
656 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
657 // Ignore unknown pragmas.
658 PP.AddPragmaHandler(new EmptyPragmaHandler());
660 // -dM mode just scans and ignores all tokens in the files, then dumps out
661 // the macro table at the end.
662 PP.EnterMainSourceFile();
666 while (Tok.isNot(tok::eof));
// Gather (identifier, MacroInfo) pairs for identifiers that currently have a
// macro definition.
668 SmallVector<id_macro_pair, 128> MacrosByID;
669 for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
671 if (I->first->hasMacroDefinition())
672 MacrosByID.push_back(id_macro_pair(I->first, I->second->getMacroInfo()));
// Sort by macro name so the dump order does not depend on table iteration.
674 llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
676 for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
677 MacroInfo &MI = *MacrosByID[i].second;
678 // Ignore computed macros like __LINE__ and friends.
679 if (MI.isBuiltinMacro()) continue;
681 PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
686 /// DoPrintPreprocessedInput - This implements -E mode.
/// Configures comment retention on \p PP from \p Opts, installs the printing
/// callbacks and the pragma echo handlers, enters the main source file, and
/// finally hands the token stream to PrintPreprocessedTokens().
688 void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
689 const PreprocessorOutputOptions &Opts) {
690 // Show macros with no output is handled specially.
692 assert(Opts.ShowMacros && "Not yet implemented!");
693 DoPrintMacros(PP, OS);
697 // Inform the preprocessor whether we want it to retain comments or not, due
699 PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
// The third constructor argument is the "disable line markers" flag, hence
// the negation of Opts.ShowLineMarkers.
701 PrintPPOutputPPCallbacks *Callbacks =
702 new PrintPPOutputPPCallbacks(PP, *OS, !Opts.ShowLineMarkers,
// Echo pragmas in the global, "GCC" and "clang" namespaces with the matching
// textual prefix.
704 PP.AddPragmaHandler(new UnknownPragmaHandler("#pragma", Callbacks));
705 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
706 PP.AddPragmaHandler("clang",
707 new UnknownPragmaHandler("#pragma clang", Callbacks));
709 PP.addPPCallbacks(Callbacks);
711 // After we have configured the preprocessor, enter the main file.
712 PP.EnterMainSourceFile();
714 // Consume all of the tokens that come from the predefines buffer. Those
715 // should not be emitted into the output and are guaranteed to be at the
717 const SourceManager &SourceMgr = PP.getSourceManager();
721 if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
724 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
725 if (PLoc.isInvalid())
// strcmp() yields non-zero when the presumed filename differs from
// "<built-in>".
728 if (strcmp(PLoc.getFilename(), "<built-in>"))
732 // Read all the preprocessed tokens, printing them out to the stream.
733 PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);