1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// Defines the clang::Preprocessor interface.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16 #define LLVM_CLANG_LEX_PREPROCESSOR_H
18 #include "clang/Basic/Builtins.h"
19 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Basic/IdentifierTable.h"
21 #include "clang/Basic/LLVM.h"
22 #include "clang/Basic/LangOptions.h"
23 #include "clang/Basic/Module.h"
24 #include "clang/Basic/SourceLocation.h"
25 #include "clang/Basic/SourceManager.h"
26 #include "clang/Basic/TokenKinds.h"
27 #include "clang/Lex/Lexer.h"
28 #include "clang/Lex/MacroInfo.h"
29 #include "clang/Lex/ModuleLoader.h"
30 #include "clang/Lex/ModuleMap.h"
31 #include "clang/Lex/PPCallbacks.h"
32 #include "clang/Lex/Token.h"
33 #include "clang/Lex/TokenLexer.h"
34 #include "llvm/ADT/ArrayRef.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/FoldingSet.h"
37 #include "llvm/ADT/None.h"
38 #include "llvm/ADT/Optional.h"
39 #include "llvm/ADT/PointerUnion.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/ADT/TinyPtrVector.h"
45 #include "llvm/ADT/iterator_range.h"
46 #include "llvm/Support/Allocator.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Registry.h"
60 template<unsigned InternalLen> class SmallString;
66 class CodeCompletionHandler;
69 class DirectoryLookup;
70 class ExternalPreprocessorSource;
75 class MemoryBufferCache;
77 class PragmaNamespace;
78 class PreprocessingRecord;
79 class PreprocessorLexer;
80 class PreprocessorOptions;
84 /// Stores token information for comparing actual tokens with
85 /// predefined values. Only handles simple tokens and identifiers.
91 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
92 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
93 assert(Kind != tok::identifier &&
94 "Identifiers should be created by TokenValue(IdentifierInfo *)");
95 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
96 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
/// Builds a value of kind tok::identifier that carries \p II; operator==
/// compares the identifier only when the stored II is non-null.
99 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
101 bool operator==(const Token &Tok) const {
102 return Tok.getKind() == Kind &&
103 (!II || II == Tok.getIdentifierInfo());
107 /// Context in which macro name is used.
109 // other than #define or #undef
112 // macro name specified in #define
115 // macro name specified in #undef
119 /// Engages in a tight little dance with the lexer to efficiently
120 /// preprocess tokens.
122 /// Lexers know only about tokens within a single source file, and don't
123 /// know anything about preprocessor-level issues like the \#include stack,
124 /// token expansion, etc.
126 friend class VAOptDefinitionContext;
127 friend class VariadicMacroScopeGuard;
129 std::shared_ptr<PreprocessorOptions> PPOpts;
130 DiagnosticsEngine *Diags;
131 LangOptions &LangOpts;
132 const TargetInfo *Target = nullptr;
133 const TargetInfo *AuxTarget = nullptr;
134 FileManager &FileMgr;
135 SourceManager &SourceMgr;
136 MemoryBufferCache &PCMCache;
137 std::unique_ptr<ScratchBuffer> ScratchBuf;
138 HeaderSearch &HeaderInfo;
139 ModuleLoader &TheModuleLoader;
141 /// External source of macros.
142 ExternalPreprocessorSource *ExternalSource;
144 /// A BumpPtrAllocator object used to quickly allocate and release
145 /// objects internal to the Preprocessor.
146 llvm::BumpPtrAllocator BP;
148 /// Identifiers for builtin macros and other builtins.
149 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
150 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
151 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
152 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
153 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
154 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
155 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
156 IdentifierInfo *Ident__identifier; // __identifier
157 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
158 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
159 IdentifierInfo *Ident__has_feature; // __has_feature
160 IdentifierInfo *Ident__has_extension; // __has_extension
161 IdentifierInfo *Ident__has_builtin; // __has_builtin
162 IdentifierInfo *Ident__has_attribute; // __has_attribute
163 IdentifierInfo *Ident__has_include; // __has_include
164 IdentifierInfo *Ident__has_include_next; // __has_include_next
165 IdentifierInfo *Ident__has_warning; // __has_warning
166 IdentifierInfo *Ident__is_identifier; // __is_identifier
167 IdentifierInfo *Ident__building_module; // __building_module
168 IdentifierInfo *Ident__MODULE__; // __MODULE__
169 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
170 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
171 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
172 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
173 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
174 IdentifierInfo *Ident__is_target_os; // __is_target_os
175 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
177 SourceLocation DATELoc, TIMELoc;
179 // Next __COUNTER__ value, starts at 0.
180 unsigned CounterValue = 0;
183 /// Maximum depth of \#includes.
184 MaxAllowedIncludeStackDepth = 200
187 // State that is set before the preprocessor begins.
188 bool KeepComments : 1;
189 bool KeepMacroComments : 1;
190 bool SuppressIncludeNotFoundError : 1;
192 // State that changes while the preprocessor runs:
193 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
195 /// Whether the preprocessor owns the header search object.
196 bool OwnsHeaderSearch : 1;
198 /// True if macro expansion is disabled.
199 bool DisableMacroExpansion : 1;
201 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
202 /// when parsing preprocessor directives.
203 bool MacroExpansionInDirectivesOverride : 1;
205 class ResetMacroExpansionHelper;
207 /// Whether we have already loaded macros from the external source.
208 mutable bool ReadMacrosFromExternalSource : 1;
210 /// True if pragmas are enabled.
211 bool PragmasEnabled : 1;
213 /// True if the current build action is a preprocessing action.
214 bool PreprocessedOutput : 1;
216 /// True if we are currently preprocessing a #if or #elif directive
217 bool ParsingIfOrElifDirective;
219 /// True if we are pre-expanding macro arguments.
220 bool InMacroArgPreExpansion;
222 /// Mapping/lookup information for all identifiers in
223 /// the program, including program keywords.
224 mutable IdentifierTable Identifiers;
226 /// This table contains all the selectors in the program.
228 /// Unlike IdentifierTable above, this table *isn't* populated by the
229 /// preprocessor. It is declared/expanded here because its role/lifetime is
230 /// conceptually similar to the IdentifierTable. In addition, the current
231 /// control flow (in clang::ParseAST()) makes it convenient to put it here.
233 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
234 /// the lifetime of the preprocessor.
235 SelectorTable Selectors;
237 /// Information about builtins.
238 Builtin::Context BuiltinInfo;
240 /// Tracks all of the pragmas that the client registered
241 /// with this preprocessor.
242 std::unique_ptr<PragmaNamespace> PragmaHandlers;
244 /// Pragma handlers of the original source are stored here during the
245 /// parsing of a model file.
246 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
248 /// Tracks all of the comment handlers that the client registered
249 /// with this preprocessor.
250 std::vector<CommentHandler *> CommentHandlers;
252 /// True if we want to ignore the EOF token and continue later on (thus
253 /// avoiding tearing down the Lexer, etc.).
254 bool IncrementalProcessing = false;
256 /// The kind of translation unit we are processing.
257 TranslationUnitKind TUKind;
259 /// The code-completion handler.
260 CodeCompletionHandler *CodeComplete = nullptr;
262 /// The file that we're performing code-completion for, if any.
263 const FileEntry *CodeCompletionFile = nullptr;
265 /// The offset in file for the code-completion point.
266 unsigned CodeCompletionOffset = 0;
268 /// The location for the code-completion point. This gets instantiated
269 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
270 SourceLocation CodeCompletionLoc;
272 /// The start location for the file of the code-completion point.
274 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
275 /// for preprocessing.
276 SourceLocation CodeCompletionFileLoc;
278 /// The source location of the \c import contextual keyword we just lexed.
280 SourceLocation ModuleImportLoc;
282 /// The module import path that we're currently processing.
283 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
285 /// Whether the last token we lexed was an '@'.
286 bool LastTokenWasAt = false;
288 /// Whether the module import expects an identifier next. Otherwise,
289 /// it expects a '.' or ';'.
290 bool ModuleImportExpectsIdentifier = false;
292 /// The source location of the currently-active
293 /// \#pragma clang arc_cf_code_audited begin.
294 SourceLocation PragmaARCCFCodeAuditedLoc;
296 /// The source location of the currently-active
297 /// \#pragma clang assume_nonnull begin.
298 SourceLocation PragmaAssumeNonNullLoc;
300 /// True if we hit the code-completion point.
301 bool CodeCompletionReached = false;
303 /// The code completion token containing the information
304 /// on the stem that is to be code completed.
305 IdentifierInfo *CodeCompletionII = nullptr;
307 /// Range for the code completion token.
308 SourceRange CodeCompletionTokenRange;
310 /// The directory that the main file should be considered to occupy,
311 /// if it does not correspond to a real file (as happens when building a module).
313 const DirectoryEntry *MainFileDir = nullptr;
315 /// The number of bytes that we will initially skip when entering the
316 /// main file, along with a flag that indicates whether skipping this number
317 /// of bytes will place the lexer at the start of a line.
319 /// This is used when loading a precompiled preamble.
320 std::pair<int, bool> SkipMainFilePreamble;
322 /// Whether we hit an error due to reaching max allowed include depth. Used
323 /// to avoid reporting the same error over and over again.
324 bool HasReachedMaxIncludeDepth = false;
327 struct PreambleSkipInfo {
328 SourceLocation HashTokenLoc;
329 SourceLocation IfTokenLoc;
330 bool FoundNonSkipPortion;
332 SourceLocation ElseLoc;
334 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
335 bool FoundNonSkipPortion, bool FoundElse,
336 SourceLocation ElseLoc)
337 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
338 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
343 friend class ASTReader;
344 friend class MacroArgs;
346 class PreambleConditionalStackStore {
354 PreambleConditionalStackStore() = default;
/// Puts the store into the Recording state.
356 void startRecording() { ConditionalStackState = Recording; }
/// Puts the store into the Replaying state.
357 void startReplaying() { ConditionalStackState = Replaying; }
/// Returns true while the store is in the Recording state.
358 bool isRecording() const { return ConditionalStackState == Recording; }
/// Returns true while the store is in the Replaying state.
359 bool isReplaying() const { return ConditionalStackState == Replaying; }
361 ArrayRef<PPConditionalInfo> getStack() const {
362 return ConditionalStack;
365 void doneReplaying() {
366 ConditionalStack.clear();
367 ConditionalStackState = Off;
370 void setStack(ArrayRef<PPConditionalInfo> s) {
371 if (!isRecording() && !isReplaying())
373 ConditionalStack.clear();
374 ConditionalStack.append(s.begin(), s.end());
/// Returns true if the stored conditional stack is non-empty.
377 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
/// Returns true if SkipInfo currently holds a value.
379 bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
/// Resets SkipInfo so that it no longer holds a value.
381 void clearSkipInfo() { SkipInfo.reset(); }
383 llvm::Optional<PreambleSkipInfo> SkipInfo;
386 SmallVector<PPConditionalInfo, 4> ConditionalStack;
387 State ConditionalStackState = Off;
388 } PreambleConditionalStack;
390 /// The current top of the stack that we're lexing from if
391 /// not expanding a macro and we are lexing directly from source code.
393 /// Only one of CurLexer or CurTokenLexer will be non-null.
394 std::unique_ptr<Lexer> CurLexer;
396 /// The current top of the stack that we're lexing from
397 /// if not expanding a macro.
399 /// This is an alias for CurLexer.
400 PreprocessorLexer *CurPPLexer = nullptr;
402 /// Used to find the current FileEntry, if CurLexer is non-null
403 /// and if applicable.
405 /// This allows us to implement \#include_next and find directory-specific
407 const DirectoryLookup *CurDirLookup = nullptr;
409 /// The current macro we are expanding, if we are expanding a macro.
411 /// One of CurLexer and CurTokenLexer must be null.
412 std::unique_ptr<TokenLexer> CurTokenLexer;
414 /// The kind of lexer we're currently working with.
419 CLK_LexAfterModuleImport
420 } CurLexerKind = CLK_Lexer;
422 /// If the current lexer is for a submodule that is being built, this
423 /// is that submodule.
424 Module *CurLexerSubmodule = nullptr;
426 /// Keeps track of the stack of files currently
427 /// \#included, and macros currently being expanded from, not counting
428 /// CurLexer/CurTokenLexer.
429 struct IncludeStackInfo {
430 enum CurLexerKind CurLexerKind;
431 Module *TheSubmodule;
432 std::unique_ptr<Lexer> TheLexer;
433 PreprocessorLexer *ThePPLexer;
434 std::unique_ptr<TokenLexer> TheTokenLexer;
435 const DirectoryLookup *TheDirLookup;
437 // This constructor only initializes each member from the matching
438 // argument, as a default version would; it exists solely to pacify MSVC.
// std::move on the enum and raw-pointer arguments is equivalent to a copy;
// only the two std::unique_ptr arguments actually transfer ownership.
439 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
440 std::unique_ptr<Lexer> &&TheLexer,
441 PreprocessorLexer *ThePPLexer,
442 std::unique_ptr<TokenLexer> &&TheTokenLexer,
443 const DirectoryLookup *TheDirLookup)
444 : CurLexerKind(std::move(CurLexerKind)),
445 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
446 ThePPLexer(std::move(ThePPLexer)),
447 TheTokenLexer(std::move(TheTokenLexer)),
448 TheDirLookup(std::move(TheDirLookup)) {}
450 std::vector<IncludeStackInfo> IncludeMacroStack;
452 /// Actions invoked when some preprocessor activity is
453 /// encountered (e.g. a file is \#included, etc).
454 std::unique_ptr<PPCallbacks> Callbacks;
456 struct MacroExpandsInfo {
/// Stores copies of the token, macro definition and source range in the
/// members of the same name.
461 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
462 : Tok(Tok), MD(MD), Range(Range) {}
464 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
466 /// Information about a name that has been used to define a module macro.
467 struct ModuleMacroInfo {
468 /// The most recent macro directive for this identifier.
471 /// The active module macros for this identifier.
472 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
474 /// The generation number at which we last updated ActiveModuleMacros.
475 /// \see Preprocessor::VisibleModules.
476 unsigned ActiveModuleMacrosGeneration = 0;
478 /// Whether this macro name is ambiguous.
479 bool IsAmbiguous = false;
481 /// The module macros that are overridden by this macro.
482 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
/// Records \p MD as the directive for this identifier; the generation counter
/// and ambiguity flag keep their in-class initial values (0 and false).
484 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
487 /// The state of a macro for an identifier.
489 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
491 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
492 const IdentifierInfo *II) const {
493 if (II->isOutOfDate())
494 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
495 // FIXME: Find a spare bit on IdentifierInfo and store a
496 // HasModuleMacros flag.
497 if (!II->hasMacroDefinition() ||
498 (!PP.getLangOpts().Modules &&
499 !PP.getLangOpts().ModulesLocalVisibility) ||
500 !PP.CurSubmoduleState->VisibleModules.getGeneration())
503 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
505 Info = new (PP.getPreprocessorAllocator())
506 ModuleMacroInfo(State.get<MacroDirective *>());
510 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
511 Info->ActiveModuleMacrosGeneration)
512 PP.updateModuleMacroInfo(II, *Info);
/// Default state: delegates to the MacroDirective* constructor with nullptr.
517 MacroState() : MacroState(nullptr) {}
/// Starts the state out holding the plain MacroDirective pointer \p MD.
518 MacroState(MacroDirective *MD) : State(MD) {}
520 MacroState(MacroState &&O) noexcept : State(O.State) {
521 O.State = (MacroDirective *)nullptr;
524 MacroState &operator=(MacroState &&O) noexcept {
526 O.State = (MacroDirective *)nullptr;
532 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
533 Info->~ModuleMacroInfo();
536 MacroDirective *getLatest() const {
537 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
539 return State.get<MacroDirective*>();
542 void setLatest(MacroDirective *MD) {
543 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
549 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
550 auto *Info = getModuleInfo(PP, II);
551 return Info ? Info->IsAmbiguous : false;
554 ArrayRef<ModuleMacro *>
555 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
556 if (auto *Info = getModuleInfo(PP, II))
557 return Info->ActiveModuleMacros;
561 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
562 SourceManager &SourceMgr) const {
563 // FIXME: Incorporate module macros into the result of this.
564 if (auto *Latest = getLatest())
565 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
569 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
570 if (auto *Info = getModuleInfo(PP, II)) {
571 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
572 Info->ActiveModuleMacros.begin(),
573 Info->ActiveModuleMacros.end());
574 Info->ActiveModuleMacros.clear();
575 Info->IsAmbiguous = false;
579 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
580 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
581 return Info->OverriddenMacros;
585 void setOverriddenMacros(Preprocessor &PP,
586 ArrayRef<ModuleMacro *> Overrides) {
587 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
589 if (Overrides.empty())
591 Info = new (PP.getPreprocessorAllocator())
592 ModuleMacroInfo(State.get<MacroDirective *>());
595 Info->OverriddenMacros.clear();
596 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
597 Overrides.begin(), Overrides.end());
598 Info->ActiveModuleMacrosGeneration = 0;
602 /// For each IdentifierInfo that was associated with a macro, we
603 /// keep a mapping to the history of all macro definitions and #undefs in
604 /// the reverse order (the latest one is in the head of the list).
606 /// This mapping lives within the \p CurSubmoduleState.
607 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
609 struct SubmoduleState;
611 /// Information about a submodule that we're currently building.
612 struct BuildingSubmoduleInfo {
613 /// The module that we are building.
616 /// The location at which the module was included.
617 SourceLocation ImportLoc;
619 /// Whether we entered this submodule via a pragma.
622 /// The previous SubmoduleState.
623 SubmoduleState *OuterSubmoduleState;
625 /// The number of pending module macro names when we started building this.
626 unsigned OuterPendingModuleMacroNames;
/// Initializes every field from the constructor argument of the same name.
628 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
629 SubmoduleState *OuterSubmoduleState,
630 unsigned OuterPendingModuleMacroNames)
631 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
632 OuterSubmoduleState(OuterSubmoduleState),
633 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
635 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
637 /// Information about a submodule's preprocessor state.
638 struct SubmoduleState {
639 /// The macros for the submodule.
642 /// The set of modules that are visible within the submodule.
643 VisibleModuleSet VisibleModules;
645 // FIXME: CounterValue?
646 // FIXME: PragmaPushMacroInfo?
648 std::map<Module *, SubmoduleState> Submodules;
650 /// The preprocessor state for preprocessing outside of any submodule.
651 SubmoduleState NullSubmoduleState;
653 /// The current submodule state. Will be \p NullSubmoduleState if we're not
/// in a submodule.
655 SubmoduleState *CurSubmoduleState;
657 /// The set of known macros exported from modules.
658 llvm::FoldingSet<ModuleMacro> ModuleMacros;
660 /// The names of potential module macros that we've not yet processed.
661 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
663 /// The list of module macros, for each identifier, that are not overridden by
664 /// any other module macro.
665 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
668 /// Macros that we want to warn about because they are not used at the end
669 /// of the translation unit.
671 /// We store just their SourceLocations instead of
672 /// something like MacroInfo*. The benefit of this is that when we are
673 /// deserializing from PCH, we don't need to deserialize identifier & macros
674 /// just so that we can report that they are unused, we just warn using
675 /// the SourceLocations of this set (that will be filled by the ASTReader).
676 /// We are using SmallPtrSet instead of a vector for faster removal.
677 using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
678 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
680 /// A "freelist" of MacroArg objects that can be
681 /// reused for quick allocation.
682 MacroArgs *MacroArgCache = nullptr;
684 /// For each IdentifierInfo used in a \#pragma push_macro directive,
685 /// we keep a MacroInfo stack used to restore the previous macro value.
686 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
689 // Various statistics we track for performance analysis.
690 unsigned NumDirectives = 0;
691 unsigned NumDefined = 0;
692 unsigned NumUndefined = 0;
693 unsigned NumPragma = 0;
695 unsigned NumElse = 0;
696 unsigned NumEndif = 0;
697 unsigned NumEnteredSourceFiles = 0;
698 unsigned MaxIncludeStackDepth = 0;
699 unsigned NumMacroExpanded = 0;
700 unsigned NumFnMacroExpanded = 0;
701 unsigned NumBuiltinMacroExpanded = 0;
702 unsigned NumFastMacroExpanded = 0;
703 unsigned NumTokenPaste = 0;
704 unsigned NumFastTokenPaste = 0;
705 unsigned NumSkipped = 0;
707 /// The predefined macros that preprocessor should use from the
708 /// command line etc.
709 std::string Predefines;
711 /// The file ID for the preprocessor predefines.
712 FileID PredefinesFileID;
714 /// The file ID for the PCH through header.
715 FileID PCHThroughHeaderFileID;
717 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
718 bool SkippingUntilPragmaHdrStop = false;
720 /// Whether tokens are being skipped until the through header is seen.
721 bool SkippingUntilPCHThroughHeader = false;
724 /// Cache of macro expanders to reduce malloc traffic.
725 enum { TokenLexerCacheSize = 8 };
726 unsigned NumCachedTokenLexers;
727 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
730 /// Keeps macro expanded tokens for TokenLexers.
732 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
733 /// is going to lex to the cache, and when it finishes the tokens are removed
734 /// from the end of the cache.
735 SmallVector<Token, 16> MacroExpandedTokens;
736 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
738 /// A record of the macro definitions and expansions that
739 /// occurred during preprocessing.
741 /// This is an optional side structure that can be enabled with
742 /// \c createPreprocessingRecord() prior to preprocessing.
743 PreprocessingRecord *Record = nullptr;
745 /// Cached tokens state.
746 using CachedTokensTy = SmallVector<Token, 1>;
748 /// Cached tokens are stored here when we do backtracking or
749 /// lookahead. They are "lexed" by the CachingLex() method.
750 CachedTokensTy CachedTokens;
752 /// The position of the cached token that CachingLex() should lex next.
755 /// If it points beyond the CachedTokens vector, it means that a normal
756 /// Lex() should be invoked.
757 CachedTokensTy::size_type CachedLexPos = 0;
759 /// Stack of backtrack positions, allowing nested backtracks.
761 /// The EnableBacktrackAtThisPos() method pushes a position to
762 /// indicate where CachedLexPos should be set when the BackTrack() method is
763 /// invoked (at which point the last position is popped).
764 std::vector<CachedTokensTy::size_type> BacktrackPositions;
766 struct MacroInfoChain {
768 MacroInfoChain *Next;
771 /// MacroInfos are managed as a chain for easy disposal. This is the head
/// of that chain.
773 MacroInfoChain *MIChainHead = nullptr;
775 void updateOutOfDateIdentifier(IdentifierInfo &II) const;
778 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
779 DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
780 MemoryBufferCache &PCMCache,
781 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
782 IdentifierInfoLookup *IILookup = nullptr,
783 bool OwnsHeaderSearch = false,
784 TranslationUnitKind TUKind = TU_Complete);
788 /// Initialize the preprocessor using information about the target.
790 /// \param Target is owned by the caller and must remain valid for the
791 /// lifetime of the preprocessor.
792 /// \param AuxTarget is owned by the caller and must remain valid for
793 /// the lifetime of the preprocessor.
794 void Initialize(const TargetInfo &Target,
795 const TargetInfo *AuxTarget = nullptr);
797 /// Initialize the preprocessor to parse a model file
799 /// To parse model files the preprocessor of the original source is reused to
800 /// preserve the identifier table. However to avoid some duplicate
801 /// information in the preprocessor some cleanup is needed before it is used
802 /// to parse model files. This method does that cleanup.
803 void InitializeForModelFile();
805 /// Cleanup after model file parsing
806 void FinalizeForModelFile();
808 /// Retrieve the preprocessor options used to initialize this
/// preprocessor (dereferences the shared PPOpts pointer).
810 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
/// Returns the diagnostics engine that the Diags pointer currently refers to.
812 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
/// Redirects this preprocessor to the diagnostics engine \p D; only the
/// address of \p D is stored.
813 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
/// Returns the language options as a read-only reference.
815 const LangOptions &getLangOpts() const { return LangOpts; }
/// Returns the primary target. This dereferences Target, whose in-class
/// initializer is nullptr.
/// NOTE(review): presumably only valid after Initialize() -- confirm.
816 const TargetInfo &getTargetInfo() const { return *Target; }
/// Returns the auxiliary target pointer; it is nullptr unless one has been
/// set (AuxTarget's in-class initializer is nullptr).
817 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
/// Returns the file manager this preprocessor holds a reference to.
818 FileManager &getFileManager() const { return FileMgr; }
/// Returns the source manager this preprocessor holds a reference to.
819 SourceManager &getSourceManager() const { return SourceMgr; }
/// Returns the MemoryBufferCache this preprocessor holds a reference to.
820 MemoryBufferCache &getPCMCache() const { return PCMCache; }
/// Returns the header search object this preprocessor holds a reference to.
821 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
/// Returns the identifier table for modification.
823 IdentifierTable &getIdentifierTable() { return Identifiers; }
/// Returns the identifier table as a read-only reference.
824 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
/// Returns the selector table owned by this preprocessor.
825 SelectorTable &getSelectorTable() { return Selectors; }
/// Returns the builtin information context owned by this preprocessor.
826 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
/// Returns the bump-pointer allocator (BP) used for objects internal to the
/// preprocessor.
827 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
829 void setExternalSource(ExternalPreprocessorSource *Source) {
830 ExternalSource = Source;
833 ExternalPreprocessorSource *getExternalSource() const {
834 return ExternalSource;
837 /// Retrieve the module loader associated with this preprocessor.
838 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
840 bool hadModuleLoaderFatalFailure() const {
841 return TheModuleLoader.HadFatalFailure;
844 /// True if we are currently preprocessing a #if or #elif directive
845 bool isParsingIfOrElifDirective() const {
846 return ParsingIfOrElifDirective;
849 /// Control whether the preprocessor retains comments in output.
850 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
851 this->KeepComments = KeepComments | KeepMacroComments;
852 this->KeepMacroComments = KeepMacroComments;
/// Returns the KeepComments flag. SetCommentRetentionState() stores
/// KeepComments | KeepMacroComments into it, so this is also true when only
/// macro comments were requested.
855 bool getCommentRetentionState() const { return KeepComments; }
/// Sets the PragmasEnabled flag to \p Enabled.
857 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
/// Returns the current value of the PragmasEnabled flag.
858 bool getPragmasEnabled() const { return PragmasEnabled; }
860 void SetSuppressIncludeNotFoundError(bool Suppress) {
861 SuppressIncludeNotFoundError = Suppress;
864 bool GetSuppressIncludeNotFoundError() {
865 return SuppressIncludeNotFoundError;
868 /// Sets whether the preprocessor is responsible for producing output or if
869 /// it is producing tokens to be consumed by Parse and Sema.
870 void setPreprocessedOutput(bool IsPreprocessedOutput) {
871 PreprocessedOutput = IsPreprocessedOutput;
874 /// Returns true if the preprocessor is responsible for generating output,
875 /// false if it is producing tokens to be consumed by Parse and Sema.
876 bool isPreprocessedOutput() const { return PreprocessedOutput; }
878 /// Return true if we are lexing directly from the specified lexer.
879 bool isCurrentLexer(const PreprocessorLexer *L) const {
880 return CurPPLexer == L;
883 /// Return the current lexer being lexed from.
885 /// Note that this ignores any potentially active macro expansions and _Pragma
886 /// expansions going on at the time.
887 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
889 /// Return the current file lexer being lexed from.
891 /// Note that this ignores any potentially active macro expansions and _Pragma
892 /// expansions going on at the time.
893 PreprocessorLexer *getCurrentFileLexer() const;
895 /// Return the submodule owning the file being lexed. This may not be
896 /// the current module if we have changed modules since entering the file.
897 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
899 /// Returns the FileID for the preprocessor predefines.
900 FileID getPredefinesFileID() const { return PredefinesFileID; }
903 /// Accessors for preprocessor callbacks.
///
905 /// Note that this class takes ownership of any PPCallbacks object given to
/// it via addPPCallbacks(). This getter returns a non-owning pointer, which
/// is null when the Callbacks pointer is empty.
907 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
908 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
910 C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
911 std::move(Callbacks));
912 Callbacks = std::move(C);
916 bool isMacroDefined(StringRef Id) {
917 return isMacroDefined(&Identifiers.get(Id));
919 bool isMacroDefined(const IdentifierInfo *II) {
920 return II->hasMacroDefinition() &&
921 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
924 /// Determine whether II is defined as a macro within the module M,
925 /// if that is a module that we've already preprocessed. Does not check for
926 /// macros imported into M.
927 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
928 if (!II->hasMacroDefinition())
930 auto I = Submodules.find(M);
931 if (I == Submodules.end())
933 auto J = I->second.Macros.find(II);
934 if (J == I->second.Macros.end())
936 auto *MD = J->second.getLatest();
937 return MD && MD->isDefined();
940 MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
941 if (!II->hasMacroDefinition())
944 MacroState &S = CurSubmoduleState->Macros[II];
945 auto *MD = S.getLatest();
946 while (MD && isa<VisibilityMacroDirective>(MD))
947 MD = MD->getPrevious();
948 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
949 S.getActiveModuleMacros(*this, II),
950 S.isAmbiguous(*this, II));
953 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
954 SourceLocation Loc) {
955 if (!II->hadMacroDefinition())
958 MacroState &S = CurSubmoduleState->Macros[II];
959 MacroDirective::DefInfo DI;
960 if (auto *MD = S.getLatest())
961 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
962 // FIXME: Compute the set of active module macros at the specified location.
963 return MacroDefinition(DI.getDirective(),
964 S.getActiveModuleMacros(*this, II),
965 S.isAmbiguous(*this, II));
968 /// Given an identifier, return its latest non-imported MacroDirective
969 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
///
/// The directive is obtained from getLocalMacroDirectiveHistory() and is
/// rejected when it is null or when its definition reports isUndefined().
970 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
// NOTE(review): the statements executed by both guards, and the final
// return, are not shown in this listing; per the comment above they
// presumably return null / the directive respectively -- confirm.
971 if (!II->hasMacroDefinition())
974 auto *MD = getLocalMacroDirectiveHistory(II);
975 if (!MD || MD->getDefinition().isUndefined())
981 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
982 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
/// Return the MacroInfo of the macro definition found for \p II by
/// getMacroDefinition().
985 MacroInfo *getMacroInfo(const IdentifierInfo *II) {
// NOTE(review): the statement executed by this guard and the fall-through
// return at the end are not shown in this listing; presumably both return
// null -- confirm.
986 if (!II->hasMacroDefinition())
// Only a definition that converts to true supplies a MacroInfo.
988 if (auto MD = getMacroDefinition(II))
989 return MD.getMacroInfo();
993 /// Given an identifier, return the latest non-imported macro
994 /// directive for that identifier.
996 /// One can walk all previous macro directives from the most recent one.
998 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1000 /// Add a directive to the macro directive history for this identifier.
1001 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
/// Allocate a DefMacroDirective for \p MI at \p Loc and append it to the
/// macro directive history of \p II.
1002 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1003 SourceLocation Loc) {
1004 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1005 appendMacroDirective(II, MD);
// NOTE(review): the return statement is not shown in this listing;
// presumably the newly allocated directive is returned -- confirm.
/// Convenience overload that uses the definition location stored in the
/// macro info (\c MI->getDefinitionLoc()) as the directive location.
// NOTE(review): the remainder of the parameter list (declaring MI) is not
// shown in this listing.
1008 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1010 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1013 /// Set a MacroDirective that was loaded from a PCH file.
1014 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1015 MacroDirective *MD);
1017 /// Register an exported macro for a module and identifier.
1018 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1019 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1020 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
1022 /// Get the list of leaf (non-overridden) module macros for a name.
///
/// An identifier that reports isOutOfDate() is refreshed through
/// updateOutOfDateIdentifier() before \c LeafModuleMacros is consulted.
1023 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
// The update hook takes a mutable reference, hence the const_cast.
1024 if (II->isOutOfDate())
1025 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1026 auto I = LeafModuleMacros.find(II);
// NOTE(review): both return statements (entry found / not found) are not
// shown in this listing; presumably the stored list or an empty list is
// returned -- confirm.
1027 if (I != LeafModuleMacros.end())
1033 /// Iterators for the macro history table. Currently defined macros have
1034 /// IdentifierInfo::hasMacroDefinition() set and an empty
1035 /// MacroInfo::getUndefLoc() at the head of the list.
1036 using macro_iterator = MacroMap::const_iterator;
1038 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1039 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1041 llvm::iterator_range<macro_iterator>
1042 macros(bool IncludeExternalMacros = true) const {
1043 macro_iterator begin = macro_begin(IncludeExternalMacros);
1044 macro_iterator end = macro_end(IncludeExternalMacros);
1045 return llvm::make_range(begin, end);
1050 /// Return the name of the macro defined before \p Loc that has
1051 /// spelling \p Tokens. If there are multiple macros with same spelling,
1052 /// return the last one defined.
1053 StringRef getLastMacroWithSpelling(SourceLocation Loc,
1054 ArrayRef<TokenValue> Tokens) const;
1056 const std::string &getPredefines() const { return Predefines; }
1058 /// Set the predefines for this Preprocessor.
1060 /// These predefines are automatically injected when parsing the main file.
1061 void setPredefines(const char *P) { Predefines = P; }
1062 void setPredefines(StringRef P) { Predefines = P; }
1064 /// Return information about the specified preprocessor
1065 /// identifier token.
1066 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1067 return &Identifiers.get(Name);
1070 /// Add the specified pragma handler to this preprocessor.
1072 /// If \p Namespace is non-null, then it is a token required to exist on the
1073 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1074 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1075 void AddPragmaHandler(PragmaHandler *Handler) {
1076 AddPragmaHandler(StringRef(), Handler);
1079 /// Remove the specific pragma handler from this preprocessor.
1081 /// If \p Namespace is non-null, then it should be the namespace that
1082 /// \p Handler was added to. It is an error to remove a handler that
1083 /// has not been registered.
1084 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1085 void RemovePragmaHandler(PragmaHandler *Handler) {
1086 RemovePragmaHandler(StringRef(), Handler);
1089 /// Install empty handlers for all pragmas (making them ignored).
1090 void IgnorePragmas();
1092 /// Add the specified comment handler to the preprocessor.
1093 void addCommentHandler(CommentHandler *Handler);
1095 /// Remove the specified comment handler.
1097 /// It is an error to remove a handler that has not been registered.
1098 void removeCommentHandler(CommentHandler *Handler);
1100 /// Set the code completion handler to the given object.
1101 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1102 CodeComplete = &Handler;
1105 /// Retrieve the current code-completion handler.
1106 CodeCompletionHandler *getCodeCompletionHandler() const {
1107 return CodeComplete;
1110 /// Clear out the code completion handler.
1111 void clearCodeCompletionHandler() {
1112 CodeComplete = nullptr;
1115 /// Hook used by the lexer to invoke the "included file" code
1116 /// completion point.
1117 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1119 /// Hook used by the lexer to invoke the "natural language" code
1120 /// completion point.
1121 void CodeCompleteNaturalLanguage();
1123 /// Set the code completion token for filtering purposes.
1124 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1125 CodeCompletionII = Filter;
1128 /// Set the code completion token range for detecting replacement range later
1130 void setCodeCompletionTokenRange(const SourceLocation Start,
1131 const SourceLocation End) {
1132 CodeCompletionTokenRange = {Start, End};
1134 SourceRange getCodeCompletionTokenRange() const {
1135 return CodeCompletionTokenRange;
1138 /// Get the code completion token for filtering purposes.
///
/// When a filter identifier has been set, its name is returned.
1139 StringRef getCodeCompletionFilter() {
1140 if (CodeCompletionII)
1141 return CodeCompletionII->getName();
// NOTE(review): the return used when no filter identifier is set is not
// shown in this listing; presumably an empty StringRef -- confirm.
1145 /// Retrieve the preprocessing record, or NULL if there is no
1146 /// preprocessing record.
1147 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1149 /// Create a new preprocessing record, which will keep track of
1150 /// all macro expansions, macro definitions, etc.
1151 void createPreprocessingRecord();
1153 /// Returns true if the FileEntry is the PCH through header.
1154 bool isPCHThroughHeader(const FileEntry *FE);
1156 /// True if creating a PCH with a through header.
1157 bool creatingPCHWithThroughHeader();
1159 /// True if using a PCH with a through header.
1160 bool usingPCHWithThroughHeader();
1162 /// True if creating a PCH with a #pragma hdrstop.
1163 bool creatingPCHWithPragmaHdrStop();
1165 /// True if using a PCH with a #pragma hdrstop.
1166 bool usingPCHWithPragmaHdrStop();
1168 /// Skip tokens until after the #include of the through header or
1169 /// until after a #pragma hdrstop.
1170 void SkipTokensWhileUsingPCH();
1172 /// Process directives while skipping until the through header or
1173 /// #pragma hdrstop is found.
1174 void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1175 SourceLocation HashLoc);
1177 /// Enter the specified FileID as the main source file,
1178 /// which implicitly adds the builtin defines etc.
1179 void EnterMainSourceFile();
1181 /// Inform the preprocessor callbacks that processing is complete.
1182 void EndSourceFile();
1184 /// Add a source file to the top of the include stack and
1185 /// start lexing tokens from it instead of the current buffer.
1187 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1188 bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
1189 SourceLocation Loc);
1191 /// Add a Macro to the top of the include stack and start lexing
1192 /// tokens from it instead of the current buffer.
1194 /// \param Args specifies the tokens input to a function-like macro.
1195 /// \param ILEnd specifies the location of the ')' for a function-like macro
1196 /// or the identifier for an object-like macro.
1197 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1200 /// Add a "macro" context to the top of the include stack,
1201 /// which will cause the lexer to start returning the specified tokens.
1203 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1204 /// will not be subject to further macro expansion. Otherwise, these tokens
1205 /// will be re-macro-expanded when/if expansion is enabled.
1207 /// If \p OwnsTokens is false, this method assumes that the specified stream
1208 /// of tokens has a permanent owner somewhere, so they do not need to be
1209 /// copied. If it is true, it assumes the array of tokens is allocated with
1210 /// \c new[] and the Preprocessor will delete[] it.
1212 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1213 bool DisableMacroExpansion, bool OwnsTokens);
1216 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1217 bool DisableMacroExpansion) {
1218 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true);
1221 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
1222 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false);
1225 /// Pop the current lexer/macro exp off the top of the lexer stack.
1227 /// This should only be used in situations where the current state of the
1228 /// top-of-stack lexer is known.
1229 void RemoveTopOfLexerStack();
1231 /// From the point that this method is called, and until
1232 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1233 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1234 /// make the Preprocessor re-lex the same tokens.
1236 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1237 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1238 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1240 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1241 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1242 /// tokens will continue indefinitely.
1244 void EnableBacktrackAtThisPos();
1246 /// Disable the last EnableBacktrackAtThisPos call.
1247 void CommitBacktrackedTokens();
1249 struct CachedTokensRange {
1250 CachedTokensTy::size_type Begin, End;
1254 /// A range of cached tokens that should be erased after lexing
1255 /// when backtracking requires the erasure of such cached tokens.
1256 Optional<CachedTokensRange> CachedTokenRangeToErase;
1259 /// Returns the range of cached tokens that were lexed since
1260 /// EnableBacktrackAtThisPos() was previously called.
1261 CachedTokensRange LastCachedTokenRange();
1263 /// Erase the range of cached tokens that were lexed since
1264 /// EnableBacktrackAtThisPos() was previously called.
1265 void EraseCachedTokens(CachedTokensRange TokenRange);
1267 /// Make Preprocessor re-lex the tokens that were lexed since
1268 /// EnableBacktrackAtThisPos() was previously called.
1271 /// True if EnableBacktrackAtThisPos() was called and
1272 /// caching of tokens is on.
1273 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1275 /// Lex the next token for this preprocessor.
1276 void Lex(Token &Result);
1278 void LexAfterModuleImport(Token &Result);
1280 void makeModuleVisible(Module *M, SourceLocation Loc);
1282 SourceLocation getModuleImportLoc(Module *M) const {
1283 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1286 /// Lex a string literal, which may be the concatenation of multiple
1287 /// string literals and may even come from macro expansion.
///
/// \p DiagnosticTag and \p AllowMacroExpansion are passed on unchanged to
/// FinishLexStringLiteral(), whose result is returned.
1288 /// \returns true on success, false if an error diagnostic has been generated.
1289 bool LexStringLiteral(Token &Result, std::string &String,
1290 const char *DiagnosticTag, bool AllowMacroExpansion) {
// NOTE(review): the statement taken when AllowMacroExpansion is true, and
// whatever separates it from the unexpanded lex below, are not shown in
// this listing -- presumably a macro-expanding lex with the call below as
// the alternative; confirm against the full file.
1291 if (AllowMacroExpansion)
1294 LexUnexpandedToken(Result);
1295 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1296 AllowMacroExpansion);
1299 /// Complete the lexing of a string literal where the first token has
1300 /// already been lexed (see LexStringLiteral).
1301 bool FinishLexStringLiteral(Token &Result, std::string &String,
1302 const char *DiagnosticTag,
1303 bool AllowMacroExpansion);
1305 /// Lex a token. If it's a comment, keep lexing until we get
1306 /// something not a comment.
1308 /// This is useful in -E -C mode where comments would foul up preprocessor
1309 /// directive handling.
1310 void LexNonComment(Token &Result) {
// NOTE(review): the loop header and the statement that lexes into Result are
// not shown in this listing; only the loop's terminating condition is.
// Repeat for as long as the token just produced is a comment.
1313 while (Result.getKind() == tok::comment);
1316 /// Just like Lex, but disables macro expansion of identifier tokens.
///
/// The previous value of \c DisableMacroExpansion is saved on entry and
/// written back before returning.
1317 void LexUnexpandedToken(Token &Result) {
1318 // Disable macro expansion.
1319 bool OldVal = DisableMacroExpansion;
1320 DisableMacroExpansion = true;
// NOTE(review): the statement(s) between disabling and restoring are not
// shown in this listing -- presumably the actual lex into Result; confirm.
// Put the saved setting back.
1325 DisableMacroExpansion = OldVal;
1328 /// Like LexNonComment, but this disables macro expansion of
1329 /// identifier tokens.
1330 void LexUnexpandedNonComment(Token &Result) {
// NOTE(review): the loop header is not shown in this listing; the call below
// is the loop body and the following line its terminating condition.
1332 LexUnexpandedToken(Result);
// Repeat for as long as the token just produced is a comment.
1333 while (Result.getKind() == tok::comment);
1336 /// Parses a simple integer literal to get its numeric value. Floating
1337 /// point literals and user defined literals are rejected. Used primarily to
1338 /// handle pragmas that accept integer arguments.
1339 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1341 /// Disables macro expansion everywhere except for preprocessor directives.
1342 void SetMacroExpansionOnlyInDirectives() {
1343 DisableMacroExpansion = true;
1344 MacroExpansionInDirectivesOverride = true;
1347 /// Peeks ahead N tokens and returns that token without consuming any
/// tokens.
1350 /// LookAhead(0) returns the next token that would be returned by Lex(),
1351 /// LookAhead(1) returns the token after it, etc. This returns normal
1352 /// tokens after phase 5. As such, it is equivalent to using
1353 /// 'Lex', not 'LexUnexpandedToken'.
1354 const Token &LookAhead(unsigned N) {
// Fast path: the requested token is already in the cache.
1355 if (CachedLexPos + N < CachedTokens.size())
1356 return CachedTokens[CachedLexPos+N];
// NOTE(review): one line between the two returns is not shown in this
// listing (presumably just an 'else'); confirm.
// PeekAhead counts from 1, hence N+1.
1358 return PeekAhead(N+1);
1361 /// When backtracking is enabled and tokens are cached,
1362 /// this allows reverting a specific number of tokens.
1364 /// Note that the number of tokens being reverted should be up to the last
1365 /// backtrack position, not more.
1366 void RevertCachedTokens(unsigned N) {
1367 assert(isBacktrackEnabled() &&
1368 "Should only be called when tokens are cached for backtracking");
// The position after reverting must not fall before the innermost backtrack
// position. The comparison is done on signed values so that an N larger
// than CachedLexPos shows up as a negative result.
1369 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1370 && "Should revert tokens up to the last backtrack position, not more");
1371 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1372 "Corrupted backtrack positions ?");
// NOTE(review): the statement that actually moves CachedLexPos back by N is
// not shown in this listing; confirm against the full file.
1376 /// Enters a token in the token stream to be lexed next.
1378 /// If BackTrack() is called afterwards, the token will remain at the
1379 /// insertion point.
1380 void EnterToken(const Token &Tok) {
1381 EnterCachingLexMode();
1382 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1385 /// We notify the Preprocessor that if it is caching tokens (because
1386 /// backtrack is enabled) it should replace the most recent cached tokens
1387 /// with the given annotation token. This function has no effect if
1388 /// backtracking is not enabled.
1390 /// Note that the use of this function is just for optimization, so that the
1391 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1393 void AnnotateCachedTokens(const Token &Tok) {
1394 assert(Tok.isAnnotation() && "Expected annotation token");
1395 if (CachedLexPos != 0 && isBacktrackEnabled())
1396 AnnotatePreviousCachedTokens(Tok);
1399 /// Get the location of the last cached token, suitable for setting the end
1400 /// location of an annotation token.
1401 SourceLocation getLastCachedTokenLocation() const {
1402 assert(CachedLexPos != 0);
1403 return CachedTokens[CachedLexPos-1].getLastLoc();
1406 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
/// CachedTokens.
1408 bool IsPreviousCachedToken(const Token &Tok) const;
1410 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
/// in \p NewToks.
1413 /// Useful when a token needs to be split into smaller ones and CachedTokens'
1414 /// most recent token must be updated to reflect that.
1415 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1417 /// Replace the last token with an annotation token.
1419 /// Like AnnotateCachedTokens(), this routine replaces an
1420 /// already-parsed (and resolved) token with an annotation
1421 /// token. However, this routine only replaces the last token with
1422 /// the annotation token; it does not affect any other cached
1423 /// tokens. This function has no effect if backtracking is not
1425 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1426 assert(Tok.isAnnotation() && "Expected annotation token");
1427 if (CachedLexPos != 0 && isBacktrackEnabled())
1428 CachedTokens[CachedLexPos-1] = Tok;
1431 /// Enter an annotation token into the token stream.
1432 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1433 void *AnnotationVal);
1435 /// Update the current token to represent the provided
1436 /// identifier, in order to cache an action performed by typo correction.
1437 void TypoCorrectToken(const Token &Tok) {
1438 assert(Tok.getIdentifierInfo() && "Expected identifier token");
1439 if (CachedLexPos != 0 && isBacktrackEnabled())
1440 CachedTokens[CachedLexPos-1] = Tok;
1443 /// Recompute the current lexer kind based on the CurLexer/
1444 /// CurTokenLexer pointers.
1445 void recomputeCurLexerKind();
1447 /// Returns true if incremental processing is enabled
1448 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1450 /// Enables the incremental processing
1451 void enableIncrementalProcessing(bool value = true) {
1452 IncrementalProcessing = value;
1455 /// Specify the point at which code-completion will be performed.
1457 /// \param File the file in which code completion should occur. If
1458 /// this file is included multiple times, code-completion will
1459 /// perform completion the first time it is included. If NULL, this
1460 /// function clears out the code-completion point.
1462 /// \param Line the line at which code completion should occur
1465 /// \param Column the column at which code completion should occur
1468 /// \returns true if an error occurred, false otherwise.
1469 bool SetCodeCompletionPoint(const FileEntry *File,
1470 unsigned Line, unsigned Column);
1472 /// Determine if we are performing code completion.
1473 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1475 /// Returns the location of the code-completion point.
1477 /// Returns an invalid location if code-completion is not enabled or the file
1478 /// containing the code-completion point has not been lexed yet.
1479 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1481 /// Returns the start location of the file of code-completion point.
1483 /// Returns an invalid location if code-completion is not enabled or the file
1484 /// containing the code-completion point has not been lexed yet.
1485 SourceLocation getCodeCompletionFileLoc() const {
1486 return CodeCompletionFileLoc;
1489 /// Returns true if code-completion is enabled and we have hit the
1490 /// code-completion point.
1491 bool isCodeCompletionReached() const { return CodeCompletionReached; }
1493 /// Note that we hit the code-completion point.
1494 void setCodeCompletionReached() {
1495 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1496 CodeCompletionReached = true;
1497 // Silence any diagnostics that occur after we hit the code-completion.
1498 getDiagnostics().setSuppressAllDiagnostics(true);
1501 /// The location of the currently-active \#pragma clang
1502 /// arc_cf_code_audited begin.
1504 /// Returns an invalid location if there is no such pragma active.
1505 SourceLocation getPragmaARCCFCodeAuditedLoc() const {
1506 return PragmaARCCFCodeAuditedLoc;
1509 /// Set the location of the currently-active \#pragma clang
1510 /// arc_cf_code_audited begin. An invalid location ends the pragma.
1511 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
1512 PragmaARCCFCodeAuditedLoc = Loc;
1515 /// The location of the currently-active \#pragma clang
1516 /// assume_nonnull begin.
1518 /// Returns an invalid location if there is no such pragma active.
1519 SourceLocation getPragmaAssumeNonNullLoc() const {
1520 return PragmaAssumeNonNullLoc;
1523 /// Set the location of the currently-active \#pragma clang
1524 /// assume_nonnull begin. An invalid location ends the pragma.
1525 void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1526 PragmaAssumeNonNullLoc = Loc;
1529 /// Set the directory in which the main file should be considered
1530 /// to have been found, if it is not a real file.
///
/// \param Dir the directory to use for the main file.
// NOTE(review): the body of this function is not shown in this listing;
// presumably it just stores Dir in a member -- confirm.
1531 void setMainFileDir(const DirectoryEntry *Dir) {
1535 /// Instruct the preprocessor to skip part of the main source file.
1537 /// \param Bytes The number of bytes in the preamble to skip.
1539 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1540 /// start of a line.
1541 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1542 SkipMainFilePreamble.first = Bytes;
1543 SkipMainFilePreamble.second = StartOfLine;
1546 /// Forwarding function for diagnostics. This emits a diagnostic at
1547 /// the specified Token's location, translating the token's start
1548 /// position in the current buffer into a SourcePosition object for rendering.
1549 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1550 return Diags->Report(Loc, DiagID);
1553 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1554 return Diags->Report(Tok.getLocation(), DiagID);
1557 /// Return the 'spelling' of the token at the given
1558 /// location; does not go up to the spelling location or down to the
1559 /// expansion location.
1561 /// \param buffer A buffer which will be used only if the token requires
1562 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1563 /// \param invalid If non-null, will be set \c true if an error occurs.
1564 StringRef getSpelling(SourceLocation loc,
1565 SmallVectorImpl<char> &buffer,
1566 bool *invalid = nullptr) const {
1567 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1570 /// Return the 'spelling' of the Tok token.
1572 /// The spelling of a token is the characters used to represent the token in
1573 /// the source file after trigraph expansion and escaped-newline folding. In
1574 /// particular, this wants to get the true, uncanonicalized, spelling of
1575 /// things like digraphs, UCNs, etc.
1577 /// \param Invalid If non-null, will be set \c true if an error occurs.
1578 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1579 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1582 /// Get the spelling of a token into a preallocated buffer, instead
1583 /// of as an std::string.
1585 /// The caller is required to allocate enough space for the token, which is
1586 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1587 /// actual result is returned.
1589 /// Note that this method may do two possible things: it may either fill in
1590 /// the buffer specified with characters, or it may *change the input pointer*
1591 /// to point to a constant buffer with the data already in it (avoiding a
1592 /// copy). The caller is not allowed to modify the returned buffer pointer
1593 /// if an internal buffer is returned.
1594 unsigned getSpelling(const Token &Tok, const char *&Buffer,
1595 bool *Invalid = nullptr) const {
1596 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1599 /// Get the spelling of a token into a SmallVector.
1601 /// Note that the returned StringRef may not point to the
1602 /// supplied buffer if a copy can be avoided.
1603 StringRef getSpelling(const Token &Tok,
1604 SmallVectorImpl<char> &Buffer,
1605 bool *Invalid = nullptr) const;
1607 /// Relex the token at the specified location.
1608 /// \returns true if there was a failure, false on success.
1609 bool getRawToken(SourceLocation Loc, Token &Result,
1610 bool IgnoreWhiteSpace = false) {
1611 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1614 /// Given a Token \p Tok that is a numeric constant with length 1,
1615 /// return the character.
///
/// \param Invalid forwarded to SourceManager::getCharacterData() on the slow
/// path.
// NOTE(review): the return-type line of this declaration is not shown in
// this listing.
1617 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1618 bool *Invalid = nullptr) const {
1619 assert(Tok.is(tok::numeric_constant) &&
1620 Tok.getLength() == 1 && "Called on unsupported token");
1621 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1623 // If the token is carrying a literal data pointer, just use it.
// NOTE(review): the statement executed when D is non-null is not shown in
// this listing; presumably it returns the character D points at -- confirm.
1624 if (const char *D = Tok.getLiteralData())
1627 // Otherwise, fall back on getCharacterData, which is slower, but always
// works.
1629 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1632 /// Retrieve the name of the immediate macro expansion.
1634 /// This routine starts from a source location, and finds the name of the
1635 /// macro responsible for its immediate expansion. It looks through any
1636 /// intervening macro argument expansions to compute this. It returns a
1637 /// StringRef that refers to the SourceManager-owned buffer of the source
1638 /// where that macro name is spelled. Thus, the result shouldn't out-live
1639 /// the SourceManager.
1640 StringRef getImmediateMacroName(SourceLocation Loc) {
1641 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1644 /// Plop the specified string into a scratch buffer and set the
1645 /// specified token's location and length to it.
1647 /// If specified, the source location provides a location of the expansion
1648 /// point of the token.
1649 void CreateString(StringRef Str, Token &Tok,
1650 SourceLocation ExpansionLocStart = SourceLocation(),
1651 SourceLocation ExpansionLocEnd = SourceLocation());
1653 /// Split the first Length characters out of the token starting at TokLoc
1654 /// and return a location pointing to the split token. Re-lexing from the
1655 /// split token will return the split token rather than the original.
1656 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1658 /// Computes the source location just past the end of the
1659 /// token at this source location.
1661 /// This routine can be used to produce a source location that
1662 /// points just past the end of the token referenced by \p Loc, and
1663 /// is generally used when a diagnostic needs to point just after a
1664 /// token where it expected something different that it received. If
1665 /// the returned source location would not be meaningful (e.g., if
1666 /// it points into a macro), this routine returns an invalid
1667 /// source location.
1669 /// \param Offset an offset from the end of the token, where the source
1670 /// location should refer to. The default offset (0) produces a source
1671 /// location pointing just past the end of the token; an offset of 1 produces
1672 /// a source location pointing to the last character in the token, etc.
1673 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1674 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1677 /// Returns true if the given MacroID location points at the first
1678 /// token of the macro expansion.
///
/// Forwards to Lexer::isAtStartOfMacroExpansion() with this preprocessor's
/// source manager and language options.
1680 /// \param MacroBegin If non-null and function returns true, it is set to
1681 /// begin location of the macro.
1682 bool isAtStartOfMacroExpansion(SourceLocation loc,
1683 SourceLocation *MacroBegin = nullptr) const {
// NOTE(review): the final argument line of the call below is not shown in
// this listing; presumably it passes MacroBegin -- confirm.
1684 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1688 /// Returns true if the given MacroID location points at the last
1689 /// token of the macro expansion.
1691 /// \param MacroEnd If non-null and function returns true, it is set to
1692 /// end location of the macro.
1693 bool isAtEndOfMacroExpansion(SourceLocation loc,
1694 SourceLocation *MacroEnd = nullptr) const {
1695 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1698 /// Print the token to stderr, used for debugging.
1699 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1700 void DumpLocation(SourceLocation Loc) const;
1701 void DumpMacro(const MacroInfo &MI) const;
1702 void dumpMacroInfo(const IdentifierInfo *II);
1704 /// Given a location that specifies the start of a
1705 /// token, return a new location that specifies a character within the token.
1706 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1707 unsigned Char) const {
1708 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1711 /// Increment the counters for the number of token paste operations
/// performed.
1714 /// If fast was specified, this is a 'fast paste' case we handled.
1715 void IncrementPasteCounter(bool isFast) {
// NOTE(review): the condition guarding the increment below and the
// alternative branch are not shown in this listing; presumably the fast
// counter is bumped only when isFast is true and another counter is bumped
// otherwise -- confirm.
1717 ++NumFastTokenPaste;
1724 size_t getTotalMemory() const;
1726 /// When the macro expander pastes together a comment (/##/) in Microsoft
1727 /// mode, this method handles updating the current state, returning the
1728 /// token on the next source line.
1729 void HandleMicrosoftCommentPaste(Token &Tok);
1731 //===--------------------------------------------------------------------===//
1732 // Preprocessor callback methods. These are invoked by a lexer as various
1733 // directives and events are found.
1735 /// Given a tok::raw_identifier token, look up the
1736 /// identifier information for the token and install it into the token,
1737 /// updating the token kind accordingly.
1738 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1741 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1744 /// Specifies the reason for poisoning an identifier.
1746 /// If that identifier is accessed while poisoned, then this reason will be
1747 /// used instead of the default "poisoned" diagnostic.
1748 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1750 /// Display reason for poisoned identifier.
1751 void HandlePoisonedIdentifier(Token & Identifier);
1753 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1754 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1755 if(II->isPoisoned()) {
1756 HandlePoisonedIdentifier(Identifier);
1762 /// Identifiers used for SEH handling in Borland. These are only
1763 /// allowed in particular circumstances
1765 IdentifierInfo *Ident__exception_code,
1766 *Ident___exception_code,
1767 *Ident_GetExceptionCode;
1768 // __except filter expression
1769 IdentifierInfo *Ident__exception_info,
1770 *Ident___exception_info,
1771 *Ident_GetExceptionInfo;
1773 IdentifierInfo *Ident__abnormal_termination,
1774 *Ident___abnormal_termination,
1775 *Ident_AbnormalTermination;
1777 const char *getCurLexerEndPos();
1778 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1781 void PoisonSEHIdentifiers(bool Poison = true); // Borland
1783 /// Callback invoked when the lexer reads an identifier and has
1784 /// filled in the token's IdentifierInfo member.
1786 /// This callback potentially macro expands it or turns it into a named
1787 /// token (like 'for').
1789 /// \returns true if we actually computed a token, false if we need to
/// lex again.
1791 bool HandleIdentifier(Token &Identifier);
1793 /// Callback invoked when the lexer hits the end of the current file.
1795 /// This either returns the EOF token and returns true, or
1796 /// pops a level off the include stack and returns false, at which point the
1797 /// client should call lex again.
1798 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1800 /// Callback invoked when the current TokenLexer hits the end of its
/// token stream.
1802 bool HandleEndOfTokenLexer(Token &Result);
1804 /// Callback invoked when the lexer sees a # token at the start of a
/// line.
1807 /// This consumes the directive, modifies the lexer/preprocessor state, and
1808 /// advances the lexer(s) so that the next token read is the correct one.
1809 void HandleDirective(Token &Result);
1811 /// Ensure that the next token is a tok::eod token.
1813 /// If not, emit a diagnostic and consume up until the eod.
1814 /// If \p EnableMacros is true, then we consider macros that expand to zero
1815 /// tokens as being ok.
1816 void CheckEndOfDirective(const char *DirType, bool EnableMacros = false);
1818 /// Read and discard all tokens remaining on the current line until
1819 /// the tok::eod token is found.
1820 void DiscardUntilEndOfDirective();
1822 /// Returns true if the preprocessor has seen a use of
1823 /// __DATE__ or __TIME__ in the file so far.
1824 bool SawDateOrTime() const {
1825 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1827 unsigned getCounterValue() const { return CounterValue; }
1828 void setCounterValue(unsigned V) { CounterValue = V; }
1830 /// Retrieves the module that we're currently building, if any.
1831 Module *getCurrentModule();
1833 /// Allocate a new MacroInfo object with the provided SourceLocation.
1834 MacroInfo *AllocateMacroInfo(SourceLocation L);
1836 /// Turn the specified lexer token into a fully checked and spelled
1837 /// filename, e.g. as an operand of \#include.
1839 /// The caller is expected to provide a buffer that is large enough to hold
1840 /// the spelling of the filename, but is also expected to handle the case
1841 /// when this method decides to use a different buffer.
/// \returns true if the input filename was in <>'s or false if it was
/// in ""'s.
1845 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
1847 /// Given a "foo" or \<foo> reference, look up the indicated file.
1849 /// Returns null on failure. \p isAngled indicates whether the file
1850 /// reference is for system \#include's or not (i.e. using <> instead of "").
1851 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1852 bool isAngled, const DirectoryLookup *FromDir,
1853 const FileEntry *FromFile,
1854 const DirectoryLookup *&CurDir,
1855 SmallVectorImpl<char> *SearchPath,
1856 SmallVectorImpl<char> *RelativePath,
1857 ModuleMap::KnownHeader *SuggestedModule,
1858 bool *IsMapped, bool SkipCache = false);
1860 /// Get the DirectoryLookup structure used to find the current
1861 /// FileEntry, if CurLexer is non-null and if applicable.
1863 /// This allows us to implement \#include_next and find directory-specific
1865 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1867 /// Return true if we're in the top-level file, not in a \#include.
1868 bool isInPrimaryFile() const;
1870 /// Handle cases where the \#include name is expanded
1871 /// from a macro as multiple tokens, which need to be glued together.
1873 /// This occurs for code like:
1875 /// \#define FOO <x/y.h>
1878 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
1880 /// This code concatenates and consumes tokens up to the '>' token. It
1881 /// returns false if the > was found, otherwise it returns true if it finds
1882 /// and consumes the EOD marker.
1883 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
1884 SourceLocation &End);
1886 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1887 /// followed by EOD. Return true if the token is not a valid on-off-switch.
1888 bool LexOnOffSwitch(tok::OnOffSwitch &Result);
1890 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
1891 bool *ShadowFlag = nullptr);
1893 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
1894 Module *LeaveSubmodule(bool ForPragma);
1897 friend void TokenLexer::ExpandFunctionArguments();
1899 void PushIncludeMacroStack() {
1900 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
1901 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
1902 std::move(CurLexer), CurPPLexer,
1903 std::move(CurTokenLexer), CurDirLookup);
1904 CurPPLexer = nullptr;
1907 void PopIncludeMacroStack() {
1908 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1909 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1910 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1911 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
1912 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
1913 CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1914 IncludeMacroStack.pop_back();
1917 void PropagateLineStartLeadingSpaceInfo(Token &Result);
1919 /// Determine whether we need to create module macros for #defines in the
1920 /// current context.
1921 bool needModuleMacros() const;
/// Update the set of active module macros and ambiguity flag for a module
/// macro name.
1925 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
1927 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
1928 SourceLocation Loc);
1929 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
1930 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1933 /// Lex and validate a macro name, which occurs after a
1934 /// \#define or \#undef.
1936 /// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
/// \param ShadowFlag Points to flag that is set if macro name shadows
///                   a keyword.
1941 /// This emits a diagnostic, sets the token kind to eod,
1942 /// and discards the rest of the macro line if the macro name is invalid.
1943 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
1944 bool *ShadowFlag = nullptr);
1946 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
1947 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
1948 /// doing so performs certain validity checks including (but not limited to):
1949 /// - # (stringization) is followed by a macro parameter
1950 /// \param MacroNameTok - Token that represents the macro name
1951 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
1953 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
1954 /// returns a nullptr if an invalid sequence of tokens is encountered.
1955 MacroInfo *ReadOptionalMacroParameterListAndBody(
1956 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
1958 /// The ( starting an argument list of a macro definition has just been read.
1959 /// Lex the rest of the parameters and the closing ), updating \p MI with
1960 /// what we learn and saving in \p LastTok the last token read.
1961 /// Return true if an error occurs parsing the arg list.
1962 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
1964 /// We just read a \#if or related directive and decided that the
1965 /// subsequent tokens are in the \#if'd out portion of the
1966 /// file. Lex the rest of the file, until we see an \#endif. If \p
1967 /// FoundNonSkipPortion is true, then we have already emitted code for part of
1968 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
1969 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
1970 /// already seen one so a \#else directive is a duplicate. When this returns,
1971 /// the caller can lex the first valid token.
1972 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
1973 SourceLocation IfTokenLoc,
1974 bool FoundNonSkipPortion, bool FoundElse,
1975 SourceLocation ElseLoc = SourceLocation());
1977 /// Information about the result for evaluating an expression for a
1978 /// preprocessor directive.
1979 struct DirectiveEvalResult {
1980 /// Whether the expression was evaluated as true or not.
1983 /// True if the expression contained identifiers that were undefined.
1984 bool IncludedUndefinedIds;
1987 /// Evaluate an integer constant expression that may occur after a
1988 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
1990 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
1991 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
1993 /// Install the standard preprocessor pragmas:
1994 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
1995 void RegisterBuiltinPragmas();
1997 /// Register builtin macros such as __LINE__ with the identifier table.
1998 void RegisterBuiltinMacros();
2000 /// If an identifier token is read that is to be expanded as a macro, handle
2001 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2002 /// otherwise the caller should lex again.
2003 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2005 /// Cache macro expanded tokens for TokenLexers.
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it
/// is going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
2010 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2011 ArrayRef<Token> tokens);
2013 void removeCachedMacroExpandedTokensOfLastLexer();
2015 /// Determine whether the next preprocessor token to be
2016 /// lexed is a '('. If so, consume the token and return true, if not, this
2017 /// method should have no observable side-effect on the lexed tokens.
2018 bool isNextPPTokenLParen();
2020 /// After reading "MACRO(", this method is invoked to read all of the formal
2021 /// arguments specified for the macro invocation. Returns null on error.
2022 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2023 SourceLocation &MacroEnd);
2025 /// If an identifier token is read that is to be expanded
2026 /// as a builtin macro, handle it and return the next token as 'Tok'.
2027 void ExpandBuiltinMacro(Token &Tok);
2029 /// Read a \c _Pragma directive, slice it up, process it, then
2030 /// return the first token after the directive.
2031 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2032 void Handle_Pragma(Token &Tok);
2034 /// Like Handle_Pragma except the pragma text is not enclosed within
2035 /// a string literal.
2036 void HandleMicrosoft__pragma(Token &Tok);
2038 /// Add a lexer to the top of the include stack and
2039 /// start lexing tokens from it instead of the current buffer.
2040 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2042 /// Set the FileID for the preprocessor predefines.
2043 void setPredefinesFileID(FileID FID) {
2044 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2045 PredefinesFileID = FID;
2048 /// Set the FileID for the PCH through header.
2049 void setPCHThroughHeaderFileID(FileID FID);
2051 /// Returns true if we are lexing from a file and not a
2052 /// pragma or a macro.
2053 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2054 return L ? !L->isPragmaLexer() : P != nullptr;
2057 static bool IsFileLexer(const IncludeStackInfo& I) {
2058 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2061 bool IsFileLexer() const {
2062 return IsFileLexer(CurLexer.get(), CurPPLexer);
2065 //===--------------------------------------------------------------------===//
2067 void CachingLex(Token &Result);
2069 bool InCachingLexMode() const {
2070 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2071 // that we are past EOF, not that we are in CachingLex mode.
2072 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2075 void EnterCachingLexMode();
2077 void ExitCachingLexMode() {
2078 if (InCachingLexMode())
2079 RemoveTopOfLexerStack();
2082 const Token &PeekAhead(unsigned N);
2083 void AnnotatePreviousCachedTokens(const Token &Tok);
2085 //===--------------------------------------------------------------------===//
2086 /// Handle*Directive - implement the various preprocessor directives. These
2087 /// should side-effect the current preprocessor object so that the next call
2088 /// to Lex() will return the appropriate token next.
2089 void HandleLineDirective();
2090 void HandleDigitDirective(Token &Tok);
2091 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2092 void HandleIdentSCCSDirective(Token &Tok);
2093 void HandleMacroPublicDirective(Token &Tok);
2094 void HandleMacroPrivateDirective();
2097 void HandleIncludeDirective(SourceLocation HashLoc,
2099 const DirectoryLookup *LookupFrom = nullptr,
2100 const FileEntry *LookupFromFile = nullptr,
2101 bool isImport = false);
2102 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2103 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2104 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2105 void HandleMicrosoftImportDirective(Token &Tok);
2108 /// Check that the given module is available, producing a diagnostic if not.
2109 /// \return \c true if the check failed (because the module is not available).
2110 /// \c false if the module appears to be usable.
2111 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2112 const TargetInfo &TargetInfo,
2113 DiagnosticsEngine &Diags, Module *M);
2115 // Module inclusion testing.
2116 /// Find the module that owns the source or header file that
2117 /// \p Loc points to. If the location is in a file that was included
2118 /// into a module, or is outside any module, returns nullptr.
2119 Module *getModuleForLocation(SourceLocation Loc);
2121 /// We want to produce a diagnostic at location IncLoc concerning a
2122 /// missing module import.
2124 /// \param IncLoc The location at which the missing import was detected.
2125 /// \param M The desired module.
2126 /// \param MLoc A location within the desired module at which some desired
2127 /// effect occurred (eg, where a desired entity was declared).
2129 /// \return A file that can be #included to import a module containing MLoc.
/// Null if no such file could be determined or if a #include is not
/// appropriate.
2132 const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2134 SourceLocation MLoc);
2136 bool isRecordingPreamble() const {
2137 return PreambleConditionalStack.isRecording();
2140 bool hasRecordedPreamble() const {
2141 return PreambleConditionalStack.hasRecordedPreamble();
2144 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2145 return PreambleConditionalStack.getStack();
2148 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2149 PreambleConditionalStack.setStack(s);
2152 void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
2153 llvm::Optional<PreambleSkipInfo> SkipInfo) {
2154 PreambleConditionalStack.startReplaying();
2155 PreambleConditionalStack.setStack(s);
2156 PreambleConditionalStack.SkipInfo = SkipInfo;
2159 llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2160 return PreambleConditionalStack.SkipInfo;
/// After processing predefined file, initialize the conditional stack from
/// the preamble.
2166 void replayPreambleConditionalStack();
2169 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2170 void HandleUndefDirective();
2172 // Conditional Inclusion.
2173 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2174 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2175 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2176 bool ReadAnyTokensBeforeDirective);
2177 void HandleEndifDirective(Token &EndifToken);
2178 void HandleElseDirective(Token &Result, const Token &HashToken);
2179 void HandleElifDirective(Token &ElifToken, const Token &HashToken);
2182 void HandlePragmaDirective(SourceLocation IntroducerLoc,
2183 PragmaIntroducerKind Introducer);
2186 void HandlePragmaOnce(Token &OnceTok);
2187 void HandlePragmaMark();
2188 void HandlePragmaPoison();
2189 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2190 void HandlePragmaDependency(Token &DependencyTok);
2191 void HandlePragmaPushMacro(Token &Tok);
2192 void HandlePragmaPopMacro(Token &Tok);
2193 void HandlePragmaIncludeAlias(Token &Tok);
2194 void HandlePragmaModuleBuild(Token &Tok);
2195 void HandlePragmaHdrstop(Token &Tok);
2196 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2198 // Return true and store the first token only if any CommentHandler
2199 // has inserted some tokens and getCommentRetentionState() is false.
2200 bool HandleComment(Token &result, SourceRange Comment);
/// A macro is used, update information about macros that need unused
/// warnings.
2204 void markMacroAsUsed(MacroInfo *MI);
2207 /// Abstract base class that describes a handler that will receive
2208 /// source ranges for each of the comments encountered in the source file.
2209 class CommentHandler {
2211 virtual ~CommentHandler();
2213 // The handler shall return true if it has pushed any tokens
2214 // to be read using e.g. EnterToken or EnterTokenStream.
2215 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
2218 /// Registry of pragma handlers added by plugins
2219 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2221 } // namespace clang
2223 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H