1 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Defines the clang::Preprocessor interface.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
15 #define LLVM_CLANG_LEX_PREPROCESSOR_H
17 #include "clang/Basic/Diagnostic.h"
18 #include "clang/Basic/IdentifierTable.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/LangOptions.h"
21 #include "clang/Basic/Module.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Basic/TokenKinds.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/MacroInfo.h"
27 #include "clang/Lex/ModuleLoader.h"
28 #include "clang/Lex/ModuleMap.h"
29 #include "clang/Lex/PPCallbacks.h"
30 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
31 #include "clang/Lex/Token.h"
32 #include "clang/Lex/TokenLexer.h"
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/FoldingSet.h"
36 #include "llvm/ADT/FunctionExtras.h"
37 #include "llvm/ADT/None.h"
38 #include "llvm/ADT/Optional.h"
39 #include "llvm/ADT/PointerUnion.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/ADT/TinyPtrVector.h"
45 #include "llvm/ADT/iterator_range.h"
46 #include "llvm/Support/Allocator.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Registry.h"
60 template<unsigned InternalLen> class SmallString;
66 class CodeCompletionHandler;
69 class DirectoryLookup;
70 class ExternalPreprocessorSource;
76 class PragmaNamespace;
77 class PreprocessingRecord;
78 class PreprocessorLexer;
79 class PreprocessorOptions;
87 /// Stores token information for comparing actual tokens with
88 /// predefined values. Only handles simple tokens and identifiers.
/// Builds a TokenValue that matches tokens by kind alone; no identifier is
/// attached (II is null). The assertions reject the kinds this class does not
/// handle: raw identifiers, identifiers (use the IdentifierInfo overload),
/// literals, and annotations.
94 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
95 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
96 assert(Kind != tok::identifier &&
97 "Identifiers should be created by TokenValue(IdentifierInfo *)");
98 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
99 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
/// Builds a TokenValue for an identifier token: the kind is fixed to
/// tok::identifier and \p II is the identifier to compare against.
102 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
/// Returns true when \p Tok has the same kind as this value and, if an
/// identifier was stored, \p Tok carries that same IdentifierInfo pointer.
104 bool operator==(const Token &Tok) const {
105 return Tok.getKind() == Kind &&
// A null II means "match on kind only".
106 (!II || II == Tok.getIdentifierInfo());
110 /// Context in which macro name is used.
112 // other than #define or #undef
115 // macro name specified in #define
118 // macro name specified in #undef
122 /// Engages in a tight little dance with the lexer to efficiently
123 /// preprocess tokens.
125 /// Lexers know only about tokens within a single source file, and don't
126 /// know anything about preprocessor-level issues like the \#include stack,
127 /// token expansion, etc.
129 friend class VAOptDefinitionContext;
130 friend class VariadicMacroScopeGuard;
132 llvm::unique_function<void(const clang::Token &)> OnToken;
133 std::shared_ptr<PreprocessorOptions> PPOpts;
134 DiagnosticsEngine *Diags;
135 LangOptions &LangOpts;
136 const TargetInfo *Target = nullptr;
137 const TargetInfo *AuxTarget = nullptr;
138 FileManager &FileMgr;
139 SourceManager &SourceMgr;
140 std::unique_ptr<ScratchBuffer> ScratchBuf;
141 HeaderSearch &HeaderInfo;
142 ModuleLoader &TheModuleLoader;
144 /// External source of macros.
145 ExternalPreprocessorSource *ExternalSource;
147 /// A BumpPtrAllocator object used to quickly allocate and release
148 /// objects internal to the Preprocessor.
149 llvm::BumpPtrAllocator BP;
151 /// Identifiers for builtin macros and other builtins.
152 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
153 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
154 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
155 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
156 IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
157 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
158 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
159 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
160 IdentifierInfo *Ident__identifier; // __identifier
161 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
162 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
163 IdentifierInfo *Ident__has_feature; // __has_feature
164 IdentifierInfo *Ident__has_extension; // __has_extension
165 IdentifierInfo *Ident__has_builtin; // __has_builtin
166 IdentifierInfo *Ident__has_attribute; // __has_attribute
167 IdentifierInfo *Ident__has_include; // __has_include
168 IdentifierInfo *Ident__has_include_next; // __has_include_next
169 IdentifierInfo *Ident__has_warning; // __has_warning
170 IdentifierInfo *Ident__is_identifier; // __is_identifier
171 IdentifierInfo *Ident__building_module; // __building_module
172 IdentifierInfo *Ident__MODULE__; // __MODULE__
173 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
174 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
175 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
176 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
177 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
178 IdentifierInfo *Ident__is_target_os; // __is_target_os
179 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
181 // Weak, only valid (and set) while InMacroArgs is true.
184 SourceLocation DATELoc, TIMELoc;
186 // Next __COUNTER__ value, starts at 0.
187 unsigned CounterValue = 0;
190 /// Maximum depth of \#includes.
191 MaxAllowedIncludeStackDepth = 200
194 // State that is set before the preprocessor begins.
195 bool KeepComments : 1;
196 bool KeepMacroComments : 1;
197 bool SuppressIncludeNotFoundError : 1;
199 // State that changes while the preprocessor runs:
200 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
202 /// Whether the preprocessor owns the header search object.
203 bool OwnsHeaderSearch : 1;
205 /// True if macro expansion is disabled.
206 bool DisableMacroExpansion : 1;
208 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
209 /// when parsing preprocessor directives.
210 bool MacroExpansionInDirectivesOverride : 1;
212 class ResetMacroExpansionHelper;
214 /// Whether we have already loaded macros from the external source.
215 mutable bool ReadMacrosFromExternalSource : 1;
217 /// True if pragmas are enabled.
218 bool PragmasEnabled : 1;
220 /// True if the current build action is a preprocessing action.
221 bool PreprocessedOutput : 1;
223 /// True if we are currently preprocessing a #if or #elif directive
224 bool ParsingIfOrElifDirective;
226 /// True if we are pre-expanding macro arguments.
227 bool InMacroArgPreExpansion;
229 /// Mapping/lookup information for all identifiers in
230 /// the program, including program keywords.
231 mutable IdentifierTable Identifiers;
233 /// This table contains all the selectors in the program.
235 /// Unlike IdentifierTable above, this table *isn't* populated by the
236 /// preprocessor. It is declared/expanded here because its role/lifetime is
237 /// conceptually similar to the IdentifierTable. In addition, the current
238 /// control flow (in clang::ParseAST()) makes it convenient to put it here.
240 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
241 /// the lifetime of the preprocessor.
242 SelectorTable Selectors;
244 /// Information about builtins.
245 std::unique_ptr<Builtin::Context> BuiltinInfo;
247 /// Tracks all of the pragmas that the client registered
248 /// with this preprocessor.
249 std::unique_ptr<PragmaNamespace> PragmaHandlers;
251 /// Pragma handlers of the original source are stored here during the
252 /// parsing of a model file.
253 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
255 /// Tracks all of the comment handlers that the client registered
256 /// with this preprocessor.
257 std::vector<CommentHandler *> CommentHandlers;
259 /// True if we want to ignore EOF token and continue later on (thus
260 /// avoid tearing the Lexer and etc. down).
261 bool IncrementalProcessing = false;
263 /// The kind of translation unit we are processing.
264 TranslationUnitKind TUKind;
266 /// The code-completion handler.
267 CodeCompletionHandler *CodeComplete = nullptr;
269 /// The file that we're performing code-completion for, if any.
270 const FileEntry *CodeCompletionFile = nullptr;
272 /// The offset in file for the code-completion point.
273 unsigned CodeCompletionOffset = 0;
275 /// The location for the code-completion point. This gets instantiated
276 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
277 SourceLocation CodeCompletionLoc;
279 /// The start location for the file of the code-completion point.
281 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
282 /// for preprocessing.
283 SourceLocation CodeCompletionFileLoc;
285 /// The source location of the \c import contextual keyword we just
287 SourceLocation ModuleImportLoc;
289 /// The module import path that we're currently processing.
290 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
292 /// Whether the last token we lexed was an '@'.
293 bool LastTokenWasAt = false;
295 /// A position within a C++20 import-seq.
299 // Positive values represent a number of unclosed brackets.
301 AfterTopLevelTokenSeq = -1,
306 ImportSeq(State S) : S(S) {}
308 /// Saw any kind of open bracket.
309 void handleOpenBracket() {
// Non-positive states are clamped to 0 before incrementing, so the result is
// always a bracket depth of at least 1.
310 S = static_cast<State>(std::max<int>(S, 0) + 1);
312 /// Saw any kind of close bracket other than '}'.
313 void handleCloseBracket() {
// States below 1 are clamped to 1 before decrementing, so any such state
// (including an unmatched close bracket at depth 0) ends up as 0.
314 S = static_cast<State>(std::max<int>(S, 1) - 1);
316 /// Saw a close brace.
317 void handleCloseBrace() {
// First treat the brace like any other closing bracket.
318 handleCloseBracket();
// If that brought us back to AtTopLevel, move to AfterTopLevelTokenSeq,
// unless a header-name has been seen (AfterHeaderName is set).
319 if (S == AtTopLevel && !AfterHeaderName)
320 S = AfterTopLevelTokenSeq;
325 S = AfterTopLevelTokenSeq;
326 AfterHeaderName = false;
330 /// Saw an 'export' identifier.
331 void handleExport() {
332 if (S == AfterTopLevelTokenSeq)
337 /// Saw an 'import' identifier.
338 void handleImport() {
339 if (S == AfterTopLevelTokenSeq || S == AfterExport)
345 /// Saw a 'header-name' token; do not recognize any more 'import' tokens
346 /// until we reach a top-level semicolon.
347 void handleHeaderName() {
348 if (S == AfterImportSeq)
349 AfterHeaderName = true;
353 /// Saw any other token.
/// True when no brackets are open, i.e. the state is zero or negative
/// (positive values count unclosed brackets).
359 bool atTopLevel() { return S <= 0; }
/// True when the state is exactly AfterImportSeq.
360 bool afterImportSeq() { return S == AfterImportSeq; }
364 /// Whether we're in the pp-import-suffix following the header-name in a
365 /// pp-import. If so, a close-brace is not sufficient to end the
366 /// top-level-token-seq of an import-seq.
367 bool AfterHeaderName = false;
370 /// Our current position within a C++20 import-seq.
371 ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
373 /// Whether the module import expects an identifier next. Otherwise,
374 /// it expects a '.' or ';'.
375 bool ModuleImportExpectsIdentifier = false;
377 /// The identifier and source location of the currently-active
378 /// \#pragma clang arc_cf_code_audited begin.
379 std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
381 /// The source location of the currently-active
382 /// \#pragma clang assume_nonnull begin.
383 SourceLocation PragmaAssumeNonNullLoc;
385 /// True if we hit the code-completion point.
386 bool CodeCompletionReached = false;
388 /// The code completion token containing the information
389 /// on the stem that is to be code completed.
390 IdentifierInfo *CodeCompletionII = nullptr;
392 /// Range for the code completion token.
393 SourceRange CodeCompletionTokenRange;
395 /// The directory that the main file should be considered to occupy,
396 /// if it does not correspond to a real file (as happens when building a
398 const DirectoryEntry *MainFileDir = nullptr;
400 /// The number of bytes that we will initially skip when entering the
401 /// main file, along with a flag that indicates whether skipping this number
402 /// of bytes will place the lexer at the start of a line.
404 /// This is used when loading a precompiled preamble.
405 std::pair<int, bool> SkipMainFilePreamble;
407 /// Whether we hit an error due to reaching the max allowed include depth. This
408 /// lets us avoid reporting the same error over and over again.
409 bool HasReachedMaxIncludeDepth = false;
411 /// The number of currently-active calls to Lex.
413 /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
414 /// require asking for multiple additional tokens. This counter makes it
415 /// possible for Lex to detect whether it's producing a token for the end
416 /// of phase 4 of translation or for some other situation.
417 unsigned LexLevel = 0;
419 /// The number of (LexLevel 0) preprocessor tokens.
420 unsigned TokenCount = 0;
422 /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
423 /// warning, or zero for unlimited.
424 unsigned MaxTokens = 0;
425 SourceLocation MaxTokensOverrideLoc;
/// Bundle of source locations and flags; an optional instance is kept in
/// PreambleConditionalStackStore::SkipInfo.
/// NOTE(review): presumably describes the conditional block that was being
/// skipped when the end of the preamble was reached -- confirm against the
/// code that fills it in.
428 struct PreambleSkipInfo {
429 SourceLocation HashTokenLoc;
430 SourceLocation IfTokenLoc;
431 bool FoundNonSkipPortion;
433 SourceLocation ElseLoc;
// Member-wise constructor: each member is initialized from the like-named
// argument.
435 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
436 bool FoundNonSkipPortion, bool FoundElse,
437 SourceLocation ElseLoc)
438 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
439 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
444 friend class ASTReader;
445 friend class MacroArgs;
/// Holds a saved stack of PPConditionalInfo entries together with a small
/// state value (Off / Recording / Replaying) that says how the stack is
/// currently being used, plus optional skip information.
447 class PreambleConditionalStackStore {
455 PreambleConditionalStackStore() = default;
// State transitions and queries.
457 void startRecording() { ConditionalStackState = Recording; }
458 void startReplaying() { ConditionalStackState = Replaying; }
459 bool isRecording() const { return ConditionalStackState == Recording; }
460 bool isReplaying() const { return ConditionalStackState == Replaying; }
/// Returns a non-owning view of the saved conditional stack.
462 ArrayRef<PPConditionalInfo> getStack() const {
463 return ConditionalStack;
/// Drops the saved stack and switches the state back to Off.
466 void doneReplaying() {
467 ConditionalStack.clear();
468 ConditionalStackState = Off;
/// Replaces the saved stack with a copy of \p s.
/// NOTE(review): the statement guarded by the state check is not visible in
/// this listing; presumably the call is ignored unless recording or
/// replaying -- confirm.
471 void setStack(ArrayRef<PPConditionalInfo> s) {
472 if (!isRecording() && !isReplaying())
474 ConditionalStack.clear();
475 ConditionalStack.append(s.begin(), s.end());
/// True when the saved stack is non-empty.
478 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
/// True when SkipInfo currently holds a value.
480 bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
/// Resets SkipInfo to the empty state.
482 void clearSkipInfo() { SkipInfo.reset(); }
// Optional skip information; queried by reachedEOFWhileSkipping().
484 llvm::Optional<PreambleSkipInfo> SkipInfo;
// The saved conditional stack and the current usage state (initially Off).
487 SmallVector<PPConditionalInfo, 4> ConditionalStack;
488 State ConditionalStackState = Off;
489 } PreambleConditionalStack;
491 /// The current top of the stack that we're lexing from if
492 /// not expanding a macro and we are lexing directly from source code.
494 /// Only one of CurLexer, or CurTokenLexer will be non-null.
495 std::unique_ptr<Lexer> CurLexer;
497 /// The current top of the stack that we're lexing from
498 /// if not expanding a macro.
500 /// This is an alias for CurLexer.
501 PreprocessorLexer *CurPPLexer = nullptr;
503 /// Used to find the current FileEntry, if CurLexer is non-null
504 /// and if applicable.
506 /// This allows us to implement \#include_next and find directory-specific
508 const DirectoryLookup *CurDirLookup = nullptr;
510 /// The current macro we are expanding, if we are expanding a macro.
512 /// One of CurLexer and CurTokenLexer must be null.
513 std::unique_ptr<TokenLexer> CurTokenLexer;
515 /// The kind of lexer we're currently working with.
520 CLK_LexAfterModuleImport
521 } CurLexerKind = CLK_Lexer;
523 /// If the current lexer is for a submodule that is being built, this
524 /// is that submodule.
525 Module *CurLexerSubmodule = nullptr;
527 /// Keeps track of the stack of files currently
528 /// \#included, and macros currently being expanded from, not counting
529 /// CurLexer/CurTokenLexer.
530 struct IncludeStackInfo {
// NOTE(review): these fields appear to mirror the preprocessor's
// CurLexerKind, CurLexerSubmodule, CurLexer, CurPPLexer, CurTokenLexer and
// CurDirLookup members (same types) -- confirm at the push/pop sites.
// TheLexer and TheTokenLexer are owning pointers; the others are plain
// values or non-owning pointers.
531 enum CurLexerKind CurLexerKind;
532 Module *TheSubmodule;
533 std::unique_ptr<Lexer> TheLexer;
534 PreprocessorLexer *ThePPLexer;
535 std::unique_ptr<TokenLexer> TheTokenLexer;
536 const DirectoryLookup *TheDirLookup;
538 // The following constructors are completely useless copies of the default
539 // versions, only needed to pacify MSVC.
// Member-wise constructor: the two unique_ptr arguments are taken by rvalue
// reference and moved into the new entry.
540 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
541 std::unique_ptr<Lexer> &&TheLexer,
542 PreprocessorLexer *ThePPLexer,
543 std::unique_ptr<TokenLexer> &&TheTokenLexer,
544 const DirectoryLookup *TheDirLookup)
545 : CurLexerKind(std::move(CurLexerKind)),
546 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
547 ThePPLexer(std::move(ThePPLexer)),
548 TheTokenLexer(std::move(TheTokenLexer)),
549 TheDirLookup(std::move(TheDirLookup)) {}
551 std::vector<IncludeStackInfo> IncludeMacroStack;
553 /// Actions invoked when some preprocessor activity is
554 /// encountered (e.g. a file is \#included, etc).
555 std::unique_ptr<PPCallbacks> Callbacks;
/// A token, a macro definition and a source range captured together;
/// instances are queued in DelayedMacroExpandsCallbacks.
/// NOTE(review): presumably replayed later as macro-expansion callbacks --
/// confirm at the use site.
557 struct MacroExpandsInfo {
562 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
563 : Tok(Tok), MD(MD), Range(Range) {}
565 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
567 /// Information about a name that has been used to define a module macro.
568 struct ModuleMacroInfo {
569 /// The most recent macro directive for this identifier.
572 /// The active module macros for this identifier.
573 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
575 /// The generation number at which we last updated ActiveModuleMacros.
576 /// \see Preprocessor::VisibleModules.
577 unsigned ActiveModuleMacrosGeneration = 0;
579 /// Whether this macro name is ambiguous.
580 bool IsAmbiguous = false;
582 /// The module macros that are overridden by this macro.
583 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
/// Starts from the given latest directive \p MD; every other field keeps its
/// default initializer (empty lists, generation 0, not ambiguous).
585 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
588 /// The state of a macro for an identifier.
590 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
/// Looks up the ModuleMacroInfo for \p II, creating and refreshing it as
/// needed.
///
/// Steps visible here: refresh \p II through \p PP if it is out of date;
/// test II->hasMacroDefinition(), the Modules / ModulesLocalVisibility
/// language options and the visible-modules generation; allocate a
/// ModuleMacroInfo from PP's allocator, seeded with the currently stored
/// MacroDirective; and call PP.updateModuleMacroInfo when the stored
/// generation differs from the current one.
/// NOTE(review): several statements of this body (including its return
/// statements) are not visible in this listing; presumably the first test
/// yields a null result -- confirm against the full source.
592 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
593 const IdentifierInfo *II) const {
594 if (II->isOutOfDate())
595 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
596 // FIXME: Find a spare bit on IdentifierInfo and store a
597 // HasModuleMacros flag.
598 if (!II->hasMacroDefinition() ||
599 (!PP.getLangOpts().Modules &&
600 !PP.getLangOpts().ModulesLocalVisibility) ||
601 !PP.CurSubmoduleState->VisibleModules.getGeneration())
604 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
// Placement-new into the preprocessor's allocator.
606 Info = new (PP.getPreprocessorAllocator())
607 ModuleMacroInfo(State.get<MacroDirective *>());
// Stale generation: let the preprocessor recompute the module macro info.
611 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
612 Info->ActiveModuleMacrosGeneration)
613 PP.updateModuleMacroInfo(II, *Info);
/// Default state: delegates to the pointer constructor with a null directive.
618 MacroState() : MacroState(nullptr) {}
/// Starts out holding \p MD directly in State.
619 MacroState(MacroDirective *MD) : State(MD) {}
/// Move construction: copies O's State and then resets O to a null
/// MacroDirective pointer.
621 MacroState(MacroState &&O) noexcept : State(O.State) {
622 O.State = (MacroDirective *)nullptr;
/// Move assignment: as with move construction, O ends up holding a null
/// MacroDirective pointer.
/// NOTE(review): the statement(s) around this reset are not visible in this
/// listing.
625 MacroState &operator=(MacroState &&O) noexcept {
627 O.State = (MacroDirective *)nullptr;
633 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
634 Info->~ModuleMacroInfo();
/// Returns the latest MacroDirective. When State holds a plain
/// MacroDirective pointer, that pointer is returned.
/// NOTE(review): the return for the ModuleMacroInfo case is not visible in
/// this listing.
637 MacroDirective *getLatest() const {
638 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
640 return State.get<MacroDirective*>();
/// Records \p MD as the latest directive; the ModuleMacroInfo case is
/// handled separately from the plain-pointer case.
/// NOTE(review): the assignments themselves are not visible in this listing.
643 void setLatest(MacroDirective *MD) {
644 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
/// Returns the IsAmbiguous flag of the module info for \p II, or false when
/// getModuleInfo() yields no info.
650 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
651 auto *Info = getModuleInfo(PP, II);
652 return Info ? Info->IsAmbiguous : false;
/// Returns the active module macros for \p II when module info is available.
/// NOTE(review): the fallback return is not visible in this listing;
/// presumably an empty list -- confirm.
655 ArrayRef<ModuleMacro *>
656 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
657 if (auto *Info = getModuleInfo(PP, II))
658 return Info->ActiveModuleMacros;
/// Forwards to findDirectiveAtLoc() on the latest directive, if there is one.
/// NOTE(review): the result returned when there is no latest directive is
/// not visible in this listing.
662 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
663 SourceManager &SourceMgr) const {
664 // FIXME: Incorporate module macros into the result of this.
665 if (auto *Latest = getLatest())
666 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
/// When module info is available for \p II: appends every active module
/// macro to OverriddenMacros, then clears the active list and the ambiguity
/// flag.
670 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
671 if (auto *Info = getModuleInfo(PP, II)) {
672 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
673 Info->ActiveModuleMacros.begin(),
674 Info->ActiveModuleMacros.end());
675 Info->ActiveModuleMacros.clear();
676 Info->IsAmbiguous = false;
/// Returns the overridden module macros when State holds a ModuleMacroInfo.
/// NOTE(review): the fallback return is not visible in this listing;
/// presumably an empty list -- confirm.
680 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
681 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
682 return Info->OverriddenMacros;
/// Replaces the overridden-macro list with \p Overrides and resets
/// ActiveModuleMacrosGeneration to 0.
/// NOTE(review): some control-flow lines are not visible in this listing.
/// Presumably a ModuleMacroInfo is allocated only when none exists yet, and
/// an empty \p Overrides with no existing info is a no-op -- confirm.
686 void setOverriddenMacros(Preprocessor &PP,
687 ArrayRef<ModuleMacro *> Overrides) {
688 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
690 if (Overrides.empty())
// Placement-new into the preprocessor's allocator, seeded with the
// currently stored MacroDirective.
692 Info = new (PP.getPreprocessorAllocator())
693 ModuleMacroInfo(State.get<MacroDirective *>());
696 Info->OverriddenMacros.clear();
697 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
698 Overrides.begin(), Overrides.end());
// Generation 0: NOTE(review) presumably marks the active-macro info as stale
// so it is recomputed on the next getModuleInfo() -- confirm.
699 Info->ActiveModuleMacrosGeneration = 0;
703 /// For each IdentifierInfo that was associated with a macro, we
704 /// keep a mapping to the history of all macro definitions and #undefs in
705 /// the reverse order (the latest one is in the head of the list).
707 /// This mapping lives within the \p CurSubmoduleState.
708 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
710 struct SubmoduleState;
712 /// Information about a submodule that we're currently building.
713 struct BuildingSubmoduleInfo {
714 /// The module that we are building.
717 /// The location at which the module was included.
718 SourceLocation ImportLoc;
720 /// Whether we entered this submodule via a pragma.
723 /// The previous SubmoduleState.
724 SubmoduleState *OuterSubmoduleState;
726 /// The number of pending module macro names when we started building this.
727 unsigned OuterPendingModuleMacroNames;
// Member-wise constructor: each member is initialized from the like-named
// argument.
729 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
730 SubmoduleState *OuterSubmoduleState,
731 unsigned OuterPendingModuleMacroNames)
732 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
733 OuterSubmoduleState(OuterSubmoduleState),
734 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
736 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
738 /// Information about a submodule's preprocessor state.
739 struct SubmoduleState {
740 /// The macros for the submodule.
743 /// The set of modules that are visible within the submodule.
744 VisibleModuleSet VisibleModules;
746 // FIXME: CounterValue?
747 // FIXME: PragmaPushMacroInfo?
749 std::map<Module *, SubmoduleState> Submodules;
751 /// The preprocessor state for preprocessing outside of any submodule.
752 SubmoduleState NullSubmoduleState;
754 /// The current submodule state. Will be \p NullSubmoduleState if we're not
756 SubmoduleState *CurSubmoduleState;
758 /// The set of known macros exported from modules.
759 llvm::FoldingSet<ModuleMacro> ModuleMacros;
761 /// The names of potential module macros that we've not yet processed.
762 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
764 /// The list of module macros, for each identifier, that are not overridden by
765 /// any other module macro.
766 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
769 /// Macros that we want to warn about because they are not used at the end
770 /// of the translation unit.
772 /// We store just their SourceLocations instead of
773 /// something like MacroInfo*. The benefit of this is that when we are
774 /// deserializing from PCH, we don't need to deserialize identifier & macros
775 /// just so that we can report that they are unused, we just warn using
776 /// the SourceLocations of this set (that will be filled by the ASTReader).
777 /// We are using SmallPtrSet instead of a vector for faster removal.
778 using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
779 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
781 /// A "freelist" of MacroArg objects that can be
782 /// reused for quick allocation.
783 MacroArgs *MacroArgCache = nullptr;
785 /// For each IdentifierInfo used in a \#pragma push_macro directive,
786 /// we keep a MacroInfo stack used to restore the previous macro value.
787 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
790 // Various statistics we track for performance analysis.
791 unsigned NumDirectives = 0;
792 unsigned NumDefined = 0;
793 unsigned NumUndefined = 0;
794 unsigned NumPragma = 0;
796 unsigned NumElse = 0;
797 unsigned NumEndif = 0;
798 unsigned NumEnteredSourceFiles = 0;
799 unsigned MaxIncludeStackDepth = 0;
800 unsigned NumMacroExpanded = 0;
801 unsigned NumFnMacroExpanded = 0;
802 unsigned NumBuiltinMacroExpanded = 0;
803 unsigned NumFastMacroExpanded = 0;
804 unsigned NumTokenPaste = 0;
805 unsigned NumFastTokenPaste = 0;
806 unsigned NumSkipped = 0;
808 /// The predefined macros that preprocessor should use from the
809 /// command line etc.
810 std::string Predefines;
812 /// The file ID for the preprocessor predefines.
813 FileID PredefinesFileID;
815 /// The file ID for the PCH through header.
816 FileID PCHThroughHeaderFileID;
818 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
819 bool SkippingUntilPragmaHdrStop = false;
821 /// Whether tokens are being skipped until the through header is seen.
822 bool SkippingUntilPCHThroughHeader = false;
825 /// Cache of macro expanders to reduce malloc traffic.
826 enum { TokenLexerCacheSize = 8 };
827 unsigned NumCachedTokenLexers;
828 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
831 /// Keeps macro expanded tokens for TokenLexers.
833 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
834 /// going to lex to the cache, and when it finishes the tokens are removed
835 /// from the end of the cache.
836 SmallVector<Token, 16> MacroExpandedTokens;
837 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
839 /// A record of the macro definitions and expansions that
840 /// occurred during preprocessing.
842 /// This is an optional side structure that can be enabled with
843 /// \c createPreprocessingRecord() prior to preprocessing.
844 PreprocessingRecord *Record = nullptr;
846 /// Cached tokens state.
847 using CachedTokensTy = SmallVector<Token, 1>;
849 /// Cached tokens are stored here when we do backtracking or
850 /// lookahead. They are "lexed" by the CachingLex() method.
851 CachedTokensTy CachedTokens;
853 /// The position of the cached token that CachingLex() should
856 /// If it points beyond the CachedTokens vector, it means that a normal
857 /// Lex() should be invoked.
858 CachedTokensTy::size_type CachedLexPos = 0;
860 /// Stack of backtrack positions, allowing nested backtracks.
862 /// The EnableBacktrackAtThisPos() method pushes a position to
863 /// indicate where CachedLexPos should be set when the BackTrack() method is
864 /// invoked (at which point the last position is popped).
865 std::vector<CachedTokensTy::size_type> BacktrackPositions;
867 struct MacroInfoChain {
869 MacroInfoChain *Next;
872 /// MacroInfos are managed as a chain for easy disposal. This is the head
874 MacroInfoChain *MIChainHead = nullptr;
876 void updateOutOfDateIdentifier(IdentifierInfo &II) const;
879 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
880 DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
881 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
882 IdentifierInfoLookup *IILookup = nullptr,
883 bool OwnsHeaderSearch = false,
884 TranslationUnitKind TUKind = TU_Complete);
888 /// Initialize the preprocessor using information about the target.
890 /// \param Target is owned by the caller and must remain valid for the
891 /// lifetime of the preprocessor.
892 /// \param AuxTarget is owned by the caller and must remain valid for
893 /// the lifetime of the preprocessor.
894 void Initialize(const TargetInfo &Target,
895 const TargetInfo *AuxTarget = nullptr);
897 /// Initialize the preprocessor to parse a model file
899 /// To parse model files the preprocessor of the original source is reused to
900 /// preserve the identifier table. However, to avoid some duplicate
901 /// information in the preprocessor some cleanup is needed before it is used
902 /// to parse model files. This method does that cleanup.
903 void InitializeForModelFile();
905 /// Cleanup after model file parsing
906 void FinalizeForModelFile();
908 /// Retrieve the preprocessor options used to initialize this
910 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
/// Diagnostics engine accessors. setDiagnostics() stores the address of
/// \p D in the Diags pointer.
912 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
913 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
/// Plain accessors for the objects this preprocessor refers to.
/// getTargetInfo() dereferences Target, which is initialized to nullptr;
/// NOTE(review): presumably only valid after Initialize() -- confirm.
/// getAuxTargetInfo() returns the pointer as-is and may be null.
915 const LangOptions &getLangOpts() const { return LangOpts; }
916 const TargetInfo &getTargetInfo() const { return *Target; }
917 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
918 FileManager &getFileManager() const { return FileMgr; }
919 SourceManager &getSourceManager() const { return SourceMgr; }
920 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
/// Accessors for the tables and helpers held by this preprocessor: the
/// identifier table (const and non-const), the selector table, the builtin
/// context (dereferences BuiltinInfo) and the bump allocator BP.
922 IdentifierTable &getIdentifierTable() { return Identifiers; }
923 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
924 SelectorTable &getSelectorTable() { return Selectors; }
925 Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
926 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
/// Stores \p Source as the external source of macros (plain pointer
/// assignment).
928 void setExternalSource(ExternalPreprocessorSource *Source) {
929 ExternalSource = Source;
/// Returns the external source pointer as currently stored.
932 ExternalPreprocessorSource *getExternalSource() const {
933 return ExternalSource;
936 /// Retrieve the module loader associated with this preprocessor.
937 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
/// Returns the module loader's HadFatalFailure flag.
939 bool hadModuleLoaderFatalFailure() const {
940 return TheModuleLoader.HadFatalFailure;
943 /// Retrieve the number of Directives that have been processed by the
945 unsigned getNumDirectives() const {
946 return NumDirectives;
949 /// True if we are currently preprocessing a #if or #elif directive
950 bool isParsingIfOrElifDirective() const {
951 return ParsingIfOrElifDirective;
954 /// Control whether the preprocessor retains comments in output.
955 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
// Keeping macro comments also turns on KeepComments (the two arguments are
// OR-ed together).
956 this->KeepComments = KeepComments | KeepMacroComments;
957 this->KeepMacroComments = KeepMacroComments;
/// Returns the KeepComments flag.
960 bool getCommentRetentionState() const { return KeepComments; }
/// Set / query the PragmasEnabled flag.
962 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
963 bool getPragmasEnabled() const { return PragmasEnabled; }
/// Sets the SuppressIncludeNotFoundError flag.
965 void SetSuppressIncludeNotFoundError(bool Suppress) {
966 SuppressIncludeNotFoundError = Suppress;
/// Returns the SuppressIncludeNotFoundError flag. Note that, unlike most
/// getters in this class, this one is not const-qualified.
969 bool GetSuppressIncludeNotFoundError() {
970 return SuppressIncludeNotFoundError;
973 /// Sets whether the preprocessor is responsible for producing output or if
974 /// it is producing tokens to be consumed by Parse and Sema.
975 void setPreprocessedOutput(bool IsPreprocessedOutput) {
976 PreprocessedOutput = IsPreprocessedOutput;
979 /// Returns true if the preprocessor is responsible for generating output,
980 /// false if it is producing tokens to be consumed by Parse and Sema.
981 bool isPreprocessedOutput() const { return PreprocessedOutput; }
983 /// Return true if we are lexing directly from the specified lexer.
984 bool isCurrentLexer(const PreprocessorLexer *L) const {
985 return CurPPLexer == L;
988 /// Return the current lexer being lexed from.
990 /// Note that this ignores any potentially active macro expansions and _Pragma
991 /// expansions going on at the time.
992 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
994 /// Return the current file lexer being lexed from.
996 /// Note that this ignores any potentially active macro expansions and _Pragma
997 /// expansions going on at the time.
998 PreprocessorLexer *getCurrentFileLexer() const;
1000 /// Return the submodule owning the file being lexed. This may not be
1001 /// the current module if we have changed modules since entering the file.
1002 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1004 /// Returns the FileID for the preprocessor predefines.
1005 FileID getPredefinesFileID() const { return PredefinesFileID; }
1008 /// Accessors for preprocessor callbacks.
1010 /// Note that this class takes ownership of any PPCallbacks object given to
1012 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1013 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1015 C = std::make_unique<PPChainedCallbacks>(std::move(C),
1016 std::move(Callbacks));
1017 Callbacks = std::move(C);
1021 /// Get the number of tokens processed so far.
1022 unsigned getTokenCount() const { return TokenCount; }
1024 /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1025 unsigned getMaxTokens() const { return MaxTokens; }
1027 void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1029 MaxTokensOverrideLoc = Loc;
1032 SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1034 /// Register a function that would be called on each token in the final
1035 /// expanded token stream.
1036 /// This also reports annotation tokens produced by the parser.
1037 void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1038 OnToken = std::move(F);
1041 bool isMacroDefined(StringRef Id) {
1042 return isMacroDefined(&Identifiers.get(Id));
1044 bool isMacroDefined(const IdentifierInfo *II) {
1045 return II->hasMacroDefinition() &&
1046 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1049 /// Determine whether II is defined as a macro within the module M,
1050 /// if that is a module that we've already preprocessed. Does not check for
1051 /// macros imported into M.
1052 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1053 if (!II->hasMacroDefinition())
1055 auto I = Submodules.find(M);
1056 if (I == Submodules.end())
1058 auto J = I->second.Macros.find(II);
1059 if (J == I->second.Macros.end())
1061 auto *MD = J->second.getLatest();
1062 return MD && MD->isDefined();
1065 MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1066 if (!II->hasMacroDefinition())
1069 MacroState &S = CurSubmoduleState->Macros[II];
1070 auto *MD = S.getLatest();
1071 while (MD && isa<VisibilityMacroDirective>(MD))
1072 MD = MD->getPrevious();
1073 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1074 S.getActiveModuleMacros(*this, II),
1075 S.isAmbiguous(*this, II));
1078 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1079 SourceLocation Loc) {
1080 if (!II->hadMacroDefinition())
1083 MacroState &S = CurSubmoduleState->Macros[II];
1084 MacroDirective::DefInfo DI;
1085 if (auto *MD = S.getLatest())
1086 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1087 // FIXME: Compute the set of active module macros at the specified location.
1088 return MacroDefinition(DI.getDirective(),
1089 S.getActiveModuleMacros(*this, II),
1090 S.isAmbiguous(*this, II));
1093 /// Given an identifier, return its latest non-imported MacroDirective
1094 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1095 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1096 if (!II->hasMacroDefinition())
1099 auto *MD = getLocalMacroDirectiveHistory(II);
1100 if (!MD || MD->getDefinition().isUndefined())
1106 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1107 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1110 MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1111 if (!II->hasMacroDefinition())
1113 if (auto MD = getMacroDefinition(II))
1114 return MD.getMacroInfo();
1118 /// Given an identifier, return the latest non-imported macro
1119 /// directive for that identifier.
1121 /// One can iterate over all previous macro directives from the most recent
1123 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1125 /// Add a directive to the macro directive history for this identifier.
1126 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1127 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1128 SourceLocation Loc) {
1129 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1130 appendMacroDirective(II, MD);
1133 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1135 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1138 /// Set a MacroDirective that was loaded from a PCH file.
1139 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1140 MacroDirective *MD);
1142 /// Register an exported macro for a module and identifier.
1143 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1144 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1145 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
1147 /// Get the list of leaf (non-overridden) module macros for a name.
1148 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1149 if (II->isOutOfDate())
1150 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1151 auto I = LeafModuleMacros.find(II);
1152 if (I != LeafModuleMacros.end())
1158 /// Iterators for the macro history table. Currently defined macros have
1159 /// IdentifierInfo::hasMacroDefinition() set and an empty
1160 /// MacroInfo::getUndefLoc() at the head of the list.
1161 using macro_iterator = MacroMap::const_iterator;
1163 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1164 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1166 llvm::iterator_range<macro_iterator>
1167 macros(bool IncludeExternalMacros = true) const {
1168 macro_iterator begin = macro_begin(IncludeExternalMacros);
1169 macro_iterator end = macro_end(IncludeExternalMacros);
1170 return llvm::make_range(begin, end);
1175 /// Return the name of the macro defined before \p Loc that has
1176 /// spelling \p Tokens. If there are multiple macros with same spelling,
1177 /// return the last one defined.
1178 StringRef getLastMacroWithSpelling(SourceLocation Loc,
1179 ArrayRef<TokenValue> Tokens) const;
1181 const std::string &getPredefines() const { return Predefines; }
1183 /// Set the predefines for this Preprocessor.
1185 /// These predefines are automatically injected when parsing the main file.
1186 void setPredefines(const char *P) { Predefines = P; }
1187 void setPredefines(StringRef P) { Predefines = std::string(P); }
1189 /// Return information about the specified preprocessor
1190 /// identifier token.
1191 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1192 return &Identifiers.get(Name);
1195 /// Add the specified pragma handler to this preprocessor.
1197 /// If \p Namespace is non-null, then it is a token required to exist on the
1198 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1199 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1200 void AddPragmaHandler(PragmaHandler *Handler) {
1201 AddPragmaHandler(StringRef(), Handler);
1204 /// Remove the specific pragma handler from this preprocessor.
1206 /// If \p Namespace is non-null, then it should be the namespace that
1207 /// \p Handler was added to. It is an error to remove a handler that
1208 /// has not been registered.
1209 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1210 void RemovePragmaHandler(PragmaHandler *Handler) {
1211 RemovePragmaHandler(StringRef(), Handler);
1214 /// Install empty handlers for all pragmas (making them ignored).
1215 void IgnorePragmas();
1217 /// Add the specified comment handler to the preprocessor.
1218 void addCommentHandler(CommentHandler *Handler);
1220 /// Remove the specified comment handler.
1222 /// It is an error to remove a handler that has not been registered.
1223 void removeCommentHandler(CommentHandler *Handler);
1225 /// Set the code completion handler to the given object.
1226 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1227 CodeComplete = &Handler;
1230 /// Retrieve the current code-completion handler.
1231 CodeCompletionHandler *getCodeCompletionHandler() const {
1232 return CodeComplete;
1235 /// Clear out the code completion handler.
1236 void clearCodeCompletionHandler() {
1237 CodeComplete = nullptr;
1240 /// Hook used by the lexer to invoke the "included file" code
1241 /// completion point.
1242 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1244 /// Hook used by the lexer to invoke the "natural language" code
1245 /// completion point.
1246 void CodeCompleteNaturalLanguage();
1248 /// Set the code completion token for filtering purposes.
1249 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1250 CodeCompletionII = Filter;
1253 /// Set the code completion token range for detecting replacement range later
1255 void setCodeCompletionTokenRange(const SourceLocation Start,
1256 const SourceLocation End) {
1257 CodeCompletionTokenRange = {Start, End};
1259 SourceRange getCodeCompletionTokenRange() const {
1260 return CodeCompletionTokenRange;
1263 /// Get the code completion token for filtering purposes.
1264 StringRef getCodeCompletionFilter() {
1265 if (CodeCompletionII)
1266 return CodeCompletionII->getName();
1270 /// Retrieve the preprocessing record, or NULL if there is no
1271 /// preprocessing record.
1272 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1274 /// Create a new preprocessing record, which will keep track of
1275 /// all macro expansions, macro definitions, etc.
1276 void createPreprocessingRecord();
1278 /// Returns true if the FileEntry is the PCH through header.
1279 bool isPCHThroughHeader(const FileEntry *FE);
1281 /// True if creating a PCH with a through header.
1282 bool creatingPCHWithThroughHeader();
1284 /// True if using a PCH with a through header.
1285 bool usingPCHWithThroughHeader();
1287 /// True if creating a PCH with a #pragma hdrstop.
1288 bool creatingPCHWithPragmaHdrStop();
1290 /// True if using a PCH with a #pragma hdrstop.
1291 bool usingPCHWithPragmaHdrStop();
1293 /// Skip tokens until after the #include of the through header or
1294 /// until after a #pragma hdrstop.
1295 void SkipTokensWhileUsingPCH();
1297 /// Process directives while skipping until the through header or
1298 /// #pragma hdrstop is found.
1299 void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1300 SourceLocation HashLoc);
1302 /// Enter the specified FileID as the main source file,
1303 /// which implicitly adds the builtin defines etc.
1304 void EnterMainSourceFile();
1306 /// Inform the preprocessor callbacks that processing is complete.
1307 void EndSourceFile();
1309 /// Add a source file to the top of the include stack and
1310 /// start lexing tokens from it instead of the current buffer.
1312 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1313 bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
1314 SourceLocation Loc);
1316 /// Add a Macro to the top of the include stack and start lexing
1317 /// tokens from it instead of the current buffer.
1319 /// \param Args specifies the tokens input to a function-like macro.
1320 /// \param ILEnd specifies the location of the ')' for a function-like macro
1321 /// or the identifier for an object-like macro.
1322 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1326 /// Add a "macro" context to the top of the include stack,
1327 /// which will cause the lexer to start returning the specified tokens.
1329 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1330 /// will not be subject to further macro expansion. Otherwise, these tokens
1331 /// will be re-macro-expanded when/if expansion is enabled.
1333 /// If \p OwnsTokens is false, this method assumes that the specified stream
1334 /// of tokens has a permanent owner somewhere, so they do not need to be
1335 /// copied. If it is true, it assumes the array of tokens is allocated with
1336 /// \c new[] and the Preprocessor will delete[] it.
1338 /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1339 /// set, see the flag documentation for details.
1340 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1341 bool DisableMacroExpansion, bool OwnsTokens,
1345 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1346 bool DisableMacroExpansion, bool IsReinject) {
1347 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1351 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1353 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1357 /// Pop the current lexer/macro exp off the top of the lexer stack.
1359 /// This should only be used in situations where the current state of the
1360 /// top-of-stack lexer is known.
1361 void RemoveTopOfLexerStack();
1363 /// From the point that this method is called, and until
1364 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1365 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1366 /// make the Preprocessor re-lex the same tokens.
1368 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1369 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1370 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1372 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1373 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1374 /// tokens will continue indefinitely.
1376 void EnableBacktrackAtThisPos();
1378 /// Disable the last EnableBacktrackAtThisPos call.
1379 void CommitBacktrackedTokens();
1381 /// Make Preprocessor re-lex the tokens that were lexed since
1382 /// EnableBacktrackAtThisPos() was previously called.
1385 /// True if EnableBacktrackAtThisPos() was called and
1386 /// caching of tokens is on.
1387 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1389 /// Lex the next token for this preprocessor.
1390 void Lex(Token &Result);
1392 /// Lex a token, forming a header-name token if possible.
1393 bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1395 bool LexAfterModuleImport(Token &Result);
1396 void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1398 void makeModuleVisible(Module *M, SourceLocation Loc);
1400 SourceLocation getModuleImportLoc(Module *M) const {
1401 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1404 /// Lex a string literal, which may be the concatenation of multiple
1405 /// string literals and may even come from macro expansion.
1406 /// \returns true on success, false if an error diagnostic has been generated.
1407 bool LexStringLiteral(Token &Result, std::string &String,
1408 const char *DiagnosticTag, bool AllowMacroExpansion) {
1409 if (AllowMacroExpansion)
1412 LexUnexpandedToken(Result);
1413 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1414 AllowMacroExpansion);
1417 /// Complete the lexing of a string literal where the first token has
1418 /// already been lexed (see LexStringLiteral).
1419 bool FinishLexStringLiteral(Token &Result, std::string &String,
1420 const char *DiagnosticTag,
1421 bool AllowMacroExpansion);
1423 /// Lex a token. If it's a comment, keep lexing until we get
1424 /// something not a comment.
1426 /// This is useful in -E -C mode where comments would foul up preprocessor
1427 /// directive handling.
1428 void LexNonComment(Token &Result) {
1431 while (Result.getKind() == tok::comment);
1434 /// Just like Lex, but disables macro expansion of identifier tokens.
1435 void LexUnexpandedToken(Token &Result) {
1436 // Disable macro expansion.
1437 bool OldVal = DisableMacroExpansion;
1438 DisableMacroExpansion = true;
1443 DisableMacroExpansion = OldVal;
1446 /// Like LexNonComment, but this disables macro expansion of
1447 /// identifier tokens.
1448 void LexUnexpandedNonComment(Token &Result) {
1450 LexUnexpandedToken(Result);
1451 while (Result.getKind() == tok::comment);
1454 /// Parses a simple integer literal to get its numeric value. Floating
1455 /// point literals and user defined literals are rejected. Used primarily to
1456 /// handle pragmas that accept integer arguments.
1457 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1459 /// Disables macro expansion everywhere except for preprocessor directives.
1460 void SetMacroExpansionOnlyInDirectives() {
1461 DisableMacroExpansion = true;
1462 MacroExpansionInDirectivesOverride = true;
1465 /// Peeks ahead N tokens and returns that token without consuming any
1468 /// LookAhead(0) returns the next token that would be returned by Lex(),
1469 /// LookAhead(1) returns the token after it, etc. This returns normal
1470 /// tokens after phase 5. As such, it is equivalent to using
1471 /// 'Lex', not 'LexUnexpandedToken'.
1472 const Token &LookAhead(unsigned N) {
1473 assert(LexLevel == 0 && "cannot use lookahead while lexing");
1474 if (CachedLexPos + N < CachedTokens.size())
1475 return CachedTokens[CachedLexPos+N];
1477 return PeekAhead(N+1);
1480 /// When backtracking is enabled and tokens are cached,
1481 /// this allows reverting a specific number of tokens.
1483 /// Note that the number of tokens being reverted should be up to the last
1484 /// backtrack position, not more.
1485 void RevertCachedTokens(unsigned N) {
1486 assert(isBacktrackEnabled() &&
1487 "Should only be called when tokens are cached for backtracking");
1488 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1489 && "Should revert tokens up to the last backtrack position, not more");
1490 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1491 "Corrupted backtrack positions ?");
1495 /// Enters a token in the token stream to be lexed next.
1497 /// If BackTrack() is called afterwards, the token will remain at the
1498 /// insertion point.
1499 /// If \p IsReinject is true, resulting token will have Token::IsReinjected
1500 /// flag set. See the flag documentation for details.
1501 void EnterToken(const Token &Tok, bool IsReinject) {
1503 // It's not correct in general to enter caching lex mode while in the
1504 // middle of a nested lexing action.
1505 auto TokCopy = std::make_unique<Token[]>(1);
1507 EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
1509 EnterCachingLexMode();
1510 assert(IsReinject && "new tokens in the middle of cached stream");
1511 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1515 /// We notify the Preprocessor that if it is caching tokens (because
1516 /// backtrack is enabled) it should replace the most recent cached tokens
1517 /// with the given annotation token. This function has no effect if
1518 /// backtracking is not enabled.
1520 /// Note that the use of this function is just for optimization, so that the
1521 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
1523 void AnnotateCachedTokens(const Token &Tok) {
1524 assert(Tok.isAnnotation() && "Expected annotation token");
1525 if (CachedLexPos != 0 && isBacktrackEnabled())
1526 AnnotatePreviousCachedTokens(Tok);
1529 /// Get the location of the last cached token, suitable for setting the end
1530 /// location of an annotation token.
1531 SourceLocation getLastCachedTokenLocation() const {
1532 assert(CachedLexPos != 0);
1533 return CachedTokens[CachedLexPos-1].getLastLoc();
1536 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1538 bool IsPreviousCachedToken(const Token &Tok) const;
1540 /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1543 /// Useful when a token needs to be split into smaller ones and CachedTokens'
1544 /// most recent token must be updated to reflect that.
1545 void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1547 /// Replace the last token with an annotation token.
1549 /// Like AnnotateCachedTokens(), this routine replaces an
1550 /// already-parsed (and resolved) token with an annotation
1551 /// token. However, this routine only replaces the last token with
1552 /// the annotation token; it does not affect any other cached
1553 /// tokens. This function has no effect if backtracking is not
1555 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1556 assert(Tok.isAnnotation() && "Expected annotation token");
1557 if (CachedLexPos != 0 && isBacktrackEnabled())
1558 CachedTokens[CachedLexPos-1] = Tok;
1561 /// Enter an annotation token into the token stream.
1562 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1563 void *AnnotationVal);
1565 /// Determine whether it's possible for a future call to Lex to produce an
1566 /// annotation token created by a previous call to EnterAnnotationToken.
1567 bool mightHavePendingAnnotationTokens() {
1568 return CurLexerKind != CLK_Lexer;
1571 /// Update the current token to represent the provided
1572 /// identifier, in order to cache an action performed by typo correction.
1573 void TypoCorrectToken(const Token &Tok) {
1574 assert(Tok.getIdentifierInfo() && "Expected identifier token");
1575 if (CachedLexPos != 0 && isBacktrackEnabled())
1576 CachedTokens[CachedLexPos-1] = Tok;
1579 /// Recompute the current lexer kind based on the CurLexer/
1580 /// CurTokenLexer pointers.
1581 void recomputeCurLexerKind();
1583 /// Returns true if incremental processing is enabled
1584 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1586 /// Enables the incremental processing
1587 void enableIncrementalProcessing(bool value = true) {
1588 IncrementalProcessing = value;
1591 /// Specify the point at which code-completion will be performed.
1593 /// \param File the file in which code completion should occur. If
1594 /// this file is included multiple times, code-completion will
1595 /// perform completion the first time it is included. If NULL, this
1596 /// function clears out the code-completion point.
1598 /// \param Line the line at which code completion should occur
1601 /// \param Column the column at which code completion should occur
1604 /// \returns true if an error occurred, false otherwise.
1605 bool SetCodeCompletionPoint(const FileEntry *File,
1606 unsigned Line, unsigned Column);
1608 /// Determine if we are performing code completion.
1609 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1611 /// Returns the location of the code-completion point.
1613 /// Returns an invalid location if code-completion is not enabled or the file
1614 /// containing the code-completion point has not been lexed yet.
1615 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1617 /// Returns the start location of the file of code-completion point.
1619 /// Returns an invalid location if code-completion is not enabled or the file
1620 /// containing the code-completion point has not been lexed yet.
1621 SourceLocation getCodeCompletionFileLoc() const {
1622 return CodeCompletionFileLoc;
1625 /// Returns true if code-completion is enabled and we have hit the
1626 /// code-completion point.
1627 bool isCodeCompletionReached() const { return CodeCompletionReached; }
1629 /// Note that we hit the code-completion point.
1630 void setCodeCompletionReached() {
1631 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1632 CodeCompletionReached = true;
1633 // Silence any diagnostics that occur after we hit the code-completion.
1634 getDiagnostics().setSuppressAllDiagnostics(true);
1637 /// The location of the currently-active \#pragma clang
1638 /// arc_cf_code_audited begin.
1640 /// Returns an invalid location if there is no such pragma active.
1641 std::pair<IdentifierInfo *, SourceLocation>
1642 getPragmaARCCFCodeAuditedInfo() const {
1643 return PragmaARCCFCodeAuditedInfo;
1646 /// Set the location of the currently-active \#pragma clang
1647 /// arc_cf_code_audited begin. An invalid location ends the pragma.
1648 void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
1649 SourceLocation Loc) {
1650 PragmaARCCFCodeAuditedInfo = {Ident, Loc};
1653 /// The location of the currently-active \#pragma clang
1654 /// assume_nonnull begin.
1656 /// Returns an invalid location if there is no such pragma active.
1657 SourceLocation getPragmaAssumeNonNullLoc() const {
1658 return PragmaAssumeNonNullLoc;
1661 /// Set the location of the currently-active \#pragma clang
1662 /// assume_nonnull begin. An invalid location ends the pragma.
1663 void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1664 PragmaAssumeNonNullLoc = Loc;
1667 /// Set the directory in which the main file should be considered
1668 /// to have been found, if it is not a real file.
1669 void setMainFileDir(const DirectoryEntry *Dir) {
1673 /// Instruct the preprocessor to skip part of the main source file.
1675 /// \param Bytes The number of bytes in the preamble to skip.
1677 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1678 /// start of a line.
1679 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1680 SkipMainFilePreamble.first = Bytes;
1681 SkipMainFilePreamble.second = StartOfLine;
1684 /// Forwarding function for diagnostics. This emits a diagnostic at
1685 /// the specified Token's location, translating the token's start
1686 /// position in the current buffer into a SourcePosition object for rendering.
1687 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1688 return Diags->Report(Loc, DiagID);
1691 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1692 return Diags->Report(Tok.getLocation(), DiagID);
1695 /// Return the 'spelling' of the token at the given
1696 /// location; does not go up to the spelling location or down to the
1697 /// expansion location.
1699 /// \param buffer A buffer which will be used only if the token requires
1700 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1701 /// \param invalid If non-null, will be set \c true if an error occurs.
1702 StringRef getSpelling(SourceLocation loc,
1703 SmallVectorImpl<char> &buffer,
1704 bool *invalid = nullptr) const {
1705 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1708 /// Return the 'spelling' of the Tok token.
1710 /// The spelling of a token is the characters used to represent the token in
1711 /// the source file after trigraph expansion and escaped-newline folding. In
1712 /// particular, this wants to get the true, uncanonicalized, spelling of
1713 /// things like digraphs, UCNs, etc.
1715 /// \param Invalid If non-null, will be set \c true if an error occurs.
1716 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1717 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1720 /// Get the spelling of a token into a preallocated buffer, instead
1721 /// of as an std::string.
1723 /// The caller is required to allocate enough space for the token, which is
1724 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1725 /// actual result is returned.
1727 /// Note that this method may do two possible things: it may either fill in
1728 /// the buffer specified with characters, or it may *change the input pointer*
1729 /// to point to a constant buffer with the data already in it (avoiding a
1730 /// copy). The caller is not allowed to modify the returned buffer pointer
1731 /// if an internal buffer is returned.
1732 unsigned getSpelling(const Token &Tok, const char *&Buffer,
1733 bool *Invalid = nullptr) const {
1734 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1737 /// Get the spelling of a token into a SmallVector.
1739 /// Note that the returned StringRef may not point to the
1740 /// supplied buffer if a copy can be avoided.
1741 StringRef getSpelling(const Token &Tok,
1742 SmallVectorImpl<char> &Buffer,
1743 bool *Invalid = nullptr) const;
1745 /// Relex the token at the specified location.
1746 /// \returns true if there was a failure, false on success.
1747 bool getRawToken(SourceLocation Loc, Token &Result,
1748 bool IgnoreWhiteSpace = false) {
1749 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1752 /// Given a Token \p Tok that is a numeric constant with length 1,
1753 /// return the character.
1755 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1756 bool *Invalid = nullptr) const {
1757 assert(Tok.is(tok::numeric_constant) &&
1758 Tok.getLength() == 1 && "Called on unsupported token");
1759 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1761 // If the token is carrying a literal data pointer, just use it.
1762 if (const char *D = Tok.getLiteralData())
1765 // Otherwise, fall back on getCharacterData, which is slower, but always
1767 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1770 /// Retrieve the name of the immediate macro expansion.
1772 /// This routine starts from a source location, and finds the name of the
1773 /// macro responsible for its immediate expansion. It looks through any
1774 /// intervening macro argument expansions to compute this. It returns a
1775 /// StringRef that refers to the SourceManager-owned buffer of the source
1776 /// where that macro name is spelled. Thus, the result shouldn't out-live
1777 /// the SourceManager.
1778 StringRef getImmediateMacroName(SourceLocation Loc) {
1779 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1782 /// Plop the specified string into a scratch buffer and set the
1783 /// specified token's location and length to it.
1785 /// If specified, the source location provides a location of the expansion
1786 /// point of the token.
1787 void CreateString(StringRef Str, Token &Tok,
1788 SourceLocation ExpansionLocStart = SourceLocation(),
1789 SourceLocation ExpansionLocEnd = SourceLocation());
1791 /// Split the first Length characters out of the token starting at TokLoc
1792 /// and return a location pointing to the split token. Re-lexing from the
1793 /// split token will return the split token rather than the original.
1794 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1796 /// Computes the source location just past the end of the
1797 /// token at this source location.
1799 /// This routine can be used to produce a source location that
1800 /// points just past the end of the token referenced by \p Loc, and
1801 /// is generally used when a diagnostic needs to point just after a
1802 /// token where it expected something different that it received. If
1803 /// the returned source location would not be meaningful (e.g., if
1804 /// it points into a macro), this routine returns an invalid
1805 /// source location.
1807 /// \param Offset an offset from the end of the token, where the source
1808 /// location should refer to. The default offset (0) produces a source
1809 /// location pointing just past the end of the token; an offset of 1 produces
1810 /// a source location pointing to the last character in the token, etc.
1811 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1812 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1815 /// Returns true if the given MacroID location points at the first
1816 /// token of the macro expansion.
1818 /// \param MacroBegin If non-null and function returns true, it is set to
1819 /// begin location of the macro.
1820 bool isAtStartOfMacroExpansion(SourceLocation loc,
1821 SourceLocation *MacroBegin = nullptr) const {
1822 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1826 /// Returns true if the given MacroID location points at the last
1827 /// token of the macro expansion.
1829 /// \param MacroEnd If non-null and function returns true, it is set to
1830 /// end location of the macro.
1831 bool isAtEndOfMacroExpansion(SourceLocation loc,
1832 SourceLocation *MacroEnd = nullptr) const {
1833 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1836 /// Print the token to stderr, used for debugging.
1837 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1838 void DumpLocation(SourceLocation Loc) const;
1839 void DumpMacro(const MacroInfo &MI) const;
1840 void dumpMacroInfo(const IdentifierInfo *II);
1842 /// Given a location that specifies the start of a
1843 /// token, return a new location that specifies a character within the token.
1844 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1845 unsigned Char) const {
1846 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1849 /// Increment the counters for the number of token paste operations
1852 /// If fast was specified, this is a 'fast paste' case we handled.
1853 void IncrementPasteCounter(bool isFast) {
1855 ++NumFastTokenPaste;
1862 size_t getTotalMemory() const;
1864 /// When the macro expander pastes together a comment (/##/) in Microsoft
1865 /// mode, this method handles updating the current state, returning the
1866 /// token on the next source line.
1867 void HandleMicrosoftCommentPaste(Token &Tok);
1869 //===--------------------------------------------------------------------===//
1870 // Preprocessor callback methods. These are invoked by a lexer as various
1871 // directives and events are found.
1873 /// Given a tok::raw_identifier token, look up the
1874 /// identifier information for the token and install it into the token,
1875 /// updating the token kind accordingly.
1876 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1879 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1882 /// Specifies the reason for poisoning an identifier.
1884 /// If that identifier is accessed while poisoned, then this reason will be
1885 /// used instead of the default "poisoned" diagnostic.
1886 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1888 /// Display reason for poisoned identifier.
1889 void HandlePoisonedIdentifier(Token & Identifier);
/// If \p Identifier carries identifier info and that identifier is poisoned,
/// forward the token to HandlePoisonedIdentifier().
1891 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1892 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1893 if(II->isPoisoned()) {
1894 HandlePoisonedIdentifier(Identifier);
1900 /// Identifiers used for SEH handling in Borland. These are only
1901 /// allowed in particular circumstances.
1903 IdentifierInfo *Ident__exception_code,
1904 *Ident___exception_code,
1905 *Ident_GetExceptionCode;
1906 // __except filter expression
1907 IdentifierInfo *Ident__exception_info,
1908 *Ident___exception_info,
1909 *Ident_GetExceptionInfo;
1911 IdentifierInfo *Ident__abnormal_termination,
1912 *Ident___abnormal_termination,
1913 *Ident_AbnormalTermination;
1915 const char *getCurLexerEndPos();
1916 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1919 void PoisonSEHIdentifiers(bool Poison = true); // Borland
1921 /// Callback invoked when the lexer reads an identifier and has
1922 /// filled in the token's IdentifierInfo member.
1924 /// This callback potentially macro expands it or turns it into a named
1925 /// token (like 'for').
1927 /// \returns true if we actually computed a token, false if we need to
1929 bool HandleIdentifier(Token &Identifier);
1931 /// Callback invoked when the lexer hits the end of the current file.
1933 /// This either returns the EOF token and returns true, or
1934 /// pops a level off the include stack and returns false, at which point the
1935 /// client should call lex again.
1936 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1938 /// Callback invoked when the current TokenLexer hits the end of its
1940 bool HandleEndOfTokenLexer(Token &Result);
1942 /// Callback invoked when the lexer sees a # token at the start of a
1945 /// This consumes the directive, modifies the lexer/preprocessor state, and
1946 /// advances the lexer(s) so that the next token read is the correct one.
1947 void HandleDirective(Token &Result);
1949 /// Ensure that the next token is a tok::eod token.
1951 /// If not, emit a diagnostic and consume up until the eod.
1952 /// If \p EnableMacros is true, then we consider macros that expand to zero
1953 /// tokens as being ok.
1955 /// \return The location of the end of the directive (the terminating
1957 SourceLocation CheckEndOfDirective(const char *DirType,
1958 bool EnableMacros = false);
1960 /// Read and discard all tokens remaining on the current line until
1961 /// the tok::eod token is found. Returns the range of the skipped tokens.
1962 SourceRange DiscardUntilEndOfDirective();
1964 /// Returns true if the preprocessor has seen a use of
1965 /// __DATE__ or __TIME__ in the file so far.
1966 bool SawDateOrTime() const {
1967 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
/// Returns the value currently stored in CounterValue.
1969 unsigned getCounterValue() const { return CounterValue; }
/// Overwrites CounterValue with \p V.
1970 void setCounterValue(unsigned V) { CounterValue = V; }
1972 /// Retrieves the module that we're currently building, if any.
1973 Module *getCurrentModule();
1975 /// Allocate a new MacroInfo object with the provided SourceLocation.
1976 MacroInfo *AllocateMacroInfo(SourceLocation L);
1978 /// Turn the specified lexer token into a fully checked and spelled
1979 /// filename, e.g. as an operand of \#include.
1981 /// The caller is expected to provide a buffer that is large enough to hold
1982 /// the spelling of the filename, but is also expected to handle the case
1983 /// when this method decides to use a different buffer.
1985 /// \returns true if the input filename was in <>'s or false if it was
1987 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
1989 /// Given a "foo" or \<foo> reference, look up the indicated file.
1991 /// Returns None on failure. \p isAngled indicates whether the file
1992 /// reference is for system \#include's or not (i.e. using <> instead of "").
1993 Optional<FileEntryRef>
1994 LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
1995 const DirectoryLookup *FromDir, const FileEntry *FromFile,
1996 const DirectoryLookup *&CurDir, SmallVectorImpl<char> *SearchPath,
1997 SmallVectorImpl<char> *RelativePath,
1998 ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
1999 bool *IsFrameworkFound, bool SkipCache = false);
2001 /// Get the DirectoryLookup structure used to find the current
2002 /// FileEntry, if CurLexer is non-null and if applicable.
2004 /// This allows us to implement \#include_next and find directory-specific
2006 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
2008 /// Return true if we're in the top-level file, not in a \#include.
2009 bool isInPrimaryFile() const;
2011 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2012 /// followed by EOD. Return true if the token is not a valid on-off-switch.
2013 bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2015 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2016 bool *ShadowFlag = nullptr);
2018 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2019 Module *LeaveSubmodule(bool ForPragma);
2022 friend void TokenLexer::ExpandFunctionArguments();
/// Save the current lexing state as a new entry at the back of
/// IncludeMacroStack: the lexer kind, the lexer submodule, the preprocessor
/// lexer and the directory lookup are copied, while CurLexer and
/// CurTokenLexer are moved into the entry. CurPPLexer is then reset to null.
///
/// Must not be called while CurLexerKind is CLK_CachingLexer (asserted).
2024 void PushIncludeMacroStack() {
2025 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
2026 IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
2027 std::move(CurLexer), CurPPLexer,
2028 std::move(CurTokenLexer), CurDirLookup);
2029 CurPPLexer = nullptr;
/// Restore the lexing state from the entry at the back of IncludeMacroStack
/// (the inverse of PushIncludeMacroStack) and then pop that entry.
///
/// No emptiness check is made here; the stack must hold at least one entry.
2032 void PopIncludeMacroStack() {
2033 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
2034 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
2035 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
2036 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
2037 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
2038 CurLexerKind = IncludeMacroStack.back().CurLexerKind;
2039 IncludeMacroStack.pop_back();
2042 void PropagateLineStartLeadingSpaceInfo(Token &Result);
2044 /// Determine whether we need to create module macros for #defines in the
2045 /// current context.
2046 bool needModuleMacros() const;
2048 /// Update the set of active module macros and ambiguity flag for a module
2050 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2052 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2053 SourceLocation Loc);
2054 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2055 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2058 /// Lex and validate a macro name, which occurs after a
2059 /// \#define or \#undef.
2061 /// \param MacroNameTok Token that represents the name defined or undefined.
2062 /// \param IsDefineUndef Kind of preprocessor directive.
2063 /// \param ShadowFlag Points to flag that is set if macro name shadows
2066 /// This emits a diagnostic, sets the token kind to eod,
2067 /// and discards the rest of the macro line if the macro name is invalid.
2068 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2069 bool *ShadowFlag = nullptr);
2071 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2072 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2073 /// doing so performs certain validity checks including (but not limited to):
2074 /// - # (stringization) is followed by a macro parameter
2075 /// \param MacroNameTok - Token that represents the macro name
2076 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2078 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2079 /// returns a nullptr if an invalid sequence of tokens is encountered.
2080 MacroInfo *ReadOptionalMacroParameterListAndBody(
2081 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2083 /// The ( starting an argument list of a macro definition has just been read.
2084 /// Lex the rest of the parameters and the closing ), updating \p MI with
2085 /// what we learn and saving in \p LastTok the last token read.
2086 /// Return true if an error occurs parsing the arg list.
2087 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2089 /// We just read a \#if or related directive and decided that the
2090 /// subsequent tokens are in the \#if'd out portion of the
2091 /// file. Lex the rest of the file, until we see an \#endif. If \p
2092 /// FoundNonSkipPortion is true, then we have already emitted code for part of
2093 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2094 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2095 /// already seen one so a \#else directive is a duplicate. When this returns,
2096 /// the caller can lex the first valid token.
2097 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2098 SourceLocation IfTokenLoc,
2099 bool FoundNonSkipPortion, bool FoundElse,
2100 SourceLocation ElseLoc = SourceLocation());
2102 /// Information about the result for evaluating an expression for a
2103 /// preprocessor directive.
2104 struct DirectiveEvalResult {
2105 /// Whether the expression was evaluated as true or not.
2108 /// True if the expression contained identifiers that were undefined.
2109 bool IncludedUndefinedIds;
2111 /// The source range for the expression.
2112 SourceRange ExprRange;
2115 /// Evaluate an integer constant expression that may occur after a
2116 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2118 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2119 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2121 /// Install the standard preprocessor pragmas:
2122 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2123 void RegisterBuiltinPragmas();
2125 /// Register builtin macros such as __LINE__ with the identifier table.
2126 void RegisterBuiltinMacros();
2128 /// If an identifier token is read that is to be expanded as a macro, handle
2129 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2130 /// otherwise the caller should lex again.
2131 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2133 /// Cache macro expanded tokens for TokenLexers.
2135 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
2136 /// is going to lex to the cache, and when it finishes the tokens are removed
2137 /// from the end of the cache.
2138 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2139 ArrayRef<Token> tokens);
2141 void removeCachedMacroExpandedTokensOfLastLexer();
2143 /// Determine whether the next preprocessor token to be
2144 /// lexed is a '('. If so, consume the token and return true, if not, this
2145 /// method should have no observable side-effect on the lexed tokens.
2146 bool isNextPPTokenLParen();
2148 /// After reading "MACRO(", this method is invoked to read all of the formal
2149 /// arguments specified for the macro invocation. Returns null on error.
2150 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2151 SourceLocation &MacroEnd);
2153 /// If an identifier token is read that is to be expanded
2154 /// as a builtin macro, handle it and return the next token as 'Tok'.
2155 void ExpandBuiltinMacro(Token &Tok);
2157 /// Read a \c _Pragma directive, slice it up, process it, then
2158 /// return the first token after the directive.
2159 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2160 void Handle_Pragma(Token &Tok);
2162 /// Like Handle_Pragma except the pragma text is not enclosed within
2163 /// a string literal.
2164 void HandleMicrosoft__pragma(Token &Tok);
2166 /// Add a lexer to the top of the include stack and
2167 /// start lexing tokens from it instead of the current buffer.
2168 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2170 /// Set the FileID for the preprocessor predefines.
2171 void setPredefinesFileID(FileID FID) {
// The predefines FileID may only be set once: it must still be invalid here.
2172 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2173 PredefinesFileID = FID;
2176 /// Set the FileID for the PCH through header.
2177 void setPCHThroughHeaderFileID(FileID FID);
2179 /// Returns true if we are lexing from a file and not a
2180 /// pragma or a macro.
2181 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2182 return L ? !L->isPragmaLexer() : P != nullptr;
/// Overload that applies the file-lexer test to the lexer and preprocessor
/// lexer stored in the include-stack entry \p I.
2185 static bool IsFileLexer(const IncludeStackInfo& I) {
2186 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
/// Overload that applies the file-lexer test to the current lexing state
/// (CurLexer and CurPPLexer).
2189 bool IsFileLexer() const {
2190 return IsFileLexer(CurLexer.get(), CurPPLexer);
2193 //===--------------------------------------------------------------------===//
2195 void CachingLex(Token &Result);
/// Returns true only when there is no current preprocessor lexer, no current
/// token lexer, and the include/macro stack is non-empty.
2197 bool InCachingLexMode() const {
2198 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2199 // that we are past EOF, not that we are in CachingLex mode.
2200 return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
2203 void EnterCachingLexMode();
2204 void EnterCachingLexModeUnchecked();
/// Leave caching-lex mode by calling RemoveTopOfLexerStack(). Does nothing
/// when InCachingLexMode() is false.
2206 void ExitCachingLexMode() {
2207 if (InCachingLexMode())
2208 RemoveTopOfLexerStack();
2211 const Token &PeekAhead(unsigned N);
2212 void AnnotatePreviousCachedTokens(const Token &Tok);
2214 //===--------------------------------------------------------------------===//
2215 /// Handle*Directive - implement the various preprocessor directives. These
2216 /// should side-effect the current preprocessor object so that the next call
2217 /// to Lex() will return the appropriate token next.
2218 void HandleLineDirective();
2219 void HandleDigitDirective(Token &Tok);
2220 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2221 void HandleIdentSCCSDirective(Token &Tok);
2222 void HandleMacroPublicDirective(Token &Tok);
2223 void HandleMacroPrivateDirective();
2225 /// An additional notification that can be produced by a header inclusion or
2226 /// import to tell the parser what happened.
2227 struct ImportAction {
2232 SkippedModuleImport,
2235 Module *ModuleForHeader = nullptr;
/// Construct an action of kind \p AK, recording \p Mod as ModuleForHeader.
/// A non-null module must be supplied unless \p AK is None or Failure
/// (asserted).
2237 ImportAction(ActionKind AK, Module *Mod = nullptr)
2238 : Kind(AK), ModuleForHeader(Mod) {
2239 assert((AK == None || Mod || AK == Failure) &&
2240 "no module for module action");
2244 Optional<FileEntryRef> LookupHeaderIncludeOrImport(
2245 const DirectoryLookup *&CurDir, StringRef &Filename,
2246 SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2247 const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2248 bool &IsMapped, const DirectoryLookup *LookupFrom,
2249 const FileEntry *LookupFromFile, StringRef &LookupFilename,
2250 SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2251 ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2254 void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2255 const DirectoryLookup *LookupFrom = nullptr,
2256 const FileEntry *LookupFromFile = nullptr);
2258 HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2259 Token &FilenameTok, SourceLocation EndLoc,
2260 const DirectoryLookup *LookupFrom = nullptr,
2261 const FileEntry *LookupFromFile = nullptr);
2262 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2263 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2264 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2265 void HandleMicrosoftImportDirective(Token &Tok);
2268 /// Check that the given module is available, producing a diagnostic if not.
2269 /// \return \c true if the check failed (because the module is not available).
2270 /// \c false if the module appears to be usable.
2271 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2272 const TargetInfo &TargetInfo,
2273 DiagnosticsEngine &Diags, Module *M);
2275 // Module inclusion testing.
2276 /// Find the module that owns the source or header file that
2277 /// \p Loc points to. If the location is in a file that was included
2278 /// into a module, or is outside any module, returns nullptr.
2279 Module *getModuleForLocation(SourceLocation Loc);
2281 /// We want to produce a diagnostic at location IncLoc concerning an
2282 /// unreachable effect at location MLoc (eg, where a desired entity was
2283 /// declared or defined). Determine whether the right way to make MLoc
2284 /// reachable is by #include, and if so, what header should be included.
2286 /// This is not necessarily fast, and might load unexpected module maps, so
2287 /// should only be called by code that intends to produce an error.
2289 /// \param IncLoc The location at which the missing effect was detected.
2290 /// \param MLoc A location within an unimported module at which the desired
2291 /// effect occurred.
2292 /// \return A file that can be #included to provide the desired effect. Null
2293 /// if no such file could be determined or if a #include is not
2294 /// appropriate (eg, if a module should be imported instead).
2295 const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2296 SourceLocation MLoc);
2298 bool isRecordingPreamble() const {
2299 return PreambleConditionalStack.isRecording();
2302 bool hasRecordedPreamble() const {
2303 return PreambleConditionalStack.hasRecordedPreamble();
2306 ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2307 return PreambleConditionalStack.getStack();
/// Install \p s as the stack held by PreambleConditionalStack.
2310 void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
2311 PreambleConditionalStack.setStack(s);
/// Switch PreambleConditionalStack into replaying mode, then install \p s as
/// its stack and \p SkipInfo as its skip info.
2314 void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
2315 llvm::Optional<PreambleSkipInfo> SkipInfo) {
2316 PreambleConditionalStack.startReplaying();
2317 PreambleConditionalStack.setStack(s);
2318 PreambleConditionalStack.SkipInfo = SkipInfo;
2321 llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
2322 return PreambleConditionalStack.SkipInfo;
2326 /// After processing predefined file, initialize the conditional stack from
2328 void replayPreambleConditionalStack();
2331 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2332 void HandleUndefDirective();
2334 // Conditional Inclusion.
2335 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2336 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2337 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2338 bool ReadAnyTokensBeforeDirective);
2339 void HandleEndifDirective(Token &EndifToken);
2340 void HandleElseDirective(Token &Result, const Token &HashToken);
2341 void HandleElifDirective(Token &ElifToken, const Token &HashToken);
2344 void HandlePragmaDirective(PragmaIntroducer Introducer);
2347 void HandlePragmaOnce(Token &OnceTok);
2348 void HandlePragmaMark();
2349 void HandlePragmaPoison();
2350 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2351 void HandlePragmaDependency(Token &DependencyTok);
2352 void HandlePragmaPushMacro(Token &Tok);
2353 void HandlePragmaPopMacro(Token &Tok);
2354 void HandlePragmaIncludeAlias(Token &Tok);
2355 void HandlePragmaModuleBuild(Token &Tok);
2356 void HandlePragmaHdrstop(Token &Tok);
2357 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2359 // Return true and store the first token only if any CommentHandler
2360 // has inserted some tokens and getCommentRetentionState() is false.
2361 bool HandleComment(Token &result, SourceRange Comment);
2363 /// A macro is used, update information about macros that need unused
2365 void markMacroAsUsed(MacroInfo *MI);
2369 getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
2371 /// Contains the currently active skipped range mappings for skipping excluded
2372 /// conditional directives.
2373 ExcludedPreprocessorDirectiveSkipMapping
2374 *ExcludedConditionalDirectiveSkipMappings;
2377 /// Abstract base class that describes a handler that will receive
2378 /// source ranges for each of the comments encountered in the source file.
2379 class CommentHandler {
2381 virtual ~CommentHandler();
2383 // The handler shall return true if it has pushed any tokens
2384 // to be read using e.g. EnterToken or EnterTokenStream.
2385 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
2388 /// Registry of pragma handlers added by plugins
2389 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2391 } // namespace clang
2393 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H