1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Defines the clang::Preprocessor interface.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16 #define LLVM_CLANG_LEX_PREPROCESSOR_H
18 #include "clang/Basic/Builtins.h"
19 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Basic/IdentifierTable.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Lex/Lexer.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/ModuleMap.h"
25 #include "clang/Lex/PPCallbacks.h"
26 #include "clang/Lex/PTHLexer.h"
27 #include "clang/Lex/PTHManager.h"
28 #include "clang/Lex/TokenLexer.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Support/Allocator.h"
39 template<unsigned InternalLen> class SmallString;
45 class ExternalPreprocessorSource;
49 class PragmaNamespace;
55 class CodeCompletionHandler;
56 class DirectoryLookup;
57 class PreprocessingRecord;
59 class PreprocessorOptions;
61 /// \brief Stores token information for comparing actual tokens with
62 /// predefined values. Only handles simple tokens and identifiers.
/// Construct a TokenValue for a simple (non-identifier) token kind.
/// The asserts reject token kinds this simple kind/identifier comparison
/// scheme cannot represent (raw identifiers, literals, annotations).
68 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
69 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
70 assert(Kind != tok::identifier &&
71 "Identifiers should be created by TokenValue(IdentifierInfo *)");
72 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
73 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
/// Construct a TokenValue matching an identifier token with the given info.
75 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
/// Compare against an actual lexed token: the kinds must match, and when an
/// IdentifierInfo was supplied it must also match the token's identifier.
76 bool operator==(const Token &Tok) const {
77 return Tok.getKind() == Kind &&
78 (!II || II == Tok.getIdentifierInfo());
82 /// \brief Context in which macro name is used.
84 MU_Other = 0, // other than #define or #undef
85 MU_Define = 1, // macro name specified in #define
86 MU_Undef = 2 // macro name specified in #undef
89 /// \brief Engages in a tight little dance with the lexer to efficiently
90 /// preprocess tokens.
92 /// Lexers know only about tokens within a single source file, and don't
93 /// know anything about preprocessor-level issues like the \#include stack,
94 /// token expansion, etc.
95 class Preprocessor : public RefCountedBase<Preprocessor> {
96 IntrusiveRefCntPtr<PreprocessorOptions> PPOpts;
97 DiagnosticsEngine *Diags;
98 LangOptions &LangOpts;
99 const TargetInfo *Target;
100 FileManager &FileMgr;
101 SourceManager &SourceMgr;
102 std::unique_ptr<ScratchBuffer> ScratchBuf;
103 HeaderSearch &HeaderInfo;
104 ModuleLoader &TheModuleLoader;
106 /// \brief External source of macros.
107 ExternalPreprocessorSource *ExternalSource;
110 /// An optional PTHManager object used for getting tokens from
111 /// a token cache rather than lexing the original source file.
112 std::unique_ptr<PTHManager> PTH;
114 /// A BumpPtrAllocator object used to quickly allocate and release
115 /// objects internal to the Preprocessor.
116 llvm::BumpPtrAllocator BP;
118 /// Identifiers for builtin macros and other builtins.
119 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
120 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
121 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
122 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
123 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
124 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
125 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
126 IdentifierInfo *Ident__identifier; // __identifier
127 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
128 IdentifierInfo *Ident__has_feature; // __has_feature
129 IdentifierInfo *Ident__has_extension; // __has_extension
130 IdentifierInfo *Ident__has_builtin; // __has_builtin
131 IdentifierInfo *Ident__has_attribute; // __has_attribute
132 IdentifierInfo *Ident__has_include; // __has_include
133 IdentifierInfo *Ident__has_include_next; // __has_include_next
134 IdentifierInfo *Ident__has_warning; // __has_warning
135 IdentifierInfo *Ident__is_identifier; // __is_identifier
136 IdentifierInfo *Ident__building_module; // __building_module
137 IdentifierInfo *Ident__MODULE__; // __MODULE__
138 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
139 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
141 SourceLocation DATELoc, TIMELoc;
142 unsigned CounterValue; // Next __COUNTER__ value.
145 /// \brief Maximum depth of \#includes.
146 MaxAllowedIncludeStackDepth = 200
149 // State that is set before the preprocessor begins.
150 bool KeepComments : 1;
151 bool KeepMacroComments : 1;
152 bool SuppressIncludeNotFoundError : 1;
154 // State that changes while the preprocessor runs:
155 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
157 /// Whether the preprocessor owns the header search object.
158 bool OwnsHeaderSearch : 1;
160 /// True if macro expansion is disabled.
161 bool DisableMacroExpansion : 1;
163 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
164 /// when parsing preprocessor directives.
165 bool MacroExpansionInDirectivesOverride : 1;
167 class ResetMacroExpansionHelper;
169 /// \brief Whether we have already loaded macros from the external source.
170 mutable bool ReadMacrosFromExternalSource : 1;
172 /// \brief True if pragmas are enabled.
173 bool PragmasEnabled : 1;
175 /// \brief True if the current build action is a preprocessing action.
176 bool PreprocessedOutput : 1;
178 /// \brief True if we are currently preprocessing a #if or #elif directive
179 bool ParsingIfOrElifDirective;
181 /// \brief True if we are pre-expanding macro arguments.
182 bool InMacroArgPreExpansion;
184 /// \brief Mapping/lookup information for all identifiers in
185 /// the program, including program keywords.
186 mutable IdentifierTable Identifiers;
188 /// \brief This table contains all the selectors in the program.
190 /// Unlike IdentifierTable above, this table *isn't* populated by the
191 /// preprocessor. It is declared/expanded here because its role/lifetime is
192 /// conceptually similar to the IdentifierTable. In addition, the current
193 /// control flow (in clang::ParseAST()), make it convenient to put here.
195 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
196 /// the lifetime of the preprocessor.
197 SelectorTable Selectors;
199 /// \brief Information about builtins.
200 Builtin::Context BuiltinInfo;
202 /// \brief Tracks all of the pragmas that the client registered
203 /// with this preprocessor.
204 std::unique_ptr<PragmaNamespace> PragmaHandlers;
206 /// \brief Pragma handlers of the original source is stored here during the
207 /// parsing of a model file.
208 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
210 /// \brief Tracks all of the comment handlers that the client registered
211 /// with this preprocessor.
212 std::vector<CommentHandler *> CommentHandlers;
214 /// \brief True if we want to ignore EOF token and continue later on (thus
215 /// avoid tearing the Lexer and etc. down).
216 bool IncrementalProcessing;
218 /// The kind of translation unit we are processing.
219 TranslationUnitKind TUKind;
221 /// \brief The code-completion handler.
222 CodeCompletionHandler *CodeComplete;
224 /// \brief The file that we're performing code-completion for, if any.
225 const FileEntry *CodeCompletionFile;
227 /// \brief The offset in file for the code-completion point.
228 unsigned CodeCompletionOffset;
230 /// \brief The location for the code-completion point. This gets instantiated
231 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
232 SourceLocation CodeCompletionLoc;
234 /// \brief The start location for the file of the code-completion point.
236 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
237 /// for preprocessing.
238 SourceLocation CodeCompletionFileLoc;
240 /// \brief The source location of the \c import contextual keyword we just
242 SourceLocation ModuleImportLoc;
244 /// \brief The module import path that we're currently processing.
245 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
247 /// \brief Whether the last token we lexed was an '@'.
250 /// \brief Whether the module import expects an identifier next. Otherwise,
251 /// it expects a '.' or ';'.
252 bool ModuleImportExpectsIdentifier;
254 /// \brief The source location of the currently-active
255 /// \#pragma clang arc_cf_code_audited begin.
256 SourceLocation PragmaARCCFCodeAuditedLoc;
258 /// \brief True if we hit the code-completion point.
259 bool CodeCompletionReached;
261 /// \brief The directory that the main file should be considered to occupy,
262 /// if it does not correspond to a real file (as happens when building a
264 const DirectoryEntry *MainFileDir;
266 /// \brief The number of bytes that we will initially skip when entering the
267 /// main file, along with a flag that indicates whether skipping this number
268 /// of bytes will place the lexer at the start of a line.
270 /// This is used when loading a precompiled preamble.
271 std::pair<int, bool> SkipMainFilePreamble;
273 /// \brief The current top of the stack that we're lexing from if
274 /// not expanding a macro and we are lexing directly from source code.
276 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
277 std::unique_ptr<Lexer> CurLexer;
279 /// \brief The current top of stack that we're lexing from if
280 /// not expanding from a macro and we are lexing from a PTH cache.
282 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
283 std::unique_ptr<PTHLexer> CurPTHLexer;
285 /// \brief The current top of the stack what we're lexing from
286 /// if not expanding a macro.
288 /// This is an alias for either CurLexer or CurPTHLexer.
289 PreprocessorLexer *CurPPLexer;
291 /// \brief Used to find the current FileEntry, if CurLexer is non-null
292 /// and if applicable.
294 /// This allows us to implement \#include_next and find directory-specific
296 const DirectoryLookup *CurDirLookup;
298 /// \brief The current macro we are expanding, if we are expanding a macro.
300 /// One of CurLexer and CurTokenLexer must be null.
301 std::unique_ptr<TokenLexer> CurTokenLexer;
303 /// \brief The kind of lexer we're currently working with.
309 CLK_LexAfterModuleImport
312 /// \brief If the current lexer is for a submodule that is being built, this
313 /// is that submodule.
314 Module *CurSubmodule;
316 /// \brief Keeps track of the stack of files currently
317 /// \#included, and macros currently being expanded from, not counting
318 /// CurLexer/CurTokenLexer.
319 struct IncludeStackInfo {
320 enum CurLexerKind CurLexerKind;
321 Module *TheSubmodule;
322 std::unique_ptr<Lexer> TheLexer;
323 std::unique_ptr<PTHLexer> ThePTHLexer;
324 PreprocessorLexer *ThePPLexer;
325 std::unique_ptr<TokenLexer> TheTokenLexer;
326 const DirectoryLookup *TheDirLookup;
328 // The following constructors are completely useless copies of the default
329 // versions, only needed to pacify MSVC.
330 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
331 std::unique_ptr<Lexer> &&TheLexer,
332 std::unique_ptr<PTHLexer> &&ThePTHLexer,
333 PreprocessorLexer *ThePPLexer,
334 std::unique_ptr<TokenLexer> &&TheTokenLexer,
335 const DirectoryLookup *TheDirLookup)
336 : CurLexerKind(std::move(CurLexerKind)),
337 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
338 ThePTHLexer(std::move(ThePTHLexer)),
339 ThePPLexer(std::move(ThePPLexer)),
340 TheTokenLexer(std::move(TheTokenLexer)),
341 TheDirLookup(std::move(TheDirLookup)) {}
342 IncludeStackInfo(IncludeStackInfo &&RHS)
343 : CurLexerKind(std::move(RHS.CurLexerKind)),
344 TheSubmodule(std::move(RHS.TheSubmodule)),
345 TheLexer(std::move(RHS.TheLexer)),
346 ThePTHLexer(std::move(RHS.ThePTHLexer)),
347 ThePPLexer(std::move(RHS.ThePPLexer)),
348 TheTokenLexer(std::move(RHS.TheTokenLexer)),
349 TheDirLookup(std::move(RHS.TheDirLookup)) {}
351 std::vector<IncludeStackInfo> IncludeMacroStack;
353 /// \brief Actions invoked when some preprocessor activity is
354 /// encountered (e.g. a file is \#included, etc).
355 std::unique_ptr<PPCallbacks> Callbacks;
357 struct MacroExpandsInfo {
361 MacroExpandsInfo(Token Tok, MacroDirective *MD, SourceRange Range)
362 : Tok(Tok), MD(MD), Range(Range) { }
364 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
366 /// For each IdentifierInfo that was associated with a macro, we
367 /// keep a mapping to the history of all macro definitions and #undefs in
368 /// the reverse order (the latest one is in the head of the list).
369 llvm::DenseMap<const IdentifierInfo*, MacroDirective*> Macros;
370 friend class ASTReader;
372 /// \brief Macros that we want to warn because they are not used at the end
373 /// of the translation unit.
375 /// We store just their SourceLocations instead of
376 /// something like MacroInfo*. The benefit of this is that when we are
377 /// deserializing from PCH, we don't need to deserialize identifier & macros
378 /// just so that we can report that they are unused, we just warn using
379 /// the SourceLocations of this set (that will be filled by the ASTReader).
380 /// We are using SmallPtrSet instead of a vector for faster removal.
381 typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
382 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
384 /// \brief A "freelist" of MacroArg objects that can be
385 /// reused for quick allocation.
386 MacroArgs *MacroArgCache;
387 friend class MacroArgs;
389 /// For each IdentifierInfo used in a \#pragma push_macro directive,
390 /// we keep a MacroInfo stack used to restore the previous macro value.
391 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
393 // Various statistics we track for performance analysis.
394 unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
395 unsigned NumIf, NumElse, NumEndif;
396 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
397 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
398 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
401 /// \brief The predefined macros that preprocessor should use from the
402 /// command line etc.
403 std::string Predefines;
405 /// \brief The file ID for the preprocessor predefines.
406 FileID PredefinesFileID;
409 /// \brief Cache of macro expanders to reduce malloc traffic.
410 enum { TokenLexerCacheSize = 8 };
411 unsigned NumCachedTokenLexers;
412 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
415 /// \brief Keeps macro expanded tokens for TokenLexers.
417 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
418 /// going to lex in the cache and when it finishes the tokens are removed
419 /// from the end of the cache.
420 SmallVector<Token, 16> MacroExpandedTokens;
421 std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
423 /// \brief A record of the macro definitions and expansions that
424 /// occurred during preprocessing.
426 /// This is an optional side structure that can be enabled with
427 /// \c createPreprocessingRecord() prior to preprocessing.
428 PreprocessingRecord *Record;
430 private: // Cached tokens state.
431 typedef SmallVector<Token, 1> CachedTokensTy;
433 /// \brief Cached tokens are stored here when we do backtracking or
434 /// lookahead. They are "lexed" by the CachingLex() method.
435 CachedTokensTy CachedTokens;
437 /// \brief The position of the cached token that CachingLex() should
440 /// If it points beyond the CachedTokens vector, it means that a normal
441 /// Lex() should be invoked.
442 CachedTokensTy::size_type CachedLexPos;
444 /// \brief Stack of backtrack positions, allowing nested backtracks.
446 /// The EnableBacktrackAtThisPos() method pushes a position to
447 /// indicate where CachedLexPos should be set when the BackTrack() method is
448 /// invoked (at which point the last position is popped).
449 std::vector<CachedTokensTy::size_type> BacktrackPositions;
451 struct MacroInfoChain {
453 MacroInfoChain *Next;
456 /// MacroInfos are managed as a chain for easy disposal. This is the head
458 MacroInfoChain *MIChainHead;
460 struct DeserializedMacroInfoChain {
462 unsigned OwningModuleID; // MUST be immediately after the MacroInfo object
463 // so it can be accessed by MacroInfo::getOwningModuleID().
464 DeserializedMacroInfoChain *Next;
466 DeserializedMacroInfoChain *DeserialMIChainHead;
469 Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
470 DiagnosticsEngine &diags, LangOptions &opts,
471 SourceManager &SM, HeaderSearch &Headers,
472 ModuleLoader &TheModuleLoader,
473 IdentifierInfoLookup *IILookup = nullptr,
474 bool OwnsHeaderSearch = false,
475 TranslationUnitKind TUKind = TU_Complete);
479 /// \brief Initialize the preprocessor using information about the target.
481 /// \param Target is owned by the caller and must remain valid for the
482 /// lifetime of the preprocessor.
483 void Initialize(const TargetInfo &Target);
485 /// \brief Initialize the preprocessor to parse a model file
487 /// To parse model files the preprocessor of the original source is reused to
488 /// preserver the identifier table. However to avoid some duplicate
489 /// information in the preprocessor some cleanup is needed before it is used
490 /// to parse model files. This method does that cleanup.
491 void InitializeForModelFile();
493 /// \brief Cleanup after model file parsing
494 void FinalizeForModelFile();
496 /// \brief Retrieve the preprocessor options used to initialize this
498 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
/// Accessors for the diagnostics engine; the preprocessor does not own it,
/// so the caller must keep the engine alive while it is installed.
500 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
501 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
/// Accessors for the language/target configuration and the managers this
/// preprocessor was constructed with. All return references to objects
/// owned elsewhere (see the corresponding members above).
503 const LangOptions &getLangOpts() const { return LangOpts; }
504 const TargetInfo &getTargetInfo() const { return *Target; }
505 FileManager &getFileManager() const { return FileMgr; }
506 SourceManager &getSourceManager() const { return SourceMgr; }
507 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
/// Accessors for the identifier/selector/builtin tables and the internal
/// bump-pointer allocator owned by this preprocessor.
509 IdentifierTable &getIdentifierTable() { return Identifiers; }
510 SelectorTable &getSelectorTable() { return Selectors; }
511 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
512 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
/// Install/retrieve the PTH (pre-tokenized header) manager used to lex from
/// a token cache instead of the original source. May be null.
514 void setPTHManager(PTHManager* pm);
516 PTHManager *getPTHManager() { return PTH.get(); }
/// Set/get the external source of macros (e.g. an AST/PCH reader); not
/// owned by the preprocessor.
518 void setExternalSource(ExternalPreprocessorSource *Source) {
519 ExternalSource = Source;
522 ExternalPreprocessorSource *getExternalSource() const {
523 return ExternalSource;
526 /// \brief Retrieve the module loader associated with this preprocessor.
527 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
/// \brief True if the module loader reported a fatal failure.
529 bool hadModuleLoaderFatalFailure() const {
530 return TheModuleLoader.HadFatalFailure;
533 /// \brief True if we are currently preprocessing a #if or #elif directive
534 bool isParsingIfOrElifDirective() const {
535 return ParsingIfOrElifDirective;
538 /// \brief Control whether the preprocessor retains comments in output.
539 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
// Bitwise | is deliberate: retaining comments in macro expansions only
// makes sense if comments are retained at all, so KeepMacroComments
// forces KeepComments on as well.
540 this->KeepComments = KeepComments | KeepMacroComments;
541 this->KeepMacroComments = KeepMacroComments;
544 bool getCommentRetentionState() const { return KeepComments; }
/// \brief Enable or disable \#pragma processing, and query that state.
546 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
547 bool getPragmasEnabled() const { return PragmasEnabled; }
/// \brief Control whether a missing \#include'd file is diagnosed.
549 void SetSuppressIncludeNotFoundError(bool Suppress) {
550 SuppressIncludeNotFoundError = Suppress;
553 bool GetSuppressIncludeNotFoundError() {
554 return SuppressIncludeNotFoundError;
557 /// Sets whether the preprocessor is responsible for producing output or if
558 /// it is producing tokens to be consumed by Parse and Sema.
559 void setPreprocessedOutput(bool IsPreprocessedOutput) {
560 PreprocessedOutput = IsPreprocessedOutput;
563 /// Returns true if the preprocessor is responsible for generating output,
564 /// false if it is producing tokens to be consumed by Parse and Sema.
565 bool isPreprocessedOutput() const { return PreprocessedOutput; }
567 /// \brief Return true if we are lexing directly from the specified lexer.
568 bool isCurrentLexer(const PreprocessorLexer *L) const {
569 return CurPPLexer == L;
572 /// \brief Return the current lexer being lexed from.
574 /// Note that this ignores any potentially active macro expansions and _Pragma
575 /// expansions going on at the time.
576 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
578 /// \brief Return the current file lexer being lexed from.
580 /// Note that this ignores any potentially active macro expansions and _Pragma
581 /// expansions going on at the time.
582 PreprocessorLexer *getCurrentFileLexer() const;
584 /// \brief Return the submodule owning the file being lexed.
585 Module *getCurrentSubmodule() const { return CurSubmodule; }
587 /// \brief Returns the FileID for the preprocessor predefines.
588 FileID getPredefinesFileID() const { return PredefinesFileID; }
591 /// \brief Accessors for preprocessor callbacks.
593 /// Note that this class takes ownership of any PPCallbacks object given to
595 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
596 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
598 C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
599 std::move(Callbacks));
600 Callbacks = std::move(C);
604 /// \brief Given an identifier, return its latest MacroDirective if it is
605 /// \#defined or null if it isn't \#define'd.
606 MacroDirective *getMacroDirective(IdentifierInfo *II) const {
607 if (!II->hasMacroDefinition())
610 MacroDirective *MD = getMacroDirectiveHistory(II);
611 assert(MD->isDefined() && "Macro is undefined!");
615 const MacroInfo *getMacroInfo(IdentifierInfo *II) const {
616 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
619 MacroInfo *getMacroInfo(IdentifierInfo *II) {
620 if (MacroDirective *MD = getMacroDirective(II))
621 return MD->getMacroInfo();
625 /// \brief Given an identifier, return the (probably #undef'd) MacroInfo
626 /// representing the most recent macro definition.
628 /// One can iterate over all previous macro definitions from the most recent
629 /// one. This should only be called for identifiers that hadMacroDefinition().
630 MacroDirective *getMacroDirectiveHistory(const IdentifierInfo *II) const;
632 /// \brief Add a directive to the macro directive history for this identifier.
633 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
634 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
636 unsigned ImportedFromModuleID,
637 ArrayRef<unsigned> Overrides) {
638 DefMacroDirective *MD =
639 AllocateDefMacroDirective(MI, Loc, ImportedFromModuleID, Overrides);
640 appendMacroDirective(II, MD);
643 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI){
644 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc(), 0, None);
646 /// \brief Set a MacroDirective that was loaded from a PCH file.
647 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD);
650 /// Iterators for the macro history table. Currently defined macros have
651 /// IdentifierInfo::hasMacroDefinition() set and an empty
652 /// MacroInfo::getUndefLoc() at the head of the list.
653 typedef llvm::DenseMap<const IdentifierInfo *,
654 MacroDirective*>::const_iterator macro_iterator;
655 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
656 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
659 /// \brief Return the name of the macro defined before \p Loc that has
660 /// spelling \p Tokens. If there are multiple macros with same spelling,
661 /// return the last one defined.
662 StringRef getLastMacroWithSpelling(SourceLocation Loc,
663 ArrayRef<TokenValue> Tokens) const;
665 const std::string &getPredefines() const { return Predefines; }
666 /// \brief Set the predefines for this Preprocessor.
668 /// These predefines are automatically injected when parsing the main file.
669 void setPredefines(const char *P) { Predefines = P; }
670 void setPredefines(const std::string &P) { Predefines = P; }
672 /// Return information about the specified preprocessor
673 /// identifier token.
674 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
675 return &Identifiers.get(Name);
678 /// \brief Add the specified pragma handler to this preprocessor.
680 /// If \p Namespace is non-null, then it is a token required to exist on the
681 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
682 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
683 void AddPragmaHandler(PragmaHandler *Handler) {
684 AddPragmaHandler(StringRef(), Handler);
687 /// \brief Remove the specific pragma handler from this preprocessor.
689 /// If \p Namespace is non-null, then it should be the namespace that
690 /// \p Handler was added to. It is an error to remove a handler that
691 /// has not been registered.
692 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
693 void RemovePragmaHandler(PragmaHandler *Handler) {
694 RemovePragmaHandler(StringRef(), Handler);
697 /// Install empty handlers for all pragmas (making them ignored).
698 void IgnorePragmas();
700 /// \brief Add the specified comment handler to the preprocessor.
701 void addCommentHandler(CommentHandler *Handler);
703 /// \brief Remove the specified comment handler.
705 /// It is an error to remove a handler that has not been registered.
706 void removeCommentHandler(CommentHandler *Handler);
708 /// \brief Set the code completion handler to the given object.
709 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
710 CodeComplete = &Handler;
713 /// \brief Retrieve the current code-completion handler.
714 CodeCompletionHandler *getCodeCompletionHandler() const {
718 /// \brief Clear out the code completion handler.
719 void clearCodeCompletionHandler() {
720 CodeComplete = nullptr;
723 /// \brief Hook used by the lexer to invoke the "natural language" code
724 /// completion point.
725 void CodeCompleteNaturalLanguage();
727 /// \brief Retrieve the preprocessing record, or NULL if there is no
728 /// preprocessing record.
729 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
731 /// \brief Create a new preprocessing record, which will keep track of
732 /// all macro expansions, macro definitions, etc.
733 void createPreprocessingRecord();
735 /// \brief Enter the specified FileID as the main source file,
736 /// which implicitly adds the builtin defines etc.
737 void EnterMainSourceFile();
739 /// \brief Inform the preprocessor callbacks that processing is complete.
740 void EndSourceFile();
742 /// \brief Add a source file to the top of the include stack and
743 /// start lexing tokens from it instead of the current buffer.
745 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
746 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
749 /// \brief Add a Macro to the top of the include stack and start lexing
750 /// tokens from it instead of the current buffer.
752 /// \param Args specifies the tokens input to a function-like macro.
753 /// \param ILEnd specifies the location of the ')' for a function-like macro
754 /// or the identifier for an object-like macro.
755 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
758 /// \brief Add a "macro" context to the top of the include stack,
759 /// which will cause the lexer to start returning the specified tokens.
761 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
762 /// will not be subject to further macro expansion. Otherwise, these tokens
763 /// will be re-macro-expanded when/if expansion is enabled.
765 /// If \p OwnsTokens is false, this method assumes that the specified stream
766 /// of tokens has a permanent owner somewhere, so they do not need to be
767 /// copied. If it is true, it assumes the array of tokens is allocated with
768 /// \c new[] and must be freed.
769 void EnterTokenStream(const Token *Toks, unsigned NumToks,
770 bool DisableMacroExpansion, bool OwnsTokens);
772 /// \brief Pop the current lexer/macro exp off the top of the lexer stack.
774 /// This should only be used in situations where the current state of the
775 /// top-of-stack lexer is known.
776 void RemoveTopOfLexerStack();
778 /// From the point that this method is called, and until
779 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
780 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
781 /// make the Preprocessor re-lex the same tokens.
783 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
784 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
785 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
787 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
788 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
789 /// tokens will continue indefinitely.
791 void EnableBacktrackAtThisPos();
793 /// \brief Disable the last EnableBacktrackAtThisPos call.
794 void CommitBacktrackedTokens();
796 /// \brief Make Preprocessor re-lex the tokens that were lexed since
797 /// EnableBacktrackAtThisPos() was previously called.
800 /// \brief True if EnableBacktrackAtThisPos() was called and
801 /// caching of tokens is on.
802 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
804 /// \brief Lex the next token for this preprocessor.
805 void Lex(Token &Result);
807 void LexAfterModuleImport(Token &Result);
809 /// \brief Lex a string literal, which may be the concatenation of multiple
810 /// string literals and may even come from macro expansion.
811 /// \returns true on success, false if a error diagnostic has been generated.
812 bool LexStringLiteral(Token &Result, std::string &String,
813 const char *DiagnosticTag, bool AllowMacroExpansion) {
814 if (AllowMacroExpansion)
817 LexUnexpandedToken(Result);
818 return FinishLexStringLiteral(Result, String, DiagnosticTag,
819 AllowMacroExpansion);
822 /// \brief Complete the lexing of a string literal where the first token has
823 /// already been lexed (see LexStringLiteral).
824 bool FinishLexStringLiteral(Token &Result, std::string &String,
825 const char *DiagnosticTag,
826 bool AllowMacroExpansion);
828 /// \brief Lex a token. If it's a comment, keep lexing until we get
829 /// something not a comment.
831 /// This is useful in -E -C mode where comments would foul up preprocessor
832 /// directive handling.
833 void LexNonComment(Token &Result) {
836 while (Result.getKind() == tok::comment);
839 /// \brief Just like Lex, but disables macro expansion of identifier tokens.
840 void LexUnexpandedToken(Token &Result) {
841 // Disable macro expansion.
842 bool OldVal = DisableMacroExpansion;
843 DisableMacroExpansion = true;
848 DisableMacroExpansion = OldVal;
851 /// \brief Like LexNonComment, but this disables macro expansion of
852 /// identifier tokens.
853 void LexUnexpandedNonComment(Token &Result) {
855 LexUnexpandedToken(Result);
856 while (Result.getKind() == tok::comment);
859 /// \brief Parses a simple integer literal to get its numeric value. Floating
860 /// point literals and user defined literals are rejected. Used primarily to
861 /// handle pragmas that accept integer arguments.
// NOTE(review): presumably returns true on a successful parse with the result
// stored in \p Value -- confirm against the implementation.
862 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
864 /// Disables macro expansion everywhere except for preprocessor directives.
865 void SetMacroExpansionOnlyInDirectives() {
866 DisableMacroExpansion = true;
867 MacroExpansionInDirectivesOverride = true;
870 /// \brief Peeks ahead N tokens and returns that token without consuming any
873 /// LookAhead(0) returns the next token that would be returned by Lex(),
874 /// LookAhead(1) returns the token after it, etc. This returns normal
875 /// tokens after phase 5. As such, it is equivalent to using
876 /// 'Lex', not 'LexUnexpandedToken'.
877 const Token &LookAhead(unsigned N) {
878 if (CachedLexPos + N < CachedTokens.size())
879 return CachedTokens[CachedLexPos+N];
881 return PeekAhead(N+1);
884 /// \brief When backtracking is enabled and tokens are cached,
885 /// this allows to revert a specific number of tokens.
887 /// Note that the number of tokens being reverted should be up to the last
888 /// backtrack position, not more.
889 void RevertCachedTokens(unsigned N) {
890 assert(isBacktrackEnabled() &&
891 "Should only be called when tokens are cached for backtracking");
892 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
893 && "Should revert tokens up to the last backtrack position, not more");
894 assert(signed(CachedLexPos) - signed(N) >= 0 &&
895 "Corrupted backtrack positions ?");
899 /// \brief Enters a token in the token stream to be lexed next.
901 /// If BackTrack() is called afterwards, the token will remain at the
903 void EnterToken(const Token &Tok) {
904 EnterCachingLexMode();
905 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
908 /// We notify the Preprocessor that if it is caching tokens (because
909 /// backtrack is enabled) it should replace the most recent cached tokens
910 /// with the given annotation token. This function has no effect if
911 /// backtracking is not enabled.
913 /// Note that the use of this function is just for optimization, so that the
914 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
916 void AnnotateCachedTokens(const Token &Tok) {
917 assert(Tok.isAnnotation() && "Expected annotation token");
918 if (CachedLexPos != 0 && isBacktrackEnabled())
919 AnnotatePreviousCachedTokens(Tok);
922 /// Get the location of the last cached token, suitable for setting the end
923 /// location of an annotation token.
924 SourceLocation getLastCachedTokenLocation() const {
925 assert(CachedLexPos != 0);
926 return CachedTokens[CachedLexPos-1].getLocation();
929 /// \brief Replace the last token with an annotation token.
931 /// Like AnnotateCachedTokens(), this routine replaces an
932 /// already-parsed (and resolved) token with an annotation
933 /// token. However, this routine only replaces the last token with
934 /// the annotation token; it does not affect any other cached
935 /// tokens. This function has no effect if backtracking is not
937 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
938 assert(Tok.isAnnotation() && "Expected annotation token");
939 if (CachedLexPos != 0 && isBacktrackEnabled())
940 CachedTokens[CachedLexPos-1] = Tok;
943 /// Update the current token to represent the provided
944 /// identifier, in order to cache an action performed by typo correction.
945 void TypoCorrectToken(const Token &Tok) {
946 assert(Tok.getIdentifierInfo() && "Expected identifier token");
947 if (CachedLexPos != 0 && isBacktrackEnabled())
948 CachedTokens[CachedLexPos-1] = Tok;
951 /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
952 /// CurTokenLexer pointers.
953 void recomputeCurLexerKind();
955 /// \brief Returns true if incremental processing is enabled
// NOTE(review): incremental processing appears to keep the preprocessor usable
// across multiple inputs (REPL-style clients) -- confirm.
956 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
958 /// \brief Enables the incremental processing
959 void enableIncrementalProcessing(bool value = true) {
960 IncrementalProcessing = value;
963 /// \brief Specify the point at which code-completion will be performed.
965 /// \param File the file in which code completion should occur. If
966 /// this file is included multiple times, code-completion will
967 /// perform completion the first time it is included. If NULL, this
968 /// function clears out the code-completion point.
970 /// \param Line the line at which code completion should occur
973 /// \param Column the column at which code completion should occur
976 /// \returns true if an error occurred, false otherwise.
977 bool SetCodeCompletionPoint(const FileEntry *File,
978 unsigned Line, unsigned Column);
980 /// \brief Determine if we are performing code completion.
// Enabled exactly when a code-completion file has been registered above.
981 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
983 /// \brief Returns the location of the code-completion point.
985 /// Returns an invalid location if code-completion is not enabled or the file
986 /// containing the code-completion point has not been lexed yet.
987 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
989 /// \brief Returns the start location of the file of code-completion point.
991 /// Returns an invalid location if code-completion is not enabled or the file
992 /// containing the code-completion point has not been lexed yet.
993 SourceLocation getCodeCompletionFileLoc() const {
994 return CodeCompletionFileLoc;
997 /// \brief Returns true if code-completion is enabled and we have hit the
998 /// code-completion point.
999 /// \sa setCodeCompletionReached()
999 bool isCodeCompletionReached() const { return CodeCompletionReached; }
1001 /// \brief Note that we hit the code-completion point.
1002 void setCodeCompletionReached() {
1003 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1004 CodeCompletionReached = true;
1005 // Silence any diagnostics that occur after we hit the code-completion.
1006 getDiagnostics().setSuppressAllDiagnostics(true);
1009 /// \brief The location of the currently-active \#pragma clang
1010 /// arc_cf_code_audited begin.
1012 /// Returns an invalid location if there is no such pragma active.
1013 SourceLocation getPragmaARCCFCodeAuditedLoc() const {
1014 return PragmaARCCFCodeAuditedLoc;
1017 /// \brief Set the location of the currently-active \#pragma clang
1018 /// arc_cf_code_audited begin. An invalid location ends the pragma.
1019 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
1020 PragmaARCCFCodeAuditedLoc = Loc;
1023 /// \brief Set the directory in which the main file should be considered
1024 /// to have been found, if it is not a real file.
1025 void setMainFileDir(const DirectoryEntry *Dir) {
1029 /// \brief Instruct the preprocessor to skip part of the main source file.
1031 /// \param Bytes The number of bytes in the preamble to skip.
1033 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1034 /// start of a line.
1035 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1036 SkipMainFilePreamble.first = Bytes;
1037 SkipMainFilePreamble.second = StartOfLine;
1040 /// Forwarding function for diagnostics. This emits a diagnostic at
1041 /// the specified Token's location, translating the token's start
1042 /// position in the current buffer into a SourcePosition object for rendering.
1043 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1044 return Diags->Report(Loc, DiagID);
1047 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1048 return Diags->Report(Tok.getLocation(), DiagID);
1051 /// Return the 'spelling' of the token at the given
1052 /// location; does not go up to the spelling location or down to the
1053 /// expansion location.
1055 /// \param buffer A buffer which will be used only if the token requires
1056 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1057 /// \param invalid If non-null, will be set \c true if an error occurs.
1058 StringRef getSpelling(SourceLocation loc,
1059 SmallVectorImpl<char> &buffer,
1060 bool *invalid = nullptr) const {
1061 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1064 /// \brief Return the 'spelling' of the Tok token.
1066 /// The spelling of a token is the characters used to represent the token in
1067 /// the source file after trigraph expansion and escaped-newline folding. In
1068 /// particular, this wants to get the true, uncanonicalized, spelling of
1069 /// things like digraphs, UCNs, etc.
1071 /// \param Invalid If non-null, will be set \c true if an error occurs.
1072 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1073 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1076 /// \brief Get the spelling of a token into a preallocated buffer, instead
1077 /// of as an std::string.
1079 /// The caller is required to allocate enough space for the token, which is
1080 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1081 /// actual result is returned.
1083 /// Note that this method may do two possible things: it may either fill in
1084 /// the buffer specified with characters, or it may *change the input pointer*
1085 /// to point to a constant buffer with the data already in it (avoiding a
1086 /// copy). The caller is not allowed to modify the returned buffer pointer
1087 /// if an internal buffer is returned.
1088 unsigned getSpelling(const Token &Tok, const char *&Buffer,
1089 bool *Invalid = nullptr) const {
1090 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1093 /// \brief Get the spelling of a token into a SmallVector.
1095 /// Note that the returned StringRef may not point to the
1096 /// supplied buffer if a copy can be avoided.
// \sa the const char*& overload above for the zero-copy contract.
1097 StringRef getSpelling(const Token &Tok,
1098 SmallVectorImpl<char> &Buffer,
1099 bool *Invalid = nullptr) const;
1101 /// \brief Relex the token at the specified location.
1102 /// \returns true if there was a failure, false on success.
1103 bool getRawToken(SourceLocation Loc, Token &Result,
1104 bool IgnoreWhiteSpace = false) {
1105 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1108 /// \brief Given a Token \p Tok that is a numeric constant with length 1,
1109 /// return the character.
1111 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1112 bool *Invalid = nullptr) const {
1113 assert(Tok.is(tok::numeric_constant) &&
1114 Tok.getLength() == 1 && "Called on unsupported token");
1115 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1117 // If the token is carrying a literal data pointer, just use it.
1118 if (const char *D = Tok.getLiteralData())
1121 // Otherwise, fall back on getCharacterData, which is slower, but always
1123 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1126 /// \brief Retrieve the name of the immediate macro expansion.
1128 /// This routine starts from a source location, and finds the name of the
1129 /// macro responsible for its immediate expansion. It looks through any
1130 /// intervening macro argument expansions to compute this. It returns a
1131 /// StringRef that refers to the SourceManager-owned buffer of the source
1132 /// where that macro name is spelled. Thus, the result shouldn't out-live
1133 /// the SourceManager.
1134 StringRef getImmediateMacroName(SourceLocation Loc) {
1135 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1138 /// \brief Plop the specified string into a scratch buffer and set the
1139 /// specified token's location and length to it.
1141 /// If specified, the source location provides a location of the expansion
1142 /// point of the token.
// NOTE(review): invalid (default) expansion locations presumably mean "no
// expansion point" -- confirm against ScratchBuffer usage.
1143 void CreateString(StringRef Str, Token &Tok,
1144 SourceLocation ExpansionLocStart = SourceLocation(),
1145 SourceLocation ExpansionLocEnd = SourceLocation());
1147 /// \brief Computes the source location just past the end of the
1148 /// token at this source location.
1150 /// This routine can be used to produce a source location that
1151 /// points just past the end of the token referenced by \p Loc, and
1152 /// is generally used when a diagnostic needs to point just after a
1153 /// token where it expected something different that it received. If
1154 /// the returned source location would not be meaningful (e.g., if
1155 /// it points into a macro), this routine returns an invalid
1156 /// source location.
1158 /// \param Offset an offset from the end of the token, where the source
1159 /// location should refer to. The default offset (0) produces a source
1160 /// location pointing just past the end of the token; an offset of 1 produces
1161 /// a source location pointing to the last character in the token, etc.
1162 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1163 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1166 /// \brief Returns true if the given MacroID location points at the first
1167 /// token of the macro expansion.
1169 /// \param MacroBegin If non-null and function returns true, it is set to
1170 /// begin location of the macro.
1171 bool isAtStartOfMacroExpansion(SourceLocation loc,
1172 SourceLocation *MacroBegin = nullptr) const {
1173 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1177 /// \brief Returns true if the given MacroID location points at the last
1178 /// token of the macro expansion.
1180 /// \param MacroEnd If non-null and function returns true, it is set to
1181 /// end location of the macro.
1182 bool isAtEndOfMacroExpansion(SourceLocation loc,
1183 SourceLocation *MacroEnd = nullptr) const {
1184 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1187 /// \brief Print the token to stderr, used for debugging.
1188 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
// Debug dump helpers for a source location and a macro definition
// (presumably also to stderr, like DumpToken -- confirm).
1189 void DumpLocation(SourceLocation Loc) const;
1190 void DumpMacro(const MacroInfo &MI) const;
1192 /// \brief Given a location that specifies the start of a
1193 /// token, return a new location that specifies a character within the token.
1194 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1195 unsigned Char) const {
1196 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1199 /// \brief Increment the counters for the number of token paste operations
1202 /// If fast was specified, this is a 'fast paste' case we handled.
1203 void IncrementPasteCounter(bool isFast) {
1205 ++NumFastTokenPaste;
// NOTE(review): presumably the total heap memory used by this preprocessor,
// for statistics reporting -- confirm.
1212 size_t getTotalMemory() const;
1214 /// When the macro expander pastes together a comment (/##/) in Microsoft
1215 /// mode, this method handles updating the current state, returning the
1216 /// token on the next source line.
1217 void HandleMicrosoftCommentPaste(Token &Tok);
1219 //===--------------------------------------------------------------------===//
1220 // Preprocessor callback methods. These are invoked by a lexer as various
1221 // directives and events are found.
1223 /// Given a tok::raw_identifier token, look up the
1224 /// identifier information for the token and install it into the token,
1225 /// updating the token kind accordingly.
1226 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
// Maps a poisoned identifier to the diagnostic ID to emit when it is used;
// populated by SetPoisonReason and consumed by HandlePoisonedIdentifier.
1229 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1233 /// \brief Specifies the reason for poisoning an identifier.
1235 /// If that identifier is accessed while poisoned, then this reason will be
1236 /// used instead of the default "poisoned" diagnostic.
1237 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1239 /// \brief Display reason for poisoned identifier.
1240 void HandlePoisonedIdentifier(Token & Tok);
1242 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1243 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1244 if(II->isPoisoned()) {
1245 HandlePoisonedIdentifier(Identifier);
1251 /// Identifiers used for SEH handling in Borland. These are only
1252 /// allowed in particular circumstances
// __except block
1254 IdentifierInfo *Ident__exception_code,
1255 *Ident___exception_code,
1256 *Ident_GetExceptionCode;
1257 // __except filter expression
1258 IdentifierInfo *Ident__exception_info,
1259 *Ident___exception_info,
1260 *Ident_GetExceptionInfo;
// __finally
1262 IdentifierInfo *Ident__abnormal_termination,
1263 *Ident___abnormal_termination,
1264 *Ident_AbnormalTermination;
// NOTE(review): presumably a pointer one past the end of the current lexer's
// buffer -- confirm in the implementation.
1266 const char *getCurLexerEndPos();
1269 void PoisonSEHIdentifiers(bool Poison = true); // Borland
1271 /// \brief Callback invoked when the lexer reads an identifier and has
1272 /// filled in the tokens IdentifierInfo member.
1274 /// This callback potentially macro expands it or turns it into a named
1275 /// token (like 'for').
1277 /// \returns true if we actually computed a token, false if we need to
1277 /// lex again.
1279 bool HandleIdentifier(Token &Identifier);
1282 /// \brief Callback invoked when the lexer hits the end of the current file.
1284 /// This either returns the EOF token and returns true, or
1285 /// pops a level off the include stack and returns false, at which point the
1286 /// client should call lex again.
1287 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1289 /// \brief Callback invoked when the current TokenLexer hits the end of its
1289 /// token stream.
1291 bool HandleEndOfTokenLexer(Token &Result);
1293 /// \brief Callback invoked when the lexer sees a # token at the start of a
1293 /// line.
1296 /// This consumes the directive, modifies the lexer/preprocessor state, and
1297 /// advances the lexer(s) so that the next token read is the correct one.
1298 void HandleDirective(Token &Result);
1300 /// \brief Ensure that the next token is a tok::eod token.
1302 /// If not, emit a diagnostic and consume up until the eod.
1303 /// If \p EnableMacros is true, then we consider macros that expand to zero
1304 /// tokens as being ok.
1305 void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
1307 /// \brief Read and discard all tokens remaining on the current line until
1308 /// the tok::eod token is found.
1309 void DiscardUntilEndOfDirective();
1311 /// \brief Returns true if the preprocessor has seen a use of
1312 /// __DATE__ or __TIME__ in the file so far.
1313 bool SawDateOrTime() const {
1314 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1316 unsigned getCounterValue() const { return CounterValue; }
1317 void setCounterValue(unsigned V) { CounterValue = V; }
1319 /// \brief Retrieves the module that we're currently building, if any.
1320 Module *getCurrentModule();
1322 /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
1323 MacroInfo *AllocateMacroInfo(SourceLocation L);
1325 /// \brief Allocate a new MacroInfo object loaded from an AST file.
1326 MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L,
1327 unsigned SubModuleID);
1329 /// \brief Turn the specified lexer token into a fully checked and spelled
1330 /// filename, e.g. as an operand of \#include.
1332 /// The caller is expected to provide a buffer that is large enough to hold
1333 /// the spelling of the filename, but is also expected to handle the case
1334 /// when this method decides to use a different buffer.
1336 /// \returns true if the input filename was in <>'s or false if it was
1336 /// in ""'s.
1338 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
1340 /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
1342 /// Returns null on failure. \p isAngled indicates whether the file
1343 /// reference is for system \#include's or not (i.e. using <> instead of "").
1344 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1345 bool isAngled, const DirectoryLookup *FromDir,
1346 const FileEntry *FromFile,
1347 const DirectoryLookup *&CurDir,
1348 SmallVectorImpl<char> *SearchPath,
1349 SmallVectorImpl<char> *RelativePath,
1350 ModuleMap::KnownHeader *SuggestedModule,
1351 bool SkipCache = false);
1353 /// \brief Get the DirectoryLookup structure used to find the current
1354 /// FileEntry, if CurLexer is non-null and if applicable.
1356 /// This allows us to implement \#include_next and find directory-specific
1356 /// file information.
1358 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1360 /// \brief Return true if we're in the top-level file, not in a \#include.
1361 bool isInPrimaryFile() const;
1363 /// \brief Handle cases where the \#include name is expanded
1364 /// from a macro as multiple tokens, which need to be glued together.
1366 /// This occurs for code like:
1368 /// \#define FOO <x/y.h>
1371 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
1373 /// This code concatenates and consumes tokens up to the '>' token. It
1374 /// returns false if the > was found, otherwise it returns true if it finds
1375 /// and consumes the EOD marker.
1376 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
1377 SourceLocation &End);
1379 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1380 /// followed by EOD. Return true if the token is not a valid on-off-switch.
1381 bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
// NOTE(review): presumably validates a \#define/\#undef name; semantics of the
// return value and \p ShadowFlag should be confirmed against ReadMacroName.
1383 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
1384 bool *ShadowFlag = nullptr);
1388 void PushIncludeMacroStack() {
1389 assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
1390 IncludeMacroStack.push_back(IncludeStackInfo(
1391 CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer),
1392 CurPPLexer, std::move(CurTokenLexer), CurDirLookup));
1393 CurPPLexer = nullptr;
1396 void PopIncludeMacroStack() {
1397 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1398 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
1399 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1400 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1401 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
1402 CurSubmodule = IncludeMacroStack.back().TheSubmodule;
1403 CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1404 IncludeMacroStack.pop_back();
1407 void PropagateLineStartLeadingSpaceInfo(Token &Result);
1409 /// \brief Allocate a new MacroInfo object.
1410 MacroInfo *AllocateMacroInfo();
// NOTE(review): the 'DefMacroDirective *' return-type line of the following
// declaration appears to have been lost in extraction -- restore when merging.
1413 AllocateDefMacroDirective(MacroInfo *MI, SourceLocation Loc,
1414 unsigned ImportedFromModuleID = 0,
1415 ArrayRef<unsigned> Overrides = None);
1416 UndefMacroDirective *
1417 AllocateUndefMacroDirective(SourceLocation UndefLoc,
1418 unsigned ImportedFromModuleID = 0,
1419 ArrayRef<unsigned> Overrides = None);
// NOTE(review): this declaration continues past the visible chunk (its
// trailing parameters and ');' are not shown here).
1420 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1423 /// \brief Lex and validate a macro name, which occurs after a
1424 /// \#define or \#undef.
1426 /// \param MacroNameTok Token that represents the name defined or undefined.
1427 /// \param IsDefineUndef Kind of preprocessor directive.
1428 /// \param ShadowFlag Points to flag that is set if macro name shadows
1428 /// a keyword.
1431 /// This emits a diagnostic, sets the token kind to eod,
1432 /// and discards the rest of the macro line if the macro name is invalid.
1433 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
1434 bool *ShadowFlag = nullptr);
1436 /// The ( starting an argument list of a macro definition has just been read.
1437 /// Lex the rest of the arguments and the closing ), updating \p MI with
1438 /// what we learn and saving in \p LastTok the last token read.
1439 /// Return true if an error occurs parsing the arg list.
1440 bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
1442 /// We just read a \#if or related directive and decided that the
1443 /// subsequent tokens are in the \#if'd out portion of the
1444 /// file. Lex the rest of the file, until we see an \#endif. If \p
1445 /// FoundNonSkipPortion is true, then we have already emitted code for part of
1446 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
1447 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
1448 /// already seen one so a \#else directive is a duplicate. When this returns,
1449 /// the caller can lex the first valid token.
1450 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
1451 bool FoundNonSkipPortion, bool FoundElse,
1452 SourceLocation ElseLoc = SourceLocation());
1454 /// \brief A fast PTH version of SkipExcludedConditionalBlock.
1455 void PTHSkipExcludedConditionalBlock();
1457 /// \brief Evaluate an integer constant expression that may occur after a
1458 /// \#if or \#elif directive and return it as a bool.
1460 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
1461 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
1463 /// \brief Install the standard preprocessor pragmas:
1464 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
1465 void RegisterBuiltinPragmas();
1467 /// \brief Register builtin macros such as __LINE__ with the identifier table.
1468 void RegisterBuiltinMacros();
1470 /// If an identifier token is read that is to be expanded as a macro, handle
1471 /// it and return the next token as 'Tok'. If we lexed a token, return true;
1472 /// otherwise the caller should lex again.
1473 bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD);
1475 /// \brief Cache macro expanded tokens for TokenLexers.
1477 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
1478 /// going to lex in the cache and when it finishes the tokens are removed
1479 /// from the end of the cache.
1480 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
1481 ArrayRef<Token> tokens);
1482 void removeCachedMacroExpandedTokensOfLastLexer();
// TokenLexer needs direct access to the macro-expansion token cache above.
1483 friend void TokenLexer::ExpandFunctionArguments();
1485 /// Determine whether the next preprocessor token to be
1486 /// lexed is a '('. If so, consume the token and return true, if not, this
1487 /// method should have no observable side-effect on the lexed tokens.
1488 bool isNextPPTokenLParen();
1490 /// After reading "MACRO(", this method is invoked to read all of the formal
1491 /// arguments specified for the macro invocation. Returns null on error.
1492 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
1493 SourceLocation &ExpansionEnd);
1495 /// \brief If an identifier token is read that is to be expanded
1496 /// as a builtin macro, handle it and return the next token as 'Tok'.
1497 void ExpandBuiltinMacro(Token &Tok);
1499 /// \brief Read a \c _Pragma directive, slice it up, process it, then
1500 /// return the first token after the directive.
1501 /// This assumes that the \c _Pragma token has just been read into \p Tok.
1502 void Handle_Pragma(Token &Tok);
1504 /// \brief Like Handle_Pragma except the pragma text is not enclosed within
1505 /// a string literal.
1506 void HandleMicrosoft__pragma(Token &Tok);
1508 /// \brief Add a lexer to the top of the include stack and
1509 /// start lexing tokens from it instead of the current buffer.
1510 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
1512 /// \brief Add a lexer to the top of the include stack and
1513 /// start getting tokens from it using the PTH cache.
1514 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
1516 /// \brief Set the FileID for the preprocessor predefines.
1517 void setPredefinesFileID(FileID FID) {
1518 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
1519 PredefinesFileID = FID;
1522 /// \brief Returns true if we are lexing from a file and not a
1523 /// pragma or a macro.
1524 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
1525 return L ? !L->isPragmaLexer() : P != nullptr;
1528 static bool IsFileLexer(const IncludeStackInfo& I) {
1529 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
1532 bool IsFileLexer() const {
1533 return IsFileLexer(CurLexer.get(), CurPPLexer);
1536 //===--------------------------------------------------------------------===//
1538 void CachingLex(Token &Result);
1539 bool InCachingLexMode() const {
1540 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
1541 // that we are past EOF, not that we are in CachingLex mode.
1542 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
1543 !IncludeMacroStack.empty();
1545 void EnterCachingLexMode();
1546 void ExitCachingLexMode() {
1547 if (InCachingLexMode())
1548 RemoveTopOfLexerStack();
1550 const Token &PeekAhead(unsigned N);
1551 void AnnotatePreviousCachedTokens(const Token &Tok);
1553 //===--------------------------------------------------------------------===//
1554 /// Handle*Directive - implement the various preprocessor directives. These
1555 /// should side-effect the current preprocessor object so that the next call
1556 /// to Lex() will return the appropriate token next.
1557 void HandleLineDirective(Token &Tok);
1558 void HandleDigitDirective(Token &Tok);
1559 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
1560 void HandleIdentSCCSDirective(Token &Tok);
1561 void HandleMacroPublicDirective(Token &Tok);
1562 void HandleMacroPrivateDirective(Token &Tok);
// File inclusion.
1565 void HandleIncludeDirective(SourceLocation HashLoc,
1567 const DirectoryLookup *LookupFrom = nullptr,
1568 const FileEntry *LookupFromFile = nullptr,
1569 bool isImport = false);
1570 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
1571 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
1572 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
1573 void HandleMicrosoftImportDirective(Token &Tok);
1575 // Module inclusion testing.
1576 /// \brief Find the module for the source or header file that \p FilenameLoc
1576 /// points to.
1578 Module *getModuleForLocation(SourceLocation FilenameLoc);
// Macro handling.
1581 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
1582 void HandleUndefDirective(Token &Tok);
1584 // Conditional Inclusion.
1585 void HandleIfdefDirective(Token &Tok, bool isIfndef,
1586 bool ReadAnyTokensBeforeDirective);
1587 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
1588 void HandleEndifDirective(Token &Tok);
1589 void HandleElseDirective(Token &Tok);
1590 void HandleElifDirective(Token &Tok);
// Pragmas.
1593 void HandlePragmaDirective(SourceLocation IntroducerLoc,
1594 PragmaIntroducerKind Introducer);
1596 void HandlePragmaOnce(Token &OnceTok);
1597 void HandlePragmaMark();
1598 void HandlePragmaPoison(Token &PoisonTok);
1599 void HandlePragmaSystemHeader(Token &SysHeaderTok);
1600 void HandlePragmaDependency(Token &DependencyTok);
1601 void HandlePragmaPushMacro(Token &Tok);
1602 void HandlePragmaPopMacro(Token &Tok);
1603 void HandlePragmaIncludeAlias(Token &Tok);
1604 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
1606 // Return true and store the first token only if any CommentHandler
1607 // has inserted some tokens and getCommentRetentionState() is false.
1608 bool HandleComment(Token &Token, SourceRange Comment);
1610 /// \brief A macro is used, update information about macros that need unused
1610 /// warnings.
1612 void markMacroAsUsed(MacroInfo *MI);
1615 /// \brief Abstract base class that describes a handler that will receive
1616 /// source ranges for each of the comments encountered in the source file.
1617 class CommentHandler {
// NOTE(review): the 'public:' access specifier and the closing '};' of this
// class appear to have been lost in extraction -- restore when merging.
1619 virtual ~CommentHandler();
1621 // The handler shall return true if it has pushed any tokens
1622 // to be read using e.g. EnterToken or EnterTokenStream.
1623 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
1626 } // end namespace clang