1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Defines the clang::Preprocessor interface.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16 #define LLVM_CLANG_LEX_PREPROCESSOR_H
18 #include "clang/Basic/Builtins.h"
19 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Basic/IdentifierTable.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Lex/Lexer.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/ModuleMap.h"
25 #include "clang/Lex/PPCallbacks.h"
26 #include "clang/Lex/PTHLexer.h"
27 #include "clang/Lex/PTHManager.h"
28 #include "clang/Lex/TokenLexer.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/IntrusiveRefCntPtr.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/Support/Allocator.h"
39 template<unsigned InternalLen> class SmallString;
45 class ExternalPreprocessorSource;
49 class PragmaNamespace;
55 class CodeCompletionHandler;
56 class DirectoryLookup;
57 class PreprocessingRecord;
59 class PreprocessorOptions;
61 /// \brief Stores token information for comparing actual tokens with
62 /// predefined values. Only handles simple tokens and identifiers.
/// Constructs a TokenValue that matches tokens of the given \p Kind; no
/// identifier is associated (II is null). The assertions below reject the
/// kinds this constructor does not support: raw identifiers, identifiers,
/// literals and annotations.
68 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
69 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
70 assert(Kind != tok::identifier &&
71 "Identifiers should be created by TokenValue(IdentifierInfo *)");
72 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
73 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
/// Constructs a TokenValue of kind tok::identifier that carries \p II as the
/// identifier to compare against.
75 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
/// Returns true when \p Tok has the stored Kind and, if an identifier is
/// stored (II is non-null), \p Tok refers to that same IdentifierInfo.
76 bool operator==(const Token &Tok) const {
77 return Tok.getKind() == Kind &&
78 (!II || II == Tok.getIdentifierInfo());
82 /// \brief Engages in a tight little dance with the lexer to efficiently
83 /// preprocess tokens.
85 /// Lexers know only about tokens within a single source file, and don't
86 /// know anything about preprocessor-level issues like the \#include stack,
87 /// token expansion, etc.
88 class Preprocessor : public RefCountedBase<Preprocessor> {
89 IntrusiveRefCntPtr<PreprocessorOptions> PPOpts;
90 DiagnosticsEngine *Diags;
91 LangOptions &LangOpts;
92 const TargetInfo *Target;
94 SourceManager &SourceMgr;
95 ScratchBuffer *ScratchBuf;
96 HeaderSearch &HeaderInfo;
97 ModuleLoader &TheModuleLoader;
99 /// \brief External source of macros.
100 ExternalPreprocessorSource *ExternalSource;
103 /// An optional PTHManager object used for getting tokens from
104 /// a token cache rather than lexing the original source file.
105 std::unique_ptr<PTHManager> PTH;
107 /// A BumpPtrAllocator object used to quickly allocate and release
108 /// objects internal to the Preprocessor.
109 llvm::BumpPtrAllocator BP;
111 /// Identifiers for builtin macros and other builtins.
112 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
113 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
114 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
115 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
116 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
117 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
118 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
119 IdentifierInfo *Ident__identifier; // __identifier
120 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
121 IdentifierInfo *Ident__has_feature; // __has_feature
122 IdentifierInfo *Ident__has_extension; // __has_extension
123 IdentifierInfo *Ident__has_builtin; // __has_builtin
124 IdentifierInfo *Ident__has_attribute; // __has_attribute
125 IdentifierInfo *Ident__has_include; // __has_include
126 IdentifierInfo *Ident__has_include_next; // __has_include_next
127 IdentifierInfo *Ident__has_warning; // __has_warning
128 IdentifierInfo *Ident__is_identifier; // __is_identifier
129 IdentifierInfo *Ident__building_module; // __building_module
130 IdentifierInfo *Ident__MODULE__; // __MODULE__
132 SourceLocation DATELoc, TIMELoc;
133 unsigned CounterValue; // Next __COUNTER__ value.
136 /// \brief Maximum depth of \#includes.
137 MaxAllowedIncludeStackDepth = 200
140 // State that is set before the preprocessor begins.
141 bool KeepComments : 1;
142 bool KeepMacroComments : 1;
143 bool SuppressIncludeNotFoundError : 1;
145 // State that changes while the preprocessor runs:
146 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
148 /// Whether the preprocessor owns the header search object.
149 bool OwnsHeaderSearch : 1;
151 /// True if macro expansion is disabled.
152 bool DisableMacroExpansion : 1;
154 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
155 /// when parsing preprocessor directives.
156 bool MacroExpansionInDirectivesOverride : 1;
158 class ResetMacroExpansionHelper;
160 /// \brief Whether we have already loaded macros from the external source.
161 mutable bool ReadMacrosFromExternalSource : 1;
163 /// \brief True if pragmas are enabled.
164 bool PragmasEnabled : 1;
166 /// \brief True if the current build action is a preprocessing action.
167 bool PreprocessedOutput : 1;
169 /// \brief True if we are currently preprocessing a #if or #elif directive
170 bool ParsingIfOrElifDirective;
172 /// \brief True if we are pre-expanding macro arguments.
173 bool InMacroArgPreExpansion;
175 /// \brief Mapping/lookup information for all identifiers in
176 /// the program, including program keywords.
177 mutable IdentifierTable Identifiers;
179 /// \brief This table contains all the selectors in the program.
181 /// Unlike IdentifierTable above, this table *isn't* populated by the
182 /// preprocessor. It is declared/expanded here because its role/lifetime is
183 /// conceptually similar to the IdentifierTable. In addition, the current
184 /// control flow (in clang::ParseAST()) makes it convenient to put it here.
186 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
187 /// the lifetime of the preprocessor.
188 SelectorTable Selectors;
190 /// \brief Information about builtins.
191 Builtin::Context BuiltinInfo;
193 /// \brief Tracks all of the pragmas that the client registered
194 /// with this preprocessor.
195 PragmaNamespace *PragmaHandlers;
197 /// \brief Tracks all of the comment handlers that the client registered
198 /// with this preprocessor.
199 std::vector<CommentHandler *> CommentHandlers;
201 /// \brief True if we want to ignore EOF token and continue later on (thus
202 /// avoid tearing the Lexer and etc. down).
203 bool IncrementalProcessing;
205 /// The kind of translation unit we are processing.
206 TranslationUnitKind TUKind;
208 /// \brief The code-completion handler.
209 CodeCompletionHandler *CodeComplete;
211 /// \brief The file that we're performing code-completion for, if any.
212 const FileEntry *CodeCompletionFile;
214 /// \brief The offset in file for the code-completion point.
215 unsigned CodeCompletionOffset;
217 /// \brief The location for the code-completion point. This gets instantiated
218 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
219 SourceLocation CodeCompletionLoc;
221 /// \brief The start location for the file of the code-completion point.
223 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
224 /// for preprocessing.
225 SourceLocation CodeCompletionFileLoc;
227 /// \brief The source location of the \c import contextual keyword we just
229 SourceLocation ModuleImportLoc;
231 /// \brief The module import path that we're currently processing.
232 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
234 /// \brief Whether the last token we lexed was an '@'.
237 /// \brief Whether the module import expects an identifier next. Otherwise,
238 /// it expects a '.' or ';'.
239 bool ModuleImportExpectsIdentifier;
241 /// \brief The source location of the currently-active
242 /// \#pragma clang arc_cf_code_audited begin.
243 SourceLocation PragmaARCCFCodeAuditedLoc;
245 /// \brief True if we hit the code-completion point.
246 bool CodeCompletionReached;
248 /// \brief The number of bytes that we will initially skip when entering the
249 /// main file, along with a flag that indicates whether skipping this number
250 /// of bytes will place the lexer at the start of a line.
252 /// This is used when loading a precompiled preamble.
253 std::pair<unsigned, bool> SkipMainFilePreamble;
255 /// \brief The current top of the stack that we're lexing from if
256 /// not expanding a macro and we are lexing directly from source code.
258 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
259 std::unique_ptr<Lexer> CurLexer;
261 /// \brief The current top of stack that we're lexing from if
262 /// not expanding from a macro and we are lexing from a PTH cache.
264 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
265 std::unique_ptr<PTHLexer> CurPTHLexer;
267 /// \brief The current top of the stack that we're lexing from
268 /// if not expanding a macro.
270 /// This is an alias for either CurLexer or CurPTHLexer.
271 PreprocessorLexer *CurPPLexer;
273 /// \brief Used to find the current FileEntry, if CurLexer is non-null
274 /// and if applicable.
276 /// This allows us to implement \#include_next and find directory-specific
278 const DirectoryLookup *CurDirLookup;
280 /// \brief The current macro we are expanding, if we are expanding a macro.
282 /// One of CurLexer and CurTokenLexer must be null.
283 std::unique_ptr<TokenLexer> CurTokenLexer;
285 /// \brief The kind of lexer we're currently working with.
291 CLK_LexAfterModuleImport
294 /// \brief If the current lexer is for a submodule that is being built, this
295 /// is that submodule.
296 Module *CurSubmodule;
298 /// \brief Keeps track of the stack of files currently
299 /// \#included, and macros currently being expanded from, not counting
300 /// CurLexer/CurTokenLexer.
301 struct IncludeStackInfo {
302 enum CurLexerKind CurLexerKind;
303 Module *TheSubmodule;
304 std::unique_ptr<Lexer> TheLexer;
305 std::unique_ptr<PTHLexer> ThePTHLexer;
306 PreprocessorLexer *ThePPLexer;
307 std::unique_ptr<TokenLexer> TheTokenLexer;
308 const DirectoryLookup *TheDirLookup;
310 // The following constructors are completely useless copies of the default
311 // versions, only needed to pacify MSVC.
/// Member-wise constructor: each argument is moved into the member of the
/// same name. The three lexers arrive as rvalue references to
/// std::unique_ptr, so ownership of them passes to this entry.
312 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
313 std::unique_ptr<Lexer> &&TheLexer,
314 std::unique_ptr<PTHLexer> &&ThePTHLexer,
315 PreprocessorLexer *ThePPLexer,
316 std::unique_ptr<TokenLexer> &&TheTokenLexer,
317 const DirectoryLookup *TheDirLookup)
318 : CurLexerKind(std::move(CurLexerKind)),
319 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
320 ThePTHLexer(std::move(ThePTHLexer)),
321 ThePPLexer(std::move(ThePPLexer)),
322 TheTokenLexer(std::move(TheTokenLexer)),
323 TheDirLookup(std::move(TheDirLookup)) {}
/// Move constructor: moves every member out of \p RHS, including the three
/// owning lexer pointers (TheLexer, ThePTHLexer, TheTokenLexer).
324 IncludeStackInfo(IncludeStackInfo &&RHS)
325 : CurLexerKind(std::move(RHS.CurLexerKind)),
326 TheSubmodule(std::move(RHS.TheSubmodule)),
327 TheLexer(std::move(RHS.TheLexer)),
328 ThePTHLexer(std::move(RHS.ThePTHLexer)),
329 ThePPLexer(std::move(RHS.ThePPLexer)),
330 TheTokenLexer(std::move(RHS.TheTokenLexer)),
331 TheDirLookup(std::move(RHS.TheDirLookup)) {}
333 std::vector<IncludeStackInfo> IncludeMacroStack;
335 /// \brief Actions invoked when some preprocessor activity is
336 /// encountered (e.g. a file is \#included, etc).
337 PPCallbacks *Callbacks;
339 struct MacroExpandsInfo {
/// Initializes the Tok, MD and Range members from the corresponding
/// arguments; no other work is done.
343 MacroExpandsInfo(Token Tok, MacroDirective *MD, SourceRange Range)
344 : Tok(Tok), MD(MD), Range(Range) { }
346 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
348 /// For each IdentifierInfo that was associated with a macro, we
349 /// keep a mapping to the history of all macro definitions and #undefs in
350 /// the reverse order (the latest one is in the head of the list).
351 llvm::DenseMap<const IdentifierInfo*, MacroDirective*> Macros;
352 friend class ASTReader;
354 /// \brief Macros that we want to warn about because they are not used at the end
355 /// of the translation unit.
357 /// We store just their SourceLocations instead of
358 /// something like MacroInfo*. The benefit of this is that when we are
359 /// deserializing from PCH, we don't need to deserialize identifier & macros
360 /// just so that we can report that they are unused, we just warn using
361 /// the SourceLocations of this set (that will be filled by the ASTReader).
362 /// We are using SmallPtrSet instead of a vector for faster removal.
363 typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
364 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
366 /// \brief A "freelist" of MacroArg objects that can be
367 /// reused for quick allocation.
368 MacroArgs *MacroArgCache;
369 friend class MacroArgs;
371 /// For each IdentifierInfo used in a \#pragma push_macro directive,
372 /// we keep a MacroInfo stack used to restore the previous macro value.
373 llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
375 // Various statistics we track for performance analysis.
376 unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
377 unsigned NumIf, NumElse, NumEndif;
378 unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
379 unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
380 unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
383 /// \brief The predefined macros that preprocessor should use from the
384 /// command line etc.
385 std::string Predefines;
387 /// \brief The file ID for the preprocessor predefines.
388 FileID PredefinesFileID;
391 /// \brief Cache of macro expanders to reduce malloc traffic.
392 enum { TokenLexerCacheSize = 8 };
393 unsigned NumCachedTokenLexers;
394 TokenLexer *TokenLexerCache[TokenLexerCacheSize];
397 /// \brief Keeps macro expanded tokens for TokenLexers.
399 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
400 /// going to lex in the cache and when it finishes the tokens are removed
401 /// from the end of the cache.
402 SmallVector<Token, 16> MacroExpandedTokens;
403 std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
405 /// \brief A record of the macro definitions and expansions that
406 /// occurred during preprocessing.
408 /// This is an optional side structure that can be enabled with
409 /// \c createPreprocessingRecord() prior to preprocessing.
410 PreprocessingRecord *Record;
412 private: // Cached tokens state.
413 typedef SmallVector<Token, 1> CachedTokensTy;
415 /// \brief Cached tokens are stored here when we do backtracking or
416 /// lookahead. They are "lexed" by the CachingLex() method.
417 CachedTokensTy CachedTokens;
419 /// \brief The position of the cached token that CachingLex() should
422 /// If it points beyond the CachedTokens vector, it means that a normal
423 /// Lex() should be invoked.
424 CachedTokensTy::size_type CachedLexPos;
426 /// \brief Stack of backtrack positions, allowing nested backtracks.
428 /// The EnableBacktrackAtThisPos() method pushes a position to
429 /// indicate where CachedLexPos should be set when the BackTrack() method is
430 /// invoked (at which point the last position is popped).
431 std::vector<CachedTokensTy::size_type> BacktrackPositions;
433 struct MacroInfoChain {
435 MacroInfoChain *Next;
436 MacroInfoChain *Prev;
439 /// MacroInfos are managed as a chain for easy disposal. This is the head
441 MacroInfoChain *MIChainHead;
443 /// A "freelist" of MacroInfo objects that can be reused for quick
445 MacroInfoChain *MICache;
447 struct DeserializedMacroInfoChain {
449 unsigned OwningModuleID; // MUST be immediately after the MacroInfo object
450 // so it can be accessed by MacroInfo::getOwningModuleID().
451 DeserializedMacroInfoChain *Next;
453 DeserializedMacroInfoChain *DeserialMIChainHead;
456 Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
457 DiagnosticsEngine &diags, LangOptions &opts,
458 SourceManager &SM, HeaderSearch &Headers,
459 ModuleLoader &TheModuleLoader,
460 IdentifierInfoLookup *IILookup = nullptr,
461 bool OwnsHeaderSearch = false,
462 TranslationUnitKind TUKind = TU_Complete);
466 /// \brief Initialize the preprocessor using information about the target.
468 /// \param Target is owned by the caller and must remain valid for the
469 /// lifetime of the preprocessor.
470 void Initialize(const TargetInfo &Target);
472 /// \brief Retrieve the preprocessor options used to initialize this
/// preprocessor. PPOpts is dereferenced without a null check.
474 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
/// Returns the diagnostics engine that Diags currently points to.
476 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
/// Makes Diags point at \p D. Only the address is stored, so \p D has to
/// stay alive for as long as this preprocessor uses it.
477 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
/// Returns a read-only view of the language options (the LangOpts reference
/// member).
479 const LangOptions &getLangOpts() const { return LangOpts; }
/// Returns the target description. Target is dereferenced without a null
/// check. NOTE(review): presumably only valid once Initialize() has been
/// called -- confirm.
480 const TargetInfo &getTargetInfo() const { return *Target; }
/// Returns the file manager (the FileMgr member).
481 FileManager &getFileManager() const { return FileMgr; }
/// Returns the source manager (the SourceMgr reference member).
482 SourceManager &getSourceManager() const { return SourceMgr; }
/// Returns the header search object (the HeaderInfo reference member).
483 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
/// Returns mutable access to the identifier table held by this preprocessor.
485 IdentifierTable &getIdentifierTable() { return Identifiers; }
/// Returns mutable access to the selector table held by this preprocessor.
486 SelectorTable &getSelectorTable() { return Selectors; }
/// Returns mutable access to the information about builtins.
487 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
/// Returns the BumpPtrAllocator (BP) used to allocate objects internal to
/// the preprocessor.
488 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
/// Sets the PTH manager. NOTE(review): PTH is a std::unique_ptr, so this
/// presumably takes ownership of \p pm -- confirm against the definition.
490 void setPTHManager(PTHManager* pm);
/// Returns a non-owning pointer to the PTH manager (PTH.get()); null when
/// PTH holds nothing.
492 PTHManager *getPTHManager() { return PTH.get(); }
/// Sets the external source of macros. The pointer is stored as given.
494 void setExternalSource(ExternalPreprocessorSource *Source) {
495 ExternalSource = Source;
/// Returns the external source of macros as currently stored in
/// ExternalSource.
498 ExternalPreprocessorSource *getExternalSource() const {
499 return ExternalSource;
502 /// \brief Retrieve the module loader associated with this preprocessor.
503 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
/// Returns the HadFatalFailure flag of the associated module loader.
505 bool hadModuleLoaderFatalFailure() const {
506 return TheModuleLoader.HadFatalFailure;
509 /// \brief True if we are currently preprocessing a #if or #elif directive
510 bool isParsingIfOrElifDirective() const {
511 return ParsingIfOrElifDirective;
514 /// \brief Control whether the preprocessor retains comments in output.
///
/// KeepComments is also switched on whenever \p KeepMacroComments is true, so
/// asking for macro comments implies keeping comments in general.
515 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
// NOTE(review): bitwise '|' on two bools; same result as '||' here.
516 this->KeepComments = KeepComments | KeepMacroComments;
517 this->KeepMacroComments = KeepMacroComments;
/// Returns the KeepComments flag. KeepMacroComments is not reported
/// separately by this accessor.
520 bool getCommentRetentionState() const { return KeepComments; }
/// Sets the PragmasEnabled flag to \p Enabled.
522 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
/// Returns true if pragmas are enabled (the PragmasEnabled flag).
523 bool getPragmasEnabled() const { return PragmasEnabled; }
/// Sets the SuppressIncludeNotFoundError flag. NOTE(review): presumably
/// silences the diagnostic for an \#include that cannot be found -- confirm
/// where the flag is read.
525 void SetSuppressIncludeNotFoundError(bool Suppress) {
526 SuppressIncludeNotFoundError = Suppress;
/// Returns the SuppressIncludeNotFoundError flag.
/// NOTE(review): this getter only reads state but is not const-qualified.
529 bool GetSuppressIncludeNotFoundError() {
530 return SuppressIncludeNotFoundError;
533 /// Sets whether the preprocessor is responsible for producing output or if
534 /// it is producing tokens to be consumed by Parse and Sema.
535 void setPreprocessedOutput(bool IsPreprocessedOutput) {
536 PreprocessedOutput = IsPreprocessedOutput;
539 /// Returns true if the preprocessor is responsible for generating output,
540 /// false if it is producing tokens to be consumed by Parse and Sema.
541 bool isPreprocessedOutput() const { return PreprocessedOutput; }
543 /// \brief Return true if we are lexing directly from the specified lexer.
544 bool isCurrentLexer(const PreprocessorLexer *L) const {
545 return CurPPLexer == L;
548 /// \brief Return the current lexer being lexed from.
550 /// Note that this ignores any potentially active macro expansions and _Pragma
551 /// expansions going on at the time.
552 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
554 /// \brief Return the current file lexer being lexed from.
556 /// Note that this ignores any potentially active macro expansions and _Pragma
557 /// expansions going on at the time.
558 PreprocessorLexer *getCurrentFileLexer() const;
560 /// \brief Returns the FileID for the preprocessor predefines.
561 FileID getPredefinesFileID() const { return PredefinesFileID; }
564 /// \brief Accessors for preprocessor callbacks.
566 /// Note that this class takes ownership of any PPCallbacks object given to
568 PPCallbacks *getPPCallbacks() const { return Callbacks; }
569 void addPPCallbacks(PPCallbacks *C) {
571 C = new PPChainedCallbacks(C, Callbacks);
576 /// \brief Given an identifier, return its latest MacroDirective if it is
577 /// \#defined or null if it isn't \#define'd.
578 MacroDirective *getMacroDirective(IdentifierInfo *II) const {
579 if (!II->hasMacroDefinition())
582 MacroDirective *MD = getMacroDirectiveHistory(II);
583 assert(MD->isDefined() && "Macro is undefined!");
/// Const overload: casts away constness of \c this to reuse the non-const
/// getMacroInfo() and returns its result as a pointer to const.
587 const MacroInfo *getMacroInfo(IdentifierInfo *II) const {
588 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
591 MacroInfo *getMacroInfo(IdentifierInfo *II) {
592 if (MacroDirective *MD = getMacroDirective(II))
593 return MD->getMacroInfo();
597 /// \brief Given an identifier, return the (probably #undef'd) MacroInfo
598 /// representing the most recent macro definition.
600 /// One can iterate over all previous macro definitions from the most recent
601 /// one. This should only be called for identifiers that hadMacroDefinition().
602 MacroDirective *getMacroDirectiveHistory(const IdentifierInfo *II) const;
604 /// \brief Add a directive to the macro directive history for this identifier.
605 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
606 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
609 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc, isImported);
610 appendMacroDirective(II, MD);
/// Convenience overload: forwards to the longer overload using \p MI's own
/// definition location and \c false for the last argument.
613 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI){
614 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc(), false);
616 /// \brief Set a MacroDirective that was loaded from a PCH file.
617 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD);
620 /// Iterators for the macro history table. Currently defined macros have
621 /// IdentifierInfo::hasMacroDefinition() set and an empty
622 /// MacroInfo::getUndefLoc() at the head of the list.
623 typedef llvm::DenseMap<const IdentifierInfo *,
624 MacroDirective*>::const_iterator macro_iterator;
625 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
626 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
629 /// \brief Return the name of the macro defined before \p Loc that has
630 /// spelling \p Tokens. If there are multiple macros with same spelling,
631 /// return the last one defined.
632 StringRef getLastMacroWithSpelling(SourceLocation Loc,
633 ArrayRef<TokenValue> Tokens) const;
/// Returns the predefines text currently stored for this preprocessor.
635 const std::string &getPredefines() const { return Predefines; }
636 /// \brief Set the predefines for this Preprocessor.
638 /// These predefines are automatically injected when parsing the main file.
639 void setPredefines(const char *P) { Predefines = P; }
640 void setPredefines(const std::string &P) { Predefines = P; }
642 /// Return information about the specified preprocessor
643 /// identifier token.
644 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
645 return &Identifiers.get(Name);
648 /// \brief Add the specified pragma handler to this preprocessor.
650 /// If \p Namespace is non-null, then it is a token required to exist on the
651 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
652 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
/// Convenience overload: adds \p Handler with an empty namespace
/// (a default-constructed StringRef).
653 void AddPragmaHandler(PragmaHandler *Handler) {
654 AddPragmaHandler(StringRef(), Handler);
657 /// \brief Remove the specific pragma handler from this preprocessor.
659 /// If \p Namespace is non-null, then it should be the namespace that
660 /// \p Handler was added to. It is an error to remove a handler that
661 /// has not been registered.
662 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
/// Convenience overload: removes \p Handler from the empty namespace
/// (a default-constructed StringRef).
663 void RemovePragmaHandler(PragmaHandler *Handler) {
664 RemovePragmaHandler(StringRef(), Handler);
667 /// Install empty handlers for all pragmas (making them ignored).
668 void IgnorePragmas();
670 /// \brief Add the specified comment handler to the preprocessor.
671 void addCommentHandler(CommentHandler *Handler);
673 /// \brief Remove the specified comment handler.
675 /// It is an error to remove a handler that has not been registered.
676 void removeCommentHandler(CommentHandler *Handler);
678 /// \brief Set the code completion handler to the given object.
679 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
680 CodeComplete = &Handler;
683 /// \brief Retrieve the current code-completion handler.
684 CodeCompletionHandler *getCodeCompletionHandler() const {
688 /// \brief Clear out the code completion handler.
689 void clearCodeCompletionHandler() {
690 CodeComplete = nullptr;
693 /// \brief Hook used by the lexer to invoke the "natural language" code
694 /// completion point.
695 void CodeCompleteNaturalLanguage();
697 /// \brief Retrieve the preprocessing record, or NULL if there is no
698 /// preprocessing record.
699 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
701 /// \brief Create a new preprocessing record, which will keep track of
702 /// all macro expansions, macro definitions, etc.
703 void createPreprocessingRecord();
705 /// \brief Enter the specified FileID as the main source file,
706 /// which implicitly adds the builtin defines etc.
707 void EnterMainSourceFile();
709 /// \brief Inform the preprocessor callbacks that processing is complete.
710 void EndSourceFile();
712 /// \brief Add a source file to the top of the include stack and
713 /// start lexing tokens from it instead of the current buffer.
715 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
716 bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
719 /// \brief Add a Macro to the top of the include stack and start lexing
720 /// tokens from it instead of the current buffer.
722 /// \param Args specifies the tokens input to a function-like macro.
723 /// \param ILEnd specifies the location of the ')' for a function-like macro
724 /// or the identifier for an object-like macro.
725 void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
728 /// \brief Add a "macro" context to the top of the include stack,
729 /// which will cause the lexer to start returning the specified tokens.
731 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
732 /// will not be subject to further macro expansion. Otherwise, these tokens
733 /// will be re-macro-expanded when/if expansion is enabled.
735 /// If \p OwnsTokens is false, this method assumes that the specified stream
736 /// of tokens has a permanent owner somewhere, so they do not need to be
737 /// copied. If it is true, it assumes the array of tokens is allocated with
738 /// \c new[] and must be freed.
739 void EnterTokenStream(const Token *Toks, unsigned NumToks,
740 bool DisableMacroExpansion, bool OwnsTokens);
742 /// \brief Pop the current lexer/macro exp off the top of the lexer stack.
744 /// This should only be used in situations where the current state of the
745 /// top-of-stack lexer is known.
746 void RemoveTopOfLexerStack();
748 /// From the point that this method is called, and until
749 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
750 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
751 /// make the Preprocessor re-lex the same tokens.
753 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
754 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
755 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
757 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
758 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
759 /// tokens will continue indefinitely.
761 void EnableBacktrackAtThisPos();
763 /// \brief Disable the last EnableBacktrackAtThisPos call.
764 void CommitBacktrackedTokens();
766 /// \brief Make Preprocessor re-lex the tokens that were lexed since
767 /// EnableBacktrackAtThisPos() was previously called.
770 /// \brief True if EnableBacktrackAtThisPos() was called and
771 /// caching of tokens is on.
772 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
774 /// \brief Lex the next token for this preprocessor.
775 void Lex(Token &Result);
777 void LexAfterModuleImport(Token &Result);
779 /// \brief Lex a string literal, which may be the concatenation of multiple
780 /// string literals and may even come from macro expansion.
781 /// \returns true on success, false if an error diagnostic has been generated.
782 bool LexStringLiteral(Token &Result, std::string &String,
783 const char *DiagnosticTag, bool AllowMacroExpansion) {
784 if (AllowMacroExpansion)
787 LexUnexpandedToken(Result);
788 return FinishLexStringLiteral(Result, String, DiagnosticTag,
789 AllowMacroExpansion);
792 /// \brief Complete the lexing of a string literal where the first token has
793 /// already been lexed (see LexStringLiteral).
794 bool FinishLexStringLiteral(Token &Result, std::string &String,
795 const char *DiagnosticTag,
796 bool AllowMacroExpansion);
798 /// \brief Lex a token. If it's a comment, keep lexing until we get
799 /// something not a comment.
801 /// This is useful in -E -C mode where comments would foul up preprocessor
802 /// directive handling.
803 void LexNonComment(Token &Result) {
806 while (Result.getKind() == tok::comment);
809 /// \brief Just like Lex, but disables macro expansion of identifier tokens.
810 void LexUnexpandedToken(Token &Result) {
811 // Disable macro expansion.
812 bool OldVal = DisableMacroExpansion;
813 DisableMacroExpansion = true;
818 DisableMacroExpansion = OldVal;
821 /// \brief Like LexNonComment, but this disables macro expansion of
822 /// identifier tokens.
823 void LexUnexpandedNonComment(Token &Result) {
825 LexUnexpandedToken(Result);
826 while (Result.getKind() == tok::comment);
829 /// \brief Parses a simple integer literal to get its numeric value. Floating
830 /// point literals and user defined literals are rejected. Used primarily to
831 /// handle pragmas that accept integer arguments.
832 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
834 /// Disables macro expansion everywhere except for preprocessor directives.
835 void SetMacroExpansionOnlyInDirectives() {
836 DisableMacroExpansion = true;
837 MacroExpansionInDirectivesOverride = true;
840 /// \brief Peeks ahead N tokens and returns that token without consuming any
843 /// LookAhead(0) returns the next token that would be returned by Lex(),
844 /// LookAhead(1) returns the token after it, etc. This returns normal
845 /// tokens after phase 5. As such, it is equivalent to using
846 /// 'Lex', not 'LexUnexpandedToken'.
847 const Token &LookAhead(unsigned N) {
848 if (CachedLexPos + N < CachedTokens.size())
849 return CachedTokens[CachedLexPos+N];
851 return PeekAhead(N+1);
854 /// \brief When backtracking is enabled and tokens are cached,
855 /// this allows reverting a specific number of tokens.
857 /// Note that the number of tokens being reverted should be up to the last
858 /// backtrack position, not more.
859 void RevertCachedTokens(unsigned N) {
860 assert(isBacktrackEnabled() &&
861 "Should only be called when tokens are cached for backtracking");
862 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
863 && "Should revert tokens up to the last backtrack position, not more");
864 assert(signed(CachedLexPos) - signed(N) >= 0 &&
865 "Corrupted backtrack positions ?");
869 /// \brief Enters a token in the token stream to be lexed next.
871 /// If BackTrack() is called afterwards, the token will remain at the
873 void EnterToken(const Token &Tok) {
874 EnterCachingLexMode();
875 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
878 /// We notify the Preprocessor that if it is caching tokens (because
879 /// backtrack is enabled) it should replace the most recent cached tokens
880 /// with the given annotation token. This function has no effect if
881 /// backtracking is not enabled.
883 /// Note that the use of this function is just for optimization, so that the
884 /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
886 void AnnotateCachedTokens(const Token &Tok) {
887 assert(Tok.isAnnotation() && "Expected annotation token");
888 if (CachedLexPos != 0 && isBacktrackEnabled())
889 AnnotatePreviousCachedTokens(Tok);
892 /// Get the location of the last cached token, suitable for setting the end
893 /// location of an annotation token.
894 SourceLocation getLastCachedTokenLocation() const {
895 assert(CachedLexPos != 0);
896 return CachedTokens[CachedLexPos-1].getLocation();
899 /// \brief Replace the last token with an annotation token.
901 /// Like AnnotateCachedTokens(), this routine replaces an
902 /// already-parsed (and resolved) token with an annotation
903 /// token. However, this routine only replaces the last token with
904 /// the annotation token; it does not affect any other cached
905 /// tokens. This function has no effect if backtracking is not
907 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
908 assert(Tok.isAnnotation() && "Expected annotation token");
909 if (CachedLexPos != 0 && isBacktrackEnabled())
910 CachedTokens[CachedLexPos-1] = Tok;
913 /// Update the current token to represent the provided
914 /// identifier, in order to cache an action performed by typo correction.
915 void TypoCorrectToken(const Token &Tok) {
916 assert(Tok.getIdentifierInfo() && "Expected identifier token");
917 if (CachedLexPos != 0 && isBacktrackEnabled())
918 CachedTokens[CachedLexPos-1] = Tok;
/// \brief Re-derive the current lexer kind from the CurLexer, CurPTHLexer
/// and CurTokenLexer pointers.
void recomputeCurLexerKind();
925 /// \brief Returns true if incremental processing is enabled
926 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
928 /// \brief Enables the incremental processing
929 void enableIncrementalProcessing(bool value = true) {
930 IncrementalProcessing = value;
933 /// \brief Specify the point at which code-completion will be performed.
935 /// \param File the file in which code completion should occur. If
936 /// this file is included multiple times, code-completion will
937 /// perform completion the first time it is included. If NULL, this
938 /// function clears out the code-completion point.
940 /// \param Line the line at which code completion should occur
943 /// \param Column the column at which code completion should occur
946 /// \returns true if an error occurred, false otherwise.
947 bool SetCodeCompletionPoint(const FileEntry *File,
948 unsigned Line, unsigned Column);
950 /// \brief Determine if we are performing code completion.
951 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
953 /// \brief Returns the location of the code-completion point.
955 /// Returns an invalid location if code-completion is not enabled or the file
956 /// containing the code-completion point has not been lexed yet.
957 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
959 /// \brief Returns the start location of the file of code-completion point.
961 /// Returns an invalid location if code-completion is not enabled or the file
962 /// containing the code-completion point has not been lexed yet.
963 SourceLocation getCodeCompletionFileLoc() const {
964 return CodeCompletionFileLoc;
967 /// \brief Returns true if code-completion is enabled and we have hit the
968 /// code-completion point.
969 bool isCodeCompletionReached() const { return CodeCompletionReached; }
971 /// \brief Note that we hit the code-completion point.
972 void setCodeCompletionReached() {
973 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
974 CodeCompletionReached = true;
975 // Silence any diagnostics that occur after we hit the code-completion.
976 getDiagnostics().setSuppressAllDiagnostics(true);
979 /// \brief The location of the currently-active \#pragma clang
980 /// arc_cf_code_audited begin.
982 /// Returns an invalid location if there is no such pragma active.
983 SourceLocation getPragmaARCCFCodeAuditedLoc() const {
984 return PragmaARCCFCodeAuditedLoc;
987 /// \brief Set the location of the currently-active \#pragma clang
988 /// arc_cf_code_audited begin. An invalid location ends the pragma.
989 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
990 PragmaARCCFCodeAuditedLoc = Loc;
993 /// \brief Instruct the preprocessor to skip part of the main source file.
995 /// \param Bytes The number of bytes in the preamble to skip.
997 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
999 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1000 SkipMainFilePreamble.first = Bytes;
1001 SkipMainFilePreamble.second = StartOfLine;
1004 /// Forwarding function for diagnostics. This emits a diagnostic at
1005 /// the specified Token's location, translating the token's start
1006 /// position in the current buffer into a SourcePosition object for rendering.
1007 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1008 return Diags->Report(Loc, DiagID);
1011 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1012 return Diags->Report(Tok.getLocation(), DiagID);
1015 /// Return the 'spelling' of the token at the given
1016 /// location; does not go up to the spelling location or down to the
1017 /// expansion location.
1019 /// \param buffer A buffer which will be used only if the token requires
1020 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1021 /// \param invalid If non-null, will be set \c true if an error occurs.
1022 StringRef getSpelling(SourceLocation loc,
1023 SmallVectorImpl<char> &buffer,
1024 bool *invalid = nullptr) const {
1025 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1028 /// \brief Return the 'spelling' of the Tok token.
1030 /// The spelling of a token is the characters used to represent the token in
1031 /// the source file after trigraph expansion and escaped-newline folding. In
1032 /// particular, this wants to get the true, uncanonicalized, spelling of
1033 /// things like digraphs, UCNs, etc.
1035 /// \param Invalid If non-null, will be set \c true if an error occurs.
1036 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1037 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1040 /// \brief Get the spelling of a token into a preallocated buffer, instead
1041 /// of as an std::string.
1043 /// The caller is required to allocate enough space for the token, which is
1044 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1045 /// actual result is returned.
1047 /// Note that this method may do two possible things: it may either fill in
1048 /// the buffer specified with characters, or it may *change the input pointer*
1049 /// to point to a constant buffer with the data already in it (avoiding a
1050 /// copy). The caller is not allowed to modify the returned buffer pointer
1051 /// if an internal buffer is returned.
1052 unsigned getSpelling(const Token &Tok, const char *&Buffer,
1053 bool *Invalid = nullptr) const {
1054 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1057 /// \brief Get the spelling of a token into a SmallVector.
1059 /// Note that the returned StringRef may not point to the
1060 /// supplied buffer if a copy can be avoided.
1061 StringRef getSpelling(const Token &Tok,
1062 SmallVectorImpl<char> &Buffer,
1063 bool *Invalid = nullptr) const;
1065 /// \brief Relex the token at the specified location.
1066 /// \returns true if there was a failure, false on success.
1067 bool getRawToken(SourceLocation Loc, Token &Result,
1068 bool IgnoreWhiteSpace = false) {
1069 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1072 /// \brief Given a Token \p Tok that is a numeric constant with length 1,
1073 /// return the character.
1075 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1076 bool *Invalid = nullptr) const {
1077 assert(Tok.is(tok::numeric_constant) &&
1078 Tok.getLength() == 1 && "Called on unsupported token");
1079 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1081 // If the token is carrying a literal data pointer, just use it.
1082 if (const char *D = Tok.getLiteralData())
1085 // Otherwise, fall back on getCharacterData, which is slower, but always
1087 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1090 /// \brief Retrieve the name of the immediate macro expansion.
1092 /// This routine starts from a source location, and finds the name of the
1093 /// macro responsible for its immediate expansion. It looks through any
1094 /// intervening macro argument expansions to compute this. It returns a
1095 /// StringRef that refers to the SourceManager-owned buffer of the source
1096 /// where that macro name is spelled. Thus, the result shouldn't out-live
1097 /// the SourceManager.
1098 StringRef getImmediateMacroName(SourceLocation Loc) {
1099 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1102 /// \brief Plop the specified string into a scratch buffer and set the
1103 /// specified token's location and length to it.
1105 /// If specified, the source location provides a location of the expansion
1106 /// point of the token.
1107 void CreateString(StringRef Str, Token &Tok,
1108 SourceLocation ExpansionLocStart = SourceLocation(),
1109 SourceLocation ExpansionLocEnd = SourceLocation());
1111 /// \brief Computes the source location just past the end of the
1112 /// token at this source location.
1114 /// This routine can be used to produce a source location that
1115 /// points just past the end of the token referenced by \p Loc, and
1116 /// is generally used when a diagnostic needs to point just after a
1117 /// token where it expected something different that it received. If
1118 /// the returned source location would not be meaningful (e.g., if
1119 /// it points into a macro), this routine returns an invalid
1120 /// source location.
1122 /// \param Offset an offset from the end of the token, where the source
1123 /// location should refer to. The default offset (0) produces a source
1124 /// location pointing just past the end of the token; an offset of 1 produces
1125 /// a source location pointing to the last character in the token, etc.
1126 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1127 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1130 /// \brief Returns true if the given MacroID location points at the first
1131 /// token of the macro expansion.
1133 /// \param MacroBegin If non-null and function returns true, it is set to
1134 /// begin location of the macro.
1135 bool isAtStartOfMacroExpansion(SourceLocation loc,
1136 SourceLocation *MacroBegin = nullptr) const {
1137 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1141 /// \brief Returns true if the given MacroID location points at the last
1142 /// token of the macro expansion.
1144 /// \param MacroEnd If non-null and function returns true, it is set to
1145 /// end location of the macro.
1146 bool isAtEndOfMacroExpansion(SourceLocation loc,
1147 SourceLocation *MacroEnd = nullptr) const {
1148 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1151 /// \brief Print the token to stderr, used for debugging.
1152 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1153 void DumpLocation(SourceLocation Loc) const;
1154 void DumpMacro(const MacroInfo &MI) const;
1156 /// \brief Given a location that specifies the start of a
1157 /// token, return a new location that specifies a character within the token.
1158 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1159 unsigned Char) const {
1160 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1163 /// \brief Increment the counters for the number of token paste operations
1166 /// If fast was specified, this is a 'fast paste' case we handled.
1167 void IncrementPasteCounter(bool isFast) {
1169 ++NumFastTokenPaste;
1176 size_t getTotalMemory() const;
1178 /// When the macro expander pastes together a comment (/##/) in Microsoft
1179 /// mode, this method handles updating the current state, returning the
1180 /// token on the next source line.
1181 void HandleMicrosoftCommentPaste(Token &Tok);
1183 //===--------------------------------------------------------------------===//
1184 // Preprocessor callback methods. These are invoked by a lexer as various
1185 // directives and events are found.
1187 /// Given a tok::raw_identifier token, look up the
1188 /// identifier information for the token and install it into the token,
1189 /// updating the token kind accordingly.
1190 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1193 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1197 /// \brief Specifies the reason for poisoning an identifier.
1199 /// If that identifier is accessed while poisoned, then this reason will be
1200 /// used instead of the default "poisoned" diagnostic.
1201 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1203 /// \brief Display reason for poisoned identifier.
1204 void HandlePoisonedIdentifier(Token & Tok);
1206 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1207 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1208 if(II->isPoisoned()) {
1209 HandlePoisonedIdentifier(Identifier);
1215 /// Identifiers used for SEH handling in Borland. These are only
1216 /// allowed in particular circumstances
1218 IdentifierInfo *Ident__exception_code,
1219 *Ident___exception_code,
1220 *Ident_GetExceptionCode;
1221 // __except filter expression
1222 IdentifierInfo *Ident__exception_info,
1223 *Ident___exception_info,
1224 *Ident_GetExceptionInfo;
1226 IdentifierInfo *Ident__abnormal_termination,
1227 *Ident___abnormal_termination,
1228 *Ident_AbnormalTermination;
/// Returns a character pointer for the current lexer's end position
/// (NOTE(review): exact semantics are defined out of line).
const char *getCurLexerEndPos();

/// Poison (or, with \p Poison false, presumably un-poison) the SEH
/// identifiers.  Borland-specific.
void PoisonSEHIdentifiers(bool Poison = true);
1235 /// \brief Callback invoked when the lexer reads an identifier and has
1236 /// filled in the tokens IdentifierInfo member.
1238 /// This callback potentially macro expands it or turns it into a named
1239 /// token (like 'for').
1241 /// \returns true if we actually computed a token, false if we need to
1243 bool HandleIdentifier(Token &Identifier);
1246 /// \brief Callback invoked when the lexer hits the end of the current file.
1248 /// This either returns the EOF token and returns true, or
1249 /// pops a level off the include stack and returns false, at which point the
1250 /// client should call lex again.
1251 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1253 /// \brief Callback invoked when the current TokenLexer hits the end of its
1255 bool HandleEndOfTokenLexer(Token &Result);
1257 /// \brief Callback invoked when the lexer sees a # token at the start of a
1260 /// This consumes the directive, modifies the lexer/preprocessor state, and
1261 /// advances the lexer(s) so that the next token read is the correct one.
1262 void HandleDirective(Token &Result);
/// \brief Verify that the next token is a tok::eod token.
///
/// Otherwise a diagnostic is emitted and tokens are consumed up until the
/// eod.  With \p EnableMacros set, macros that expand to zero tokens are
/// considered ok.
void CheckEndOfDirective(const char *Directive,
                         bool EnableMacros = false);

/// \brief Consume and throw away every token left on the current line, up
/// to the tok::eod token.
void DiscardUntilEndOfDirective();
1275 /// \brief Returns true if the preprocessor has seen a use of
1276 /// __DATE__ or __TIME__ in the file so far.
1277 bool SawDateOrTime() const {
1278 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1280 unsigned getCounterValue() const { return CounterValue; }
1281 void setCounterValue(unsigned V) { CounterValue = V; }
1283 /// \brief Retrieves the module that we're currently building, if any.
1284 Module *getCurrentModule();
1286 /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
1287 MacroInfo *AllocateMacroInfo(SourceLocation L);
1289 /// \brief Allocate a new MacroInfo object loaded from an AST file.
1290 MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L,
1291 unsigned SubModuleID);
1293 /// \brief Turn the specified lexer token into a fully checked and spelled
1294 /// filename, e.g. as an operand of \#include.
1296 /// The caller is expected to provide a buffer that is large enough to hold
1297 /// the spelling of the filename, but is also expected to handle the case
1298 /// when this method decides to use a different buffer.
1300 /// \returns true if the input filename was in <>'s or false if it was
1302 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
1304 /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
1306 /// Returns null on failure. \p isAngled indicates whether the file
1307 /// reference is for system \#include's or not (i.e. using <> instead of "").
1308 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1309 bool isAngled, const DirectoryLookup *FromDir,
1310 const DirectoryLookup *&CurDir,
1311 SmallVectorImpl<char> *SearchPath,
1312 SmallVectorImpl<char> *RelativePath,
1313 ModuleMap::KnownHeader *SuggestedModule,
1314 bool SkipCache = false);
1316 /// \brief Get the DirectoryLookup structure used to find the current
1317 /// FileEntry, if CurLexer is non-null and if applicable.
1319 /// This allows us to implement \#include_next and find directory-specific
1321 const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1323 /// \brief Return true if we're in the top-level file, not in a \#include.
1324 bool isInPrimaryFile() const;
1326 /// \brief Handle cases where the \#include name is expanded
1327 /// from a macro as multiple tokens, which need to be glued together.
1329 /// This occurs for code like:
1331 /// \#define FOO <x/y.h>
1334 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
1336 /// This code concatenates and consumes tokens up to the '>' token. It
1337 /// returns false if the > was found, otherwise it returns true if it finds
1338 /// and consumes the EOD marker.
1339 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
1340 SourceLocation &End);
1342 /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1343 /// followed by EOD. Return true if the token is not a valid on-off-switch.
1344 bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
1346 bool CheckMacroName(Token &MacroNameTok, char isDefineUndef);
1350 void PushIncludeMacroStack() {
1351 IncludeMacroStack.push_back(IncludeStackInfo(
1352 CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer),
1353 CurPPLexer, std::move(CurTokenLexer), CurDirLookup));
1354 CurPPLexer = nullptr;
1357 void PopIncludeMacroStack() {
1358 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1359 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
1360 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1361 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1362 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
1363 CurSubmodule = IncludeMacroStack.back().TheSubmodule;
1364 CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1365 IncludeMacroStack.pop_back();
1368 void PropagateLineStartLeadingSpaceInfo(Token &Result);
1370 /// \brief Allocate a new MacroInfo object.
1371 MacroInfo *AllocateMacroInfo();
1373 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
1376 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
1377 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1380 /// \brief Release the specified MacroInfo for re-use.
1382 /// This memory will be reused for allocating new MacroInfo objects.
1383 void ReleaseMacroInfo(MacroInfo* MI);
1385 /// \brief Lex and validate a macro name, which occurs after a
1386 /// \#define or \#undef.
1388 /// This emits a diagnostic, sets the token kind to eod,
1389 /// and discards the rest of the macro line if the macro name is invalid.
1390 void ReadMacroName(Token &MacroNameTok, char isDefineUndef = 0);
1392 /// The ( starting an argument list of a macro definition has just been read.
1393 /// Lex the rest of the arguments and the closing ), updating \p MI with
1394 /// what we learn and saving in \p LastTok the last token read.
1395 /// Return true if an error occurs parsing the arg list.
1396 bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
1398 /// We just read a \#if or related directive and decided that the
1399 /// subsequent tokens are in the \#if'd out portion of the
1400 /// file. Lex the rest of the file, until we see an \#endif. If \p
1401 /// FoundNonSkipPortion is true, then we have already emitted code for part of
1402 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
1403 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
1404 /// already seen one so a \#else directive is a duplicate. When this returns,
1405 /// the caller can lex the first valid token.
1406 void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
1407 bool FoundNonSkipPortion, bool FoundElse,
1408 SourceLocation ElseLoc = SourceLocation());
1410 /// \brief A fast PTH version of SkipExcludedConditionalBlock.
1411 void PTHSkipExcludedConditionalBlock();
1413 /// \brief Evaluate an integer constant expression that may occur after a
1414 /// \#if or \#elif directive and return it as a bool.
1416 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
1417 bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
/// \brief Set up the standard preprocessor pragmas:
/// \#pragma GCC poison/system_header/dependency and \#pragma once.
void RegisterBuiltinPragmas();

/// \brief Enter builtin macros such as __LINE__ into the identifier table.
void RegisterBuiltinMacros();
1426 /// If an identifier token is read that is to be expanded as a macro, handle
1427 /// it and return the next token as 'Tok'. If we lexed a token, return true;
1428 /// otherwise the caller should lex again.
1429 bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD);
1431 /// \brief Cache macro expanded tokens for TokenLexers.
1433 /// Works like a stack; a TokenLexer adds the macro expanded tokens that is
1434 /// going to lex in the cache and when it finishes the tokens are removed
1435 /// from the end of the cache.
1436 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
1437 ArrayRef<Token> tokens);
1438 void removeCachedMacroExpandedTokensOfLastLexer();
1439 friend void TokenLexer::ExpandFunctionArguments();
1441 /// Determine whether the next preprocessor token to be
1442 /// lexed is a '('. If so, consume the token and return true, if not, this
1443 /// method should have no observable side-effect on the lexed tokens.
1444 bool isNextPPTokenLParen();
1446 /// After reading "MACRO(", this method is invoked to read all of the formal
1447 /// arguments specified for the macro invocation. Returns null on error.
1448 MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
1449 SourceLocation &ExpansionEnd);
1451 /// \brief If an identifier token is read that is to be expanded
1452 /// as a builtin macro, handle it and return the next token as 'Tok'.
1453 void ExpandBuiltinMacro(Token &Tok);
1455 /// \brief Read a \c _Pragma directive, slice it up, process it, then
1456 /// return the first token after the directive.
1457 /// This assumes that the \c _Pragma token has just been read into \p Tok.
1458 void Handle_Pragma(Token &Tok);
1460 /// \brief Like Handle_Pragma except the pragma text is not enclosed within
1461 /// a string literal.
1462 void HandleMicrosoft__pragma(Token &Tok);
1464 /// \brief Add a lexer to the top of the include stack and
1465 /// start lexing tokens from it instead of the current buffer.
1466 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
1468 /// \brief Add a lexer to the top of the include stack and
1469 /// start getting tokens from it using the PTH cache.
1470 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
1472 /// \brief Set the FileID for the preprocessor predefines.
1473 void setPredefinesFileID(FileID FID) {
1474 assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
1475 PredefinesFileID = FID;
1478 /// \brief Returns true if we are lexing from a file and not a
1479 /// pragma or a macro.
1480 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
1481 return L ? !L->isPragmaLexer() : P != nullptr;
1484 static bool IsFileLexer(const IncludeStackInfo& I) {
1485 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
1488 bool IsFileLexer() const {
1489 return IsFileLexer(CurLexer.get(), CurPPLexer);
1492 //===--------------------------------------------------------------------===//
1494 void CachingLex(Token &Result);
1495 bool InCachingLexMode() const {
1496 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
1497 // that we are past EOF, not that we are in CachingLex mode.
1498 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
1499 !IncludeMacroStack.empty();
/// Switch the preprocessor into caching-lex mode (counterpart of
/// ExitCachingLexMode; defined out of line).
void EnterCachingLexMode();
1502 void ExitCachingLexMode() {
1503 if (InCachingLexMode())
1504 RemoveTopOfLexerStack();
1506 const Token &PeekAhead(unsigned N);
1507 void AnnotatePreviousCachedTokens(const Token &Tok);
1509 //===--------------------------------------------------------------------===//
1510 /// Handle*Directive - implement the various preprocessor directives. These
1511 /// should side-effect the current preprocessor object so that the next call
1512 /// to Lex() will return the appropriate token next.
1513 void HandleLineDirective(Token &Tok);
1514 void HandleDigitDirective(Token &Tok);
1515 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
1516 void HandleIdentSCCSDirective(Token &Tok);
1517 void HandleMacroPublicDirective(Token &Tok);
1518 void HandleMacroPrivateDirective(Token &Tok);
1521 void HandleIncludeDirective(SourceLocation HashLoc,
1523 const DirectoryLookup *LookupFrom = nullptr,
1524 bool isImport = false);
1525 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
1526 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
1527 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
1528 void HandleMicrosoftImportDirective(Token &Tok);
1530 // Module inclusion testing.
1531 /// \brief Find the module for the source or header file that \p FilenameLoc
1533 Module *getModuleForLocation(SourceLocation FilenameLoc);
1536 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
1537 void HandleUndefDirective(Token &Tok);
1539 // Conditional Inclusion.
1540 void HandleIfdefDirective(Token &Tok, bool isIfndef,
1541 bool ReadAnyTokensBeforeDirective);
1542 void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
1543 void HandleEndifDirective(Token &Tok);
1544 void HandleElseDirective(Token &Tok);
1545 void HandleElifDirective(Token &Tok);
1548 void HandlePragmaDirective(SourceLocation IntroducerLoc,
1549 PragmaIntroducerKind Introducer);
1551 void HandlePragmaOnce(Token &OnceTok);
1552 void HandlePragmaMark();
1553 void HandlePragmaPoison(Token &PoisonTok);
1554 void HandlePragmaSystemHeader(Token &SysHeaderTok);
1555 void HandlePragmaDependency(Token &DependencyTok);
1556 void HandlePragmaPushMacro(Token &Tok);
1557 void HandlePragmaPopMacro(Token &Tok);
1558 void HandlePragmaIncludeAlias(Token &Tok);
1559 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
1561 // Return true and store the first token only if any CommentHandler
1562 // has inserted some tokens and getCommentRetentionState() is false.
1563 bool HandleComment(Token &Token, SourceRange Comment);
1565 /// \brief A macro is used, update information about macros that need unused
1567 void markMacroAsUsed(MacroInfo *MI);
1570 /// \brief Abstract base class that describes a handler that will receive
1571 /// source ranges for each of the comments encountered in the source file.
1572 class CommentHandler {
1574 virtual ~CommentHandler();
1576 // The handler shall return true if it has pushed any tokens
1577 // to be read using e.g. EnterToken or EnterTokenStream.
1578 virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
1581 } // end namespace clang