1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Preprocessor interface.
12 //===----------------------------------------------------------------------===//
14 // Options to support:
15 // -H - Print the name of each header file used.
16 // -d[DNI] - Dump various things.
17 // -fworking-directory - #line's with preprocessor's working dir.
19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
24 // "Multiple include guards may be useful for:\n"
26 //===----------------------------------------------------------------------===//
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APInt.h"
47 #include "llvm/ADT/DenseMap.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/SmallVector.h"
50 #include "llvm/ADT/STLExtras.h"
51 #include "llvm/ADT/StringRef.h"
52 #include "llvm/ADT/StringSwitch.h"
53 #include "llvm/Support/Capacity.h"
54 #include "llvm/Support/ErrorHandling.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/raw_ostream.h"
64 using namespace clang;
66 LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
68 //===----------------------------------------------------------------------===//
69 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
71 Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
72 DiagnosticsEngine &diags, LangOptions &opts,
73 SourceManager &SM, HeaderSearch &Headers,
74 ModuleLoader &TheModuleLoader,
75 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
76 TranslationUnitKind TUKind)
77 : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
78 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
79 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
80 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
81 Identifiers(opts, IILookup),
82 PragmaHandlers(new PragmaNamespace(StringRef())),
83 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
84 CodeCompletionFile(nullptr), CodeCompletionOffset(0),
85 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
86 CodeCompletionReached(false), CodeCompletionII(nullptr),
87 MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
88 CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
89 Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState),
90 MacroArgCache(nullptr), Record(nullptr), MIChainHead(nullptr),
91 DeserialMIChainHead(nullptr) {
92 OwnsHeaderSearch = OwnsHeaders;
94 CounterValue = 0; // __COUNTER__ starts at 0.
97 NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
98 NumIf = NumElse = NumEndif = 0;
99 NumEnteredSourceFiles = 0;
100 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
101 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
102 MaxIncludeStackDepth = 0;
105 // Default to discarding comments.
106 KeepComments = false;
107 KeepMacroComments = false;
108 SuppressIncludeNotFoundError = false;
110 // Macro expansion is enabled.
111 DisableMacroExpansion = false;
112 MacroExpansionInDirectivesOverride = false;
114 InMacroArgPreExpansion = false;
115 NumCachedTokenLexers = 0;
116 PragmasEnabled = true;
117 ParsingIfOrElifDirective = false;
118 PreprocessedOutput = false;
122 // We haven't read anything from the external source.
123 ReadMacrosFromExternalSource = false;
125 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
126 // This gets unpoisoned where it is allowed.
127 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
128 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
130 // Initialize the pragma handlers.
131 RegisterBuiltinPragmas();
133 // Initialize builtin macros like __LINE__ and friends.
134 RegisterBuiltinMacros();
136 if(LangOpts.Borland) {
137 Ident__exception_info = getIdentifierInfo("_exception_info");
138 Ident___exception_info = getIdentifierInfo("__exception_info");
139 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
140 Ident__exception_code = getIdentifierInfo("_exception_code");
141 Ident___exception_code = getIdentifierInfo("__exception_code");
142 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
143 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
144 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
145 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
147 Ident__exception_info = Ident__exception_code = nullptr;
148 Ident__abnormal_termination = Ident___exception_info = nullptr;
149 Ident___exception_code = Ident___abnormal_termination = nullptr;
150 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
151 Ident_AbnormalTermination = nullptr;
155 Preprocessor::~Preprocessor() {
156 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
158 IncludeMacroStack.clear();
160 // Destroy any macro definitions.
161 while (MacroInfoChain *I = MIChainHead) {
162 MIChainHead = I->Next;
163 I->~MacroInfoChain();
166 // Free any cached macro expanders.
167 // This populates MacroArgCache, so all TokenLexers need to be destroyed
168 // before the code below that frees up the MacroArgCache list.
169 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
170 CurTokenLexer.reset();
172 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
173 DeserialMIChainHead = I->Next;
174 I->~DeserializedMacroInfoChain();
177 // Free any cached MacroArgs.
178 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
179 ArgList = ArgList->deallocate();
181 // Delete the header search info, if we own it.
182 if (OwnsHeaderSearch)
186 void Preprocessor::Initialize(const TargetInfo &Target,
187 const TargetInfo *AuxTarget) {
188 assert((!this->Target || this->Target == &Target) &&
189 "Invalid override of target information");
190 this->Target = &Target;
192 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
193 "Invalid override of aux target information.");
194 this->AuxTarget = AuxTarget;
196 // Initialize information about built-ins.
197 BuiltinInfo.InitializeTarget(Target, AuxTarget);
198 HeaderInfo.setTarget(Target);
201 void Preprocessor::InitializeForModelFile() {
202 NumEnteredSourceFiles = 0;
205 PragmaHandlersBackup = std::move(PragmaHandlers);
206 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
207 RegisterBuiltinPragmas();
209 // Reset PredefinesFileID
210 PredefinesFileID = FileID();
213 void Preprocessor::FinalizeForModelFile() {
214 NumEnteredSourceFiles = 1;
216 PragmaHandlers = std::move(PragmaHandlersBackup);
219 void Preprocessor::setPTHManager(PTHManager* pm) {
221 FileMgr.addStatCache(PTH->createStatCache());
224 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
225 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
226 << getSpelling(Tok) << "'";
228 if (!DumpFlags) return;
230 llvm::errs() << "\t";
231 if (Tok.isAtStartOfLine())
232 llvm::errs() << " [StartOfLine]";
233 if (Tok.hasLeadingSpace())
234 llvm::errs() << " [LeadingSpace]";
235 if (Tok.isExpandDisabled())
236 llvm::errs() << " [ExpandDisabled]";
237 if (Tok.needsCleaning()) {
238 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
239 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
243 llvm::errs() << "\tLoc=<";
244 DumpLocation(Tok.getLocation());
248 void Preprocessor::DumpLocation(SourceLocation Loc) const {
252 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
253 llvm::errs() << "MACRO: ";
254 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
255 DumpToken(MI.getReplacementToken(i));
258 llvm::errs() << "\n";
261 void Preprocessor::PrintStats() {
262 llvm::errs() << "\n*** Preprocessor Stats:\n";
263 llvm::errs() << NumDirectives << " directives found:\n";
264 llvm::errs() << " " << NumDefined << " #define.\n";
265 llvm::errs() << " " << NumUndefined << " #undef.\n";
266 llvm::errs() << " #include/#include_next/#import:\n";
267 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
268 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
269 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
270 llvm::errs() << " " << NumElse << " #else/#elif.\n";
271 llvm::errs() << " " << NumEndif << " #endif.\n";
272 llvm::errs() << " " << NumPragma << " #pragma.\n";
273 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
275 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
276 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
277 << NumFastMacroExpanded << " on the fast path.\n";
278 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
279 << " token paste (##) operations performed, "
280 << NumFastTokenPaste << " on the fast path.\n";
282 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
284 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
285 llvm::errs() << "\n Macro Expanded Tokens: "
286 << llvm::capacity_in_bytes(MacroExpandedTokens);
287 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
288 // FIXME: List information for all submodules.
289 llvm::errs() << "\n Macros: "
290 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
291 llvm::errs() << "\n #pragma push_macro Info: "
292 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
293 llvm::errs() << "\n Poison Reasons: "
294 << llvm::capacity_in_bytes(PoisonReasons);
295 llvm::errs() << "\n Comment Handlers: "
296 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
299 Preprocessor::macro_iterator
300 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
301 if (IncludeExternalMacros && ExternalSource &&
302 !ReadMacrosFromExternalSource) {
303 ReadMacrosFromExternalSource = true;
304 ExternalSource->ReadDefinedMacros();
307 // Make sure we cover all macros in visible modules.
308 for (const ModuleMacro &Macro : ModuleMacros)
309 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
311 return CurSubmoduleState->Macros.begin();
314 size_t Preprocessor::getTotalMemory() const {
315 return BP.getTotalMemory()
316 + llvm::capacity_in_bytes(MacroExpandedTokens)
317 + Predefines.capacity() /* Predefines buffer. */
318 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
320 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
321 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
322 + llvm::capacity_in_bytes(PoisonReasons)
323 + llvm::capacity_in_bytes(CommentHandlers);
326 Preprocessor::macro_iterator
327 Preprocessor::macro_end(bool IncludeExternalMacros) const {
328 if (IncludeExternalMacros && ExternalSource &&
329 !ReadMacrosFromExternalSource) {
330 ReadMacrosFromExternalSource = true;
331 ExternalSource->ReadDefinedMacros();
334 return CurSubmoduleState->Macros.end();
337 /// \brief Compares macro tokens with a specified token value sequence.
338 static bool MacroDefinitionEquals(const MacroInfo *MI,
339 ArrayRef<TokenValue> Tokens) {
340 return Tokens.size() == MI->getNumTokens() &&
341 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
344 StringRef Preprocessor::getLastMacroWithSpelling(
346 ArrayRef<TokenValue> Tokens) const {
347 SourceLocation BestLocation;
348 StringRef BestSpelling;
349 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
351 const MacroDirective::DefInfo
352 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
353 if (!Def || !Def.getMacroInfo())
355 if (!Def.getMacroInfo()->isObjectLike())
357 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
359 SourceLocation Location = Def.getLocation();
360 // Choose the macro defined latest.
361 if (BestLocation.isInvalid() ||
362 (Location.isValid() &&
363 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
364 BestLocation = Location;
365 BestSpelling = I->first->getName();
371 void Preprocessor::recomputeCurLexerKind() {
373 CurLexerKind = CLK_Lexer;
374 else if (CurPTHLexer)
375 CurLexerKind = CLK_PTHLexer;
376 else if (CurTokenLexer)
377 CurLexerKind = CLK_TokenLexer;
379 CurLexerKind = CLK_CachingLexer;
382 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
383 unsigned CompleteLine,
384 unsigned CompleteColumn) {
386 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
387 assert(!CodeCompletionFile && "Already set");
389 using llvm::MemoryBuffer;
391 // Load the actual file's contents.
392 bool Invalid = false;
393 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
397 // Find the byte position of the truncation point.
398 const char *Position = Buffer->getBufferStart();
399 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
400 for (; *Position; ++Position) {
401 if (*Position != '\r' && *Position != '\n')
404 // Eat \r\n or \n\r as a single line.
405 if ((Position[1] == '\r' || Position[1] == '\n') &&
406 Position[0] != Position[1])
413 Position += CompleteColumn - 1;
415 // If pointing inside the preamble, adjust the position at the beginning of
416 // the file after the preamble.
417 if (SkipMainFilePreamble.first &&
418 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
419 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
420 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
423 if (Position > Buffer->getBufferEnd())
424 Position = Buffer->getBufferEnd();
426 CodeCompletionFile = File;
427 CodeCompletionOffset = Position - Buffer->getBufferStart();
429 std::unique_ptr<MemoryBuffer> NewBuffer =
430 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
431 Buffer->getBufferIdentifier());
432 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
433 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
435 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
436 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
441 void Preprocessor::CodeCompleteNaturalLanguage() {
443 CodeComplete->CodeCompleteNaturalLanguage();
444 setCodeCompletionReached();
447 /// getSpelling - This method is used to get the spelling of a token into a
448 /// SmallVector. Note that the returned StringRef may not point to the
449 /// supplied buffer if a copy can be avoided.
450 StringRef Preprocessor::getSpelling(const Token &Tok,
451 SmallVectorImpl<char> &Buffer,
452 bool *Invalid) const {
453 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
454 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
455 // Try the fast path.
456 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
457 return II->getName();
460 // Resize the buffer if we need to copy into it.
461 if (Tok.needsCleaning())
462 Buffer.resize(Tok.getLength());
464 const char *Ptr = Buffer.data();
465 unsigned Len = getSpelling(Tok, Ptr, Invalid);
466 return StringRef(Ptr, Len);
469 /// CreateString - Plop the specified string into a scratch buffer and return a
470 /// location for it. If specified, the source location provides a source
471 /// location for the token.
472 void Preprocessor::CreateString(StringRef Str, Token &Tok,
473 SourceLocation ExpansionLocStart,
474 SourceLocation ExpansionLocEnd) {
475 Tok.setLength(Str.size());
478 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
480 if (ExpansionLocStart.isValid())
481 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
482 ExpansionLocEnd, Str.size());
483 Tok.setLocation(Loc);
485 // If this is a raw identifier or a literal token, set the pointer data.
486 if (Tok.is(tok::raw_identifier))
487 Tok.setRawIdentifierData(DestPtr);
488 else if (Tok.isLiteral())
489 Tok.setLiteralData(DestPtr);
492 Module *Preprocessor::getCurrentModule() {
493 if (!getLangOpts().isCompilingModule())
496 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
499 //===----------------------------------------------------------------------===//
500 // Preprocessor Initialization Methods
501 //===----------------------------------------------------------------------===//
503 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
504 /// which implicitly adds the builtin defines etc.
505 void Preprocessor::EnterMainSourceFile() {
506 // We do not allow the preprocessor to reenter the main file. Doing so will
507 // cause FileID's to accumulate information from both runs (e.g. #line
508 // information) and predefined macros aren't guaranteed to be set properly.
509 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
510 FileID MainFileID = SourceMgr.getMainFileID();
512 // If MainFileID is loaded it means we loaded an AST file, no need to enter
514 if (!SourceMgr.isLoadedFileID(MainFileID)) {
515 // Enter the main file source buffer.
516 EnterSourceFile(MainFileID, nullptr, SourceLocation());
518 // If we've been asked to skip bytes in the main file (e.g., as part of a
519 // precompiled preamble), do so now.
520 if (SkipMainFilePreamble.first > 0)
521 CurLexer->SkipBytes(SkipMainFilePreamble.first,
522 SkipMainFilePreamble.second);
524 // Tell the header info that the main file was entered. If the file is later
525 // #imported, it won't be re-entered.
526 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
527 HeaderInfo.IncrementIncludeCount(FE);
530 // Preprocess Predefines to populate the initial preprocessor state.
531 std::unique_ptr<llvm::MemoryBuffer> SB =
532 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
533 assert(SB && "Cannot create predefined source buffer");
534 FileID FID = SourceMgr.createFileID(std::move(SB));
535 assert(FID.isValid() && "Could not create FileID for predefines?");
536 setPredefinesFileID(FID);
538 // Start parsing the predefines.
539 EnterSourceFile(FID, nullptr, SourceLocation());
542 void Preprocessor::EndSourceFile() {
543 // Notify the client that we reached the end of the source file.
545 Callbacks->EndOfMainFile();
548 //===----------------------------------------------------------------------===//
549 // Lexer Event Handling.
550 //===----------------------------------------------------------------------===//
552 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
553 /// identifier information for the token and install it into the token,
554 /// updating the token kind accordingly.
555 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
556 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
558 // Look up this token, see if it is a macro, or if it is a language keyword.
560 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
561 // No cleaning needed, just use the characters from the lexed buffer.
562 II = getIdentifierInfo(Identifier.getRawIdentifier());
564 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
565 SmallString<64> IdentifierBuffer;
566 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
568 if (Identifier.hasUCN()) {
569 SmallString<64> UCNIdentifierBuffer;
570 expandUCNs(UCNIdentifierBuffer, CleanedStr);
571 II = getIdentifierInfo(UCNIdentifierBuffer);
573 II = getIdentifierInfo(CleanedStr);
577 // Update the token info (identifier info and appropriate token kind).
578 Identifier.setIdentifierInfo(II);
579 Identifier.setKind(II->getTokenID());
584 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
585 PoisonReasons[II] = DiagID;
588 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
589 assert(Ident__exception_code && Ident__exception_info);
590 assert(Ident___exception_code && Ident___exception_info);
591 Ident__exception_code->setIsPoisoned(Poison);
592 Ident___exception_code->setIsPoisoned(Poison);
593 Ident_GetExceptionCode->setIsPoisoned(Poison);
594 Ident__exception_info->setIsPoisoned(Poison);
595 Ident___exception_info->setIsPoisoned(Poison);
596 Ident_GetExceptionInfo->setIsPoisoned(Poison);
597 Ident__abnormal_termination->setIsPoisoned(Poison);
598 Ident___abnormal_termination->setIsPoisoned(Poison);
599 Ident_AbnormalTermination->setIsPoisoned(Poison);
602 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
603 assert(Identifier.getIdentifierInfo() &&
604 "Can't handle identifiers without identifier info!");
605 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
606 PoisonReasons.find(Identifier.getIdentifierInfo());
607 if(it == PoisonReasons.end())
608 Diag(Identifier, diag::err_pp_used_poisoned_id);
610 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
613 /// \brief Returns a diagnostic message kind for reporting a future keyword as
614 /// appropriate for the identifier and specified language.
615 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
616 const LangOptions &LangOpts) {
617 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
619 if (LangOpts.CPlusPlus)
620 return llvm::StringSwitch<diag::kind>(II.getName())
621 #define CXX11_KEYWORD(NAME, FLAGS) \
622 .Case(#NAME, diag::warn_cxx11_keyword)
623 #include "clang/Basic/TokenKinds.def"
627 "Keyword not known to come from a newer Standard or proposed Standard");
630 void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
631 assert(II.isOutOfDate() && "not out of date");
632 getExternalSource()->updateOutOfDateIdentifier(II);
635 /// HandleIdentifier - This callback is invoked when the lexer reads an
636 /// identifier. This callback looks up the identifier in the map and/or
637 /// potentially macro expands it or turns it into a named token (like 'for').
639 /// Note that callers of this method are guarded by checking the
640 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
641 /// IdentifierInfo methods that compute these properties will need to change to
643 bool Preprocessor::HandleIdentifier(Token &Identifier) {
644 assert(Identifier.getIdentifierInfo() &&
645 "Can't handle identifiers without identifier info!");
647 IdentifierInfo &II = *Identifier.getIdentifierInfo();
649 // If the information about this identifier is out of date, update it from
650 // the external source.
651 // We have to treat __VA_ARGS__ in a special way, since it gets
652 // serialized with isPoisoned = true, but our preprocessor may have
653 // unpoisoned it if we're defining a C99 macro.
654 if (II.isOutOfDate()) {
655 bool CurrentIsPoisoned = false;
656 if (&II == Ident__VA_ARGS__)
657 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
659 updateOutOfDateIdentifier(II);
660 Identifier.setKind(II.getTokenID());
662 if (&II == Ident__VA_ARGS__)
663 II.setIsPoisoned(CurrentIsPoisoned);
666 // If this identifier was poisoned, and if it was not produced from a macro
667 // expansion, emit an error.
668 if (II.isPoisoned() && CurPPLexer) {
669 HandlePoisonedIdentifier(Identifier);
672 // If this is a macro to be expanded, do it.
673 if (MacroDefinition MD = getMacroDefinition(&II)) {
674 auto *MI = MD.getMacroInfo();
675 assert(MI && "macro definition with no macro info?");
676 if (!DisableMacroExpansion) {
677 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
678 // C99 6.10.3p10: If the preprocessing token immediately after the
679 // macro name isn't a '(', this macro should not be expanded.
680 if (!MI->isFunctionLike() || isNextPPTokenLParen())
681 return HandleMacroExpandedIdentifier(Identifier, MD);
683 // C99 6.10.3.4p2 says that a disabled macro may never again be
684 // expanded, even if it's in a context where it could be expanded in the
686 Identifier.setFlag(Token::DisableExpand);
687 if (MI->isObjectLike() || isNextPPTokenLParen())
688 Diag(Identifier, diag::pp_disabled_macro_expansion);
693 // If this identifier is a keyword in a newer Standard or proposed Standard,
694 // produce a warning. Don't warn if we're not considering macro expansion,
695 // since this identifier might be the name of a macro.
696 // FIXME: This warning is disabled in cases where it shouldn't be, like
697 // "#define constexpr constexpr", "int constexpr;"
698 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
699 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
701 // Don't diagnose this keyword again in this translation unit.
702 II.setIsFutureCompatKeyword(false);
705 // C++ 2.11p2: If this is an alternative representation of a C++ operator,
706 // then we act as if it is the actual operator and not the textual
707 // representation of it.
708 if (II.isCPlusPlusOperatorKeyword())
709 Identifier.setIdentifierInfo(nullptr);
711 // If this is an extension token, diagnose its use.
712 // We avoid diagnosing tokens that originate from macro definitions.
713 // FIXME: This warning is disabled in cases where it shouldn't be,
714 // like "#define TY typeof", "TY(1) x".
715 if (II.isExtensionToken() && !DisableMacroExpansion)
716 Diag(Identifier, diag::ext_token_used);
718 // If this is the 'import' contextual keyword following an '@', note
719 // that the next token indicates a module name.
721 // Note that we do not treat 'import' as a contextual
722 // keyword when we're in a caching lexer, because caching lexers only get
723 // used in contexts where import declarations are disallowed.
725 // Likewise if this is the C++ Modules TS import keyword.
726 if (((LastTokenWasAt && II.isModulesImport()) ||
727 Identifier.is(tok::kw_import)) &&
728 !InMacroArgs && !DisableMacroExpansion &&
729 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
730 CurLexerKind != CLK_CachingLexer) {
731 ModuleImportLoc = Identifier.getLocation();
732 ModuleImportPath.clear();
733 ModuleImportExpectsIdentifier = true;
734 CurLexerKind = CLK_LexAfterModuleImport;
739 void Preprocessor::Lex(Token &Result) {
740 // We loop here until a lex function returns a token; this avoids recursion.
743 switch (CurLexerKind) {
745 ReturnedToken = CurLexer->Lex(Result);
748 ReturnedToken = CurPTHLexer->Lex(Result);
751 ReturnedToken = CurTokenLexer->Lex(Result);
753 case CLK_CachingLexer:
755 ReturnedToken = true;
757 case CLK_LexAfterModuleImport:
758 LexAfterModuleImport(Result);
759 ReturnedToken = true;
762 } while (!ReturnedToken);
764 if (Result.is(tok::code_completion))
765 setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
767 LastTokenWasAt = Result.is(tok::at);
770 /// \brief Lex a token following the 'import' contextual keyword.
772 void Preprocessor::LexAfterModuleImport(Token &Result) {
773 // Figure out what kind of lexer we actually have.
774 recomputeCurLexerKind();
776 // Lex the next token.
779 // The token sequence
781 // import identifier (. identifier)*
783 // indicates a module import directive. We already saw the 'import'
784 // contextual keyword, so now we're looking for the identifiers.
785 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
786 // We expected to see an identifier here, and we did; continue handling
788 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
789 Result.getLocation()));
790 ModuleImportExpectsIdentifier = false;
791 CurLexerKind = CLK_LexAfterModuleImport;
795 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
796 // see the next identifier. (We can also see a '[[' that begins an
797 // attribute-specifier-seq here under the C++ Modules TS.)
798 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
799 ModuleImportExpectsIdentifier = true;
800 CurLexerKind = CLK_LexAfterModuleImport;
804 // If we have a non-empty module path, load the named module.
805 if (!ModuleImportPath.empty()) {
806 // Under the Modules TS, the dot is just part of the module name, and not
807 // a real hierarachy separator. Flatten such module names now.
809 // FIXME: Is this the right level to be performing this transformation?
810 std::string FlatModuleName;
811 if (getLangOpts().ModulesTS) {
812 for (auto &Piece : ModuleImportPath) {
813 if (!FlatModuleName.empty())
814 FlatModuleName += ".";
815 FlatModuleName += Piece.first->getName();
817 SourceLocation FirstPathLoc = ModuleImportPath[0].second;
818 ModuleImportPath.clear();
819 ModuleImportPath.push_back(
820 std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
823 Module *Imported = nullptr;
824 if (getLangOpts().Modules) {
825 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
828 /*IsIncludeDirective=*/false);
830 makeModuleVisible(Imported, ModuleImportLoc);
832 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
833 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
837 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
838 CurSubmoduleState->VisibleModules.setVisible(
839 M, Loc, [](Module *) {},
840 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
841 // FIXME: Include the path in the diagnostic.
842 // FIXME: Include the import location for the conflicting module.
843 Diag(ModuleImportLoc, diag::warn_module_conflict)
844 << Path[0]->getFullModuleName()
845 << Conflict->getFullModuleName()
849 // Add this module to the imports list of the currently-built submodule.
850 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
851 BuildingSubmoduleStack.back().M->Imports.insert(M);
854 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
855 const char *DiagnosticTag,
856 bool AllowMacroExpansion) {
857 // We need at least one string literal.
858 if (Result.isNot(tok::string_literal)) {
859 Diag(Result, diag::err_expected_string_literal)
860 << /*Source='in...'*/0 << DiagnosticTag;
864 // Lex string literal tokens, optionally with macro expansion.
865 SmallVector<Token, 4> StrToks;
867 StrToks.push_back(Result);
869 if (Result.hasUDSuffix())
870 Diag(Result, diag::err_invalid_string_udl);
872 if (AllowMacroExpansion)
875 LexUnexpandedToken(Result);
876 } while (Result.is(tok::string_literal));
878 // Concatenate and parse the strings.
879 StringLiteralParser Literal(StrToks, *this);
880 assert(Literal.isAscii() && "Didn't allow wide strings in");
882 if (Literal.hadError)
885 if (Literal.Pascal) {
886 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
887 << /*Source='in...'*/0 << DiagnosticTag;
891 String = Literal.GetString();
895 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
896 assert(Tok.is(tok::numeric_constant));
897 SmallString<8> IntegerBuffer;
898 bool NumberInvalid = false;
899 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
902 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
903 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
905 llvm::APInt APVal(64, 0);
906 if (Literal.GetIntegerValue(APVal))
909 Value = APVal.getLimitedValue();
913 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
914 assert(Handler && "NULL comment handler");
915 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
916 CommentHandlers.end() && "Comment handler already registered");
917 CommentHandlers.push_back(Handler);
920 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
921 std::vector<CommentHandler *>::iterator Pos
922 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
923 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
924 CommentHandlers.erase(Pos);
927 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
928 bool AnyPendingTokens = false;
929 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
930 HEnd = CommentHandlers.end();
932 if ((*H)->HandleComment(*this, Comment))
933 AnyPendingTokens = true;
935 if (!AnyPendingTokens || getCommentRetentionState())
941 ModuleLoader::~ModuleLoader() { }
943 CommentHandler::~CommentHandler() { }
945 CodeCompletionHandler::~CodeCompletionHandler() { }
947 void Preprocessor::createPreprocessingRecord() {
951 Record = new PreprocessingRecord(getSourceManager());
952 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));