1 //===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the TokenLexer interface.
11 //===----------------------------------------------------------------------===//
13 #include "clang/Lex/TokenLexer.h"
14 #include "clang/Basic/Diagnostic.h"
15 #include "clang/Basic/IdentifierTable.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "clang/Lex/LexDiagnostic.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Lex/MacroArgs.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "clang/Lex/Token.h"
26 #include "clang/Lex/VariadicMacroSupport.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/SmallString.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/iterator_range.h"
34 using namespace clang;
36 /// Create a TokenLexer for the specified macro with the specified actual
37 /// arguments. Note that this call takes ownership of the ActualArgs pointer.
38 void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
40 // If the client is reusing a TokenLexer, make sure to free any memory
41 // associated with it.
48 ExpandLocStart = Tok.getLocation();
// Remember Tok's start-of-line and leading-space flags.
50 AtStartOfLine = Tok.isAtStartOfLine();
51 HasLeadingSpace = Tok.hasLeadingSpace();
52 NextTokGetsSpace = false;
// Start out lexing directly from the macro's own token sequence.
53 Tokens = &*Macro->tokens_begin();
55 DisableMacroExpansion = false;
57 NumTokens = Macro->tokens_end()-Macro->tokens_begin();
58 MacroExpansionStart = SourceLocation();
60 SourceManager &SM = PP.getSourceManager();
// Snapshot the next local source-location offset before the expansion
// location for this macro is created below.
61 MacroStartSLocOffset = SM.getNextLocalOffset();
64 assert(Tokens[0].getLocation().isValid());
65 assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
66 "Macro defined in macro?");
67 assert(ExpandLocStart.isValid());
69 // Reserve a source location entry chunk for the length of the macro
70 // definition. Tokens that get lexed directly from the definition will
71 // have their locations pointing inside this chunk. This is to avoid
72 // creating separate source location entries for each token.
73 MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
74 MacroDefLength = Macro->getDefinitionLength(SM);
75 MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
81 // If this is a function-like macro, expand the arguments and change
82 // Tokens to point to the expanded tokens.
83 if (Macro->isFunctionLike() && Macro->getNumParams())
84 ExpandFunctionArguments();
86 // Mark the macro as currently disabled, so that it is not recursively
87 // expanded. The macro must be disabled only after argument pre-expansion of
88 // function-like macro arguments occurs.
89 Macro->DisableMacro();
92 /// Create a TokenLexer for the specified token stream. Whether this lexer
93 /// owns the token array is recorded from the ownsTokens argument.
94 void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
95 bool disableMacroExpansion, bool ownsTokens,
// A reinjected token stream must have macro expansion disabled.
97 assert(!isReinject || disableMacroExpansion);
98 // If the client is reusing a TokenLexer, make sure to free any memory
99 // associated with it.
103 ActualArgs = nullptr;
105 OwnsTokens = ownsTokens;
106 DisableMacroExpansion = disableMacroExpansion;
107 IsReinject = isReinject;
// A plain token stream has no expansion location; Lex() checks
// ExpandLocStart.isValid() to tell a macro expansion from a token stream.
110 ExpandLocStart = ExpandLocEnd = SourceLocation();
111 AtStartOfLine = false;
112 HasLeadingSpace = false;
113 NextTokGetsSpace = false;
114 MacroExpansionStart = SourceLocation();
116 // Set HasLeadingSpace/AtStartOfLine so that the first token will be
117 // returned unmodified.
119 AtStartOfLine = TokArray[0].isAtStartOfLine();
120 HasLeadingSpace = TokArray[0].hasLeadingSpace();
/// Tear down state held by this TokenLexer, including its ActualArgs (if
/// any).
124 void TokenLexer::destroy() {
125 // If this was a function-like macro that actually uses its arguments, delete
126 // the expanded tokens.
133 // TokenLexer owns its actual arguments.
134 if (ActualArgs) ActualArgs->destroy(PP);
/// Remove the comma that precedes an empty __VA_ARGS__ from the end of
/// ResultToks when the language mode calls for it. Only the last parameter
/// of a variadic macro is considered. HasPasteOperator says whether the use
/// was written as ", ## __VA_ARGS__"; when it is set and a comma is removed,
/// ext_paste_comma is diagnosed at that comma.
/// NOTE(review): the return statements are not visible in this listing;
/// presumably the result is true when a comma was removed -- confirm.
137 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
138 SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
139 unsigned MacroArgNo, Preprocessor &PP) {
140 // Is the macro argument __VA_ARGS__?
141 if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1)
144 // In Microsoft-compatibility mode, a comma is removed in the expansion
145 // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
146 // not supported by gcc.
147 if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
150 // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
151 // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
152 // named arguments, where it remains. In all other modes, including C99
153 // with GNU extensions, it is removed regardless of named arguments.
154 // Microsoft also appears to support this extension, unofficially.
155 if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
156 && Macro->getNumParams() < 2)
159 // Is a comma available to be removed?
160 if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
163 // Issue an extension diagnostic for the paste operator.
164 if (HasPasteOperator)
165 PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
// Drop the comma itself.
168 ResultToks.pop_back();
170 if (!ResultToks.empty()) {
171 // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
172 // then removal of the comma should produce a placemarker token (in C99
173 // terms) which we model by popping off the previous ##, giving us a plain
174 // "X" when __VA_ARGS__ is empty.
175 if (ResultToks.back().is(tok::hashhash))
176 ResultToks.pop_back();
178 // Remember that this comma was elided.
179 ResultToks.back().setFlag(Token::CommaAfterElided);
182 // Never add a space, even if the comma, ##, or arg had a space.
183 NextTokGetsSpace = false;
/// Replace the tokens that were appended to ResultToks for the body of a
/// __VA_OPT__ with one stringified (or charified) token. Any ## operators
/// among those tokens are pasted first. VAOPTClosingParenLoc is the location
/// of the ')' that closes the __VA_OPT__; it marks the end of the range the
/// resulting token is attributed to.
187 void TokenLexer::stringifyVAOPTContents(
188 SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
189 const SourceLocation VAOPTClosingParenLoc) {
190 const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
191 const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
// Null when the __VA_OPT__ body contributed no tokens.
192 Token *const VAOPTTokens =
193 NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr;
195 SmallVector<Token, 64> ConcatenatedVAOPTResultToks;
196 // FIXME: Should we keep track within VCtx of whether we did or did not
197 // encounter pasting - and only then perform this loop.
199 // Perform token pasting (concatenation) prior to stringization.
200 for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
202 if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
203 assert(CurTokenIdx != 0 &&
204 "Can not have __VAOPT__ contents begin with a ##");
205 Token &LHS = VAOPTTokens[CurTokenIdx - 1];
206 pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens),
208 // Replace the token prior to the first ## in this iteration.
209 ConcatenatedVAOPTResultToks.back() = LHS;
210 if (CurTokenIdx == NumVAOptTokens)
213 ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
// Append VCtx's EOF token after the collected tokens before stringifying.
216 ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok());
217 // Get the SourceLocation that represents the start location within
218 // the macro definition that marks where this string is substituted
219 // into: i.e. the __VA_OPT__ and the ')' within the spelling of the
220 // macro definition, and use it to indicate that the stringified token
221 // was generated from that location.
222 const SourceLocation ExpansionLocStartWithinMacro =
223 getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc());
224 const SourceLocation ExpansionLocEndWithinMacro =
225 getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);
227 Token StringifiedVAOPT = MacroArgs::StringifyArgument(
228 &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/,
229 ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);
231 if (VCtx.getLeadingSpaceForStringifiedToken())
232 StringifiedVAOPT.setFlag(Token::LeadingSpace);
234 StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
235 // Resize (shrink) the token stream to just capture this stringified token.
236 ResultToks.resize(NumToksPriorToVAOpt + 1);
237 ResultToks.back() = StringifiedVAOPT;
240 /// Expand the arguments of a function-like macro so that we can quickly
241 /// return preexpanded tokens from Tokens.
242 void TokenLexer::ExpandFunctionArguments() {
243 SmallVector<Token, 128> ResultToks;
245 // Loop through 'Tokens', expanding them into ResultToks. Keep
246 // track of whether we change anything. If not, no need to keep them. If so,
247 // we install the newly expanded sequence as the new 'Tokens' list.
248 bool MadeChange = false;
// Whether the macro was invoked with variadic arguments. Left unset until
// the first token inside a __VA_OPT__ is examined (see below).
250 Optional<bool> CalledWithVariadicArguments;
// Carries __VA_OPT__ bookkeeping across loop iterations.
252 VAOptExpansionContext VCtx(PP);
254 for (unsigned I = 0, E = NumTokens; I != E; ++I) {
255 const Token &CurTok = Tokens[I];
256 // We don't want a space for the next token after a paste
257 // operator. In valid code, the token will get smooshed onto the
258 // preceding one anyway. In assembler-with-cpp mode, invalid
259 // pastes are allowed through: in this case, we do not want the
260 // extra whitespace to be added. For example, we want ". ## foo"
261 // -> ".foo" not ". foo".
262 if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
263 NextTokGetsSpace = true;
265 if (VCtx.isVAOptToken(CurTok)) {
267 assert(Tokens[I + 1].is(tok::l_paren) &&
268 "__VA_OPT__ must be followed by '('");
270 ++I; // Skip the l_paren
271 VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
277 // We have entered into the __VA_OPT__ context, so handle tokens
279 if (VCtx.isInVAOpt()) {
280 // If we are about to process a token that is either an argument to
281 // __VA_OPT__ or its closing rparen, then:
282 // 1) If the token is the closing rparen that exits us out of __VA_OPT__,
283 // perform any necessary stringification or placemarker processing,
284 // and/or skip to the next token.
285 // 2) else if macro was invoked without variadic arguments skip this
287 // 3) else (macro was invoked with variadic arguments) process the token
290 if (Tokens[I].is(tok::l_paren))
291 VCtx.sawOpeningParen(Tokens[I].getLocation());
292 // Continue skipping tokens within __VA_OPT__ if the macro was not
293 // called with variadic arguments, else let the rest of the loop handle
294 // this token. Note sawClosingParen() returns true only if the r_paren matches
295 // the closing r_paren of the __VA_OPT__.
296 if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
297 // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__.
298 if (!CalledWithVariadicArguments.hasValue()) {
299 CalledWithVariadicArguments =
300 ActualArgs->invokedWithVariadicArgument(Macro, PP);
302 if (!*CalledWithVariadicArguments) {
306 // ... else the macro was called with variadic arguments, and we do not
307 // have a closing rparen - so process this token normally.
309 // Current token is the closing r_paren which marks the end of the
310 // __VA_OPT__ invocation, so handle any place-marker pasting (if
311 // empty) by removing hashhash either before (if exists) or after. And
312 // also stringify the entire contents if VAOPT was preceded by a hash,
313 // but do so only after any token concatenation that needs to occur
314 // within the contents of VAOPT.
316 if (VCtx.hasStringifyOrCharifyBefore()) {
317 // Replace all the tokens just added from within VAOPT into a single
318 // stringified token. This requires token-pasting to eagerly occur
319 // within these tokens. If either the contents of VAOPT were empty
320 // or the macro wasn't called with any variadic arguments, the result
321 // is a token that represents an empty string.
322 stringifyVAOPTContents(ResultToks, VCtx,
323 /*ClosingParenLoc*/ Tokens[I].getLocation());
325 } else if (/*No tokens within VAOPT*/
326 ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) {
327 // Treat VAOPT as a placemarker token. Eat either the '##' before the
328 // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
329 // hashhash was not a placemarker) or the '##'
330 // after VAOPT, but not both.
332 if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
333 ResultToks.pop_back();
334 } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
335 ++I; // Skip the following hashhash.
338 // If there's a ## before the __VA_OPT__, we might have discovered
339 // that the __VA_OPT__ begins with a placeholder. We delay action on
340 // that to now to avoid messing up our stashed count of tokens before
342 if (VCtx.beginsWithPlaceholder()) {
343 assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 &&
344 ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() &&
345 ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is(
347 "no token paste before __VA_OPT__");
348 ResultToks.erase(ResultToks.begin() +
349 VCtx.getNumberOfTokensPriorToVAOpt() - 1);
351 // If the expansion of __VA_OPT__ ends with a placeholder, eat any
352 // following '##' token.
353 if (VCtx.endsWithPlaceholder() && I + 1 != E &&
354 Tokens[I + 1].is(tok::hashhash)) {
359 // We processed __VA_OPT__'s closing paren (and the exit out of
360 // __VA_OPT__), so skip to the next token.
365 // If we found the stringify operator, get the argument stringified. The
366 // preprocessor already verified that the following token is a macro
367 // parameter or __VA_OPT__ when the #define was lexed.
369 if (CurTok.isOneOf(tok::hash, tok::hashat)) {
370 int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
371 assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
372 "Token following # is not an argument or __VA_OPT__!");
375 // Handle the __VA_OPT__ case.
376 VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
377 CurTok.is(tok::hashat));
380 // Else handle the simple argument case.
381 SourceLocation ExpansionLocStart =
382 getExpansionLocForMacroDefLoc(CurTok.getLocation());
383 SourceLocation ExpansionLocEnd =
384 getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());
387 if (CurTok.is(tok::hash)) // Stringify
388 Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
392 // 'charify': don't bother caching these.
393 Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
398 Res.setFlag(Token::StringifiedInMacro);
400 // The stringified/charified string leading space flag gets set to match
401 // the #/#@ operator.
402 if (NextTokGetsSpace)
403 Res.setFlag(Token::LeadingSpace);
405 ResultToks.push_back(Res);
407 ++I; // Skip arg name.
408 NextTokGetsSpace = false;
412 // Find out if there is a paste (##) operator before or after the token.
413 bool NonEmptyPasteBefore =
414 !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
415 bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
416 bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);
417 bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren);
419 assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
420 "unexpected ## in ResultToks");
422 // Otherwise, if this is not an argument token, just add the token to the
424 IdentifierInfo *II = CurTok.getIdentifierInfo();
425 int ArgNo = II ? Macro->getParameterNum(II) : -1;
427 // This isn't an argument, just add it.
428 ResultToks.push_back(CurTok);
430 if (NextTokGetsSpace) {
431 ResultToks.back().setFlag(Token::LeadingSpace);
432 NextTokGetsSpace = false;
433 } else if (PasteBefore && !NonEmptyPasteBefore)
434 ResultToks.back().clearFlag(Token::LeadingSpace);
439 // An argument is expanded somehow, the result is different than the
443 // Otherwise, this is a use of the argument.
445 // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
446 // are no trailing commas if __VA_ARGS__ is empty.
447 if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
448 MaybeRemoveCommaBeforeVaArgs(ResultToks,
449 /*HasPasteOperator=*/false,
453 // If it is not the LHS/RHS of a ## operator, we must pre-expand the
454 // argument and substitute the expanded tokens into the result. This is
456 if (!PasteBefore && !PasteAfter) {
457 const Token *ResultArgToks;
459 // Only preexpand the argument if it could possibly need it. This
460 // avoids some work in common cases.
461 const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
462 if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
463 ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
465 ResultArgToks = ArgTok; // Use non-preexpanded tokens.
467 // If the arg token expanded into anything, append it.
468 if (ResultArgToks->isNot(tok::eof)) {
469 size_t FirstResult = ResultToks.size();
470 unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
471 ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
473 // In Microsoft-compatibility mode, we follow MSVC's preprocessing
474 // behavior by not considering single commas from nested macro
475 // expansions as argument separators. Set a flag on the token so we can
476 // test for this later when the macro expansion is processed.
477 if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
478 ResultToks.back().is(tok::comma))
479 ResultToks.back().setFlag(Token::IgnoredComma);
481 // If the '##' came from expanding an argument, turn it into 'unknown'
483 for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult,
485 if (Tok.is(tok::hashhash))
486 Tok.setKind(tok::unknown);
489 if(ExpandLocStart.isValid()) {
490 updateLocForMacroArgTokens(CurTok.getLocation(),
491 ResultToks.begin()+FirstResult,
495 // If any tokens were substituted from the argument, the whitespace
496 // before the first token should match the whitespace of the arg
498 ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
500 ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false);
501 NextTokGetsSpace = false;
503 // We're creating a placeholder token. Usually this doesn't matter,
504 // but it can affect paste behavior when at the start or end of a
506 if (NonEmptyPasteBefore) {
507 // We're imagining a placeholder token is inserted here. If this is
508 // the first token in a __VA_OPT__ after a ##, delete the ##.
509 assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__");
510 VCtx.hasPlaceholderAfterHashhashAtStart();
513 VCtx.hasPlaceholderBeforeRParen();
518 // Okay, we have a token that is either the LHS or RHS of a paste (##)
519 // argument. It gets substituted as its non-pre-expanded tokens.
520 const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
521 unsigned NumToks = MacroArgs::getArgLength(ArgToks);
522 if (NumToks) { // Not an empty argument?
523 bool VaArgsPseudoPaste = false;
524 // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
525 // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
526 // the expander tries to paste ',' with the first token of the __VA_ARGS__
528 if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
529 ResultToks[ResultToks.size()-2].is(tok::comma) &&
530 (unsigned)ArgNo == Macro->getNumParams()-1 &&
531 Macro->isVariadic()) {
532 VaArgsPseudoPaste = true;
533 // Remove the paste operator, report use of the extension.
534 PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
537 ResultToks.append(ArgToks, ArgToks+NumToks);
539 // If the '##' came from expanding an argument, turn it into 'unknown'
541 for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks,
543 if (Tok.is(tok::hashhash))
544 Tok.setKind(tok::unknown);
547 if (ExpandLocStart.isValid()) {
548 updateLocForMacroArgTokens(CurTok.getLocation(),
549 ResultToks.end()-NumToks, ResultToks.end());
552 // Transfer the leading whitespace information from the token
553 // (the macro argument) onto the first token of the
554 // expansion. Note that we don't do this for the GNU
555 // pseudo-paste extension ", ## __VA_ARGS__".
556 if (!VaArgsPseudoPaste) {
557 ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine,
559 ResultToks[ResultToks.size() - NumToks].setFlagValue(
560 Token::LeadingSpace, NextTokGetsSpace);
563 NextTokGetsSpace = false;
567 // If an empty argument is on the LHS or RHS of a paste, the standard (C99
568 // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
569 // implement this by eating ## operators when a LHS or RHS expands to
572 // Discard the argument token and skip (don't copy to the expansion
573 // buffer) the paste operator after it.
579 VCtx.hasPlaceholderBeforeRParen();
581 // If this is on the RHS of a paste operator, we've already copied the
582 // paste operator to the ResultToks list, unless the LHS was empty too.
585 if (NonEmptyPasteBefore) {
586 assert(ResultToks.back().is(tok::hashhash));
587 // Do not remove the paste operator if it is the one before __VA_OPT__
588 // (and we are still processing tokens within VA_OPT). We handle the case
589 // of removing the paste operator if __VA_OPT__ reduces to the notional
590 // placemarker above when we encounter the closing paren of VA_OPT.
591 if (!VCtx.isInVAOpt() ||
592 ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
593 ResultToks.pop_back();
595 VCtx.hasPlaceholderAfterHashhashAtStart();
598 // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
599 // and if the macro had at least one real argument, and if the token before
600 // the ## was a comma, remove the comma. This is a GCC extension which is
601 // disabled when using -std=c99.
602 if (ActualArgs->isVarargsElidedUse())
603 MaybeRemoveCommaBeforeVaArgs(ResultToks,
604 /*HasPasteOperator=*/true,
608 // If anything changed, install this as the new Tokens list.
610 assert(!OwnsTokens && "This would leak if we already own the token list");
611 // This is deleted in the dtor.
612 NumTokens = ResultToks.size();
613 // The tokens will be added to Preprocessor's cache and will be removed
614 // when this TokenLexer finishes lexing them.
615 Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
617 // The preprocessor cache of macro expanded tokens owns these tokens, not us.
622 /// Checks whether two tokens form a wide string literal: an identifier
/// spelled "L" followed by a literal token that was stringified in a macro.
623 static bool isWideStringLiteralFromMacro(const Token &FirstTok,
624 const Token &SecondTok) {
625 return FirstTok.is(tok::identifier) &&
626 FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() &&
627 SecondTok.stringifiedInMacro();
630 /// Lex - Lex and return a token from this macro stream.
631 bool TokenLexer::Lex(Token &Tok) {
632 // Lexing off the end of the macro, pop this macro off the expansion stack.
634 // If this is a macro (not a token stream), mark the macro enabled now
635 // that it is no longer being expanded.
636 if (Macro) Macro->EnableMacro();
639 Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
640 Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
// CurTokenIdx is still 0, so no token was ever handed out by this lexer.
641 if (CurTokenIdx == 0)
642 Tok.setFlag(Token::LeadingEmptyMacro);
643 return PP.HandleEndOfTokenLexer(Tok);
646 SourceManager &SM = PP.getSourceManager();
648 // If this is the first token of the expanded result, we inherit spacing
650 bool isFirstToken = CurTokenIdx == 0;
652 // Get the next token to return.
653 Tok = Tokens[CurTokenIdx++];
655 Tok.setFlag(Token::IsReinjected);
657 bool TokenIsFromPaste = false;
659 // If this token is followed by a token paste (##) operator, paste the tokens!
660 // Note that ## is a normal token when not expanding a macro.
661 if (!isAtEnd() && Macro &&
662 (Tokens[CurTokenIdx].is(tok::hashhash) ||
663 // Special processing of L#x macros in -fms-compatibility mode.
664 // Microsoft compiler is able to form a wide string literal from
665 // 'L#macro_arg' construct in a function-like macro.
666 (PP.getLangOpts().MSVCCompat &&
667 isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
668 // When handling the microsoft /##/ extension, the final token is
669 // returned by pasteTokens, not the pasted token.
670 if (pasteTokens(Tok))
673 TokenIsFromPaste = true;
676 // The token's current location indicates where the token was lexed from. We
677 // need this information to compute the spelling of the token, but any
678 // diagnostics for the expanded token should appear as if they came from
679 // ExpansionLoc. Pull this information together into a new SourceLocation
680 // that captures all of this.
681 if (ExpandLocStart.isValid() && // Don't do this for token streams.
682 // Check that the token's location was not already set properly.
683 SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
684 SourceLocation instLoc;
685 if (Tok.is(tok::comment)) {
686 instLoc = SM.createExpansionLoc(Tok.getLocation(),
691 instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
694 Tok.setLocation(instLoc);
697 // If this is the first token, set the lexical properties of the token to
698 // match the lexical properties of the macro identifier.
700 Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
701 Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
703 // If this is not the first token, we may still need to pass through
704 // leading whitespace if we've expanded a macro.
705 if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
706 if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
// The pending flags have been applied; clear them.
708 AtStartOfLine = false;
709 HasLeadingSpace = false;
711 // Handle recursive expansion!
712 if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
713 // Change the kind of this identifier to the appropriate token kind, e.g.
714 // turning "for" into a keyword.
715 IdentifierInfo *II = Tok.getIdentifierInfo();
716 Tok.setKind(II->getTokenID());
718 // If this identifier was poisoned and from a paste, emit an error. This
719 // won't be handled by Preprocessor::HandleIdentifier because this is coming
720 // from a macro expansion.
721 if (II->isPoisoned() && TokenIsFromPaste) {
722 PP.HandlePoisonedIdentifier(Tok);
725 if (!DisableMacroExpansion && II->isHandleIdentifierCase())
726 return PP.HandleIdentifier(Tok);
729 // Otherwise, return a normal token.
/// Paste Tok with the tokens that follow it in this lexer's own token array,
/// starting at CurTokenIdx; forwards to the three-argument overload.
733 bool TokenLexer::pasteTokens(Token &Tok) {
734 return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);
737 /// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
738 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
739 /// are more ## after it, chomp them iteratively. Return the result as LHSTok.
740 /// If this returns true, the caller should immediately return the token.
741 bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
742 unsigned int &CurIdx) {
743 assert(CurIdx > 0 && "## can not be the first token within tokens");
744 assert((TokenStream[CurIdx].is(tok::hashhash) ||
745 (PP.getLangOpts().MSVCCompat &&
746 isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
747 "Token at this Index must be ## or part of the MSVC 'L "
748 "#macro-arg' pasting pair");
750 // MSVC: If previous token was pasted, this must be a recovery from an invalid
751 // paste operation. Ignore spaces before this token to mimic MSVC output.
752 // Required for generating valid UUID strings in some MS headers.
753 if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
754 TokenStream[CurIdx - 2].is(tok::hashhash))
755 LHSTok.clearFlag(Token::LeadingSpace);
757 SmallString<128> Buffer;
758 const char *ResultTokStrPtr = nullptr;
759 SourceLocation StartLoc = LHSTok.getLocation();
760 SourceLocation PasteOpLoc;
// True once CurIdx has reached the end of TokenStream.
762 auto IsAtEnd = [&TokenStream, &CurIdx] {
763 return TokenStream.size() == CurIdx;
767 // Consume the ## operator if any.
768 PasteOpLoc = TokenStream[CurIdx].getLocation();
769 if (TokenStream[CurIdx].is(tok::hashhash))
771 assert(!IsAtEnd() && "No token on the RHS of a paste operator!");
773 // Get the RHS token.
774 const Token &RHS = TokenStream[CurIdx];
776 // Allocate space for the result token. This is guaranteed to be enough for
778 Buffer.resize(LHSTok.getLength() + RHS.getLength());
780 // Get the spelling of the LHS token in Buffer.
781 const char *BufPtr = &Buffer[0];
782 bool Invalid = false;
783 unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
784 if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
785 memcpy(&Buffer[0], BufPtr, LHSLen);
// Get the spelling of the RHS token, placed directly after the LHS.
789 BufPtr = Buffer.data() + LHSLen;
790 unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
793 if (RHSLen && BufPtr != &Buffer[LHSLen])
794 // Really, we want the chars in Buffer!
795 memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
797 // Trim excess space.
798 Buffer.resize(LHSLen+RHSLen);
800 // Plop the pasted result (including the trailing newline and null) into a
801 // scratch buffer where we can lex it.
803 ResultTokTmp.startToken();
805 // Claim that the tmp token is a string_literal so that we can get the
806 // character pointer back from CreateString in getLiteralData().
807 ResultTokTmp.setKind(tok::string_literal);
808 PP.CreateString(Buffer, ResultTokTmp);
809 SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
810 ResultTokStrPtr = ResultTokTmp.getLiteralData();
812 // Lex the resultant pasted token into Result.
815 if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
816 // Common paste case: identifier+identifier = identifier. Avoid creating
817 // a lexer and other overhead.
818 PP.IncrementPasteCounter(true);
820 Result.setKind(tok::raw_identifier);
821 Result.setRawIdentifierData(ResultTokStrPtr);
822 Result.setLocation(ResultTokLoc);
823 Result.setLength(LHSLen+RHSLen);
825 PP.IncrementPasteCounter(false);
827 assert(ResultTokLoc.isFileID() &&
828 "Should be a raw location into scratch buffer");
829 SourceManager &SourceMgr = PP.getSourceManager();
830 FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
832 bool Invalid = false;
833 const char *ScratchBufStart
834 = SourceMgr.getBufferData(LocFileID, &Invalid).data();
838 // Make a lexer to lex this string from. Lex just this one token.
839 // Make a lexer object so that we lex and expand the paste result.
840 Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
841 PP.getLangOpts(), ScratchBufStart,
842 ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
844 // Lex a token in raw mode. This way it won't look up identifiers
845 // automatically, lexing off the end will return an eof token, and
846 // warnings are disabled. This returns true if the result token is the
848 bool isInvalid = !TL.LexFromRawLexer(Result);
850 // If we got an EOF token, we didn't form even ONE token. For example, we
851 // did "/ ## /" to get "//".
852 isInvalid |= Result.is(tok::eof);
854 // If pasting the two tokens didn't form a full new token, this is an
855 // error. This occurs with "x ## +" and other stuff. Return with LHSTok
856 // unmodified and with RHS as the next token to lex.
858 // Explicitly convert the token location to have proper expansion
859 // information so that the user knows where it came from.
860 SourceManager &SM = PP.getSourceManager();
862 SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
864 // Test for the Microsoft extension of /##/ turning into // here on the
866 if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
867 RHS.is(tok::slash)) {
868 HandleMicrosoftCommentPaste(LHSTok, Loc);
872 // Do not emit the error when preprocessing assembler code.
873 if (!PP.getLangOpts().AsmPreprocessor) {
874 // If we're in microsoft extensions mode, downgrade this from a hard
875 // error to an extension that defaults to an error. This allows
877 PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
878 : diag::err_pp_bad_paste)
882 // An error has occurred so exit loop.
886 // Turn ## into 'unknown' to avoid # ## # from looking like a paste
888 if (Result.is(tok::hashhash))
889 Result.setKind(tok::unknown);
892 // Transfer properties of the LHS over the Result.
893 Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
894 Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());
896 // Finally, replace LHS with the result, consume the RHS, and iterate.
899 } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));
// Location of the last token consumed by the paste sequence.
901 SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();
903 // The token's current location indicates where the token was lexed from. We
904 // need this information to compute the spelling of the token, but any
905 // diagnostics for the expanded token should appear as if the token was
906 // expanded from the full ## expression. Pull this information together into
907 // a new SourceLocation that captures all of this.
908 SourceManager &SM = PP.getSourceManager();
909 if (StartLoc.isFileID())
910 StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
911 if (EndLoc.isFileID())
912 EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
// Walk both ends outward through their immediate expansion ranges until they
// land in the same FileID as MacroExpansionStart.
913 FileID MacroFID = SM.getFileID(MacroExpansionStart);
914 while (SM.getFileID(StartLoc) != MacroFID)
915 StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
916 while (SM.getFileID(EndLoc) != MacroFID)
917 EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
919 LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
920 LHSTok.getLength()));
922 // Now that we got the result token, it will be subject to expansion. Since
923 // token pasting re-lexes the result token in raw mode, identifier information
924 // isn't looked up. As such, if the result is an identifier, look up id info.
925 if (LHSTok.is(tok::raw_identifier)) {
926 // Look up the identifier info for the token. We disabled identifier lookup
927 // by saying we're skipping contents, so we need to do this manually.
928 PP.LookUpIdentifierInfo(LHSTok);
933 /// isNextTokenLParen - If the next token lexed will pop this macro off the
934 /// expansion stack, return 2. If the next unexpanded token is a '(', return
935 /// 1, otherwise return 0.
936 unsigned TokenLexer::isNextTokenLParen() const {
940 return Tokens[CurTokenIdx].is(tok::l_paren);
943 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
944 /// preprocessor directive.
945 bool TokenLexer::isParsingPreprocessorDirective() const {
946 return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
949 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
950 /// together to form a comment that comments out everything in the current
951 /// macro, other active macros, and anything left on the current physical
952 /// source line of the expanded buffer. Handle this by returning the
953 /// first token on the next line.
954 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) {
955 PP.Diag(OpLoc, diag::ext_comment_paste_microsoft);
957 // We 'comment out' the rest of this macro by just ignoring the rest of the
958 // tokens that have not been lexed yet, if any.
960 // Since this must be a macro, mark the macro enabled now that it is no longer
962 assert(Macro && "Token streams can't paste comments");
963 Macro->EnableMacro();
965 PP.HandleMicrosoftCommentPaste(Tok);
968 /// If \arg loc is a file ID and points inside the current macro
969 /// definition, returns the appropriate source location pointing at the
970 /// macro expansion source location entry, otherwise it returns an invalid
973 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
974 assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
975 "Not appropriate for token streams");
976 assert(loc.isValid() && loc.isFileID());
978 SourceManager &SM = PP.getSourceManager();
979 assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
980 "Expected loc to come from the macro definition");
982 unsigned relativeOffset = 0;
983 SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
984 return MacroExpansionStart.getLocWithOffset(relativeOffset);
987 /// Finds the tokens that are consecutive (from the same FileID)
988 /// creates a single SLocEntry, and assigns SourceLocations to each token that
989 /// point to that SLocEntry. e.g for
990 /// assert(foo == bar);
991 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
992 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
994 /// \arg begin_tokens will be updated to a position past all the found
995 /// consecutive tokens.
996 static void updateConsecutiveMacroArgTokens(SourceManager &SM,
997 SourceLocation InstLoc,
998 Token *&begin_tokens,
999 Token * end_tokens) {
1000 assert(begin_tokens < end_tokens);
1002 SourceLocation FirstLoc = begin_tokens->getLocation();
1003 SourceLocation CurLoc = FirstLoc;
1005 // Compare the source location offset of tokens and group together tokens that
1006 // are close, even if their locations point to different FileIDs. e.g.
1008 // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
1010 // |bar foo cake| (one SLocEntry chunk for all tokens)
1012 // we can perform this "merge" since the token's spelling location depends
1013 // on the relative offset.
1015 Token *NextTok = begin_tokens + 1;
1016 for (; NextTok < end_tokens; ++NextTok) {
1017 SourceLocation NextLoc = NextTok->getLocation();
1018 if (CurLoc.isFileID() != NextLoc.isFileID())
1019 break; // Token from different kind of FileID.
1022 if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
1023 break; // Token from different local/loaded location.
1024 // Check that token is not before the previous token or more than 50
1025 // "characters" away.
1026 if (RelOffs < 0 || RelOffs > 50)
1029 if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
1030 break; // Token from a different macro.
1035 // For the consecutive tokens, find the length of the SLocEntry to contain
1037 Token &LastConsecutiveTok = *(NextTok-1);
1038 int LastRelOffs = 0;
1039 SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
1041 unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
1043 // Create a macro expansion SLocEntry that will "contain" all of the tokens.
1044 SourceLocation Expansion =
1045 SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
1047 // Change the location of the tokens from the spelling location to the new
1048 // expanded location.
1049 for (; begin_tokens < NextTok; ++begin_tokens) {
1050 Token &Tok = *begin_tokens;
1052 SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
1053 Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
1057 /// Creates SLocEntries and updates the locations of macro argument
1058 /// tokens to their new expanded locations.
1060 /// \param ArgIdSpellLoc the location of the macro argument id inside the macro
1062 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
1063 Token *begin_tokens,
1064 Token *end_tokens) {
1065 SourceManager &SM = PP.getSourceManager();
1067 SourceLocation InstLoc =
1068 getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
1070 while (begin_tokens < end_tokens) {
1071 // If there's only one token just create a SLocEntry for it.
1072 if (end_tokens - begin_tokens == 1) {
1073 Token &Tok = *begin_tokens;
1074 Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
1080 updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
1084 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
1085 AtStartOfLine = Result.isAtStartOfLine();
1086 HasLeadingSpace = Result.hasLeadingSpace();