lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file contains the implementation of the UnwrappedLineParser,
  11 /// which turns a stream of tokens into UnwrappedLines.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "UnwrappedLineParser.h"
  16 #include "llvm/ADT/STLExtras.h"
  17 #include "llvm/Support/Debug.h"
  18 #include "llvm/Support/raw_ostream.h"
  19
  20 #include <algorithm>
  21
  22 #define DEBUG_TYPE "format-parser"
  23
  24 namespace clang {
  25 namespace format {
  26
  27 class FormatTokenSource {
  28 public:
  29   virtual ~FormatTokenSource() {}
  30   virtual FormatToken *getNextToken() = 0;
  31
  32   virtual unsigned getPosition() = 0;
  33   virtual FormatToken *setPosition(unsigned Position) = 0;
  34 };
  35
  36 namespace {
  37
  38 class ScopedDeclarationState {
  39 public:
  40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  41                          bool MustBeDeclaration)
  42       : Line(Line), Stack(Stack) {
  43     Line.MustBeDeclaration = MustBeDeclaration;
  44     Stack.push_back(MustBeDeclaration);
  45   }
  46   ~ScopedDeclarationState() {
  47     Stack.pop_back();
  48     if (!Stack.empty())
  49       Line.MustBeDeclaration = Stack.back();
  50     else
  51       Line.MustBeDeclaration = true;
  52   }
  53
  54 private:
  55   UnwrappedLine &Line;
  56   std::vector<bool> &Stack;
  57 };
  58
  59 static bool isLineComment(const FormatToken &FormatTok) {
  60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  61 }
  62
  63 // Checks if \p FormatTok is a line comment that continues the line comment
  64 // \p Previous. The original column of \p MinColumnToken is used to determine
  65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
  66 static bool continuesLineComment(const FormatToken &FormatTok,
  67                                  const FormatToken *Previous,
  68                                  const FormatToken *MinColumnToken) {
  69   if (!Previous || !MinColumnToken)
  70     return false;
  71   unsigned MinContinueColumn =
  72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  74          isLineComment(*Previous) &&
  75          FormatTok.OriginalColumn >= MinContinueColumn;
  76 }
  77
  78 class ScopedMacroState : public FormatTokenSource {
  79 public:
  80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  81                    FormatToken *&ResetToken)
  82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  84         Token(nullptr), PreviousToken(nullptr) {
  85     FakeEOF.Tok.startToken();
  86     FakeEOF.Tok.setKind(tok::eof);
  87     TokenSource = this;
  88     Line.Level = 0;
  89     Line.InPPDirective = true;
  90   }
  91
  92   ~ScopedMacroState() override {
  93     TokenSource = PreviousTokenSource;
  94     ResetToken = Token;
  95     Line.InPPDirective = false;
  96     Line.Level = PreviousLineLevel;
  97   }
  98
  99   FormatToken *getNextToken() override {
 100     // The \c UnwrappedLineParser guards against this by never calling
 101     // \c getNextToken() after it has encountered the first eof token.
 102     assert(!eof());
 103     PreviousToken = Token;
 104     Token = PreviousTokenSource->getNextToken();
 105     if (eof())
 106       return &FakeEOF;
 107     return Token;
 108   }
 109
 110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
 111
 112   FormatToken *setPosition(unsigned Position) override {
 113     PreviousToken = nullptr;
 114     Token = PreviousTokenSource->setPosition(Position);
 115     return Token;
 116   }
 117
 118 private:
 119   bool eof() {
 120     return Token && Token->HasUnescapedNewline &&
 121            !continuesLineComment(*Token, PreviousToken,
 122                                  /*MinColumnToken=*/PreviousToken);
 123   }
 124
 125   FormatToken FakeEOF;
 126   UnwrappedLine &Line;
 127   FormatTokenSource *&TokenSource;
 128   FormatToken *&ResetToken;
 129   unsigned PreviousLineLevel;
 130   FormatTokenSource *PreviousTokenSource;
 131
 132   FormatToken *Token;
 133   FormatToken *PreviousToken;
 134 };
 135
 136 } // end anonymous namespace
 137
 138 class ScopedLineState {
 139 public:
 140   ScopedLineState(UnwrappedLineParser &Parser,
 141                   bool SwitchToPreprocessorLines = false)
 142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 143     if (SwitchToPreprocessorLines)
 144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 145     else if (!Parser.Line->Tokens.empty())
 146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 147     PreBlockLine = std::move(Parser.Line);
 148     Parser.Line = std::make_unique<UnwrappedLine>();
 149     Parser.Line->Level = PreBlockLine->Level;
 150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 151   }
 152
 153   ~ScopedLineState() {
 154     if (!Parser.Line->Tokens.empty()) {
 155       Parser.addUnwrappedLine();
 156     }
 157     assert(Parser.Line->Tokens.empty());
 158     Parser.Line = std::move(PreBlockLine);
 159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 160       Parser.MustBreakBeforeNextToken = true;
 161     Parser.CurrentLines = OriginalLines;
 162   }
 163
 164 private:
 165   UnwrappedLineParser &Parser;
 166
 167   std::unique_ptr<UnwrappedLine> PreBlockLine;
 168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 169 };
 170
 171 class CompoundStatementIndenter {
 172 public:
 173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 174                             const FormatStyle &Style, unsigned &LineLevel)
 175       : CompoundStatementIndenter(Parser, LineLevel,
 176                                   Style.BraceWrapping.AfterControlStatement,
 177                                   Style.BraceWrapping.IndentBraces) {}
 178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
 179                             bool WrapBrace, bool IndentBrace)
 180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 181     if (WrapBrace)
 182       Parser->addUnwrappedLine();
 183     if (IndentBrace)
 184       ++LineLevel;
 185   }
 186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 187
 188 private:
 189   unsigned &LineLevel;
 190   unsigned OldLineLevel;
 191 };
 192
 193 namespace {
 194
 195 class IndexedTokenSource : public FormatTokenSource {
 196 public:
 197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 198       : Tokens(Tokens), Position(-1) {}
 199
 200   FormatToken *getNextToken() override {
 201     ++Position;
 202     return Tokens[Position];
 203   }
 204
 205   unsigned getPosition() override {
 206     assert(Position >= 0);
 207     return Position;
 208   }
 209
 210   FormatToken *setPosition(unsigned P) override {
 211     Position = P;
 212     return Tokens[Position];
 213   }
 214
 215   void reset() { Position = -1; }
 216
 217 private:
 218   ArrayRef<FormatToken *> Tokens;
 219   int Position;
 220 };
 221
 222 } // end anonymous namespace
 223
 224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 225                                          const AdditionalKeywords &Keywords,
 226                                          unsigned FirstStartColumn,
 227                                          ArrayRef<FormatToken *> Tokens,
 228                                          UnwrappedLineConsumer &Callback)
 229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 234                        ? IG_Rejected
 235                        : IG_Inited),
 236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
 237
 238 void UnwrappedLineParser::reset() {
 239   PPBranchLevel = -1;
 240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 241                      ? IG_Rejected
 242                      : IG_Inited;
 243   IncludeGuardToken = nullptr;
 244   Line.reset(new UnwrappedLine);
 245   CommentsBeforeNextToken.clear();
 246   FormatTok = nullptr;
 247   MustBreakBeforeNextToken = false;
 248   PreprocessorDirectives.clear();
 249   CurrentLines = &Lines;
 250   DeclarationScopeStack.clear();
 251   PPStack.clear();
 252   Line->FirstStartColumn = FirstStartColumn;
 253 }
 254
 255 void UnwrappedLineParser::parse() {
 256   IndexedTokenSource TokenSource(AllTokens);
 257   Line->FirstStartColumn = FirstStartColumn;
 258   do {
 259     LLVM_DEBUG(llvm::dbgs() << "----\n");
 260     reset();
 261     Tokens = &TokenSource;
 262     TokenSource.reset();
 263
 264     readToken();
 265     parseFile();
 266
 267     // If we found an include guard then all preprocessor directives (other than
 268     // the guard) are over-indented by one.
 269     if (IncludeGuard == IG_Found)
 270       for (auto &Line : Lines)
 271         if (Line.InPPDirective && Line.Level > 0)
 272           --Line.Level;
 273
 274     // Create line with eof token.
 275     pushToken(FormatTok);
 276     addUnwrappedLine();
 277
 278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 279                                                   E = Lines.end();
 280          I != E; ++I) {
 281       Callback.consumeUnwrappedLine(*I);
 282     }
 283     Callback.finishRun();
 284     Lines.clear();
 285     while (!PPLevelBranchIndex.empty() &&
 286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 289     }
 290     if (!PPLevelBranchIndex.empty()) {
 291       ++PPLevelBranchIndex.back();
 292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 294     }
 295   } while (!PPLevelBranchIndex.empty());
 296 }
 297
 298 void UnwrappedLineParser::parseFile() {
 299   // The top-level context in a file always has declarations, except for pre-
 300   // processor directives and JavaScript files.
 301   bool MustBeDeclaration =
 302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 304                                           MustBeDeclaration);
 305   if (Style.Language == FormatStyle::LK_TextProto)
 306     parseBracedList();
 307   else
 308     parseLevel(/*HasOpeningBrace=*/false);
 309   // Make sure to format the remaining tokens.
 310   //
 311   // LK_TextProto is special since its top-level is parsed as the body of a
 312   // braced list, which does not necessarily have natural line separators such
 313   // as a semicolon. Comments after the last entry that have been determined to
 314   // not belong to that line, as in:
 315   //   key: value
 316   //   // endfile comment
 317   // do not have a chance to be put on a line of their own until this point.
 318   // Here we add this newline before end-of-file comments.
 319   if (Style.Language == FormatStyle::LK_TextProto &&
 320       !CommentsBeforeNextToken.empty())
 321     addUnwrappedLine();
 322   flushComments(true);
 323   addUnwrappedLine();
 324 }
 325
 326 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 327   bool SwitchLabelEncountered = false;
 328   do {
 329     tok::TokenKind kind = FormatTok->Tok.getKind();
 330     if (FormatTok->Type == TT_MacroBlockBegin) {
 331       kind = tok::l_brace;
 332     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 333       kind = tok::r_brace;
 334     }
 335
 336     switch (kind) {
 337     case tok::comment:
 338       nextToken();
 339       addUnwrappedLine();
 340       break;
 341     case tok::l_brace:
 342       // FIXME: Add parameter whether this can happen - if this happens, we must
 343       // be in a non-declaration context.
 344       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
 345         continue;
 346       parseBlock(/*MustBeDeclaration=*/false);
 347       addUnwrappedLine();
 348       break;
 349     case tok::r_brace:
 350       if (HasOpeningBrace)
 351         return;
 352       nextToken();
 353       addUnwrappedLine();
 354       break;
 355     case tok::kw_default: {
 356       unsigned StoredPosition = Tokens->getPosition();
 357       FormatToken *Next;
 358       do {
 359         Next = Tokens->getNextToken();
 360       } while (Next && Next->is(tok::comment));
 361       FormatTok = Tokens->setPosition(StoredPosition);
 362       if (Next && Next->isNot(tok::colon)) {
 363         // default not followed by ':' is not a case label; treat it like
 364         // an identifier.
 365         parseStructuralElement();
 366         break;
 367       }
 368       // Else, if it is 'default:', fall through to the case handling.
 369       LLVM_FALLTHROUGH;
 370     }
 371     case tok::kw_case:
 372       if (Style.Language == FormatStyle::LK_JavaScript &&
 373           Line->MustBeDeclaration) {
 374         // A 'case: string' style field declaration.
 375         parseStructuralElement();
 376         break;
 377       }
 378       if (!SwitchLabelEncountered &&
 379           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 380         ++Line->Level;
 381       SwitchLabelEncountered = true;
 382       parseStructuralElement();
 383       break;
 384     default:
 385       parseStructuralElement();
 386       break;
 387     }
 388   } while (!eof());
 389 }
 390
 391 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 392   // We'll parse forward through the tokens until we hit
 393   // a closing brace or eof - note that getNextToken() will
 394   // parse macros, so this will magically work inside macro
 395   // definitions, too.
 396   unsigned StoredPosition = Tokens->getPosition();
 397   FormatToken *Tok = FormatTok;
 398   const FormatToken *PrevTok = Tok->Previous;
 399   // Keep a stack of positions of lbrace tokens. We will
 400   // update information about whether an lbrace starts a
 401   // braced init list or a different block during the loop.
 402   SmallVector<FormatToken *, 8> LBraceStack;
 403   assert(Tok->Tok.is(tok::l_brace));
 404   do {
 405     // Get next non-comment token.
 406     FormatToken *NextTok;
 407     unsigned ReadTokens = 0;
 408     do {
 409       NextTok = Tokens->getNextToken();
 410       ++ReadTokens;
 411     } while (NextTok->is(tok::comment));
 412
 413     switch (Tok->Tok.getKind()) {
 414     case tok::l_brace:
 415       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
 416         if (PrevTok->isOneOf(tok::colon, tok::less))
 417           // A ':' indicates this code is in a type, or a braced list
 418           // following a label in an object literal ({a: {b: 1}}).
 419           // A '<' could be an object used in a comparison, but that is nonsense
 420           // code (can never return true), so more likely it is a generic type
 421           // argument (`X<{a: string; b: number}>`).
 422           // The code below could be confused by semicolons between the
 423           // individual members in a type member list, which would normally
 424           // trigger BK_Block. In both cases, this must be parsed as an inline
 425           // braced init.
 426           Tok->BlockKind = BK_BracedInit;
 427         else if (PrevTok->is(tok::r_paren))
 428           // `) { }` can only occur in function or method declarations in JS.
 429           Tok->BlockKind = BK_Block;
 430       } else {
 431         Tok->BlockKind = BK_Unknown;
 432       }
 433       LBraceStack.push_back(Tok);
 434       break;
 435     case tok::r_brace:
 436       if (LBraceStack.empty())
 437         break;
 438       if (LBraceStack.back()->BlockKind == BK_Unknown) {
 439         bool ProbablyBracedList = false;
 440         if (Style.Language == FormatStyle::LK_Proto) {
 441           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 442         } else {
 443           // Using OriginalColumn to distinguish between ObjC methods and
 444           // binary operators is a bit hacky.
 445           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 446                                   NextTok->OriginalColumn == 0;
 447
 448           // If there is a comma, semicolon or right paren after the closing
 449           // brace, we assume this is a braced initializer list.  Note that
 450           // regardless how we mark inner braces here, we will overwrite the
 451           // BlockKind later if we parse a braced list (where all blocks
 452           // inside are by default braced lists), or when we explicitly detect
 453           // blocks (for example while parsing lambdas).
 454           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
 455           // braced list in JS.
 456           ProbablyBracedList =
 457               (Style.Language == FormatStyle::LK_JavaScript &&
 458                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 459                                 Keywords.kw_as)) ||
 460               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
 461               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 462                                tok::r_paren, tok::r_square, tok::l_brace,
 463                                tok::ellipsis) ||
 464               (NextTok->is(tok::identifier) &&
 465                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
 466               (NextTok->is(tok::semi) &&
 467                (!ExpectClassBody || LBraceStack.size() != 1)) ||
 468               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 469           if (NextTok->is(tok::l_square)) {
 470             // We can have an array subscript after a braced init
 471             // list, but C++11 attributes are expected after blocks.
 472             NextTok = Tokens->getNextToken();
 473             ++ReadTokens;
 474             ProbablyBracedList = NextTok->isNot(tok::l_square);
 475           }
 476         }
 477         if (ProbablyBracedList) {
 478           Tok->BlockKind = BK_BracedInit;
 479           LBraceStack.back()->BlockKind = BK_BracedInit;
 480         } else {
 481           Tok->BlockKind = BK_Block;
 482           LBraceStack.back()->BlockKind = BK_Block;
 483         }
 484       }
 485       LBraceStack.pop_back();
 486       break;
 487     case tok::identifier:
 488       if (!Tok->is(TT_StatementMacro))
 489         break;
 490       LLVM_FALLTHROUGH;
 491     case tok::at:
 492     case tok::semi:
 493     case tok::kw_if:
 494     case tok::kw_while:
 495     case tok::kw_for:
 496     case tok::kw_switch:
 497     case tok::kw_try:
 498     case tok::kw___try:
 499       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
 500         LBraceStack.back()->BlockKind = BK_Block;
 501       break;
 502     default:
 503       break;
 504     }
 505     PrevTok = Tok;
 506     Tok = NextTok;
 507   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 508
 509   // Assume other blocks for all unclosed opening braces.
 510   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 511     if (LBraceStack[i]->BlockKind == BK_Unknown)
 512       LBraceStack[i]->BlockKind = BK_Block;
 513   }
 514
 515   FormatTok = Tokens->setPosition(StoredPosition);
 516 }
 517
 518 template <class T>
 519 static inline void hash_combine(std::size_t &seed, const T &v) {
 520   std::hash<T> hasher;
 521   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 522 }
 523
 524 size_t UnwrappedLineParser::computePPHash() const {
 525   size_t h = 0;
 526   for (const auto &i : PPStack) {
 527     hash_combine(h, size_t(i.Kind));
 528     hash_combine(h, i.Line);
 529   }
 530   return h;
 531 }
 532
 533 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 534                                      bool MunchSemi) {
 535   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 536          "'{' or macro block token expected");
 537   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 538   FormatTok->BlockKind = BK_Block;
 539
 540   size_t PPStartHash = computePPHash();
 541
 542   unsigned InitialLevel = Line->Level;
 543   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
 544
 545   if (MacroBlock && FormatTok->is(tok::l_paren))
 546     parseParens();
 547
 548   size_t NbPreprocessorDirectives =
 549       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
 550   addUnwrappedLine();
 551   size_t OpeningLineIndex =
 552       CurrentLines->empty()
 553           ? (UnwrappedLine::kInvalidIndex)
 554           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 555
 556   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 557                                           MustBeDeclaration);
 558   if (AddLevel)
 559     ++Line->Level;
 560   parseLevel(/*HasOpeningBrace=*/true);
 561
 562   if (eof())
 563     return;
 564
 565   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 566                  : !FormatTok->is(tok::r_brace)) {
 567     Line->Level = InitialLevel;
 568     FormatTok->BlockKind = BK_Block;
 569     return;
 570   }
 571
 572   size_t PPEndHash = computePPHash();
 573
 574   // Munch the closing brace.
 575   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
 576
 577   if (MacroBlock && FormatTok->is(tok::l_paren))
 578     parseParens();
 579
 580   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 581     nextToken();
 582   Line->Level = InitialLevel;
 583
 584   if (PPStartHash == PPEndHash) {
 585     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 586     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 587       // Update the opening line to add the forward reference as well
 588       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 589           CurrentLines->size() - 1;
 590     }
 591   }
 592 }
 593
 594 static bool isGoogScope(const UnwrappedLine &Line) {
 595   // FIXME: Closure-library specific stuff should not be hard-coded but be
 596   // configurable.
 597   if (Line.Tokens.size() < 4)
 598     return false;
 599   auto I = Line.Tokens.begin();
 600   if (I->Tok->TokenText != "goog")
 601     return false;
 602   ++I;
 603   if (I->Tok->isNot(tok::period))
 604     return false;
 605   ++I;
 606   if (I->Tok->TokenText != "scope")
 607     return false;
 608   ++I;
 609   return I->Tok->is(tok::l_paren);
 610 }
 611
 612 static bool isIIFE(const UnwrappedLine &Line,
 613                    const AdditionalKeywords &Keywords) {
 614   // Look for the start of an immediately invoked anonymous function.
 615   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 616   // This is commonly done in JavaScript to create a new, anonymous scope.
 617   // Example: (function() { ... })()
 618   if (Line.Tokens.size() < 3)
 619     return false;
 620   auto I = Line.Tokens.begin();
 621   if (I->Tok->isNot(tok::l_paren))
 622     return false;
 623   ++I;
 624   if (I->Tok->isNot(Keywords.kw_function))
 625     return false;
 626   ++I;
 627   return I->Tok->is(tok::l_paren);
 628 }
 629
 630 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 631                                    const FormatToken &InitialToken) {
 632   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
 633     return Style.BraceWrapping.AfterNamespace;
 634   if (InitialToken.is(tok::kw_class))
 635     return Style.BraceWrapping.AfterClass;
 636   if (InitialToken.is(tok::kw_union))
 637     return Style.BraceWrapping.AfterUnion;
 638   if (InitialToken.is(tok::kw_struct))
 639     return Style.BraceWrapping.AfterStruct;
 640   return false;
 641 }
 642
 643 void UnwrappedLineParser::parseChildBlock() {
 644   FormatTok->BlockKind = BK_Block;
 645   nextToken();
 646   {
 647     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
 648                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 649     ScopedLineState LineState(*this);
 650     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 651                                             /*MustBeDeclaration=*/false);
 652     Line->Level += SkipIndent ? 0 : 1;
 653     parseLevel(/*HasOpeningBrace=*/true);
 654     flushComments(isOnNewLine(*FormatTok));
 655     Line->Level -= SkipIndent ? 0 : 1;
 656   }
 657   nextToken();
 658 }
 659
 660 void UnwrappedLineParser::parsePPDirective() {
 661   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 662   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 663
 664   nextToken();
 665
 666   if (!FormatTok->Tok.getIdentifierInfo()) {
 667     parsePPUnknown();
 668     return;
 669   }
 670
 671   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 672   case tok::pp_define:
 673     parsePPDefine();
 674     return;
 675   case tok::pp_if:
 676     parsePPIf(/*IfDef=*/false);
 677     break;
 678   case tok::pp_ifdef:
 679   case tok::pp_ifndef:
 680     parsePPIf(/*IfDef=*/true);
 681     break;
 682   case tok::pp_else:
 683     parsePPElse();
 684     break;
 685   case tok::pp_elif:
 686     parsePPElIf();
 687     break;
 688   case tok::pp_endif:
 689     parsePPEndIf();
 690     break;
 691   default:
 692     parsePPUnknown();
 693     break;
 694   }
 695 }
 696
 697 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 698   size_t Line = CurrentLines->size();
 699   if (CurrentLines == &PreprocessorDirectives)
 700     Line += Lines.size();
 701
 702   if (Unreachable ||
 703       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
 704     PPStack.push_back({PP_Unreachable, Line});
 705   else
 706     PPStack.push_back({PP_Conditional, Line});
 707 }
 708
 709 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 710   ++PPBranchLevel;
 711   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 712   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 713     PPLevelBranchIndex.push_back(0);
 714     PPLevelBranchCount.push_back(0);
 715   }
 716   PPChainBranchIndex.push(0);
 717   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 718   conditionalCompilationCondition(Unreachable || Skip);
 719 }
 720
 721 void UnwrappedLineParser::conditionalCompilationAlternative() {
 722   if (!PPStack.empty())
 723     PPStack.pop_back();
 724   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 725   if (!PPChainBranchIndex.empty())
 726     ++PPChainBranchIndex.top();
 727   conditionalCompilationCondition(
 728       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 729       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 730 }
 731
 732 void UnwrappedLineParser::conditionalCompilationEnd() {
 733   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 734   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 735     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 736       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 737     }
 738   }
 739   // Guard against #endif's without #if.
 740   if (PPBranchLevel > -1)
 741     --PPBranchLevel;
 742   if (!PPChainBranchIndex.empty())
 743     PPChainBranchIndex.pop();
 744   if (!PPStack.empty())
 745     PPStack.pop_back();
 746 }
 747
 748 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 749   bool IfNDef = FormatTok->is(tok::pp_ifndef);
 750   nextToken();
 751   bool Unreachable = false;
 752   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
 753     Unreachable = true;
 754   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
 755     Unreachable = true;
 756   conditionalCompilationStart(Unreachable);
 757   FormatToken *IfCondition = FormatTok;
 758   // If there's a #ifndef on the first line, and the only lines before it are
 759   // comments, it could be an include guard.
 760   bool MaybeIncludeGuard = IfNDef;
 761   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
 762     for (auto &Line : Lines) {
 763       if (!Line.Tokens.front().Tok->is(tok::comment)) {
 764         MaybeIncludeGuard = false;
 765         IncludeGuard = IG_Rejected;
 766         break;
 767       }
 768     }
 769   --PPBranchLevel;
 770   parsePPUnknown();
 771   ++PPBranchLevel;
 772   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
 773     IncludeGuard = IG_IfNdefed;
 774     IncludeGuardToken = IfCondition;
 775   }
 776 }
 777
 778 void UnwrappedLineParser::parsePPElse() {
 779   // If a potential include guard has an #else, it's not an include guard.
 780   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
 781     IncludeGuard = IG_Rejected;
 782   conditionalCompilationAlternative();
 783   if (PPBranchLevel > -1)
 784     --PPBranchLevel;
 785   parsePPUnknown();
 786   ++PPBranchLevel;
 787 }
 788
 789 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 790
 791 void UnwrappedLineParser::parsePPEndIf() {
 792   conditionalCompilationEnd();
 793   parsePPUnknown();
 794   // If the #endif of a potential include guard is the last thing in the file,
 795   // then we found an include guard.
 796   unsigned TokenPosition = Tokens->getPosition();
 797   FormatToken *PeekNext = AllTokens[TokenPosition];
 798   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
 799       PeekNext->is(tok::eof) &&
 800       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 801     IncludeGuard = IG_Found;
 802 }
 803
 804 void UnwrappedLineParser::parsePPDefine() {
 805   nextToken();
 806
 807   if (!FormatTok->Tok.getIdentifierInfo()) {
 808     IncludeGuard = IG_Rejected;
 809     IncludeGuardToken = nullptr;
 810     parsePPUnknown();
 811     return;
 812   }
 813
 814   if (IncludeGuard == IG_IfNdefed &&
 815       IncludeGuardToken->TokenText == FormatTok->TokenText) {
 816     IncludeGuard = IG_Defined;
 817     IncludeGuardToken = nullptr;
 818     for (auto &Line : Lines) {
 819       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
 820         IncludeGuard = IG_Rejected;
 821         break;
 822       }
 823     }
 824   }
 825
 826   nextToken();
 827   if (FormatTok->Tok.getKind() == tok::l_paren &&
 828       FormatTok->WhitespaceRange.getBegin() ==
 829           FormatTok->WhitespaceRange.getEnd()) {
 830     parseParens();
 831   }
 832   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 833     Line->Level += PPBranchLevel + 1;
 834   addUnwrappedLine();
 835   ++Line->Level;
 836
 837   // Errors during a preprocessor directive can only affect the layout of the
 838   // preprocessor directive, and thus we ignore them. An alternative approach
 839   // would be to use the same approach we use on the file level (no
 840   // re-indentation if there was a structural error) within the macro
 841   // definition.
 842   parseFile();
 843 }
 844
 845 void UnwrappedLineParser::parsePPUnknown() {
 846   do {
 847     nextToken();
 848   } while (!eof());
 849   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 850     Line->Level += PPBranchLevel + 1;
 851   addUnwrappedLine();
 852 }
 853
 854 // Here we blacklist certain tokens that are not usually the first token in an
 855 // unwrapped line. This is used in attempt to distinguish macro calls without
 856 // trailing semicolons from other constructs split to several lines.
 857 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 858   // Semicolon can be a null-statement, l_square can be a start of a macro or
 859   // a C++11 attribute, but this doesn't seem to be common.
 860   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 861          Tok.isNot(tok::l_square) &&
 862          // Tokens that can only be used as binary operators and a part of
 863          // overloaded operator names.
 864          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 865          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 866          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 867          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 868          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 869          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 870          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 871          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 872          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 873          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 874          Tok.isNot(tok::lesslessequal) &&
 875          // Colon is used in labels, base class lists, initializer lists,
 876          // range-based for loops, ternary operator, but should never be the
 877          // first token in an unwrapped line.
 878          Tok.isNot(tok::colon) &&
 879          // 'noexcept' is a trailing annotation.
 880          Tok.isNot(tok::kw_noexcept);
 881 }
 882
 883 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
 884                           const FormatToken *FormatTok) {
 885   // FIXME: This returns true for C/C++ keywords like 'struct'.
 886   return FormatTok->is(tok::identifier) &&
 887          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
 888           !FormatTok->isOneOf(
 889               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
 890               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
 891               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
 892               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
 893               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
 894               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
 895               Keywords.kw_from));
 896 }
 897
 898 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
 899                                  const FormatToken *FormatTok) {
 900   return FormatTok->Tok.isLiteral() ||
 901          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
 902          mustBeJSIdent(Keywords, FormatTok);
 903 }
 904
 905 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
 906 // when encountered after a value (see mustBeJSIdentOrValue).
 907 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
 908                            const FormatToken *FormatTok) {
 909   return FormatTok->isOneOf(
 910       tok::kw_return, Keywords.kw_yield,
 911       // conditionals
 912       tok::kw_if, tok::kw_else,
 913       // loops
 914       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
 915       // switch/case
 916       tok::kw_switch, tok::kw_case,
 917       // exceptions
 918       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
 919       // declaration
 920       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
 921       Keywords.kw_async, Keywords.kw_function,
 922       // import/export
 923       Keywords.kw_import, tok::kw_export);
 924 }
 925
 926 // readTokenWithJavaScriptASI reads the next token and terminates the current
 927 // line if JavaScript Automatic Semicolon Insertion must
 928 // happen between the current token and the next token.
 929 //
 930 // This method is conservative - it cannot cover all edge cases of JavaScript,
 931 // but only aims to correctly handle certain well known cases. It *must not*
 932 // return true in speculative cases.
 933 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 934   FormatToken *Previous = FormatTok;
 935   readToken();
 936   FormatToken *Next = FormatTok;
 937
 938   bool IsOnSameLine =
 939       CommentsBeforeNextToken.empty()
 940           ? Next->NewlinesBefore == 0
 941           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
 942   if (IsOnSameLine)
 943     return;
 944
 945   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
 946   bool PreviousStartsTemplateExpr =
 947       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
 948   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
 949     // If the line contains an '@' sign, the previous token might be an
 950     // annotation, which can precede another identifier/value.
 951     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
 952                               [](UnwrappedLineNode &LineNode) {
 953                                 return LineNode.Tok->is(tok::at);
 954                               }) != Line->Tokens.end();
 955     if (HasAt)
 956       return;
 957   }
 958   if (Next->is(tok::exclaim) && PreviousMustBeValue)
 959     return addUnwrappedLine();
 960   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
 961   bool NextEndsTemplateExpr =
 962       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
 963   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
 964       (PreviousMustBeValue ||
 965        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
 966                          tok::minusminus)))
 967     return addUnwrappedLine();
 968   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
 969       isJSDeclOrStmt(Keywords, Next))
 970     return addUnwrappedLine();
 971 }
 972
// Parses one structural element starting at the current token.
//
// The first switch dispatches on the leading token: constructs that have a
// dedicated parser (namespaces, access specifiers, control statements,
// try/catch, extern blocks, import/export, several macro kinds, ...) are
// handed off and the function returns. Everything that breaks out of that
// switch is consumed token by token by the loop that follows, until one of
// its cases returns or eof() becomes true.
//
// Must not be called with the current token being a '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include token, optionally followed by a string literal, forms
  // a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Walk to the matching '}' (or eof), flagging every token in between as
      // Finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are plain modifiers; only consume the
    // keyword. Otherwise parse an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a block; BraceWrapping.AfterExternBlock decides
    // whether the brace starts a new line and whether the block adds a level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import', an optional 'public', a string literal and an
      // optional ';' form one line.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' (and their Q_ variants) followed by a colon form a
    // line of their own in C++.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic scan: consume the rest of the element one token (or one nested
  // construct) at a time.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol together with the 'operator' keyword so it
      // is not dispatched as a separate token by this loop.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by a type/identifier, a parenthesized list
      // and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then restore the stored position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only an identifier that is the first token of the line (ignoring one
      // leading comment) can be a label or a free-standing macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name (at least five characters long or
        // followed by parentheses) that is followed by a line break and by a
        // token that can start a new line is treated as a macro on its own
        // line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1412
1413 bool UnwrappedLineParser::tryToParseLambda() {
1414   if (!Style.isCpp()) {
1415     nextToken();
1416     return false;
1417   }
1418   assert(FormatTok->is(tok::l_square));
1419   FormatToken &LSquare = *FormatTok;
1420   if (!tryToParseLambdaIntroducer())
1421     return false;
1422
1423   bool SeenArrow = false;
1424
1425   while (FormatTok->isNot(tok::l_brace)) {
1426     if (FormatTok->isSimpleTypeSpecifier()) {
1427       nextToken();
1428       continue;
1429     }
1430     switch (FormatTok->Tok.getKind()) {
1431     case tok::l_brace:
1432       break;
1433     case tok::l_paren:
1434       parseParens();
1435       break;
1436     case tok::amp:
1437     case tok::star:
1438     case tok::kw_const:
1439     case tok::comma:
1440     case tok::less:
1441     case tok::greater:
1442     case tok::identifier:
1443     case tok::numeric_constant:
1444     case tok::coloncolon:
1445     case tok::kw_class:
1446     case tok::kw_mutable:
1447     case tok::kw_noexcept:
1448     case tok::kw_template:
1449     case tok::kw_typename:
1450       nextToken();
1451       break;
1452     // Specialization of a template with an integer parameter can contain
1453     // arithmetic, logical, comparison and ternary operators.
1454     //
1455     // FIXME: This also accepts sequences of operators that are not in the scope
1456     // of a template argument list.
1457     //
1458     // In a C++ lambda a template type can only occur after an arrow. We use
1459     // this as an heuristic to distinguish between Objective-C expressions
1460     // followed by an `a->b` expression, such as:
1461     // ([obj func:arg] + a->b)
1462     // Otherwise the code below would parse as a lambda.
1463     //
1464     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1465     // explicit template lists: []<bool b = true && false>(U &&u){}
1466     case tok::plus:
1467     case tok::minus:
1468     case tok::exclaim:
1469     case tok::tilde:
1470     case tok::slash:
1471     case tok::percent:
1472     case tok::lessless:
1473     case tok::pipe:
1474     case tok::pipepipe:
1475     case tok::ampamp:
1476     case tok::caret:
1477     case tok::equalequal:
1478     case tok::exclaimequal:
1479     case tok::greaterequal:
1480     case tok::lessequal:
1481     case tok::question:
1482     case tok::colon:
1483     case tok::kw_true:
1484     case tok::kw_false:
1485       if (SeenArrow) {
1486         nextToken();
1487         break;
1488       }
1489       return true;
1490     case tok::arrow:
1491       // This might or might not actually be a lambda arrow (this could be an
1492       // ObjC method invocation followed by a dereferencing arrow). We might
1493       // reset this back to TT_Unknown in TokenAnnotator.
1494       FormatTok->Type = TT_LambdaArrow;
1495       SeenArrow = true;
1496       nextToken();
1497       break;
1498     default:
1499       return true;
1500     }
1501   }
1502   FormatTok->Type = TT_LambdaLBrace;
1503   LSquare.Type = TT_LambdaLSquare;
1504   parseChildBlock();
1505   return true;
1506 }
1507
1508 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1509   const FormatToken *Previous = FormatTok->Previous;
1510   if (Previous &&
1511       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1512                          tok::kw_delete, tok::l_square) ||
1513        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1514        Previous->isSimpleTypeSpecifier())) {
1515     nextToken();
1516     return false;
1517   }
1518   nextToken();
1519   if (FormatTok->is(tok::l_square)) {
1520     return false;
1521   }
1522   parseSquare(/*LambdaIntroducer=*/true);
1523   return true;
1524 }
1525
1526 void UnwrappedLineParser::tryToParseJSFunction() {
1527   assert(FormatTok->is(Keywords.kw_function) ||
1528          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1529   if (FormatTok->is(Keywords.kw_async))
1530     nextToken();
1531   // Consume "function".
1532   nextToken();
1533
1534   // Consume * (generator function). Treat it like C++'s overloaded operators.
1535   if (FormatTok->is(tok::star)) {
1536     FormatTok->Type = TT_OverloadedOperator;
1537     nextToken();
1538   }
1539
1540   // Consume function name.
1541   if (FormatTok->is(tok::identifier))
1542     nextToken();
1543
1544   if (FormatTok->isNot(tok::l_paren))
1545     return;
1546
1547   // Parse formal parameter list.
1548   parseParens();
1549
1550   if (FormatTok->is(tok::colon)) {
1551     // Parse a type definition.
1552     nextToken();
1553
1554     // Eat the type declaration. For braced inline object types, balance braces,
1555     // otherwise just parse until finding an l_brace for the function body.
1556     if (FormatTok->is(tok::l_brace))
1557       tryToParseBracedList();
1558     else
1559       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1560         nextToken();
1561   }
1562
1563   if (FormatTok->is(tok::semi))
1564     return;
1565
1566   parseChildBlock();
1567 }
1568
1569 bool UnwrappedLineParser::tryToParseBracedList() {
1570   if (FormatTok->BlockKind == BK_Unknown)
1571     calculateBraceTypes();
1572   assert(FormatTok->BlockKind != BK_Unknown);
1573   if (FormatTok->BlockKind == BK_Block)
1574     return false;
1575   nextToken();
1576   parseBracedList();
1577   return true;
1578 }
1579
1580 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1581                                           tok::TokenKind ClosingBraceKind) {
1582   bool HasError = false;
1583
1584   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1585   // replace this by using parseAssigmentExpression() inside.
1586   do {
1587     if (Style.Language == FormatStyle::LK_JavaScript) {
1588       if (FormatTok->is(Keywords.kw_function) ||
1589           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1590         tryToParseJSFunction();
1591         continue;
1592       }
1593       if (FormatTok->is(TT_JsFatArrow)) {
1594         nextToken();
1595         // Fat arrows can be followed by simple expressions or by child blocks
1596         // in curly braces.
1597         if (FormatTok->is(tok::l_brace)) {
1598           parseChildBlock();
1599           continue;
1600         }
1601       }
1602       if (FormatTok->is(tok::l_brace)) {
1603         // Could be a method inside of a braced list `{a() { return 1; }}`.
1604         if (tryToParseBracedList())
1605           continue;
1606         parseChildBlock();
1607       }
1608     }
1609     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1610       nextToken();
1611       return !HasError;
1612     }
1613     switch (FormatTok->Tok.getKind()) {
1614     case tok::caret:
1615       nextToken();
1616       if (FormatTok->is(tok::l_brace)) {
1617         parseChildBlock();
1618       }
1619       break;
1620     case tok::l_square:
1621       tryToParseLambda();
1622       break;
1623     case tok::l_paren:
1624       parseParens();
1625       // JavaScript can just have free standing methods and getters/setters in
1626       // object literals. Detect them by a "{" following ")".
1627       if (Style.Language == FormatStyle::LK_JavaScript) {
1628         if (FormatTok->is(tok::l_brace))
1629           parseChildBlock();
1630         break;
1631       }
1632       break;
1633     case tok::l_brace:
1634       // Assume there are no blocks inside a braced init list apart
1635       // from the ones we explicitly parse out (like lambdas).
1636       FormatTok->BlockKind = BK_BracedInit;
1637       nextToken();
1638       parseBracedList();
1639       break;
1640     case tok::less:
1641       if (Style.Language == FormatStyle::LK_Proto) {
1642         nextToken();
1643         parseBracedList(/*ContinueOnSemicolons=*/false,
1644                         /*ClosingBraceKind=*/tok::greater);
1645       } else {
1646         nextToken();
1647       }
1648       break;
1649     case tok::semi:
1650       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1651       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1652       // used for error recovery if we have otherwise determined that this is
1653       // a braced list.
1654       if (Style.Language == FormatStyle::LK_JavaScript) {
1655         nextToken();
1656         break;
1657       }
1658       HasError = true;
1659       if (!ContinueOnSemicolons)
1660         return !HasError;
1661       nextToken();
1662       break;
1663     case tok::comma:
1664       nextToken();
1665       break;
1666     default:
1667       nextToken();
1668       break;
1669     }
1670   } while (!eof());
1671   return false;
1672 }
1673
1674 void UnwrappedLineParser::parseParens() {
1675   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1676   nextToken();
1677   do {
1678     switch (FormatTok->Tok.getKind()) {
1679     case tok::l_paren:
1680       parseParens();
1681       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1682         parseChildBlock();
1683       break;
1684     case tok::r_paren:
1685       nextToken();
1686       return;
1687     case tok::r_brace:
1688       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1689       return;
1690     case tok::l_square:
1691       tryToParseLambda();
1692       break;
1693     case tok::l_brace:
1694       if (!tryToParseBracedList())
1695         parseChildBlock();
1696       break;
1697     case tok::at:
1698       nextToken();
1699       if (FormatTok->Tok.is(tok::l_brace)) {
1700         nextToken();
1701         parseBracedList();
1702       }
1703       break;
1704     case tok::kw_class:
1705       if (Style.Language == FormatStyle::LK_JavaScript)
1706         parseRecord(/*ParseAsExpr=*/true);
1707       else
1708         nextToken();
1709       break;
1710     case tok::identifier:
1711       if (Style.Language == FormatStyle::LK_JavaScript &&
1712           (FormatTok->is(Keywords.kw_function) ||
1713            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1714         tryToParseJSFunction();
1715       else
1716         nextToken();
1717       break;
1718     default:
1719       nextToken();
1720       break;
1721     }
1722   } while (!eof());
1723 }
1724
1725 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1726   if (!LambdaIntroducer) {
1727     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1728     if (tryToParseLambda())
1729       return;
1730   }
1731   do {
1732     switch (FormatTok->Tok.getKind()) {
1733     case tok::l_paren:
1734       parseParens();
1735       break;
1736     case tok::r_square:
1737       nextToken();
1738       return;
1739     case tok::r_brace:
1740       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1741       return;
1742     case tok::l_square:
1743       parseSquare();
1744       break;
1745     case tok::l_brace: {
1746       if (!tryToParseBracedList())
1747         parseChildBlock();
1748       break;
1749     }
1750     case tok::at:
1751       nextToken();
1752       if (FormatTok->Tok.is(tok::l_brace)) {
1753         nextToken();
1754         parseBracedList();
1755       }
1756       break;
1757     default:
1758       nextToken();
1759       break;
1760     }
1761   } while (!eof());
1762 }
1763
1764 void UnwrappedLineParser::parseIfThenElse() {
1765   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1766   nextToken();
1767   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1768     nextToken();
1769   if (FormatTok->Tok.is(tok::l_paren))
1770     parseParens();
1771   bool NeedsUnwrappedLine = false;
1772   if (FormatTok->Tok.is(tok::l_brace)) {
1773     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1774     parseBlock(/*MustBeDeclaration=*/false);
1775     if (Style.BraceWrapping.BeforeElse)
1776       addUnwrappedLine();
1777     else
1778       NeedsUnwrappedLine = true;
1779   } else {
1780     addUnwrappedLine();
1781     ++Line->Level;
1782     parseStructuralElement();
1783     --Line->Level;
1784   }
1785   if (FormatTok->Tok.is(tok::kw_else)) {
1786     nextToken();
1787     if (FormatTok->Tok.is(tok::l_brace)) {
1788       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1789       parseBlock(/*MustBeDeclaration=*/false);
1790       addUnwrappedLine();
1791     } else if (FormatTok->Tok.is(tok::kw_if)) {
1792       parseIfThenElse();
1793     } else {
1794       addUnwrappedLine();
1795       ++Line->Level;
1796       parseStructuralElement();
1797       if (FormatTok->is(tok::eof))
1798         addUnwrappedLine();
1799       --Line->Level;
1800     }
1801   } else if (NeedsUnwrappedLine) {
1802     addUnwrappedLine();
1803   }
1804 }
1805
1806 void UnwrappedLineParser::parseTryCatch() {
1807   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1808   nextToken();
1809   bool NeedsUnwrappedLine = false;
1810   if (FormatTok->is(tok::colon)) {
1811     // We are in a function try block, what comes is an initializer list.
1812     nextToken();
1813     while (FormatTok->is(tok::identifier)) {
1814       nextToken();
1815       if (FormatTok->is(tok::l_paren))
1816         parseParens();
1817       if (FormatTok->is(tok::comma))
1818         nextToken();
1819     }
1820   }
1821   // Parse try with resource.
1822   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1823     parseParens();
1824   }
1825   if (FormatTok->is(tok::l_brace)) {
1826     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1827     parseBlock(/*MustBeDeclaration=*/false);
1828     if (Style.BraceWrapping.BeforeCatch) {
1829       addUnwrappedLine();
1830     } else {
1831       NeedsUnwrappedLine = true;
1832     }
1833   } else if (!FormatTok->is(tok::kw_catch)) {
1834     // The C++ standard requires a compound-statement after a try.
1835     // If there's none, we try to assume there's a structuralElement
1836     // and try to continue.
1837     addUnwrappedLine();
1838     ++Line->Level;
1839     parseStructuralElement();
1840     --Line->Level;
1841   }
1842   while (1) {
1843     if (FormatTok->is(tok::at))
1844       nextToken();
1845     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1846                              tok::kw___finally) ||
1847           ((Style.Language == FormatStyle::LK_Java ||
1848             Style.Language == FormatStyle::LK_JavaScript) &&
1849            FormatTok->is(Keywords.kw_finally)) ||
1850           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1851            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1852       break;
1853     nextToken();
1854     while (FormatTok->isNot(tok::l_brace)) {
1855       if (FormatTok->is(tok::l_paren)) {
1856         parseParens();
1857         continue;
1858       }
1859       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1860         return;
1861       nextToken();
1862     }
1863     NeedsUnwrappedLine = false;
1864     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1865     parseBlock(/*MustBeDeclaration=*/false);
1866     if (Style.BraceWrapping.BeforeCatch)
1867       addUnwrappedLine();
1868     else
1869       NeedsUnwrappedLine = true;
1870   }
1871   if (NeedsUnwrappedLine)
1872     addUnwrappedLine();
1873 }
1874
1875 void UnwrappedLineParser::parseNamespace() {
1876   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1877          "'namespace' expected");
1878
1879   const FormatToken &InitialToken = *FormatTok;
1880   nextToken();
1881   if (InitialToken.is(TT_NamespaceMacro)) {
1882     parseParens();
1883   } else {
1884     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1885                               tok::l_square)) {
1886       if (FormatTok->is(tok::l_square))
1887         parseSquare();
1888       else
1889         nextToken();
1890     }
1891   }
1892   if (FormatTok->Tok.is(tok::l_brace)) {
1893     if (ShouldBreakBeforeBrace(Style, InitialToken))
1894       addUnwrappedLine();
1895
1896     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1897                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1898                      DeclarationScopeStack.size() > 1);
1899     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1900     // Munch the semicolon after a namespace. This is more common than one would
1901     // think. Puttin the semicolon into its own line is very ugly.
1902     if (FormatTok->Tok.is(tok::semi))
1903       nextToken();
1904     addUnwrappedLine();
1905   }
1906   // FIXME: Add error handling.
1907 }
1908
1909 void UnwrappedLineParser::parseNew() {
1910   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1911   nextToken();
1912   if (Style.Language != FormatStyle::LK_Java)
1913     return;
1914
1915   // In Java, we can parse everything up to the parens, which aren't optional.
1916   do {
1917     // There should not be a ;, { or } before the new's open paren.
1918     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1919       return;
1920
1921     // Consume the parens.
1922     if (FormatTok->is(tok::l_paren)) {
1923       parseParens();
1924
1925       // If there is a class body of an anonymous class, consume that as child.
1926       if (FormatTok->is(tok::l_brace))
1927         parseChildBlock();
1928       return;
1929     }
1930     nextToken();
1931   } while (!eof());
1932 }
1933
1934 void UnwrappedLineParser::parseForOrWhileLoop() {
1935   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1936          "'for', 'while' or foreach macro expected");
1937   nextToken();
1938   // JS' for await ( ...
1939   if (Style.Language == FormatStyle::LK_JavaScript &&
1940       FormatTok->is(Keywords.kw_await))
1941     nextToken();
1942   if (FormatTok->Tok.is(tok::l_paren))
1943     parseParens();
1944   if (FormatTok->Tok.is(tok::l_brace)) {
1945     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1946     parseBlock(/*MustBeDeclaration=*/false);
1947     addUnwrappedLine();
1948   } else {
1949     addUnwrappedLine();
1950     ++Line->Level;
1951     parseStructuralElement();
1952     --Line->Level;
1953   }
1954 }
1955
1956 void UnwrappedLineParser::parseDoWhile() {
1957   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1958   nextToken();
1959   if (FormatTok->Tok.is(tok::l_brace)) {
1960     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1961     parseBlock(/*MustBeDeclaration=*/false);
1962     if (Style.BraceWrapping.IndentBraces)
1963       addUnwrappedLine();
1964   } else {
1965     addUnwrappedLine();
1966     ++Line->Level;
1967     parseStructuralElement();
1968     --Line->Level;
1969   }
1970
1971   // FIXME: Add error handling.
1972   if (!FormatTok->Tok.is(tok::kw_while)) {
1973     addUnwrappedLine();
1974     return;
1975   }
1976
1977   nextToken();
1978   parseStructuralElement();
1979 }
1980
1981 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
1982   nextToken();
1983   unsigned OldLineLevel = Line->Level;
1984   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1985     --Line->Level;
1986   if (LeftAlignLabel)
1987     Line->Level = 0;
1988   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1989     CompoundStatementIndenter Indenter(this, Line->Level,
1990                                        Style.BraceWrapping.AfterCaseLabel,
1991                                        Style.BraceWrapping.IndentBraces);
1992     parseBlock(/*MustBeDeclaration=*/false);
1993     if (FormatTok->Tok.is(tok::kw_break)) {
1994       if (Style.BraceWrapping.AfterControlStatement ==
1995           FormatStyle::BWACS_Always)
1996         addUnwrappedLine();
1997       parseStructuralElement();
1998     }
1999     addUnwrappedLine();
2000   } else {
2001     if (FormatTok->is(tok::semi))
2002       nextToken();
2003     addUnwrappedLine();
2004   }
2005   Line->Level = OldLineLevel;
2006   if (FormatTok->isNot(tok::l_brace)) {
2007     parseStructuralElement();
2008     addUnwrappedLine();
2009   }
2010 }
2011
2012 void UnwrappedLineParser::parseCaseLabel() {
2013   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2014   // FIXME: fix handling of complex expressions here.
2015   do {
2016     nextToken();
2017   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2018   parseLabel();
2019 }
2020
2021 void UnwrappedLineParser::parseSwitch() {
2022   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2023   nextToken();
2024   if (FormatTok->Tok.is(tok::l_paren))
2025     parseParens();
2026   if (FormatTok->Tok.is(tok::l_brace)) {
2027     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2028     parseBlock(/*MustBeDeclaration=*/false);
2029     addUnwrappedLine();
2030   } else {
2031     addUnwrappedLine();
2032     ++Line->Level;
2033     parseStructuralElement();
2034     --Line->Level;
2035   }
2036 }
2037
2038 void UnwrappedLineParser::parseAccessSpecifier() {
2039   nextToken();
2040   // Understand Qt's slots.
2041   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2042     nextToken();
2043   // Otherwise, we don't know what it is, and we'd better keep the next token.
2044   if (FormatTok->Tok.is(tok::colon))
2045     nextToken();
2046   addUnwrappedLine();
2047 }
2048
2049 bool UnwrappedLineParser::parseEnum() {
2050   // Won't be 'enum' for NS_ENUMs.
2051   if (FormatTok->Tok.is(tok::kw_enum))
2052     nextToken();
2053
2054   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2055   // declarations. An "enum" keyword followed by a colon would be a syntax
2056   // error and thus assume it is just an identifier.
2057   if (Style.Language == FormatStyle::LK_JavaScript &&
2058       FormatTok->isOneOf(tok::colon, tok::question))
2059     return false;
2060
2061   // In protobuf, "enum" can be used as a field name.
2062   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2063     return false;
2064
2065   // Eat up enum class ...
2066   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2067     nextToken();
2068
2069   while (FormatTok->Tok.getIdentifierInfo() ||
2070          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2071                             tok::greater, tok::comma, tok::question)) {
2072     nextToken();
2073     // We can have macros or attributes in between 'enum' and the enum name.
2074     if (FormatTok->is(tok::l_paren))
2075       parseParens();
2076     if (FormatTok->is(tok::identifier)) {
2077       nextToken();
2078       // If there are two identifiers in a row, this is likely an elaborate
2079       // return type. In Java, this can be "implements", etc.
2080       if (Style.isCpp() && FormatTok->is(tok::identifier))
2081         return false;
2082     }
2083   }
2084
2085   // Just a declaration or something is wrong.
2086   if (FormatTok->isNot(tok::l_brace))
2087     return true;
2088   FormatTok->BlockKind = BK_Block;
2089
2090   if (Style.Language == FormatStyle::LK_Java) {
2091     // Java enums are different.
2092     parseJavaEnumBody();
2093     return true;
2094   }
2095   if (Style.Language == FormatStyle::LK_Proto) {
2096     parseBlock(/*MustBeDeclaration=*/true);
2097     return true;
2098   }
2099
2100   // Parse enum body.
2101   nextToken();
2102   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2103   if (HasError) {
2104     if (FormatTok->is(tok::semi))
2105       nextToken();
2106     addUnwrappedLine();
2107   }
2108   return true;
2109
2110   // There is no addUnwrappedLine() here so that we fall through to parsing a
2111   // structural element afterwards. Thus, in "enum A {} n, m;",
2112   // "} n, m;" will end up in one unwrapped line.
2113 }
2114
2115 void UnwrappedLineParser::parseJavaEnumBody() {
2116   // Determine whether the enum is simple, i.e. does not have a semicolon or
2117   // constants with class bodies. Simple enums can be formatted like braced
2118   // lists, contracted to a single line, etc.
2119   unsigned StoredPosition = Tokens->getPosition();
2120   bool IsSimple = true;
2121   FormatToken *Tok = Tokens->getNextToken();
2122   while (Tok) {
2123     if (Tok->is(tok::r_brace))
2124       break;
2125     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2126       IsSimple = false;
2127       break;
2128     }
2129     // FIXME: This will also mark enums with braces in the arguments to enum
2130     // constants as "not simple". This is probably fine in practice, though.
2131     Tok = Tokens->getNextToken();
2132   }
2133   FormatTok = Tokens->setPosition(StoredPosition);
2134
2135   if (IsSimple) {
2136     nextToken();
2137     parseBracedList();
2138     addUnwrappedLine();
2139     return;
2140   }
2141
2142   // Parse the body of a more complex enum.
2143   // First add a line for everything up to the "{".
2144   nextToken();
2145   addUnwrappedLine();
2146   ++Line->Level;
2147
2148   // Parse the enum constants.
2149   while (FormatTok) {
2150     if (FormatTok->is(tok::l_brace)) {
2151       // Parse the constant's class body.
2152       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2153                  /*MunchSemi=*/false);
2154     } else if (FormatTok->is(tok::l_paren)) {
2155       parseParens();
2156     } else if (FormatTok->is(tok::comma)) {
2157       nextToken();
2158       addUnwrappedLine();
2159     } else if (FormatTok->is(tok::semi)) {
2160       nextToken();
2161       addUnwrappedLine();
2162       break;
2163     } else if (FormatTok->is(tok::r_brace)) {
2164       addUnwrappedLine();
2165       break;
2166     } else {
2167       nextToken();
2168     }
2169   }
2170
2171   // Parse the class body after the enum's ";" if any.
2172   parseLevel(/*HasOpeningBrace=*/true);
2173   nextToken();
2174   --Line->Level;
2175   addUnwrappedLine();
2176 }
2177
2178 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2179   const FormatToken &InitialToken = *FormatTok;
2180   nextToken();
2181
2182   // The actual identifier can be a nested name specifier, and in macros
2183   // it is often token-pasted.
2184   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2185                             tok::kw___attribute, tok::kw___declspec,
2186                             tok::kw_alignas) ||
2187          ((Style.Language == FormatStyle::LK_Java ||
2188            Style.Language == FormatStyle::LK_JavaScript) &&
2189           FormatTok->isOneOf(tok::period, tok::comma))) {
2190     if (Style.Language == FormatStyle::LK_JavaScript &&
2191         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2192       // JavaScript/TypeScript supports inline object types in
2193       // extends/implements positions:
2194       //     class Foo implements {bar: number} { }
2195       nextToken();
2196       if (FormatTok->is(tok::l_brace)) {
2197         tryToParseBracedList();
2198         continue;
2199       }
2200     }
2201     bool IsNonMacroIdentifier =
2202         FormatTok->is(tok::identifier) &&
2203         FormatTok->TokenText != FormatTok->TokenText.upper();
2204     nextToken();
2205     // We can have macros or attributes in between 'class' and the class name.
2206     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2207       parseParens();
2208   }
2209
2210   // Note that parsing away template declarations here leads to incorrectly
2211   // accepting function declarations as record declarations.
2212   // In general, we cannot solve this problem. Consider:
2213   // class A<int> B() {}
2214   // which can be a function definition or a class definition when B() is a
2215   // macro. If we find enough real-world cases where this is a problem, we
2216   // can parse for the 'template' keyword in the beginning of the statement,
2217   // and thus rule out the record production in case there is no template
2218   // (this would still leave us with an ambiguity between template function
2219   // and class declarations).
2220   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2221     while (!eof()) {
2222       if (FormatTok->is(tok::l_brace)) {
2223         calculateBraceTypes(/*ExpectClassBody=*/true);
2224         if (!tryToParseBracedList())
2225           break;
2226       }
2227       if (FormatTok->Tok.is(tok::semi))
2228         return;
2229       nextToken();
2230     }
2231   }
2232   if (FormatTok->Tok.is(tok::l_brace)) {
2233     if (ParseAsExpr) {
2234       parseChildBlock();
2235     } else {
2236       if (ShouldBreakBeforeBrace(Style, InitialToken))
2237         addUnwrappedLine();
2238
2239       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2240                  /*MunchSemi=*/false);
2241     }
2242   }
2243   // There is no addUnwrappedLine() here so that we fall through to parsing a
2244   // structural element afterwards. Thus, in "class A {} n, m;",
2245   // "} n, m;" will end up in one unwrapped line.
2246 }
2247
2248 void UnwrappedLineParser::parseObjCMethod() {
2249   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2250          "'(' or identifier expected.");
2251   do {
2252     if (FormatTok->Tok.is(tok::semi)) {
2253       nextToken();
2254       addUnwrappedLine();
2255       return;
2256     } else if (FormatTok->Tok.is(tok::l_brace)) {
2257       if (Style.BraceWrapping.AfterFunction)
2258         addUnwrappedLine();
2259       parseBlock(/*MustBeDeclaration=*/false);
2260       addUnwrappedLine();
2261       return;
2262     } else {
2263       nextToken();
2264     }
2265   } while (!eof());
2266 }
2267
2268 void UnwrappedLineParser::parseObjCProtocolList() {
2269   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2270   do {
2271     nextToken();
2272     // Early exit in case someone forgot a close angle.
2273     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2274         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2275       return;
2276   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2277   nextToken(); // Skip '>'.
2278 }
2279
2280 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2281   do {
2282     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2283       nextToken();
2284       addUnwrappedLine();
2285       break;
2286     }
2287     if (FormatTok->is(tok::l_brace)) {
2288       parseBlock(/*MustBeDeclaration=*/false);
2289       // In ObjC interfaces, nothing should be following the "}".
2290       addUnwrappedLine();
2291     } else if (FormatTok->is(tok::r_brace)) {
2292       // Ignore stray "}". parseStructuralElement doesn't consume them.
2293       nextToken();
2294       addUnwrappedLine();
2295     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2296       nextToken();
2297       parseObjCMethod();
2298     } else {
2299       parseStructuralElement();
2300     }
2301   } while (!eof());
2302 }
2303
2304 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2305   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2306          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2307   nextToken();
2308   nextToken(); // interface name
2309
2310   // @interface can be followed by a lightweight generic
2311   // specialization list, then either a base class or a category.
2312   if (FormatTok->Tok.is(tok::less)) {
2313     // Unlike protocol lists, generic parameterizations support
2314     // nested angles:
2315     //
2316     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2317     //     NSObject <NSCopying, NSSecureCoding>
2318     //
2319     // so we need to count how many open angles we have left.
2320     unsigned NumOpenAngles = 1;
2321     do {
2322       nextToken();
2323       // Early exit in case someone forgot a close angle.
2324       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2325           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2326         break;
2327       if (FormatTok->Tok.is(tok::less))
2328         ++NumOpenAngles;
2329       else if (FormatTok->Tok.is(tok::greater)) {
2330         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2331         --NumOpenAngles;
2332       }
2333     } while (!eof() && NumOpenAngles != 0);
2334     nextToken(); // Skip '>'.
2335   }
2336   if (FormatTok->Tok.is(tok::colon)) {
2337     nextToken();
2338     nextToken(); // base class name
2339   } else if (FormatTok->Tok.is(tok::l_paren))
2340     // Skip category, if present.
2341     parseParens();
2342
2343   if (FormatTok->Tok.is(tok::less))
2344     parseObjCProtocolList();
2345
2346   if (FormatTok->Tok.is(tok::l_brace)) {
2347     if (Style.BraceWrapping.AfterObjCDeclaration)
2348       addUnwrappedLine();
2349     parseBlock(/*MustBeDeclaration=*/true);
2350   }
2351
2352   // With instance variables, this puts '}' on its own line.  Without instance
2353   // variables, this ends the @interface line.
2354   addUnwrappedLine();
2355
2356   parseObjCUntilAtEnd();
2357 }
2358
2359 // Returns true for the declaration/definition form of @protocol,
2360 // false for the expression form.
2361 bool UnwrappedLineParser::parseObjCProtocol() {
2362   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2363   nextToken();
2364
2365   if (FormatTok->is(tok::l_paren))
2366     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2367     return false;
2368
2369   // The definition/declaration form,
2370   // @protocol Foo
2371   // - (int)someMethod;
2372   // @end
2373
2374   nextToken(); // protocol name
2375
2376   if (FormatTok->Tok.is(tok::less))
2377     parseObjCProtocolList();
2378
2379   // Check for protocol declaration.
2380   if (FormatTok->Tok.is(tok::semi)) {
2381     nextToken();
2382     addUnwrappedLine();
2383     return true;
2384   }
2385
2386   addUnwrappedLine();
2387   parseObjCUntilAtEnd();
2388   return true;
2389 }
2390
2391 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2392   bool IsImport = FormatTok->is(Keywords.kw_import);
2393   assert(IsImport || FormatTok->is(tok::kw_export));
2394   nextToken();
2395
2396   // Consume the "default" in "export default class/function".
2397   if (FormatTok->is(tok::kw_default))
2398     nextToken();
2399
2400   // Consume "async function", "function" and "default function", so that these
2401   // get parsed as free-standing JS functions, i.e. do not require a trailing
2402   // semicolon.
2403   if (FormatTok->is(Keywords.kw_async))
2404     nextToken();
2405   if (FormatTok->is(Keywords.kw_function)) {
2406     nextToken();
2407     return;
2408   }
2409
2410   // For imports, `export *`, `export {...}`, consume the rest of the line up
2411   // to the terminating `;`. For everything else, just return and continue
2412   // parsing the structural element, i.e. the declaration or expression for
2413   // `export default`.
2414   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2415       !FormatTok->isStringLiteral())
2416     return;
2417
2418   while (!eof()) {
2419     if (FormatTok->is(tok::semi))
2420       return;
2421     if (Line->Tokens.empty()) {
2422       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2423       // import statement should terminate.
2424       return;
2425     }
2426     if (FormatTok->is(tok::l_brace)) {
2427       FormatTok->BlockKind = BK_Block;
2428       nextToken();
2429       parseBracedList();
2430     } else {
2431       nextToken();
2432     }
2433   }
2434 }
2435
2436 void UnwrappedLineParser::parseStatementMacro() {
2437   nextToken();
2438   if (FormatTok->is(tok::l_paren))
2439     parseParens();
2440   if (FormatTok->is(tok::semi))
2441     nextToken();
2442   addUnwrappedLine();
2443 }
2444
2445 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2446                                                  StringRef Prefix = "") {
2447   llvm::dbgs() << Prefix << "Line(" << Line.Level
2448                << ", FSC=" << Line.FirstStartColumn << ")"
2449                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2450   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2451                                                     E = Line.Tokens.end();
2452        I != E; ++I) {
2453     llvm::dbgs() << I->Tok->Tok.getName() << "["
2454                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2455                  << "] ";
2456   }
2457   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2458                                                     E = Line.Tokens.end();
2459        I != E; ++I) {
2460     const UnwrappedLineNode &Node = *I;
2461     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2462              I = Node.Children.begin(),
2463              E = Node.Children.end();
2464          I != E; ++I) {
2465       printDebugInfo(*I, "\nChild: ");
2466     }
2467   }
2468   llvm::dbgs() << "\n";
2469 }
2470
2471 void UnwrappedLineParser::addUnwrappedLine() {
2472   if (Line->Tokens.empty())
2473     return;
2474   LLVM_DEBUG({
2475     if (CurrentLines == &Lines)
2476       printDebugInfo(*Line);
2477   });
2478   CurrentLines->push_back(std::move(*Line));
2479   Line->Tokens.clear();
2480   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2481   Line->FirstStartColumn = 0;
2482   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2483     CurrentLines->append(
2484         std::make_move_iterator(PreprocessorDirectives.begin()),
2485         std::make_move_iterator(PreprocessorDirectives.end()));
2486     PreprocessorDirectives.clear();
2487   }
2488   // Disconnect the current token from the last token on the previous line.
2489   FormatTok->Previous = nullptr;
2490 }
2491
2492 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2493
2494 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2495   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2496          FormatTok.NewlinesBefore > 0;
2497 }
2498
2499 // Checks if \p FormatTok is a line comment that continues the line comment
2500 // section on \p Line.
2501 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2502                                         const UnwrappedLine &Line,
2503                                         llvm::Regex &CommentPragmasRegex) {
2504   if (Line.Tokens.empty())
2505     return false;
2506
2507   StringRef IndentContent = FormatTok.TokenText;
2508   if (FormatTok.TokenText.startswith("//") ||
2509       FormatTok.TokenText.startswith("/*"))
2510     IndentContent = FormatTok.TokenText.substr(2);
2511   if (CommentPragmasRegex.match(IndentContent))
2512     return false;
2513
2514   // If Line starts with a line comment, then FormatTok continues the comment
2515   // section if its original column is greater or equal to the original start
2516   // column of the line.
2517   //
2518   // Define the min column token of a line as follows: if a line ends in '{' or
2519   // contains a '{' followed by a line comment, then the min column token is
2520   // that '{'. Otherwise, the min column token of the line is the first token of
2521   // the line.
2522   //
2523   // If Line starts with a token other than a line comment, then FormatTok
2524   // continues the comment section if its original column is greater than the
2525   // original start column of the min column token of the line.
2526   //
2527   // For example, the second line comment continues the first in these cases:
2528   //
2529   // // first line
2530   // // second line
2531   //
2532   // and:
2533   //
2534   // // first line
2535   //  // second line
2536   //
2537   // and:
2538   //
2539   // int i; // first line
2540   //  // second line
2541   //
2542   // and:
2543   //
2544   // do { // first line
2545   //      // second line
2546   //   int i;
2547   // } while (true);
2548   //
2549   // and:
2550   //
2551   // enum {
2552   //   a, // first line
2553   //    // second line
2554   //   b
2555   // };
2556   //
2557   // The second line comment doesn't continue the first in these cases:
2558   //
2559   //   // first line
2560   //  // second line
2561   //
2562   // and:
2563   //
2564   // int i; // first line
2565   // // second line
2566   //
2567   // and:
2568   //
2569   // do { // first line
2570   //   // second line
2571   //   int i;
2572   // } while (true);
2573   //
2574   // and:
2575   //
2576   // enum {
2577   //   a, // first line
2578   //   // second line
2579   // };
2580   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2581
2582   // Scan for '{//'. If found, use the column of '{' as a min column for line
2583   // comment section continuation.
2584   const FormatToken *PreviousToken = nullptr;
2585   for (const UnwrappedLineNode &Node : Line.Tokens) {
2586     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2587         isLineComment(*Node.Tok)) {
2588       MinColumnToken = PreviousToken;
2589       break;
2590     }
2591     PreviousToken = Node.Tok;
2592
2593     // Grab the last newline preceding a token in this unwrapped line.
2594     if (Node.Tok->NewlinesBefore > 0) {
2595       MinColumnToken = Node.Tok;
2596     }
2597   }
2598   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2599     MinColumnToken = PreviousToken;
2600   }
2601
2602   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2603                               MinColumnToken);
2604 }
2605
2606 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2607   bool JustComments = Line->Tokens.empty();
2608   for (SmallVectorImpl<FormatToken *>::const_iterator
2609            I = CommentsBeforeNextToken.begin(),
2610            E = CommentsBeforeNextToken.end();
2611        I != E; ++I) {
2612     // Line comments that belong to the same line comment section are put on the
2613     // same line since later we might want to reflow content between them.
2614     // Additional fine-grained breaking of line comment sections is controlled
2615     // by the class BreakableLineCommentSection in case it is desirable to keep
2616     // several line comment sections in the same unwrapped line.
2617     //
2618     // FIXME: Consider putting separate line comment sections as children to the
2619     // unwrapped line instead.
2620     (*I)->ContinuesLineCommentSection =
2621         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2622     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2623       addUnwrappedLine();
2624     pushToken(*I);
2625   }
2626   if (NewlineBeforeNext && JustComments)
2627     addUnwrappedLine();
2628   CommentsBeforeNextToken.clear();
2629 }
2630
2631 void UnwrappedLineParser::nextToken(int LevelDifference) {
2632   if (eof())
2633     return;
2634   flushComments(isOnNewLine(*FormatTok));
2635   pushToken(FormatTok);
2636   FormatToken *Previous = FormatTok;
2637   if (Style.Language != FormatStyle::LK_JavaScript)
2638     readToken(LevelDifference);
2639   else
2640     readTokenWithJavaScriptASI();
2641   FormatTok->Previous = Previous;
2642 }
2643
2644 void UnwrappedLineParser::distributeComments(
2645     const SmallVectorImpl<FormatToken *> &Comments,
2646     const FormatToken *NextTok) {
2647   // Whether or not a line comment token continues a line is controlled by
2648   // the method continuesLineCommentSection, with the following caveat:
2649   //
2650   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2651   // that each comment line from the trail is aligned with the next token, if
2652   // the next token exists. If a trail exists, the beginning of the maximal
2653   // trail is marked as a start of a new comment section.
2654   //
2655   // For example in this code:
2656   //
2657   // int a; // line about a
2658   //   // line 1 about b
2659   //   // line 2 about b
2660   //   int b;
2661   //
2662   // the two lines about b form a maximal trail, so there are two sections, the
2663   // first one consisting of the single comment "// line about a" and the
2664   // second one consisting of the next two comments.
2665   if (Comments.empty())
2666     return;
2667   bool ShouldPushCommentsInCurrentLine = true;
2668   bool HasTrailAlignedWithNextToken = false;
2669   unsigned StartOfTrailAlignedWithNextToken = 0;
2670   if (NextTok) {
2671     // We are skipping the first element intentionally.
2672     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2673       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2674         HasTrailAlignedWithNextToken = true;
2675         StartOfTrailAlignedWithNextToken = i;
2676       }
2677     }
2678   }
2679   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2680     FormatToken *FormatTok = Comments[i];
2681     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2682       FormatTok->ContinuesLineCommentSection = false;
2683     } else {
2684       FormatTok->ContinuesLineCommentSection =
2685           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2686     }
2687     if (!FormatTok->ContinuesLineCommentSection &&
2688         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2689       ShouldPushCommentsInCurrentLine = false;
2690     }
2691     if (ShouldPushCommentsInCurrentLine) {
2692       pushToken(FormatTok);
2693     } else {
2694       CommentsBeforeNextToken.push_back(FormatTok);
2695     }
2696   }
2697 }
2698
2699 void UnwrappedLineParser::readToken(int LevelDifference) {
2700   SmallVector<FormatToken *, 1> Comments;
2701   do {
2702     FormatTok = Tokens->getNextToken();
2703     assert(FormatTok);
2704     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2705            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2706       distributeComments(Comments, FormatTok);
2707       Comments.clear();
2708       // If there is an unfinished unwrapped line, we flush the preprocessor
2709       // directives only after that unwrapped line was finished later.
2710       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2711       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2712       assert((LevelDifference >= 0 ||
2713               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2714              "LevelDifference makes Line->Level negative");
2715       Line->Level += LevelDifference;
2716       // Comments stored before the preprocessor directive need to be output
2717       // before the preprocessor directive, at the same level as the
2718       // preprocessor directive, as we consider them to apply to the directive.
2719       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2720           PPBranchLevel > 0)
2721         Line->Level += PPBranchLevel;
2722       flushComments(isOnNewLine(*FormatTok));
2723       parsePPDirective();
2724     }
2725     while (FormatTok->Type == TT_ConflictStart ||
2726            FormatTok->Type == TT_ConflictEnd ||
2727            FormatTok->Type == TT_ConflictAlternative) {
2728       if (FormatTok->Type == TT_ConflictStart) {
2729         conditionalCompilationStart(/*Unreachable=*/false);
2730       } else if (FormatTok->Type == TT_ConflictAlternative) {
2731         conditionalCompilationAlternative();
2732       } else if (FormatTok->Type == TT_ConflictEnd) {
2733         conditionalCompilationEnd();
2734       }
2735       FormatTok = Tokens->getNextToken();
2736       FormatTok->MustBreakBefore = true;
2737     }
2738
2739     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2740         !Line->InPPDirective) {
2741       continue;
2742     }
2743
2744     if (!FormatTok->Tok.is(tok::comment)) {
2745       distributeComments(Comments, FormatTok);
2746       Comments.clear();
2747       return;
2748     }
2749
2750     Comments.push_back(FormatTok);
2751   } while (!eof());
2752
2753   distributeComments(Comments, nullptr);
2754   Comments.clear();
2755 }
2756
2757 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2758   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2759   if (MustBreakBeforeNextToken) {
2760     Line->Tokens.back().Tok->MustBreakBefore = true;
2761     MustBreakBeforeNextToken = false;
2762   }
2763 }
2764
2765 } // end namespace format
2766 } // end namespace clang