contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #include <algorithm>
  22
  23 #define DEBUG_TYPE "format-parser"
  24
  25 namespace clang {
  26 namespace format {
  27
  28 class FormatTokenSource {
  29 public:
  30   virtual ~FormatTokenSource() {}
  31   virtual FormatToken *getNextToken() = 0;
  32
  33   virtual unsigned getPosition() = 0;
  34   virtual FormatToken *setPosition(unsigned Position) = 0;
  35 };
  36
  37 namespace {
  38
  39 class ScopedDeclarationState {
  40 public:
  41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  42                          bool MustBeDeclaration)
  43       : Line(Line), Stack(Stack) {
  44     Line.MustBeDeclaration = MustBeDeclaration;
  45     Stack.push_back(MustBeDeclaration);
  46   }
  47   ~ScopedDeclarationState() {
  48     Stack.pop_back();
  49     if (!Stack.empty())
  50       Line.MustBeDeclaration = Stack.back();
  51     else
  52       Line.MustBeDeclaration = true;
  53   }
  54
  55 private:
  56   UnwrappedLine &Line;
  57   std::vector<bool> &Stack;
  58 };
  59
  60 static bool isLineComment(const FormatToken &FormatTok) {
  61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  62 }
  63
  64 // Checks if \p FormatTok is a line comment that continues the line comment
  65 // \p Previous. The original column of \p MinColumnToken is used to determine
  66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
  67 static bool continuesLineComment(const FormatToken &FormatTok,
  68                                  const FormatToken *Previous,
  69                                  const FormatToken *MinColumnToken) {
  70   if (!Previous || !MinColumnToken)
  71     return false;
  72   unsigned MinContinueColumn =
  73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  75          isLineComment(*Previous) &&
  76          FormatTok.OriginalColumn >= MinContinueColumn;
  77 }
  78
  79 class ScopedMacroState : public FormatTokenSource {
  80 public:
  81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  82                    FormatToken *&ResetToken)
  83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  85         Token(nullptr), PreviousToken(nullptr) {
  86     FakeEOF.Tok.startToken();
  87     FakeEOF.Tok.setKind(tok::eof);
  88     TokenSource = this;
  89     Line.Level = 0;
  90     Line.InPPDirective = true;
  91   }
  92
  93   ~ScopedMacroState() override {
  94     TokenSource = PreviousTokenSource;
  95     ResetToken = Token;
  96     Line.InPPDirective = false;
  97     Line.Level = PreviousLineLevel;
  98   }
  99
 100   FormatToken *getNextToken() override {
 101     // The \c UnwrappedLineParser guards against this by never calling
 102     // \c getNextToken() after it has encountered the first eof token.
 103     assert(!eof());
 104     PreviousToken = Token;
 105     Token = PreviousTokenSource->getNextToken();
 106     if (eof())
 107       return &FakeEOF;
 108     return Token;
 109   }
 110
 111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
 112
 113   FormatToken *setPosition(unsigned Position) override {
 114     PreviousToken = nullptr;
 115     Token = PreviousTokenSource->setPosition(Position);
 116     return Token;
 117   }
 118
 119 private:
 120   bool eof() {
 121     return Token && Token->HasUnescapedNewline &&
 122            !continuesLineComment(*Token, PreviousToken,
 123                                  /*MinColumnToken=*/PreviousToken);
 124   }
 125
 126   FormatToken FakeEOF;
 127   UnwrappedLine &Line;
 128   FormatTokenSource *&TokenSource;
 129   FormatToken *&ResetToken;
 130   unsigned PreviousLineLevel;
 131   FormatTokenSource *PreviousTokenSource;
 132
 133   FormatToken *Token;
 134   FormatToken *PreviousToken;
 135 };
 136
 137 } // end anonymous namespace
 138
 139 class ScopedLineState {
 140 public:
 141   ScopedLineState(UnwrappedLineParser &Parser,
 142                   bool SwitchToPreprocessorLines = false)
 143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 144     if (SwitchToPreprocessorLines)
 145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 146     else if (!Parser.Line->Tokens.empty())
 147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 148     PreBlockLine = std::move(Parser.Line);
 149     Parser.Line = llvm::make_unique<UnwrappedLine>();
 150     Parser.Line->Level = PreBlockLine->Level;
 151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 152   }
 153
 154   ~ScopedLineState() {
 155     if (!Parser.Line->Tokens.empty()) {
 156       Parser.addUnwrappedLine();
 157     }
 158     assert(Parser.Line->Tokens.empty());
 159     Parser.Line = std::move(PreBlockLine);
 160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 161       Parser.MustBreakBeforeNextToken = true;
 162     Parser.CurrentLines = OriginalLines;
 163   }
 164
 165 private:
 166   UnwrappedLineParser &Parser;
 167
 168   std::unique_ptr<UnwrappedLine> PreBlockLine;
 169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 170 };
 171
 172 class CompoundStatementIndenter {
 173 public:
 174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 175                             const FormatStyle &Style, unsigned &LineLevel)
 176       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 177     if (Style.BraceWrapping.AfterControlStatement)
 178       Parser->addUnwrappedLine();
 179     if (Style.BraceWrapping.IndentBraces)
 180       ++LineLevel;
 181   }
 182   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 183
 184 private:
 185   unsigned &LineLevel;
 186   unsigned OldLineLevel;
 187 };
 188
 189 namespace {
 190
 191 class IndexedTokenSource : public FormatTokenSource {
 192 public:
 193   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 194       : Tokens(Tokens), Position(-1) {}
 195
 196   FormatToken *getNextToken() override {
 197     ++Position;
 198     return Tokens[Position];
 199   }
 200
 201   unsigned getPosition() override {
 202     assert(Position >= 0);
 203     return Position;
 204   }
 205
 206   FormatToken *setPosition(unsigned P) override {
 207     Position = P;
 208     return Tokens[Position];
 209   }
 210
 211   void reset() { Position = -1; }
 212
 213 private:
 214   ArrayRef<FormatToken *> Tokens;
 215   int Position;
 216 };
 217
 218 } // end anonymous namespace
 219
 220 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 221                                          const AdditionalKeywords &Keywords,
 222                                          unsigned FirstStartColumn,
 223                                          ArrayRef<FormatToken *> Tokens,
 224                                          UnwrappedLineConsumer &Callback)
 225     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 226       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 227       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 228       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 229       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 230                        ? IG_Rejected
 231                        : IG_Inited),
 232       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
 233
 234 void UnwrappedLineParser::reset() {
 235   PPBranchLevel = -1;
 236   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 237                      ? IG_Rejected
 238                      : IG_Inited;
 239   IncludeGuardToken = nullptr;
 240   Line.reset(new UnwrappedLine);
 241   CommentsBeforeNextToken.clear();
 242   FormatTok = nullptr;
 243   MustBreakBeforeNextToken = false;
 244   PreprocessorDirectives.clear();
 245   CurrentLines = &Lines;
 246   DeclarationScopeStack.clear();
 247   PPStack.clear();
 248   Line->FirstStartColumn = FirstStartColumn;
 249 }
 250
 251 void UnwrappedLineParser::parse() {
 252   IndexedTokenSource TokenSource(AllTokens);
 253   Line->FirstStartColumn = FirstStartColumn;
 254   do {
 255     LLVM_DEBUG(llvm::dbgs() << "----\n");
 256     reset();
 257     Tokens = &TokenSource;
 258     TokenSource.reset();
 259
 260     readToken();
 261     parseFile();
 262
 263     // If we found an include guard then all preprocessor directives (other than
 264     // the guard) are over-indented by one.
 265     if (IncludeGuard == IG_Found)
 266       for (auto &Line : Lines)
 267         if (Line.InPPDirective && Line.Level > 0)
 268           --Line.Level;
 269
 270     // Create line with eof token.
 271     pushToken(FormatTok);
 272     addUnwrappedLine();
 273
 274     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 275                                                   E = Lines.end();
 276          I != E; ++I) {
 277       Callback.consumeUnwrappedLine(*I);
 278     }
 279     Callback.finishRun();
 280     Lines.clear();
 281     while (!PPLevelBranchIndex.empty() &&
 282            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 283       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 284       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 285     }
 286     if (!PPLevelBranchIndex.empty()) {
 287       ++PPLevelBranchIndex.back();
 288       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 289       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 290     }
 291   } while (!PPLevelBranchIndex.empty());
 292 }
 293
 294 void UnwrappedLineParser::parseFile() {
 295   // The top-level context in a file always has declarations, except for pre-
 296   // processor directives and JavaScript files.
 297   bool MustBeDeclaration =
 298       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 299   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 300                                           MustBeDeclaration);
 301   if (Style.Language == FormatStyle::LK_TextProto)
 302     parseBracedList();
 303   else
 304     parseLevel(/*HasOpeningBrace=*/false);
 305   // Make sure to format the remaining tokens.
 306   //
 307   // LK_TextProto is special since its top-level is parsed as the body of a
 308   // braced list, which does not necessarily have natural line separators such
 309   // as a semicolon. Comments after the last entry that have been determined to
 310   // not belong to that line, as in:
 311   //   key: value
 312   //   // endfile comment
 313   // do not have a chance to be put on a line of their own until this point.
 314   // Here we add this newline before end-of-file comments.
 315   if (Style.Language == FormatStyle::LK_TextProto &&
 316       !CommentsBeforeNextToken.empty())
 317     addUnwrappedLine();
 318   flushComments(true);
 319   addUnwrappedLine();
 320 }
 321
 322 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 323   bool SwitchLabelEncountered = false;
 324   do {
 325     tok::TokenKind kind = FormatTok->Tok.getKind();
 326     if (FormatTok->Type == TT_MacroBlockBegin) {
 327       kind = tok::l_brace;
 328     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 329       kind = tok::r_brace;
 330     }
 331
 332     switch (kind) {
 333     case tok::comment:
 334       nextToken();
 335       addUnwrappedLine();
 336       break;
 337     case tok::l_brace:
 338       // FIXME: Add parameter whether this can happen - if this happens, we must
 339       // be in a non-declaration context.
 340       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
 341         continue;
 342       parseBlock(/*MustBeDeclaration=*/false);
 343       addUnwrappedLine();
 344       break;
 345     case tok::r_brace:
 346       if (HasOpeningBrace)
 347         return;
 348       nextToken();
 349       addUnwrappedLine();
 350       break;
 351     case tok::kw_default: {
 352       unsigned StoredPosition = Tokens->getPosition();
 353       FormatToken *Next = Tokens->getNextToken();
 354       FormatTok = Tokens->setPosition(StoredPosition);
 355       if (Next && Next->isNot(tok::colon)) {
 356         // default not followed by ':' is not a case label; treat it like
 357         // an identifier.
 358         parseStructuralElement();
 359         break;
 360       }
 361       // Else, if it is 'default:', fall through to the case handling.
 362       LLVM_FALLTHROUGH;
 363     }
 364     case tok::kw_case:
 365       if (Style.Language == FormatStyle::LK_JavaScript &&
 366           Line->MustBeDeclaration) {
 367         // A 'case: string' style field declaration.
 368         parseStructuralElement();
 369         break;
 370       }
 371       if (!SwitchLabelEncountered &&
 372           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 373         ++Line->Level;
 374       SwitchLabelEncountered = true;
 375       parseStructuralElement();
 376       break;
 377     default:
 378       parseStructuralElement();
 379       break;
 380     }
 381   } while (!eof());
 382 }
 383
 384 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 385   // We'll parse forward through the tokens until we hit
 386   // a closing brace or eof - note that getNextToken() will
 387   // parse macros, so this will magically work inside macro
 388   // definitions, too.
 389   unsigned StoredPosition = Tokens->getPosition();
 390   FormatToken *Tok = FormatTok;
 391   const FormatToken *PrevTok = Tok->Previous;
 392   // Keep a stack of positions of lbrace tokens. We will
 393   // update information about whether an lbrace starts a
 394   // braced init list or a different block during the loop.
 395   SmallVector<FormatToken *, 8> LBraceStack;
 396   assert(Tok->Tok.is(tok::l_brace));
 397   do {
 398     // Get next non-comment token.
 399     FormatToken *NextTok;
 400     unsigned ReadTokens = 0;
 401     do {
 402       NextTok = Tokens->getNextToken();
 403       ++ReadTokens;
 404     } while (NextTok->is(tok::comment));
 405
 406     switch (Tok->Tok.getKind()) {
 407     case tok::l_brace:
 408       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
 409         if (PrevTok->isOneOf(tok::colon, tok::less))
 410           // A ':' indicates this code is in a type, or a braced list
 411           // following a label in an object literal ({a: {b: 1}}).
 412           // A '<' could be an object used in a comparison, but that is nonsense
 413           // code (can never return true), so more likely it is a generic type
 414           // argument (`X<{a: string; b: number}>`).
 415           // The code below could be confused by semicolons between the
 416           // individual members in a type member list, which would normally
 417           // trigger BK_Block. In both cases, this must be parsed as an inline
 418           // braced init.
 419           Tok->BlockKind = BK_BracedInit;
 420         else if (PrevTok->is(tok::r_paren))
 421           // `) { }` can only occur in function or method declarations in JS.
 422           Tok->BlockKind = BK_Block;
 423       } else {
 424         Tok->BlockKind = BK_Unknown;
 425       }
 426       LBraceStack.push_back(Tok);
 427       break;
 428     case tok::r_brace:
 429       if (LBraceStack.empty())
 430         break;
 431       if (LBraceStack.back()->BlockKind == BK_Unknown) {
 432         bool ProbablyBracedList = false;
 433         if (Style.Language == FormatStyle::LK_Proto) {
 434           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 435         } else {
 436           // Using OriginalColumn to distinguish between ObjC methods and
 437           // binary operators is a bit hacky.
 438           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 439                                   NextTok->OriginalColumn == 0;
 440
 441           // If there is a comma, semicolon or right paren after the closing
 442           // brace, we assume this is a braced initializer list.  Note that
 443           // regardless how we mark inner braces here, we will overwrite the
 444           // BlockKind later if we parse a braced list (where all blocks
 445           // inside are by default braced lists), or when we explicitly detect
 446           // blocks (for example while parsing lambdas).
 447           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
 448           // braced list in JS.
 449           ProbablyBracedList =
 450               (Style.Language == FormatStyle::LK_JavaScript &&
 451                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 452                                 Keywords.kw_as)) ||
 453               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
 454               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 455                                tok::r_paren, tok::r_square, tok::l_brace,
 456                                tok::ellipsis) ||
 457               (NextTok->is(tok::identifier) &&
 458                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
 459               (NextTok->is(tok::semi) &&
 460                (!ExpectClassBody || LBraceStack.size() != 1)) ||
 461               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 462           if (NextTok->is(tok::l_square)) {
 463             // We can have an array subscript after a braced init
 464             // list, but C++11 attributes are expected after blocks.
 465             NextTok = Tokens->getNextToken();
 466             ++ReadTokens;
 467             ProbablyBracedList = NextTok->isNot(tok::l_square);
 468           }
 469         }
 470         if (ProbablyBracedList) {
 471           Tok->BlockKind = BK_BracedInit;
 472           LBraceStack.back()->BlockKind = BK_BracedInit;
 473         } else {
 474           Tok->BlockKind = BK_Block;
 475           LBraceStack.back()->BlockKind = BK_Block;
 476         }
 477       }
 478       LBraceStack.pop_back();
 479       break;
 480     case tok::at:
 481     case tok::semi:
 482     case tok::kw_if:
 483     case tok::kw_while:
 484     case tok::kw_for:
 485     case tok::kw_switch:
 486     case tok::kw_try:
 487     case tok::kw___try:
 488       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
 489         LBraceStack.back()->BlockKind = BK_Block;
 490       break;
 491     default:
 492       break;
 493     }
 494     PrevTok = Tok;
 495     Tok = NextTok;
 496   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 497
 498   // Assume other blocks for all unclosed opening braces.
 499   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 500     if (LBraceStack[i]->BlockKind == BK_Unknown)
 501       LBraceStack[i]->BlockKind = BK_Block;
 502   }
 503
 504   FormatTok = Tokens->setPosition(StoredPosition);
 505 }
 506
 507 template <class T>
 508 static inline void hash_combine(std::size_t &seed, const T &v) {
 509   std::hash<T> hasher;
 510   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 511 }
 512
 513 size_t UnwrappedLineParser::computePPHash() const {
 514   size_t h = 0;
 515   for (const auto &i : PPStack) {
 516     hash_combine(h, size_t(i.Kind));
 517     hash_combine(h, i.Line);
 518   }
 519   return h;
 520 }
 521
 522 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 523                                      bool MunchSemi) {
 524   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 525          "'{' or macro block token expected");
 526   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 527   FormatTok->BlockKind = BK_Block;
 528
 529   size_t PPStartHash = computePPHash();
 530
 531   unsigned InitialLevel = Line->Level;
 532   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
 533
 534   if (MacroBlock && FormatTok->is(tok::l_paren))
 535     parseParens();
 536
 537   size_t NbPreprocessorDirectives =
 538       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
 539   addUnwrappedLine();
 540   size_t OpeningLineIndex =
 541       CurrentLines->empty()
 542           ? (UnwrappedLine::kInvalidIndex)
 543           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 544
 545   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 546                                           MustBeDeclaration);
 547   if (AddLevel)
 548     ++Line->Level;
 549   parseLevel(/*HasOpeningBrace=*/true);
 550
 551   if (eof())
 552     return;
 553
 554   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 555                  : !FormatTok->is(tok::r_brace)) {
 556     Line->Level = InitialLevel;
 557     FormatTok->BlockKind = BK_Block;
 558     return;
 559   }
 560
 561   size_t PPEndHash = computePPHash();
 562
 563   // Munch the closing brace.
 564   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
 565
 566   if (MacroBlock && FormatTok->is(tok::l_paren))
 567     parseParens();
 568
 569   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 570     nextToken();
 571   Line->Level = InitialLevel;
 572
 573   if (PPStartHash == PPEndHash) {
 574     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 575     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 576       // Update the opening line to add the forward reference as well
 577       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 578           CurrentLines->size() - 1;
 579     }
 580   }
 581 }
 582
 583 static bool isGoogScope(const UnwrappedLine &Line) {
 584   // FIXME: Closure-library specific stuff should not be hard-coded but be
 585   // configurable.
 586   if (Line.Tokens.size() < 4)
 587     return false;
 588   auto I = Line.Tokens.begin();
 589   if (I->Tok->TokenText != "goog")
 590     return false;
 591   ++I;
 592   if (I->Tok->isNot(tok::period))
 593     return false;
 594   ++I;
 595   if (I->Tok->TokenText != "scope")
 596     return false;
 597   ++I;
 598   return I->Tok->is(tok::l_paren);
 599 }
 600
 601 static bool isIIFE(const UnwrappedLine &Line,
 602                    const AdditionalKeywords &Keywords) {
 603   // Look for the start of an immediately invoked anonymous function.
 604   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 605   // This is commonly done in JavaScript to create a new, anonymous scope.
 606   // Example: (function() { ... })()
 607   if (Line.Tokens.size() < 3)
 608     return false;
 609   auto I = Line.Tokens.begin();
 610   if (I->Tok->isNot(tok::l_paren))
 611     return false;
 612   ++I;
 613   if (I->Tok->isNot(Keywords.kw_function))
 614     return false;
 615   ++I;
 616   return I->Tok->is(tok::l_paren);
 617 }
 618
 619 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 620                                    const FormatToken &InitialToken) {
 621   if (InitialToken.is(tok::kw_namespace))
 622     return Style.BraceWrapping.AfterNamespace;
 623   if (InitialToken.is(tok::kw_class))
 624     return Style.BraceWrapping.AfterClass;
 625   if (InitialToken.is(tok::kw_union))
 626     return Style.BraceWrapping.AfterUnion;
 627   if (InitialToken.is(tok::kw_struct))
 628     return Style.BraceWrapping.AfterStruct;
 629   return false;
 630 }
 631
 632 void UnwrappedLineParser::parseChildBlock() {
 633   FormatTok->BlockKind = BK_Block;
 634   nextToken();
 635   {
 636     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
 637                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 638     ScopedLineState LineState(*this);
 639     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 640                                             /*MustBeDeclaration=*/false);
 641     Line->Level += SkipIndent ? 0 : 1;
 642     parseLevel(/*HasOpeningBrace=*/true);
 643     flushComments(isOnNewLine(*FormatTok));
 644     Line->Level -= SkipIndent ? 0 : 1;
 645   }
 646   nextToken();
 647 }
 648
 649 void UnwrappedLineParser::parsePPDirective() {
 650   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 651   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 652   nextToken();
 653
 654   if (!FormatTok->Tok.getIdentifierInfo()) {
 655     parsePPUnknown();
 656     return;
 657   }
 658
 659   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 660   case tok::pp_define:
 661     parsePPDefine();
 662     return;
 663   case tok::pp_if:
 664     parsePPIf(/*IfDef=*/false);
 665     break;
 666   case tok::pp_ifdef:
 667   case tok::pp_ifndef:
 668     parsePPIf(/*IfDef=*/true);
 669     break;
 670   case tok::pp_else:
 671     parsePPElse();
 672     break;
 673   case tok::pp_elif:
 674     parsePPElIf();
 675     break;
 676   case tok::pp_endif:
 677     parsePPEndIf();
 678     break;
 679   default:
 680     parsePPUnknown();
 681     break;
 682   }
 683 }
 684
 685 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 686   size_t Line = CurrentLines->size();
 687   if (CurrentLines == &PreprocessorDirectives)
 688     Line += Lines.size();
 689
 690   if (Unreachable ||
 691       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
 692     PPStack.push_back({PP_Unreachable, Line});
 693   else
 694     PPStack.push_back({PP_Conditional, Line});
 695 }
 696
 697 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 698   ++PPBranchLevel;
 699   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 700   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 701     PPLevelBranchIndex.push_back(0);
 702     PPLevelBranchCount.push_back(0);
 703   }
 704   PPChainBranchIndex.push(0);
 705   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 706   conditionalCompilationCondition(Unreachable || Skip);
 707 }
 708
 709 void UnwrappedLineParser::conditionalCompilationAlternative() {
 710   if (!PPStack.empty())
 711     PPStack.pop_back();
 712   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 713   if (!PPChainBranchIndex.empty())
 714     ++PPChainBranchIndex.top();
 715   conditionalCompilationCondition(
 716       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 717       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 718 }
 719
 720 void UnwrappedLineParser::conditionalCompilationEnd() {
 721   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 722   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 723     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 724       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 725     }
 726   }
 727   // Guard against #endif's without #if.
 728   if (PPBranchLevel > -1)
 729     --PPBranchLevel;
 730   if (!PPChainBranchIndex.empty())
 731     PPChainBranchIndex.pop();
 732   if (!PPStack.empty())
 733     PPStack.pop_back();
 734 }
 735
 736 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 737   bool IfNDef = FormatTok->is(tok::pp_ifndef);
 738   nextToken();
 739   bool Unreachable = false;
 740   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
 741     Unreachable = true;
 742   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
 743     Unreachable = true;
 744   conditionalCompilationStart(Unreachable);
 745   FormatToken *IfCondition = FormatTok;
 746   // If there's a #ifndef on the first line, and the only lines before it are
 747   // comments, it could be an include guard.
 748   bool MaybeIncludeGuard = IfNDef;
 749   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
 750     for (auto &Line : Lines) {
 751       if (!Line.Tokens.front().Tok->is(tok::comment)) {
 752         MaybeIncludeGuard = false;
 753         IncludeGuard = IG_Rejected;
 754         break;
 755       }
 756     }
 757   --PPBranchLevel;
 758   parsePPUnknown();
 759   ++PPBranchLevel;
 760   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
 761     IncludeGuard = IG_IfNdefed;
 762     IncludeGuardToken = IfCondition;
 763   }
 764 }
 765
 766 void UnwrappedLineParser::parsePPElse() {
 767   // If a potential include guard has an #else, it's not an include guard.
 768   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
 769     IncludeGuard = IG_Rejected;
 770   conditionalCompilationAlternative();
 771   if (PPBranchLevel > -1)
 772     --PPBranchLevel;
 773   parsePPUnknown();
 774   ++PPBranchLevel;
 775 }
 776
 777 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 778
 779 void UnwrappedLineParser::parsePPEndIf() {
 780   conditionalCompilationEnd();
 781   parsePPUnknown();
 782   // If the #endif of a potential include guard is the last thing in the file,
 783   // then we found an include guard.
 784   unsigned TokenPosition = Tokens->getPosition();
 785   FormatToken *PeekNext = AllTokens[TokenPosition];
 786   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
 787       PeekNext->is(tok::eof) &&
 788       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 789     IncludeGuard = IG_Found;
 790 }
 791
 792 void UnwrappedLineParser::parsePPDefine() {
 793   nextToken();
 794
 795   if (FormatTok->Tok.getKind() != tok::identifier) {
 796     IncludeGuard = IG_Rejected;
 797     IncludeGuardToken = nullptr;
 798     parsePPUnknown();
 799     return;
 800   }
 801
 802   if (IncludeGuard == IG_IfNdefed &&
 803       IncludeGuardToken->TokenText == FormatTok->TokenText) {
 804     IncludeGuard = IG_Defined;
 805     IncludeGuardToken = nullptr;
 806     for (auto &Line : Lines) {
 807       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
 808         IncludeGuard = IG_Rejected;
 809         break;
 810       }
 811     }
 812   }
 813
 814   nextToken();
 815   if (FormatTok->Tok.getKind() == tok::l_paren &&
 816       FormatTok->WhitespaceRange.getBegin() ==
 817           FormatTok->WhitespaceRange.getEnd()) {
 818     parseParens();
 819   }
 820   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
 821     Line->Level += PPBranchLevel + 1;
 822   addUnwrappedLine();
 823   ++Line->Level;
 824
 825   // Errors during a preprocessor directive can only affect the layout of the
 826   // preprocessor directive, and thus we ignore them. An alternative approach
 827   // would be to use the same approach we use on the file level (no
 828   // re-indentation if there was a structural error) within the macro
 829   // definition.
 830   parseFile();
 831 }
 832
 833 void UnwrappedLineParser::parsePPUnknown() {
 834   do {
 835     nextToken();
 836   } while (!eof());
 837   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
 838     Line->Level += PPBranchLevel + 1;
 839   addUnwrappedLine();
 840 }
 841
 842 // Here we blacklist certain tokens that are not usually the first token in an
 843 // unwrapped line. This is used in attempt to distinguish macro calls without
 844 // trailing semicolons from other constructs split to several lines.
 845 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 846   // Semicolon can be a null-statement, l_square can be a start of a macro or
 847   // a C++11 attribute, but this doesn't seem to be common.
 848   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 849          Tok.isNot(tok::l_square) &&
 850          // Tokens that can only be used as binary operators and a part of
 851          // overloaded operator names.
 852          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 853          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 854          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 855          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 856          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 857          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 858          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 859          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 860          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 861          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 862          Tok.isNot(tok::lesslessequal) &&
 863          // Colon is used in labels, base class lists, initializer lists,
 864          // range-based for loops, ternary operator, but should never be the
 865          // first token in an unwrapped line.
 866          Tok.isNot(tok::colon) &&
 867          // 'noexcept' is a trailing annotation.
 868          Tok.isNot(tok::kw_noexcept);
 869 }
 870
 871 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
 872                           const FormatToken *FormatTok) {
 873   // FIXME: This returns true for C/C++ keywords like 'struct'.
 874   return FormatTok->is(tok::identifier) &&
 875          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
 876           !FormatTok->isOneOf(
 877               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
 878               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
 879               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
 880               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
 881               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
 882               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
 883               Keywords.kw_from));
 884 }
 885
 886 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
 887                                  const FormatToken *FormatTok) {
 888   return FormatTok->Tok.isLiteral() ||
 889          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
 890          mustBeJSIdent(Keywords, FormatTok);
 891 }
 892
 893 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
 894 // when encountered after a value (see mustBeJSIdentOrValue).
 895 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
 896                            const FormatToken *FormatTok) {
 897   return FormatTok->isOneOf(
 898       tok::kw_return, Keywords.kw_yield,
 899       // conditionals
 900       tok::kw_if, tok::kw_else,
 901       // loops
 902       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
 903       // switch/case
 904       tok::kw_switch, tok::kw_case,
 905       // exceptions
 906       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
 907       // declaration
 908       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
 909       Keywords.kw_async, Keywords.kw_function,
 910       // import/export
 911       Keywords.kw_import, tok::kw_export);
 912 }
 913
 914 // readTokenWithJavaScriptASI reads the next token and terminates the current
 915 // line if JavaScript Automatic Semicolon Insertion must
 916 // happen between the current token and the next token.
 917 //
 918 // This method is conservative - it cannot cover all edge cases of JavaScript,
 919 // but only aims to correctly handle certain well known cases. It *must not*
 920 // return true in speculative cases.
 921 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 922   FormatToken *Previous = FormatTok;
 923   readToken();
 924   FormatToken *Next = FormatTok;
 925
 926   bool IsOnSameLine =
 927       CommentsBeforeNextToken.empty()
 928           ? Next->NewlinesBefore == 0
 929           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
 930   if (IsOnSameLine)
 931     return;
 932
 933   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
 934   bool PreviousStartsTemplateExpr =
 935       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
 936   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
 937     // If the line contains an '@' sign, the previous token might be an
 938     // annotation, which can precede another identifier/value.
 939     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
 940                               [](UnwrappedLineNode &LineNode) {
 941                                 return LineNode.Tok->is(tok::at);
 942                               }) != Line->Tokens.end();
 943     if (HasAt)
 944       return;
 945   }
 946   if (Next->is(tok::exclaim) && PreviousMustBeValue)
 947     return addUnwrappedLine();
 948   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
 949   bool NextEndsTemplateExpr =
 950       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
 951   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
 952       (PreviousMustBeValue ||
 953        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
 954                          tok::minusminus)))
 955     return addUnwrappedLine();
 956   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
 957       isJSDeclOrStmt(Keywords, Next))
 958     return addUnwrappedLine();
 959 }
 960
// Parses one structural element starting at the current token and emits the
// resulting unwrapped line(s).
//
// The first switch dispatches constructs that are identified by their leading
// token (asm, namespace, access specifiers, control flow, extern blocks,
// import/export, special macros and identifiers) to dedicated parsers. Cases
// that 'break' out of that switch, and all tokens not handled there, fall into
// the loop below, which consumes the element token by token until something
// terminates it (for example ';', '}', a block, or eof).
//
// Must not be called with the current token on '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: 'include "file"' forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the matching '}' is flagged as Finalized; the
      // closing brace itself ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript these are modifiers and are simply skipped;
    // otherwise they are handled as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a block of declarations; whether the brace gets
    // its own line and whether the contents are indented depends on
    // BraceWrapping.AfterExternBlock.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import [public] "file";' forms a line of its own.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' (and their Q_ variants) followed by a colon end
    // the line.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token-by-token parsing of the rest of the element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      // '@{' starts a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
           // Skip synchronization object
           parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; the line built so far is emitted and
      // the brace is left as the current token.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by a type/identifier, a parameter list and
      // a '{...}' body that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase identifier that is function-like or at
        // least five characters long, followed by a line break and a token
        // that may start a line, is treated as a macro on its own line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // In Proto, '= <' opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1369
1370 bool UnwrappedLineParser::tryToParseLambda() {
1371   if (!Style.isCpp()) {
1372     nextToken();
1373     return false;
1374   }
1375   assert(FormatTok->is(tok::l_square));
1376   FormatToken &LSquare = *FormatTok;
1377   if (!tryToParseLambdaIntroducer())
1378     return false;
1379
1380   while (FormatTok->isNot(tok::l_brace)) {
1381     if (FormatTok->isSimpleTypeSpecifier()) {
1382       nextToken();
1383       continue;
1384     }
1385     switch (FormatTok->Tok.getKind()) {
1386     case tok::l_brace:
1387       break;
1388     case tok::l_paren:
1389       parseParens();
1390       break;
1391     case tok::amp:
1392     case tok::star:
1393     case tok::kw_const:
1394     case tok::comma:
1395     case tok::less:
1396     case tok::greater:
1397     case tok::identifier:
1398     case tok::numeric_constant:
1399     case tok::coloncolon:
1400     case tok::kw_mutable:
1401       nextToken();
1402       break;
1403     case tok::arrow:
1404       FormatTok->Type = TT_LambdaArrow;
1405       nextToken();
1406       break;
1407     default:
1408       return true;
1409     }
1410   }
1411   LSquare.Type = TT_LambdaLSquare;
1412   parseChildBlock();
1413   return true;
1414 }
1415
1416 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1417   const FormatToken *Previous = FormatTok->Previous;
1418   if (Previous &&
1419       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1420                          tok::kw_delete, tok::l_square) ||
1421        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1422        Previous->isSimpleTypeSpecifier())) {
1423     nextToken();
1424     return false;
1425   }
1426   nextToken();
1427   if (FormatTok->is(tok::l_square)) {
1428     return false;
1429   }
1430   parseSquare(/*LambdaIntroducer=*/true);
1431   return true;
1432 }
1433
1434 void UnwrappedLineParser::tryToParseJSFunction() {
1435   assert(FormatTok->is(Keywords.kw_function) ||
1436          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1437   if (FormatTok->is(Keywords.kw_async))
1438     nextToken();
1439   // Consume "function".
1440   nextToken();
1441
1442   // Consume * (generator function). Treat it like C++'s overloaded operators.
1443   if (FormatTok->is(tok::star)) {
1444     FormatTok->Type = TT_OverloadedOperator;
1445     nextToken();
1446   }
1447
1448   // Consume function name.
1449   if (FormatTok->is(tok::identifier))
1450     nextToken();
1451
1452   if (FormatTok->isNot(tok::l_paren))
1453     return;
1454
1455   // Parse formal parameter list.
1456   parseParens();
1457
1458   if (FormatTok->is(tok::colon)) {
1459     // Parse a type definition.
1460     nextToken();
1461
1462     // Eat the type declaration. For braced inline object types, balance braces,
1463     // otherwise just parse until finding an l_brace for the function body.
1464     if (FormatTok->is(tok::l_brace))
1465       tryToParseBracedList();
1466     else
1467       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1468         nextToken();
1469   }
1470
1471   if (FormatTok->is(tok::semi))
1472     return;
1473
1474   parseChildBlock();
1475 }
1476
1477 bool UnwrappedLineParser::tryToParseBracedList() {
1478   if (FormatTok->BlockKind == BK_Unknown)
1479     calculateBraceTypes();
1480   assert(FormatTok->BlockKind != BK_Unknown);
1481   if (FormatTok->BlockKind == BK_Block)
1482     return false;
1483   nextToken();
1484   parseBracedList();
1485   return true;
1486 }
1487
1488 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1489                                           tok::TokenKind ClosingBraceKind) {
1490   bool HasError = false;
1491
1492   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1493   // replace this by using parseAssigmentExpression() inside.
1494   do {
1495     if (Style.Language == FormatStyle::LK_JavaScript) {
1496       if (FormatTok->is(Keywords.kw_function) ||
1497           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1498         tryToParseJSFunction();
1499         continue;
1500       }
1501       if (FormatTok->is(TT_JsFatArrow)) {
1502         nextToken();
1503         // Fat arrows can be followed by simple expressions or by child blocks
1504         // in curly braces.
1505         if (FormatTok->is(tok::l_brace)) {
1506           parseChildBlock();
1507           continue;
1508         }
1509       }
1510       if (FormatTok->is(tok::l_brace)) {
1511         // Could be a method inside of a braced list `{a() { return 1; }}`.
1512         if (tryToParseBracedList())
1513           continue;
1514         parseChildBlock();
1515       }
1516     }
1517     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1518       nextToken();
1519       return !HasError;
1520     }
1521     switch (FormatTok->Tok.getKind()) {
1522     case tok::caret:
1523       nextToken();
1524       if (FormatTok->is(tok::l_brace)) {
1525         parseChildBlock();
1526       }
1527       break;
1528     case tok::l_square:
1529       tryToParseLambda();
1530       break;
1531     case tok::l_paren:
1532       parseParens();
1533       // JavaScript can just have free standing methods and getters/setters in
1534       // object literals. Detect them by a "{" following ")".
1535       if (Style.Language == FormatStyle::LK_JavaScript) {
1536         if (FormatTok->is(tok::l_brace))
1537           parseChildBlock();
1538         break;
1539       }
1540       break;
1541     case tok::l_brace:
1542       // Assume there are no blocks inside a braced init list apart
1543       // from the ones we explicitly parse out (like lambdas).
1544       FormatTok->BlockKind = BK_BracedInit;
1545       nextToken();
1546       parseBracedList();
1547       break;
1548     case tok::less:
1549       if (Style.Language == FormatStyle::LK_Proto) {
1550         nextToken();
1551         parseBracedList(/*ContinueOnSemicolons=*/false,
1552                         /*ClosingBraceKind=*/tok::greater);
1553       } else {
1554         nextToken();
1555       }
1556       break;
1557     case tok::semi:
1558       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1559       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1560       // used for error recovery if we have otherwise determined that this is
1561       // a braced list.
1562       if (Style.Language == FormatStyle::LK_JavaScript) {
1563         nextToken();
1564         break;
1565       }
1566       HasError = true;
1567       if (!ContinueOnSemicolons)
1568         return !HasError;
1569       nextToken();
1570       break;
1571     case tok::comma:
1572       nextToken();
1573       break;
1574     default:
1575       nextToken();
1576       break;
1577     }
1578   } while (!eof());
1579   return false;
1580 }
1581
1582 void UnwrappedLineParser::parseParens() {
1583   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1584   nextToken();
1585   do {
1586     switch (FormatTok->Tok.getKind()) {
1587     case tok::l_paren:
1588       parseParens();
1589       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1590         parseChildBlock();
1591       break;
1592     case tok::r_paren:
1593       nextToken();
1594       return;
1595     case tok::r_brace:
1596       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1597       return;
1598     case tok::l_square:
1599       tryToParseLambda();
1600       break;
1601     case tok::l_brace:
1602       if (!tryToParseBracedList())
1603         parseChildBlock();
1604       break;
1605     case tok::at:
1606       nextToken();
1607       if (FormatTok->Tok.is(tok::l_brace)) {
1608         nextToken();
1609         parseBracedList();
1610       }
1611       break;
1612     case tok::kw_class:
1613       if (Style.Language == FormatStyle::LK_JavaScript)
1614         parseRecord(/*ParseAsExpr=*/true);
1615       else
1616         nextToken();
1617       break;
1618     case tok::identifier:
1619       if (Style.Language == FormatStyle::LK_JavaScript &&
1620           (FormatTok->is(Keywords.kw_function) ||
1621            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1622         tryToParseJSFunction();
1623       else
1624         nextToken();
1625       break;
1626     default:
1627       nextToken();
1628       break;
1629     }
1630   } while (!eof());
1631 }
1632
1633 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1634   if (!LambdaIntroducer) {
1635     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1636     if (tryToParseLambda())
1637       return;
1638   }
1639   do {
1640     switch (FormatTok->Tok.getKind()) {
1641     case tok::l_paren:
1642       parseParens();
1643       break;
1644     case tok::r_square:
1645       nextToken();
1646       return;
1647     case tok::r_brace:
1648       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1649       return;
1650     case tok::l_square:
1651       parseSquare();
1652       break;
1653     case tok::l_brace: {
1654       if (!tryToParseBracedList())
1655         parseChildBlock();
1656       break;
1657     }
1658     case tok::at:
1659       nextToken();
1660       if (FormatTok->Tok.is(tok::l_brace)) {
1661         nextToken();
1662         parseBracedList();
1663       }
1664       break;
1665     default:
1666       nextToken();
1667       break;
1668     }
1669   } while (!eof());
1670 }
1671
1672 void UnwrappedLineParser::parseIfThenElse() {
1673   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1674   nextToken();
1675   if (FormatTok->Tok.is(tok::kw_constexpr))
1676     nextToken();
1677   if (FormatTok->Tok.is(tok::l_paren))
1678     parseParens();
1679   bool NeedsUnwrappedLine = false;
1680   if (FormatTok->Tok.is(tok::l_brace)) {
1681     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1682     parseBlock(/*MustBeDeclaration=*/false);
1683     if (Style.BraceWrapping.BeforeElse)
1684       addUnwrappedLine();
1685     else
1686       NeedsUnwrappedLine = true;
1687   } else {
1688     addUnwrappedLine();
1689     ++Line->Level;
1690     parseStructuralElement();
1691     --Line->Level;
1692   }
1693   if (FormatTok->Tok.is(tok::kw_else)) {
1694     nextToken();
1695     if (FormatTok->Tok.is(tok::l_brace)) {
1696       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1697       parseBlock(/*MustBeDeclaration=*/false);
1698       addUnwrappedLine();
1699     } else if (FormatTok->Tok.is(tok::kw_if)) {
1700       parseIfThenElse();
1701     } else {
1702       addUnwrappedLine();
1703       ++Line->Level;
1704       parseStructuralElement();
1705       if (FormatTok->is(tok::eof))
1706         addUnwrappedLine();
1707       --Line->Level;
1708     }
1709   } else if (NeedsUnwrappedLine) {
1710     addUnwrappedLine();
1711   }
1712 }
1713
1714 void UnwrappedLineParser::parseTryCatch() {
1715   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1716   nextToken();
1717   bool NeedsUnwrappedLine = false;
1718   if (FormatTok->is(tok::colon)) {
1719     // We are in a function try block, what comes is an initializer list.
1720     nextToken();
1721     while (FormatTok->is(tok::identifier)) {
1722       nextToken();
1723       if (FormatTok->is(tok::l_paren))
1724         parseParens();
1725       if (FormatTok->is(tok::comma))
1726         nextToken();
1727     }
1728   }
1729   // Parse try with resource.
1730   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1731     parseParens();
1732   }
1733   if (FormatTok->is(tok::l_brace)) {
1734     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1735     parseBlock(/*MustBeDeclaration=*/false);
1736     if (Style.BraceWrapping.BeforeCatch) {
1737       addUnwrappedLine();
1738     } else {
1739       NeedsUnwrappedLine = true;
1740     }
1741   } else if (!FormatTok->is(tok::kw_catch)) {
1742     // The C++ standard requires a compound-statement after a try.
1743     // If there's none, we try to assume there's a structuralElement
1744     // and try to continue.
1745     addUnwrappedLine();
1746     ++Line->Level;
1747     parseStructuralElement();
1748     --Line->Level;
1749   }
1750   while (1) {
1751     if (FormatTok->is(tok::at))
1752       nextToken();
1753     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1754                              tok::kw___finally) ||
1755           ((Style.Language == FormatStyle::LK_Java ||
1756             Style.Language == FormatStyle::LK_JavaScript) &&
1757            FormatTok->is(Keywords.kw_finally)) ||
1758           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1759            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1760       break;
1761     nextToken();
1762     while (FormatTok->isNot(tok::l_brace)) {
1763       if (FormatTok->is(tok::l_paren)) {
1764         parseParens();
1765         continue;
1766       }
1767       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1768         return;
1769       nextToken();
1770     }
1771     NeedsUnwrappedLine = false;
1772     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1773     parseBlock(/*MustBeDeclaration=*/false);
1774     if (Style.BraceWrapping.BeforeCatch)
1775       addUnwrappedLine();
1776     else
1777       NeedsUnwrappedLine = true;
1778   }
1779   if (NeedsUnwrappedLine)
1780     addUnwrappedLine();
1781 }
1782
1783 void UnwrappedLineParser::parseNamespace() {
1784   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1785
1786   const FormatToken &InitialToken = *FormatTok;
1787   nextToken();
1788   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1789     nextToken();
1790   if (FormatTok->Tok.is(tok::l_brace)) {
1791     if (ShouldBreakBeforeBrace(Style, InitialToken))
1792       addUnwrappedLine();
1793
1794     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1795                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1796                      DeclarationScopeStack.size() > 1);
1797     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1798     // Munch the semicolon after a namespace. This is more common than one would
1799     // think. Puttin the semicolon into its own line is very ugly.
1800     if (FormatTok->Tok.is(tok::semi))
1801       nextToken();
1802     addUnwrappedLine();
1803   }
1804   // FIXME: Add error handling.
1805 }
1806
1807 void UnwrappedLineParser::parseNew() {
1808   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1809   nextToken();
1810   if (Style.Language != FormatStyle::LK_Java)
1811     return;
1812
1813   // In Java, we can parse everything up to the parens, which aren't optional.
1814   do {
1815     // There should not be a ;, { or } before the new's open paren.
1816     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1817       return;
1818
1819     // Consume the parens.
1820     if (FormatTok->is(tok::l_paren)) {
1821       parseParens();
1822
1823       // If there is a class body of an anonymous class, consume that as child.
1824       if (FormatTok->is(tok::l_brace))
1825         parseChildBlock();
1826       return;
1827     }
1828     nextToken();
1829   } while (!eof());
1830 }
1831
1832 void UnwrappedLineParser::parseForOrWhileLoop() {
1833   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1834          "'for', 'while' or foreach macro expected");
1835   nextToken();
1836   // JS' for await ( ...
1837   if (Style.Language == FormatStyle::LK_JavaScript &&
1838       FormatTok->is(Keywords.kw_await))
1839     nextToken();
1840   if (FormatTok->Tok.is(tok::l_paren))
1841     parseParens();
1842   if (FormatTok->Tok.is(tok::l_brace)) {
1843     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1844     parseBlock(/*MustBeDeclaration=*/false);
1845     addUnwrappedLine();
1846   } else {
1847     addUnwrappedLine();
1848     ++Line->Level;
1849     parseStructuralElement();
1850     --Line->Level;
1851   }
1852 }
1853
1854 void UnwrappedLineParser::parseDoWhile() {
1855   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1856   nextToken();
1857   if (FormatTok->Tok.is(tok::l_brace)) {
1858     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1859     parseBlock(/*MustBeDeclaration=*/false);
1860     if (Style.BraceWrapping.IndentBraces)
1861       addUnwrappedLine();
1862   } else {
1863     addUnwrappedLine();
1864     ++Line->Level;
1865     parseStructuralElement();
1866     --Line->Level;
1867   }
1868
1869   // FIXME: Add error handling.
1870   if (!FormatTok->Tok.is(tok::kw_while)) {
1871     addUnwrappedLine();
1872     return;
1873   }
1874
1875   nextToken();
1876   parseStructuralElement();
1877 }
1878
1879 void UnwrappedLineParser::parseLabel() {
1880   nextToken();
1881   unsigned OldLineLevel = Line->Level;
1882   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1883     --Line->Level;
1884   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1885     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1886     parseBlock(/*MustBeDeclaration=*/false);
1887     if (FormatTok->Tok.is(tok::kw_break)) {
1888       if (Style.BraceWrapping.AfterControlStatement)
1889         addUnwrappedLine();
1890       parseStructuralElement();
1891     }
1892     addUnwrappedLine();
1893   } else {
1894     if (FormatTok->is(tok::semi))
1895       nextToken();
1896     addUnwrappedLine();
1897   }
1898   Line->Level = OldLineLevel;
1899   if (FormatTok->isNot(tok::l_brace)) {
1900     parseStructuralElement();
1901     addUnwrappedLine();
1902   }
1903 }
1904
1905 void UnwrappedLineParser::parseCaseLabel() {
1906   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1907   // FIXME: fix handling of complex expressions here.
1908   do {
1909     nextToken();
1910   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1911   parseLabel();
1912 }
1913
1914 void UnwrappedLineParser::parseSwitch() {
1915   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1916   nextToken();
1917   if (FormatTok->Tok.is(tok::l_paren))
1918     parseParens();
1919   if (FormatTok->Tok.is(tok::l_brace)) {
1920     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1921     parseBlock(/*MustBeDeclaration=*/false);
1922     addUnwrappedLine();
1923   } else {
1924     addUnwrappedLine();
1925     ++Line->Level;
1926     parseStructuralElement();
1927     --Line->Level;
1928   }
1929 }
1930
1931 void UnwrappedLineParser::parseAccessSpecifier() {
1932   nextToken();
1933   // Understand Qt's slots.
1934   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1935     nextToken();
1936   // Otherwise, we don't know what it is, and we'd better keep the next token.
1937   if (FormatTok->Tok.is(tok::colon))
1938     nextToken();
1939   addUnwrappedLine();
1940 }
1941
1942 bool UnwrappedLineParser::parseEnum() {
1943   // Won't be 'enum' for NS_ENUMs.
1944   if (FormatTok->Tok.is(tok::kw_enum))
1945     nextToken();
1946
1947   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1948   // declarations. An "enum" keyword followed by a colon would be a syntax
1949   // error and thus assume it is just an identifier.
1950   if (Style.Language == FormatStyle::LK_JavaScript &&
1951       FormatTok->isOneOf(tok::colon, tok::question))
1952     return false;
1953
1954   // Eat up enum class ...
1955   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1956     nextToken();
1957
1958   while (FormatTok->Tok.getIdentifierInfo() ||
1959          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1960                             tok::greater, tok::comma, tok::question)) {
1961     nextToken();
1962     // We can have macros or attributes in between 'enum' and the enum name.
1963     if (FormatTok->is(tok::l_paren))
1964       parseParens();
1965     if (FormatTok->is(tok::identifier)) {
1966       nextToken();
1967       // If there are two identifiers in a row, this is likely an elaborate
1968       // return type. In Java, this can be "implements", etc.
1969       if (Style.isCpp() && FormatTok->is(tok::identifier))
1970         return false;
1971     }
1972   }
1973
1974   // Just a declaration or something is wrong.
1975   if (FormatTok->isNot(tok::l_brace))
1976     return true;
1977   FormatTok->BlockKind = BK_Block;
1978
1979   if (Style.Language == FormatStyle::LK_Java) {
1980     // Java enums are different.
1981     parseJavaEnumBody();
1982     return true;
1983   }
1984   if (Style.Language == FormatStyle::LK_Proto) {
1985     parseBlock(/*MustBeDeclaration=*/true);
1986     return true;
1987   }
1988
1989   // Parse enum body.
1990   nextToken();
1991   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1992   if (HasError) {
1993     if (FormatTok->is(tok::semi))
1994       nextToken();
1995     addUnwrappedLine();
1996   }
1997   return true;
1998
1999   // There is no addUnwrappedLine() here so that we fall through to parsing a
2000   // structural element afterwards. Thus, in "enum A {} n, m;",
2001   // "} n, m;" will end up in one unwrapped line.
2002 }
2003
2004 void UnwrappedLineParser::parseJavaEnumBody() {
2005   // Determine whether the enum is simple, i.e. does not have a semicolon or
2006   // constants with class bodies. Simple enums can be formatted like braced
2007   // lists, contracted to a single line, etc.
2008   unsigned StoredPosition = Tokens->getPosition();
2009   bool IsSimple = true;
2010   FormatToken *Tok = Tokens->getNextToken();
2011   while (Tok) {
2012     if (Tok->is(tok::r_brace))
2013       break;
2014     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2015       IsSimple = false;
2016       break;
2017     }
2018     // FIXME: This will also mark enums with braces in the arguments to enum
2019     // constants as "not simple". This is probably fine in practice, though.
2020     Tok = Tokens->getNextToken();
2021   }
2022   FormatTok = Tokens->setPosition(StoredPosition);
2023
2024   if (IsSimple) {
2025     nextToken();
2026     parseBracedList();
2027     addUnwrappedLine();
2028     return;
2029   }
2030
2031   // Parse the body of a more complex enum.
2032   // First add a line for everything up to the "{".
2033   nextToken();
2034   addUnwrappedLine();
2035   ++Line->Level;
2036
2037   // Parse the enum constants.
2038   while (FormatTok) {
2039     if (FormatTok->is(tok::l_brace)) {
2040       // Parse the constant's class body.
2041       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2042                  /*MunchSemi=*/false);
2043     } else if (FormatTok->is(tok::l_paren)) {
2044       parseParens();
2045     } else if (FormatTok->is(tok::comma)) {
2046       nextToken();
2047       addUnwrappedLine();
2048     } else if (FormatTok->is(tok::semi)) {
2049       nextToken();
2050       addUnwrappedLine();
2051       break;
2052     } else if (FormatTok->is(tok::r_brace)) {
2053       addUnwrappedLine();
2054       break;
2055     } else {
2056       nextToken();
2057     }
2058   }
2059
2060   // Parse the class body after the enum's ";" if any.
2061   parseLevel(/*HasOpeningBrace=*/true);
2062   nextToken();
2063   --Line->Level;
2064   addUnwrappedLine();
2065 }
2066
2067 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2068   const FormatToken &InitialToken = *FormatTok;
2069   nextToken();
2070
2071   // The actual identifier can be a nested name specifier, and in macros
2072   // it is often token-pasted.
2073   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2074                             tok::kw___attribute, tok::kw___declspec,
2075                             tok::kw_alignas) ||
2076          ((Style.Language == FormatStyle::LK_Java ||
2077            Style.Language == FormatStyle::LK_JavaScript) &&
2078           FormatTok->isOneOf(tok::period, tok::comma))) {
2079     if (Style.Language == FormatStyle::LK_JavaScript &&
2080         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2081       // JavaScript/TypeScript supports inline object types in
2082       // extends/implements positions:
2083       //     class Foo implements {bar: number} { }
2084       nextToken();
2085       if (FormatTok->is(tok::l_brace)) {
2086         tryToParseBracedList();
2087         continue;
2088       }
2089     }
2090     bool IsNonMacroIdentifier =
2091         FormatTok->is(tok::identifier) &&
2092         FormatTok->TokenText != FormatTok->TokenText.upper();
2093     nextToken();
2094     // We can have macros or attributes in between 'class' and the class name.
2095     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2096       parseParens();
2097   }
2098
2099   // Note that parsing away template declarations here leads to incorrectly
2100   // accepting function declarations as record declarations.
2101   // In general, we cannot solve this problem. Consider:
2102   // class A<int> B() {}
2103   // which can be a function definition or a class definition when B() is a
2104   // macro. If we find enough real-world cases where this is a problem, we
2105   // can parse for the 'template' keyword in the beginning of the statement,
2106   // and thus rule out the record production in case there is no template
2107   // (this would still leave us with an ambiguity between template function
2108   // and class declarations).
2109   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2110     while (!eof()) {
2111       if (FormatTok->is(tok::l_brace)) {
2112         calculateBraceTypes(/*ExpectClassBody=*/true);
2113         if (!tryToParseBracedList())
2114           break;
2115       }
2116       if (FormatTok->Tok.is(tok::semi))
2117         return;
2118       nextToken();
2119     }
2120   }
2121   if (FormatTok->Tok.is(tok::l_brace)) {
2122     if (ParseAsExpr) {
2123       parseChildBlock();
2124     } else {
2125       if (ShouldBreakBeforeBrace(Style, InitialToken))
2126         addUnwrappedLine();
2127
2128       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2129                  /*MunchSemi=*/false);
2130     }
2131   }
2132   // There is no addUnwrappedLine() here so that we fall through to parsing a
2133   // structural element afterwards. Thus, in "class A {} n, m;",
2134   // "} n, m;" will end up in one unwrapped line.
2135 }
2136
2137 void UnwrappedLineParser::parseObjCMethod() {
2138   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2139          "'(' or identifier expected.");
2140   do {
2141     if (FormatTok->Tok.is(tok::semi)) {
2142       nextToken();
2143       addUnwrappedLine();
2144       return;
2145     } else if (FormatTok->Tok.is(tok::l_brace)) {
2146       parseBlock(/*MustBeDeclaration=*/false);
2147       addUnwrappedLine();
2148       return;
2149     } else {
2150       nextToken();
2151     }
2152   } while (!eof());
2153 }
2154
2155 void UnwrappedLineParser::parseObjCProtocolList() {
2156   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2157   do {
2158     nextToken();
2159     // Early exit in case someone forgot a close angle.
2160     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2161         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2162       return;
2163   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2164   nextToken(); // Skip '>'.
2165 }
2166
2167 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2168   do {
2169     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2170       nextToken();
2171       addUnwrappedLine();
2172       break;
2173     }
2174     if (FormatTok->is(tok::l_brace)) {
2175       parseBlock(/*MustBeDeclaration=*/false);
2176       // In ObjC interfaces, nothing should be following the "}".
2177       addUnwrappedLine();
2178     } else if (FormatTok->is(tok::r_brace)) {
2179       // Ignore stray "}". parseStructuralElement doesn't consume them.
2180       nextToken();
2181       addUnwrappedLine();
2182     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2183       nextToken();
2184       parseObjCMethod();
2185     } else {
2186       parseStructuralElement();
2187     }
2188   } while (!eof());
2189 }
2190
2191 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2192   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2193          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2194   nextToken();
2195   nextToken(); // interface name
2196
2197   // @interface can be followed by a lightweight generic
2198   // specialization list, then either a base class or a category.
2199   if (FormatTok->Tok.is(tok::less)) {
2200     // Unlike protocol lists, generic parameterizations support
2201     // nested angles:
2202     //
2203     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2204     //     NSObject <NSCopying, NSSecureCoding>
2205     //
2206     // so we need to count how many open angles we have left.
2207     unsigned NumOpenAngles = 1;
2208     do {
2209       nextToken();
2210       // Early exit in case someone forgot a close angle.
2211       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2212           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2213         break;
2214       if (FormatTok->Tok.is(tok::less))
2215         ++NumOpenAngles;
2216       else if (FormatTok->Tok.is(tok::greater)) {
2217         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2218         --NumOpenAngles;
2219       }
2220     } while (!eof() && NumOpenAngles != 0);
2221     nextToken(); // Skip '>'.
2222   }
2223   if (FormatTok->Tok.is(tok::colon)) {
2224     nextToken();
2225     nextToken(); // base class name
2226   } else if (FormatTok->Tok.is(tok::l_paren))
2227     // Skip category, if present.
2228     parseParens();
2229
2230   if (FormatTok->Tok.is(tok::less))
2231     parseObjCProtocolList();
2232
2233   if (FormatTok->Tok.is(tok::l_brace)) {
2234     if (Style.BraceWrapping.AfterObjCDeclaration)
2235       addUnwrappedLine();
2236     parseBlock(/*MustBeDeclaration=*/true);
2237   }
2238
2239   // With instance variables, this puts '}' on its own line.  Without instance
2240   // variables, this ends the @interface line.
2241   addUnwrappedLine();
2242
2243   parseObjCUntilAtEnd();
2244 }
2245
2246 // Returns true for the declaration/definition form of @protocol,
2247 // false for the expression form.
2248 bool UnwrappedLineParser::parseObjCProtocol() {
2249   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2250   nextToken();
2251
2252   if (FormatTok->is(tok::l_paren))
2253     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2254     return false;
2255
2256   // The definition/declaration form,
2257   // @protocol Foo
2258   // - (int)someMethod;
2259   // @end
2260
2261   nextToken(); // protocol name
2262
2263   if (FormatTok->Tok.is(tok::less))
2264     parseObjCProtocolList();
2265
2266   // Check for protocol declaration.
2267   if (FormatTok->Tok.is(tok::semi)) {
2268     nextToken();
2269     addUnwrappedLine();
2270     return true;
2271   }
2272
2273   addUnwrappedLine();
2274   parseObjCUntilAtEnd();
2275   return true;
2276 }
2277
2278 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2279   bool IsImport = FormatTok->is(Keywords.kw_import);
2280   assert(IsImport || FormatTok->is(tok::kw_export));
2281   nextToken();
2282
2283   // Consume the "default" in "export default class/function".
2284   if (FormatTok->is(tok::kw_default))
2285     nextToken();
2286
2287   // Consume "async function", "function" and "default function", so that these
2288   // get parsed as free-standing JS functions, i.e. do not require a trailing
2289   // semicolon.
2290   if (FormatTok->is(Keywords.kw_async))
2291     nextToken();
2292   if (FormatTok->is(Keywords.kw_function)) {
2293     nextToken();
2294     return;
2295   }
2296
2297   // For imports, `export *`, `export {...}`, consume the rest of the line up
2298   // to the terminating `;`. For everything else, just return and continue
2299   // parsing the structural element, i.e. the declaration or expression for
2300   // `export default`.
2301   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2302       !FormatTok->isStringLiteral())
2303     return;
2304
2305   while (!eof()) {
2306     if (FormatTok->is(tok::semi))
2307       return;
2308     if (Line->Tokens.empty()) {
2309       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2310       // import statement should terminate.
2311       return;
2312     }
2313     if (FormatTok->is(tok::l_brace)) {
2314       FormatTok->BlockKind = BK_Block;
2315       nextToken();
2316       parseBracedList();
2317     } else {
2318       nextToken();
2319     }
2320   }
2321 }
2322
2323 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2324                                                  StringRef Prefix = "") {
2325   llvm::dbgs() << Prefix << "Line(" << Line.Level
2326                << ", FSC=" << Line.FirstStartColumn << ")"
2327                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2328   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2329                                                     E = Line.Tokens.end();
2330        I != E; ++I) {
2331     llvm::dbgs() << I->Tok->Tok.getName() << "["
2332                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2333                  << "] ";
2334   }
2335   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2336                                                     E = Line.Tokens.end();
2337        I != E; ++I) {
2338     const UnwrappedLineNode &Node = *I;
2339     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2340              I = Node.Children.begin(),
2341              E = Node.Children.end();
2342          I != E; ++I) {
2343       printDebugInfo(*I, "\nChild: ");
2344     }
2345   }
2346   llvm::dbgs() << "\n";
2347 }
2348
2349 void UnwrappedLineParser::addUnwrappedLine() {
2350   if (Line->Tokens.empty())
2351     return;
2352   LLVM_DEBUG({
2353     if (CurrentLines == &Lines)
2354       printDebugInfo(*Line);
2355   });
2356   CurrentLines->push_back(std::move(*Line));
2357   Line->Tokens.clear();
2358   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2359   Line->FirstStartColumn = 0;
2360   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2361     CurrentLines->append(
2362         std::make_move_iterator(PreprocessorDirectives.begin()),
2363         std::make_move_iterator(PreprocessorDirectives.end()));
2364     PreprocessorDirectives.clear();
2365   }
2366   // Disconnect the current token from the last token on the previous line.
2367   FormatTok->Previous = nullptr;
2368 }
2369
2370 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2371
2372 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2373   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2374          FormatTok.NewlinesBefore > 0;
2375 }
2376
2377 // Checks if \p FormatTok is a line comment that continues the line comment
2378 // section on \p Line.
2379 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2380                                         const UnwrappedLine &Line,
2381                                         llvm::Regex &CommentPragmasRegex) {
2382   if (Line.Tokens.empty())
2383     return false;
2384
2385   StringRef IndentContent = FormatTok.TokenText;
2386   if (FormatTok.TokenText.startswith("//") ||
2387       FormatTok.TokenText.startswith("/*"))
2388     IndentContent = FormatTok.TokenText.substr(2);
2389   if (CommentPragmasRegex.match(IndentContent))
2390     return false;
2391
2392   // If Line starts with a line comment, then FormatTok continues the comment
2393   // section if its original column is greater or equal to the original start
2394   // column of the line.
2395   //
2396   // Define the min column token of a line as follows: if a line ends in '{' or
2397   // contains a '{' followed by a line comment, then the min column token is
2398   // that '{'. Otherwise, the min column token of the line is the first token of
2399   // the line.
2400   //
2401   // If Line starts with a token other than a line comment, then FormatTok
2402   // continues the comment section if its original column is greater than the
2403   // original start column of the min column token of the line.
2404   //
2405   // For example, the second line comment continues the first in these cases:
2406   //
2407   // // first line
2408   // // second line
2409   //
2410   // and:
2411   //
2412   // // first line
2413   //  // second line
2414   //
2415   // and:
2416   //
2417   // int i; // first line
2418   //  // second line
2419   //
2420   // and:
2421   //
2422   // do { // first line
2423   //      // second line
2424   //   int i;
2425   // } while (true);
2426   //
2427   // and:
2428   //
2429   // enum {
2430   //   a, // first line
2431   //    // second line
2432   //   b
2433   // };
2434   //
2435   // The second line comment doesn't continue the first in these cases:
2436   //
2437   //   // first line
2438   //  // second line
2439   //
2440   // and:
2441   //
2442   // int i; // first line
2443   // // second line
2444   //
2445   // and:
2446   //
2447   // do { // first line
2448   //   // second line
2449   //   int i;
2450   // } while (true);
2451   //
2452   // and:
2453   //
2454   // enum {
2455   //   a, // first line
2456   //   // second line
2457   // };
2458   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2459
2460   // Scan for '{//'. If found, use the column of '{' as a min column for line
2461   // comment section continuation.
2462   const FormatToken *PreviousToken = nullptr;
2463   for (const UnwrappedLineNode &Node : Line.Tokens) {
2464     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2465         isLineComment(*Node.Tok)) {
2466       MinColumnToken = PreviousToken;
2467       break;
2468     }
2469     PreviousToken = Node.Tok;
2470
2471     // Grab the last newline preceding a token in this unwrapped line.
2472     if (Node.Tok->NewlinesBefore > 0) {
2473       MinColumnToken = Node.Tok;
2474     }
2475   }
2476   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2477     MinColumnToken = PreviousToken;
2478   }
2479
2480   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2481                               MinColumnToken);
2482 }
2483
2484 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2485   bool JustComments = Line->Tokens.empty();
2486   for (SmallVectorImpl<FormatToken *>::const_iterator
2487            I = CommentsBeforeNextToken.begin(),
2488            E = CommentsBeforeNextToken.end();
2489        I != E; ++I) {
2490     // Line comments that belong to the same line comment section are put on the
2491     // same line since later we might want to reflow content between them.
2492     // Additional fine-grained breaking of line comment sections is controlled
2493     // by the class BreakableLineCommentSection in case it is desirable to keep
2494     // several line comment sections in the same unwrapped line.
2495     //
2496     // FIXME: Consider putting separate line comment sections as children to the
2497     // unwrapped line instead.
2498     (*I)->ContinuesLineCommentSection =
2499         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2500     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2501       addUnwrappedLine();
2502     pushToken(*I);
2503   }
2504   if (NewlineBeforeNext && JustComments)
2505     addUnwrappedLine();
2506   CommentsBeforeNextToken.clear();
2507 }
2508
2509 void UnwrappedLineParser::nextToken(int LevelDifference) {
2510   if (eof())
2511     return;
2512   flushComments(isOnNewLine(*FormatTok));
2513   pushToken(FormatTok);
2514   FormatToken *Previous = FormatTok;
2515   if (Style.Language != FormatStyle::LK_JavaScript)
2516     readToken(LevelDifference);
2517   else
2518     readTokenWithJavaScriptASI();
2519   FormatTok->Previous = Previous;
2520 }
2521
2522 void UnwrappedLineParser::distributeComments(
2523     const SmallVectorImpl<FormatToken *> &Comments,
2524     const FormatToken *NextTok) {
2525   // Whether or not a line comment token continues a line is controlled by
2526   // the method continuesLineCommentSection, with the following caveat:
2527   //
2528   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2529   // that each comment line from the trail is aligned with the next token, if
2530   // the next token exists. If a trail exists, the beginning of the maximal
2531   // trail is marked as a start of a new comment section.
2532   //
2533   // For example in this code:
2534   //
2535   // int a; // line about a
2536   //   // line 1 about b
2537   //   // line 2 about b
2538   //   int b;
2539   //
2540   // the two lines about b form a maximal trail, so there are two sections, the
2541   // first one consisting of the single comment "// line about a" and the
2542   // second one consisting of the next two comments.
2543   if (Comments.empty())
2544     return;
2545   bool ShouldPushCommentsInCurrentLine = true;
2546   bool HasTrailAlignedWithNextToken = false;
2547   unsigned StartOfTrailAlignedWithNextToken = 0;
2548   if (NextTok) {
2549     // We are skipping the first element intentionally.
2550     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2551       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2552         HasTrailAlignedWithNextToken = true;
2553         StartOfTrailAlignedWithNextToken = i;
2554       }
2555     }
2556   }
2557   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2558     FormatToken *FormatTok = Comments[i];
2559     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2560       FormatTok->ContinuesLineCommentSection = false;
2561     } else {
2562       FormatTok->ContinuesLineCommentSection =
2563           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2564     }
2565     if (!FormatTok->ContinuesLineCommentSection &&
2566         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2567       ShouldPushCommentsInCurrentLine = false;
2568     }
2569     if (ShouldPushCommentsInCurrentLine) {
2570       pushToken(FormatTok);
2571     } else {
2572       CommentsBeforeNextToken.push_back(FormatTok);
2573     }
2574   }
2575 }
2576
// Reads tokens from the token source into FormatTok until a non-comment token
// has been read. Comment tokens met on the way are collected and handed to
// distributeComments(). Preprocessor directives and conflict-marker tokens
// encountered in between are processed here as well.
//
// LevelDifference is added to Line->Level for the duration of each
// preprocessor directive that is parsed from within this function.
void UnwrappedLineParser::readToken(int LevelDifference) {
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that starts a line (or the file) outside of a directive begins a
    // preprocessor directive. The loop handles several directives in a row.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // Comments gathered so far are distributed with the '#' as next token.
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict-marker tokens update the conditional compilation state and are
    // then skipped; the token after a marker is forced onto a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Outside of a directive, tokens are dropped while the innermost PPStack
    // entry is marked PP_Unreachable.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the read: distribute the collected comments
    // relative to it and return with FormatTok set to that token.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // Reached only when the loop condition fails, i.e. the current token is the
  // end of input: distribute the remaining comments without a next token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2631
2632 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2633   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2634   if (MustBreakBeforeNextToken) {
2635     Line->Tokens.back().Tok->MustBreakBefore = true;
2636     MustBreakBeforeNextToken = false;
2637   }
2638 }
2639
2640 } // end namespace format
2641 } // end namespace clang