contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #include <algorithm>
  22
  23 #define DEBUG_TYPE "format-parser"
  24
  25 namespace clang {
  26 namespace format {
  27
  28 class FormatTokenSource {
  29 public:
  30   virtual ~FormatTokenSource() {}
  31   virtual FormatToken *getNextToken() = 0;
  32
  33   virtual unsigned getPosition() = 0;
  34   virtual FormatToken *setPosition(unsigned Position) = 0;
  35 };
  36
  37 namespace {
  38
  39 class ScopedDeclarationState {
  40 public:
  41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  42                          bool MustBeDeclaration)
  43       : Line(Line), Stack(Stack) {
  44     Line.MustBeDeclaration = MustBeDeclaration;
  45     Stack.push_back(MustBeDeclaration);
  46   }
  47   ~ScopedDeclarationState() {
  48     Stack.pop_back();
  49     if (!Stack.empty())
  50       Line.MustBeDeclaration = Stack.back();
  51     else
  52       Line.MustBeDeclaration = true;
  53   }
  54
  55 private:
  56   UnwrappedLine &Line;
  57   std::vector<bool> &Stack;
  58 };
  59
  60 static bool isLineComment(const FormatToken &FormatTok) {
  61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  62 }
  63
  64 // Checks if \p FormatTok is a line comment that continues the line comment
  65 // \p Previous. The original column of \p MinColumnToken is used to determine
  66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
  67 static bool continuesLineComment(const FormatToken &FormatTok,
  68                                  const FormatToken *Previous,
  69                                  const FormatToken *MinColumnToken) {
  70   if (!Previous || !MinColumnToken)
  71     return false;
  72   unsigned MinContinueColumn =
  73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  75          isLineComment(*Previous) &&
  76          FormatTok.OriginalColumn >= MinContinueColumn;
  77 }
  78
  79 class ScopedMacroState : public FormatTokenSource {
  80 public:
  81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  82                    FormatToken *&ResetToken)
  83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  85         Token(nullptr), PreviousToken(nullptr) {
  86     FakeEOF.Tok.startToken();
  87     FakeEOF.Tok.setKind(tok::eof);
  88     TokenSource = this;
  89     Line.Level = 0;
  90     Line.InPPDirective = true;
  91   }
  92
  93   ~ScopedMacroState() override {
  94     TokenSource = PreviousTokenSource;
  95     ResetToken = Token;
  96     Line.InPPDirective = false;
  97     Line.Level = PreviousLineLevel;
  98   }
  99
 100   FormatToken *getNextToken() override {
 101     // The \c UnwrappedLineParser guards against this by never calling
 102     // \c getNextToken() after it has encountered the first eof token.
 103     assert(!eof());
 104     PreviousToken = Token;
 105     Token = PreviousTokenSource->getNextToken();
 106     if (eof())
 107       return &FakeEOF;
 108     return Token;
 109   }
 110
 111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
 112
 113   FormatToken *setPosition(unsigned Position) override {
 114     PreviousToken = nullptr;
 115     Token = PreviousTokenSource->setPosition(Position);
 116     return Token;
 117   }
 118
 119 private:
 120   bool eof() {
 121     return Token && Token->HasUnescapedNewline &&
 122            !continuesLineComment(*Token, PreviousToken,
 123                                  /*MinColumnToken=*/PreviousToken);
 124   }
 125
 126   FormatToken FakeEOF;
 127   UnwrappedLine &Line;
 128   FormatTokenSource *&TokenSource;
 129   FormatToken *&ResetToken;
 130   unsigned PreviousLineLevel;
 131   FormatTokenSource *PreviousTokenSource;
 132
 133   FormatToken *Token;
 134   FormatToken *PreviousToken;
 135 };
 136
 137 } // end anonymous namespace
 138
 139 class ScopedLineState {
 140 public:
 141   ScopedLineState(UnwrappedLineParser &Parser,
 142                   bool SwitchToPreprocessorLines = false)
 143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 144     if (SwitchToPreprocessorLines)
 145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 146     else if (!Parser.Line->Tokens.empty())
 147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 148     PreBlockLine = std::move(Parser.Line);
 149     Parser.Line = llvm::make_unique<UnwrappedLine>();
 150     Parser.Line->Level = PreBlockLine->Level;
 151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 152   }
 153
 154   ~ScopedLineState() {
 155     if (!Parser.Line->Tokens.empty()) {
 156       Parser.addUnwrappedLine();
 157     }
 158     assert(Parser.Line->Tokens.empty());
 159     Parser.Line = std::move(PreBlockLine);
 160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 161       Parser.MustBreakBeforeNextToken = true;
 162     Parser.CurrentLines = OriginalLines;
 163   }
 164
 165 private:
 166   UnwrappedLineParser &Parser;
 167
 168   std::unique_ptr<UnwrappedLine> PreBlockLine;
 169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 170 };
 171
 172 class CompoundStatementIndenter {
 173 public:
 174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 175                             const FormatStyle &Style, unsigned &LineLevel)
 176       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 177     if (Style.BraceWrapping.AfterControlStatement)
 178       Parser->addUnwrappedLine();
 179     if (Style.BraceWrapping.IndentBraces)
 180       ++LineLevel;
 181   }
 182   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 183
 184 private:
 185   unsigned &LineLevel;
 186   unsigned OldLineLevel;
 187 };
 188
 189 namespace {
 190
 191 class IndexedTokenSource : public FormatTokenSource {
 192 public:
 193   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 194       : Tokens(Tokens), Position(-1) {}
 195
 196   FormatToken *getNextToken() override {
 197     ++Position;
 198     return Tokens[Position];
 199   }
 200
 201   unsigned getPosition() override {
 202     assert(Position >= 0);
 203     return Position;
 204   }
 205
 206   FormatToken *setPosition(unsigned P) override {
 207     Position = P;
 208     return Tokens[Position];
 209   }
 210
 211   void reset() { Position = -1; }
 212
 213 private:
 214   ArrayRef<FormatToken *> Tokens;
 215   int Position;
 216 };
 217
 218 } // end anonymous namespace
 219
 220 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 221                                          const AdditionalKeywords &Keywords,
 222                                          unsigned FirstStartColumn,
 223                                          ArrayRef<FormatToken *> Tokens,
 224                                          UnwrappedLineConsumer &Callback)
 225     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 226       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 227       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 228       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 229       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 230                        ? IG_Rejected
 231                        : IG_Inited),
 232       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
 233
 234 void UnwrappedLineParser::reset() {
 235   PPBranchLevel = -1;
 236   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 237                      ? IG_Rejected
 238                      : IG_Inited;
 239   IncludeGuardToken = nullptr;
 240   Line.reset(new UnwrappedLine);
 241   CommentsBeforeNextToken.clear();
 242   FormatTok = nullptr;
 243   MustBreakBeforeNextToken = false;
 244   PreprocessorDirectives.clear();
 245   CurrentLines = &Lines;
 246   DeclarationScopeStack.clear();
 247   PPStack.clear();
 248   Line->FirstStartColumn = FirstStartColumn;
 249 }
 250
 251 void UnwrappedLineParser::parse() {
 252   IndexedTokenSource TokenSource(AllTokens);
 253   Line->FirstStartColumn = FirstStartColumn;
 254   do {
 255     LLVM_DEBUG(llvm::dbgs() << "----\n");
 256     reset();
 257     Tokens = &TokenSource;
 258     TokenSource.reset();
 259
 260     readToken();
 261     parseFile();
 262
 263     // If we found an include guard then all preprocessor directives (other than
 264     // the guard) are over-indented by one.
 265     if (IncludeGuard == IG_Found)
 266       for (auto &Line : Lines)
 267         if (Line.InPPDirective && Line.Level > 0)
 268           --Line.Level;
 269
 270     // Create line with eof token.
 271     pushToken(FormatTok);
 272     addUnwrappedLine();
 273
 274     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 275                                                   E = Lines.end();
 276          I != E; ++I) {
 277       Callback.consumeUnwrappedLine(*I);
 278     }
 279     Callback.finishRun();
 280     Lines.clear();
 281     while (!PPLevelBranchIndex.empty() &&
 282            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 283       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 284       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 285     }
 286     if (!PPLevelBranchIndex.empty()) {
 287       ++PPLevelBranchIndex.back();
 288       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 289       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 290     }
 291   } while (!PPLevelBranchIndex.empty());
 292 }
 293
 294 void UnwrappedLineParser::parseFile() {
 295   // The top-level context in a file always has declarations, except for pre-
 296   // processor directives and JavaScript files.
 297   bool MustBeDeclaration =
 298       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 299   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 300                                           MustBeDeclaration);
 301   if (Style.Language == FormatStyle::LK_TextProto)
 302     parseBracedList();
 303   else
 304     parseLevel(/*HasOpeningBrace=*/false);
 305   // Make sure to format the remaining tokens.
 306   //
 307   // LK_TextProto is special since its top-level is parsed as the body of a
 308   // braced list, which does not necessarily have natural line separators such
 309   // as a semicolon. Comments after the last entry that have been determined to
 310   // not belong to that line, as in:
 311   //   key: value
 312   //   // endfile comment
 313   // do not have a chance to be put on a line of their own until this point.
 314   // Here we add this newline before end-of-file comments.
 315   if (Style.Language == FormatStyle::LK_TextProto &&
 316       !CommentsBeforeNextToken.empty())
 317     addUnwrappedLine();
 318   flushComments(true);
 319   addUnwrappedLine();
 320 }
 321
 322 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 323   bool SwitchLabelEncountered = false;
 324   do {
 325     tok::TokenKind kind = FormatTok->Tok.getKind();
 326     if (FormatTok->Type == TT_MacroBlockBegin) {
 327       kind = tok::l_brace;
 328     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 329       kind = tok::r_brace;
 330     }
 331
 332     switch (kind) {
 333     case tok::comment:
 334       nextToken();
 335       addUnwrappedLine();
 336       break;
 337     case tok::l_brace:
 338       // FIXME: Add parameter whether this can happen - if this happens, we must
 339       // be in a non-declaration context.
 340       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
 341         continue;
 342       parseBlock(/*MustBeDeclaration=*/false);
 343       addUnwrappedLine();
 344       break;
 345     case tok::r_brace:
 346       if (HasOpeningBrace)
 347         return;
 348       nextToken();
 349       addUnwrappedLine();
 350       break;
 351     case tok::kw_default: {
 352       unsigned StoredPosition = Tokens->getPosition();
 353       FormatToken *Next;
 354       do {
 355         Next = Tokens->getNextToken();
 356       } while (Next && Next->is(tok::comment));
 357       FormatTok = Tokens->setPosition(StoredPosition);
 358       if (Next && Next->isNot(tok::colon)) {
 359         // default not followed by ':' is not a case label; treat it like
 360         // an identifier.
 361         parseStructuralElement();
 362         break;
 363       }
 364       // Else, if it is 'default:', fall through to the case handling.
 365       LLVM_FALLTHROUGH;
 366     }
 367     case tok::kw_case:
 368       if (Style.Language == FormatStyle::LK_JavaScript &&
 369           Line->MustBeDeclaration) {
 370         // A 'case: string' style field declaration.
 371         parseStructuralElement();
 372         break;
 373       }
 374       if (!SwitchLabelEncountered &&
 375           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 376         ++Line->Level;
 377       SwitchLabelEncountered = true;
 378       parseStructuralElement();
 379       break;
 380     default:
 381       parseStructuralElement();
 382       break;
 383     }
 384   } while (!eof());
 385 }
 386
 387 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 388   // We'll parse forward through the tokens until we hit
 389   // a closing brace or eof - note that getNextToken() will
 390   // parse macros, so this will magically work inside macro
 391   // definitions, too.
 392   unsigned StoredPosition = Tokens->getPosition();
 393   FormatToken *Tok = FormatTok;
 394   const FormatToken *PrevTok = Tok->Previous;
 395   // Keep a stack of positions of lbrace tokens. We will
 396   // update information about whether an lbrace starts a
 397   // braced init list or a different block during the loop.
 398   SmallVector<FormatToken *, 8> LBraceStack;
 399   assert(Tok->Tok.is(tok::l_brace));
 400   do {
 401     // Get next non-comment token.
 402     FormatToken *NextTok;
 403     unsigned ReadTokens = 0;
 404     do {
 405       NextTok = Tokens->getNextToken();
 406       ++ReadTokens;
 407     } while (NextTok->is(tok::comment));
 408
 409     switch (Tok->Tok.getKind()) {
 410     case tok::l_brace:
 411       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
 412         if (PrevTok->isOneOf(tok::colon, tok::less))
 413           // A ':' indicates this code is in a type, or a braced list
 414           // following a label in an object literal ({a: {b: 1}}).
 415           // A '<' could be an object used in a comparison, but that is nonsense
 416           // code (can never return true), so more likely it is a generic type
 417           // argument (`X<{a: string; b: number}>`).
 418           // The code below could be confused by semicolons between the
 419           // individual members in a type member list, which would normally
 420           // trigger BK_Block. In both cases, this must be parsed as an inline
 421           // braced init.
 422           Tok->BlockKind = BK_BracedInit;
 423         else if (PrevTok->is(tok::r_paren))
 424           // `) { }` can only occur in function or method declarations in JS.
 425           Tok->BlockKind = BK_Block;
 426       } else {
 427         Tok->BlockKind = BK_Unknown;
 428       }
 429       LBraceStack.push_back(Tok);
 430       break;
 431     case tok::r_brace:
 432       if (LBraceStack.empty())
 433         break;
 434       if (LBraceStack.back()->BlockKind == BK_Unknown) {
 435         bool ProbablyBracedList = false;
 436         if (Style.Language == FormatStyle::LK_Proto) {
 437           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 438         } else {
 439           // Using OriginalColumn to distinguish between ObjC methods and
 440           // binary operators is a bit hacky.
 441           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 442                                   NextTok->OriginalColumn == 0;
 443
 444           // If there is a comma, semicolon or right paren after the closing
 445           // brace, we assume this is a braced initializer list.  Note that
 446           // regardless how we mark inner braces here, we will overwrite the
 447           // BlockKind later if we parse a braced list (where all blocks
 448           // inside are by default braced lists), or when we explicitly detect
 449           // blocks (for example while parsing lambdas).
 450           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
 451           // braced list in JS.
 452           ProbablyBracedList =
 453               (Style.Language == FormatStyle::LK_JavaScript &&
 454                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 455                                 Keywords.kw_as)) ||
 456               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
 457               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 458                                tok::r_paren, tok::r_square, tok::l_brace,
 459                                tok::ellipsis) ||
 460               (NextTok->is(tok::identifier) &&
 461                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
 462               (NextTok->is(tok::semi) &&
 463                (!ExpectClassBody || LBraceStack.size() != 1)) ||
 464               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 465           if (NextTok->is(tok::l_square)) {
 466             // We can have an array subscript after a braced init
 467             // list, but C++11 attributes are expected after blocks.
 468             NextTok = Tokens->getNextToken();
 469             ++ReadTokens;
 470             ProbablyBracedList = NextTok->isNot(tok::l_square);
 471           }
 472         }
 473         if (ProbablyBracedList) {
 474           Tok->BlockKind = BK_BracedInit;
 475           LBraceStack.back()->BlockKind = BK_BracedInit;
 476         } else {
 477           Tok->BlockKind = BK_Block;
 478           LBraceStack.back()->BlockKind = BK_Block;
 479         }
 480       }
 481       LBraceStack.pop_back();
 482       break;
 483     case tok::identifier:
 484       if (!Tok->is(TT_StatementMacro))
 485           break;
 486       LLVM_FALLTHROUGH;
 487     case tok::at:
 488     case tok::semi:
 489     case tok::kw_if:
 490     case tok::kw_while:
 491     case tok::kw_for:
 492     case tok::kw_switch:
 493     case tok::kw_try:
 494     case tok::kw___try:
 495       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
 496         LBraceStack.back()->BlockKind = BK_Block;
 497       break;
 498     default:
 499       break;
 500     }
 501     PrevTok = Tok;
 502     Tok = NextTok;
 503   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 504
 505   // Assume other blocks for all unclosed opening braces.
 506   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 507     if (LBraceStack[i]->BlockKind == BK_Unknown)
 508       LBraceStack[i]->BlockKind = BK_Block;
 509   }
 510
 511   FormatTok = Tokens->setPosition(StoredPosition);
 512 }
 513
 514 template <class T>
 515 static inline void hash_combine(std::size_t &seed, const T &v) {
 516   std::hash<T> hasher;
 517   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 518 }
 519
 520 size_t UnwrappedLineParser::computePPHash() const {
 521   size_t h = 0;
 522   for (const auto &i : PPStack) {
 523     hash_combine(h, size_t(i.Kind));
 524     hash_combine(h, i.Line);
 525   }
 526   return h;
 527 }
 528
 529 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 530                                      bool MunchSemi) {
 531   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 532          "'{' or macro block token expected");
 533   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 534   FormatTok->BlockKind = BK_Block;
 535
 536   size_t PPStartHash = computePPHash();
 537
 538   unsigned InitialLevel = Line->Level;
 539   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
 540
 541   if (MacroBlock && FormatTok->is(tok::l_paren))
 542     parseParens();
 543
 544   size_t NbPreprocessorDirectives =
 545       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
 546   addUnwrappedLine();
 547   size_t OpeningLineIndex =
 548       CurrentLines->empty()
 549           ? (UnwrappedLine::kInvalidIndex)
 550           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 551
 552   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 553                                           MustBeDeclaration);
 554   if (AddLevel)
 555     ++Line->Level;
 556   parseLevel(/*HasOpeningBrace=*/true);
 557
 558   if (eof())
 559     return;
 560
 561   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 562                  : !FormatTok->is(tok::r_brace)) {
 563     Line->Level = InitialLevel;
 564     FormatTok->BlockKind = BK_Block;
 565     return;
 566   }
 567
 568   size_t PPEndHash = computePPHash();
 569
 570   // Munch the closing brace.
 571   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
 572
 573   if (MacroBlock && FormatTok->is(tok::l_paren))
 574     parseParens();
 575
 576   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 577     nextToken();
 578   Line->Level = InitialLevel;
 579
 580   if (PPStartHash == PPEndHash) {
 581     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 582     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 583       // Update the opening line to add the forward reference as well
 584       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 585           CurrentLines->size() - 1;
 586     }
 587   }
 588 }
 589
 590 static bool isGoogScope(const UnwrappedLine &Line) {
 591   // FIXME: Closure-library specific stuff should not be hard-coded but be
 592   // configurable.
 593   if (Line.Tokens.size() < 4)
 594     return false;
 595   auto I = Line.Tokens.begin();
 596   if (I->Tok->TokenText != "goog")
 597     return false;
 598   ++I;
 599   if (I->Tok->isNot(tok::period))
 600     return false;
 601   ++I;
 602   if (I->Tok->TokenText != "scope")
 603     return false;
 604   ++I;
 605   return I->Tok->is(tok::l_paren);
 606 }
 607
 608 static bool isIIFE(const UnwrappedLine &Line,
 609                    const AdditionalKeywords &Keywords) {
 610   // Look for the start of an immediately invoked anonymous function.
 611   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 612   // This is commonly done in JavaScript to create a new, anonymous scope.
 613   // Example: (function() { ... })()
 614   if (Line.Tokens.size() < 3)
 615     return false;
 616   auto I = Line.Tokens.begin();
 617   if (I->Tok->isNot(tok::l_paren))
 618     return false;
 619   ++I;
 620   if (I->Tok->isNot(Keywords.kw_function))
 621     return false;
 622   ++I;
 623   return I->Tok->is(tok::l_paren);
 624 }
 625
 626 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 627                                    const FormatToken &InitialToken) {
 628   if (InitialToken.is(tok::kw_namespace))
 629     return Style.BraceWrapping.AfterNamespace;
 630   if (InitialToken.is(tok::kw_class))
 631     return Style.BraceWrapping.AfterClass;
 632   if (InitialToken.is(tok::kw_union))
 633     return Style.BraceWrapping.AfterUnion;
 634   if (InitialToken.is(tok::kw_struct))
 635     return Style.BraceWrapping.AfterStruct;
 636   return false;
 637 }
 638
 639 void UnwrappedLineParser::parseChildBlock() {
 640   FormatTok->BlockKind = BK_Block;
 641   nextToken();
 642   {
 643     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
 644                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 645     ScopedLineState LineState(*this);
 646     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 647                                             /*MustBeDeclaration=*/false);
 648     Line->Level += SkipIndent ? 0 : 1;
 649     parseLevel(/*HasOpeningBrace=*/true);
 650     flushComments(isOnNewLine(*FormatTok));
 651     Line->Level -= SkipIndent ? 0 : 1;
 652   }
 653   nextToken();
 654 }
 655
 656 void UnwrappedLineParser::parsePPDirective() {
 657   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 658   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 659   nextToken();
 660
 661   if (!FormatTok->Tok.getIdentifierInfo()) {
 662     parsePPUnknown();
 663     return;
 664   }
 665
 666   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 667   case tok::pp_define:
 668     parsePPDefine();
 669     return;
 670   case tok::pp_if:
 671     parsePPIf(/*IfDef=*/false);
 672     break;
 673   case tok::pp_ifdef:
 674   case tok::pp_ifndef:
 675     parsePPIf(/*IfDef=*/true);
 676     break;
 677   case tok::pp_else:
 678     parsePPElse();
 679     break;
 680   case tok::pp_elif:
 681     parsePPElIf();
 682     break;
 683   case tok::pp_endif:
 684     parsePPEndIf();
 685     break;
 686   default:
 687     parsePPUnknown();
 688     break;
 689   }
 690 }
 691
 692 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 693   size_t Line = CurrentLines->size();
 694   if (CurrentLines == &PreprocessorDirectives)
 695     Line += Lines.size();
 696
 697   if (Unreachable ||
 698       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
 699     PPStack.push_back({PP_Unreachable, Line});
 700   else
 701     PPStack.push_back({PP_Conditional, Line});
 702 }
 703
 704 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 705   ++PPBranchLevel;
 706   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 707   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 708     PPLevelBranchIndex.push_back(0);
 709     PPLevelBranchCount.push_back(0);
 710   }
 711   PPChainBranchIndex.push(0);
 712   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 713   conditionalCompilationCondition(Unreachable || Skip);
 714 }
 715
 716 void UnwrappedLineParser::conditionalCompilationAlternative() {
 717   if (!PPStack.empty())
 718     PPStack.pop_back();
 719   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 720   if (!PPChainBranchIndex.empty())
 721     ++PPChainBranchIndex.top();
 722   conditionalCompilationCondition(
 723       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 724       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 725 }
 726
 727 void UnwrappedLineParser::conditionalCompilationEnd() {
 728   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 729   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 730     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 731       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 732     }
 733   }
 734   // Guard against #endif's without #if.
 735   if (PPBranchLevel > -1)
 736     --PPBranchLevel;
 737   if (!PPChainBranchIndex.empty())
 738     PPChainBranchIndex.pop();
 739   if (!PPStack.empty())
 740     PPStack.pop_back();
 741 }
 742
 743 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 744   bool IfNDef = FormatTok->is(tok::pp_ifndef);
 745   nextToken();
 746   bool Unreachable = false;
 747   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
 748     Unreachable = true;
 749   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
 750     Unreachable = true;
 751   conditionalCompilationStart(Unreachable);
 752   FormatToken *IfCondition = FormatTok;
 753   // If there's a #ifndef on the first line, and the only lines before it are
 754   // comments, it could be an include guard.
 755   bool MaybeIncludeGuard = IfNDef;
 756   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
 757     for (auto &Line : Lines) {
 758       if (!Line.Tokens.front().Tok->is(tok::comment)) {
 759         MaybeIncludeGuard = false;
 760         IncludeGuard = IG_Rejected;
 761         break;
 762       }
 763     }
 764   --PPBranchLevel;
 765   parsePPUnknown();
 766   ++PPBranchLevel;
 767   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
 768     IncludeGuard = IG_IfNdefed;
 769     IncludeGuardToken = IfCondition;
 770   }
 771 }
 772
 773 void UnwrappedLineParser::parsePPElse() {
 774   // If a potential include guard has an #else, it's not an include guard.
 775   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
 776     IncludeGuard = IG_Rejected;
 777   conditionalCompilationAlternative();
 778   if (PPBranchLevel > -1)
 779     --PPBranchLevel;
 780   parsePPUnknown();
 781   ++PPBranchLevel;
 782 }
 783
 784 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 785
 786 void UnwrappedLineParser::parsePPEndIf() {
 787   conditionalCompilationEnd();
 788   parsePPUnknown();
 789   // If the #endif of a potential include guard is the last thing in the file,
 790   // then we found an include guard.
 791   unsigned TokenPosition = Tokens->getPosition();
 792   FormatToken *PeekNext = AllTokens[TokenPosition];
 793   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
 794       PeekNext->is(tok::eof) &&
 795       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 796     IncludeGuard = IG_Found;
 797 }
 798
 799 void UnwrappedLineParser::parsePPDefine() {
 800   nextToken();
 801
 802   if (FormatTok->Tok.getKind() != tok::identifier) {
 803     IncludeGuard = IG_Rejected;
 804     IncludeGuardToken = nullptr;
 805     parsePPUnknown();
 806     return;
 807   }
 808
 809   if (IncludeGuard == IG_IfNdefed &&
 810       IncludeGuardToken->TokenText == FormatTok->TokenText) {
 811     IncludeGuard = IG_Defined;
 812     IncludeGuardToken = nullptr;
 813     for (auto &Line : Lines) {
 814       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
 815         IncludeGuard = IG_Rejected;
 816         break;
 817       }
 818     }
 819   }
 820
 821   nextToken();
 822   if (FormatTok->Tok.getKind() == tok::l_paren &&
 823       FormatTok->WhitespaceRange.getBegin() ==
 824           FormatTok->WhitespaceRange.getEnd()) {
 825     parseParens();
 826   }
 827   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
 828     Line->Level += PPBranchLevel + 1;
 829   addUnwrappedLine();
 830   ++Line->Level;
 831
 832   // Errors during a preprocessor directive can only affect the layout of the
 833   // preprocessor directive, and thus we ignore them. An alternative approach
 834   // would be to use the same approach we use on the file level (no
 835   // re-indentation if there was a structural error) within the macro
 836   // definition.
 837   parseFile();
 838 }
 839
 840 void UnwrappedLineParser::parsePPUnknown() {
 841   do {
 842     nextToken();
 843   } while (!eof());
 844   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
 845     Line->Level += PPBranchLevel + 1;
 846   addUnwrappedLine();
 847 }
 848
 849 // Here we blacklist certain tokens that are not usually the first token in an
 850 // unwrapped line. This is used in attempt to distinguish macro calls without
 851 // trailing semicolons from other constructs split to several lines.
 852 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 853   // Semicolon can be a null-statement, l_square can be a start of a macro or
 854   // a C++11 attribute, but this doesn't seem to be common.
 855   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 856          Tok.isNot(tok::l_square) &&
 857          // Tokens that can only be used as binary operators and a part of
 858          // overloaded operator names.
 859          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 860          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 861          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 862          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 863          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 864          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 865          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 866          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 867          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 868          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 869          Tok.isNot(tok::lesslessequal) &&
 870          // Colon is used in labels, base class lists, initializer lists,
 871          // range-based for loops, ternary operator, but should never be the
 872          // first token in an unwrapped line.
 873          Tok.isNot(tok::colon) &&
 874          // 'noexcept' is a trailing annotation.
 875          Tok.isNot(tok::kw_noexcept);
 876 }
 877
 878 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
 879                           const FormatToken *FormatTok) {
 880   // FIXME: This returns true for C/C++ keywords like 'struct'.
 881   return FormatTok->is(tok::identifier) &&
 882          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
 883           !FormatTok->isOneOf(
 884               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
 885               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
 886               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
 887               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
 888               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
 889               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
 890               Keywords.kw_from));
 891 }
 892
 893 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
 894                                  const FormatToken *FormatTok) {
 895   return FormatTok->Tok.isLiteral() ||
 896          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
 897          mustBeJSIdent(Keywords, FormatTok);
 898 }
 899
 900 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
 901 // when encountered after a value (see mustBeJSIdentOrValue).
 902 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
 903                            const FormatToken *FormatTok) {
 904   return FormatTok->isOneOf(
 905       tok::kw_return, Keywords.kw_yield,
 906       // conditionals
 907       tok::kw_if, tok::kw_else,
 908       // loops
 909       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
 910       // switch/case
 911       tok::kw_switch, tok::kw_case,
 912       // exceptions
 913       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
 914       // declaration
 915       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
 916       Keywords.kw_async, Keywords.kw_function,
 917       // import/export
 918       Keywords.kw_import, tok::kw_export);
 919 }
 920
 921 // readTokenWithJavaScriptASI reads the next token and terminates the current
 922 // line if JavaScript Automatic Semicolon Insertion must
 923 // happen between the current token and the next token.
 924 //
 925 // This method is conservative - it cannot cover all edge cases of JavaScript,
 926 // but only aims to correctly handle certain well known cases. It *must not*
 927 // return true in speculative cases.
 928 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 929   FormatToken *Previous = FormatTok;
 930   readToken();
 931   FormatToken *Next = FormatTok;
 932
 933   bool IsOnSameLine =
 934       CommentsBeforeNextToken.empty()
 935           ? Next->NewlinesBefore == 0
 936           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
 937   if (IsOnSameLine)
 938     return;
 939
 940   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
 941   bool PreviousStartsTemplateExpr =
 942       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
 943   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
 944     // If the line contains an '@' sign, the previous token might be an
 945     // annotation, which can precede another identifier/value.
 946     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
 947                               [](UnwrappedLineNode &LineNode) {
 948                                 return LineNode.Tok->is(tok::at);
 949                               }) != Line->Tokens.end();
 950     if (HasAt)
 951       return;
 952   }
 953   if (Next->is(tok::exclaim) && PreviousMustBeValue)
 954     return addUnwrappedLine();
 955   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
 956   bool NextEndsTemplateExpr =
 957       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
 958   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
 959       (PreviousMustBeValue ||
 960        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
 961                          tok::minusminus)))
 962     return addUnwrappedLine();
 963   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
 964       isJSDeclOrStmt(Keywords, Next))
 965     return addUnwrappedLine();
 966 }
 967
 968 void UnwrappedLineParser::parseStructuralElement() {
       // Parses one structural element starting at the current token. The first
       // switch hands constructs that are recognizable from their leading token
       // to dedicated parsers; whatever remains is consumed token by token in the
       // loop further down until a branch returns or eof() is reached.
 969   assert(!FormatTok->is(tok::l_brace));
       // TableGen: a pp_include token, optionally followed by a string literal,
       // forms an unwrapped line of its own.
 970   if (Style.Language == FormatStyle::LK_TableGen &&
 971       FormatTok->is(tok::pp_include)) {
 972     nextToken();
 973     if (FormatTok->is(tok::string_literal))
 974       nextToken();
 975     addUnwrappedLine();
 976     return;
 977   }
 978   switch (FormatTok->Tok.getKind()) {
 979   case tok::kw_asm:
         // Inline assembly: when a brace follows, both braces are typed
         // TT_InlineASMBrace and every token in between is marked Finalized.
 980     nextToken();
 981     if (FormatTok->is(tok::l_brace)) {
 982       FormatTok->Type = TT_InlineASMBrace;
 983       nextToken();
 984       while (FormatTok && FormatTok->isNot(tok::eof)) {
 985         if (FormatTok->is(tok::r_brace)) {
 986           FormatTok->Type = TT_InlineASMBrace;
 987           nextToken();
 988           addUnwrappedLine();
 989           break;
 990         }
 991         FormatTok->Finalized = true;
 992         nextToken();
 993       }
 994     }
 995     break;
 996   case tok::kw_namespace:
 997     parseNamespace();
 998     return;
 999   case tok::kw_public:
1000   case tok::kw_protected:
1001   case tok::kw_private:
         // Java and JavaScript merely consume the modifier; all other languages
         // go through parseAccessSpecifier().
1002     if (Style.Language == FormatStyle::LK_Java ||
1003         Style.Language == FormatStyle::LK_JavaScript)
1004       nextToken();
1005     else
1006       parseAccessSpecifier();
1007     return;
1008   case tok::kw_if:
1009     parseIfThenElse();
1010     return;
1011   case tok::kw_for:
1012   case tok::kw_while:
1013     parseForOrWhileLoop();
1014     return;
1015   case tok::kw_do:
1016     parseDoWhile();
1017     return;
1018   case tok::kw_switch:
1019     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1020       // 'switch: string' field declaration.
1021       break;
1022     parseSwitch();
1023     return;
1024   case tok::kw_default:
1025     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1026       // 'default: string' field declaration.
1027       break;
1028     nextToken();
1029     if (FormatTok->is(tok::colon)) {
1030       parseLabel();
1031       return;
1032     }
1033     // e.g. "default void f() {}" in a Java interface.
1034     break;
1035   case tok::kw_case:
1036     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1037       // 'case: string' field declaration.
1038       break;
1039     parseCaseLabel();
1040     return;
1041   case tok::kw_try:
1042   case tok::kw___try:
1043     parseTryCatch();
1044     return;
1045   case tok::kw_extern:
         // Only 'extern' followed by a string literal and '{' is handled here;
         // the block is parsed as a declaration context, and an extra indent
         // level is added only when BraceWrapping.AfterExternBlock is set.
1046     nextToken();
1047     if (FormatTok->Tok.is(tok::string_literal)) {
1048       nextToken();
1049       if (FormatTok->Tok.is(tok::l_brace)) {
1050         if (Style.BraceWrapping.AfterExternBlock) {
1051           addUnwrappedLine();
1052           parseBlock(/*MustBeDeclaration=*/true);
1053         } else {
1054           parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1055         }
1056         addUnwrappedLine();
1057         return;
1058       }
1059     }
1060     break;
1061   case tok::kw_export:
1062     if (Style.Language == FormatStyle::LK_JavaScript) {
1063       parseJavaScriptEs6ImportExport();
1064       return;
1065     }
1066     if (!Style.isCpp())
1067       break;
1068     // Handle C++ "(inline|export) namespace".
1069     LLVM_FALLTHROUGH;
1070   case tok::kw_inline:
1071     nextToken();
1072     if (FormatTok->Tok.is(tok::kw_namespace)) {
1073       parseNamespace();
1074       return;
1075     }
1076     break;
1077   case tok::identifier:
1078     if (FormatTok->is(TT_ForEachMacro)) {
1079       parseForOrWhileLoop();
1080       return;
1081     }
1082     if (FormatTok->is(TT_MacroBlockBegin)) {
1083       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1084                  /*MunchSemi=*/false);
1085       return;
1086     }
1087     if (FormatTok->is(Keywords.kw_import)) {
1088       if (Style.Language == FormatStyle::LK_JavaScript) {
1089         parseJavaScriptEs6ImportExport();
1090         return;
1091       }
1092       if (Style.Language == FormatStyle::LK_Proto) {
             // Proto: 'import', an optional 'public', a string literal and an
             // optional ';' make up one line. Without the string literal the
             // function returns without adding a line.
1093         nextToken();
1094         if (FormatTok->is(tok::kw_public))
1095           nextToken();
1096         if (!FormatTok->is(tok::string_literal))
1097           return;
1098         nextToken();
1099         if (FormatTok->is(tok::semi))
1100           nextToken();
1101         addUnwrappedLine();
1102         return;
1103       }
1104     }
1105     if (Style.isCpp() &&
1106         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1107                            Keywords.kw_slots, Keywords.kw_qslots)) {
           // One of these keywords directly followed by ':' ends the line right
           // after the colon; otherwise parsing continues in the loop below
           // with the keyword already consumed.
1108       nextToken();
1109       if (FormatTok->is(tok::colon)) {
1110         nextToken();
1111         addUnwrappedLine();
1112         return;
1113       }
1114     }
1115     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1116       parseStatementMacro();
1117       return;
1118     }
1119     // In all other cases, parse the declaration.
1120     break;
1121   default:
1122     break;
1123   }
       // Generic path: walk the remaining tokens, delegating nested constructs to
       // their own parsers, until one of the cases ends the element.
1124   do {
         // Only the kw_enum case below consults Previous.
1125     const FormatToken *Previous = FormatTok->Previous;
1126     switch (FormatTok->Tok.getKind()) {
1127     case tok::at:
1128       nextToken();
1129       if (FormatTok->Tok.is(tok::l_brace)) {
1130         nextToken();
1131         parseBracedList();
1132         break;
1133       } else if (Style.Language == FormatStyle::LK_Java &&
1134                  FormatTok->is(Keywords.kw_interface)) {
1135         nextToken();
1136         break;
1137       }
1138       switch (FormatTok->Tok.getObjCKeywordID()) {
1139       case tok::objc_public:
1140       case tok::objc_protected:
1141       case tok::objc_package:
1142       case tok::objc_private:
1143         return parseAccessSpecifier();
1144       case tok::objc_interface:
1145       case tok::objc_implementation:
1146         return parseObjCInterfaceOrImplementation();
1147       case tok::objc_protocol:
1148         if (parseObjCProtocol())
1149           return;
1150         break;
1151       case tok::objc_end:
1152         return; // Handled by the caller.
1153       case tok::objc_optional:
1154       case tok::objc_required:
1155         nextToken();
1156         addUnwrappedLine();
1157         return;
1158       case tok::objc_autoreleasepool:
1159         nextToken();
1160         if (FormatTok->Tok.is(tok::l_brace)) {
1161           if (Style.BraceWrapping.AfterControlStatement)
1162             addUnwrappedLine();
1163           parseBlock(/*MustBeDeclaration=*/false);
1164         }
1165         addUnwrappedLine();
1166         return;
1167       case tok::objc_synchronized:
1168         nextToken();
1169         if (FormatTok->Tok.is(tok::l_paren))
1170            // Skip synchronization object
1171            parseParens();
1172         if (FormatTok->Tok.is(tok::l_brace)) {
1173           if (Style.BraceWrapping.AfterControlStatement)
1174             addUnwrappedLine();
1175           parseBlock(/*MustBeDeclaration=*/false);
1176         }
1177         addUnwrappedLine();
1178         return;
1179       case tok::objc_try:
1180         // This branch isn't strictly necessary (the kw_try case below would
1181         // do this too after the tok::at is parsed above).  But be explicit.
1182         parseTryCatch();
1183         return;
1184       default:
1185         break;
1186       }
1187       break;
1188     case tok::kw_enum:
1189       // Ignore if this is part of "template <enum ...".
1190       if (Previous && Previous->is(tok::less)) {
1191         nextToken();
1192         break;
1193       }
1194
1195       // parseEnum falls through and does not yet add an unwrapped line as an
1196       // enum definition can start a structural element.
1197       if (!parseEnum())
1198         break;
1199       // This only applies for C++.
1200       if (!Style.isCpp()) {
1201         addUnwrappedLine();
1202         return;
1203       }
1204       break;
1205     case tok::kw_typedef:
1206       nextToken();
1207       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1208                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1209         parseEnum();
1210       break;
1211     case tok::kw_struct:
1212     case tok::kw_union:
1213     case tok::kw_class:
1214       // parseRecord falls through and does not yet add an unwrapped line as a
1215       // record declaration or definition can start a structural element.
1216       parseRecord();
1217       // This does not apply for Java and JavaScript.
1218       if (Style.Language == FormatStyle::LK_Java ||
1219           Style.Language == FormatStyle::LK_JavaScript) {
1220         if (FormatTok->is(tok::semi))
1221           nextToken();
1222         addUnwrappedLine();
1223         return;
1224       }
1225       break;
1226     case tok::period:
1227       nextToken();
1228       // In Java, classes have an implicit static member "class".
1229       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1230           FormatTok->is(tok::kw_class))
1231         nextToken();
1232       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1233           FormatTok->Tok.getIdentifierInfo())
1234         // JavaScript only has pseudo keywords, all keywords are allowed to
1235         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1236         nextToken();
1237       break;
1238     case tok::semi:
1239       nextToken();
1240       addUnwrappedLine();
1241       return;
1242     case tok::r_brace:
           // The closing brace itself is not consumed here.
1243       addUnwrappedLine();
1244       return;
1245     case tok::l_paren:
1246       parseParens();
1247       break;
1248     case tok::kw_operator:
1249       nextToken();
1250       if (FormatTok->isBinaryOperator())
1251         nextToken();
1252       break;
1253     case tok::caret:
1254       nextToken();
1255       if (FormatTok->Tok.isAnyIdentifier() ||
1256           FormatTok->isSimpleTypeSpecifier())
1257         nextToken();
1258       if (FormatTok->is(tok::l_paren))
1259         parseParens();
1260       if (FormatTok->is(tok::l_brace))
1261         parseChildBlock();
1262       break;
1263     case tok::l_brace:
1264       if (!tryToParseBracedList()) {
1265         // A block outside of parentheses must be the last part of a
1266         // structural element.
1267         // FIXME: Figure out cases where this is not true, and add projections
1268         // for them (the one we know is missing are lambdas).
1269         if (Style.BraceWrapping.AfterFunction)
1270           addUnwrappedLine();
1271         FormatTok->Type = TT_FunctionLBrace;
1272         parseBlock(/*MustBeDeclaration=*/false);
1273         addUnwrappedLine();
1274         return;
1275       }
1276       // Otherwise this was a braced init list, and the structural
1277       // element continues.
1278       break;
1279     case tok::kw_try:
1280       // We arrive here when parsing function-try blocks.
1281       if (Style.BraceWrapping.AfterFunction)
1282         addUnwrappedLine();
1283       parseTryCatch();
1284       return;
1285     case tok::identifier: {
1286       if (FormatTok->is(TT_MacroBlockEnd)) {
1287         addUnwrappedLine();
1288         return;
1289       }
1290
1291       // Function declarations (as opposed to function expressions) are parsed
1292       // on their own unwrapped line by continuing this loop. Function
1293       // expressions (functions that are not on their own line) must not create
1294       // a new unwrapped line, so they are special cased below.
1295       size_t TokenCount = Line->Tokens.size();
1296       if (Style.Language == FormatStyle::LK_JavaScript &&
1297           FormatTok->is(Keywords.kw_function) &&
1298           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1299                                                      Keywords.kw_async)))) {
1300         tryToParseJSFunction();
1301         break;
1302       }
1303       if ((Style.Language == FormatStyle::LK_JavaScript ||
1304            Style.Language == FormatStyle::LK_Java) &&
1305           FormatTok->is(Keywords.kw_interface)) {
1306         if (Style.Language == FormatStyle::LK_JavaScript) {
1307           // In JavaScript/TypeScript, "interface" can be used as a standalone
1308           // identifier, e.g. in `var interface = 1;`. If "interface" is
1309           // followed by another identifier, it is very likely to be an actual
1310           // interface declaration.
               // Peek at the next token, then rewind the token source to the
               // stored position.
1311           unsigned StoredPosition = Tokens->getPosition();
1312           FormatToken *Next = Tokens->getNextToken();
1313           FormatTok = Tokens->setPosition(StoredPosition);
1314           if (Next && !mustBeJSIdent(Keywords, Next)) {
1315             nextToken();
1316             break;
1317           }
1318         }
1319         parseRecord();
1320         addUnwrappedLine();
1321         return;
1322       }
1323
1324       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1325         parseStatementMacro();
1326         return;
1327       }
1328
1329       // See if the following token should start a new unwrapped line.
1330       StringRef Text = FormatTok->TokenText;
1331       nextToken();
1332       if (Line->Tokens.size() == 1 &&
1333           // JS doesn't have macros, and within classes colons indicate fields,
1334           // not labels.
1335           Style.Language != FormatStyle::LK_JavaScript) {
1336         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1337           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1338           parseLabel();
1339           return;
1340         }
1341         // Recognize function-like macro usages without trailing semicolon as
1342         // well as free-standing macros like Q_OBJECT.
1343         bool FunctionLike = FormatTok->is(tok::l_paren);
1344         if (FunctionLike)
1345           parseParens();
1346
1347         bool FollowedByNewline =
1348             CommentsBeforeNextToken.empty()
1349                 ? FormatTok->NewlinesBefore > 0
1350                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1351
             // Heuristic: an identifier without lowercase letters that is either
             // at least 5 characters long or followed by parentheses, and after
             // which a line break and a token accepted by tokenCanStartNewLine()
             // follow, is treated as a macro invocation ending the line.
1352         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1353             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1354           addUnwrappedLine();
1355           return;
1356         }
1357       }
1358       break;
1359     }
1360     case tok::equal:
1361       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1362       // TT_JsFatArrow. They always start an expression or a child block if
1363       // followed by a curly.
1364       if (FormatTok->is(TT_JsFatArrow)) {
1365         nextToken();
1366         if (FormatTok->is(tok::l_brace))
1367           parseChildBlock();
1368         break;
1369       }
1370
1371       nextToken();
1372       if (FormatTok->Tok.is(tok::l_brace)) {
1373         nextToken();
1374         parseBracedList();
1375       } else if (Style.Language == FormatStyle::LK_Proto &&
1376                  FormatTok->Tok.is(tok::less)) {
1377         nextToken();
1378         parseBracedList(/*ContinueOnSemicolons=*/false,
1379                         /*ClosingBraceKind=*/tok::greater);
1380       }
1381       break;
1382     case tok::l_square:
1383       parseSquare();
1384       break;
1385     case tok::kw_new:
1386       parseNew();
1387       break;
1388     default:
1389       nextToken();
1390       break;
1391     }
1392   } while (!eof());
1393 }
1394
1395 bool UnwrappedLineParser::tryToParseLambda() {
1396   if (!Style.isCpp()) {
1397     nextToken();
1398     return false;
1399   }
1400   assert(FormatTok->is(tok::l_square));
1401   FormatToken &LSquare = *FormatTok;
1402   if (!tryToParseLambdaIntroducer())
1403     return false;
1404
1405   while (FormatTok->isNot(tok::l_brace)) {
1406     if (FormatTok->isSimpleTypeSpecifier()) {
1407       nextToken();
1408       continue;
1409     }
1410     switch (FormatTok->Tok.getKind()) {
1411     case tok::l_brace:
1412       break;
1413     case tok::l_paren:
1414       parseParens();
1415       break;
1416     case tok::amp:
1417     case tok::star:
1418     case tok::kw_const:
1419     case tok::comma:
1420     case tok::less:
1421     case tok::greater:
1422     case tok::identifier:
1423     case tok::numeric_constant:
1424     case tok::coloncolon:
1425     case tok::kw_mutable:
1426       nextToken();
1427       break;
1428     case tok::arrow:
1429       FormatTok->Type = TT_LambdaArrow;
1430       nextToken();
1431       break;
1432     default:
1433       return true;
1434     }
1435   }
1436   LSquare.Type = TT_LambdaLSquare;
1437   parseChildBlock();
1438   return true;
1439 }
1440
1441 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1442   const FormatToken *Previous = FormatTok->Previous;
1443   if (Previous &&
1444       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1445                          tok::kw_delete, tok::l_square) ||
1446        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1447        Previous->isSimpleTypeSpecifier())) {
1448     nextToken();
1449     return false;
1450   }
1451   nextToken();
1452   if (FormatTok->is(tok::l_square)) {
1453     return false;
1454   }
1455   parseSquare(/*LambdaIntroducer=*/true);
1456   return true;
1457 }
1458
1459 void UnwrappedLineParser::tryToParseJSFunction() {
1460   assert(FormatTok->is(Keywords.kw_function) ||
1461          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1462   if (FormatTok->is(Keywords.kw_async))
1463     nextToken();
1464   // Consume "function".
1465   nextToken();
1466
1467   // Consume * (generator function). Treat it like C++'s overloaded operators.
1468   if (FormatTok->is(tok::star)) {
1469     FormatTok->Type = TT_OverloadedOperator;
1470     nextToken();
1471   }
1472
1473   // Consume function name.
1474   if (FormatTok->is(tok::identifier))
1475     nextToken();
1476
1477   if (FormatTok->isNot(tok::l_paren))
1478     return;
1479
1480   // Parse formal parameter list.
1481   parseParens();
1482
1483   if (FormatTok->is(tok::colon)) {
1484     // Parse a type definition.
1485     nextToken();
1486
1487     // Eat the type declaration. For braced inline object types, balance braces,
1488     // otherwise just parse until finding an l_brace for the function body.
1489     if (FormatTok->is(tok::l_brace))
1490       tryToParseBracedList();
1491     else
1492       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1493         nextToken();
1494   }
1495
1496   if (FormatTok->is(tok::semi))
1497     return;
1498
1499   parseChildBlock();
1500 }
1501
1502 bool UnwrappedLineParser::tryToParseBracedList() {
1503   if (FormatTok->BlockKind == BK_Unknown)
1504     calculateBraceTypes();
1505   assert(FormatTok->BlockKind != BK_Unknown);
1506   if (FormatTok->BlockKind == BK_Block)
1507     return false;
1508   nextToken();
1509   parseBracedList();
1510   return true;
1511 }
1512
1513 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1514                                           tok::TokenKind ClosingBraceKind) {
1515   bool HasError = false;
1516
1517   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1518   // replace this by using parseAssigmentExpression() inside.
1519   do {
1520     if (Style.Language == FormatStyle::LK_JavaScript) {
1521       if (FormatTok->is(Keywords.kw_function) ||
1522           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1523         tryToParseJSFunction();
1524         continue;
1525       }
1526       if (FormatTok->is(TT_JsFatArrow)) {
1527         nextToken();
1528         // Fat arrows can be followed by simple expressions or by child blocks
1529         // in curly braces.
1530         if (FormatTok->is(tok::l_brace)) {
1531           parseChildBlock();
1532           continue;
1533         }
1534       }
1535       if (FormatTok->is(tok::l_brace)) {
1536         // Could be a method inside of a braced list `{a() { return 1; }}`.
1537         if (tryToParseBracedList())
1538           continue;
1539         parseChildBlock();
1540       }
1541     }
1542     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1543       nextToken();
1544       return !HasError;
1545     }
1546     switch (FormatTok->Tok.getKind()) {
1547     case tok::caret:
1548       nextToken();
1549       if (FormatTok->is(tok::l_brace)) {
1550         parseChildBlock();
1551       }
1552       break;
1553     case tok::l_square:
1554       tryToParseLambda();
1555       break;
1556     case tok::l_paren:
1557       parseParens();
1558       // JavaScript can just have free standing methods and getters/setters in
1559       // object literals. Detect them by a "{" following ")".
1560       if (Style.Language == FormatStyle::LK_JavaScript) {
1561         if (FormatTok->is(tok::l_brace))
1562           parseChildBlock();
1563         break;
1564       }
1565       break;
1566     case tok::l_brace:
1567       // Assume there are no blocks inside a braced init list apart
1568       // from the ones we explicitly parse out (like lambdas).
1569       FormatTok->BlockKind = BK_BracedInit;
1570       nextToken();
1571       parseBracedList();
1572       break;
1573     case tok::less:
1574       if (Style.Language == FormatStyle::LK_Proto) {
1575         nextToken();
1576         parseBracedList(/*ContinueOnSemicolons=*/false,
1577                         /*ClosingBraceKind=*/tok::greater);
1578       } else {
1579         nextToken();
1580       }
1581       break;
1582     case tok::semi:
1583       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1584       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1585       // used for error recovery if we have otherwise determined that this is
1586       // a braced list.
1587       if (Style.Language == FormatStyle::LK_JavaScript) {
1588         nextToken();
1589         break;
1590       }
1591       HasError = true;
1592       if (!ContinueOnSemicolons)
1593         return !HasError;
1594       nextToken();
1595       break;
1596     case tok::comma:
1597       nextToken();
1598       break;
1599     default:
1600       nextToken();
1601       break;
1602     }
1603   } while (!eof());
1604   return false;
1605 }
1606
1607 void UnwrappedLineParser::parseParens() {
1608   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1609   nextToken();
1610   do {
1611     switch (FormatTok->Tok.getKind()) {
1612     case tok::l_paren:
1613       parseParens();
1614       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1615         parseChildBlock();
1616       break;
1617     case tok::r_paren:
1618       nextToken();
1619       return;
1620     case tok::r_brace:
1621       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1622       return;
1623     case tok::l_square:
1624       tryToParseLambda();
1625       break;
1626     case tok::l_brace:
1627       if (!tryToParseBracedList())
1628         parseChildBlock();
1629       break;
1630     case tok::at:
1631       nextToken();
1632       if (FormatTok->Tok.is(tok::l_brace)) {
1633         nextToken();
1634         parseBracedList();
1635       }
1636       break;
1637     case tok::kw_class:
1638       if (Style.Language == FormatStyle::LK_JavaScript)
1639         parseRecord(/*ParseAsExpr=*/true);
1640       else
1641         nextToken();
1642       break;
1643     case tok::identifier:
1644       if (Style.Language == FormatStyle::LK_JavaScript &&
1645           (FormatTok->is(Keywords.kw_function) ||
1646            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1647         tryToParseJSFunction();
1648       else
1649         nextToken();
1650       break;
1651     default:
1652       nextToken();
1653       break;
1654     }
1655   } while (!eof());
1656 }
1657
1658 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1659   if (!LambdaIntroducer) {
1660     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1661     if (tryToParseLambda())
1662       return;
1663   }
1664   do {
1665     switch (FormatTok->Tok.getKind()) {
1666     case tok::l_paren:
1667       parseParens();
1668       break;
1669     case tok::r_square:
1670       nextToken();
1671       return;
1672     case tok::r_brace:
1673       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1674       return;
1675     case tok::l_square:
1676       parseSquare();
1677       break;
1678     case tok::l_brace: {
1679       if (!tryToParseBracedList())
1680         parseChildBlock();
1681       break;
1682     }
1683     case tok::at:
1684       nextToken();
1685       if (FormatTok->Tok.is(tok::l_brace)) {
1686         nextToken();
1687         parseBracedList();
1688       }
1689       break;
1690     default:
1691       nextToken();
1692       break;
1693     }
1694   } while (!eof());
1695 }
1696
1697 void UnwrappedLineParser::parseIfThenElse() {
1698   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1699   nextToken();
1700   if (FormatTok->Tok.is(tok::kw_constexpr))
1701     nextToken();
1702   if (FormatTok->Tok.is(tok::l_paren))
1703     parseParens();
1704   bool NeedsUnwrappedLine = false;
1705   if (FormatTok->Tok.is(tok::l_brace)) {
1706     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1707     parseBlock(/*MustBeDeclaration=*/false);
1708     if (Style.BraceWrapping.BeforeElse)
1709       addUnwrappedLine();
1710     else
1711       NeedsUnwrappedLine = true;
1712   } else {
1713     addUnwrappedLine();
1714     ++Line->Level;
1715     parseStructuralElement();
1716     --Line->Level;
1717   }
1718   if (FormatTok->Tok.is(tok::kw_else)) {
1719     nextToken();
1720     if (FormatTok->Tok.is(tok::l_brace)) {
1721       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1722       parseBlock(/*MustBeDeclaration=*/false);
1723       addUnwrappedLine();
1724     } else if (FormatTok->Tok.is(tok::kw_if)) {
1725       parseIfThenElse();
1726     } else {
1727       addUnwrappedLine();
1728       ++Line->Level;
1729       parseStructuralElement();
1730       if (FormatTok->is(tok::eof))
1731         addUnwrappedLine();
1732       --Line->Level;
1733     }
1734   } else if (NeedsUnwrappedLine) {
1735     addUnwrappedLine();
1736   }
1737 }
1738
1739 void UnwrappedLineParser::parseTryCatch() {
1740   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1741   nextToken();
1742   bool NeedsUnwrappedLine = false;
1743   if (FormatTok->is(tok::colon)) {
1744     // We are in a function try block, what comes is an initializer list.
1745     nextToken();
1746     while (FormatTok->is(tok::identifier)) {
1747       nextToken();
1748       if (FormatTok->is(tok::l_paren))
1749         parseParens();
1750       if (FormatTok->is(tok::comma))
1751         nextToken();
1752     }
1753   }
1754   // Parse try with resource.
1755   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1756     parseParens();
1757   }
1758   if (FormatTok->is(tok::l_brace)) {
1759     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1760     parseBlock(/*MustBeDeclaration=*/false);
1761     if (Style.BraceWrapping.BeforeCatch) {
1762       addUnwrappedLine();
1763     } else {
1764       NeedsUnwrappedLine = true;
1765     }
1766   } else if (!FormatTok->is(tok::kw_catch)) {
1767     // The C++ standard requires a compound-statement after a try.
1768     // If there's none, we try to assume there's a structuralElement
1769     // and try to continue.
1770     addUnwrappedLine();
1771     ++Line->Level;
1772     parseStructuralElement();
1773     --Line->Level;
1774   }
1775   while (1) {
1776     if (FormatTok->is(tok::at))
1777       nextToken();
1778     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1779                              tok::kw___finally) ||
1780           ((Style.Language == FormatStyle::LK_Java ||
1781             Style.Language == FormatStyle::LK_JavaScript) &&
1782            FormatTok->is(Keywords.kw_finally)) ||
1783           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1784            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1785       break;
1786     nextToken();
1787     while (FormatTok->isNot(tok::l_brace)) {
1788       if (FormatTok->is(tok::l_paren)) {
1789         parseParens();
1790         continue;
1791       }
1792       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1793         return;
1794       nextToken();
1795     }
1796     NeedsUnwrappedLine = false;
1797     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1798     parseBlock(/*MustBeDeclaration=*/false);
1799     if (Style.BraceWrapping.BeforeCatch)
1800       addUnwrappedLine();
1801     else
1802       NeedsUnwrappedLine = true;
1803   }
1804   if (NeedsUnwrappedLine)
1805     addUnwrappedLine();
1806 }
1807
1808 void UnwrappedLineParser::parseNamespace() {
1809   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1810
1811   const FormatToken &InitialToken = *FormatTok;
1812   nextToken();
1813   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1814     nextToken();
1815   if (FormatTok->Tok.is(tok::l_brace)) {
1816     if (ShouldBreakBeforeBrace(Style, InitialToken))
1817       addUnwrappedLine();
1818
1819     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1820                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1821                      DeclarationScopeStack.size() > 1);
1822     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1823     // Munch the semicolon after a namespace. This is more common than one would
1824     // think. Puttin the semicolon into its own line is very ugly.
1825     if (FormatTok->Tok.is(tok::semi))
1826       nextToken();
1827     addUnwrappedLine();
1828   }
1829   // FIXME: Add error handling.
1830 }
1831
1832 void UnwrappedLineParser::parseNew() {
1833   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1834   nextToken();
1835   if (Style.Language != FormatStyle::LK_Java)
1836     return;
1837
1838   // In Java, we can parse everything up to the parens, which aren't optional.
1839   do {
1840     // There should not be a ;, { or } before the new's open paren.
1841     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1842       return;
1843
1844     // Consume the parens.
1845     if (FormatTok->is(tok::l_paren)) {
1846       parseParens();
1847
1848       // If there is a class body of an anonymous class, consume that as child.
1849       if (FormatTok->is(tok::l_brace))
1850         parseChildBlock();
1851       return;
1852     }
1853     nextToken();
1854   } while (!eof());
1855 }
1856
1857 void UnwrappedLineParser::parseForOrWhileLoop() {
1858   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1859          "'for', 'while' or foreach macro expected");
1860   nextToken();
1861   // JS' for await ( ...
1862   if (Style.Language == FormatStyle::LK_JavaScript &&
1863       FormatTok->is(Keywords.kw_await))
1864     nextToken();
1865   if (FormatTok->Tok.is(tok::l_paren))
1866     parseParens();
1867   if (FormatTok->Tok.is(tok::l_brace)) {
1868     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1869     parseBlock(/*MustBeDeclaration=*/false);
1870     addUnwrappedLine();
1871   } else {
1872     addUnwrappedLine();
1873     ++Line->Level;
1874     parseStructuralElement();
1875     --Line->Level;
1876   }
1877 }
1878
1879 void UnwrappedLineParser::parseDoWhile() {
1880   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1881   nextToken();
1882   if (FormatTok->Tok.is(tok::l_brace)) {
1883     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1884     parseBlock(/*MustBeDeclaration=*/false);
1885     if (Style.BraceWrapping.IndentBraces)
1886       addUnwrappedLine();
1887   } else {
1888     addUnwrappedLine();
1889     ++Line->Level;
1890     parseStructuralElement();
1891     --Line->Level;
1892   }
1893
1894   // FIXME: Add error handling.
1895   if (!FormatTok->Tok.is(tok::kw_while)) {
1896     addUnwrappedLine();
1897     return;
1898   }
1899
1900   nextToken();
1901   parseStructuralElement();
1902 }
1903
1904 void UnwrappedLineParser::parseLabel() {
1905   nextToken();
1906   unsigned OldLineLevel = Line->Level;
1907   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1908     --Line->Level;
1909   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1910     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1911     parseBlock(/*MustBeDeclaration=*/false);
1912     if (FormatTok->Tok.is(tok::kw_break)) {
1913       if (Style.BraceWrapping.AfterControlStatement)
1914         addUnwrappedLine();
1915       parseStructuralElement();
1916     }
1917     addUnwrappedLine();
1918   } else {
1919     if (FormatTok->is(tok::semi))
1920       nextToken();
1921     addUnwrappedLine();
1922   }
1923   Line->Level = OldLineLevel;
1924   if (FormatTok->isNot(tok::l_brace)) {
1925     parseStructuralElement();
1926     addUnwrappedLine();
1927   }
1928 }
1929
1930 void UnwrappedLineParser::parseCaseLabel() {
1931   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1932   // FIXME: fix handling of complex expressions here.
1933   do {
1934     nextToken();
1935   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1936   parseLabel();
1937 }
1938
1939 void UnwrappedLineParser::parseSwitch() {
1940   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1941   nextToken();
1942   if (FormatTok->Tok.is(tok::l_paren))
1943     parseParens();
1944   if (FormatTok->Tok.is(tok::l_brace)) {
1945     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1946     parseBlock(/*MustBeDeclaration=*/false);
1947     addUnwrappedLine();
1948   } else {
1949     addUnwrappedLine();
1950     ++Line->Level;
1951     parseStructuralElement();
1952     --Line->Level;
1953   }
1954 }
1955
1956 void UnwrappedLineParser::parseAccessSpecifier() {
1957   nextToken();
1958   // Understand Qt's slots.
1959   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1960     nextToken();
1961   // Otherwise, we don't know what it is, and we'd better keep the next token.
1962   if (FormatTok->Tok.is(tok::colon))
1963     nextToken();
1964   addUnwrappedLine();
1965 }
1966
1967 bool UnwrappedLineParser::parseEnum() {
1968   // Won't be 'enum' for NS_ENUMs.
1969   if (FormatTok->Tok.is(tok::kw_enum))
1970     nextToken();
1971
1972   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1973   // declarations. An "enum" keyword followed by a colon would be a syntax
1974   // error and thus assume it is just an identifier.
1975   if (Style.Language == FormatStyle::LK_JavaScript &&
1976       FormatTok->isOneOf(tok::colon, tok::question))
1977     return false;
1978
1979   // Eat up enum class ...
1980   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1981     nextToken();
1982
1983   while (FormatTok->Tok.getIdentifierInfo() ||
1984          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1985                             tok::greater, tok::comma, tok::question)) {
1986     nextToken();
1987     // We can have macros or attributes in between 'enum' and the enum name.
1988     if (FormatTok->is(tok::l_paren))
1989       parseParens();
1990     if (FormatTok->is(tok::identifier)) {
1991       nextToken();
1992       // If there are two identifiers in a row, this is likely an elaborate
1993       // return type. In Java, this can be "implements", etc.
1994       if (Style.isCpp() && FormatTok->is(tok::identifier))
1995         return false;
1996     }
1997   }
1998
1999   // Just a declaration or something is wrong.
2000   if (FormatTok->isNot(tok::l_brace))
2001     return true;
2002   FormatTok->BlockKind = BK_Block;
2003
2004   if (Style.Language == FormatStyle::LK_Java) {
2005     // Java enums are different.
2006     parseJavaEnumBody();
2007     return true;
2008   }
2009   if (Style.Language == FormatStyle::LK_Proto) {
2010     parseBlock(/*MustBeDeclaration=*/true);
2011     return true;
2012   }
2013
2014   // Parse enum body.
2015   nextToken();
2016   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2017   if (HasError) {
2018     if (FormatTok->is(tok::semi))
2019       nextToken();
2020     addUnwrappedLine();
2021   }
2022   return true;
2023
2024   // There is no addUnwrappedLine() here so that we fall through to parsing a
2025   // structural element afterwards. Thus, in "enum A {} n, m;",
2026   // "} n, m;" will end up in one unwrapped line.
2027 }
2028
2029 void UnwrappedLineParser::parseJavaEnumBody() {
2030   // Determine whether the enum is simple, i.e. does not have a semicolon or
2031   // constants with class bodies. Simple enums can be formatted like braced
2032   // lists, contracted to a single line, etc.
2033   unsigned StoredPosition = Tokens->getPosition();
2034   bool IsSimple = true;
2035   FormatToken *Tok = Tokens->getNextToken();
2036   while (Tok) {
2037     if (Tok->is(tok::r_brace))
2038       break;
2039     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2040       IsSimple = false;
2041       break;
2042     }
2043     // FIXME: This will also mark enums with braces in the arguments to enum
2044     // constants as "not simple". This is probably fine in practice, though.
2045     Tok = Tokens->getNextToken();
2046   }
2047   FormatTok = Tokens->setPosition(StoredPosition);
2048
2049   if (IsSimple) {
2050     nextToken();
2051     parseBracedList();
2052     addUnwrappedLine();
2053     return;
2054   }
2055
2056   // Parse the body of a more complex enum.
2057   // First add a line for everything up to the "{".
2058   nextToken();
2059   addUnwrappedLine();
2060   ++Line->Level;
2061
2062   // Parse the enum constants.
2063   while (FormatTok) {
2064     if (FormatTok->is(tok::l_brace)) {
2065       // Parse the constant's class body.
2066       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2067                  /*MunchSemi=*/false);
2068     } else if (FormatTok->is(tok::l_paren)) {
2069       parseParens();
2070     } else if (FormatTok->is(tok::comma)) {
2071       nextToken();
2072       addUnwrappedLine();
2073     } else if (FormatTok->is(tok::semi)) {
2074       nextToken();
2075       addUnwrappedLine();
2076       break;
2077     } else if (FormatTok->is(tok::r_brace)) {
2078       addUnwrappedLine();
2079       break;
2080     } else {
2081       nextToken();
2082     }
2083   }
2084
2085   // Parse the class body after the enum's ";" if any.
2086   parseLevel(/*HasOpeningBrace=*/true);
2087   nextToken();
2088   --Line->Level;
2089   addUnwrappedLine();
2090 }
2091
2092 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2093   const FormatToken &InitialToken = *FormatTok;
2094   nextToken();
2095
2096   // The actual identifier can be a nested name specifier, and in macros
2097   // it is often token-pasted.
2098   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2099                             tok::kw___attribute, tok::kw___declspec,
2100                             tok::kw_alignas) ||
2101          ((Style.Language == FormatStyle::LK_Java ||
2102            Style.Language == FormatStyle::LK_JavaScript) &&
2103           FormatTok->isOneOf(tok::period, tok::comma))) {
2104     if (Style.Language == FormatStyle::LK_JavaScript &&
2105         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2106       // JavaScript/TypeScript supports inline object types in
2107       // extends/implements positions:
2108       //     class Foo implements {bar: number} { }
2109       nextToken();
2110       if (FormatTok->is(tok::l_brace)) {
2111         tryToParseBracedList();
2112         continue;
2113       }
2114     }
2115     bool IsNonMacroIdentifier =
2116         FormatTok->is(tok::identifier) &&
2117         FormatTok->TokenText != FormatTok->TokenText.upper();
2118     nextToken();
2119     // We can have macros or attributes in between 'class' and the class name.
2120     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2121       parseParens();
2122   }
2123
2124   // Note that parsing away template declarations here leads to incorrectly
2125   // accepting function declarations as record declarations.
2126   // In general, we cannot solve this problem. Consider:
2127   // class A<int> B() {}
2128   // which can be a function definition or a class definition when B() is a
2129   // macro. If we find enough real-world cases where this is a problem, we
2130   // can parse for the 'template' keyword in the beginning of the statement,
2131   // and thus rule out the record production in case there is no template
2132   // (this would still leave us with an ambiguity between template function
2133   // and class declarations).
2134   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2135     while (!eof()) {
2136       if (FormatTok->is(tok::l_brace)) {
2137         calculateBraceTypes(/*ExpectClassBody=*/true);
2138         if (!tryToParseBracedList())
2139           break;
2140       }
2141       if (FormatTok->Tok.is(tok::semi))
2142         return;
2143       nextToken();
2144     }
2145   }
2146   if (FormatTok->Tok.is(tok::l_brace)) {
2147     if (ParseAsExpr) {
2148       parseChildBlock();
2149     } else {
2150       if (ShouldBreakBeforeBrace(Style, InitialToken))
2151         addUnwrappedLine();
2152
2153       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2154                  /*MunchSemi=*/false);
2155     }
2156   }
2157   // There is no addUnwrappedLine() here so that we fall through to parsing a
2158   // structural element afterwards. Thus, in "class A {} n, m;",
2159   // "} n, m;" will end up in one unwrapped line.
2160 }
2161
2162 void UnwrappedLineParser::parseObjCMethod() {
2163   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2164          "'(' or identifier expected.");
2165   do {
2166     if (FormatTok->Tok.is(tok::semi)) {
2167       nextToken();
2168       addUnwrappedLine();
2169       return;
2170     } else if (FormatTok->Tok.is(tok::l_brace)) {
2171       if (Style.BraceWrapping.AfterFunction)
2172         addUnwrappedLine();
2173       parseBlock(/*MustBeDeclaration=*/false);
2174       addUnwrappedLine();
2175       return;
2176     } else {
2177       nextToken();
2178     }
2179   } while (!eof());
2180 }
2181
2182 void UnwrappedLineParser::parseObjCProtocolList() {
2183   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2184   do {
2185     nextToken();
2186     // Early exit in case someone forgot a close angle.
2187     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2188         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2189       return;
2190   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2191   nextToken(); // Skip '>'.
2192 }
2193
2194 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2195   do {
2196     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2197       nextToken();
2198       addUnwrappedLine();
2199       break;
2200     }
2201     if (FormatTok->is(tok::l_brace)) {
2202       parseBlock(/*MustBeDeclaration=*/false);
2203       // In ObjC interfaces, nothing should be following the "}".
2204       addUnwrappedLine();
2205     } else if (FormatTok->is(tok::r_brace)) {
2206       // Ignore stray "}". parseStructuralElement doesn't consume them.
2207       nextToken();
2208       addUnwrappedLine();
2209     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2210       nextToken();
2211       parseObjCMethod();
2212     } else {
2213       parseStructuralElement();
2214     }
2215   } while (!eof());
2216 }
2217
2218 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2219   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2220          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2221   nextToken();
2222   nextToken(); // interface name
2223
2224   // @interface can be followed by a lightweight generic
2225   // specialization list, then either a base class or a category.
2226   if (FormatTok->Tok.is(tok::less)) {
2227     // Unlike protocol lists, generic parameterizations support
2228     // nested angles:
2229     //
2230     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2231     //     NSObject <NSCopying, NSSecureCoding>
2232     //
2233     // so we need to count how many open angles we have left.
2234     unsigned NumOpenAngles = 1;
2235     do {
2236       nextToken();
2237       // Early exit in case someone forgot a close angle.
2238       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2239           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2240         break;
2241       if (FormatTok->Tok.is(tok::less))
2242         ++NumOpenAngles;
2243       else if (FormatTok->Tok.is(tok::greater)) {
2244         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2245         --NumOpenAngles;
2246       }
2247     } while (!eof() && NumOpenAngles != 0);
2248     nextToken(); // Skip '>'.
2249   }
2250   if (FormatTok->Tok.is(tok::colon)) {
2251     nextToken();
2252     nextToken(); // base class name
2253   } else if (FormatTok->Tok.is(tok::l_paren))
2254     // Skip category, if present.
2255     parseParens();
2256
2257   if (FormatTok->Tok.is(tok::less))
2258     parseObjCProtocolList();
2259
2260   if (FormatTok->Tok.is(tok::l_brace)) {
2261     if (Style.BraceWrapping.AfterObjCDeclaration)
2262       addUnwrappedLine();
2263     parseBlock(/*MustBeDeclaration=*/true);
2264   }
2265
2266   // With instance variables, this puts '}' on its own line.  Without instance
2267   // variables, this ends the @interface line.
2268   addUnwrappedLine();
2269
2270   parseObjCUntilAtEnd();
2271 }
2272
2273 // Returns true for the declaration/definition form of @protocol,
2274 // false for the expression form.
2275 bool UnwrappedLineParser::parseObjCProtocol() {
2276   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2277   nextToken();
2278
2279   if (FormatTok->is(tok::l_paren))
2280     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2281     return false;
2282
2283   // The definition/declaration form,
2284   // @protocol Foo
2285   // - (int)someMethod;
2286   // @end
2287
2288   nextToken(); // protocol name
2289
2290   if (FormatTok->Tok.is(tok::less))
2291     parseObjCProtocolList();
2292
2293   // Check for protocol declaration.
2294   if (FormatTok->Tok.is(tok::semi)) {
2295     nextToken();
2296     addUnwrappedLine();
2297     return true;
2298   }
2299
2300   addUnwrappedLine();
2301   parseObjCUntilAtEnd();
2302   return true;
2303 }
2304
2305 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2306   bool IsImport = FormatTok->is(Keywords.kw_import);
2307   assert(IsImport || FormatTok->is(tok::kw_export));
2308   nextToken();
2309
2310   // Consume the "default" in "export default class/function".
2311   if (FormatTok->is(tok::kw_default))
2312     nextToken();
2313
2314   // Consume "async function", "function" and "default function", so that these
2315   // get parsed as free-standing JS functions, i.e. do not require a trailing
2316   // semicolon.
2317   if (FormatTok->is(Keywords.kw_async))
2318     nextToken();
2319   if (FormatTok->is(Keywords.kw_function)) {
2320     nextToken();
2321     return;
2322   }
2323
2324   // For imports, `export *`, `export {...}`, consume the rest of the line up
2325   // to the terminating `;`. For everything else, just return and continue
2326   // parsing the structural element, i.e. the declaration or expression for
2327   // `export default`.
2328   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2329       !FormatTok->isStringLiteral())
2330     return;
2331
2332   while (!eof()) {
2333     if (FormatTok->is(tok::semi))
2334       return;
2335     if (Line->Tokens.empty()) {
2336       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2337       // import statement should terminate.
2338       return;
2339     }
2340     if (FormatTok->is(tok::l_brace)) {
2341       FormatTok->BlockKind = BK_Block;
2342       nextToken();
2343       parseBracedList();
2344     } else {
2345       nextToken();
2346     }
2347   }
2348 }
2349
2350 void UnwrappedLineParser::parseStatementMacro()
2351 {
2352   nextToken();
2353   if (FormatTok->is(tok::l_paren))
2354     parseParens();
2355   if (FormatTok->is(tok::semi))
2356     nextToken();
2357   addUnwrappedLine();
2358 }
2359
2360 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2361                                                  StringRef Prefix = "") {
2362   llvm::dbgs() << Prefix << "Line(" << Line.Level
2363                << ", FSC=" << Line.FirstStartColumn << ")"
2364                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2365   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2366                                                     E = Line.Tokens.end();
2367        I != E; ++I) {
2368     llvm::dbgs() << I->Tok->Tok.getName() << "["
2369                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2370                  << "] ";
2371   }
2372   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2373                                                     E = Line.Tokens.end();
2374        I != E; ++I) {
2375     const UnwrappedLineNode &Node = *I;
2376     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2377              I = Node.Children.begin(),
2378              E = Node.Children.end();
2379          I != E; ++I) {
2380       printDebugInfo(*I, "\nChild: ");
2381     }
2382   }
2383   llvm::dbgs() << "\n";
2384 }
2385
2386 void UnwrappedLineParser::addUnwrappedLine() {
2387   if (Line->Tokens.empty())
2388     return;
2389   LLVM_DEBUG({
2390     if (CurrentLines == &Lines)
2391       printDebugInfo(*Line);
2392   });
2393   CurrentLines->push_back(std::move(*Line));
2394   Line->Tokens.clear();
2395   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2396   Line->FirstStartColumn = 0;
2397   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2398     CurrentLines->append(
2399         std::make_move_iterator(PreprocessorDirectives.begin()),
2400         std::make_move_iterator(PreprocessorDirectives.end()));
2401     PreprocessorDirectives.clear();
2402   }
2403   // Disconnect the current token from the last token on the previous line.
2404   FormatTok->Previous = nullptr;
2405 }
2406
2407 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2408
2409 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2410   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2411          FormatTok.NewlinesBefore > 0;
2412 }
2413
2414 // Checks if \p FormatTok is a line comment that continues the line comment
2415 // section on \p Line.
2416 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2417                                         const UnwrappedLine &Line,
2418                                         llvm::Regex &CommentPragmasRegex) {
2419   if (Line.Tokens.empty())
2420     return false;
2421
2422   StringRef IndentContent = FormatTok.TokenText;
2423   if (FormatTok.TokenText.startswith("//") ||
2424       FormatTok.TokenText.startswith("/*"))
2425     IndentContent = FormatTok.TokenText.substr(2);
2426   if (CommentPragmasRegex.match(IndentContent))
2427     return false;
2428
2429   // If Line starts with a line comment, then FormatTok continues the comment
2430   // section if its original column is greater or equal to the original start
2431   // column of the line.
2432   //
2433   // Define the min column token of a line as follows: if a line ends in '{' or
2434   // contains a '{' followed by a line comment, then the min column token is
2435   // that '{'. Otherwise, the min column token of the line is the first token of
2436   // the line.
2437   //
2438   // If Line starts with a token other than a line comment, then FormatTok
2439   // continues the comment section if its original column is greater than the
2440   // original start column of the min column token of the line.
2441   //
2442   // For example, the second line comment continues the first in these cases:
2443   //
2444   // // first line
2445   // // second line
2446   //
2447   // and:
2448   //
2449   // // first line
2450   //  // second line
2451   //
2452   // and:
2453   //
2454   // int i; // first line
2455   //  // second line
2456   //
2457   // and:
2458   //
2459   // do { // first line
2460   //      // second line
2461   //   int i;
2462   // } while (true);
2463   //
2464   // and:
2465   //
2466   // enum {
2467   //   a, // first line
2468   //    // second line
2469   //   b
2470   // };
2471   //
2472   // The second line comment doesn't continue the first in these cases:
2473   //
2474   //   // first line
2475   //  // second line
2476   //
2477   // and:
2478   //
2479   // int i; // first line
2480   // // second line
2481   //
2482   // and:
2483   //
2484   // do { // first line
2485   //   // second line
2486   //   int i;
2487   // } while (true);
2488   //
2489   // and:
2490   //
2491   // enum {
2492   //   a, // first line
2493   //   // second line
2494   // };
2495   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2496
2497   // Scan for '{//'. If found, use the column of '{' as a min column for line
2498   // comment section continuation.
2499   const FormatToken *PreviousToken = nullptr;
2500   for (const UnwrappedLineNode &Node : Line.Tokens) {
2501     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2502         isLineComment(*Node.Tok)) {
2503       MinColumnToken = PreviousToken;
2504       break;
2505     }
2506     PreviousToken = Node.Tok;
2507
2508     // Grab the last newline preceding a token in this unwrapped line.
2509     if (Node.Tok->NewlinesBefore > 0) {
2510       MinColumnToken = Node.Tok;
2511     }
2512   }
2513   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2514     MinColumnToken = PreviousToken;
2515   }
2516
2517   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2518                               MinColumnToken);
2519 }
2520
2521 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2522   bool JustComments = Line->Tokens.empty();
2523   for (SmallVectorImpl<FormatToken *>::const_iterator
2524            I = CommentsBeforeNextToken.begin(),
2525            E = CommentsBeforeNextToken.end();
2526        I != E; ++I) {
2527     // Line comments that belong to the same line comment section are put on the
2528     // same line since later we might want to reflow content between them.
2529     // Additional fine-grained breaking of line comment sections is controlled
2530     // by the class BreakableLineCommentSection in case it is desirable to keep
2531     // several line comment sections in the same unwrapped line.
2532     //
2533     // FIXME: Consider putting separate line comment sections as children to the
2534     // unwrapped line instead.
2535     (*I)->ContinuesLineCommentSection =
2536         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2537     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2538       addUnwrappedLine();
2539     pushToken(*I);
2540   }
2541   if (NewlineBeforeNext && JustComments)
2542     addUnwrappedLine();
2543   CommentsBeforeNextToken.clear();
2544 }
2545
2546 void UnwrappedLineParser::nextToken(int LevelDifference) {
2547   if (eof())
2548     return;
2549   flushComments(isOnNewLine(*FormatTok));
2550   pushToken(FormatTok);
2551   FormatToken *Previous = FormatTok;
2552   if (Style.Language != FormatStyle::LK_JavaScript)
2553     readToken(LevelDifference);
2554   else
2555     readTokenWithJavaScriptASI();
2556   FormatTok->Previous = Previous;
2557 }
2558
2559 void UnwrappedLineParser::distributeComments(
2560     const SmallVectorImpl<FormatToken *> &Comments,
2561     const FormatToken *NextTok) {
2562   // Whether or not a line comment token continues a line is controlled by
2563   // the method continuesLineCommentSection, with the following caveat:
2564   //
2565   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2566   // that each comment line from the trail is aligned with the next token, if
2567   // the next token exists. If a trail exists, the beginning of the maximal
2568   // trail is marked as a start of a new comment section.
2569   //
2570   // For example in this code:
2571   //
2572   // int a; // line about a
2573   //   // line 1 about b
2574   //   // line 2 about b
2575   //   int b;
2576   //
2577   // the two lines about b form a maximal trail, so there are two sections, the
2578   // first one consisting of the single comment "// line about a" and the
2579   // second one consisting of the next two comments.
2580   if (Comments.empty())
2581     return;
2582   bool ShouldPushCommentsInCurrentLine = true;
2583   bool HasTrailAlignedWithNextToken = false;
2584   unsigned StartOfTrailAlignedWithNextToken = 0;
2585   if (NextTok) {
2586     // We are skipping the first element intentionally.
2587     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2588       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2589         HasTrailAlignedWithNextToken = true;
2590         StartOfTrailAlignedWithNextToken = i;
2591       }
2592     }
2593   }
2594   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2595     FormatToken *FormatTok = Comments[i];
2596     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2597       FormatTok->ContinuesLineCommentSection = false;
2598     } else {
2599       FormatTok->ContinuesLineCommentSection =
2600           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2601     }
2602     if (!FormatTok->ContinuesLineCommentSection &&
2603         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2604       ShouldPushCommentsInCurrentLine = false;
2605     }
2606     if (ShouldPushCommentsInCurrentLine) {
2607       pushToken(FormatTok);
2608     } else {
2609       CommentsBeforeNextToken.push_back(FormatTok);
2610     }
2611   }
2612 }
2613
2614 void UnwrappedLineParser::readToken(int LevelDifference) {
2615   SmallVector<FormatToken *, 1> Comments;
2616   do {
2617     FormatTok = Tokens->getNextToken();
2618     assert(FormatTok);
2619     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2620            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2621       distributeComments(Comments, FormatTok);
2622       Comments.clear();
2623       // If there is an unfinished unwrapped line, we flush the preprocessor
2624       // directives only after that unwrapped line was finished later.
2625       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2626       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2627       assert((LevelDifference >= 0 ||
2628               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2629              "LevelDifference makes Line->Level negative");
2630       Line->Level += LevelDifference;
2631       // Comments stored before the preprocessor directive need to be output
2632       // before the preprocessor directive, at the same level as the
2633       // preprocessor directive, as we consider them to apply to the directive.
2634       flushComments(isOnNewLine(*FormatTok));
2635       parsePPDirective();
2636     }
2637     while (FormatTok->Type == TT_ConflictStart ||
2638            FormatTok->Type == TT_ConflictEnd ||
2639            FormatTok->Type == TT_ConflictAlternative) {
2640       if (FormatTok->Type == TT_ConflictStart) {
2641         conditionalCompilationStart(/*Unreachable=*/false);
2642       } else if (FormatTok->Type == TT_ConflictAlternative) {
2643         conditionalCompilationAlternative();
2644       } else if (FormatTok->Type == TT_ConflictEnd) {
2645         conditionalCompilationEnd();
2646       }
2647       FormatTok = Tokens->getNextToken();
2648       FormatTok->MustBreakBefore = true;
2649     }
2650
2651     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2652         !Line->InPPDirective) {
2653       continue;
2654     }
2655
2656     if (!FormatTok->Tok.is(tok::comment)) {
2657       distributeComments(Comments, FormatTok);
2658       Comments.clear();
2659       return;
2660     }
2661
2662     Comments.push_back(FormatTok);
2663   } while (!eof());
2664
2665   distributeComments(Comments, nullptr);
2666   Comments.clear();
2667 }
2668
2669 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2670   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2671   if (MustBreakBeforeNextToken) {
2672     Line->Tokens.back().Tok->MustBreakBefore = true;
2673     MustBreakBeforeNextToken = false;
2674   }
2675 }
2676
2677 } // end namespace format
2678 } // end namespace clang