contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file contains the implementation of the UnwrappedLineParser,
  11 /// which turns a stream of tokens into UnwrappedLines.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "UnwrappedLineParser.h"
  16 #include "llvm/ADT/STLExtras.h"
  17 #include "llvm/Support/Debug.h"
  18 #include "llvm/Support/raw_ostream.h"
  19
  20 #include <algorithm>
  21
  22 #define DEBUG_TYPE "format-parser"
  23
  24 namespace clang {
  25 namespace format {
  26
  27 class FormatTokenSource {
  28 public:
  29   virtual ~FormatTokenSource() {}
  30   virtual FormatToken *getNextToken() = 0;
  31
  32   virtual unsigned getPosition() = 0;
  33   virtual FormatToken *setPosition(unsigned Position) = 0;
  34 };
  35
  36 namespace {
  37
  38 class ScopedDeclarationState {
  39 public:
  40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  41                          bool MustBeDeclaration)
  42       : Line(Line), Stack(Stack) {
  43     Line.MustBeDeclaration = MustBeDeclaration;
  44     Stack.push_back(MustBeDeclaration);
  45   }
  46   ~ScopedDeclarationState() {
  47     Stack.pop_back();
  48     if (!Stack.empty())
  49       Line.MustBeDeclaration = Stack.back();
  50     else
  51       Line.MustBeDeclaration = true;
  52   }
  53
  54 private:
  55   UnwrappedLine &Line;
  56   std::vector<bool> &Stack;
  57 };
  58
  59 static bool isLineComment(const FormatToken &FormatTok) {
  60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
  61 }
  62
  63 // Checks if \p FormatTok is a line comment that continues the line comment
  64 // \p Previous. The original column of \p MinColumnToken is used to determine
  65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
  66 static bool continuesLineComment(const FormatToken &FormatTok,
  67                                  const FormatToken *Previous,
  68                                  const FormatToken *MinColumnToken) {
  69   if (!Previous || !MinColumnToken)
  70     return false;
  71   unsigned MinContinueColumn =
  72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
  73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
  74          isLineComment(*Previous) &&
  75          FormatTok.OriginalColumn >= MinContinueColumn;
  76 }
  77
  78 class ScopedMacroState : public FormatTokenSource {
  79 public:
  80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  81                    FormatToken *&ResetToken)
  82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  84         Token(nullptr), PreviousToken(nullptr) {
  85     FakeEOF.Tok.startToken();
  86     FakeEOF.Tok.setKind(tok::eof);
  87     TokenSource = this;
  88     Line.Level = 0;
  89     Line.InPPDirective = true;
  90   }
  91
  92   ~ScopedMacroState() override {
  93     TokenSource = PreviousTokenSource;
  94     ResetToken = Token;
  95     Line.InPPDirective = false;
  96     Line.Level = PreviousLineLevel;
  97   }
  98
  99   FormatToken *getNextToken() override {
 100     // The \c UnwrappedLineParser guards against this by never calling
 101     // \c getNextToken() after it has encountered the first eof token.
 102     assert(!eof());
 103     PreviousToken = Token;
 104     Token = PreviousTokenSource->getNextToken();
 105     if (eof())
 106       return &FakeEOF;
 107     return Token;
 108   }
 109
 110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
 111
 112   FormatToken *setPosition(unsigned Position) override {
 113     PreviousToken = nullptr;
 114     Token = PreviousTokenSource->setPosition(Position);
 115     return Token;
 116   }
 117
 118 private:
 119   bool eof() {
 120     return Token && Token->HasUnescapedNewline &&
 121            !continuesLineComment(*Token, PreviousToken,
 122                                  /*MinColumnToken=*/PreviousToken);
 123   }
 124
 125   FormatToken FakeEOF;
 126   UnwrappedLine &Line;
 127   FormatTokenSource *&TokenSource;
 128   FormatToken *&ResetToken;
 129   unsigned PreviousLineLevel;
 130   FormatTokenSource *PreviousTokenSource;
 131
 132   FormatToken *Token;
 133   FormatToken *PreviousToken;
 134 };
 135
 136 } // end anonymous namespace
 137
 138 class ScopedLineState {
 139 public:
 140   ScopedLineState(UnwrappedLineParser &Parser,
 141                   bool SwitchToPreprocessorLines = false)
 142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 143     if (SwitchToPreprocessorLines)
 144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 145     else if (!Parser.Line->Tokens.empty())
 146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 147     PreBlockLine = std::move(Parser.Line);
 148     Parser.Line = llvm::make_unique<UnwrappedLine>();
 149     Parser.Line->Level = PreBlockLine->Level;
 150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 151   }
 152
 153   ~ScopedLineState() {
 154     if (!Parser.Line->Tokens.empty()) {
 155       Parser.addUnwrappedLine();
 156     }
 157     assert(Parser.Line->Tokens.empty());
 158     Parser.Line = std::move(PreBlockLine);
 159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 160       Parser.MustBreakBeforeNextToken = true;
 161     Parser.CurrentLines = OriginalLines;
 162   }
 163
 164 private:
 165   UnwrappedLineParser &Parser;
 166
 167   std::unique_ptr<UnwrappedLine> PreBlockLine;
 168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 169 };
 170
 171 class CompoundStatementIndenter {
 172 public:
 173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 174                             const FormatStyle &Style, unsigned &LineLevel)
 175       : CompoundStatementIndenter(Parser, LineLevel,
 176                                   Style.BraceWrapping.AfterControlStatement,
 177                                   Style.BraceWrapping.IndentBraces) {
 178   }
 179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
 180                             bool WrapBrace, bool IndentBrace)
 181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 182     if (WrapBrace)
 183       Parser->addUnwrappedLine();
 184     if (IndentBrace)
 185       ++LineLevel;
 186   }
 187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 188
 189 private:
 190   unsigned &LineLevel;
 191   unsigned OldLineLevel;
 192 };
 193
 194 namespace {
 195
 196 class IndexedTokenSource : public FormatTokenSource {
 197 public:
 198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 199       : Tokens(Tokens), Position(-1) {}
 200
 201   FormatToken *getNextToken() override {
 202     ++Position;
 203     return Tokens[Position];
 204   }
 205
 206   unsigned getPosition() override {
 207     assert(Position >= 0);
 208     return Position;
 209   }
 210
 211   FormatToken *setPosition(unsigned P) override {
 212     Position = P;
 213     return Tokens[Position];
 214   }
 215
 216   void reset() { Position = -1; }
 217
 218 private:
 219   ArrayRef<FormatToken *> Tokens;
 220   int Position;
 221 };
 222
 223 } // end anonymous namespace
 224
 225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 226                                          const AdditionalKeywords &Keywords,
 227                                          unsigned FirstStartColumn,
 228                                          ArrayRef<FormatToken *> Tokens,
 229                                          UnwrappedLineConsumer &Callback)
 230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
 234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
 235                        ? IG_Rejected
 236                        : IG_Inited),
 237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
 238
 239 void UnwrappedLineParser::reset() {
 240   PPBranchLevel = -1;
 241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
 242                      ? IG_Rejected
 243                      : IG_Inited;
 244   IncludeGuardToken = nullptr;
 245   Line.reset(new UnwrappedLine);
 246   CommentsBeforeNextToken.clear();
 247   FormatTok = nullptr;
 248   MustBreakBeforeNextToken = false;
 249   PreprocessorDirectives.clear();
 250   CurrentLines = &Lines;
 251   DeclarationScopeStack.clear();
 252   PPStack.clear();
 253   Line->FirstStartColumn = FirstStartColumn;
 254 }
 255
 256 void UnwrappedLineParser::parse() {
 257   IndexedTokenSource TokenSource(AllTokens);
 258   Line->FirstStartColumn = FirstStartColumn;
 259   do {
 260     LLVM_DEBUG(llvm::dbgs() << "----\n");
 261     reset();
 262     Tokens = &TokenSource;
 263     TokenSource.reset();
 264
 265     readToken();
 266     parseFile();
 267
 268     // If we found an include guard then all preprocessor directives (other than
 269     // the guard) are over-indented by one.
 270     if (IncludeGuard == IG_Found)
 271       for (auto &Line : Lines)
 272         if (Line.InPPDirective && Line.Level > 0)
 273           --Line.Level;
 274
 275     // Create line with eof token.
 276     pushToken(FormatTok);
 277     addUnwrappedLine();
 278
 279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 280                                                   E = Lines.end();
 281          I != E; ++I) {
 282       Callback.consumeUnwrappedLine(*I);
 283     }
 284     Callback.finishRun();
 285     Lines.clear();
 286     while (!PPLevelBranchIndex.empty() &&
 287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 290     }
 291     if (!PPLevelBranchIndex.empty()) {
 292       ++PPLevelBranchIndex.back();
 293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 295     }
 296   } while (!PPLevelBranchIndex.empty());
 297 }
 298
 299 void UnwrappedLineParser::parseFile() {
 300   // The top-level context in a file always has declarations, except for pre-
 301   // processor directives and JavaScript files.
 302   bool MustBeDeclaration =
 303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 305                                           MustBeDeclaration);
 306   if (Style.Language == FormatStyle::LK_TextProto)
 307     parseBracedList();
 308   else
 309     parseLevel(/*HasOpeningBrace=*/false);
 310   // Make sure to format the remaining tokens.
 311   //
 312   // LK_TextProto is special since its top-level is parsed as the body of a
 313   // braced list, which does not necessarily have natural line separators such
 314   // as a semicolon. Comments after the last entry that have been determined to
 315   // not belong to that line, as in:
 316   //   key: value
 317   //   // endfile comment
 318   // do not have a chance to be put on a line of their own until this point.
 319   // Here we add this newline before end-of-file comments.
 320   if (Style.Language == FormatStyle::LK_TextProto &&
 321       !CommentsBeforeNextToken.empty())
 322     addUnwrappedLine();
 323   flushComments(true);
 324   addUnwrappedLine();
 325 }
 326
 327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 328   bool SwitchLabelEncountered = false;
 329   do {
 330     tok::TokenKind kind = FormatTok->Tok.getKind();
 331     if (FormatTok->Type == TT_MacroBlockBegin) {
 332       kind = tok::l_brace;
 333     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 334       kind = tok::r_brace;
 335     }
 336
 337     switch (kind) {
 338     case tok::comment:
 339       nextToken();
 340       addUnwrappedLine();
 341       break;
 342     case tok::l_brace:
 343       // FIXME: Add parameter whether this can happen - if this happens, we must
 344       // be in a non-declaration context.
 345       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
 346         continue;
 347       parseBlock(/*MustBeDeclaration=*/false);
 348       addUnwrappedLine();
 349       break;
 350     case tok::r_brace:
 351       if (HasOpeningBrace)
 352         return;
 353       nextToken();
 354       addUnwrappedLine();
 355       break;
 356     case tok::kw_default: {
 357       unsigned StoredPosition = Tokens->getPosition();
 358       FormatToken *Next;
 359       do {
 360         Next = Tokens->getNextToken();
 361       } while (Next && Next->is(tok::comment));
 362       FormatTok = Tokens->setPosition(StoredPosition);
 363       if (Next && Next->isNot(tok::colon)) {
 364         // default not followed by ':' is not a case label; treat it like
 365         // an identifier.
 366         parseStructuralElement();
 367         break;
 368       }
 369       // Else, if it is 'default:', fall through to the case handling.
 370       LLVM_FALLTHROUGH;
 371     }
 372     case tok::kw_case:
 373       if (Style.Language == FormatStyle::LK_JavaScript &&
 374           Line->MustBeDeclaration) {
 375         // A 'case: string' style field declaration.
 376         parseStructuralElement();
 377         break;
 378       }
 379       if (!SwitchLabelEncountered &&
 380           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 381         ++Line->Level;
 382       SwitchLabelEncountered = true;
 383       parseStructuralElement();
 384       break;
 385     default:
 386       parseStructuralElement();
 387       break;
 388     }
 389   } while (!eof());
 390 }
 391
 392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 393   // We'll parse forward through the tokens until we hit
 394   // a closing brace or eof - note that getNextToken() will
 395   // parse macros, so this will magically work inside macro
 396   // definitions, too.
 397   unsigned StoredPosition = Tokens->getPosition();
 398   FormatToken *Tok = FormatTok;
 399   const FormatToken *PrevTok = Tok->Previous;
 400   // Keep a stack of positions of lbrace tokens. We will
 401   // update information about whether an lbrace starts a
 402   // braced init list or a different block during the loop.
 403   SmallVector<FormatToken *, 8> LBraceStack;
 404   assert(Tok->Tok.is(tok::l_brace));
 405   do {
 406     // Get next non-comment token.
 407     FormatToken *NextTok;
 408     unsigned ReadTokens = 0;
 409     do {
 410       NextTok = Tokens->getNextToken();
 411       ++ReadTokens;
 412     } while (NextTok->is(tok::comment));
 413
 414     switch (Tok->Tok.getKind()) {
 415     case tok::l_brace:
 416       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
 417         if (PrevTok->isOneOf(tok::colon, tok::less))
 418           // A ':' indicates this code is in a type, or a braced list
 419           // following a label in an object literal ({a: {b: 1}}).
 420           // A '<' could be an object used in a comparison, but that is nonsense
 421           // code (can never return true), so more likely it is a generic type
 422           // argument (`X<{a: string; b: number}>`).
 423           // The code below could be confused by semicolons between the
 424           // individual members in a type member list, which would normally
 425           // trigger BK_Block. In both cases, this must be parsed as an inline
 426           // braced init.
 427           Tok->BlockKind = BK_BracedInit;
 428         else if (PrevTok->is(tok::r_paren))
 429           // `) { }` can only occur in function or method declarations in JS.
 430           Tok->BlockKind = BK_Block;
 431       } else {
 432         Tok->BlockKind = BK_Unknown;
 433       }
 434       LBraceStack.push_back(Tok);
 435       break;
 436     case tok::r_brace:
 437       if (LBraceStack.empty())
 438         break;
 439       if (LBraceStack.back()->BlockKind == BK_Unknown) {
 440         bool ProbablyBracedList = false;
 441         if (Style.Language == FormatStyle::LK_Proto) {
 442           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 443         } else {
 444           // Using OriginalColumn to distinguish between ObjC methods and
 445           // binary operators is a bit hacky.
 446           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 447                                   NextTok->OriginalColumn == 0;
 448
 449           // If there is a comma, semicolon or right paren after the closing
 450           // brace, we assume this is a braced initializer list.  Note that
 451           // regardless how we mark inner braces here, we will overwrite the
 452           // BlockKind later if we parse a braced list (where all blocks
 453           // inside are by default braced lists), or when we explicitly detect
 454           // blocks (for example while parsing lambdas).
 455           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
 456           // braced list in JS.
 457           ProbablyBracedList =
 458               (Style.Language == FormatStyle::LK_JavaScript &&
 459                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 460                                 Keywords.kw_as)) ||
 461               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
 462               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 463                                tok::r_paren, tok::r_square, tok::l_brace,
 464                                tok::ellipsis) ||
 465               (NextTok->is(tok::identifier) &&
 466                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
 467               (NextTok->is(tok::semi) &&
 468                (!ExpectClassBody || LBraceStack.size() != 1)) ||
 469               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 470           if (NextTok->is(tok::l_square)) {
 471             // We can have an array subscript after a braced init
 472             // list, but C++11 attributes are expected after blocks.
 473             NextTok = Tokens->getNextToken();
 474             ++ReadTokens;
 475             ProbablyBracedList = NextTok->isNot(tok::l_square);
 476           }
 477         }
 478         if (ProbablyBracedList) {
 479           Tok->BlockKind = BK_BracedInit;
 480           LBraceStack.back()->BlockKind = BK_BracedInit;
 481         } else {
 482           Tok->BlockKind = BK_Block;
 483           LBraceStack.back()->BlockKind = BK_Block;
 484         }
 485       }
 486       LBraceStack.pop_back();
 487       break;
 488     case tok::identifier:
 489       if (!Tok->is(TT_StatementMacro))
 490         break;
 491       LLVM_FALLTHROUGH;
 492     case tok::at:
 493     case tok::semi:
 494     case tok::kw_if:
 495     case tok::kw_while:
 496     case tok::kw_for:
 497     case tok::kw_switch:
 498     case tok::kw_try:
 499     case tok::kw___try:
 500       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
 501         LBraceStack.back()->BlockKind = BK_Block;
 502       break;
 503     default:
 504       break;
 505     }
 506     PrevTok = Tok;
 507     Tok = NextTok;
 508   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 509
 510   // Assume other blocks for all unclosed opening braces.
 511   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 512     if (LBraceStack[i]->BlockKind == BK_Unknown)
 513       LBraceStack[i]->BlockKind = BK_Block;
 514   }
 515
 516   FormatTok = Tokens->setPosition(StoredPosition);
 517 }
 518
 519 template <class T>
 520 static inline void hash_combine(std::size_t &seed, const T &v) {
 521   std::hash<T> hasher;
 522   seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
 523 }
 524
 525 size_t UnwrappedLineParser::computePPHash() const {
 526   size_t h = 0;
 527   for (const auto &i : PPStack) {
 528     hash_combine(h, size_t(i.Kind));
 529     hash_combine(h, i.Line);
 530   }
 531   return h;
 532 }
 533
 534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 535                                      bool MunchSemi) {
 536   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 537          "'{' or macro block token expected");
 538   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 539   FormatTok->BlockKind = BK_Block;
 540
 541   size_t PPStartHash = computePPHash();
 542
 543   unsigned InitialLevel = Line->Level;
 544   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
 545
 546   if (MacroBlock && FormatTok->is(tok::l_paren))
 547     parseParens();
 548
 549   size_t NbPreprocessorDirectives =
 550       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
 551   addUnwrappedLine();
 552   size_t OpeningLineIndex =
 553       CurrentLines->empty()
 554           ? (UnwrappedLine::kInvalidIndex)
 555           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
 556
 557   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 558                                           MustBeDeclaration);
 559   if (AddLevel)
 560     ++Line->Level;
 561   parseLevel(/*HasOpeningBrace=*/true);
 562
 563   if (eof())
 564     return;
 565
 566   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 567                  : !FormatTok->is(tok::r_brace)) {
 568     Line->Level = InitialLevel;
 569     FormatTok->BlockKind = BK_Block;
 570     return;
 571   }
 572
 573   size_t PPEndHash = computePPHash();
 574
 575   // Munch the closing brace.
 576   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
 577
 578   if (MacroBlock && FormatTok->is(tok::l_paren))
 579     parseParens();
 580
 581   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 582     nextToken();
 583   Line->Level = InitialLevel;
 584
 585   if (PPStartHash == PPEndHash) {
 586     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 587     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
 588       // Update the opening line to add the forward reference as well
 589       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
 590           CurrentLines->size() - 1;
 591     }
 592   }
 593 }
 594
 595 static bool isGoogScope(const UnwrappedLine &Line) {
 596   // FIXME: Closure-library specific stuff should not be hard-coded but be
 597   // configurable.
 598   if (Line.Tokens.size() < 4)
 599     return false;
 600   auto I = Line.Tokens.begin();
 601   if (I->Tok->TokenText != "goog")
 602     return false;
 603   ++I;
 604   if (I->Tok->isNot(tok::period))
 605     return false;
 606   ++I;
 607   if (I->Tok->TokenText != "scope")
 608     return false;
 609   ++I;
 610   return I->Tok->is(tok::l_paren);
 611 }
 612
 613 static bool isIIFE(const UnwrappedLine &Line,
 614                    const AdditionalKeywords &Keywords) {
 615   // Look for the start of an immediately invoked anonymous function.
 616   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
 617   // This is commonly done in JavaScript to create a new, anonymous scope.
 618   // Example: (function() { ... })()
 619   if (Line.Tokens.size() < 3)
 620     return false;
 621   auto I = Line.Tokens.begin();
 622   if (I->Tok->isNot(tok::l_paren))
 623     return false;
 624   ++I;
 625   if (I->Tok->isNot(Keywords.kw_function))
 626     return false;
 627   ++I;
 628   return I->Tok->is(tok::l_paren);
 629 }
 630
 631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 632                                    const FormatToken &InitialToken) {
 633   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
 634     return Style.BraceWrapping.AfterNamespace;
 635   if (InitialToken.is(tok::kw_class))
 636     return Style.BraceWrapping.AfterClass;
 637   if (InitialToken.is(tok::kw_union))
 638     return Style.BraceWrapping.AfterUnion;
 639   if (InitialToken.is(tok::kw_struct))
 640     return Style.BraceWrapping.AfterStruct;
 641   return false;
 642 }
 643
 644 void UnwrappedLineParser::parseChildBlock() {
 645   FormatTok->BlockKind = BK_Block;
 646   nextToken();
 647   {
 648     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
 649                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
 650     ScopedLineState LineState(*this);
 651     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 652                                             /*MustBeDeclaration=*/false);
 653     Line->Level += SkipIndent ? 0 : 1;
 654     parseLevel(/*HasOpeningBrace=*/true);
 655     flushComments(isOnNewLine(*FormatTok));
 656     Line->Level -= SkipIndent ? 0 : 1;
 657   }
 658   nextToken();
 659 }
 660
 661 void UnwrappedLineParser::parsePPDirective() {
 662   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 663   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 664
 665   nextToken();
 666
 667   if (!FormatTok->Tok.getIdentifierInfo()) {
 668     parsePPUnknown();
 669     return;
 670   }
 671
 672   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 673   case tok::pp_define:
 674     parsePPDefine();
 675     return;
 676   case tok::pp_if:
 677     parsePPIf(/*IfDef=*/false);
 678     break;
 679   case tok::pp_ifdef:
 680   case tok::pp_ifndef:
 681     parsePPIf(/*IfDef=*/true);
 682     break;
 683   case tok::pp_else:
 684     parsePPElse();
 685     break;
 686   case tok::pp_elif:
 687     parsePPElIf();
 688     break;
 689   case tok::pp_endif:
 690     parsePPEndIf();
 691     break;
 692   default:
 693     parsePPUnknown();
 694     break;
 695   }
 696 }
 697
 698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 699   size_t Line = CurrentLines->size();
 700   if (CurrentLines == &PreprocessorDirectives)
 701     Line += Lines.size();
 702
 703   if (Unreachable ||
 704       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
 705     PPStack.push_back({PP_Unreachable, Line});
 706   else
 707     PPStack.push_back({PP_Conditional, Line});
 708 }
 709
 710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 711   ++PPBranchLevel;
 712   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 713   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 714     PPLevelBranchIndex.push_back(0);
 715     PPLevelBranchCount.push_back(0);
 716   }
 717   PPChainBranchIndex.push(0);
 718   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 719   conditionalCompilationCondition(Unreachable || Skip);
 720 }
 721
 722 void UnwrappedLineParser::conditionalCompilationAlternative() {
 723   if (!PPStack.empty())
 724     PPStack.pop_back();
 725   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 726   if (!PPChainBranchIndex.empty())
 727     ++PPChainBranchIndex.top();
 728   conditionalCompilationCondition(
 729       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 730       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 731 }
 732
 733 void UnwrappedLineParser::conditionalCompilationEnd() {
 734   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 735   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 736     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 737       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 738     }
 739   }
 740   // Guard against #endif's without #if.
 741   if (PPBranchLevel > -1)
 742     --PPBranchLevel;
 743   if (!PPChainBranchIndex.empty())
 744     PPChainBranchIndex.pop();
 745   if (!PPStack.empty())
 746     PPStack.pop_back();
 747 }
 748
 749 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 750   bool IfNDef = FormatTok->is(tok::pp_ifndef);
 751   nextToken();
 752   bool Unreachable = false;
 753   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
 754     Unreachable = true;
 755   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
 756     Unreachable = true;
 757   conditionalCompilationStart(Unreachable);
 758   FormatToken *IfCondition = FormatTok;
 759   // If there's a #ifndef on the first line, and the only lines before it are
 760   // comments, it could be an include guard.
 761   bool MaybeIncludeGuard = IfNDef;
 762   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
 763     for (auto &Line : Lines) {
 764       if (!Line.Tokens.front().Tok->is(tok::comment)) {
 765         MaybeIncludeGuard = false;
 766         IncludeGuard = IG_Rejected;
 767         break;
 768       }
 769     }
 770   --PPBranchLevel;
 771   parsePPUnknown();
 772   ++PPBranchLevel;
 773   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
 774     IncludeGuard = IG_IfNdefed;
 775     IncludeGuardToken = IfCondition;
 776   }
 777 }
 778
 779 void UnwrappedLineParser::parsePPElse() {
 780   // If a potential include guard has an #else, it's not an include guard.
 781   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
 782     IncludeGuard = IG_Rejected;
 783   conditionalCompilationAlternative();
 784   if (PPBranchLevel > -1)
 785     --PPBranchLevel;
 786   parsePPUnknown();
 787   ++PPBranchLevel;
 788 }
 789
 790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 791
 792 void UnwrappedLineParser::parsePPEndIf() {
 793   conditionalCompilationEnd();
 794   parsePPUnknown();
 795   // If the #endif of a potential include guard is the last thing in the file,
 796   // then we found an include guard.
 797   unsigned TokenPosition = Tokens->getPosition();
 798   FormatToken *PeekNext = AllTokens[TokenPosition];
 799   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
 800       PeekNext->is(tok::eof) &&
 801       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 802     IncludeGuard = IG_Found;
 803 }
 804
 805 void UnwrappedLineParser::parsePPDefine() {
 806   nextToken();
 807
 808   if (!FormatTok->Tok.getIdentifierInfo()) {
 809     IncludeGuard = IG_Rejected;
 810     IncludeGuardToken = nullptr;
 811     parsePPUnknown();
 812     return;
 813   }
 814
 815   if (IncludeGuard == IG_IfNdefed &&
 816       IncludeGuardToken->TokenText == FormatTok->TokenText) {
 817     IncludeGuard = IG_Defined;
 818     IncludeGuardToken = nullptr;
 819     for (auto &Line : Lines) {
 820       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
 821         IncludeGuard = IG_Rejected;
 822         break;
 823       }
 824     }
 825   }
 826
 827   nextToken();
 828   if (FormatTok->Tok.getKind() == tok::l_paren &&
 829       FormatTok->WhitespaceRange.getBegin() ==
 830           FormatTok->WhitespaceRange.getEnd()) {
 831     parseParens();
 832   }
 833   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 834     Line->Level += PPBranchLevel + 1;
 835   addUnwrappedLine();
 836   ++Line->Level;
 837
 838   // Errors during a preprocessor directive can only affect the layout of the
 839   // preprocessor directive, and thus we ignore them. An alternative approach
 840   // would be to use the same approach we use on the file level (no
 841   // re-indentation if there was a structural error) within the macro
 842   // definition.
 843   parseFile();
 844 }
 845
 846 void UnwrappedLineParser::parsePPUnknown() {
 847   do {
 848     nextToken();
 849   } while (!eof());
 850   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
 851     Line->Level += PPBranchLevel + 1;
 852   addUnwrappedLine();
 853 }
 854
 855 // Here we blacklist certain tokens that are not usually the first token in an
 856 // unwrapped line. This is used in attempt to distinguish macro calls without
 857 // trailing semicolons from other constructs split to several lines.
 858 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 859   // Semicolon can be a null-statement, l_square can be a start of a macro or
 860   // a C++11 attribute, but this doesn't seem to be common.
 861   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 862          Tok.isNot(tok::l_square) &&
 863          // Tokens that can only be used as binary operators and a part of
 864          // overloaded operator names.
 865          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 866          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 867          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 868          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 869          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 870          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 871          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 872          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 873          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 874          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 875          Tok.isNot(tok::lesslessequal) &&
 876          // Colon is used in labels, base class lists, initializer lists,
 877          // range-based for loops, ternary operator, but should never be the
 878          // first token in an unwrapped line.
 879          Tok.isNot(tok::colon) &&
 880          // 'noexcept' is a trailing annotation.
 881          Tok.isNot(tok::kw_noexcept);
 882 }
 883
 884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
 885                           const FormatToken *FormatTok) {
 886   // FIXME: This returns true for C/C++ keywords like 'struct'.
 887   return FormatTok->is(tok::identifier) &&
 888          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
 889           !FormatTok->isOneOf(
 890               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
 891               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
 892               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
 893               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
 894               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
 895               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
 896               Keywords.kw_from));
 897 }
 898
 899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
 900                                  const FormatToken *FormatTok) {
 901   return FormatTok->Tok.isLiteral() ||
 902          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
 903          mustBeJSIdent(Keywords, FormatTok);
 904 }
 905
 906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
 907 // when encountered after a value (see mustBeJSIdentOrValue).
 908 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
 909                            const FormatToken *FormatTok) {
 910   return FormatTok->isOneOf(
 911       tok::kw_return, Keywords.kw_yield,
 912       // conditionals
 913       tok::kw_if, tok::kw_else,
 914       // loops
 915       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
 916       // switch/case
 917       tok::kw_switch, tok::kw_case,
 918       // exceptions
 919       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
 920       // declaration
 921       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
 922       Keywords.kw_async, Keywords.kw_function,
 923       // import/export
 924       Keywords.kw_import, tok::kw_export);
 925 }
 926
 927 // readTokenWithJavaScriptASI reads the next token and terminates the current
 928 // line if JavaScript Automatic Semicolon Insertion must
 929 // happen between the current token and the next token.
 930 //
 931 // This method is conservative - it cannot cover all edge cases of JavaScript,
 932 // but only aims to correctly handle certain well known cases. It *must not*
 933 // return true in speculative cases.
 934 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 935   FormatToken *Previous = FormatTok;
 936   readToken();
 937   FormatToken *Next = FormatTok;
 938
 939   bool IsOnSameLine =
 940       CommentsBeforeNextToken.empty()
 941           ? Next->NewlinesBefore == 0
 942           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
 943   if (IsOnSameLine)
 944     return;
 945
 946   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
 947   bool PreviousStartsTemplateExpr =
 948       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
 949   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
 950     // If the line contains an '@' sign, the previous token might be an
 951     // annotation, which can precede another identifier/value.
 952     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
 953                               [](UnwrappedLineNode &LineNode) {
 954                                 return LineNode.Tok->is(tok::at);
 955                               }) != Line->Tokens.end();
 956     if (HasAt)
 957       return;
 958   }
 959   if (Next->is(tok::exclaim) && PreviousMustBeValue)
 960     return addUnwrappedLine();
 961   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
 962   bool NextEndsTemplateExpr =
 963       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
 964   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
 965       (PreviousMustBeValue ||
 966        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
 967                          tok::minusminus)))
 968     return addUnwrappedLine();
 969   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
 970       isJSDeclOrStmt(Keywords, Next))
 971     return addUnwrappedLine();
 972 }
 973
// Parses one structural element starting at the current token (FormatTok).
//
// The function works in two stages:
//  1. A switch on the kind of the first token hands constructs that have a
//     dedicated parser (namespace, if, for/while, do, switch, try, ...) to
//     that parser and returns. Cases that `break` out of the switch continue
//     with stage 2.
//  2. A loop that inspects one token per iteration, delegating nested
//     constructs (parentheses, braces, records, enums, ...) to helpers, until
//     one of the cases returns or eof() is reached.
//
// Precondition (asserted): the current token is not an opening brace.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include token, optionally followed by a string literal, forms
  // a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Stage 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing brace is marked as Finalized; the
      // braces themselves are tagged TT_InlineASMBrace.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these keywords are simply consumed; in the
    // remaining languages they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern' followed by a string literal and '{' is parsed as a block of
    // declarations; anything else continues with stage 2.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: import, an optional 'public', a string literal and an
        // optional ';'. Returns without emitting a line when the string
        // literal is missing.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // The signals/slots keywords followed by ':' form a line of their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Stage 2: consume the remainder of the element token by token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // The NS_ENUM/NS_OPTIONS/CF_ENUM/CF_OPTIONS macros after 'typedef' are
      // parsed like enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the line collected so
      // far is emitted.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type specifier, a
      // parenthesized group and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token and then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only an identifier that is the first token of the line (optionally
      // preceded by a single comment) is considered as a label or macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro only if it is followed by a line
        // break, is function-like or at least five characters long, equals
        // its own upper-cased spelling, and the next token may begin a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '=' followed by '{' is a braced list; in Proto, '=' followed by '<' is
      // a list closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1409
1410 bool UnwrappedLineParser::tryToParseLambda() {
1411   if (!Style.isCpp()) {
1412     nextToken();
1413     return false;
1414   }
1415   assert(FormatTok->is(tok::l_square));
1416   FormatToken &LSquare = *FormatTok;
1417   if (!tryToParseLambdaIntroducer())
1418     return false;
1419
1420   bool SeenArrow = false;
1421
1422   while (FormatTok->isNot(tok::l_brace)) {
1423     if (FormatTok->isSimpleTypeSpecifier()) {
1424       nextToken();
1425       continue;
1426     }
1427     switch (FormatTok->Tok.getKind()) {
1428     case tok::l_brace:
1429       break;
1430     case tok::l_paren:
1431       parseParens();
1432       break;
1433     case tok::amp:
1434     case tok::star:
1435     case tok::kw_const:
1436     case tok::comma:
1437     case tok::less:
1438     case tok::greater:
1439     case tok::identifier:
1440     case tok::numeric_constant:
1441     case tok::coloncolon:
1442     case tok::kw_mutable:
1443     case tok::kw_noexcept:
1444       nextToken();
1445       break;
1446     // Specialization of a template with an integer parameter can contain
1447     // arithmetic, logical, comparison and ternary operators.
1448     //
1449     // FIXME: This also accepts sequences of operators that are not in the scope
1450     // of a template argument list.
1451     //
1452     // In a C++ lambda a template type can only occur after an arrow. We use
1453     // this as an heuristic to distinguish between Objective-C expressions
1454     // followed by an `a->b` expression, such as:
1455     // ([obj func:arg] + a->b)
1456     // Otherwise the code below would parse as a lambda.
1457     case tok::plus:
1458     case tok::minus:
1459     case tok::exclaim:
1460     case tok::tilde:
1461     case tok::slash:
1462     case tok::percent:
1463     case tok::lessless:
1464     case tok::pipe:
1465     case tok::pipepipe:
1466     case tok::ampamp:
1467     case tok::caret:
1468     case tok::equalequal:
1469     case tok::exclaimequal:
1470     case tok::greaterequal:
1471     case tok::lessequal:
1472     case tok::question:
1473     case tok::colon:
1474     case tok::kw_true:
1475     case tok::kw_false:
1476       if (SeenArrow) {
1477         nextToken();
1478         break;
1479       }
1480       return true;
1481     case tok::arrow:
1482       // This might or might not actually be a lambda arrow (this could be an
1483       // ObjC method invocation followed by a dereferencing arrow). We might
1484       // reset this back to TT_Unknown in TokenAnnotator.
1485       FormatTok->Type = TT_LambdaArrow;
1486       SeenArrow = true;
1487       nextToken();
1488       break;
1489     default:
1490       return true;
1491     }
1492   }
1493   FormatTok->Type = TT_LambdaLBrace;
1494   LSquare.Type = TT_LambdaLSquare;
1495   parseChildBlock();
1496   return true;
1497 }
1498
1499 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1500   const FormatToken *Previous = FormatTok->Previous;
1501   if (Previous &&
1502       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1503                          tok::kw_delete, tok::l_square) ||
1504        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1505        Previous->isSimpleTypeSpecifier())) {
1506     nextToken();
1507     return false;
1508   }
1509   nextToken();
1510   if (FormatTok->is(tok::l_square)) {
1511     return false;
1512   }
1513   parseSquare(/*LambdaIntroducer=*/true);
1514   return true;
1515 }
1516
1517 void UnwrappedLineParser::tryToParseJSFunction() {
1518   assert(FormatTok->is(Keywords.kw_function) ||
1519          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1520   if (FormatTok->is(Keywords.kw_async))
1521     nextToken();
1522   // Consume "function".
1523   nextToken();
1524
1525   // Consume * (generator function). Treat it like C++'s overloaded operators.
1526   if (FormatTok->is(tok::star)) {
1527     FormatTok->Type = TT_OverloadedOperator;
1528     nextToken();
1529   }
1530
1531   // Consume function name.
1532   if (FormatTok->is(tok::identifier))
1533     nextToken();
1534
1535   if (FormatTok->isNot(tok::l_paren))
1536     return;
1537
1538   // Parse formal parameter list.
1539   parseParens();
1540
1541   if (FormatTok->is(tok::colon)) {
1542     // Parse a type definition.
1543     nextToken();
1544
1545     // Eat the type declaration. For braced inline object types, balance braces,
1546     // otherwise just parse until finding an l_brace for the function body.
1547     if (FormatTok->is(tok::l_brace))
1548       tryToParseBracedList();
1549     else
1550       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1551         nextToken();
1552   }
1553
1554   if (FormatTok->is(tok::semi))
1555     return;
1556
1557   parseChildBlock();
1558 }
1559
1560 bool UnwrappedLineParser::tryToParseBracedList() {
1561   if (FormatTok->BlockKind == BK_Unknown)
1562     calculateBraceTypes();
1563   assert(FormatTok->BlockKind != BK_Unknown);
1564   if (FormatTok->BlockKind == BK_Block)
1565     return false;
1566   nextToken();
1567   parseBracedList();
1568   return true;
1569 }
1570
1571 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1572                                           tok::TokenKind ClosingBraceKind) {
1573   bool HasError = false;
1574
1575   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1576   // replace this by using parseAssigmentExpression() inside.
1577   do {
1578     if (Style.Language == FormatStyle::LK_JavaScript) {
1579       if (FormatTok->is(Keywords.kw_function) ||
1580           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1581         tryToParseJSFunction();
1582         continue;
1583       }
1584       if (FormatTok->is(TT_JsFatArrow)) {
1585         nextToken();
1586         // Fat arrows can be followed by simple expressions or by child blocks
1587         // in curly braces.
1588         if (FormatTok->is(tok::l_brace)) {
1589           parseChildBlock();
1590           continue;
1591         }
1592       }
1593       if (FormatTok->is(tok::l_brace)) {
1594         // Could be a method inside of a braced list `{a() { return 1; }}`.
1595         if (tryToParseBracedList())
1596           continue;
1597         parseChildBlock();
1598       }
1599     }
1600     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1601       nextToken();
1602       return !HasError;
1603     }
1604     switch (FormatTok->Tok.getKind()) {
1605     case tok::caret:
1606       nextToken();
1607       if (FormatTok->is(tok::l_brace)) {
1608         parseChildBlock();
1609       }
1610       break;
1611     case tok::l_square:
1612       tryToParseLambda();
1613       break;
1614     case tok::l_paren:
1615       parseParens();
1616       // JavaScript can just have free standing methods and getters/setters in
1617       // object literals. Detect them by a "{" following ")".
1618       if (Style.Language == FormatStyle::LK_JavaScript) {
1619         if (FormatTok->is(tok::l_brace))
1620           parseChildBlock();
1621         break;
1622       }
1623       break;
1624     case tok::l_brace:
1625       // Assume there are no blocks inside a braced init list apart
1626       // from the ones we explicitly parse out (like lambdas).
1627       FormatTok->BlockKind = BK_BracedInit;
1628       nextToken();
1629       parseBracedList();
1630       break;
1631     case tok::less:
1632       if (Style.Language == FormatStyle::LK_Proto) {
1633         nextToken();
1634         parseBracedList(/*ContinueOnSemicolons=*/false,
1635                         /*ClosingBraceKind=*/tok::greater);
1636       } else {
1637         nextToken();
1638       }
1639       break;
1640     case tok::semi:
1641       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1642       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1643       // used for error recovery if we have otherwise determined that this is
1644       // a braced list.
1645       if (Style.Language == FormatStyle::LK_JavaScript) {
1646         nextToken();
1647         break;
1648       }
1649       HasError = true;
1650       if (!ContinueOnSemicolons)
1651         return !HasError;
1652       nextToken();
1653       break;
1654     case tok::comma:
1655       nextToken();
1656       break;
1657     default:
1658       nextToken();
1659       break;
1660     }
1661   } while (!eof());
1662   return false;
1663 }
1664
1665 void UnwrappedLineParser::parseParens() {
1666   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1667   nextToken();
1668   do {
1669     switch (FormatTok->Tok.getKind()) {
1670     case tok::l_paren:
1671       parseParens();
1672       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1673         parseChildBlock();
1674       break;
1675     case tok::r_paren:
1676       nextToken();
1677       return;
1678     case tok::r_brace:
1679       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1680       return;
1681     case tok::l_square:
1682       tryToParseLambda();
1683       break;
1684     case tok::l_brace:
1685       if (!tryToParseBracedList())
1686         parseChildBlock();
1687       break;
1688     case tok::at:
1689       nextToken();
1690       if (FormatTok->Tok.is(tok::l_brace)) {
1691         nextToken();
1692         parseBracedList();
1693       }
1694       break;
1695     case tok::kw_class:
1696       if (Style.Language == FormatStyle::LK_JavaScript)
1697         parseRecord(/*ParseAsExpr=*/true);
1698       else
1699         nextToken();
1700       break;
1701     case tok::identifier:
1702       if (Style.Language == FormatStyle::LK_JavaScript &&
1703           (FormatTok->is(Keywords.kw_function) ||
1704            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1705         tryToParseJSFunction();
1706       else
1707         nextToken();
1708       break;
1709     default:
1710       nextToken();
1711       break;
1712     }
1713   } while (!eof());
1714 }
1715
1716 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1717   if (!LambdaIntroducer) {
1718     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1719     if (tryToParseLambda())
1720       return;
1721   }
1722   do {
1723     switch (FormatTok->Tok.getKind()) {
1724     case tok::l_paren:
1725       parseParens();
1726       break;
1727     case tok::r_square:
1728       nextToken();
1729       return;
1730     case tok::r_brace:
1731       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1732       return;
1733     case tok::l_square:
1734       parseSquare();
1735       break;
1736     case tok::l_brace: {
1737       if (!tryToParseBracedList())
1738         parseChildBlock();
1739       break;
1740     }
1741     case tok::at:
1742       nextToken();
1743       if (FormatTok->Tok.is(tok::l_brace)) {
1744         nextToken();
1745         parseBracedList();
1746       }
1747       break;
1748     default:
1749       nextToken();
1750       break;
1751     }
1752   } while (!eof());
1753 }
1754
1755 void UnwrappedLineParser::parseIfThenElse() {
1756   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1757   nextToken();
1758   if (FormatTok->Tok.is(tok::kw_constexpr))
1759     nextToken();
1760   if (FormatTok->Tok.is(tok::l_paren))
1761     parseParens();
1762   bool NeedsUnwrappedLine = false;
1763   if (FormatTok->Tok.is(tok::l_brace)) {
1764     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1765     parseBlock(/*MustBeDeclaration=*/false);
1766     if (Style.BraceWrapping.BeforeElse)
1767       addUnwrappedLine();
1768     else
1769       NeedsUnwrappedLine = true;
1770   } else {
1771     addUnwrappedLine();
1772     ++Line->Level;
1773     parseStructuralElement();
1774     --Line->Level;
1775   }
1776   if (FormatTok->Tok.is(tok::kw_else)) {
1777     nextToken();
1778     if (FormatTok->Tok.is(tok::l_brace)) {
1779       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1780       parseBlock(/*MustBeDeclaration=*/false);
1781       addUnwrappedLine();
1782     } else if (FormatTok->Tok.is(tok::kw_if)) {
1783       parseIfThenElse();
1784     } else {
1785       addUnwrappedLine();
1786       ++Line->Level;
1787       parseStructuralElement();
1788       if (FormatTok->is(tok::eof))
1789         addUnwrappedLine();
1790       --Line->Level;
1791     }
1792   } else if (NeedsUnwrappedLine) {
1793     addUnwrappedLine();
1794   }
1795 }
1796
1797 void UnwrappedLineParser::parseTryCatch() {
1798   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1799   nextToken();
1800   bool NeedsUnwrappedLine = false;
1801   if (FormatTok->is(tok::colon)) {
1802     // We are in a function try block, what comes is an initializer list.
1803     nextToken();
1804     while (FormatTok->is(tok::identifier)) {
1805       nextToken();
1806       if (FormatTok->is(tok::l_paren))
1807         parseParens();
1808       if (FormatTok->is(tok::comma))
1809         nextToken();
1810     }
1811   }
1812   // Parse try with resource.
1813   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1814     parseParens();
1815   }
1816   if (FormatTok->is(tok::l_brace)) {
1817     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1818     parseBlock(/*MustBeDeclaration=*/false);
1819     if (Style.BraceWrapping.BeforeCatch) {
1820       addUnwrappedLine();
1821     } else {
1822       NeedsUnwrappedLine = true;
1823     }
1824   } else if (!FormatTok->is(tok::kw_catch)) {
1825     // The C++ standard requires a compound-statement after a try.
1826     // If there's none, we try to assume there's a structuralElement
1827     // and try to continue.
1828     addUnwrappedLine();
1829     ++Line->Level;
1830     parseStructuralElement();
1831     --Line->Level;
1832   }
1833   while (1) {
1834     if (FormatTok->is(tok::at))
1835       nextToken();
1836     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1837                              tok::kw___finally) ||
1838           ((Style.Language == FormatStyle::LK_Java ||
1839             Style.Language == FormatStyle::LK_JavaScript) &&
1840            FormatTok->is(Keywords.kw_finally)) ||
1841           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1842            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1843       break;
1844     nextToken();
1845     while (FormatTok->isNot(tok::l_brace)) {
1846       if (FormatTok->is(tok::l_paren)) {
1847         parseParens();
1848         continue;
1849       }
1850       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1851         return;
1852       nextToken();
1853     }
1854     NeedsUnwrappedLine = false;
1855     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1856     parseBlock(/*MustBeDeclaration=*/false);
1857     if (Style.BraceWrapping.BeforeCatch)
1858       addUnwrappedLine();
1859     else
1860       NeedsUnwrappedLine = true;
1861   }
1862   if (NeedsUnwrappedLine)
1863     addUnwrappedLine();
1864 }
1865
1866 void UnwrappedLineParser::parseNamespace() {
1867   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1868          "'namespace' expected");
1869
1870   const FormatToken &InitialToken = *FormatTok;
1871   nextToken();
1872   if (InitialToken.is(TT_NamespaceMacro)) {
1873     parseParens();
1874   } else {
1875     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1876       nextToken();
1877   }
1878   if (FormatTok->Tok.is(tok::l_brace)) {
1879     if (ShouldBreakBeforeBrace(Style, InitialToken))
1880       addUnwrappedLine();
1881
1882     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1883                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1884                      DeclarationScopeStack.size() > 1);
1885     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1886     // Munch the semicolon after a namespace. This is more common than one would
1887     // think. Puttin the semicolon into its own line is very ugly.
1888     if (FormatTok->Tok.is(tok::semi))
1889       nextToken();
1890     addUnwrappedLine();
1891   }
1892   // FIXME: Add error handling.
1893 }
1894
1895 void UnwrappedLineParser::parseNew() {
1896   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1897   nextToken();
1898   if (Style.Language != FormatStyle::LK_Java)
1899     return;
1900
1901   // In Java, we can parse everything up to the parens, which aren't optional.
1902   do {
1903     // There should not be a ;, { or } before the new's open paren.
1904     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1905       return;
1906
1907     // Consume the parens.
1908     if (FormatTok->is(tok::l_paren)) {
1909       parseParens();
1910
1911       // If there is a class body of an anonymous class, consume that as child.
1912       if (FormatTok->is(tok::l_brace))
1913         parseChildBlock();
1914       return;
1915     }
1916     nextToken();
1917   } while (!eof());
1918 }
1919
1920 void UnwrappedLineParser::parseForOrWhileLoop() {
1921   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1922          "'for', 'while' or foreach macro expected");
1923   nextToken();
1924   // JS' for await ( ...
1925   if (Style.Language == FormatStyle::LK_JavaScript &&
1926       FormatTok->is(Keywords.kw_await))
1927     nextToken();
1928   if (FormatTok->Tok.is(tok::l_paren))
1929     parseParens();
1930   if (FormatTok->Tok.is(tok::l_brace)) {
1931     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1932     parseBlock(/*MustBeDeclaration=*/false);
1933     addUnwrappedLine();
1934   } else {
1935     addUnwrappedLine();
1936     ++Line->Level;
1937     parseStructuralElement();
1938     --Line->Level;
1939   }
1940 }
1941
1942 void UnwrappedLineParser::parseDoWhile() {
1943   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1944   nextToken();
1945   if (FormatTok->Tok.is(tok::l_brace)) {
1946     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1947     parseBlock(/*MustBeDeclaration=*/false);
1948     if (Style.BraceWrapping.IndentBraces)
1949       addUnwrappedLine();
1950   } else {
1951     addUnwrappedLine();
1952     ++Line->Level;
1953     parseStructuralElement();
1954     --Line->Level;
1955   }
1956
1957   // FIXME: Add error handling.
1958   if (!FormatTok->Tok.is(tok::kw_while)) {
1959     addUnwrappedLine();
1960     return;
1961   }
1962
1963   nextToken();
1964   parseStructuralElement();
1965 }
1966
1967 void UnwrappedLineParser::parseLabel() {
1968   nextToken();
1969   unsigned OldLineLevel = Line->Level;
1970   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1971     --Line->Level;
1972   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1973     CompoundStatementIndenter Indenter(this, Line->Level,
1974                                        Style.BraceWrapping.AfterCaseLabel,
1975                                        Style.BraceWrapping.IndentBraces);
1976     parseBlock(/*MustBeDeclaration=*/false);
1977     if (FormatTok->Tok.is(tok::kw_break)) {
1978       if (Style.BraceWrapping.AfterControlStatement)
1979         addUnwrappedLine();
1980       parseStructuralElement();
1981     }
1982     addUnwrappedLine();
1983   } else {
1984     if (FormatTok->is(tok::semi))
1985       nextToken();
1986     addUnwrappedLine();
1987   }
1988   Line->Level = OldLineLevel;
1989   if (FormatTok->isNot(tok::l_brace)) {
1990     parseStructuralElement();
1991     addUnwrappedLine();
1992   }
1993 }
1994
1995 void UnwrappedLineParser::parseCaseLabel() {
1996   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1997   // FIXME: fix handling of complex expressions here.
1998   do {
1999     nextToken();
2000   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2001   parseLabel();
2002 }
2003
2004 void UnwrappedLineParser::parseSwitch() {
2005   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2006   nextToken();
2007   if (FormatTok->Tok.is(tok::l_paren))
2008     parseParens();
2009   if (FormatTok->Tok.is(tok::l_brace)) {
2010     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2011     parseBlock(/*MustBeDeclaration=*/false);
2012     addUnwrappedLine();
2013   } else {
2014     addUnwrappedLine();
2015     ++Line->Level;
2016     parseStructuralElement();
2017     --Line->Level;
2018   }
2019 }
2020
2021 void UnwrappedLineParser::parseAccessSpecifier() {
2022   nextToken();
2023   // Understand Qt's slots.
2024   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2025     nextToken();
2026   // Otherwise, we don't know what it is, and we'd better keep the next token.
2027   if (FormatTok->Tok.is(tok::colon))
2028     nextToken();
2029   addUnwrappedLine();
2030 }
2031
2032 bool UnwrappedLineParser::parseEnum() {
2033   // Won't be 'enum' for NS_ENUMs.
2034   if (FormatTok->Tok.is(tok::kw_enum))
2035     nextToken();
2036
2037   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2038   // declarations. An "enum" keyword followed by a colon would be a syntax
2039   // error and thus assume it is just an identifier.
2040   if (Style.Language == FormatStyle::LK_JavaScript &&
2041       FormatTok->isOneOf(tok::colon, tok::question))
2042     return false;
2043
2044   // In protobuf, "enum" can be used as a field name.
2045   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2046     return false;
2047
2048   // Eat up enum class ...
2049   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2050     nextToken();
2051
2052   while (FormatTok->Tok.getIdentifierInfo() ||
2053          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2054                             tok::greater, tok::comma, tok::question)) {
2055     nextToken();
2056     // We can have macros or attributes in between 'enum' and the enum name.
2057     if (FormatTok->is(tok::l_paren))
2058       parseParens();
2059     if (FormatTok->is(tok::identifier)) {
2060       nextToken();
2061       // If there are two identifiers in a row, this is likely an elaborate
2062       // return type. In Java, this can be "implements", etc.
2063       if (Style.isCpp() && FormatTok->is(tok::identifier))
2064         return false;
2065     }
2066   }
2067
2068   // Just a declaration or something is wrong.
2069   if (FormatTok->isNot(tok::l_brace))
2070     return true;
2071   FormatTok->BlockKind = BK_Block;
2072
2073   if (Style.Language == FormatStyle::LK_Java) {
2074     // Java enums are different.
2075     parseJavaEnumBody();
2076     return true;
2077   }
2078   if (Style.Language == FormatStyle::LK_Proto) {
2079     parseBlock(/*MustBeDeclaration=*/true);
2080     return true;
2081   }
2082
2083   // Parse enum body.
2084   nextToken();
2085   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2086   if (HasError) {
2087     if (FormatTok->is(tok::semi))
2088       nextToken();
2089     addUnwrappedLine();
2090   }
2091   return true;
2092
2093   // There is no addUnwrappedLine() here so that we fall through to parsing a
2094   // structural element afterwards. Thus, in "enum A {} n, m;",
2095   // "} n, m;" will end up in one unwrapped line.
2096 }
2097
2098 void UnwrappedLineParser::parseJavaEnumBody() {
2099   // Determine whether the enum is simple, i.e. does not have a semicolon or
2100   // constants with class bodies. Simple enums can be formatted like braced
2101   // lists, contracted to a single line, etc.
2102   unsigned StoredPosition = Tokens->getPosition();
2103   bool IsSimple = true;
2104   FormatToken *Tok = Tokens->getNextToken();
2105   while (Tok) {
2106     if (Tok->is(tok::r_brace))
2107       break;
2108     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2109       IsSimple = false;
2110       break;
2111     }
2112     // FIXME: This will also mark enums with braces in the arguments to enum
2113     // constants as "not simple". This is probably fine in practice, though.
2114     Tok = Tokens->getNextToken();
2115   }
2116   FormatTok = Tokens->setPosition(StoredPosition);
2117
2118   if (IsSimple) {
2119     nextToken();
2120     parseBracedList();
2121     addUnwrappedLine();
2122     return;
2123   }
2124
2125   // Parse the body of a more complex enum.
2126   // First add a line for everything up to the "{".
2127   nextToken();
2128   addUnwrappedLine();
2129   ++Line->Level;
2130
2131   // Parse the enum constants.
2132   while (FormatTok) {
2133     if (FormatTok->is(tok::l_brace)) {
2134       // Parse the constant's class body.
2135       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2136                  /*MunchSemi=*/false);
2137     } else if (FormatTok->is(tok::l_paren)) {
2138       parseParens();
2139     } else if (FormatTok->is(tok::comma)) {
2140       nextToken();
2141       addUnwrappedLine();
2142     } else if (FormatTok->is(tok::semi)) {
2143       nextToken();
2144       addUnwrappedLine();
2145       break;
2146     } else if (FormatTok->is(tok::r_brace)) {
2147       addUnwrappedLine();
2148       break;
2149     } else {
2150       nextToken();
2151     }
2152   }
2153
2154   // Parse the class body after the enum's ";" if any.
2155   parseLevel(/*HasOpeningBrace=*/true);
2156   nextToken();
2157   --Line->Level;
2158   addUnwrappedLine();
2159 }
2160
2161 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2162   const FormatToken &InitialToken = *FormatTok;
2163   nextToken();
2164
2165   // The actual identifier can be a nested name specifier, and in macros
2166   // it is often token-pasted.
2167   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2168                             tok::kw___attribute, tok::kw___declspec,
2169                             tok::kw_alignas) ||
2170          ((Style.Language == FormatStyle::LK_Java ||
2171            Style.Language == FormatStyle::LK_JavaScript) &&
2172           FormatTok->isOneOf(tok::period, tok::comma))) {
2173     if (Style.Language == FormatStyle::LK_JavaScript &&
2174         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2175       // JavaScript/TypeScript supports inline object types in
2176       // extends/implements positions:
2177       //     class Foo implements {bar: number} { }
2178       nextToken();
2179       if (FormatTok->is(tok::l_brace)) {
2180         tryToParseBracedList();
2181         continue;
2182       }
2183     }
2184     bool IsNonMacroIdentifier =
2185         FormatTok->is(tok::identifier) &&
2186         FormatTok->TokenText != FormatTok->TokenText.upper();
2187     nextToken();
2188     // We can have macros or attributes in between 'class' and the class name.
2189     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2190       parseParens();
2191   }
2192
2193   // Note that parsing away template declarations here leads to incorrectly
2194   // accepting function declarations as record declarations.
2195   // In general, we cannot solve this problem. Consider:
2196   // class A<int> B() {}
2197   // which can be a function definition or a class definition when B() is a
2198   // macro. If we find enough real-world cases where this is a problem, we
2199   // can parse for the 'template' keyword in the beginning of the statement,
2200   // and thus rule out the record production in case there is no template
2201   // (this would still leave us with an ambiguity between template function
2202   // and class declarations).
2203   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2204     while (!eof()) {
2205       if (FormatTok->is(tok::l_brace)) {
2206         calculateBraceTypes(/*ExpectClassBody=*/true);
2207         if (!tryToParseBracedList())
2208           break;
2209       }
2210       if (FormatTok->Tok.is(tok::semi))
2211         return;
2212       nextToken();
2213     }
2214   }
2215   if (FormatTok->Tok.is(tok::l_brace)) {
2216     if (ParseAsExpr) {
2217       parseChildBlock();
2218     } else {
2219       if (ShouldBreakBeforeBrace(Style, InitialToken))
2220         addUnwrappedLine();
2221
2222       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2223                  /*MunchSemi=*/false);
2224     }
2225   }
2226   // There is no addUnwrappedLine() here so that we fall through to parsing a
2227   // structural element afterwards. Thus, in "class A {} n, m;",
2228   // "} n, m;" will end up in one unwrapped line.
2229 }
2230
2231 void UnwrappedLineParser::parseObjCMethod() {
2232   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2233          "'(' or identifier expected.");
2234   do {
2235     if (FormatTok->Tok.is(tok::semi)) {
2236       nextToken();
2237       addUnwrappedLine();
2238       return;
2239     } else if (FormatTok->Tok.is(tok::l_brace)) {
2240       if (Style.BraceWrapping.AfterFunction)
2241         addUnwrappedLine();
2242       parseBlock(/*MustBeDeclaration=*/false);
2243       addUnwrappedLine();
2244       return;
2245     } else {
2246       nextToken();
2247     }
2248   } while (!eof());
2249 }
2250
2251 void UnwrappedLineParser::parseObjCProtocolList() {
2252   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2253   do {
2254     nextToken();
2255     // Early exit in case someone forgot a close angle.
2256     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2257         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2258       return;
2259   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2260   nextToken(); // Skip '>'.
2261 }
2262
2263 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2264   do {
2265     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2266       nextToken();
2267       addUnwrappedLine();
2268       break;
2269     }
2270     if (FormatTok->is(tok::l_brace)) {
2271       parseBlock(/*MustBeDeclaration=*/false);
2272       // In ObjC interfaces, nothing should be following the "}".
2273       addUnwrappedLine();
2274     } else if (FormatTok->is(tok::r_brace)) {
2275       // Ignore stray "}". parseStructuralElement doesn't consume them.
2276       nextToken();
2277       addUnwrappedLine();
2278     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2279       nextToken();
2280       parseObjCMethod();
2281     } else {
2282       parseStructuralElement();
2283     }
2284   } while (!eof());
2285 }
2286
2287 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2288   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2289          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2290   nextToken();
2291   nextToken(); // interface name
2292
2293   // @interface can be followed by a lightweight generic
2294   // specialization list, then either a base class or a category.
2295   if (FormatTok->Tok.is(tok::less)) {
2296     // Unlike protocol lists, generic parameterizations support
2297     // nested angles:
2298     //
2299     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2300     //     NSObject <NSCopying, NSSecureCoding>
2301     //
2302     // so we need to count how many open angles we have left.
2303     unsigned NumOpenAngles = 1;
2304     do {
2305       nextToken();
2306       // Early exit in case someone forgot a close angle.
2307       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2308           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2309         break;
2310       if (FormatTok->Tok.is(tok::less))
2311         ++NumOpenAngles;
2312       else if (FormatTok->Tok.is(tok::greater)) {
2313         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2314         --NumOpenAngles;
2315       }
2316     } while (!eof() && NumOpenAngles != 0);
2317     nextToken(); // Skip '>'.
2318   }
2319   if (FormatTok->Tok.is(tok::colon)) {
2320     nextToken();
2321     nextToken(); // base class name
2322   } else if (FormatTok->Tok.is(tok::l_paren))
2323     // Skip category, if present.
2324     parseParens();
2325
2326   if (FormatTok->Tok.is(tok::less))
2327     parseObjCProtocolList();
2328
2329   if (FormatTok->Tok.is(tok::l_brace)) {
2330     if (Style.BraceWrapping.AfterObjCDeclaration)
2331       addUnwrappedLine();
2332     parseBlock(/*MustBeDeclaration=*/true);
2333   }
2334
2335   // With instance variables, this puts '}' on its own line.  Without instance
2336   // variables, this ends the @interface line.
2337   addUnwrappedLine();
2338
2339   parseObjCUntilAtEnd();
2340 }
2341
2342 // Returns true for the declaration/definition form of @protocol,
2343 // false for the expression form.
2344 bool UnwrappedLineParser::parseObjCProtocol() {
2345   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2346   nextToken();
2347
2348   if (FormatTok->is(tok::l_paren))
2349     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2350     return false;
2351
2352   // The definition/declaration form,
2353   // @protocol Foo
2354   // - (int)someMethod;
2355   // @end
2356
2357   nextToken(); // protocol name
2358
2359   if (FormatTok->Tok.is(tok::less))
2360     parseObjCProtocolList();
2361
2362   // Check for protocol declaration.
2363   if (FormatTok->Tok.is(tok::semi)) {
2364     nextToken();
2365     addUnwrappedLine();
2366     return true;
2367   }
2368
2369   addUnwrappedLine();
2370   parseObjCUntilAtEnd();
2371   return true;
2372 }
2373
2374 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2375   bool IsImport = FormatTok->is(Keywords.kw_import);
2376   assert(IsImport || FormatTok->is(tok::kw_export));
2377   nextToken();
2378
2379   // Consume the "default" in "export default class/function".
2380   if (FormatTok->is(tok::kw_default))
2381     nextToken();
2382
2383   // Consume "async function", "function" and "default function", so that these
2384   // get parsed as free-standing JS functions, i.e. do not require a trailing
2385   // semicolon.
2386   if (FormatTok->is(Keywords.kw_async))
2387     nextToken();
2388   if (FormatTok->is(Keywords.kw_function)) {
2389     nextToken();
2390     return;
2391   }
2392
2393   // For imports, `export *`, `export {...}`, consume the rest of the line up
2394   // to the terminating `;`. For everything else, just return and continue
2395   // parsing the structural element, i.e. the declaration or expression for
2396   // `export default`.
2397   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2398       !FormatTok->isStringLiteral())
2399     return;
2400
2401   while (!eof()) {
2402     if (FormatTok->is(tok::semi))
2403       return;
2404     if (Line->Tokens.empty()) {
2405       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2406       // import statement should terminate.
2407       return;
2408     }
2409     if (FormatTok->is(tok::l_brace)) {
2410       FormatTok->BlockKind = BK_Block;
2411       nextToken();
2412       parseBracedList();
2413     } else {
2414       nextToken();
2415     }
2416   }
2417 }
2418
2419 void UnwrappedLineParser::parseStatementMacro() {
2420   nextToken();
2421   if (FormatTok->is(tok::l_paren))
2422     parseParens();
2423   if (FormatTok->is(tok::semi))
2424     nextToken();
2425   addUnwrappedLine();
2426 }
2427
2428 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2429                                                  StringRef Prefix = "") {
2430   llvm::dbgs() << Prefix << "Line(" << Line.Level
2431                << ", FSC=" << Line.FirstStartColumn << ")"
2432                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2433   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2434                                                     E = Line.Tokens.end();
2435        I != E; ++I) {
2436     llvm::dbgs() << I->Tok->Tok.getName() << "["
2437                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2438                  << "] ";
2439   }
2440   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2441                                                     E = Line.Tokens.end();
2442        I != E; ++I) {
2443     const UnwrappedLineNode &Node = *I;
2444     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2445              I = Node.Children.begin(),
2446              E = Node.Children.end();
2447          I != E; ++I) {
2448       printDebugInfo(*I, "\nChild: ");
2449     }
2450   }
2451   llvm::dbgs() << "\n";
2452 }
2453
2454 void UnwrappedLineParser::addUnwrappedLine() {
2455   if (Line->Tokens.empty())
2456     return;
2457   LLVM_DEBUG({
2458     if (CurrentLines == &Lines)
2459       printDebugInfo(*Line);
2460   });
2461   CurrentLines->push_back(std::move(*Line));
2462   Line->Tokens.clear();
2463   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2464   Line->FirstStartColumn = 0;
2465   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2466     CurrentLines->append(
2467         std::make_move_iterator(PreprocessorDirectives.begin()),
2468         std::make_move_iterator(PreprocessorDirectives.end()));
2469     PreprocessorDirectives.clear();
2470   }
2471   // Disconnect the current token from the last token on the previous line.
2472   FormatTok->Previous = nullptr;
2473 }
2474
2475 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2476
2477 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2478   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2479          FormatTok.NewlinesBefore > 0;
2480 }
2481
2482 // Checks if \p FormatTok is a line comment that continues the line comment
2483 // section on \p Line.
2484 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2485                                         const UnwrappedLine &Line,
2486                                         llvm::Regex &CommentPragmasRegex) {
2487   if (Line.Tokens.empty())
2488     return false;
2489
2490   StringRef IndentContent = FormatTok.TokenText;
2491   if (FormatTok.TokenText.startswith("//") ||
2492       FormatTok.TokenText.startswith("/*"))
2493     IndentContent = FormatTok.TokenText.substr(2);
2494   if (CommentPragmasRegex.match(IndentContent))
2495     return false;
2496
2497   // If Line starts with a line comment, then FormatTok continues the comment
2498   // section if its original column is greater or equal to the original start
2499   // column of the line.
2500   //
2501   // Define the min column token of a line as follows: if a line ends in '{' or
2502   // contains a '{' followed by a line comment, then the min column token is
2503   // that '{'. Otherwise, the min column token of the line is the first token of
2504   // the line.
2505   //
2506   // If Line starts with a token other than a line comment, then FormatTok
2507   // continues the comment section if its original column is greater than the
2508   // original start column of the min column token of the line.
2509   //
2510   // For example, the second line comment continues the first in these cases:
2511   //
2512   // // first line
2513   // // second line
2514   //
2515   // and:
2516   //
2517   // // first line
2518   //  // second line
2519   //
2520   // and:
2521   //
2522   // int i; // first line
2523   //  // second line
2524   //
2525   // and:
2526   //
2527   // do { // first line
2528   //      // second line
2529   //   int i;
2530   // } while (true);
2531   //
2532   // and:
2533   //
2534   // enum {
2535   //   a, // first line
2536   //    // second line
2537   //   b
2538   // };
2539   //
2540   // The second line comment doesn't continue the first in these cases:
2541   //
2542   //   // first line
2543   //  // second line
2544   //
2545   // and:
2546   //
2547   // int i; // first line
2548   // // second line
2549   //
2550   // and:
2551   //
2552   // do { // first line
2553   //   // second line
2554   //   int i;
2555   // } while (true);
2556   //
2557   // and:
2558   //
2559   // enum {
2560   //   a, // first line
2561   //   // second line
2562   // };
2563   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2564
2565   // Scan for '{//'. If found, use the column of '{' as a min column for line
2566   // comment section continuation.
2567   const FormatToken *PreviousToken = nullptr;
2568   for (const UnwrappedLineNode &Node : Line.Tokens) {
2569     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2570         isLineComment(*Node.Tok)) {
2571       MinColumnToken = PreviousToken;
2572       break;
2573     }
2574     PreviousToken = Node.Tok;
2575
2576     // Grab the last newline preceding a token in this unwrapped line.
2577     if (Node.Tok->NewlinesBefore > 0) {
2578       MinColumnToken = Node.Tok;
2579     }
2580   }
2581   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2582     MinColumnToken = PreviousToken;
2583   }
2584
2585   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2586                               MinColumnToken);
2587 }
2588
2589 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2590   bool JustComments = Line->Tokens.empty();
2591   for (SmallVectorImpl<FormatToken *>::const_iterator
2592            I = CommentsBeforeNextToken.begin(),
2593            E = CommentsBeforeNextToken.end();
2594        I != E; ++I) {
2595     // Line comments that belong to the same line comment section are put on the
2596     // same line since later we might want to reflow content between them.
2597     // Additional fine-grained breaking of line comment sections is controlled
2598     // by the class BreakableLineCommentSection in case it is desirable to keep
2599     // several line comment sections in the same unwrapped line.
2600     //
2601     // FIXME: Consider putting separate line comment sections as children to the
2602     // unwrapped line instead.
2603     (*I)->ContinuesLineCommentSection =
2604         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2605     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2606       addUnwrappedLine();
2607     pushToken(*I);
2608   }
2609   if (NewlineBeforeNext && JustComments)
2610     addUnwrappedLine();
2611   CommentsBeforeNextToken.clear();
2612 }
2613
2614 void UnwrappedLineParser::nextToken(int LevelDifference) {
2615   if (eof())
2616     return;
2617   flushComments(isOnNewLine(*FormatTok));
2618   pushToken(FormatTok);
2619   FormatToken *Previous = FormatTok;
2620   if (Style.Language != FormatStyle::LK_JavaScript)
2621     readToken(LevelDifference);
2622   else
2623     readTokenWithJavaScriptASI();
2624   FormatTok->Previous = Previous;
2625 }
2626
2627 void UnwrappedLineParser::distributeComments(
2628     const SmallVectorImpl<FormatToken *> &Comments,
2629     const FormatToken *NextTok) {
2630   // Whether or not a line comment token continues a line is controlled by
2631   // the method continuesLineCommentSection, with the following caveat:
2632   //
2633   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2634   // that each comment line from the trail is aligned with the next token, if
2635   // the next token exists. If a trail exists, the beginning of the maximal
2636   // trail is marked as a start of a new comment section.
2637   //
2638   // For example in this code:
2639   //
2640   // int a; // line about a
2641   //   // line 1 about b
2642   //   // line 2 about b
2643   //   int b;
2644   //
2645   // the two lines about b form a maximal trail, so there are two sections, the
2646   // first one consisting of the single comment "// line about a" and the
2647   // second one consisting of the next two comments.
2648   if (Comments.empty())
2649     return;
2650   bool ShouldPushCommentsInCurrentLine = true;
2651   bool HasTrailAlignedWithNextToken = false;
2652   unsigned StartOfTrailAlignedWithNextToken = 0;
2653   if (NextTok) {
2654     // We are skipping the first element intentionally.
2655     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2656       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2657         HasTrailAlignedWithNextToken = true;
2658         StartOfTrailAlignedWithNextToken = i;
2659       }
2660     }
2661   }
2662   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2663     FormatToken *FormatTok = Comments[i];
2664     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2665       FormatTok->ContinuesLineCommentSection = false;
2666     } else {
2667       FormatTok->ContinuesLineCommentSection =
2668           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2669     }
2670     if (!FormatTok->ContinuesLineCommentSection &&
2671         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2672       ShouldPushCommentsInCurrentLine = false;
2673     }
2674     if (ShouldPushCommentsInCurrentLine) {
2675       pushToken(FormatTok);
2676     } else {
2677       CommentsBeforeNextToken.push_back(FormatTok);
2678     }
2679   }
2680 }
2681
// Advances FormatTok to the next token that is not a comment, pulling tokens
// from the token source. On the way it:
//  - parses preprocessor directives that start at the fetched token,
//  - processes conflict-marker tokens (TT_Conflict*),
//  - skips tokens while the innermost PPStack entry is PP_Unreachable (unless
//    inside a preprocessor directive),
//  - collects comment tokens and hands them to distributeComments() together
//    with the token that follows them (or nullptr when the loop ends at eof()).
//
// LevelDifference is added to Line->Level for the line state in which a
// preprocessor directive encountered here is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that starts a line (or is the very first token) while we are not
    // already inside a directive begins a preprocessor directive. This is a
    // loop because FormatTok is re-examined after parsePPDirective() returns.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // Scoped object; it stays alive until the end of this loop iteration.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict-marker tokens are handled through the same
    // conditionalCompilation* calls; each marker is then replaced by the next
    // token from the source, which is forced to break before.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Skip this token when the innermost PPStack entry is marked unreachable;
    // 'continue' jumps to the !eof() check below.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // First non-comment token: distribute the collected comments relative to
    // it and stop reading.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended at eof() without finding a non-comment token, so there is
  // no next token to pass along.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2739
2740 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2741   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2742   if (MustBreakBeforeNextToken) {
2743     Line->Tokens.back().Tok->MustBreakBefore = true;
2744     MustBreakBeforeNextToken = false;
2745   }
2746 }
2747
2748 } // end namespace format
2749 } // end namespace clang