contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #define DEBUG_TYPE "format-parser"
  22
  23 namespace clang {
  24 namespace format {
  25
  26 class FormatTokenSource {
  27 public:
  28   virtual ~FormatTokenSource() {}
  29   virtual FormatToken *getNextToken() = 0;
  30
  31   virtual unsigned getPosition() = 0;
  32   virtual FormatToken *setPosition(unsigned Position) = 0;
  33 };
  34
  35 namespace {
  36
  37 class ScopedDeclarationState {
  38 public:
  39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  40                          bool MustBeDeclaration)
  41       : Line(Line), Stack(Stack) {
  42     Line.MustBeDeclaration = MustBeDeclaration;
  43     Stack.push_back(MustBeDeclaration);
  44   }
  45   ~ScopedDeclarationState() {
  46     Stack.pop_back();
  47     if (!Stack.empty())
  48       Line.MustBeDeclaration = Stack.back();
  49     else
  50       Line.MustBeDeclaration = true;
  51   }
  52
  53 private:
  54   UnwrappedLine &Line;
  55   std::vector<bool> &Stack;
  56 };
  57
  58 class ScopedMacroState : public FormatTokenSource {
  59 public:
  60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  61                    FormatToken *&ResetToken)
  62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  64         Token(nullptr) {
  65     TokenSource = this;
  66     Line.Level = 0;
  67     Line.InPPDirective = true;
  68   }
  69
  70   ~ScopedMacroState() override {
  71     TokenSource = PreviousTokenSource;
  72     ResetToken = Token;
  73     Line.InPPDirective = false;
  74     Line.Level = PreviousLineLevel;
  75   }
  76
  77   FormatToken *getNextToken() override {
  78     // The \c UnwrappedLineParser guards against this by never calling
  79     // \c getNextToken() after it has encountered the first eof token.
  80     assert(!eof());
  81     Token = PreviousTokenSource->getNextToken();
  82     if (eof())
  83       return getFakeEOF();
  84     return Token;
  85   }
  86
  87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  88
  89   FormatToken *setPosition(unsigned Position) override {
  90     Token = PreviousTokenSource->setPosition(Position);
  91     return Token;
  92   }
  93
  94 private:
  95   bool eof() { return Token && Token->HasUnescapedNewline; }
  96
  97   FormatToken *getFakeEOF() {
  98     static bool EOFInitialized = false;
  99     static FormatToken FormatTok;
 100     if (!EOFInitialized) {
 101       FormatTok.Tok.startToken();
 102       FormatTok.Tok.setKind(tok::eof);
 103       EOFInitialized = true;
 104     }
 105     return &FormatTok;
 106   }
 107
 108   UnwrappedLine &Line;
 109   FormatTokenSource *&TokenSource;
 110   FormatToken *&ResetToken;
 111   unsigned PreviousLineLevel;
 112   FormatTokenSource *PreviousTokenSource;
 113
 114   FormatToken *Token;
 115 };
 116
 117 } // end anonymous namespace
 118
 119 class ScopedLineState {
 120 public:
 121   ScopedLineState(UnwrappedLineParser &Parser,
 122                   bool SwitchToPreprocessorLines = false)
 123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 124     if (SwitchToPreprocessorLines)
 125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 126     else if (!Parser.Line->Tokens.empty())
 127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 128     PreBlockLine = std::move(Parser.Line);
 129     Parser.Line = llvm::make_unique<UnwrappedLine>();
 130     Parser.Line->Level = PreBlockLine->Level;
 131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 132   }
 133
 134   ~ScopedLineState() {
 135     if (!Parser.Line->Tokens.empty()) {
 136       Parser.addUnwrappedLine();
 137     }
 138     assert(Parser.Line->Tokens.empty());
 139     Parser.Line = std::move(PreBlockLine);
 140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 141       Parser.MustBreakBeforeNextToken = true;
 142     Parser.CurrentLines = OriginalLines;
 143   }
 144
 145 private:
 146   UnwrappedLineParser &Parser;
 147
 148   std::unique_ptr<UnwrappedLine> PreBlockLine;
 149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 150 };
 151
 152 class CompoundStatementIndenter {
 153 public:
 154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 155                             const FormatStyle &Style, unsigned &LineLevel)
 156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 157     if (Style.BraceWrapping.AfterControlStatement)
 158       Parser->addUnwrappedLine();
 159     if (Style.BraceWrapping.IndentBraces)
 160       ++LineLevel;
 161   }
 162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 163
 164 private:
 165   unsigned &LineLevel;
 166   unsigned OldLineLevel;
 167 };
 168
 169 namespace {
 170
 171 class IndexedTokenSource : public FormatTokenSource {
 172 public:
 173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 174       : Tokens(Tokens), Position(-1) {}
 175
 176   FormatToken *getNextToken() override {
 177     ++Position;
 178     return Tokens[Position];
 179   }
 180
 181   unsigned getPosition() override {
 182     assert(Position >= 0);
 183     return Position;
 184   }
 185
 186   FormatToken *setPosition(unsigned P) override {
 187     Position = P;
 188     return Tokens[Position];
 189   }
 190
 191   void reset() { Position = -1; }
 192
 193 private:
 194   ArrayRef<FormatToken *> Tokens;
 195   int Position;
 196 };
 197
 198 } // end anonymous namespace
 199
 200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 201                                          const AdditionalKeywords &Keywords,
 202                                          ArrayRef<FormatToken *> Tokens,
 203                                          UnwrappedLineConsumer &Callback)
 204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 205       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 206       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 207       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 208
 209 void UnwrappedLineParser::reset() {
 210   PPBranchLevel = -1;
 211   Line.reset(new UnwrappedLine);
 212   CommentsBeforeNextToken.clear();
 213   FormatTok = nullptr;
 214   MustBreakBeforeNextToken = false;
 215   PreprocessorDirectives.clear();
 216   CurrentLines = &Lines;
 217   DeclarationScopeStack.clear();
 218   PPStack.clear();
 219 }
 220
 221 void UnwrappedLineParser::parse() {
 222   IndexedTokenSource TokenSource(AllTokens);
 223   do {
 224     DEBUG(llvm::dbgs() << "----\n");
 225     reset();
 226     Tokens = &TokenSource;
 227     TokenSource.reset();
 228
 229     readToken();
 230     parseFile();
 231     // Create line with eof token.
 232     pushToken(FormatTok);
 233     addUnwrappedLine();
 234
 235     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 236                                                   E = Lines.end();
 237          I != E; ++I) {
 238       Callback.consumeUnwrappedLine(*I);
 239     }
 240     Callback.finishRun();
 241     Lines.clear();
 242     while (!PPLevelBranchIndex.empty() &&
 243            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 244       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 245       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 246     }
 247     if (!PPLevelBranchIndex.empty()) {
 248       ++PPLevelBranchIndex.back();
 249       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 250       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 251     }
 252   } while (!PPLevelBranchIndex.empty());
 253 }
 254
 255 void UnwrappedLineParser::parseFile() {
 256   // The top-level context in a file always has declarations, except for pre-
 257   // processor directives and JavaScript files.
 258   bool MustBeDeclaration =
 259       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 260   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 261                                           MustBeDeclaration);
 262   parseLevel(/*HasOpeningBrace=*/false);
 263   // Make sure to format the remaining tokens.
 264   flushComments(true);
 265   addUnwrappedLine();
 266 }
 267
 268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 269   bool SwitchLabelEncountered = false;
 270   do {
 271     tok::TokenKind kind = FormatTok->Tok.getKind();
 272     if (FormatTok->Type == TT_MacroBlockBegin) {
 273       kind = tok::l_brace;
 274     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 275       kind = tok::r_brace;
 276     }
 277
 278     switch (kind) {
 279     case tok::comment:
 280       nextToken();
 281       addUnwrappedLine();
 282       break;
 283     case tok::l_brace:
 284       // FIXME: Add parameter whether this can happen - if this happens, we must
 285       // be in a non-declaration context.
 286       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
 287         continue;
 288       parseBlock(/*MustBeDeclaration=*/false);
 289       addUnwrappedLine();
 290       break;
 291     case tok::r_brace:
 292       if (HasOpeningBrace)
 293         return;
 294       nextToken();
 295       addUnwrappedLine();
 296       break;
 297     case tok::kw_default:
 298     case tok::kw_case:
 299       if (!SwitchLabelEncountered &&
 300           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 301         ++Line->Level;
 302       SwitchLabelEncountered = true;
 303       parseStructuralElement();
 304       break;
 305     default:
 306       parseStructuralElement();
 307       break;
 308     }
 309   } while (!eof());
 310 }
 311
 312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 313   // We'll parse forward through the tokens until we hit
 314   // a closing brace or eof - note that getNextToken() will
 315   // parse macros, so this will magically work inside macro
 316   // definitions, too.
 317   unsigned StoredPosition = Tokens->getPosition();
 318   FormatToken *Tok = FormatTok;
 319   const FormatToken *PrevTok = getPreviousToken();
 320   // Keep a stack of positions of lbrace tokens. We will
 321   // update information about whether an lbrace starts a
 322   // braced init list or a different block during the loop.
 323   SmallVector<FormatToken *, 8> LBraceStack;
 324   assert(Tok->Tok.is(tok::l_brace));
 325   do {
 326     // Get next non-comment token.
 327     FormatToken *NextTok;
 328     unsigned ReadTokens = 0;
 329     do {
 330       NextTok = Tokens->getNextToken();
 331       ++ReadTokens;
 332     } while (NextTok->is(tok::comment));
 333
 334     switch (Tok->Tok.getKind()) {
 335     case tok::l_brace:
 336       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
 337           PrevTok->is(tok::colon))
 338         // A colon indicates this code is in a type, or a braced list following
 339         // a label in an object literal ({a: {b: 1}}).
 340         // The code below could be confused by semicolons between the individual
 341         // members in a type member list, which would normally trigger BK_Block.
 342         // In both cases, this must be parsed as an inline braced init.
 343         Tok->BlockKind = BK_BracedInit;
 344       else
 345         Tok->BlockKind = BK_Unknown;
 346       LBraceStack.push_back(Tok);
 347       break;
 348     case tok::r_brace:
 349       if (LBraceStack.empty())
 350         break;
 351       if (LBraceStack.back()->BlockKind == BK_Unknown) {
 352         bool ProbablyBracedList = false;
 353         if (Style.Language == FormatStyle::LK_Proto) {
 354           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 355         } else {
 356           // Using OriginalColumn to distinguish between ObjC methods and
 357           // binary operators is a bit hacky.
 358           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 359                                   NextTok->OriginalColumn == 0;
 360
 361           // If there is a comma, semicolon or right paren after the closing
 362           // brace, we assume this is a braced initializer list.  Note that
 363           // regardless how we mark inner braces here, we will overwrite the
 364           // BlockKind later if we parse a braced list (where all blocks
 365           // inside are by default braced lists), or when we explicitly detect
 366           // blocks (for example while parsing lambdas).
 367           ProbablyBracedList =
 368               (Style.Language == FormatStyle::LK_JavaScript &&
 369                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 370                                 Keywords.kw_as)) ||
 371               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 372                                tok::r_paren, tok::r_square, tok::l_brace,
 373                                tok::l_square, tok::l_paren, tok::ellipsis) ||
 374               (NextTok->is(tok::identifier) &&
 375                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
 376               (NextTok->is(tok::semi) &&
 377                (!ExpectClassBody || LBraceStack.size() != 1)) ||
 378               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 379         }
 380         if (ProbablyBracedList) {
 381           Tok->BlockKind = BK_BracedInit;
 382           LBraceStack.back()->BlockKind = BK_BracedInit;
 383         } else {
 384           Tok->BlockKind = BK_Block;
 385           LBraceStack.back()->BlockKind = BK_Block;
 386         }
 387       }
 388       LBraceStack.pop_back();
 389       break;
 390     case tok::at:
 391     case tok::semi:
 392     case tok::kw_if:
 393     case tok::kw_while:
 394     case tok::kw_for:
 395     case tok::kw_switch:
 396     case tok::kw_try:
 397     case tok::kw___try:
 398       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
 399         LBraceStack.back()->BlockKind = BK_Block;
 400       break;
 401     default:
 402       break;
 403     }
 404     PrevTok = Tok;
 405     Tok = NextTok;
 406   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 407
 408   // Assume other blocks for all unclosed opening braces.
 409   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 410     if (LBraceStack[i]->BlockKind == BK_Unknown)
 411       LBraceStack[i]->BlockKind = BK_Block;
 412   }
 413
 414   FormatTok = Tokens->setPosition(StoredPosition);
 415 }
 416
 417 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 418                                      bool MunchSemi) {
 419   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 420          "'{' or macro block token expected");
 421   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 422   FormatTok->BlockKind = BK_Block;
 423
 424   unsigned InitialLevel = Line->Level;
 425   nextToken();
 426
 427   if (MacroBlock && FormatTok->is(tok::l_paren))
 428     parseParens();
 429
 430   addUnwrappedLine();
 431   size_t OpeningLineIndex =
 432       Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
 433
 434   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 435                                           MustBeDeclaration);
 436   if (AddLevel)
 437     ++Line->Level;
 438   parseLevel(/*HasOpeningBrace=*/true);
 439
 440   if (eof())
 441     return;
 442
 443   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 444                  : !FormatTok->is(tok::r_brace)) {
 445     Line->Level = InitialLevel;
 446     FormatTok->BlockKind = BK_Block;
 447     return;
 448   }
 449
 450   nextToken(); // Munch the closing brace.
 451
 452   if (MacroBlock && FormatTok->is(tok::l_paren))
 453     parseParens();
 454
 455   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 456     nextToken();
 457   Line->Level = InitialLevel;
 458   Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 459 }
 460
 461 static bool isGoogScope(const UnwrappedLine &Line) {
 462   // FIXME: Closure-library specific stuff should not be hard-coded but be
 463   // configurable.
 464   if (Line.Tokens.size() < 4)
 465     return false;
 466   auto I = Line.Tokens.begin();
 467   if (I->Tok->TokenText != "goog")
 468     return false;
 469   ++I;
 470   if (I->Tok->isNot(tok::period))
 471     return false;
 472   ++I;
 473   if (I->Tok->TokenText != "scope")
 474     return false;
 475   ++I;
 476   return I->Tok->is(tok::l_paren);
 477 }
 478
 479 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 480                                    const FormatToken &InitialToken) {
 481   if (InitialToken.is(tok::kw_namespace))
 482     return Style.BraceWrapping.AfterNamespace;
 483   if (InitialToken.is(tok::kw_class))
 484     return Style.BraceWrapping.AfterClass;
 485   if (InitialToken.is(tok::kw_union))
 486     return Style.BraceWrapping.AfterUnion;
 487   if (InitialToken.is(tok::kw_struct))
 488     return Style.BraceWrapping.AfterStruct;
 489   return false;
 490 }
 491
 492 void UnwrappedLineParser::parseChildBlock() {
 493   FormatTok->BlockKind = BK_Block;
 494   nextToken();
 495   {
 496     bool GoogScope =
 497         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
 498     ScopedLineState LineState(*this);
 499     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 500                                             /*MustBeDeclaration=*/false);
 501     Line->Level += GoogScope ? 0 : 1;
 502     parseLevel(/*HasOpeningBrace=*/true);
 503     flushComments(isOnNewLine(*FormatTok));
 504     Line->Level -= GoogScope ? 0 : 1;
 505   }
 506   nextToken();
 507 }
 508
 509 void UnwrappedLineParser::parsePPDirective() {
 510   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 511   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 512   nextToken();
 513
 514   if (!FormatTok->Tok.getIdentifierInfo()) {
 515     parsePPUnknown();
 516     return;
 517   }
 518
 519   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 520   case tok::pp_define:
 521     parsePPDefine();
 522     return;
 523   case tok::pp_if:
 524     parsePPIf(/*IfDef=*/false);
 525     break;
 526   case tok::pp_ifdef:
 527   case tok::pp_ifndef:
 528     parsePPIf(/*IfDef=*/true);
 529     break;
 530   case tok::pp_else:
 531     parsePPElse();
 532     break;
 533   case tok::pp_elif:
 534     parsePPElIf();
 535     break;
 536   case tok::pp_endif:
 537     parsePPEndIf();
 538     break;
 539   default:
 540     parsePPUnknown();
 541     break;
 542   }
 543 }
 544
 545 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 546   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
 547     PPStack.push_back(PP_Unreachable);
 548   else
 549     PPStack.push_back(PP_Conditional);
 550 }
 551
 552 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 553   ++PPBranchLevel;
 554   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 555   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 556     PPLevelBranchIndex.push_back(0);
 557     PPLevelBranchCount.push_back(0);
 558   }
 559   PPChainBranchIndex.push(0);
 560   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 561   conditionalCompilationCondition(Unreachable || Skip);
 562 }
 563
 564 void UnwrappedLineParser::conditionalCompilationAlternative() {
 565   if (!PPStack.empty())
 566     PPStack.pop_back();
 567   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 568   if (!PPChainBranchIndex.empty())
 569     ++PPChainBranchIndex.top();
 570   conditionalCompilationCondition(
 571       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 572       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 573 }
 574
 575 void UnwrappedLineParser::conditionalCompilationEnd() {
 576   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 577   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 578     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 579       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 580     }
 581   }
 582   // Guard against #endif's without #if.
 583   if (PPBranchLevel > 0)
 584     --PPBranchLevel;
 585   if (!PPChainBranchIndex.empty())
 586     PPChainBranchIndex.pop();
 587   if (!PPStack.empty())
 588     PPStack.pop_back();
 589 }
 590
 591 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 592   bool IfNDef = FormatTok->is(tok::pp_ifndef);
 593   nextToken();
 594   bool Unreachable = false;
 595   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
 596     Unreachable = true;
 597   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
 598     Unreachable = true;
 599   conditionalCompilationStart(Unreachable);
 600   parsePPUnknown();
 601 }
 602
 603 void UnwrappedLineParser::parsePPElse() {
 604   conditionalCompilationAlternative();
 605   parsePPUnknown();
 606 }
 607
 608 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 609
 610 void UnwrappedLineParser::parsePPEndIf() {
 611   conditionalCompilationEnd();
 612   parsePPUnknown();
 613 }
 614
 615 void UnwrappedLineParser::parsePPDefine() {
 616   nextToken();
 617
 618   if (FormatTok->Tok.getKind() != tok::identifier) {
 619     parsePPUnknown();
 620     return;
 621   }
 622   nextToken();
 623   if (FormatTok->Tok.getKind() == tok::l_paren &&
 624       FormatTok->WhitespaceRange.getBegin() ==
 625           FormatTok->WhitespaceRange.getEnd()) {
 626     parseParens();
 627   }
 628   addUnwrappedLine();
 629   Line->Level = 1;
 630
 631   // Errors during a preprocessor directive can only affect the layout of the
 632   // preprocessor directive, and thus we ignore them. An alternative approach
 633   // would be to use the same approach we use on the file level (no
 634   // re-indentation if there was a structural error) within the macro
 635   // definition.
 636   parseFile();
 637 }
 638
 639 void UnwrappedLineParser::parsePPUnknown() {
 640   do {
 641     nextToken();
 642   } while (!eof());
 643   addUnwrappedLine();
 644 }
 645
 646 // Here we blacklist certain tokens that are not usually the first token in an
 647 // unwrapped line. This is used in attempt to distinguish macro calls without
 648 // trailing semicolons from other constructs split to several lines.
 649 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 650   // Semicolon can be a null-statement, l_square can be a start of a macro or
 651   // a C++11 attribute, but this doesn't seem to be common.
 652   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 653          Tok.isNot(tok::l_square) &&
 654          // Tokens that can only be used as binary operators and a part of
 655          // overloaded operator names.
 656          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 657          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 658          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 659          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 660          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 661          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 662          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 663          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 664          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 665          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 666          Tok.isNot(tok::lesslessequal) &&
 667          // Colon is used in labels, base class lists, initializer lists,
 668          // range-based for loops, ternary operator, but should never be the
 669          // first token in an unwrapped line.
 670          Tok.isNot(tok::colon) &&
 671          // 'noexcept' is a trailing annotation.
 672          Tok.isNot(tok::kw_noexcept);
 673 }
 674
 675 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
 676                           const FormatToken *FormatTok) {
 677   // FIXME: This returns true for C/C++ keywords like 'struct'.
 678   return FormatTok->is(tok::identifier) &&
 679          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
 680           !FormatTok->isOneOf(
 681               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
 682               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
 683               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
 684               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
 685               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
 686               Keywords.kw_instanceof, Keywords.kw_interface,
 687               Keywords.kw_throws));
 688 }
 689
 690 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
 691                                  const FormatToken *FormatTok) {
 692   return FormatTok->Tok.isLiteral() ||
 693          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
 694          mustBeJSIdent(Keywords, FormatTok);
 695 }
 696
 697 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
 698 // when encountered after a value (see mustBeJSIdentOrValue).
 699 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
 700                            const FormatToken *FormatTok) {
 701   return FormatTok->isOneOf(
 702       tok::kw_return, Keywords.kw_yield,
 703       // conditionals
 704       tok::kw_if, tok::kw_else,
 705       // loops
 706       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
 707       // switch/case
 708       tok::kw_switch, tok::kw_case,
 709       // exceptions
 710       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
 711       // declaration
 712       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
 713       Keywords.kw_async, Keywords.kw_function,
 714       // import/export
 715       Keywords.kw_import, tok::kw_export);
 716 }
 717
 718 // readTokenWithJavaScriptASI reads the next token and terminates the current
 719 // line if JavaScript Automatic Semicolon Insertion must
 720 // happen between the current token and the next token.
 721 //
 722 // This method is conservative - it cannot cover all edge cases of JavaScript,
 723 // but only aims to correctly handle certain well known cases. It *must not*
 724 // return true in speculative cases.
 725 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 726   FormatToken *Previous = FormatTok;
 727   readToken();
 728   FormatToken *Next = FormatTok;
 729
 730   bool IsOnSameLine =
 731       CommentsBeforeNextToken.empty()
 732           ? Next->NewlinesBefore == 0
 733           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
 734   if (IsOnSameLine)
 735     return;
 736
 737   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
 738   bool PreviousStartsTemplateExpr =
 739       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
 740   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
 741     // If the token before the previous one is an '@', the previous token is an
 742     // annotation and can precede another identifier/value.
 743     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
 744     if (PrePrevious->is(tok::at))
 745       return;
 746   }
 747   if (Next->is(tok::exclaim) && PreviousMustBeValue)
 748     return addUnwrappedLine();
 749   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
 750   bool NextEndsTemplateExpr =
 751       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
 752   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
 753       (PreviousMustBeValue ||
 754        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
 755                          tok::minusminus)))
 756     return addUnwrappedLine();
 757   if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
 758     return addUnwrappedLine();
 759 }
 760
 761 void UnwrappedLineParser::parseStructuralElement() {
 762   assert(!FormatTok->is(tok::l_brace));
 763   if (Style.Language == FormatStyle::LK_TableGen &&
 764       FormatTok->is(tok::pp_include)) {
 765     nextToken();
 766     if (FormatTok->is(tok::string_literal))
 767       nextToken();
 768     addUnwrappedLine();
 769     return;
 770   }
 771   switch (FormatTok->Tok.getKind()) {
 772   case tok::at:
 773     nextToken();
 774     if (FormatTok->Tok.is(tok::l_brace)) {
 775       parseBracedList();
 776       break;
 777     }
 778     switch (FormatTok->Tok.getObjCKeywordID()) {
 779     case tok::objc_public:
 780     case tok::objc_protected:
 781     case tok::objc_package:
 782     case tok::objc_private:
 783       return parseAccessSpecifier();
 784     case tok::objc_interface:
 785     case tok::objc_implementation:
 786       return parseObjCInterfaceOrImplementation();
 787     case tok::objc_protocol:
 788       return parseObjCProtocol();
 789     case tok::objc_end:
 790       return; // Handled by the caller.
 791     case tok::objc_optional:
 792     case tok::objc_required:
 793       nextToken();
 794       addUnwrappedLine();
 795       return;
 796     case tok::objc_autoreleasepool:
 797       nextToken();
 798       if (FormatTok->Tok.is(tok::l_brace)) {
 799         if (Style.BraceWrapping.AfterObjCDeclaration)
 800           addUnwrappedLine();
 801         parseBlock(/*MustBeDeclaration=*/false);
 802       }
 803       addUnwrappedLine();
 804       return;
 805     case tok::objc_try:
 806       // This branch isn't strictly necessary (the kw_try case below would
 807       // do this too after the tok::at is parsed above).  But be explicit.
 808       parseTryCatch();
 809       return;
 810     default:
 811       break;
 812     }
 813     break;
 814   case tok::kw_asm:
 815     nextToken();
 816     if (FormatTok->is(tok::l_brace)) {
 817       FormatTok->Type = TT_InlineASMBrace;
 818       nextToken();
 819       while (FormatTok && FormatTok->isNot(tok::eof)) {
 820         if (FormatTok->is(tok::r_brace)) {
 821           FormatTok->Type = TT_InlineASMBrace;
 822           nextToken();
 823           addUnwrappedLine();
 824           break;
 825         }
 826         FormatTok->Finalized = true;
 827         nextToken();
 828       }
 829     }
 830     break;
 831   case tok::kw_namespace:
 832     parseNamespace();
 833     return;
 834   case tok::kw_inline:
 835     nextToken();
 836     if (FormatTok->Tok.is(tok::kw_namespace)) {
 837       parseNamespace();
 838       return;
 839     }
 840     break;
 841   case tok::kw_public:
 842   case tok::kw_protected:
 843   case tok::kw_private:
 844     if (Style.Language == FormatStyle::LK_Java ||
 845         Style.Language == FormatStyle::LK_JavaScript)
 846       nextToken();
 847     else
 848       parseAccessSpecifier();
 849     return;
 850   case tok::kw_if:
 851     parseIfThenElse();
 852     return;
 853   case tok::kw_for:
 854   case tok::kw_while:
 855     parseForOrWhileLoop();
 856     return;
 857   case tok::kw_do:
 858     parseDoWhile();
 859     return;
 860   case tok::kw_switch:
 861     parseSwitch();
 862     return;
 863   case tok::kw_default:
 864     nextToken();
 865     parseLabel();
 866     return;
 867   case tok::kw_case:
 868     parseCaseLabel();
 869     return;
 870   case tok::kw_try:
 871   case tok::kw___try:
 872     parseTryCatch();
 873     return;
 874   case tok::kw_extern:
 875     nextToken();
 876     if (FormatTok->Tok.is(tok::string_literal)) {
 877       nextToken();
 878       if (FormatTok->Tok.is(tok::l_brace)) {
 879         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
 880         addUnwrappedLine();
 881         return;
 882       }
 883     }
 884     break;
 885   case tok::kw_export:
 886     if (Style.Language == FormatStyle::LK_JavaScript) {
 887       parseJavaScriptEs6ImportExport();
 888       return;
 889     }
 890     break;
 891   case tok::identifier:
 892     if (FormatTok->is(TT_ForEachMacro)) {
 893       parseForOrWhileLoop();
 894       return;
 895     }
 896     if (FormatTok->is(TT_MacroBlockBegin)) {
 897       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
 898                  /*MunchSemi=*/false);
 899       return;
 900     }
 901     if (FormatTok->is(Keywords.kw_import)) {
 902       if (Style.Language == FormatStyle::LK_JavaScript) {
 903         parseJavaScriptEs6ImportExport();
 904         return;
 905       }
 906       if (Style.Language == FormatStyle::LK_Proto) {
 907         nextToken();
 908         if (FormatTok->is(tok::kw_public))
 909           nextToken();
 910         if (!FormatTok->is(tok::string_literal))
 911           return;
 912         nextToken();
 913         if (FormatTok->is(tok::semi))
 914           nextToken();
 915         addUnwrappedLine();
 916         return;
 917       }
 918     }
 919     if (Style.isCpp() &&
 920         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
 921                            Keywords.kw_slots, Keywords.kw_qslots)) {
 922       nextToken();
 923       if (FormatTok->is(tok::colon)) {
 924         nextToken();
 925         addUnwrappedLine();
 926         return;
 927       }
 928     }
 929     // In all other cases, parse the declaration.
 930     break;
 931   default:
 932     break;
 933   }
 934   do {
 935     const FormatToken *Previous = getPreviousToken();
 936     switch (FormatTok->Tok.getKind()) {
 937     case tok::at:
 938       nextToken();
 939       if (FormatTok->Tok.is(tok::l_brace))
 940         parseBracedList();
 941       break;
 942     case tok::kw_enum:
 943       // Ignore if this is part of "template <enum ...".
 944       if (Previous && Previous->is(tok::less)) {
 945         nextToken();
 946         break;
 947       }
 948
 949       // parseEnum falls through and does not yet add an unwrapped line as an
 950       // enum definition can start a structural element.
 951       if (!parseEnum())
 952         break;
 953       // This only applies for C++.
 954       if (!Style.isCpp()) {
 955         addUnwrappedLine();
 956         return;
 957       }
 958       break;
 959     case tok::kw_typedef:
 960       nextToken();
 961       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
 962                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
 963         parseEnum();
 964       break;
 965     case tok::kw_struct:
 966     case tok::kw_union:
 967     case tok::kw_class:
 968       // parseRecord falls through and does not yet add an unwrapped line as a
 969       // record declaration or definition can start a structural element.
 970       parseRecord();
 971       // This does not apply for Java and JavaScript.
 972       if (Style.Language == FormatStyle::LK_Java ||
 973           Style.Language == FormatStyle::LK_JavaScript) {
 974         if (FormatTok->is(tok::semi))
 975           nextToken();
 976         addUnwrappedLine();
 977         return;
 978       }
 979       break;
 980     case tok::period:
 981       nextToken();
 982       // In Java, classes have an implicit static member "class".
 983       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
 984           FormatTok->is(tok::kw_class))
 985         nextToken();
 986       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
 987           FormatTok->Tok.getIdentifierInfo())
 988         // JavaScript only has pseudo keywords, all keywords are allowed to
 989         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
 990         nextToken();
 991       break;
 992     case tok::semi:
 993       nextToken();
 994       addUnwrappedLine();
 995       return;
 996     case tok::r_brace:
 997       addUnwrappedLine();
 998       return;
 999     case tok::l_paren:
1000       parseParens();
1001       break;
1002     case tok::kw_operator:
1003       nextToken();
1004       if (FormatTok->isBinaryOperator())
1005         nextToken();
1006       break;
1007     case tok::caret:
1008       nextToken();
1009       if (FormatTok->Tok.isAnyIdentifier() ||
1010           FormatTok->isSimpleTypeSpecifier())
1011         nextToken();
1012       if (FormatTok->is(tok::l_paren))
1013         parseParens();
1014       if (FormatTok->is(tok::l_brace))
1015         parseChildBlock();
1016       break;
1017     case tok::l_brace:
1018       if (!tryToParseBracedList()) {
1019         // A block outside of parentheses must be the last part of a
1020         // structural element.
1021         // FIXME: Figure out cases where this is not true, and add projections
1022         // for them (the one we know is missing are lambdas).
1023         if (Style.BraceWrapping.AfterFunction)
1024           addUnwrappedLine();
1025         FormatTok->Type = TT_FunctionLBrace;
1026         parseBlock(/*MustBeDeclaration=*/false);
1027         addUnwrappedLine();
1028         return;
1029       }
1030       // Otherwise this was a braced init list, and the structural
1031       // element continues.
1032       break;
1033     case tok::kw_try:
1034       // We arrive here when parsing function-try blocks.
1035       parseTryCatch();
1036       return;
1037     case tok::identifier: {
1038       if (FormatTok->is(TT_MacroBlockEnd)) {
1039         addUnwrappedLine();
1040         return;
1041       }
1042
1043       // Function declarations (as opposed to function expressions) are parsed
1044       // on their own unwrapped line by continuing this loop. Function
1045       // expressions (functions that are not on their own line) must not create
1046       // a new unwrapped line, so they are special cased below.
1047       size_t TokenCount = Line->Tokens.size();
1048       if (Style.Language == FormatStyle::LK_JavaScript &&
1049           FormatTok->is(Keywords.kw_function) &&
1050           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1051                                                      Keywords.kw_async)))) {
1052         tryToParseJSFunction();
1053         break;
1054       }
1055       if ((Style.Language == FormatStyle::LK_JavaScript ||
1056            Style.Language == FormatStyle::LK_Java) &&
1057           FormatTok->is(Keywords.kw_interface)) {
1058         if (Style.Language == FormatStyle::LK_JavaScript) {
1059           // In JavaScript/TypeScript, "interface" can be used as a standalone
1060           // identifier, e.g. in `var interface = 1;`. If "interface" is
1061           // followed by another identifier, it is very likely to be an actual
1062           // interface declaration.
1063           unsigned StoredPosition = Tokens->getPosition();
1064           FormatToken *Next = Tokens->getNextToken();
1065           FormatTok = Tokens->setPosition(StoredPosition);
1066           if (Next && !mustBeJSIdent(Keywords, Next)) {
1067             nextToken();
1068             break;
1069           }
1070         }
1071         parseRecord();
1072         addUnwrappedLine();
1073         return;
1074       }
1075
1076       // See if the following token should start a new unwrapped line.
1077       StringRef Text = FormatTok->TokenText;
1078       nextToken();
1079       if (Line->Tokens.size() == 1 &&
1080           // JS doesn't have macros, and within classes colons indicate fields,
1081           // not labels.
1082           Style.Language != FormatStyle::LK_JavaScript) {
1083         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1084           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1085           parseLabel();
1086           return;
1087         }
1088         // Recognize function-like macro usages without trailing semicolon as
1089         // well as free-standing macros like Q_OBJECT.
1090         bool FunctionLike = FormatTok->is(tok::l_paren);
1091         if (FunctionLike)
1092           parseParens();
1093
1094         bool FollowedByNewline =
1095             CommentsBeforeNextToken.empty()
1096                 ? FormatTok->NewlinesBefore > 0
1097                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1098
1099         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1100             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1101           addUnwrappedLine();
1102           return;
1103         }
1104       }
1105       break;
1106     }
1107     case tok::equal:
1108       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1109       // TT_JsFatArrow. They always start an expression or a child block if
1110       // followed by a curly.
1111       if (FormatTok->is(TT_JsFatArrow)) {
1112         nextToken();
1113         if (FormatTok->is(tok::l_brace))
1114           parseChildBlock();
1115         break;
1116       }
1117
1118       nextToken();
1119       if (FormatTok->Tok.is(tok::l_brace)) {
1120         parseBracedList();
1121       }
1122       break;
1123     case tok::l_square:
1124       parseSquare();
1125       break;
1126     case tok::kw_new:
1127       parseNew();
1128       break;
1129     default:
1130       nextToken();
1131       break;
1132     }
1133   } while (!eof());
1134 }
1135
1136 bool UnwrappedLineParser::tryToParseLambda() {
1137   if (!Style.isCpp()) {
1138     nextToken();
1139     return false;
1140   }
1141   const FormatToken* Previous = getPreviousToken();
1142   if (Previous &&
1143       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1144                          tok::kw_delete) ||
1145        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1146     nextToken();
1147     return false;
1148   }
1149   assert(FormatTok->is(tok::l_square));
1150   FormatToken &LSquare = *FormatTok;
1151   if (!tryToParseLambdaIntroducer())
1152     return false;
1153
1154   while (FormatTok->isNot(tok::l_brace)) {
1155     if (FormatTok->isSimpleTypeSpecifier()) {
1156       nextToken();
1157       continue;
1158     }
1159     switch (FormatTok->Tok.getKind()) {
1160     case tok::l_brace:
1161       break;
1162     case tok::l_paren:
1163       parseParens();
1164       break;
1165     case tok::amp:
1166     case tok::star:
1167     case tok::kw_const:
1168     case tok::comma:
1169     case tok::less:
1170     case tok::greater:
1171     case tok::identifier:
1172     case tok::numeric_constant:
1173     case tok::coloncolon:
1174     case tok::kw_mutable:
1175       nextToken();
1176       break;
1177     case tok::arrow:
1178       FormatTok->Type = TT_LambdaArrow;
1179       nextToken();
1180       break;
1181     default:
1182       return true;
1183     }
1184   }
1185   LSquare.Type = TT_LambdaLSquare;
1186   parseChildBlock();
1187   return true;
1188 }
1189
1190 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1191   nextToken();
1192   if (FormatTok->is(tok::equal)) {
1193     nextToken();
1194     if (FormatTok->is(tok::r_square)) {
1195       nextToken();
1196       return true;
1197     }
1198     if (FormatTok->isNot(tok::comma))
1199       return false;
1200     nextToken();
1201   } else if (FormatTok->is(tok::amp)) {
1202     nextToken();
1203     if (FormatTok->is(tok::r_square)) {
1204       nextToken();
1205       return true;
1206     }
1207     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1208       return false;
1209     }
1210     if (FormatTok->is(tok::comma))
1211       nextToken();
1212   } else if (FormatTok->is(tok::r_square)) {
1213     nextToken();
1214     return true;
1215   }
1216   do {
1217     if (FormatTok->is(tok::amp))
1218       nextToken();
1219     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1220       return false;
1221     nextToken();
1222     if (FormatTok->is(tok::ellipsis))
1223       nextToken();
1224     if (FormatTok->is(tok::comma)) {
1225       nextToken();
1226     } else if (FormatTok->is(tok::r_square)) {
1227       nextToken();
1228       return true;
1229     } else {
1230       return false;
1231     }
1232   } while (!eof());
1233   return false;
1234 }
1235
1236 void UnwrappedLineParser::tryToParseJSFunction() {
1237   assert(FormatTok->is(Keywords.kw_function) ||
1238          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1239   if (FormatTok->is(Keywords.kw_async))
1240     nextToken();
1241   // Consume "function".
1242   nextToken();
1243
1244   // Consume * (generator function). Treat it like C++'s overloaded operators.
1245   if (FormatTok->is(tok::star)) {
1246     FormatTok->Type = TT_OverloadedOperator;
1247     nextToken();
1248   }
1249
1250   // Consume function name.
1251   if (FormatTok->is(tok::identifier))
1252     nextToken();
1253
1254   if (FormatTok->isNot(tok::l_paren))
1255     return;
1256
1257   // Parse formal parameter list.
1258   parseParens();
1259
1260   if (FormatTok->is(tok::colon)) {
1261     // Parse a type definition.
1262     nextToken();
1263
1264     // Eat the type declaration. For braced inline object types, balance braces,
1265     // otherwise just parse until finding an l_brace for the function body.
1266     if (FormatTok->is(tok::l_brace))
1267       tryToParseBracedList();
1268     else
1269       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1270         nextToken();
1271   }
1272
1273   if (FormatTok->is(tok::semi))
1274     return;
1275
1276   parseChildBlock();
1277 }
1278
1279 bool UnwrappedLineParser::tryToParseBracedList() {
1280   if (FormatTok->BlockKind == BK_Unknown)
1281     calculateBraceTypes();
1282   assert(FormatTok->BlockKind != BK_Unknown);
1283   if (FormatTok->BlockKind == BK_Block)
1284     return false;
1285   parseBracedList();
1286   return true;
1287 }
1288
1289 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1290   bool HasError = false;
1291   nextToken();
1292
1293   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1294   // replace this by using parseAssigmentExpression() inside.
1295   do {
1296     if (Style.Language == FormatStyle::LK_JavaScript) {
1297       if (FormatTok->is(Keywords.kw_function) ||
1298           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1299         tryToParseJSFunction();
1300         continue;
1301       }
1302       if (FormatTok->is(TT_JsFatArrow)) {
1303         nextToken();
1304         // Fat arrows can be followed by simple expressions or by child blocks
1305         // in curly braces.
1306         if (FormatTok->is(tok::l_brace)) {
1307           parseChildBlock();
1308           continue;
1309         }
1310       }
1311       if (FormatTok->is(tok::l_brace)) {
1312         // Could be a method inside of a braced list `{a() { return 1; }}`.
1313         if (tryToParseBracedList())
1314           continue;
1315         parseChildBlock();
1316       }
1317     }
1318     switch (FormatTok->Tok.getKind()) {
1319     case tok::caret:
1320       nextToken();
1321       if (FormatTok->is(tok::l_brace)) {
1322         parseChildBlock();
1323       }
1324       break;
1325     case tok::l_square:
1326       tryToParseLambda();
1327       break;
1328     case tok::l_paren:
1329       parseParens();
1330       // JavaScript can just have free standing methods and getters/setters in
1331       // object literals. Detect them by a "{" following ")".
1332       if (Style.Language == FormatStyle::LK_JavaScript) {
1333         if (FormatTok->is(tok::l_brace))
1334           parseChildBlock();
1335         break;
1336       }
1337       break;
1338     case tok::l_brace:
1339       // Assume there are no blocks inside a braced init list apart
1340       // from the ones we explicitly parse out (like lambdas).
1341       FormatTok->BlockKind = BK_BracedInit;
1342       parseBracedList();
1343       break;
1344     case tok::r_brace:
1345       nextToken();
1346       return !HasError;
1347     case tok::semi:
1348       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1349       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1350       // used for error recovery if we have otherwise determined that this is
1351       // a braced list.
1352       if (Style.Language == FormatStyle::LK_JavaScript) {
1353         nextToken();
1354         break;
1355       }
1356       HasError = true;
1357       if (!ContinueOnSemicolons)
1358         return !HasError;
1359       nextToken();
1360       break;
1361     case tok::comma:
1362       nextToken();
1363       break;
1364     default:
1365       nextToken();
1366       break;
1367     }
1368   } while (!eof());
1369   return false;
1370 }
1371
1372 void UnwrappedLineParser::parseParens() {
1373   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1374   nextToken();
1375   do {
1376     switch (FormatTok->Tok.getKind()) {
1377     case tok::l_paren:
1378       parseParens();
1379       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1380         parseChildBlock();
1381       break;
1382     case tok::r_paren:
1383       nextToken();
1384       return;
1385     case tok::r_brace:
1386       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1387       return;
1388     case tok::l_square:
1389       tryToParseLambda();
1390       break;
1391     case tok::l_brace:
1392       if (!tryToParseBracedList())
1393         parseChildBlock();
1394       break;
1395     case tok::at:
1396       nextToken();
1397       if (FormatTok->Tok.is(tok::l_brace))
1398         parseBracedList();
1399       break;
1400     case tok::kw_class:
1401       if (Style.Language == FormatStyle::LK_JavaScript)
1402         parseRecord(/*ParseAsExpr=*/true);
1403       else
1404         nextToken();
1405       break;
1406     case tok::identifier:
1407       if (Style.Language == FormatStyle::LK_JavaScript &&
1408           (FormatTok->is(Keywords.kw_function) ||
1409            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1410         tryToParseJSFunction();
1411       else
1412         nextToken();
1413       break;
1414     default:
1415       nextToken();
1416       break;
1417     }
1418   } while (!eof());
1419 }
1420
1421 void UnwrappedLineParser::parseSquare() {
1422   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1423   if (tryToParseLambda())
1424     return;
1425   do {
1426     switch (FormatTok->Tok.getKind()) {
1427     case tok::l_paren:
1428       parseParens();
1429       break;
1430     case tok::r_square:
1431       nextToken();
1432       return;
1433     case tok::r_brace:
1434       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1435       return;
1436     case tok::l_square:
1437       parseSquare();
1438       break;
1439     case tok::l_brace: {
1440       if (!tryToParseBracedList())
1441         parseChildBlock();
1442       break;
1443     }
1444     case tok::at:
1445       nextToken();
1446       if (FormatTok->Tok.is(tok::l_brace))
1447         parseBracedList();
1448       break;
1449     default:
1450       nextToken();
1451       break;
1452     }
1453   } while (!eof());
1454 }
1455
1456 void UnwrappedLineParser::parseIfThenElse() {
1457   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1458   nextToken();
1459   if (FormatTok->Tok.is(tok::l_paren))
1460     parseParens();
1461   bool NeedsUnwrappedLine = false;
1462   if (FormatTok->Tok.is(tok::l_brace)) {
1463     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1464     parseBlock(/*MustBeDeclaration=*/false);
1465     if (Style.BraceWrapping.BeforeElse)
1466       addUnwrappedLine();
1467     else
1468       NeedsUnwrappedLine = true;
1469   } else {
1470     addUnwrappedLine();
1471     ++Line->Level;
1472     parseStructuralElement();
1473     --Line->Level;
1474   }
1475   if (FormatTok->Tok.is(tok::kw_else)) {
1476     nextToken();
1477     if (FormatTok->Tok.is(tok::l_brace)) {
1478       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1479       parseBlock(/*MustBeDeclaration=*/false);
1480       addUnwrappedLine();
1481     } else if (FormatTok->Tok.is(tok::kw_if)) {
1482       parseIfThenElse();
1483     } else {
1484       addUnwrappedLine();
1485       ++Line->Level;
1486       parseStructuralElement();
1487       if (FormatTok->is(tok::eof))
1488         addUnwrappedLine();
1489       --Line->Level;
1490     }
1491   } else if (NeedsUnwrappedLine) {
1492     addUnwrappedLine();
1493   }
1494 }
1495
1496 void UnwrappedLineParser::parseTryCatch() {
1497   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1498   nextToken();
1499   bool NeedsUnwrappedLine = false;
1500   if (FormatTok->is(tok::colon)) {
1501     // We are in a function try block, what comes is an initializer list.
1502     nextToken();
1503     while (FormatTok->is(tok::identifier)) {
1504       nextToken();
1505       if (FormatTok->is(tok::l_paren))
1506         parseParens();
1507       if (FormatTok->is(tok::comma))
1508         nextToken();
1509     }
1510   }
1511   // Parse try with resource.
1512   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1513     parseParens();
1514   }
1515   if (FormatTok->is(tok::l_brace)) {
1516     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1517     parseBlock(/*MustBeDeclaration=*/false);
1518     if (Style.BraceWrapping.BeforeCatch) {
1519       addUnwrappedLine();
1520     } else {
1521       NeedsUnwrappedLine = true;
1522     }
1523   } else if (!FormatTok->is(tok::kw_catch)) {
1524     // The C++ standard requires a compound-statement after a try.
1525     // If there's none, we try to assume there's a structuralElement
1526     // and try to continue.
1527     addUnwrappedLine();
1528     ++Line->Level;
1529     parseStructuralElement();
1530     --Line->Level;
1531   }
1532   while (1) {
1533     if (FormatTok->is(tok::at))
1534       nextToken();
1535     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1536                              tok::kw___finally) ||
1537           ((Style.Language == FormatStyle::LK_Java ||
1538             Style.Language == FormatStyle::LK_JavaScript) &&
1539            FormatTok->is(Keywords.kw_finally)) ||
1540           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1541            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1542       break;
1543     nextToken();
1544     while (FormatTok->isNot(tok::l_brace)) {
1545       if (FormatTok->is(tok::l_paren)) {
1546         parseParens();
1547         continue;
1548       }
1549       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1550         return;
1551       nextToken();
1552     }
1553     NeedsUnwrappedLine = false;
1554     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1555     parseBlock(/*MustBeDeclaration=*/false);
1556     if (Style.BraceWrapping.BeforeCatch)
1557       addUnwrappedLine();
1558     else
1559       NeedsUnwrappedLine = true;
1560   }
1561   if (NeedsUnwrappedLine)
1562     addUnwrappedLine();
1563 }
1564
1565 void UnwrappedLineParser::parseNamespace() {
1566   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1567
1568   const FormatToken &InitialToken = *FormatTok;
1569   nextToken();
1570   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1571     nextToken();
1572   if (FormatTok->Tok.is(tok::l_brace)) {
1573     if (ShouldBreakBeforeBrace(Style, InitialToken))
1574       addUnwrappedLine();
1575
1576     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1577                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1578                      DeclarationScopeStack.size() > 1);
1579     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1580     // Munch the semicolon after a namespace. This is more common than one would
1581     // think. Puttin the semicolon into its own line is very ugly.
1582     if (FormatTok->Tok.is(tok::semi))
1583       nextToken();
1584     addUnwrappedLine();
1585   }
1586   // FIXME: Add error handling.
1587 }
1588
1589 void UnwrappedLineParser::parseNew() {
1590   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1591   nextToken();
1592   if (Style.Language != FormatStyle::LK_Java)
1593     return;
1594
1595   // In Java, we can parse everything up to the parens, which aren't optional.
1596   do {
1597     // There should not be a ;, { or } before the new's open paren.
1598     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1599       return;
1600
1601     // Consume the parens.
1602     if (FormatTok->is(tok::l_paren)) {
1603       parseParens();
1604
1605       // If there is a class body of an anonymous class, consume that as child.
1606       if (FormatTok->is(tok::l_brace))
1607         parseChildBlock();
1608       return;
1609     }
1610     nextToken();
1611   } while (!eof());
1612 }
1613
1614 void UnwrappedLineParser::parseForOrWhileLoop() {
1615   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1616          "'for', 'while' or foreach macro expected");
1617   nextToken();
1618   if (FormatTok->Tok.is(tok::l_paren))
1619     parseParens();
1620   if (FormatTok->Tok.is(tok::l_brace)) {
1621     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1622     parseBlock(/*MustBeDeclaration=*/false);
1623     addUnwrappedLine();
1624   } else {
1625     addUnwrappedLine();
1626     ++Line->Level;
1627     parseStructuralElement();
1628     --Line->Level;
1629   }
1630 }
1631
1632 void UnwrappedLineParser::parseDoWhile() {
1633   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1634   nextToken();
1635   if (FormatTok->Tok.is(tok::l_brace)) {
1636     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1637     parseBlock(/*MustBeDeclaration=*/false);
1638     if (Style.BraceWrapping.IndentBraces)
1639       addUnwrappedLine();
1640   } else {
1641     addUnwrappedLine();
1642     ++Line->Level;
1643     parseStructuralElement();
1644     --Line->Level;
1645   }
1646
1647   // FIXME: Add error handling.
1648   if (!FormatTok->Tok.is(tok::kw_while)) {
1649     addUnwrappedLine();
1650     return;
1651   }
1652
1653   nextToken();
1654   parseStructuralElement();
1655 }
1656
1657 void UnwrappedLineParser::parseLabel() {
1658   nextToken();
1659   unsigned OldLineLevel = Line->Level;
1660   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1661     --Line->Level;
1662   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1663     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1664     parseBlock(/*MustBeDeclaration=*/false);
1665     if (FormatTok->Tok.is(tok::kw_break)) {
1666       if (Style.BraceWrapping.AfterControlStatement)
1667         addUnwrappedLine();
1668       parseStructuralElement();
1669     }
1670     addUnwrappedLine();
1671   } else {
1672     if (FormatTok->is(tok::semi))
1673       nextToken();
1674     addUnwrappedLine();
1675   }
1676   Line->Level = OldLineLevel;
1677   if (FormatTok->isNot(tok::l_brace)) {
1678     parseStructuralElement();
1679     addUnwrappedLine();
1680   }
1681 }
1682
1683 void UnwrappedLineParser::parseCaseLabel() {
1684   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1685   // FIXME: fix handling of complex expressions here.
1686   do {
1687     nextToken();
1688   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1689   parseLabel();
1690 }
1691
1692 void UnwrappedLineParser::parseSwitch() {
1693   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1694   nextToken();
1695   if (FormatTok->Tok.is(tok::l_paren))
1696     parseParens();
1697   if (FormatTok->Tok.is(tok::l_brace)) {
1698     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1699     parseBlock(/*MustBeDeclaration=*/false);
1700     addUnwrappedLine();
1701   } else {
1702     addUnwrappedLine();
1703     ++Line->Level;
1704     parseStructuralElement();
1705     --Line->Level;
1706   }
1707 }
1708
1709 void UnwrappedLineParser::parseAccessSpecifier() {
1710   nextToken();
1711   // Understand Qt's slots.
1712   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1713     nextToken();
1714   // Otherwise, we don't know what it is, and we'd better keep the next token.
1715   if (FormatTok->Tok.is(tok::colon))
1716     nextToken();
1717   addUnwrappedLine();
1718 }
1719
1720 bool UnwrappedLineParser::parseEnum() {
1721   // Won't be 'enum' for NS_ENUMs.
1722   if (FormatTok->Tok.is(tok::kw_enum))
1723     nextToken();
1724
1725   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1726   // declarations. An "enum" keyword followed by a colon would be a syntax
1727   // error and thus assume it is just an identifier.
1728   if (Style.Language == FormatStyle::LK_JavaScript &&
1729       FormatTok->isOneOf(tok::colon, tok::question))
1730     return false;
1731
1732   // Eat up enum class ...
1733   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1734     nextToken();
1735
1736   while (FormatTok->Tok.getIdentifierInfo() ||
1737          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1738                             tok::greater, tok::comma, tok::question)) {
1739     nextToken();
1740     // We can have macros or attributes in between 'enum' and the enum name.
1741     if (FormatTok->is(tok::l_paren))
1742       parseParens();
1743     if (FormatTok->is(tok::identifier)) {
1744       nextToken();
1745       // If there are two identifiers in a row, this is likely an elaborate
1746       // return type. In Java, this can be "implements", etc.
1747       if (Style.isCpp() && FormatTok->is(tok::identifier))
1748         return false;
1749     }
1750   }
1751
1752   // Just a declaration or something is wrong.
1753   if (FormatTok->isNot(tok::l_brace))
1754     return true;
1755   FormatTok->BlockKind = BK_Block;
1756
1757   if (Style.Language == FormatStyle::LK_Java) {
1758     // Java enums are different.
1759     parseJavaEnumBody();
1760     return true;
1761   }
1762   if (Style.Language == FormatStyle::LK_Proto) {
1763     parseBlock(/*MustBeDeclaration=*/true);
1764     return true;
1765   }
1766
1767   // Parse enum body.
1768   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1769   if (HasError) {
1770     if (FormatTok->is(tok::semi))
1771       nextToken();
1772     addUnwrappedLine();
1773   }
1774   return true;
1775
1776   // There is no addUnwrappedLine() here so that we fall through to parsing a
1777   // structural element afterwards. Thus, in "enum A {} n, m;",
1778   // "} n, m;" will end up in one unwrapped line.
1779 }
1780
1781 void UnwrappedLineParser::parseJavaEnumBody() {
1782   // Determine whether the enum is simple, i.e. does not have a semicolon or
1783   // constants with class bodies. Simple enums can be formatted like braced
1784   // lists, contracted to a single line, etc.
1785   unsigned StoredPosition = Tokens->getPosition();
1786   bool IsSimple = true;
1787   FormatToken *Tok = Tokens->getNextToken();
1788   while (Tok) {
1789     if (Tok->is(tok::r_brace))
1790       break;
1791     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1792       IsSimple = false;
1793       break;
1794     }
1795     // FIXME: This will also mark enums with braces in the arguments to enum
1796     // constants as "not simple". This is probably fine in practice, though.
1797     Tok = Tokens->getNextToken();
1798   }
1799   FormatTok = Tokens->setPosition(StoredPosition);
1800
1801   if (IsSimple) {
1802     parseBracedList();
1803     addUnwrappedLine();
1804     return;
1805   }
1806
1807   // Parse the body of a more complex enum.
1808   // First add a line for everything up to the "{".
1809   nextToken();
1810   addUnwrappedLine();
1811   ++Line->Level;
1812
1813   // Parse the enum constants.
1814   while (FormatTok) {
1815     if (FormatTok->is(tok::l_brace)) {
1816       // Parse the constant's class body.
1817       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1818                  /*MunchSemi=*/false);
1819     } else if (FormatTok->is(tok::l_paren)) {
1820       parseParens();
1821     } else if (FormatTok->is(tok::comma)) {
1822       nextToken();
1823       addUnwrappedLine();
1824     } else if (FormatTok->is(tok::semi)) {
1825       nextToken();
1826       addUnwrappedLine();
1827       break;
1828     } else if (FormatTok->is(tok::r_brace)) {
1829       addUnwrappedLine();
1830       break;
1831     } else {
1832       nextToken();
1833     }
1834   }
1835
1836   // Parse the class body after the enum's ";" if any.
1837   parseLevel(/*HasOpeningBrace=*/true);
1838   nextToken();
1839   --Line->Level;
1840   addUnwrappedLine();
1841 }
1842
1843 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1844   const FormatToken &InitialToken = *FormatTok;
1845   nextToken();
1846
1847   // The actual identifier can be a nested name specifier, and in macros
1848   // it is often token-pasted.
1849   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1850                             tok::kw___attribute, tok::kw___declspec,
1851                             tok::kw_alignas) ||
1852          ((Style.Language == FormatStyle::LK_Java ||
1853            Style.Language == FormatStyle::LK_JavaScript) &&
1854           FormatTok->isOneOf(tok::period, tok::comma))) {
1855     bool IsNonMacroIdentifier =
1856         FormatTok->is(tok::identifier) &&
1857         FormatTok->TokenText != FormatTok->TokenText.upper();
1858     nextToken();
1859     // We can have macros or attributes in between 'class' and the class name.
1860     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1861       parseParens();
1862   }
1863
1864   // Note that parsing away template declarations here leads to incorrectly
1865   // accepting function declarations as record declarations.
1866   // In general, we cannot solve this problem. Consider:
1867   // class A<int> B() {}
1868   // which can be a function definition or a class definition when B() is a
1869   // macro. If we find enough real-world cases where this is a problem, we
1870   // can parse for the 'template' keyword in the beginning of the statement,
1871   // and thus rule out the record production in case there is no template
1872   // (this would still leave us with an ambiguity between template function
1873   // and class declarations).
1874   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1875     while (!eof()) {
1876       if (FormatTok->is(tok::l_brace)) {
1877         calculateBraceTypes(/*ExpectClassBody=*/true);
1878         if (!tryToParseBracedList())
1879           break;
1880       }
1881       if (FormatTok->Tok.is(tok::semi))
1882         return;
1883       nextToken();
1884     }
1885   }
1886   if (FormatTok->Tok.is(tok::l_brace)) {
1887     if (ParseAsExpr) {
1888       parseChildBlock();
1889     } else {
1890       if (ShouldBreakBeforeBrace(Style, InitialToken))
1891         addUnwrappedLine();
1892
1893       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1894                  /*MunchSemi=*/false);
1895     }
1896   }
1897   // There is no addUnwrappedLine() here so that we fall through to parsing a
1898   // structural element afterwards. Thus, in "class A {} n, m;",
1899   // "} n, m;" will end up in one unwrapped line.
1900 }
1901
1902 void UnwrappedLineParser::parseObjCProtocolList() {
1903   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1904   do
1905     nextToken();
1906   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1907   nextToken(); // Skip '>'.
1908 }
1909
1910 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1911   do {
1912     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1913       nextToken();
1914       addUnwrappedLine();
1915       break;
1916     }
1917     if (FormatTok->is(tok::l_brace)) {
1918       parseBlock(/*MustBeDeclaration=*/false);
1919       // In ObjC interfaces, nothing should be following the "}".
1920       addUnwrappedLine();
1921     } else if (FormatTok->is(tok::r_brace)) {
1922       // Ignore stray "}". parseStructuralElement doesn't consume them.
1923       nextToken();
1924       addUnwrappedLine();
1925     } else {
1926       parseStructuralElement();
1927     }
1928   } while (!eof());
1929 }
1930
1931 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1932   nextToken();
1933   nextToken(); // interface name
1934
1935   // @interface can be followed by either a base class, or a category.
1936   if (FormatTok->Tok.is(tok::colon)) {
1937     nextToken();
1938     nextToken(); // base class name
1939   } else if (FormatTok->Tok.is(tok::l_paren))
1940     // Skip category, if present.
1941     parseParens();
1942
1943   if (FormatTok->Tok.is(tok::less))
1944     parseObjCProtocolList();
1945
1946   if (FormatTok->Tok.is(tok::l_brace)) {
1947     if (Style.BraceWrapping.AfterObjCDeclaration)
1948       addUnwrappedLine();
1949     parseBlock(/*MustBeDeclaration=*/true);
1950   }
1951
1952   // With instance variables, this puts '}' on its own line.  Without instance
1953   // variables, this ends the @interface line.
1954   addUnwrappedLine();
1955
1956   parseObjCUntilAtEnd();
1957 }
1958
1959 void UnwrappedLineParser::parseObjCProtocol() {
1960   nextToken();
1961   nextToken(); // protocol name
1962
1963   if (FormatTok->Tok.is(tok::less))
1964     parseObjCProtocolList();
1965
1966   // Check for protocol declaration.
1967   if (FormatTok->Tok.is(tok::semi)) {
1968     nextToken();
1969     return addUnwrappedLine();
1970   }
1971
1972   addUnwrappedLine();
1973   parseObjCUntilAtEnd();
1974 }
1975
1976 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1977   bool IsImport = FormatTok->is(Keywords.kw_import);
1978   assert(IsImport || FormatTok->is(tok::kw_export));
1979   nextToken();
1980
1981   // Consume the "default" in "export default class/function".
1982   if (FormatTok->is(tok::kw_default))
1983     nextToken();
1984
1985   // Consume "async function", "function" and "default function", so that these
1986   // get parsed as free-standing JS functions, i.e. do not require a trailing
1987   // semicolon.
1988   if (FormatTok->is(Keywords.kw_async))
1989     nextToken();
1990   if (FormatTok->is(Keywords.kw_function)) {
1991     nextToken();
1992     return;
1993   }
1994
1995   // For imports, `export *`, `export {...}`, consume the rest of the line up
1996   // to the terminating `;`. For everything else, just return and continue
1997   // parsing the structural element, i.e. the declaration or expression for
1998   // `export default`.
1999   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2000       !FormatTok->isStringLiteral())
2001     return;
2002
2003   while (!eof()) {
2004     if (FormatTok->is(tok::semi))
2005       return;
2006     if (Line->Tokens.size() == 0) {
2007       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2008       // import statement should terminate.
2009       return;
2010     }
2011     if (FormatTok->is(tok::l_brace)) {
2012       FormatTok->BlockKind = BK_Block;
2013       parseBracedList();
2014     } else {
2015       nextToken();
2016     }
2017   }
2018 }
2019
2020 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2021                                                  StringRef Prefix = "") {
2022   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2023                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2024   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2025                                                     E = Line.Tokens.end();
2026        I != E; ++I) {
2027     llvm::dbgs() << I->Tok->Tok.getName() << "["
2028                  << "T=" << I->Tok->Type
2029                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2030   }
2031   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2032                                                     E = Line.Tokens.end();
2033        I != E; ++I) {
2034     const UnwrappedLineNode &Node = *I;
2035     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2036              I = Node.Children.begin(),
2037              E = Node.Children.end();
2038          I != E; ++I) {
2039       printDebugInfo(*I, "\nChild: ");
2040     }
2041   }
2042   llvm::dbgs() << "\n";
2043 }
2044
2045 void UnwrappedLineParser::addUnwrappedLine() {
2046   if (Line->Tokens.empty())
2047     return;
2048   DEBUG({
2049     if (CurrentLines == &Lines)
2050       printDebugInfo(*Line);
2051   });
2052   CurrentLines->push_back(std::move(*Line));
2053   Line->Tokens.clear();
2054   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2055   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2056     CurrentLines->append(
2057         std::make_move_iterator(PreprocessorDirectives.begin()),
2058         std::make_move_iterator(PreprocessorDirectives.end()));
2059     PreprocessorDirectives.clear();
2060   }
2061 }
2062
2063 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2064
2065 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2066   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2067          FormatTok.NewlinesBefore > 0;
2068 }
2069
2070 static bool isLineComment(const FormatToken &FormatTok) {
2071   return FormatTok.is(tok::comment) &&
2072          FormatTok.TokenText.startswith("//");
2073 }
2074
2075 // Checks if \p FormatTok is a line comment that continues the line comment
2076 // section on \p Line.
2077 static bool continuesLineComment(const FormatToken &FormatTok,
2078                                  const UnwrappedLine &Line,
2079                                  llvm::Regex &CommentPragmasRegex) {
2080   if (Line.Tokens.empty())
2081     return false;
2082
2083   StringRef IndentContent = FormatTok.TokenText;
2084   if (FormatTok.TokenText.startswith("//") ||
2085       FormatTok.TokenText.startswith("/*"))
2086     IndentContent = FormatTok.TokenText.substr(2);
2087   if (CommentPragmasRegex.match(IndentContent))
2088     return false;
2089
2090   // If Line starts with a line comment, then FormatTok continues the comment
2091   // section if its original column is greater or equal to the original start
2092   // column of the line.
2093   //
2094   // Define the min column token of a line as follows: if a line ends in '{' or
2095   // contains a '{' followed by a line comment, then the min column token is
2096   // that '{'. Otherwise, the min column token of the line is the first token of
2097   // the line.
2098   //
2099   // If Line starts with a token other than a line comment, then FormatTok
2100   // continues the comment section if its original column is greater than the
2101   // original start column of the min column token of the line.
2102   //
2103   // For example, the second line comment continues the first in these cases:
2104   //
2105   // // first line
2106   // // second line
2107   //
2108   // and:
2109   //
2110   // // first line
2111   //  // second line
2112   //
2113   // and:
2114   //
2115   // int i; // first line
2116   //  // second line
2117   //
2118   // and:
2119   //
2120   // do { // first line
2121   //      // second line
2122   //   int i;
2123   // } while (true);
2124   //
2125   // and:
2126   //
2127   // enum {
2128   //   a, // first line
2129   //    // second line
2130   //   b
2131   // };
2132   //
2133   // The second line comment doesn't continue the first in these cases:
2134   //
2135   //   // first line
2136   //  // second line
2137   //
2138   // and:
2139   //
2140   // int i; // first line
2141   // // second line
2142   //
2143   // and:
2144   //
2145   // do { // first line
2146   //   // second line
2147   //   int i;
2148   // } while (true);
2149   //
2150   // and:
2151   //
2152   // enum {
2153   //   a, // first line
2154   //   // second line
2155   // };
2156   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2157
2158   // Scan for '{//'. If found, use the column of '{' as a min column for line
2159   // comment section continuation.
2160   const FormatToken *PreviousToken = nullptr;
2161   for (const UnwrappedLineNode &Node : Line.Tokens) {
2162     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2163         isLineComment(*Node.Tok)) {
2164       MinColumnToken = PreviousToken;
2165       break;
2166     }
2167     PreviousToken = Node.Tok;
2168
2169     // Grab the last newline preceding a token in this unwrapped line.
2170     if (Node.Tok->NewlinesBefore > 0) {
2171       MinColumnToken = Node.Tok;
2172     }
2173   }
2174   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2175     MinColumnToken = PreviousToken;
2176   }
2177
2178   unsigned MinContinueColumn =
2179       MinColumnToken->OriginalColumn +
2180       (isLineComment(*MinColumnToken) ? 0 : 1);
2181   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2182          isLineComment(*(Line.Tokens.back().Tok)) &&
2183          FormatTok.OriginalColumn >= MinContinueColumn;
2184 }
2185
2186 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2187   bool JustComments = Line->Tokens.empty();
2188   for (SmallVectorImpl<FormatToken *>::const_iterator
2189            I = CommentsBeforeNextToken.begin(),
2190            E = CommentsBeforeNextToken.end();
2191        I != E; ++I) {
2192     // Line comments that belong to the same line comment section are put on the
2193     // same line since later we might want to reflow content between them.
2194     // Additional fine-grained breaking of line comment sections is controlled
2195     // by the class BreakableLineCommentSection in case it is desirable to keep
2196     // several line comment sections in the same unwrapped line.
2197     //
2198     // FIXME: Consider putting separate line comment sections as children to the
2199     // unwrapped line instead.
2200     (*I)->ContinuesLineCommentSection =
2201         continuesLineComment(**I, *Line, CommentPragmasRegex);
2202     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2203       addUnwrappedLine();
2204     pushToken(*I);
2205   }
2206   if (NewlineBeforeNext && JustComments)
2207     addUnwrappedLine();
2208   CommentsBeforeNextToken.clear();
2209 }
2210
2211 void UnwrappedLineParser::nextToken() {
2212   if (eof())
2213     return;
2214   flushComments(isOnNewLine(*FormatTok));
2215   pushToken(FormatTok);
2216   if (Style.Language != FormatStyle::LK_JavaScript)
2217     readToken();
2218   else
2219     readTokenWithJavaScriptASI();
2220 }
2221
2222 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2223   // FIXME: This is a dirty way to access the previous token. Find a better
2224   // solution.
2225   if (!Line || Line->Tokens.empty())
2226     return nullptr;
2227   return Line->Tokens.back().Tok;
2228 }
2229
2230 void UnwrappedLineParser::distributeComments(
2231     const SmallVectorImpl<FormatToken *> &Comments,
2232     const FormatToken *NextTok) {
2233   // Whether or not a line comment token continues a line is controlled by
2234   // the method continuesLineComment, with the following caveat:
2235   //
2236   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2237   // that each comment line from the trail is aligned with the next token, if
2238   // the next token exists. If a trail exists, the beginning of the maximal
2239   // trail is marked as a start of a new comment section.
2240   //
2241   // For example in this code:
2242   //
2243   // int a; // line about a
2244   //   // line 1 about b
2245   //   // line 2 about b
2246   //   int b;
2247   //
2248   // the two lines about b form a maximal trail, so there are two sections, the
2249   // first one consisting of the single comment "// line about a" and the
2250   // second one consisting of the next two comments.
2251   if (Comments.empty())
2252     return;
2253   bool ShouldPushCommentsInCurrentLine = true;
2254   bool HasTrailAlignedWithNextToken = false;
2255   unsigned StartOfTrailAlignedWithNextToken = 0;
2256   if (NextTok) {
2257     // We are skipping the first element intentionally.
2258     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2259       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2260         HasTrailAlignedWithNextToken = true;
2261         StartOfTrailAlignedWithNextToken = i;
2262       }
2263     }
2264   }
2265   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2266     FormatToken *FormatTok = Comments[i];
2267     if (HasTrailAlignedWithNextToken &&
2268         i == StartOfTrailAlignedWithNextToken) {
2269       FormatTok->ContinuesLineCommentSection = false;
2270     } else {
2271       FormatTok->ContinuesLineCommentSection =
2272           continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
2273     }
2274     if (!FormatTok->ContinuesLineCommentSection &&
2275         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2276       ShouldPushCommentsInCurrentLine = false;
2277     }
2278     if (ShouldPushCommentsInCurrentLine) {
2279       pushToken(FormatTok);
2280     } else {
2281       CommentsBeforeNextToken.push_back(FormatTok);
2282     }
2283   }
2284 }
2285
2286 void UnwrappedLineParser::readToken() {
2287   SmallVector<FormatToken *, 1> Comments;
2288   do {
2289     FormatTok = Tokens->getNextToken();
2290     assert(FormatTok);
2291     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2292            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2293       distributeComments(Comments, FormatTok);
2294       Comments.clear();
2295       // If there is an unfinished unwrapped line, we flush the preprocessor
2296       // directives only after that unwrapped line was finished later.
2297       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2298       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2299       // Comments stored before the preprocessor directive need to be output
2300       // before the preprocessor directive, at the same level as the
2301       // preprocessor directive, as we consider them to apply to the directive.
2302       flushComments(isOnNewLine(*FormatTok));
2303       parsePPDirective();
2304     }
2305     while (FormatTok->Type == TT_ConflictStart ||
2306            FormatTok->Type == TT_ConflictEnd ||
2307            FormatTok->Type == TT_ConflictAlternative) {
2308       if (FormatTok->Type == TT_ConflictStart) {
2309         conditionalCompilationStart(/*Unreachable=*/false);
2310       } else if (FormatTok->Type == TT_ConflictAlternative) {
2311         conditionalCompilationAlternative();
2312       } else if (FormatTok->Type == TT_ConflictEnd) {
2313         conditionalCompilationEnd();
2314       }
2315       FormatTok = Tokens->getNextToken();
2316       FormatTok->MustBreakBefore = true;
2317     }
2318
2319     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2320         !Line->InPPDirective) {
2321       continue;
2322     }
2323
2324     if (!FormatTok->Tok.is(tok::comment)) {
2325       distributeComments(Comments, FormatTok);
2326       Comments.clear();
2327       return;
2328     }
2329
2330     Comments.push_back(FormatTok);
2331   } while (!eof());
2332
2333   distributeComments(Comments, nullptr);
2334   Comments.clear();
2335 }
2336
2337 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2338   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2339   if (MustBreakBeforeNextToken) {
2340     Line->Tokens.back().Tok->MustBreakBefore = true;
2341     MustBreakBeforeNextToken = false;
2342   }
2343 }
2344
2345 } // end namespace format
2346 } // end namespace clang