contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #define DEBUG_TYPE "format-parser"
  22
  23 namespace clang {
  24 namespace format {
  25
  26 class FormatTokenSource {
  27 public:
  28   virtual ~FormatTokenSource() {}
  29   virtual FormatToken *getNextToken() = 0;
  30
  31   virtual unsigned getPosition() = 0;
  32   virtual FormatToken *setPosition(unsigned Position) = 0;
  33 };
  34
  35 namespace {
  36
  37 class ScopedDeclarationState {
  38 public:
  39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  40                          bool MustBeDeclaration)
  41       : Line(Line), Stack(Stack) {
  42     Line.MustBeDeclaration = MustBeDeclaration;
  43     Stack.push_back(MustBeDeclaration);
  44   }
  45   ~ScopedDeclarationState() {
  46     Stack.pop_back();
  47     if (!Stack.empty())
  48       Line.MustBeDeclaration = Stack.back();
  49     else
  50       Line.MustBeDeclaration = true;
  51   }
  52
  53 private:
  54   UnwrappedLine &Line;
  55   std::vector<bool> &Stack;
  56 };
  57
  58 class ScopedMacroState : public FormatTokenSource {
  59 public:
  60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  61                    FormatToken *&ResetToken)
  62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  64         Token(nullptr) {
  65     TokenSource = this;
  66     Line.Level = 0;
  67     Line.InPPDirective = true;
  68   }
  69
  70   ~ScopedMacroState() override {
  71     TokenSource = PreviousTokenSource;
  72     ResetToken = Token;
  73     Line.InPPDirective = false;
  74     Line.Level = PreviousLineLevel;
  75   }
  76
  77   FormatToken *getNextToken() override {
  78     // The \c UnwrappedLineParser guards against this by never calling
  79     // \c getNextToken() after it has encountered the first eof token.
  80     assert(!eof());
  81     Token = PreviousTokenSource->getNextToken();
  82     if (eof())
  83       return getFakeEOF();
  84     return Token;
  85   }
  86
  87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  88
  89   FormatToken *setPosition(unsigned Position) override {
  90     Token = PreviousTokenSource->setPosition(Position);
  91     return Token;
  92   }
  93
  94 private:
  95   bool eof() { return Token && Token->HasUnescapedNewline; }
  96
  97   FormatToken *getFakeEOF() {
  98     static bool EOFInitialized = false;
  99     static FormatToken FormatTok;
 100     if (!EOFInitialized) {
 101       FormatTok.Tok.startToken();
 102       FormatTok.Tok.setKind(tok::eof);
 103       EOFInitialized = true;
 104     }
 105     return &FormatTok;
 106   }
 107
 108   UnwrappedLine &Line;
 109   FormatTokenSource *&TokenSource;
 110   FormatToken *&ResetToken;
 111   unsigned PreviousLineLevel;
 112   FormatTokenSource *PreviousTokenSource;
 113
 114   FormatToken *Token;
 115 };
 116
 117 } // end anonymous namespace
 118
 119 class ScopedLineState {
 120 public:
 121   ScopedLineState(UnwrappedLineParser &Parser,
 122                   bool SwitchToPreprocessorLines = false)
 123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 124     if (SwitchToPreprocessorLines)
 125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 126     else if (!Parser.Line->Tokens.empty())
 127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 128     PreBlockLine = std::move(Parser.Line);
 129     Parser.Line = llvm::make_unique<UnwrappedLine>();
 130     Parser.Line->Level = PreBlockLine->Level;
 131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 132   }
 133
 134   ~ScopedLineState() {
 135     if (!Parser.Line->Tokens.empty()) {
 136       Parser.addUnwrappedLine();
 137     }
 138     assert(Parser.Line->Tokens.empty());
 139     Parser.Line = std::move(PreBlockLine);
 140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 141       Parser.MustBreakBeforeNextToken = true;
 142     Parser.CurrentLines = OriginalLines;
 143   }
 144
 145 private:
 146   UnwrappedLineParser &Parser;
 147
 148   std::unique_ptr<UnwrappedLine> PreBlockLine;
 149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 150 };
 151
 152 class CompoundStatementIndenter {
 153 public:
 154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 155                             const FormatStyle &Style, unsigned &LineLevel)
 156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 157     if (Style.BraceWrapping.AfterControlStatement)
 158       Parser->addUnwrappedLine();
 159     if (Style.BraceWrapping.IndentBraces)
 160       ++LineLevel;
 161   }
 162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 163
 164 private:
 165   unsigned &LineLevel;
 166   unsigned OldLineLevel;
 167 };
 168
 169 namespace {
 170
 171 class IndexedTokenSource : public FormatTokenSource {
 172 public:
 173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 174       : Tokens(Tokens), Position(-1) {}
 175
 176   FormatToken *getNextToken() override {
 177     ++Position;
 178     return Tokens[Position];
 179   }
 180
 181   unsigned getPosition() override {
 182     assert(Position >= 0);
 183     return Position;
 184   }
 185
 186   FormatToken *setPosition(unsigned P) override {
 187     Position = P;
 188     return Tokens[Position];
 189   }
 190
 191   void reset() { Position = -1; }
 192
 193 private:
 194   ArrayRef<FormatToken *> Tokens;
 195   int Position;
 196 };
 197
 198 } // end anonymous namespace
 199
 200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 201                                          const AdditionalKeywords &Keywords,
 202                                          ArrayRef<FormatToken *> Tokens,
 203                                          UnwrappedLineConsumer &Callback)
 204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 205       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
 206       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
 207       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 208
 209 void UnwrappedLineParser::reset() {
 210   PPBranchLevel = -1;
 211   Line.reset(new UnwrappedLine);
 212   CommentsBeforeNextToken.clear();
 213   FormatTok = nullptr;
 214   MustBreakBeforeNextToken = false;
 215   PreprocessorDirectives.clear();
 216   CurrentLines = &Lines;
 217   DeclarationScopeStack.clear();
 218   PPStack.clear();
 219 }
 220
 221 void UnwrappedLineParser::parse() {
 222   IndexedTokenSource TokenSource(AllTokens);
 223   do {
 224     DEBUG(llvm::dbgs() << "----\n");
 225     reset();
 226     Tokens = &TokenSource;
 227     TokenSource.reset();
 228
 229     readToken();
 230     parseFile();
 231     // Create line with eof token.
 232     pushToken(FormatTok);
 233     addUnwrappedLine();
 234
 235     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 236                                                   E = Lines.end();
 237          I != E; ++I) {
 238       Callback.consumeUnwrappedLine(*I);
 239     }
 240     Callback.finishRun();
 241     Lines.clear();
 242     while (!PPLevelBranchIndex.empty() &&
 243            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 244       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 245       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 246     }
 247     if (!PPLevelBranchIndex.empty()) {
 248       ++PPLevelBranchIndex.back();
 249       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 250       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 251     }
 252   } while (!PPLevelBranchIndex.empty());
 253 }
 254
 255 void UnwrappedLineParser::parseFile() {
 256   // The top-level context in a file always has declarations, except for pre-
 257   // processor directives and JavaScript files.
 258   bool MustBeDeclaration =
 259       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 260   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 261                                           MustBeDeclaration);
 262   parseLevel(/*HasOpeningBrace=*/false);
 263   // Make sure to format the remaining tokens.
 264   flushComments(true);
 265   addUnwrappedLine();
 266 }
 267
 268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 269   bool SwitchLabelEncountered = false;
 270   do {
 271     tok::TokenKind kind = FormatTok->Tok.getKind();
 272     if (FormatTok->Type == TT_MacroBlockBegin) {
 273       kind = tok::l_brace;
 274     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 275       kind = tok::r_brace;
 276     }
 277
 278     switch (kind) {
 279     case tok::comment:
 280       nextToken();
 281       addUnwrappedLine();
 282       break;
 283     case tok::l_brace:
 284       // FIXME: Add parameter whether this can happen - if this happens, we must
 285       // be in a non-declaration context.
 286       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
 287         continue;
 288       parseBlock(/*MustBeDeclaration=*/false);
 289       addUnwrappedLine();
 290       break;
 291     case tok::r_brace:
 292       if (HasOpeningBrace)
 293         return;
 294       nextToken();
 295       addUnwrappedLine();
 296       break;
 297     case tok::kw_default:
 298     case tok::kw_case:
 299       if (!SwitchLabelEncountered &&
 300           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 301         ++Line->Level;
 302       SwitchLabelEncountered = true;
 303       parseStructuralElement();
 304       break;
 305     default:
 306       parseStructuralElement();
 307       break;
 308     }
 309   } while (!eof());
 310 }
 311
 312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 313   // We'll parse forward through the tokens until we hit
 314   // a closing brace or eof - note that getNextToken() will
 315   // parse macros, so this will magically work inside macro
 316   // definitions, too.
 317   unsigned StoredPosition = Tokens->getPosition();
 318   FormatToken *Tok = FormatTok;
 319   const FormatToken *PrevTok = getPreviousToken();
 320   // Keep a stack of positions of lbrace tokens. We will
 321   // update information about whether an lbrace starts a
 322   // braced init list or a different block during the loop.
 323   SmallVector<FormatToken *, 8> LBraceStack;
 324   assert(Tok->Tok.is(tok::l_brace));
 325   do {
 326     // Get next non-comment token.
 327     FormatToken *NextTok;
 328     unsigned ReadTokens = 0;
 329     do {
 330       NextTok = Tokens->getNextToken();
 331       ++ReadTokens;
 332     } while (NextTok->is(tok::comment));
 333
 334     switch (Tok->Tok.getKind()) {
 335     case tok::l_brace:
 336       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
 337           PrevTok->is(tok::colon))
 338         // A colon indicates this code is in a type, or a braced list following
 339         // a label in an object literal ({a: {b: 1}}).
 340         // The code below could be confused by semicolons between the individual
 341         // members in a type member list, which would normally trigger BK_Block.
 342         // In both cases, this must be parsed as an inline braced init.
 343         Tok->BlockKind = BK_BracedInit;
 344       else
 345         Tok->BlockKind = BK_Unknown;
 346       LBraceStack.push_back(Tok);
 347       break;
 348     case tok::r_brace:
 349       if (LBraceStack.empty())
 350         break;
 351       if (LBraceStack.back()->BlockKind == BK_Unknown) {
 352         bool ProbablyBracedList = false;
 353         if (Style.Language == FormatStyle::LK_Proto) {
 354           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 355         } else {
 356           // Using OriginalColumn to distinguish between ObjC methods and
 357           // binary operators is a bit hacky.
 358           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 359                                   NextTok->OriginalColumn == 0;
 360
 361           // If there is a comma, semicolon or right paren after the closing
 362           // brace, we assume this is a braced initializer list.  Note that
 363           // regardless how we mark inner braces here, we will overwrite the
 364           // BlockKind later if we parse a braced list (where all blocks
 365           // inside are by default braced lists), or when we explicitly detect
 366           // blocks (for example while parsing lambdas).
 367           ProbablyBracedList =
 368               (Style.Language == FormatStyle::LK_JavaScript &&
 369                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
 370                                 Keywords.kw_as)) ||
 371               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 372                                tok::r_paren, tok::r_square, tok::l_brace,
 373                                tok::l_square, tok::l_paren, tok::ellipsis) ||
 374               (NextTok->is(tok::identifier) &&
 375                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
 376               (NextTok->is(tok::semi) &&
 377                (!ExpectClassBody || LBraceStack.size() != 1)) ||
 378               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 379         }
 380         if (ProbablyBracedList) {
 381           Tok->BlockKind = BK_BracedInit;
 382           LBraceStack.back()->BlockKind = BK_BracedInit;
 383         } else {
 384           Tok->BlockKind = BK_Block;
 385           LBraceStack.back()->BlockKind = BK_Block;
 386         }
 387       }
 388       LBraceStack.pop_back();
 389       break;
 390     case tok::at:
 391     case tok::semi:
 392     case tok::kw_if:
 393     case tok::kw_while:
 394     case tok::kw_for:
 395     case tok::kw_switch:
 396     case tok::kw_try:
 397     case tok::kw___try:
 398       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
 399         LBraceStack.back()->BlockKind = BK_Block;
 400       break;
 401     default:
 402       break;
 403     }
 404     PrevTok = Tok;
 405     Tok = NextTok;
 406   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 407
 408   // Assume other blocks for all unclosed opening braces.
 409   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 410     if (LBraceStack[i]->BlockKind == BK_Unknown)
 411       LBraceStack[i]->BlockKind = BK_Block;
 412   }
 413
 414   FormatTok = Tokens->setPosition(StoredPosition);
 415 }
 416
 417 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 418                                      bool MunchSemi) {
 419   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 420          "'{' or macro block token expected");
 421   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 422   FormatTok->BlockKind = BK_Block;
 423
 424   unsigned InitialLevel = Line->Level;
 425   nextToken();
 426
 427   if (MacroBlock && FormatTok->is(tok::l_paren))
 428     parseParens();
 429
 430   addUnwrappedLine();
 431   size_t OpeningLineIndex =
 432       Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
 433
 434   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 435                                           MustBeDeclaration);
 436   if (AddLevel)
 437     ++Line->Level;
 438   parseLevel(/*HasOpeningBrace=*/true);
 439
 440   if (eof())
 441     return;
 442
 443   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 444                  : !FormatTok->is(tok::r_brace)) {
 445     Line->Level = InitialLevel;
 446     FormatTok->BlockKind = BK_Block;
 447     return;
 448   }
 449
 450   nextToken(); // Munch the closing brace.
 451
 452   if (MacroBlock && FormatTok->is(tok::l_paren))
 453     parseParens();
 454
 455   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 456     nextToken();
 457   Line->Level = InitialLevel;
 458   Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
 459 }
 460
 461 static bool isGoogScope(const UnwrappedLine &Line) {
 462   // FIXME: Closure-library specific stuff should not be hard-coded but be
 463   // configurable.
 464   if (Line.Tokens.size() < 4)
 465     return false;
 466   auto I = Line.Tokens.begin();
 467   if (I->Tok->TokenText != "goog")
 468     return false;
 469   ++I;
 470   if (I->Tok->isNot(tok::period))
 471     return false;
 472   ++I;
 473   if (I->Tok->TokenText != "scope")
 474     return false;
 475   ++I;
 476   return I->Tok->is(tok::l_paren);
 477 }
 478
 479 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 480                                    const FormatToken &InitialToken) {
 481   if (InitialToken.is(tok::kw_namespace))
 482     return Style.BraceWrapping.AfterNamespace;
 483   if (InitialToken.is(tok::kw_class))
 484     return Style.BraceWrapping.AfterClass;
 485   if (InitialToken.is(tok::kw_union))
 486     return Style.BraceWrapping.AfterUnion;
 487   if (InitialToken.is(tok::kw_struct))
 488     return Style.BraceWrapping.AfterStruct;
 489   return false;
 490 }
 491
 492 void UnwrappedLineParser::parseChildBlock() {
 493   FormatTok->BlockKind = BK_Block;
 494   nextToken();
 495   {
 496     bool GoogScope =
 497         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
 498     ScopedLineState LineState(*this);
 499     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 500                                             /*MustBeDeclaration=*/false);
 501     Line->Level += GoogScope ? 0 : 1;
 502     parseLevel(/*HasOpeningBrace=*/true);
 503     flushComments(isOnNewLine(*FormatTok));
 504     Line->Level -= GoogScope ? 0 : 1;
 505   }
 506   nextToken();
 507 }
 508
 509 void UnwrappedLineParser::parsePPDirective() {
 510   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 511   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 512   nextToken();
 513
 514   if (!FormatTok->Tok.getIdentifierInfo()) {
 515     parsePPUnknown();
 516     return;
 517   }
 518
 519   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 520   case tok::pp_define:
 521     parsePPDefine();
 522     return;
 523   case tok::pp_if:
 524     parsePPIf(/*IfDef=*/false);
 525     break;
 526   case tok::pp_ifdef:
 527   case tok::pp_ifndef:
 528     parsePPIf(/*IfDef=*/true);
 529     break;
 530   case tok::pp_else:
 531     parsePPElse();
 532     break;
 533   case tok::pp_elif:
 534     parsePPElIf();
 535     break;
 536   case tok::pp_endif:
 537     parsePPEndIf();
 538     break;
 539   default:
 540     parsePPUnknown();
 541     break;
 542   }
 543 }
 544
 545 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 546   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
 547     PPStack.push_back(PP_Unreachable);
 548   else
 549     PPStack.push_back(PP_Conditional);
 550 }
 551
 552 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 553   ++PPBranchLevel;
 554   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 555   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 556     PPLevelBranchIndex.push_back(0);
 557     PPLevelBranchCount.push_back(0);
 558   }
 559   PPChainBranchIndex.push(0);
 560   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 561   conditionalCompilationCondition(Unreachable || Skip);
 562 }
 563
 564 void UnwrappedLineParser::conditionalCompilationAlternative() {
 565   if (!PPStack.empty())
 566     PPStack.pop_back();
 567   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 568   if (!PPChainBranchIndex.empty())
 569     ++PPChainBranchIndex.top();
 570   conditionalCompilationCondition(
 571       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 572       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 573 }
 574
 575 void UnwrappedLineParser::conditionalCompilationEnd() {
 576   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 577   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 578     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 579       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 580     }
 581   }
 582   // Guard against #endif's without #if.
 583   if (PPBranchLevel > 0)
 584     --PPBranchLevel;
 585   if (!PPChainBranchIndex.empty())
 586     PPChainBranchIndex.pop();
 587   if (!PPStack.empty())
 588     PPStack.pop_back();
 589 }
 590
 591 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 592   bool IfNDef = FormatTok->is(tok::pp_ifndef);
 593   nextToken();
 594   bool Unreachable = false;
 595   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
 596     Unreachable = true;
 597   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
 598     Unreachable = true;
 599   conditionalCompilationStart(Unreachable);
 600   parsePPUnknown();
 601 }
 602
 603 void UnwrappedLineParser::parsePPElse() {
 604   conditionalCompilationAlternative();
 605   parsePPUnknown();
 606 }
 607
 608 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 609
 610 void UnwrappedLineParser::parsePPEndIf() {
 611   conditionalCompilationEnd();
 612   parsePPUnknown();
 613 }
 614
 615 void UnwrappedLineParser::parsePPDefine() {
 616   nextToken();
 617
 618   if (FormatTok->Tok.getKind() != tok::identifier) {
 619     parsePPUnknown();
 620     return;
 621   }
 622   nextToken();
 623   if (FormatTok->Tok.getKind() == tok::l_paren &&
 624       FormatTok->WhitespaceRange.getBegin() ==
 625           FormatTok->WhitespaceRange.getEnd()) {
 626     parseParens();
 627   }
 628   addUnwrappedLine();
 629   Line->Level = 1;
 630
 631   // Errors during a preprocessor directive can only affect the layout of the
 632   // preprocessor directive, and thus we ignore them. An alternative approach
 633   // would be to use the same approach we use on the file level (no
 634   // re-indentation if there was a structural error) within the macro
 635   // definition.
 636   parseFile();
 637 }
 638
 639 void UnwrappedLineParser::parsePPUnknown() {
 640   do {
 641     nextToken();
 642   } while (!eof());
 643   addUnwrappedLine();
 644 }
 645
 646 // Here we blacklist certain tokens that are not usually the first token in an
 647 // unwrapped line. This is used in attempt to distinguish macro calls without
 648 // trailing semicolons from other constructs split to several lines.
 649 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 650   // Semicolon can be a null-statement, l_square can be a start of a macro or
 651   // a C++11 attribute, but this doesn't seem to be common.
 652   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 653          Tok.isNot(tok::l_square) &&
 654          // Tokens that can only be used as binary operators and a part of
 655          // overloaded operator names.
 656          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 657          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 658          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 659          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 660          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 661          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 662          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 663          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 664          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 665          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 666          Tok.isNot(tok::lesslessequal) &&
 667          // Colon is used in labels, base class lists, initializer lists,
 668          // range-based for loops, ternary operator, but should never be the
 669          // first token in an unwrapped line.
 670          Tok.isNot(tok::colon) &&
 671          // 'noexcept' is a trailing annotation.
 672          Tok.isNot(tok::kw_noexcept);
 673 }
 674
 675 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
 676                           const FormatToken *FormatTok) {
 677   // FIXME: This returns true for C/C++ keywords like 'struct'.
 678   return FormatTok->is(tok::identifier) &&
 679          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
 680           !FormatTok->isOneOf(
 681               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
 682               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
 683               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
 684               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
 685               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
 686               Keywords.kw_instanceof, Keywords.kw_interface,
 687               Keywords.kw_throws));
 688 }
 689
 690 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
 691                                  const FormatToken *FormatTok) {
 692   return FormatTok->Tok.isLiteral() ||
 693          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
 694          mustBeJSIdent(Keywords, FormatTok);
 695 }
 696
 697 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
 698 // when encountered after a value (see mustBeJSIdentOrValue).
 699 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
 700                            const FormatToken *FormatTok) {
 701   return FormatTok->isOneOf(
 702       tok::kw_return, Keywords.kw_yield,
 703       // conditionals
 704       tok::kw_if, tok::kw_else,
 705       // loops
 706       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
 707       // switch/case
 708       tok::kw_switch, tok::kw_case,
 709       // exceptions
 710       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
 711       // declaration
 712       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
 713       Keywords.kw_async, Keywords.kw_function,
 714       // import/export
 715       Keywords.kw_import, tok::kw_export);
 716 }
 717
 718 // readTokenWithJavaScriptASI reads the next token and terminates the current
 719 // line if JavaScript Automatic Semicolon Insertion must
 720 // happen between the current token and the next token.
 721 //
 722 // This method is conservative - it cannot cover all edge cases of JavaScript,
 723 // but only aims to correctly handle certain well known cases. It *must not*
 724 // return true in speculative cases.
 725 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
 726   FormatToken *Previous = FormatTok;
 727   readToken();
 728   FormatToken *Next = FormatTok;
 729
 730   bool IsOnSameLine =
 731       CommentsBeforeNextToken.empty()
 732           ? Next->NewlinesBefore == 0
 733           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
 734   if (IsOnSameLine)
 735     return;
 736
 737   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
 738   bool PreviousStartsTemplateExpr =
 739       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
 740   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
 741     // If the token before the previous one is an '@', the previous token is an
 742     // annotation and can precede another identifier/value.
 743     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
 744     if (PrePrevious->is(tok::at))
 745       return;
 746   }
 747   if (Next->is(tok::exclaim) && PreviousMustBeValue)
 748     return addUnwrappedLine();
 749   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
 750   bool NextEndsTemplateExpr =
 751       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
 752   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
 753       (PreviousMustBeValue ||
 754        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
 755                          tok::minusminus)))
 756     return addUnwrappedLine();
 757   if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
 758     return addUnwrappedLine();
 759 }
 760
 761 void UnwrappedLineParser::parseStructuralElement() {
 762   assert(!FormatTok->is(tok::l_brace));
 763   if (Style.Language == FormatStyle::LK_TableGen &&
 764       FormatTok->is(tok::pp_include)) {
 765     nextToken();
 766     if (FormatTok->is(tok::string_literal))
 767       nextToken();
 768     addUnwrappedLine();
 769     return;
 770   }
 771   switch (FormatTok->Tok.getKind()) {
 772   case tok::at:
 773     nextToken();
 774     if (FormatTok->Tok.is(tok::l_brace)) {
 775       parseBracedList();
 776       break;
 777     }
 778     switch (FormatTok->Tok.getObjCKeywordID()) {
 779     case tok::objc_public:
 780     case tok::objc_protected:
 781     case tok::objc_package:
 782     case tok::objc_private:
 783       return parseAccessSpecifier();
 784     case tok::objc_interface:
 785     case tok::objc_implementation:
 786       return parseObjCInterfaceOrImplementation();
 787     case tok::objc_protocol:
 788       return parseObjCProtocol();
 789     case tok::objc_end:
 790       return; // Handled by the caller.
 791     case tok::objc_optional:
 792     case tok::objc_required:
 793       nextToken();
 794       addUnwrappedLine();
 795       return;
 796     case tok::objc_autoreleasepool:
 797       nextToken();
 798       if (FormatTok->Tok.is(tok::l_brace)) {
 799         if (Style.BraceWrapping.AfterObjCDeclaration)
 800           addUnwrappedLine();
 801         parseBlock(/*MustBeDeclaration=*/false);
 802       }
 803       addUnwrappedLine();
 804       return;
 805     case tok::objc_try:
 806       // This branch isn't strictly necessary (the kw_try case below would
 807       // do this too after the tok::at is parsed above).  But be explicit.
 808       parseTryCatch();
 809       return;
 810     default:
 811       break;
 812     }
 813     break;
 814   case tok::kw_asm:
 815     nextToken();
 816     if (FormatTok->is(tok::l_brace)) {
 817       FormatTok->Type = TT_InlineASMBrace;
 818       nextToken();
 819       while (FormatTok && FormatTok->isNot(tok::eof)) {
 820         if (FormatTok->is(tok::r_brace)) {
 821           FormatTok->Type = TT_InlineASMBrace;
 822           nextToken();
 823           addUnwrappedLine();
 824           break;
 825         }
 826         FormatTok->Finalized = true;
 827         nextToken();
 828       }
 829     }
 830     break;
 831   case tok::kw_namespace:
 832     parseNamespace();
 833     return;
 834   case tok::kw_inline:
 835     nextToken();
 836     if (FormatTok->Tok.is(tok::kw_namespace)) {
 837       parseNamespace();
 838       return;
 839     }
 840     break;
 841   case tok::kw_public:
 842   case tok::kw_protected:
 843   case tok::kw_private:
 844     if (Style.Language == FormatStyle::LK_Java ||
 845         Style.Language == FormatStyle::LK_JavaScript)
 846       nextToken();
 847     else
 848       parseAccessSpecifier();
 849     return;
 850   case tok::kw_if:
 851     parseIfThenElse();
 852     return;
 853   case tok::kw_for:
 854   case tok::kw_while:
 855     parseForOrWhileLoop();
 856     return;
 857   case tok::kw_do:
 858     parseDoWhile();
 859     return;
 860   case tok::kw_switch:
 861     parseSwitch();
 862     return;
 863   case tok::kw_default:
 864     nextToken();
 865     parseLabel();
 866     return;
 867   case tok::kw_case:
 868     parseCaseLabel();
 869     return;
 870   case tok::kw_try:
 871   case tok::kw___try:
 872     parseTryCatch();
 873     return;
 874   case tok::kw_extern:
 875     nextToken();
 876     if (FormatTok->Tok.is(tok::string_literal)) {
 877       nextToken();
 878       if (FormatTok->Tok.is(tok::l_brace)) {
 879         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
 880         addUnwrappedLine();
 881         return;
 882       }
 883     }
 884     break;
 885   case tok::kw_export:
 886     if (Style.Language == FormatStyle::LK_JavaScript) {
 887       parseJavaScriptEs6ImportExport();
 888       return;
 889     }
 890     break;
 891   case tok::identifier:
 892     if (FormatTok->is(TT_ForEachMacro)) {
 893       parseForOrWhileLoop();
 894       return;
 895     }
 896     if (FormatTok->is(TT_MacroBlockBegin)) {
 897       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
 898                  /*MunchSemi=*/false);
 899       return;
 900     }
 901     if (FormatTok->is(Keywords.kw_import)) {
 902       if (Style.Language == FormatStyle::LK_JavaScript) {
 903         parseJavaScriptEs6ImportExport();
 904         return;
 905       }
 906       if (Style.Language == FormatStyle::LK_Proto) {
 907         nextToken();
 908         if (FormatTok->is(tok::kw_public))
 909           nextToken();
 910         if (!FormatTok->is(tok::string_literal))
 911           return;
 912         nextToken();
 913         if (FormatTok->is(tok::semi))
 914           nextToken();
 915         addUnwrappedLine();
 916         return;
 917       }
 918     }
 919     if (Style.isCpp() &&
 920         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
 921                            Keywords.kw_slots, Keywords.kw_qslots)) {
 922       nextToken();
 923       if (FormatTok->is(tok::colon)) {
 924         nextToken();
 925         addUnwrappedLine();
 926         return;
 927       }
 928     }
 929     // In all other cases, parse the declaration.
 930     break;
 931   default:
 932     break;
 933   }
 934   do {
 935     const FormatToken *Previous = getPreviousToken();
 936     switch (FormatTok->Tok.getKind()) {
 937     case tok::at:
 938       nextToken();
 939       if (FormatTok->Tok.is(tok::l_brace))
 940         parseBracedList();
 941       break;
 942     case tok::kw_enum:
 943       // Ignore if this is part of "template <enum ...".
 944       if (Previous && Previous->is(tok::less)) {
 945         nextToken();
 946         break;
 947       }
 948
 949       // parseEnum falls through and does not yet add an unwrapped line as an
 950       // enum definition can start a structural element.
 951       if (!parseEnum())
 952         break;
 953       // This only applies for C++.
 954       if (!Style.isCpp()) {
 955         addUnwrappedLine();
 956         return;
 957       }
 958       break;
 959     case tok::kw_typedef:
 960       nextToken();
 961       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
 962                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
 963         parseEnum();
 964       break;
 965     case tok::kw_struct:
 966     case tok::kw_union:
 967     case tok::kw_class:
 968       // parseRecord falls through and does not yet add an unwrapped line as a
 969       // record declaration or definition can start a structural element.
 970       parseRecord();
 971       // This does not apply for Java and JavaScript.
 972       if (Style.Language == FormatStyle::LK_Java ||
 973           Style.Language == FormatStyle::LK_JavaScript) {
 974         if (FormatTok->is(tok::semi))
 975           nextToken();
 976         addUnwrappedLine();
 977         return;
 978       }
 979       break;
 980     case tok::period:
 981       nextToken();
 982       // In Java, classes have an implicit static member "class".
 983       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
 984           FormatTok->is(tok::kw_class))
 985         nextToken();
 986       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
 987           FormatTok->Tok.getIdentifierInfo())
 988         // JavaScript only has pseudo keywords, all keywords are allowed to
 989         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
 990         nextToken();
 991       break;
 992     case tok::semi:
 993       nextToken();
 994       addUnwrappedLine();
 995       return;
 996     case tok::r_brace:
 997       addUnwrappedLine();
 998       return;
 999     case tok::l_paren:
1000       parseParens();
1001       break;
1002     case tok::kw_operator:
1003       nextToken();
1004       if (FormatTok->isBinaryOperator())
1005         nextToken();
1006       break;
1007     case tok::caret:
1008       nextToken();
1009       if (FormatTok->Tok.isAnyIdentifier() ||
1010           FormatTok->isSimpleTypeSpecifier())
1011         nextToken();
1012       if (FormatTok->is(tok::l_paren))
1013         parseParens();
1014       if (FormatTok->is(tok::l_brace))
1015         parseChildBlock();
1016       break;
1017     case tok::l_brace:
1018       if (!tryToParseBracedList()) {
1019         // A block outside of parentheses must be the last part of a
1020         // structural element.
1021         // FIXME: Figure out cases where this is not true, and add projections
1022         // for them (the one we know is missing are lambdas).
1023         if (Style.BraceWrapping.AfterFunction)
1024           addUnwrappedLine();
1025         FormatTok->Type = TT_FunctionLBrace;
1026         parseBlock(/*MustBeDeclaration=*/false);
1027         addUnwrappedLine();
1028         return;
1029       }
1030       // Otherwise this was a braced init list, and the structural
1031       // element continues.
1032       break;
1033     case tok::kw_try:
1034       // We arrive here when parsing function-try blocks.
1035       parseTryCatch();
1036       return;
1037     case tok::identifier: {
1038       if (FormatTok->is(TT_MacroBlockEnd)) {
1039         addUnwrappedLine();
1040         return;
1041       }
1042
1043       // Parse function literal unless 'function' is the first token in a line
1044       // in which case this should be treated as a free-standing function.
1045       if (Style.Language == FormatStyle::LK_JavaScript &&
1046           (FormatTok->is(Keywords.kw_function) ||
1047            FormatTok->startsSequence(Keywords.kw_async,
1048                                      Keywords.kw_function)) &&
1049           Line->Tokens.size() > 0) {
1050         tryToParseJSFunction();
1051         break;
1052       }
1053       if ((Style.Language == FormatStyle::LK_JavaScript ||
1054            Style.Language == FormatStyle::LK_Java) &&
1055           FormatTok->is(Keywords.kw_interface)) {
1056         if (Style.Language == FormatStyle::LK_JavaScript) {
1057           // In JavaScript/TypeScript, "interface" can be used as a standalone
1058           // identifier, e.g. in `var interface = 1;`. If "interface" is
1059           // followed by another identifier, it is very like to be an actual
1060           // interface declaration.
1061           unsigned StoredPosition = Tokens->getPosition();
1062           FormatToken *Next = Tokens->getNextToken();
1063           FormatTok = Tokens->setPosition(StoredPosition);
1064           if (Next && !mustBeJSIdent(Keywords, Next)) {
1065             nextToken();
1066             break;
1067           }
1068         }
1069         parseRecord();
1070         addUnwrappedLine();
1071         return;
1072       }
1073
1074       // See if the following token should start a new unwrapped line.
1075       StringRef Text = FormatTok->TokenText;
1076       nextToken();
1077       if (Line->Tokens.size() == 1 &&
1078           // JS doesn't have macros, and within classes colons indicate fields,
1079           // not labels.
1080           Style.Language != FormatStyle::LK_JavaScript) {
1081         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1082           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1083           parseLabel();
1084           return;
1085         }
1086         // Recognize function-like macro usages without trailing semicolon as
1087         // well as free-standing macros like Q_OBJECT.
1088         bool FunctionLike = FormatTok->is(tok::l_paren);
1089         if (FunctionLike)
1090           parseParens();
1091
1092         bool FollowedByNewline =
1093             CommentsBeforeNextToken.empty()
1094                 ? FormatTok->NewlinesBefore > 0
1095                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1096
1097         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1098             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1099           addUnwrappedLine();
1100           return;
1101         }
1102       }
1103       break;
1104     }
1105     case tok::equal:
1106       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1107       // TT_JsFatArrow. The always start an expression or a child block if
1108       // followed by a curly.
1109       if (FormatTok->is(TT_JsFatArrow)) {
1110         nextToken();
1111         if (FormatTok->is(tok::l_brace))
1112           parseChildBlock();
1113         break;
1114       }
1115
1116       nextToken();
1117       if (FormatTok->Tok.is(tok::l_brace)) {
1118         parseBracedList();
1119       }
1120       break;
1121     case tok::l_square:
1122       parseSquare();
1123       break;
1124     case tok::kw_new:
1125       parseNew();
1126       break;
1127     default:
1128       nextToken();
1129       break;
1130     }
1131   } while (!eof());
1132 }
1133
1134 bool UnwrappedLineParser::tryToParseLambda() {
1135   if (!Style.isCpp()) {
1136     nextToken();
1137     return false;
1138   }
1139   const FormatToken* Previous = getPreviousToken();
1140   if (Previous &&
1141       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1142                          tok::kw_delete) ||
1143        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1144     nextToken();
1145     return false;
1146   }
1147   assert(FormatTok->is(tok::l_square));
1148   FormatToken &LSquare = *FormatTok;
1149   if (!tryToParseLambdaIntroducer())
1150     return false;
1151
1152   while (FormatTok->isNot(tok::l_brace)) {
1153     if (FormatTok->isSimpleTypeSpecifier()) {
1154       nextToken();
1155       continue;
1156     }
1157     switch (FormatTok->Tok.getKind()) {
1158     case tok::l_brace:
1159       break;
1160     case tok::l_paren:
1161       parseParens();
1162       break;
1163     case tok::amp:
1164     case tok::star:
1165     case tok::kw_const:
1166     case tok::comma:
1167     case tok::less:
1168     case tok::greater:
1169     case tok::identifier:
1170     case tok::numeric_constant:
1171     case tok::coloncolon:
1172     case tok::kw_mutable:
1173       nextToken();
1174       break;
1175     case tok::arrow:
1176       FormatTok->Type = TT_LambdaArrow;
1177       nextToken();
1178       break;
1179     default:
1180       return true;
1181     }
1182   }
1183   LSquare.Type = TT_LambdaLSquare;
1184   parseChildBlock();
1185   return true;
1186 }
1187
1188 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1189   nextToken();
1190   if (FormatTok->is(tok::equal)) {
1191     nextToken();
1192     if (FormatTok->is(tok::r_square)) {
1193       nextToken();
1194       return true;
1195     }
1196     if (FormatTok->isNot(tok::comma))
1197       return false;
1198     nextToken();
1199   } else if (FormatTok->is(tok::amp)) {
1200     nextToken();
1201     if (FormatTok->is(tok::r_square)) {
1202       nextToken();
1203       return true;
1204     }
1205     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1206       return false;
1207     }
1208     if (FormatTok->is(tok::comma))
1209       nextToken();
1210   } else if (FormatTok->is(tok::r_square)) {
1211     nextToken();
1212     return true;
1213   }
1214   do {
1215     if (FormatTok->is(tok::amp))
1216       nextToken();
1217     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1218       return false;
1219     nextToken();
1220     if (FormatTok->is(tok::ellipsis))
1221       nextToken();
1222     if (FormatTok->is(tok::comma)) {
1223       nextToken();
1224     } else if (FormatTok->is(tok::r_square)) {
1225       nextToken();
1226       return true;
1227     } else {
1228       return false;
1229     }
1230   } while (!eof());
1231   return false;
1232 }
1233
1234 void UnwrappedLineParser::tryToParseJSFunction() {
1235   assert(FormatTok->is(Keywords.kw_function) ||
1236          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1237   if (FormatTok->is(Keywords.kw_async))
1238     nextToken();
1239   // Consume "function".
1240   nextToken();
1241
1242   // Consume * (generator function). Treat it like C++'s overloaded operators.
1243   if (FormatTok->is(tok::star)) {
1244     FormatTok->Type = TT_OverloadedOperator;
1245     nextToken();
1246   }
1247
1248   // Consume function name.
1249   if (FormatTok->is(tok::identifier))
1250     nextToken();
1251
1252   if (FormatTok->isNot(tok::l_paren))
1253     return;
1254
1255   // Parse formal parameter list.
1256   parseParens();
1257
1258   if (FormatTok->is(tok::colon)) {
1259     // Parse a type definition.
1260     nextToken();
1261
1262     // Eat the type declaration. For braced inline object types, balance braces,
1263     // otherwise just parse until finding an l_brace for the function body.
1264     if (FormatTok->is(tok::l_brace))
1265       tryToParseBracedList();
1266     else
1267       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1268         nextToken();
1269   }
1270
1271   if (FormatTok->is(tok::semi))
1272     return;
1273
1274   parseChildBlock();
1275 }
1276
1277 bool UnwrappedLineParser::tryToParseBracedList() {
1278   if (FormatTok->BlockKind == BK_Unknown)
1279     calculateBraceTypes();
1280   assert(FormatTok->BlockKind != BK_Unknown);
1281   if (FormatTok->BlockKind == BK_Block)
1282     return false;
1283   parseBracedList();
1284   return true;
1285 }
1286
1287 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1288   bool HasError = false;
1289   nextToken();
1290
1291   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1292   // replace this by using parseAssigmentExpression() inside.
1293   do {
1294     if (Style.Language == FormatStyle::LK_JavaScript) {
1295       if (FormatTok->is(Keywords.kw_function) ||
1296           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1297         tryToParseJSFunction();
1298         continue;
1299       }
1300       if (FormatTok->is(TT_JsFatArrow)) {
1301         nextToken();
1302         // Fat arrows can be followed by simple expressions or by child blocks
1303         // in curly braces.
1304         if (FormatTok->is(tok::l_brace)) {
1305           parseChildBlock();
1306           continue;
1307         }
1308       }
1309       if (FormatTok->is(tok::l_brace)) {
1310         // Could be a method inside of a braced list `{a() { return 1; }}`.
1311         if (tryToParseBracedList())
1312           continue;
1313         parseChildBlock();
1314       }
1315     }
1316     switch (FormatTok->Tok.getKind()) {
1317     case tok::caret:
1318       nextToken();
1319       if (FormatTok->is(tok::l_brace)) {
1320         parseChildBlock();
1321       }
1322       break;
1323     case tok::l_square:
1324       tryToParseLambda();
1325       break;
1326     case tok::l_paren:
1327       parseParens();
1328       // JavaScript can just have free standing methods and getters/setters in
1329       // object literals. Detect them by a "{" following ")".
1330       if (Style.Language == FormatStyle::LK_JavaScript) {
1331         if (FormatTok->is(tok::l_brace))
1332           parseChildBlock();
1333         break;
1334       }
1335       break;
1336     case tok::l_brace:
1337       // Assume there are no blocks inside a braced init list apart
1338       // from the ones we explicitly parse out (like lambdas).
1339       FormatTok->BlockKind = BK_BracedInit;
1340       parseBracedList();
1341       break;
1342     case tok::r_brace:
1343       nextToken();
1344       return !HasError;
1345     case tok::semi:
1346       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1347       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1348       // used for error recovery if we have otherwise determined that this is
1349       // a braced list.
1350       if (Style.Language == FormatStyle::LK_JavaScript) {
1351         nextToken();
1352         break;
1353       }
1354       HasError = true;
1355       if (!ContinueOnSemicolons)
1356         return !HasError;
1357       nextToken();
1358       break;
1359     case tok::comma:
1360       nextToken();
1361       break;
1362     default:
1363       nextToken();
1364       break;
1365     }
1366   } while (!eof());
1367   return false;
1368 }
1369
1370 void UnwrappedLineParser::parseParens() {
1371   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1372   nextToken();
1373   do {
1374     switch (FormatTok->Tok.getKind()) {
1375     case tok::l_paren:
1376       parseParens();
1377       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1378         parseChildBlock();
1379       break;
1380     case tok::r_paren:
1381       nextToken();
1382       return;
1383     case tok::r_brace:
1384       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1385       return;
1386     case tok::l_square:
1387       tryToParseLambda();
1388       break;
1389     case tok::l_brace:
1390       if (!tryToParseBracedList())
1391         parseChildBlock();
1392       break;
1393     case tok::at:
1394       nextToken();
1395       if (FormatTok->Tok.is(tok::l_brace))
1396         parseBracedList();
1397       break;
1398     case tok::kw_class:
1399       if (Style.Language == FormatStyle::LK_JavaScript)
1400         parseRecord(/*ParseAsExpr=*/true);
1401       else
1402         nextToken();
1403       break;
1404     case tok::identifier:
1405       if (Style.Language == FormatStyle::LK_JavaScript &&
1406           (FormatTok->is(Keywords.kw_function) ||
1407            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1408         tryToParseJSFunction();
1409       else
1410         nextToken();
1411       break;
1412     default:
1413       nextToken();
1414       break;
1415     }
1416   } while (!eof());
1417 }
1418
1419 void UnwrappedLineParser::parseSquare() {
1420   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1421   if (tryToParseLambda())
1422     return;
1423   do {
1424     switch (FormatTok->Tok.getKind()) {
1425     case tok::l_paren:
1426       parseParens();
1427       break;
1428     case tok::r_square:
1429       nextToken();
1430       return;
1431     case tok::r_brace:
1432       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1433       return;
1434     case tok::l_square:
1435       parseSquare();
1436       break;
1437     case tok::l_brace: {
1438       if (!tryToParseBracedList())
1439         parseChildBlock();
1440       break;
1441     }
1442     case tok::at:
1443       nextToken();
1444       if (FormatTok->Tok.is(tok::l_brace))
1445         parseBracedList();
1446       break;
1447     default:
1448       nextToken();
1449       break;
1450     }
1451   } while (!eof());
1452 }
1453
1454 void UnwrappedLineParser::parseIfThenElse() {
1455   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1456   nextToken();
1457   if (FormatTok->Tok.is(tok::l_paren))
1458     parseParens();
1459   bool NeedsUnwrappedLine = false;
1460   if (FormatTok->Tok.is(tok::l_brace)) {
1461     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1462     parseBlock(/*MustBeDeclaration=*/false);
1463     if (Style.BraceWrapping.BeforeElse)
1464       addUnwrappedLine();
1465     else
1466       NeedsUnwrappedLine = true;
1467   } else {
1468     addUnwrappedLine();
1469     ++Line->Level;
1470     parseStructuralElement();
1471     --Line->Level;
1472   }
1473   if (FormatTok->Tok.is(tok::kw_else)) {
1474     nextToken();
1475     if (FormatTok->Tok.is(tok::l_brace)) {
1476       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1477       parseBlock(/*MustBeDeclaration=*/false);
1478       addUnwrappedLine();
1479     } else if (FormatTok->Tok.is(tok::kw_if)) {
1480       parseIfThenElse();
1481     } else {
1482       addUnwrappedLine();
1483       ++Line->Level;
1484       parseStructuralElement();
1485       if (FormatTok->is(tok::eof))
1486         addUnwrappedLine();
1487       --Line->Level;
1488     }
1489   } else if (NeedsUnwrappedLine) {
1490     addUnwrappedLine();
1491   }
1492 }
1493
1494 void UnwrappedLineParser::parseTryCatch() {
1495   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1496   nextToken();
1497   bool NeedsUnwrappedLine = false;
1498   if (FormatTok->is(tok::colon)) {
1499     // We are in a function try block, what comes is an initializer list.
1500     nextToken();
1501     while (FormatTok->is(tok::identifier)) {
1502       nextToken();
1503       if (FormatTok->is(tok::l_paren))
1504         parseParens();
1505       if (FormatTok->is(tok::comma))
1506         nextToken();
1507     }
1508   }
1509   // Parse try with resource.
1510   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1511     parseParens();
1512   }
1513   if (FormatTok->is(tok::l_brace)) {
1514     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1515     parseBlock(/*MustBeDeclaration=*/false);
1516     if (Style.BraceWrapping.BeforeCatch) {
1517       addUnwrappedLine();
1518     } else {
1519       NeedsUnwrappedLine = true;
1520     }
1521   } else if (!FormatTok->is(tok::kw_catch)) {
1522     // The C++ standard requires a compound-statement after a try.
1523     // If there's none, we try to assume there's a structuralElement
1524     // and try to continue.
1525     addUnwrappedLine();
1526     ++Line->Level;
1527     parseStructuralElement();
1528     --Line->Level;
1529   }
1530   while (1) {
1531     if (FormatTok->is(tok::at))
1532       nextToken();
1533     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1534                              tok::kw___finally) ||
1535           ((Style.Language == FormatStyle::LK_Java ||
1536             Style.Language == FormatStyle::LK_JavaScript) &&
1537            FormatTok->is(Keywords.kw_finally)) ||
1538           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1539            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1540       break;
1541     nextToken();
1542     while (FormatTok->isNot(tok::l_brace)) {
1543       if (FormatTok->is(tok::l_paren)) {
1544         parseParens();
1545         continue;
1546       }
1547       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1548         return;
1549       nextToken();
1550     }
1551     NeedsUnwrappedLine = false;
1552     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1553     parseBlock(/*MustBeDeclaration=*/false);
1554     if (Style.BraceWrapping.BeforeCatch)
1555       addUnwrappedLine();
1556     else
1557       NeedsUnwrappedLine = true;
1558   }
1559   if (NeedsUnwrappedLine)
1560     addUnwrappedLine();
1561 }
1562
1563 void UnwrappedLineParser::parseNamespace() {
1564   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1565
1566   const FormatToken &InitialToken = *FormatTok;
1567   nextToken();
1568   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1569     nextToken();
1570   if (FormatTok->Tok.is(tok::l_brace)) {
1571     if (ShouldBreakBeforeBrace(Style, InitialToken))
1572       addUnwrappedLine();
1573
1574     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1575                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1576                      DeclarationScopeStack.size() > 1);
1577     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1578     // Munch the semicolon after a namespace. This is more common than one would
1579     // think. Puttin the semicolon into its own line is very ugly.
1580     if (FormatTok->Tok.is(tok::semi))
1581       nextToken();
1582     addUnwrappedLine();
1583   }
1584   // FIXME: Add error handling.
1585 }
1586
1587 void UnwrappedLineParser::parseNew() {
1588   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1589   nextToken();
1590   if (Style.Language != FormatStyle::LK_Java)
1591     return;
1592
1593   // In Java, we can parse everything up to the parens, which aren't optional.
1594   do {
1595     // There should not be a ;, { or } before the new's open paren.
1596     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1597       return;
1598
1599     // Consume the parens.
1600     if (FormatTok->is(tok::l_paren)) {
1601       parseParens();
1602
1603       // If there is a class body of an anonymous class, consume that as child.
1604       if (FormatTok->is(tok::l_brace))
1605         parseChildBlock();
1606       return;
1607     }
1608     nextToken();
1609   } while (!eof());
1610 }
1611
1612 void UnwrappedLineParser::parseForOrWhileLoop() {
1613   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1614          "'for', 'while' or foreach macro expected");
1615   nextToken();
1616   if (FormatTok->Tok.is(tok::l_paren))
1617     parseParens();
1618   if (FormatTok->Tok.is(tok::l_brace)) {
1619     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1620     parseBlock(/*MustBeDeclaration=*/false);
1621     addUnwrappedLine();
1622   } else {
1623     addUnwrappedLine();
1624     ++Line->Level;
1625     parseStructuralElement();
1626     --Line->Level;
1627   }
1628 }
1629
1630 void UnwrappedLineParser::parseDoWhile() {
1631   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1632   nextToken();
1633   if (FormatTok->Tok.is(tok::l_brace)) {
1634     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1635     parseBlock(/*MustBeDeclaration=*/false);
1636     if (Style.BraceWrapping.IndentBraces)
1637       addUnwrappedLine();
1638   } else {
1639     addUnwrappedLine();
1640     ++Line->Level;
1641     parseStructuralElement();
1642     --Line->Level;
1643   }
1644
1645   // FIXME: Add error handling.
1646   if (!FormatTok->Tok.is(tok::kw_while)) {
1647     addUnwrappedLine();
1648     return;
1649   }
1650
1651   nextToken();
1652   parseStructuralElement();
1653 }
1654
1655 void UnwrappedLineParser::parseLabel() {
1656   nextToken();
1657   unsigned OldLineLevel = Line->Level;
1658   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1659     --Line->Level;
1660   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1661     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1662     parseBlock(/*MustBeDeclaration=*/false);
1663     if (FormatTok->Tok.is(tok::kw_break)) {
1664       if (Style.BraceWrapping.AfterControlStatement)
1665         addUnwrappedLine();
1666       parseStructuralElement();
1667     }
1668     addUnwrappedLine();
1669   } else {
1670     if (FormatTok->is(tok::semi))
1671       nextToken();
1672     addUnwrappedLine();
1673   }
1674   Line->Level = OldLineLevel;
1675   if (FormatTok->isNot(tok::l_brace)) {
1676     parseStructuralElement();
1677     addUnwrappedLine();
1678   }
1679 }
1680
1681 void UnwrappedLineParser::parseCaseLabel() {
1682   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1683   // FIXME: fix handling of complex expressions here.
1684   do {
1685     nextToken();
1686   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1687   parseLabel();
1688 }
1689
1690 void UnwrappedLineParser::parseSwitch() {
1691   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1692   nextToken();
1693   if (FormatTok->Tok.is(tok::l_paren))
1694     parseParens();
1695   if (FormatTok->Tok.is(tok::l_brace)) {
1696     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1697     parseBlock(/*MustBeDeclaration=*/false);
1698     addUnwrappedLine();
1699   } else {
1700     addUnwrappedLine();
1701     ++Line->Level;
1702     parseStructuralElement();
1703     --Line->Level;
1704   }
1705 }
1706
1707 void UnwrappedLineParser::parseAccessSpecifier() {
1708   nextToken();
1709   // Understand Qt's slots.
1710   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1711     nextToken();
1712   // Otherwise, we don't know what it is, and we'd better keep the next token.
1713   if (FormatTok->Tok.is(tok::colon))
1714     nextToken();
1715   addUnwrappedLine();
1716 }
1717
1718 bool UnwrappedLineParser::parseEnum() {
1719   // Won't be 'enum' for NS_ENUMs.
1720   if (FormatTok->Tok.is(tok::kw_enum))
1721     nextToken();
1722
1723   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1724   // declarations. An "enum" keyword followed by a colon would be a syntax
1725   // error and thus assume it is just an identifier.
1726   if (Style.Language == FormatStyle::LK_JavaScript &&
1727       FormatTok->isOneOf(tok::colon, tok::question))
1728     return false;
1729
1730   // Eat up enum class ...
1731   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1732     nextToken();
1733
1734   while (FormatTok->Tok.getIdentifierInfo() ||
1735          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1736                             tok::greater, tok::comma, tok::question)) {
1737     nextToken();
1738     // We can have macros or attributes in between 'enum' and the enum name.
1739     if (FormatTok->is(tok::l_paren))
1740       parseParens();
1741     if (FormatTok->is(tok::identifier)) {
1742       nextToken();
1743       // If there are two identifiers in a row, this is likely an elaborate
1744       // return type. In Java, this can be "implements", etc.
1745       if (Style.isCpp() && FormatTok->is(tok::identifier))
1746         return false;
1747     }
1748   }
1749
1750   // Just a declaration or something is wrong.
1751   if (FormatTok->isNot(tok::l_brace))
1752     return true;
1753   FormatTok->BlockKind = BK_Block;
1754
1755   if (Style.Language == FormatStyle::LK_Java) {
1756     // Java enums are different.
1757     parseJavaEnumBody();
1758     return true;
1759   }
1760   if (Style.Language == FormatStyle::LK_Proto) {
1761     parseBlock(/*MustBeDeclaration=*/true);
1762     return true;
1763   }
1764
1765   // Parse enum body.
1766   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1767   if (HasError) {
1768     if (FormatTok->is(tok::semi))
1769       nextToken();
1770     addUnwrappedLine();
1771   }
1772   return true;
1773
1774   // There is no addUnwrappedLine() here so that we fall through to parsing a
1775   // structural element afterwards. Thus, in "enum A {} n, m;",
1776   // "} n, m;" will end up in one unwrapped line.
1777 }
1778
1779 void UnwrappedLineParser::parseJavaEnumBody() {
1780   // Determine whether the enum is simple, i.e. does not have a semicolon or
1781   // constants with class bodies. Simple enums can be formatted like braced
1782   // lists, contracted to a single line, etc.
1783   unsigned StoredPosition = Tokens->getPosition();
1784   bool IsSimple = true;
1785   FormatToken *Tok = Tokens->getNextToken();
1786   while (Tok) {
1787     if (Tok->is(tok::r_brace))
1788       break;
1789     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1790       IsSimple = false;
1791       break;
1792     }
1793     // FIXME: This will also mark enums with braces in the arguments to enum
1794     // constants as "not simple". This is probably fine in practice, though.
1795     Tok = Tokens->getNextToken();
1796   }
1797   FormatTok = Tokens->setPosition(StoredPosition);
1798
1799   if (IsSimple) {
1800     parseBracedList();
1801     addUnwrappedLine();
1802     return;
1803   }
1804
1805   // Parse the body of a more complex enum.
1806   // First add a line for everything up to the "{".
1807   nextToken();
1808   addUnwrappedLine();
1809   ++Line->Level;
1810
1811   // Parse the enum constants.
1812   while (FormatTok) {
1813     if (FormatTok->is(tok::l_brace)) {
1814       // Parse the constant's class body.
1815       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1816                  /*MunchSemi=*/false);
1817     } else if (FormatTok->is(tok::l_paren)) {
1818       parseParens();
1819     } else if (FormatTok->is(tok::comma)) {
1820       nextToken();
1821       addUnwrappedLine();
1822     } else if (FormatTok->is(tok::semi)) {
1823       nextToken();
1824       addUnwrappedLine();
1825       break;
1826     } else if (FormatTok->is(tok::r_brace)) {
1827       addUnwrappedLine();
1828       break;
1829     } else {
1830       nextToken();
1831     }
1832   }
1833
1834   // Parse the class body after the enum's ";" if any.
1835   parseLevel(/*HasOpeningBrace=*/true);
1836   nextToken();
1837   --Line->Level;
1838   addUnwrappedLine();
1839 }
1840
     // Parses a record declaration or definition. The current token on entry is
     // remembered as InitialToken and consumed; then the record's name (possibly
     // qualified, token-pasted or preceded by attributes/macros), an optional
     // base-clause / template argument part, and finally the body are handled.
     //
     // \param ParseAsExpr if true, a "{ ... }" body is parsed with
     //        parseChildBlock(); otherwise it is parsed with parseBlock(), with an
     //        unwrapped line break before the brace when ShouldBreakBeforeBrace()
     //        says so.
     //
     // Returns early, without touching the ";", when a ";" is met while scanning a
     // ":" / "<" section, i.e. when there is no body.
1841 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1842   const FormatToken &InitialToken = *FormatTok;
1843   nextToken();
1844
1845   // The actual identifier can be a nested name specifier, and in macros
1846   // it is often token-pasted.
1847   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1848                             tok::kw___attribute, tok::kw___declspec,
1849                             tok::kw_alignas) ||
1850          ((Style.Language == FormatStyle::LK_Java ||
1851            Style.Language == FormatStyle::LK_JavaScript) &&
1852           FormatTok->isOneOf(tok::period, tok::comma))) {
         // An identifier whose text differs from its upper-cased form is taken to
         // be a real name, not a macro. Only after tokens that are NOT such
         // identifiers is a following "(...)" skipped as macro/attribute
         // arguments.
1853     bool IsNonMacroIdentifier =
1854         FormatTok->is(tok::identifier) &&
1855         FormatTok->TokenText != FormatTok->TokenText.upper();
1856     nextToken();
1857     // We can have macros or attributes in between 'class' and the class name.
1858     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1859       parseParens();
1860   }
1861
1862   // Note that parsing away template declarations here leads to incorrectly
1863   // accepting function declarations as record declarations.
1864   // In general, we cannot solve this problem. Consider:
1865   // class A<int> B() {}
1866   // which can be a function definition or a class definition when B() is a
1867   // macro. If we find enough real-world cases where this is a problem, we
1868   // can parse for the 'template' keyword in the beginning of the statement,
1869   // and thus rule out the record production in case there is no template
1870   // (this would still leave us with an ambiguity between template function
1871   // and class declarations).
1872   if (FormatTok->isOneOf(tok::colon, tok::less)) {
         // Skip forward token by token until the record body, a ";" or eof.
1873     while (!eof()) {
1874       if (FormatTok->is(tok::l_brace)) {
             // A "{" here is either a braced list inside the skipped section or
             // the start of the record body. If it does not parse as a braced
             // list, stop and let the code below handle it as the body.
1875         calculateBraceTypes(/*ExpectClassBody=*/true);
1876         if (!tryToParseBracedList())
1877           break;
1878       }
           // Declaration without a body: leave the ";" to the caller.
1879       if (FormatTok->Tok.is(tok::semi))
1880         return;
1881       nextToken();
1882     }
1883   }
1884   if (FormatTok->Tok.is(tok::l_brace)) {
1885     if (ParseAsExpr) {
1886       parseChildBlock();
1887     } else {
1888       if (ShouldBreakBeforeBrace(Style, InitialToken))
1889         addUnwrappedLine();
1890
1891       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1892                  /*MunchSemi=*/false);
1893     }
1894   }
1895   // There is no addUnwrappedLine() here so that we fall through to parsing a
1896   // structural element afterwards. Thus, in "class A {} n, m;",
1897   // "} n, m;" will end up in one unwrapped line.
1898 }
1899
     // Skips an Objective-C protocol list of the form "<...>".
     //
     // Precondition (asserted): the current token is "<". Tokens are consumed up
     // to the next ">" or eof, then one more nextToken() consumes the ">". There
     // is no nesting logic: the first ">" ends the list. At eof the final
     // nextToken() is a no-op.
1900 void UnwrappedLineParser::parseObjCProtocolList() {
1901   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1902   do
1903     nextToken();
1904   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1905   nextToken(); // Skip '>'.
1906 }
1907
     // Parses the contents of an Objective-C container until "@end" or eof.
     //
     // Each iteration handles exactly one of:
     //  - "@end": consumed, the current line is finished and parsing stops;
     //  - "{": a block, followed by a line break;
     //  - a stray "}": consumed and put on its own line;
     //  - anything else: delegated to parseStructuralElement().
1908 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1909   do {
1910     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1911       nextToken();
1912       addUnwrappedLine();
1913       break;
1914     }
1915     if (FormatTok->is(tok::l_brace)) {
1916       parseBlock(/*MustBeDeclaration=*/false);
1917       // In ObjC interfaces, nothing should be following the "}".
1918       addUnwrappedLine();
1919     } else if (FormatTok->is(tok::r_brace)) {
1920       // Ignore stray "}". parseStructuralElement doesn't consume them.
           // Without this branch the loop could not make progress on a "}".
1921       nextToken();
1922       addUnwrappedLine();
1923     } else {
1924       parseStructuralElement();
1925     }
1926   } while (!eof());
1927 }
1928
     // Parses an Objective-C "@interface" / "@implementation" container: the
     // header (name, optional ": Base" or "(Category)", optional protocol list),
     // an optional "{ ... }" block, and then everything up to "@end".
1929 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
       // NOTE(review): the first nextToken() presumably skips the
       // interface/implementation keyword the caller dispatched on; the caller
       // is not visible from here.
1930   nextToken();
1931   nextToken(); // interface name
1932
1933   // @interface can be followed by either a base class, or a category.
1934   if (FormatTok->Tok.is(tok::colon)) {
1935     nextToken();
1936     nextToken(); // base class name
1937   } else if (FormatTok->Tok.is(tok::l_paren))
1938     // Skip category, if present.
1939     parseParens();
1940
1941   if (FormatTok->Tok.is(tok::less))
1942     parseObjCProtocolList();
1943
1944   if (FormatTok->Tok.is(tok::l_brace)) {
         // Optionally break before the "{", controlled by the style option.
1945     if (Style.BraceWrapping.AfterObjCDeclaration)
1946       addUnwrappedLine();
1947     parseBlock(/*MustBeDeclaration=*/true);
1948   }
1949
1950   // With instance variables, this puts '}' on its own line.  Without instance
1951   // variables, this ends the @interface line.
1952   addUnwrappedLine();
1953
1954   parseObjCUntilAtEnd();
1955 }
1956
     // Parses an Objective-C "@protocol" construct.
     //
     // After the name and an optional "<...>" protocol list, a ";" means this is
     // only a forward declaration: the ";" is consumed and the line is finished.
     // Otherwise the header line is finished and the protocol body is parsed up
     // to "@end".
1957 void UnwrappedLineParser::parseObjCProtocol() {
       // NOTE(review): the first nextToken() presumably skips the protocol
       // keyword the caller dispatched on; the caller is not visible from here.
1958   nextToken();
1959   nextToken(); // protocol name
1960
1961   if (FormatTok->Tok.is(tok::less))
1962     parseObjCProtocolList();
1963
1964   // Check for protocol declaration.
1965   if (FormatTok->Tok.is(tok::semi)) {
1966     nextToken();
1967     return addUnwrappedLine();
1968   }
1969
1970   addUnwrappedLine();
1971   parseObjCUntilAtEnd();
1972 }
1973
     // Parses the beginning of a JavaScript ES6 "import" or "export" statement.
     //
     // Precondition (asserted): the current token is "import" or "export".
     //
     // The keyword and an optional "default" are always consumed. Then:
     //  - "[async] function": "async" and "function" are consumed and the
     //    function is left to the caller;
     //  - an export that is not followed by "{", "*" or a string literal: return,
     //    so the caller parses the exported declaration or expression;
     //  - otherwise (imports, "export *", "export {...}"): consume tokens up to,
     //    but not including, the terminating ";".
1974 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1975   bool IsImport = FormatTok->is(Keywords.kw_import);
1976   assert(IsImport || FormatTok->is(tok::kw_export));
1977   nextToken();
1978
1979   // Consume the "default" in "export default class/function".
1980   if (FormatTok->is(tok::kw_default))
1981     nextToken();
1982
1983   // Consume "async function", "function" and "default function", so that these
1984   // get parsed as free-standing JS functions, i.e. do not require a trailing
1985   // semicolon.
1986   if (FormatTok->is(Keywords.kw_async))
1987     nextToken();
1988   if (FormatTok->is(Keywords.kw_function)) {
1989     nextToken();
1990     return;
1991   }
1992
1993   // For imports, `export *`, `export {...}`, consume the rest of the line up
1994   // to the terminating `;`. For everything else, just return and continue
1995   // parsing the structural element, i.e. the declaration or expression for
1996   // `export default`.
1997   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
1998       !FormatTok->isStringLiteral())
1999     return;
2000
2001   while (!eof()) {
2002     if (FormatTok->is(tok::semi))
2003       return;
         // An empty current line means an unwrapped line was finished while
         // reading tokens, so the statement is over.
2004     if (Line->Tokens.size() == 0) {
2005       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2006       // import statement should terminate.
2007       return;
2008     }
2009     if (FormatTok->is(tok::l_brace)) {
           // The "{...}" symbol list is marked as a block, then parsed as a
           // braced list.
2010       FormatTok->BlockKind = BK_Block;
2011       parseBracedList();
2012     } else {
2013       nextToken();
2014     }
2015   }
2016 }
2017
     // Debug helper: writes a one-line description of \p Line to llvm::dbgs().
     //
     // The output is "<Prefix>Line(<level>)[ MACRO]: " followed by one
     // "<token name>[T=<type>, OC=<original column>] " entry per token. The child
     // lines attached to each token are then printed recursively with the prefix
     // "\nChild: ", and a final newline is written.
     //
     // \param Line   the unwrapped line to print.
     // \param Prefix text written before "Line("; defaults to the empty string.
     //
     // Marked LLVM_ATTRIBUTE_UNUSED; the only call visible in this part of the
     // file is inside a DEBUG({...}) block in addUnwrappedLine().
2018 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2019                                                  StringRef Prefix = "") {
2020   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2021                << (Line.InPPDirective ? " MACRO" : "") << ": ";
       // First pass: the tokens of this line.
2022   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2023                                                     E = Line.Tokens.end();
2024        I != E; ++I) {
2025     llvm::dbgs() << I->Tok->Tok.getName() << "["
2026                  << "T=" << I->Tok->Type
2027                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2028   }
       // Second pass: the child lines hanging off each token.
2029   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2030                                                     E = Line.Tokens.end();
2031        I != E; ++I) {
2032     const UnwrappedLineNode &Node = *I;
         // Note: the inner I/E deliberately-or-not shadow the outer iterators;
         // the outer ones are not used inside this inner loop.
2033     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2034              I = Node.Children.begin(),
2035              E = Node.Children.end();
2036          I != E; ++I) {
2037       printDebugInfo(*I, "\nChild: ");
2038     }
2039   }
2040   llvm::dbgs() << "\n";
2041 }
2042
     // Finishes the line currently being built and starts a fresh one.
     //
     // Does nothing if the current line has no tokens. Otherwise the line is moved
     // to the end of *CurrentLines, its token list is cleared and its
     // MatchingOpeningBlockLineIndex is reset to UnwrappedLine::kInvalidIndex.
     // If lines are being added to the top-level list (CurrentLines == &Lines),
     // any lines parked in PreprocessorDirectives are appended right after it and
     // that buffer is emptied.
2043 void UnwrappedLineParser::addUnwrappedLine() {
2044   if (Line->Tokens.empty())
2045     return;
       // Only top-level lines are dumped here; printDebugInfo() prints the
       // children recursively.
2046   DEBUG({
2047     if (CurrentLines == &Lines)
2048       printDebugInfo(*Line);
2049   });
2050   CurrentLines->push_back(std::move(*Line));
       // *Line was moved from; clear the token list explicitly so it is reused
       // from a known-empty state.
2051   Line->Tokens.clear();
2052   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2053   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2054     CurrentLines->append(
2055         std::make_move_iterator(PreprocessorDirectives.begin()),
2056         std::make_move_iterator(PreprocessorDirectives.end()));
2057     PreprocessorDirectives.clear();
2058   }
2059 }
2060
     // Returns true if the current token (FormatTok) is the end-of-file token.
2061 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2062
     // Returns true if \p FormatTok starts a new line: it must be preceded by at
     // least one newline, and either that newline is unescaped or the parser is
     // currently inside a preprocessor directive.
2063 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2064   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2065          FormatTok.NewlinesBefore > 0;
2066 }
2067
     // Returns true if \p FormatTok is a comment token whose text starts with
     // "//", i.e. a line comment as opposed to a "/* ... */" block comment.
2068 static bool isLineComment(const FormatToken &FormatTok) {
2069   return FormatTok.is(tok::comment) &&
2070          FormatTok.TokenText.startswith("//");
2071 }
2072
2073 // Checks if \p FormatTok is a line comment that continues the line comment
2074 // section on \p Line.
     //
     // \param FormatTok           the candidate comment token.
     // \param Line                the unwrapped line built so far.
     // \param CommentPragmasRegex regex matched against the comment text; a match
     //                            means the comment never continues a section.
     //
     // Returns false if \p Line has no tokens or the comment matches the pragma
     // regex. Otherwise returns true only if all of the following hold:
     // \p FormatTok is a line comment, it is preceded by exactly one newline, the
     // last token of \p Line is a line comment, and the original column of
     // \p FormatTok is at least the minimum continuation column computed below.
2075 static bool continuesLineComment(const FormatToken &FormatTok,
2076                                  const UnwrappedLine &Line,
2077                                  llvm::Regex &CommentPragmasRegex) {
2078   if (Line.Tokens.empty())
2079     return false;
2080
       // Strip the two-character comment introducer ("//" or "/*") before
       // matching the pragma regex against the comment's content.
2081   StringRef IndentContent = FormatTok.TokenText;
2082   if (FormatTok.TokenText.startswith("//") ||
2083       FormatTok.TokenText.startswith("/*"))
2084     IndentContent = FormatTok.TokenText.substr(2);
2085   if (CommentPragmasRegex.match(IndentContent))
2086     return false;
2087
2088   // If Line starts with a line comment, then FormatTok continues the comment
2089   // section if its original column is greater or equal to the original start
2090   // column of the line.
2091   //
2092   // Define the min column token of a line as follows: if a line ends in '{' or
2093   // contains a '{' followed by a line comment, then the min column token is
2094   // that '{'. Otherwise, the min column token of the line is the first token of
2095   // the line.
2096   //
2097   // If Line starts with a token other than a line comment, then FormatTok
2098   // continues the comment section if its original column is greater than the
2099   // original start column of the min column token of the line.
2100   //
2101   // For example, the second line comment continues the first in these cases:
2102   //
2103   // // first line
2104   // // second line
2105   //
2106   // and:
2107   //
2108   // // first line
2109   //  // second line
2110   //
2111   // and:
2112   //
2113   // int i; // first line
2114   //  // second line
2115   //
2116   // and:
2117   //
2118   // do { // first line
2119   //      // second line
2120   //   int i;
2121   // } while (true);
2122   //
2123   // and:
2124   //
2125   // enum {
2126   //   a, // first line
2127   //    // second line
2128   //   b
2129   // };
2130   //
2131   // The second line comment doesn't continue the first in these cases:
2132   //
2133   //   // first line
2134   //  // second line
2135   //
2136   // and:
2137   //
2138   // int i; // first line
2139   // // second line
2140   //
2141   // and:
2142   //
2143   // do { // first line
2144   //   // second line
2145   //   int i;
2146   // } while (true);
2147   //
2148   // and:
2149   //
2150   // enum {
2151   //   a, // first line
2152   //   // second line
2153   // };
2154   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2155
2156   // Scan for '{//'. If found, use the column of '{' as a min column for line
2157   // comment section continuation.
2158   const FormatToken *PreviousToken = nullptr;
2159   for (const UnwrappedLineNode &Node : Line.Tokens) {
2160     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2161         isLineComment(*Node.Tok)) {
2162       MinColumnToken = PreviousToken;
2163       break;
2164     }
2165     PreviousToken = Node.Tok;
2166
2167     // Grab the last newline preceding a token in this unwrapped line.
         // In effect the min column token becomes the first token of the last
         // source line that this unwrapped line spans (as scanned so far).
2168     if (Node.Tok->NewlinesBefore > 0) {
2169       MinColumnToken = Node.Tok;
2170     }
2171   }
       // A '{' as the very last token of the line also acts as the min column
       // token. This check is also true when the loop above stopped at '{//'.
2172   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2173     MinColumnToken = PreviousToken;
2174   }
2175
       // Relative to a line comment the continuation may start in the same
       // column; relative to any other token it must start strictly to the
       // right.
2176   unsigned MinContinueColumn =
2177       MinColumnToken->OriginalColumn +
2178       (isLineComment(*MinColumnToken) ? 0 : 1);
2179   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2180          isLineComment(*(Line.Tokens.back().Tok)) &&
2181          FormatTok.OriginalColumn >= MinContinueColumn;
2182 }
2183
     // Moves all comments collected in CommentsBeforeNextToken into the current
     // line and empties that buffer.
     //
     // \param NewlineBeforeNext whether the token that follows the comments starts
     //        a new line. If so, and the current line held no tokens when this
     //        function was entered, the line made of the comments is finished
     //        before returning.
2184 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
       // Evaluated once, before any comment is pushed: was the line empty on
       // entry, so that it can only consist of comments?
2185   bool JustComments = Line->Tokens.empty();
2186   for (SmallVectorImpl<FormatToken *>::const_iterator
2187            I = CommentsBeforeNextToken.begin(),
2188            E = CommentsBeforeNextToken.end();
2189        I != E; ++I) {
2190     // Line comments that belong to the same line comment section are put on the
2191     // same line since later we might want to reflow content between them.
2192     // Additional fine-grained breaking of line comment sections is controlled
2193     // by the class BreakableLineCommentSection in case it is desirable to keep
2194     // several line comment sections in the same unwrapped line.
2195     //
2196     // FIXME: Consider putting separate line comment sections as children to the
2197     // unwrapped line instead.
2198     (*I)->ContinuesLineCommentSection =
2199         continuesLineComment(**I, *Line, CommentPragmasRegex);
         // A comment on its own line that does not continue the previous section
         // starts a new unwrapped line (addUnwrappedLine() is a no-op while the
         // line is still empty).
2200     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2201       addUnwrappedLine();
2202     pushToken(*I);
2203   }
2204   if (NewlineBeforeNext && JustComments)
2205     addUnwrappedLine();
2206   CommentsBeforeNextToken.clear();
2207 }
2208
     // Appends the current token to the current line and advances to the next
     // one.
     //
     // Does nothing at eof, so callers that loop on nextToken() need their own
     // eof check to terminate. Comments buffered before the current token are
     // flushed first. For JavaScript the next token is read with
     // readTokenWithJavaScriptASI() instead of readToken().
2209 void UnwrappedLineParser::nextToken() {
2210   if (eof())
2211     return;
2212   flushComments(isOnNewLine(*FormatTok));
2213   pushToken(FormatTok);
2214   if (Style.Language != FormatStyle::LK_JavaScript)
2215     readToken();
2216   else
2217     readTokenWithJavaScriptASI();
2218 }
2219
     // Returns the last token pushed onto the current line, or nullptr if there
     // is no current line or it has no tokens yet. Tokens of lines that were
     // already finished are therefore not reachable through this function.
2220 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2221   // FIXME: This is a dirty way to access the previous token. Find a better
2222   // solution.
2223   if (!Line || Line->Tokens.empty())
2224     return nullptr;
2225   return Line->Tokens.back().Tok;
2226 }
2227
     // Decides, for a run of consecutive comment tokens, which ones are appended
     // to the current line right away and which ones are deferred to
     // CommentsBeforeNextToken (to be emitted by flushComments() later). It also
     // sets ContinuesLineCommentSection on every comment.
     //
     // \param Comments the comments read since the last non-comment token, in
     //        source order.
     // \param NextTok  the token following the comments, or nullptr if there is
     //        none; used only for its original column.
2228 void UnwrappedLineParser::distributeComments(
2229     const SmallVectorImpl<FormatToken *> &Comments,
2230     const FormatToken *NextTok) {
2231   // Whether or not a line comment token continues a line is controlled by
2232   // the method continuesLineComment, with the following caveat:
2233   //
2234   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2235   // that each comment line from the trail is aligned with the next token, if
2236   // the next token exists. If a trail exists, the beginning of the maximal
2237   // trail is marked as a start of a new comment section.
2238   //
2239   // For example in this code:
2240   //
2241   // int a; // line about a
2242   //   // line 1 about b
2243   //   // line 2 about b
2244   //   int b;
2245   //
2246   // the two lines about b form a maximal trail, so there are two sections, the
2247   // first one consisting of the single comment "// line about a" and the
2248   // second one consisting of the next two comments.
2249   if (Comments.empty())
2250     return;
2251   bool ShouldPushCommentsInCurrentLine = true;
2252   bool HasTrailAlignedWithNextToken = false;
2253   unsigned StartOfTrailAlignedWithNextToken = 0;
2254   if (NextTok) {
2255     // We are skipping the first element intentionally.
         // NOTE(review): this scan does not stop at the first comment that is
         // not aligned with NextTok, so the recorded start is the lowest index
         // >= 1 whose column matches, even if non-aligned comments follow it.
         // Confirm this is the intended reading of "trail" above.
2256     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2257       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2258         HasTrailAlignedWithNextToken = true;
2259         StartOfTrailAlignedWithNextToken = i;
2260       }
2261     }
2262   }
2263   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
         // This local is named like the FormatTok used by the other methods in
         // this file; inside the loop the name refers to the comment.
2264     FormatToken *FormatTok = Comments[i];
2265     if (HasTrailAlignedWithNextToken &&
2266         i == StartOfTrailAlignedWithNextToken) {
2267       FormatTok->ContinuesLineCommentSection = false;
2268     } else {
2269       FormatTok->ContinuesLineCommentSection =
2270           continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
2271     }
         // The first comment that starts a new section on its own line (or is
         // the first token of the input) ends the part belonging to the current
         // line. The flag is never set back to true, so that comment and all
         // later ones are deferred.
2272     if (!FormatTok->ContinuesLineCommentSection &&
2273         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2274       ShouldPushCommentsInCurrentLine = false;
2275     }
2276     if (ShouldPushCommentsInCurrentLine) {
2277       pushToken(FormatTok);
2278     } else {
2279       CommentsBeforeNextToken.push_back(FormatTok);
2280     }
2281   }
2282 }
2283
     // Reads tokens from the token source until FormatTok is the next token the
     // parser should see.
     //
     // On the way it
     //  - parses preprocessor directives that start on a fresh line (or at the
     //    very beginning of the input) as separate lines,
     //  - translates conflict-marker tokens into conditional-compilation state
     //    changes,
     //  - skips tokens while the top of PPStack is PP_Unreachable, and
     //  - collects comments and passes them to distributeComments() once the
     //    following non-comment token (or eof) is known.
2284 void UnwrappedLineParser::readToken() {
2285   SmallVector<FormatToken *, 1> Comments;
2286   do {
2287     FormatTok = Tokens->getNextToken();
2288     assert(FormatTok);
         // A "#" starts a directive only outside of a directive and only at the
         // beginning of a line or of the input.
2289     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2290            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2291       distributeComments(Comments, FormatTok);
2292       Comments.clear();
2293       // If there is an unfinished unwrapped line, we flush the preprocessor
2294       // directives only after that unwrapped line was finished later.
2295       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2296       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2297       // Comments stored before the preprocessor directive need to be output
2298       // before the preprocessor directive, at the same level as the
2299       // preprocessor directive, as we consider them to apply to the directive.
2300       flushComments(isOnNewLine(*FormatTok));
2301       parsePPDirective();
2302     }
         // Conflict-marker tokens are not pushed onto any line here; each one
         // only updates the conditional-compilation state, and the token read
         // after it is forced onto a new line.
2303     while (FormatTok->Type == TT_ConflictStart ||
2304            FormatTok->Type == TT_ConflictEnd ||
2305            FormatTok->Type == TT_ConflictAlternative) {
2306       if (FormatTok->Type == TT_ConflictStart) {
2307         conditionalCompilationStart(/*Unreachable=*/false);
2308       } else if (FormatTok->Type == TT_ConflictAlternative) {
2309         conditionalCompilationAlternative();
2310       } else if (FormatTok->Type == TT_ConflictEnd) {
2311         conditionalCompilationEnd();
2312       }
2313       FormatTok = Tokens->getNextToken();
2314       FormatTok->MustBreakBefore = true;
2315     }
2316
         // Inside an unreachable conditional branch, drop the token and read the
         // next one (tokens of a directive itself are still kept).
2317     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2318         !Line->InPPDirective) {
2319       continue;
2320     }
2321
         // First non-comment token: hand over the pending comments and stop.
2322     if (!FormatTok->Tok.is(tok::comment)) {
2323       distributeComments(Comments, FormatTok);
2324       Comments.clear();
2325       return;
2326     }
2327
2328     Comments.push_back(FormatTok);
2329   } while (!eof());
2330
       // The loop ended at eof without returning: distribute the comments still
       // pending, with no following token to align against.
2331   distributeComments(Comments, nullptr);
2332   Comments.clear();
2333 }
2334
     // Appends \p Tok to the current line, wrapped in a new UnwrappedLineNode.
     // If MustBreakBeforeNextToken is set, the token is marked MustBreakBefore
     // and the flag is cleared, so it affects exactly one token.
2335 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2336   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2337   if (MustBreakBeforeNextToken) {
2338     Line->Tokens.back().Tok->MustBreakBefore = true;
2339     MustBreakBeforeNextToken = false;
2340   }
2341 }
2342
2343 } // end namespace format
2344 } // end namespace clang