contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #define DEBUG_TYPE "format-parser"
  22
  23 namespace clang {
  24 namespace format {
  25
  26 class FormatTokenSource {
  27 public:
  28   virtual ~FormatTokenSource() {}
  29   virtual FormatToken *getNextToken() = 0;
  30
  31   virtual unsigned getPosition() = 0;
  32   virtual FormatToken *setPosition(unsigned Position) = 0;
  33 };
  34
  35 namespace {
  36
  37 class ScopedDeclarationState {
  38 public:
  39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  40                          bool MustBeDeclaration)
  41       : Line(Line), Stack(Stack) {
  42     Line.MustBeDeclaration = MustBeDeclaration;
  43     Stack.push_back(MustBeDeclaration);
  44   }
  45   ~ScopedDeclarationState() {
  46     Stack.pop_back();
  47     if (!Stack.empty())
  48       Line.MustBeDeclaration = Stack.back();
  49     else
  50       Line.MustBeDeclaration = true;
  51   }
  52
  53 private:
  54   UnwrappedLine &Line;
  55   std::vector<bool> &Stack;
  56 };
  57
  58 class ScopedMacroState : public FormatTokenSource {
  59 public:
  60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  61                    FormatToken *&ResetToken)
  62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  64         Token(nullptr) {
  65     TokenSource = this;
  66     Line.Level = 0;
  67     Line.InPPDirective = true;
  68   }
  69
  70   ~ScopedMacroState() override {
  71     TokenSource = PreviousTokenSource;
  72     ResetToken = Token;
  73     Line.InPPDirective = false;
  74     Line.Level = PreviousLineLevel;
  75   }
  76
  77   FormatToken *getNextToken() override {
  78     // The \c UnwrappedLineParser guards against this by never calling
  79     // \c getNextToken() after it has encountered the first eof token.
  80     assert(!eof());
  81     Token = PreviousTokenSource->getNextToken();
  82     if (eof())
  83       return getFakeEOF();
  84     return Token;
  85   }
  86
  87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  88
  89   FormatToken *setPosition(unsigned Position) override {
  90     Token = PreviousTokenSource->setPosition(Position);
  91     return Token;
  92   }
  93
  94 private:
  95   bool eof() { return Token && Token->HasUnescapedNewline; }
  96
  97   FormatToken *getFakeEOF() {
  98     static bool EOFInitialized = false;
  99     static FormatToken FormatTok;
 100     if (!EOFInitialized) {
 101       FormatTok.Tok.startToken();
 102       FormatTok.Tok.setKind(tok::eof);
 103       EOFInitialized = true;
 104     }
 105     return &FormatTok;
 106   }
 107
 108   UnwrappedLine &Line;
 109   FormatTokenSource *&TokenSource;
 110   FormatToken *&ResetToken;
 111   unsigned PreviousLineLevel;
 112   FormatTokenSource *PreviousTokenSource;
 113
 114   FormatToken *Token;
 115 };
 116
 117 } // end anonymous namespace
 118
 119 class ScopedLineState {
 120 public:
 121   ScopedLineState(UnwrappedLineParser &Parser,
 122                   bool SwitchToPreprocessorLines = false)
 123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 124     if (SwitchToPreprocessorLines)
 125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 126     else if (!Parser.Line->Tokens.empty())
 127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 128     PreBlockLine = std::move(Parser.Line);
 129     Parser.Line = llvm::make_unique<UnwrappedLine>();
 130     Parser.Line->Level = PreBlockLine->Level;
 131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 132   }
 133
 134   ~ScopedLineState() {
 135     if (!Parser.Line->Tokens.empty()) {
 136       Parser.addUnwrappedLine();
 137     }
 138     assert(Parser.Line->Tokens.empty());
 139     Parser.Line = std::move(PreBlockLine);
 140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 141       Parser.MustBreakBeforeNextToken = true;
 142     Parser.CurrentLines = OriginalLines;
 143   }
 144
 145 private:
 146   UnwrappedLineParser &Parser;
 147
 148   std::unique_ptr<UnwrappedLine> PreBlockLine;
 149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 150 };
 151
 152 class CompoundStatementIndenter {
 153 public:
 154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 155                             const FormatStyle &Style, unsigned &LineLevel)
 156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
 158       Parser->addUnwrappedLine();
 159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
 160       Parser->addUnwrappedLine();
 161       ++LineLevel;
 162     }
 163   }
 164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 165
 166 private:
 167   unsigned &LineLevel;
 168   unsigned OldLineLevel;
 169 };
 170
 171 namespace {
 172
 173 class IndexedTokenSource : public FormatTokenSource {
 174 public:
 175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 176       : Tokens(Tokens), Position(-1) {}
 177
 178   FormatToken *getNextToken() override {
 179     ++Position;
 180     return Tokens[Position];
 181   }
 182
 183   unsigned getPosition() override {
 184     assert(Position >= 0);
 185     return Position;
 186   }
 187
 188   FormatToken *setPosition(unsigned P) override {
 189     Position = P;
 190     return Tokens[Position];
 191   }
 192
 193   void reset() { Position = -1; }
 194
 195 private:
 196   ArrayRef<FormatToken *> Tokens;
 197   int Position;
 198 };
 199
 200 } // end anonymous namespace
 201
 202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 203                                          const AdditionalKeywords &Keywords,
 204                                          ArrayRef<FormatToken *> Tokens,
 205                                          UnwrappedLineConsumer &Callback)
 206     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 207       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
 208       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 209
 210 void UnwrappedLineParser::reset() {
 211   PPBranchLevel = -1;
 212   Line.reset(new UnwrappedLine);
 213   CommentsBeforeNextToken.clear();
 214   FormatTok = nullptr;
 215   MustBreakBeforeNextToken = false;
 216   PreprocessorDirectives.clear();
 217   CurrentLines = &Lines;
 218   DeclarationScopeStack.clear();
 219   PPStack.clear();
 220 }
 221
 222 void UnwrappedLineParser::parse() {
 223   IndexedTokenSource TokenSource(AllTokens);
 224   do {
 225     DEBUG(llvm::dbgs() << "----\n");
 226     reset();
 227     Tokens = &TokenSource;
 228     TokenSource.reset();
 229
 230     readToken();
 231     parseFile();
 232     // Create line with eof token.
 233     pushToken(FormatTok);
 234     addUnwrappedLine();
 235
 236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 237                                                   E = Lines.end();
 238          I != E; ++I) {
 239       Callback.consumeUnwrappedLine(*I);
 240     }
 241     Callback.finishRun();
 242     Lines.clear();
 243     while (!PPLevelBranchIndex.empty() &&
 244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 247     }
 248     if (!PPLevelBranchIndex.empty()) {
 249       ++PPLevelBranchIndex.back();
 250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 252     }
 253   } while (!PPLevelBranchIndex.empty());
 254 }
 255
 256 void UnwrappedLineParser::parseFile() {
 257   // The top-level context in a file always has declarations, except for pre-
 258   // processor directives and JavaScript files.
 259   bool MustBeDeclaration =
 260       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 261   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 262                                           MustBeDeclaration);
 263   parseLevel(/*HasOpeningBrace=*/false);
 264   // Make sure to format the remaining tokens.
 265   flushComments(true);
 266   addUnwrappedLine();
 267 }
 268
 269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 270   bool SwitchLabelEncountered = false;
 271   do {
 272     tok::TokenKind kind = FormatTok->Tok.getKind();
 273     if (FormatTok->Type == TT_MacroBlockBegin) {
 274       kind = tok::l_brace;
 275     } else if (FormatTok->Type == TT_MacroBlockEnd) {
 276       kind = tok::r_brace;
 277     }
 278
 279     switch (kind) {
 280     case tok::comment:
 281       nextToken();
 282       addUnwrappedLine();
 283       break;
 284     case tok::l_brace:
 285       // FIXME: Add parameter whether this can happen - if this happens, we must
 286       // be in a non-declaration context.
 287       parseBlock(/*MustBeDeclaration=*/false);
 288       addUnwrappedLine();
 289       break;
 290     case tok::r_brace:
 291       if (HasOpeningBrace)
 292         return;
 293       nextToken();
 294       addUnwrappedLine();
 295       break;
 296     case tok::kw_default:
 297     case tok::kw_case:
 298       if (!SwitchLabelEncountered &&
 299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 300         ++Line->Level;
 301       SwitchLabelEncountered = true;
 302       parseStructuralElement();
 303       break;
 304     default:
 305       parseStructuralElement();
 306       break;
 307     }
 308   } while (!eof());
 309 }
 310
 311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 312   // We'll parse forward through the tokens until we hit
 313   // a closing brace or eof - note that getNextToken() will
 314   // parse macros, so this will magically work inside macro
 315   // definitions, too.
 316   unsigned StoredPosition = Tokens->getPosition();
 317   FormatToken *Tok = FormatTok;
 318   // Keep a stack of positions of lbrace tokens. We will
 319   // update information about whether an lbrace starts a
 320   // braced init list or a different block during the loop.
 321   SmallVector<FormatToken *, 8> LBraceStack;
 322   assert(Tok->Tok.is(tok::l_brace));
 323   do {
 324     // Get next none-comment token.
 325     FormatToken *NextTok;
 326     unsigned ReadTokens = 0;
 327     do {
 328       NextTok = Tokens->getNextToken();
 329       ++ReadTokens;
 330     } while (NextTok->is(tok::comment));
 331
 332     switch (Tok->Tok.getKind()) {
 333     case tok::l_brace:
 334       Tok->BlockKind = BK_Unknown;
 335       LBraceStack.push_back(Tok);
 336       break;
 337     case tok::r_brace:
 338       if (!LBraceStack.empty()) {
 339         if (LBraceStack.back()->BlockKind == BK_Unknown) {
 340           bool ProbablyBracedList = false;
 341           if (Style.Language == FormatStyle::LK_Proto) {
 342             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 343           } else {
 344             // Using OriginalColumn to distinguish between ObjC methods and
 345             // binary operators is a bit hacky.
 346             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 347                                     NextTok->OriginalColumn == 0;
 348
 349             // If there is a comma, semicolon or right paren after the closing
 350             // brace, we assume this is a braced initializer list.  Note that
 351             // regardless how we mark inner braces here, we will overwrite the
 352             // BlockKind later if we parse a braced list (where all blocks
 353             // inside are by default braced lists), or when we explicitly detect
 354             // blocks (for example while parsing lambdas).
 355             //
 356             // We exclude + and - as they can be ObjC visibility modifiers.
 357             ProbablyBracedList =
 358                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 359                                  tok::r_paren, tok::r_square, tok::l_brace,
 360                                  tok::l_paren, tok::ellipsis) ||
 361                 (NextTok->is(tok::semi) &&
 362                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
 363                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 364           }
 365           if (ProbablyBracedList) {
 366             Tok->BlockKind = BK_BracedInit;
 367             LBraceStack.back()->BlockKind = BK_BracedInit;
 368           } else {
 369             Tok->BlockKind = BK_Block;
 370             LBraceStack.back()->BlockKind = BK_Block;
 371           }
 372         }
 373         LBraceStack.pop_back();
 374       }
 375       break;
 376     case tok::at:
 377     case tok::semi:
 378     case tok::kw_if:
 379     case tok::kw_while:
 380     case tok::kw_for:
 381     case tok::kw_switch:
 382     case tok::kw_try:
 383     case tok::kw___try:
 384       if (!LBraceStack.empty())
 385         LBraceStack.back()->BlockKind = BK_Block;
 386       break;
 387     default:
 388       break;
 389     }
 390     Tok = NextTok;
 391   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 392   // Assume other blocks for all unclosed opening braces.
 393   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 394     if (LBraceStack[i]->BlockKind == BK_Unknown)
 395       LBraceStack[i]->BlockKind = BK_Block;
 396   }
 397
 398   FormatTok = Tokens->setPosition(StoredPosition);
 399 }
 400
 401 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 402                                      bool MunchSemi) {
 403   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
 404          "'{' or macro block token expected");
 405   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
 406
 407   unsigned InitialLevel = Line->Level;
 408   nextToken();
 409
 410   if (MacroBlock && FormatTok->is(tok::l_paren))
 411     parseParens();
 412
 413   addUnwrappedLine();
 414
 415   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 416                                           MustBeDeclaration);
 417   if (AddLevel)
 418     ++Line->Level;
 419   parseLevel(/*HasOpeningBrace=*/true);
 420
 421   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
 422                  : !FormatTok->is(tok::r_brace)) {
 423     Line->Level = InitialLevel;
 424     return;
 425   }
 426
 427   nextToken(); // Munch the closing brace.
 428
 429   if (MacroBlock && FormatTok->is(tok::l_paren))
 430     parseParens();
 431
 432   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 433     nextToken();
 434   Line->Level = InitialLevel;
 435 }
 436
 437 static bool isGoogScope(const UnwrappedLine &Line) {
 438   // FIXME: Closure-library specific stuff should not be hard-coded but be
 439   // configurable.
 440   if (Line.Tokens.size() < 4)
 441     return false;
 442   auto I = Line.Tokens.begin();
 443   if (I->Tok->TokenText != "goog")
 444     return false;
 445   ++I;
 446   if (I->Tok->isNot(tok::period))
 447     return false;
 448   ++I;
 449   if (I->Tok->TokenText != "scope")
 450     return false;
 451   ++I;
 452   return I->Tok->is(tok::l_paren);
 453 }
 454
 455 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 456                                    const FormatToken &InitialToken) {
 457   switch (Style.BreakBeforeBraces) {
 458   case FormatStyle::BS_Linux:
 459     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
 460   case FormatStyle::BS_Mozilla:
 461     return InitialToken.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union);
 462   case FormatStyle::BS_Allman:
 463   case FormatStyle::BS_GNU:
 464     return true;
 465   default:
 466     return false;
 467   }
 468 }
 469
 470 void UnwrappedLineParser::parseChildBlock() {
 471   FormatTok->BlockKind = BK_Block;
 472   nextToken();
 473   {
 474     bool GoogScope =
 475         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
 476     ScopedLineState LineState(*this);
 477     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 478                                             /*MustBeDeclaration=*/false);
 479     Line->Level += GoogScope ? 0 : 1;
 480     parseLevel(/*HasOpeningBrace=*/true);
 481     flushComments(isOnNewLine(*FormatTok));
 482     Line->Level -= GoogScope ? 0 : 1;
 483   }
 484   nextToken();
 485 }
 486
 487 void UnwrappedLineParser::parsePPDirective() {
 488   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 489   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 490   nextToken();
 491
 492   if (!FormatTok->Tok.getIdentifierInfo()) {
 493     parsePPUnknown();
 494     return;
 495   }
 496
 497   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 498   case tok::pp_define:
 499     parsePPDefine();
 500     return;
 501   case tok::pp_if:
 502     parsePPIf(/*IfDef=*/false);
 503     break;
 504   case tok::pp_ifdef:
 505   case tok::pp_ifndef:
 506     parsePPIf(/*IfDef=*/true);
 507     break;
 508   case tok::pp_else:
 509     parsePPElse();
 510     break;
 511   case tok::pp_elif:
 512     parsePPElIf();
 513     break;
 514   case tok::pp_endif:
 515     parsePPEndIf();
 516     break;
 517   default:
 518     parsePPUnknown();
 519     break;
 520   }
 521 }
 522
 523 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 524   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
 525     PPStack.push_back(PP_Unreachable);
 526   else
 527     PPStack.push_back(PP_Conditional);
 528 }
 529
 530 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 531   ++PPBranchLevel;
 532   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 533   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 534     PPLevelBranchIndex.push_back(0);
 535     PPLevelBranchCount.push_back(0);
 536   }
 537   PPChainBranchIndex.push(0);
 538   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 539   conditionalCompilationCondition(Unreachable || Skip);
 540 }
 541
 542 void UnwrappedLineParser::conditionalCompilationAlternative() {
 543   if (!PPStack.empty())
 544     PPStack.pop_back();
 545   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 546   if (!PPChainBranchIndex.empty())
 547     ++PPChainBranchIndex.top();
 548   conditionalCompilationCondition(
 549       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 550       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 551 }
 552
 553 void UnwrappedLineParser::conditionalCompilationEnd() {
 554   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 555   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 556     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 557       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 558     }
 559   }
 560   // Guard against #endif's without #if.
 561   if (PPBranchLevel > 0)
 562     --PPBranchLevel;
 563   if (!PPChainBranchIndex.empty())
 564     PPChainBranchIndex.pop();
 565   if (!PPStack.empty())
 566     PPStack.pop_back();
 567 }
 568
 569 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 570   nextToken();
 571   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
 572                          FormatTok->Tok.getLiteralData() != nullptr &&
 573                          StringRef(FormatTok->Tok.getLiteralData(),
 574                                    FormatTok->Tok.getLength()) == "0") ||
 575                         FormatTok->Tok.is(tok::kw_false);
 576   conditionalCompilationStart(!IfDef && IsLiteralFalse);
 577   parsePPUnknown();
 578 }
 579
 580 void UnwrappedLineParser::parsePPElse() {
 581   conditionalCompilationAlternative();
 582   parsePPUnknown();
 583 }
 584
 585 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 586
 587 void UnwrappedLineParser::parsePPEndIf() {
 588   conditionalCompilationEnd();
 589   parsePPUnknown();
 590 }
 591
 592 void UnwrappedLineParser::parsePPDefine() {
 593   nextToken();
 594
 595   if (FormatTok->Tok.getKind() != tok::identifier) {
 596     parsePPUnknown();
 597     return;
 598   }
 599   nextToken();
 600   if (FormatTok->Tok.getKind() == tok::l_paren &&
 601       FormatTok->WhitespaceRange.getBegin() ==
 602           FormatTok->WhitespaceRange.getEnd()) {
 603     parseParens();
 604   }
 605   addUnwrappedLine();
 606   Line->Level = 1;
 607
 608   // Errors during a preprocessor directive can only affect the layout of the
 609   // preprocessor directive, and thus we ignore them. An alternative approach
 610   // would be to use the same approach we use on the file level (no
 611   // re-indentation if there was a structural error) within the macro
 612   // definition.
 613   parseFile();
 614 }
 615
 616 void UnwrappedLineParser::parsePPUnknown() {
 617   do {
 618     nextToken();
 619   } while (!eof());
 620   addUnwrappedLine();
 621 }
 622
 623 // Here we blacklist certain tokens that are not usually the first token in an
 624 // unwrapped line. This is used in attempt to distinguish macro calls without
 625 // trailing semicolons from other constructs split to several lines.
 626 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 627   // Semicolon can be a null-statement, l_square can be a start of a macro or
 628   // a C++11 attribute, but this doesn't seem to be common.
 629   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 630          Tok.isNot(tok::l_square) &&
 631          // Tokens that can only be used as binary operators and a part of
 632          // overloaded operator names.
 633          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 634          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 635          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 636          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 637          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 638          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 639          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 640          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 641          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 642          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 643          Tok.isNot(tok::lesslessequal) &&
 644          // Colon is used in labels, base class lists, initializer lists,
 645          // range-based for loops, ternary operator, but should never be the
 646          // first token in an unwrapped line.
 647          Tok.isNot(tok::colon) &&
 648          // 'noexcept' is a trailing annotation.
 649          Tok.isNot(tok::kw_noexcept);
 650 }
 651
 652 void UnwrappedLineParser::parseStructuralElement() {
 653   assert(!FormatTok->Tok.is(tok::l_brace));
 654   switch (FormatTok->Tok.getKind()) {
 655   case tok::at:
 656     nextToken();
 657     if (FormatTok->Tok.is(tok::l_brace)) {
 658       parseBracedList();
 659       break;
 660     }
 661     switch (FormatTok->Tok.getObjCKeywordID()) {
 662     case tok::objc_public:
 663     case tok::objc_protected:
 664     case tok::objc_package:
 665     case tok::objc_private:
 666       return parseAccessSpecifier();
 667     case tok::objc_interface:
 668     case tok::objc_implementation:
 669       return parseObjCInterfaceOrImplementation();
 670     case tok::objc_protocol:
 671       return parseObjCProtocol();
 672     case tok::objc_end:
 673       return; // Handled by the caller.
 674     case tok::objc_optional:
 675     case tok::objc_required:
 676       nextToken();
 677       addUnwrappedLine();
 678       return;
 679     case tok::objc_autoreleasepool:
 680       nextToken();
 681       if (FormatTok->Tok.is(tok::l_brace)) {
 682         if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
 683             Style.BreakBeforeBraces == FormatStyle::BS_GNU)
 684           addUnwrappedLine();
 685         parseBlock(/*MustBeDeclaration=*/false);
 686       }
 687       addUnwrappedLine();
 688       return;
 689     case tok::objc_try:
 690       // This branch isn't strictly necessary (the kw_try case below would
 691       // do this too after the tok::at is parsed above).  But be explicit.
 692       parseTryCatch();
 693       return;
 694     default:
 695       break;
 696     }
 697     break;
 698   case tok::kw_asm:
 699     nextToken();
 700     if (FormatTok->is(tok::l_brace)) {
 701       FormatTok->Type = TT_InlineASMBrace;
 702       nextToken();
 703       while (FormatTok && FormatTok->isNot(tok::eof)) {
 704         if (FormatTok->is(tok::r_brace)) {
 705           FormatTok->Type = TT_InlineASMBrace;
 706           nextToken();
 707           addUnwrappedLine();
 708           break;
 709         }
 710         FormatTok->Finalized = true;
 711         nextToken();
 712       }
 713     }
 714     break;
 715   case tok::kw_namespace:
 716     parseNamespace();
 717     return;
 718   case tok::kw_inline:
 719     nextToken();
 720     if (FormatTok->Tok.is(tok::kw_namespace)) {
 721       parseNamespace();
 722       return;
 723     }
 724     break;
 725   case tok::kw_public:
 726   case tok::kw_protected:
 727   case tok::kw_private:
 728     if (Style.Language == FormatStyle::LK_Java ||
 729         Style.Language == FormatStyle::LK_JavaScript)
 730       nextToken();
 731     else
 732       parseAccessSpecifier();
 733     return;
 734   case tok::kw_if:
 735     parseIfThenElse();
 736     return;
 737   case tok::kw_for:
 738   case tok::kw_while:
 739     parseForOrWhileLoop();
 740     return;
 741   case tok::kw_do:
 742     parseDoWhile();
 743     return;
 744   case tok::kw_switch:
 745     parseSwitch();
 746     return;
 747   case tok::kw_default:
 748     nextToken();
 749     parseLabel();
 750     return;
 751   case tok::kw_case:
 752     parseCaseLabel();
 753     return;
 754   case tok::kw_try:
 755   case tok::kw___try:
 756     parseTryCatch();
 757     return;
 758   case tok::kw_extern:
 759     nextToken();
 760     if (FormatTok->Tok.is(tok::string_literal)) {
 761       nextToken();
 762       if (FormatTok->Tok.is(tok::l_brace)) {
 763         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
 764         addUnwrappedLine();
 765         return;
 766       }
 767     }
 768     break;
 769   case tok::kw_export:
 770     if (Style.Language == FormatStyle::LK_JavaScript) {
 771       parseJavaScriptEs6ImportExport();
 772       return;
 773     }
 774     break;
 775   case tok::identifier:
 776     if (FormatTok->is(TT_ForEachMacro)) {
 777       parseForOrWhileLoop();
 778       return;
 779     }
 780     if (FormatTok->is(TT_MacroBlockBegin)) {
 781       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
 782                  /*MunchSemi=*/false);
 783       return;
 784     }
 785     if (Style.Language == FormatStyle::LK_JavaScript &&
 786         FormatTok->is(Keywords.kw_import)) {
 787       parseJavaScriptEs6ImportExport();
 788       return;
 789     }
 790     if (FormatTok->is(Keywords.kw_signals)) {
 791       nextToken();
 792       if (FormatTok->is(tok::colon)) {
 793         nextToken();
 794         addUnwrappedLine();
 795       }
 796       return;
 797     }
 798     // In all other cases, parse the declaration.
 799     break;
 800   default:
 801     break;
 802   }
 803   do {
 804     switch (FormatTok->Tok.getKind()) {
 805     case tok::at:
 806       nextToken();
 807       if (FormatTok->Tok.is(tok::l_brace))
 808         parseBracedList();
 809       break;
 810     case tok::kw_enum:
 811       // parseEnum falls through and does not yet add an unwrapped line as an
 812       // enum definition can start a structural element.
 813       parseEnum();
 814       // This does not apply for Java and JavaScript.
 815       if (Style.Language == FormatStyle::LK_Java ||
 816           Style.Language == FormatStyle::LK_JavaScript) {
 817         addUnwrappedLine();
 818         return;
 819       }
 820       break;
 821     case tok::kw_typedef:
 822       nextToken();
 823       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
 824                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
 825         parseEnum();
 826       break;
 827     case tok::kw_struct:
 828     case tok::kw_union:
 829     case tok::kw_class:
 830       // parseRecord falls through and does not yet add an unwrapped line as a
 831       // record declaration or definition can start a structural element.
 832       parseRecord();
 833       // This does not apply for Java and JavaScript.
 834       if (Style.Language == FormatStyle::LK_Java ||
 835           Style.Language == FormatStyle::LK_JavaScript) {
 836         addUnwrappedLine();
 837         return;
 838       }
 839       break;
 840     case tok::period:
 841       nextToken();
 842       // In Java, classes have an implicit static member "class".
 843       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
 844           FormatTok->is(tok::kw_class))
 845         nextToken();
 846       break;
 847     case tok::semi:
 848       nextToken();
 849       addUnwrappedLine();
 850       return;
 851     case tok::r_brace:
 852       addUnwrappedLine();
 853       return;
 854     case tok::l_paren:
 855       parseParens();
 856       break;
 857     case tok::caret:
 858       nextToken();
 859       if (FormatTok->Tok.isAnyIdentifier() ||
 860           FormatTok->isSimpleTypeSpecifier())
 861         nextToken();
 862       if (FormatTok->is(tok::l_paren))
 863         parseParens();
 864       if (FormatTok->is(tok::l_brace))
 865         parseChildBlock();
 866       break;
 867     case tok::l_brace:
 868       if (!tryToParseBracedList()) {
 869         // A block outside of parentheses must be the last part of a
 870         // structural element.
 871         // FIXME: Figure out cases where this is not true, and add projections
 872         // for them (the one we know is missing are lambdas).
 873         if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
 874           addUnwrappedLine();
 875         FormatTok->Type = TT_FunctionLBrace;
 876         parseBlock(/*MustBeDeclaration=*/false);
 877         addUnwrappedLine();
 878         return;
 879       }
 880       // Otherwise this was a braced init list, and the structural
 881       // element continues.
 882       break;
 883     case tok::kw_try:
 884       // We arrive here when parsing function-try blocks.
 885       parseTryCatch();
 886       return;
 887     case tok::identifier: {
 888       if (FormatTok->is(TT_MacroBlockEnd)) {
 889         addUnwrappedLine();
 890         return;
 891       }
 892
 893       // Parse function literal unless 'function' is the first token in a line
 894       // in which case this should be treated as a free-standing function.
 895       if (Style.Language == FormatStyle::LK_JavaScript &&
 896           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
 897         tryToParseJSFunction();
 898         break;
 899       }
 900       if ((Style.Language == FormatStyle::LK_JavaScript ||
 901            Style.Language == FormatStyle::LK_Java) &&
 902           FormatTok->is(Keywords.kw_interface)) {
 903         parseRecord();
 904         addUnwrappedLine();
 905         return;
 906       }
 907
 908       StringRef Text = FormatTok->TokenText;
 909       nextToken();
 910       if (Line->Tokens.size() == 1 &&
 911           // JS doesn't have macros, and within classes colons indicate fields,
 912           // not labels.
 913           Style.Language != FormatStyle::LK_JavaScript) {
 914         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
 915           parseLabel();
 916           return;
 917         }
 918         // Recognize function-like macro usages without trailing semicolon as
 919         // well as free-standing macros like Q_OBJECT.
 920         bool FunctionLike = FormatTok->is(tok::l_paren);
 921         if (FunctionLike)
 922           parseParens();
 923
 924         bool FollowedByNewline =
 925             CommentsBeforeNextToken.empty()
 926                 ? FormatTok->NewlinesBefore > 0
 927                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
 928
 929         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
 930             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
 931           addUnwrappedLine();
 932           return;
 933         }
 934       }
 935       break;
 936     }
 937     case tok::equal:
 938       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
 939       // TT_JsFatArrow. The always start an expression or a child block if
 940       // followed by a curly.
 941       if (FormatTok->is(TT_JsFatArrow)) {
 942         nextToken();
 943         if (FormatTok->is(tok::l_brace))
 944           parseChildBlock();
 945         break;
 946       }
 947
 948       nextToken();
 949       if (FormatTok->Tok.is(tok::l_brace)) {
 950         parseBracedList();
 951       }
 952       break;
 953     case tok::l_square:
 954       parseSquare();
 955       break;
 956     case tok::kw_new:
 957       parseNew();
 958       break;
 959     default:
 960       nextToken();
 961       break;
 962     }
 963   } while (!eof());
 964 }
 965
 966 bool UnwrappedLineParser::tryToParseLambda() {
 967   if (Style.Language != FormatStyle::LK_Cpp) {
 968     nextToken();
 969     return false;
 970   }
 971   // FIXME: This is a dirty way to access the previous token. Find a better
 972   // solution.
 973   if (!Line->Tokens.empty() &&
 974       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
 975                                         tok::kw_new, tok::kw_delete) ||
 976        Line->Tokens.back().Tok->closesScope() ||
 977        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
 978     nextToken();
 979     return false;
 980   }
 981   assert(FormatTok->is(tok::l_square));
 982   FormatToken &LSquare = *FormatTok;
 983   if (!tryToParseLambdaIntroducer())
 984     return false;
 985
 986   while (FormatTok->isNot(tok::l_brace)) {
 987     if (FormatTok->isSimpleTypeSpecifier()) {
 988       nextToken();
 989       continue;
 990     }
 991     switch (FormatTok->Tok.getKind()) {
 992     case tok::l_brace:
 993       break;
 994     case tok::l_paren:
 995       parseParens();
 996       break;
 997     case tok::amp:
 998     case tok::star:
 999     case tok::kw_const:
1000     case tok::comma:
1001     case tok::less:
1002     case tok::greater:
1003     case tok::identifier:
1004     case tok::coloncolon:
1005     case tok::kw_mutable:
1006       nextToken();
1007       break;
1008     case tok::arrow:
1009       FormatTok->Type = TT_LambdaArrow;
1010       nextToken();
1011       break;
1012     default:
1013       return true;
1014     }
1015   }
1016   LSquare.Type = TT_LambdaLSquare;
1017   parseChildBlock();
1018   return true;
1019 }
1020
1021 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1022   nextToken();
1023   if (FormatTok->is(tok::equal)) {
1024     nextToken();
1025     if (FormatTok->is(tok::r_square)) {
1026       nextToken();
1027       return true;
1028     }
1029     if (FormatTok->isNot(tok::comma))
1030       return false;
1031     nextToken();
1032   } else if (FormatTok->is(tok::amp)) {
1033     nextToken();
1034     if (FormatTok->is(tok::r_square)) {
1035       nextToken();
1036       return true;
1037     }
1038     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1039       return false;
1040     }
1041     if (FormatTok->is(tok::comma))
1042       nextToken();
1043   } else if (FormatTok->is(tok::r_square)) {
1044     nextToken();
1045     return true;
1046   }
1047   do {
1048     if (FormatTok->is(tok::amp))
1049       nextToken();
1050     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1051       return false;
1052     nextToken();
1053     if (FormatTok->is(tok::ellipsis))
1054       nextToken();
1055     if (FormatTok->is(tok::comma)) {
1056       nextToken();
1057     } else if (FormatTok->is(tok::r_square)) {
1058       nextToken();
1059       return true;
1060     } else {
1061       return false;
1062     }
1063   } while (!eof());
1064   return false;
1065 }
1066
1067 void UnwrappedLineParser::tryToParseJSFunction() {
1068   nextToken();
1069
1070   // Consume function name.
1071   if (FormatTok->is(tok::identifier))
1072     nextToken();
1073
1074   if (FormatTok->isNot(tok::l_paren))
1075     return;
1076
1077   // Parse formal parameter list.
1078   parseParens();
1079
1080   if (FormatTok->is(tok::colon)) {
1081     // Parse a type definition.
1082     nextToken();
1083
1084     // Eat the type declaration. For braced inline object types, balance braces,
1085     // otherwise just parse until finding an l_brace for the function body.
1086     if (FormatTok->is(tok::l_brace))
1087       tryToParseBracedList();
1088     else
1089       while (FormatTok->isNot(tok::l_brace) && !eof())
1090         nextToken();
1091   }
1092
1093   parseChildBlock();
1094 }
1095
1096 bool UnwrappedLineParser::tryToParseBracedList() {
1097   if (FormatTok->BlockKind == BK_Unknown)
1098     calculateBraceTypes();
1099   assert(FormatTok->BlockKind != BK_Unknown);
1100   if (FormatTok->BlockKind == BK_Block)
1101     return false;
1102   parseBracedList();
1103   return true;
1104 }
1105
1106 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1107   bool HasError = false;
1108   nextToken();
1109
1110   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1111   // replace this by using parseAssigmentExpression() inside.
1112   do {
1113     if (Style.Language == FormatStyle::LK_JavaScript) {
1114       if (FormatTok->is(Keywords.kw_function)) {
1115         tryToParseJSFunction();
1116         continue;
1117       }
1118       if (FormatTok->is(TT_JsFatArrow)) {
1119         nextToken();
1120         // Fat arrows can be followed by simple expressions or by child blocks
1121         // in curly braces.
1122         if (FormatTok->is(tok::l_brace)) {
1123           parseChildBlock();
1124           continue;
1125         }
1126       }
1127     }
1128     switch (FormatTok->Tok.getKind()) {
1129     case tok::caret:
1130       nextToken();
1131       if (FormatTok->is(tok::l_brace)) {
1132         parseChildBlock();
1133       }
1134       break;
1135     case tok::l_square:
1136       tryToParseLambda();
1137       break;
1138     case tok::l_brace:
1139       // Assume there are no blocks inside a braced init list apart
1140       // from the ones we explicitly parse out (like lambdas).
1141       FormatTok->BlockKind = BK_BracedInit;
1142       parseBracedList();
1143       break;
1144     case tok::l_paren:
1145       parseParens();
1146       // JavaScript can just have free standing methods and getters/setters in
1147       // object literals. Detect them by a "{" following ")".
1148       if (Style.Language == FormatStyle::LK_JavaScript) {
1149         if (FormatTok->is(tok::l_brace))
1150           parseChildBlock();
1151         break;
1152       }
1153       break;
1154     case tok::r_brace:
1155       nextToken();
1156       return !HasError;
1157     case tok::semi:
1158       HasError = true;
1159       if (!ContinueOnSemicolons)
1160         return !HasError;
1161       nextToken();
1162       break;
1163     case tok::comma:
1164       nextToken();
1165       break;
1166     default:
1167       nextToken();
1168       break;
1169     }
1170   } while (!eof());
1171   return false;
1172 }
1173
1174 void UnwrappedLineParser::parseParens() {
1175   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1176   nextToken();
1177   do {
1178     switch (FormatTok->Tok.getKind()) {
1179     case tok::l_paren:
1180       parseParens();
1181       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1182         parseChildBlock();
1183       break;
1184     case tok::r_paren:
1185       nextToken();
1186       return;
1187     case tok::r_brace:
1188       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1189       return;
1190     case tok::l_square:
1191       tryToParseLambda();
1192       break;
1193     case tok::l_brace:
1194       if (!tryToParseBracedList())
1195         parseChildBlock();
1196       break;
1197     case tok::at:
1198       nextToken();
1199       if (FormatTok->Tok.is(tok::l_brace))
1200         parseBracedList();
1201       break;
1202     case tok::identifier:
1203       if (Style.Language == FormatStyle::LK_JavaScript &&
1204           FormatTok->is(Keywords.kw_function))
1205         tryToParseJSFunction();
1206       else
1207         nextToken();
1208       break;
1209     default:
1210       nextToken();
1211       break;
1212     }
1213   } while (!eof());
1214 }
1215
1216 void UnwrappedLineParser::parseSquare() {
1217   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1218   if (tryToParseLambda())
1219     return;
1220   do {
1221     switch (FormatTok->Tok.getKind()) {
1222     case tok::l_paren:
1223       parseParens();
1224       break;
1225     case tok::r_square:
1226       nextToken();
1227       return;
1228     case tok::r_brace:
1229       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1230       return;
1231     case tok::l_square:
1232       parseSquare();
1233       break;
1234     case tok::l_brace: {
1235       if (!tryToParseBracedList())
1236         parseChildBlock();
1237       break;
1238     }
1239     case tok::at:
1240       nextToken();
1241       if (FormatTok->Tok.is(tok::l_brace))
1242         parseBracedList();
1243       break;
1244     default:
1245       nextToken();
1246       break;
1247     }
1248   } while (!eof());
1249 }
1250
1251 void UnwrappedLineParser::parseIfThenElse() {
1252   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1253   nextToken();
1254   if (FormatTok->Tok.is(tok::l_paren))
1255     parseParens();
1256   bool NeedsUnwrappedLine = false;
1257   if (FormatTok->Tok.is(tok::l_brace)) {
1258     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1259     parseBlock(/*MustBeDeclaration=*/false);
1260     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1261         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1262       addUnwrappedLine();
1263     } else {
1264       NeedsUnwrappedLine = true;
1265     }
1266   } else {
1267     addUnwrappedLine();
1268     ++Line->Level;
1269     parseStructuralElement();
1270     --Line->Level;
1271   }
1272   if (FormatTok->Tok.is(tok::kw_else)) {
1273     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1274       addUnwrappedLine();
1275     nextToken();
1276     if (FormatTok->Tok.is(tok::l_brace)) {
1277       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1278       parseBlock(/*MustBeDeclaration=*/false);
1279       addUnwrappedLine();
1280     } else if (FormatTok->Tok.is(tok::kw_if)) {
1281       parseIfThenElse();
1282     } else {
1283       addUnwrappedLine();
1284       ++Line->Level;
1285       parseStructuralElement();
1286       --Line->Level;
1287     }
1288   } else if (NeedsUnwrappedLine) {
1289     addUnwrappedLine();
1290   }
1291 }
1292
1293 void UnwrappedLineParser::parseTryCatch() {
1294   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1295   nextToken();
1296   bool NeedsUnwrappedLine = false;
1297   if (FormatTok->is(tok::colon)) {
1298     // We are in a function try block, what comes is an initializer list.
1299     nextToken();
1300     while (FormatTok->is(tok::identifier)) {
1301       nextToken();
1302       if (FormatTok->is(tok::l_paren))
1303         parseParens();
1304       if (FormatTok->is(tok::comma))
1305         nextToken();
1306     }
1307   }
1308   // Parse try with resource.
1309   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1310     parseParens();
1311   }
1312   if (FormatTok->is(tok::l_brace)) {
1313     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1314     parseBlock(/*MustBeDeclaration=*/false);
1315     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1316         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1317         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1318       addUnwrappedLine();
1319     } else {
1320       NeedsUnwrappedLine = true;
1321     }
1322   } else if (!FormatTok->is(tok::kw_catch)) {
1323     // The C++ standard requires a compound-statement after a try.
1324     // If there's none, we try to assume there's a structuralElement
1325     // and try to continue.
1326     addUnwrappedLine();
1327     ++Line->Level;
1328     parseStructuralElement();
1329     --Line->Level;
1330   }
1331   while (1) {
1332     if (FormatTok->is(tok::at))
1333       nextToken();
1334     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1335                              tok::kw___finally) ||
1336           ((Style.Language == FormatStyle::LK_Java ||
1337             Style.Language == FormatStyle::LK_JavaScript) &&
1338            FormatTok->is(Keywords.kw_finally)) ||
1339           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1340            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1341       break;
1342     nextToken();
1343     while (FormatTok->isNot(tok::l_brace)) {
1344       if (FormatTok->is(tok::l_paren)) {
1345         parseParens();
1346         continue;
1347       }
1348       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1349         return;
1350       nextToken();
1351     }
1352     NeedsUnwrappedLine = false;
1353     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1354     parseBlock(/*MustBeDeclaration=*/false);
1355     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1356         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1357         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1358       addUnwrappedLine();
1359     } else {
1360       NeedsUnwrappedLine = true;
1361     }
1362   }
1363   if (NeedsUnwrappedLine) {
1364     addUnwrappedLine();
1365   }
1366 }
1367
1368 void UnwrappedLineParser::parseNamespace() {
1369   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1370
1371   const FormatToken &InitialToken = *FormatTok;
1372   nextToken();
1373   if (FormatTok->Tok.is(tok::identifier))
1374     nextToken();
1375   if (FormatTok->Tok.is(tok::l_brace)) {
1376     if (ShouldBreakBeforeBrace(Style, InitialToken))
1377       addUnwrappedLine();
1378
1379     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1380                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1381                      DeclarationScopeStack.size() > 1);
1382     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1383     // Munch the semicolon after a namespace. This is more common than one would
1384     // think. Puttin the semicolon into its own line is very ugly.
1385     if (FormatTok->Tok.is(tok::semi))
1386       nextToken();
1387     addUnwrappedLine();
1388   }
1389   // FIXME: Add error handling.
1390 }
1391
1392 void UnwrappedLineParser::parseNew() {
1393   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1394   nextToken();
1395   if (Style.Language != FormatStyle::LK_Java)
1396     return;
1397
1398   // In Java, we can parse everything up to the parens, which aren't optional.
1399   do {
1400     // There should not be a ;, { or } before the new's open paren.
1401     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1402       return;
1403
1404     // Consume the parens.
1405     if (FormatTok->is(tok::l_paren)) {
1406       parseParens();
1407
1408       // If there is a class body of an anonymous class, consume that as child.
1409       if (FormatTok->is(tok::l_brace))
1410         parseChildBlock();
1411       return;
1412     }
1413     nextToken();
1414   } while (!eof());
1415 }
1416
1417 void UnwrappedLineParser::parseForOrWhileLoop() {
1418   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1419          "'for', 'while' or foreach macro expected");
1420   nextToken();
1421   if (FormatTok->Tok.is(tok::l_paren))
1422     parseParens();
1423   if (FormatTok->Tok.is(tok::l_brace)) {
1424     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1425     parseBlock(/*MustBeDeclaration=*/false);
1426     addUnwrappedLine();
1427   } else {
1428     addUnwrappedLine();
1429     ++Line->Level;
1430     parseStructuralElement();
1431     --Line->Level;
1432   }
1433 }
1434
1435 void UnwrappedLineParser::parseDoWhile() {
1436   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1437   nextToken();
1438   if (FormatTok->Tok.is(tok::l_brace)) {
1439     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1440     parseBlock(/*MustBeDeclaration=*/false);
1441     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1442       addUnwrappedLine();
1443   } else {
1444     addUnwrappedLine();
1445     ++Line->Level;
1446     parseStructuralElement();
1447     --Line->Level;
1448   }
1449
1450   // FIXME: Add error handling.
1451   if (!FormatTok->Tok.is(tok::kw_while)) {
1452     addUnwrappedLine();
1453     return;
1454   }
1455
1456   nextToken();
1457   parseStructuralElement();
1458 }
1459
1460 void UnwrappedLineParser::parseLabel() {
1461   nextToken();
1462   unsigned OldLineLevel = Line->Level;
1463   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1464     --Line->Level;
1465   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1466     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1467     parseBlock(/*MustBeDeclaration=*/false);
1468     if (FormatTok->Tok.is(tok::kw_break)) {
1469       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1470       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1471           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1472         addUnwrappedLine();
1473       }
1474       parseStructuralElement();
1475     }
1476     addUnwrappedLine();
1477   } else {
1478     if (FormatTok->is(tok::semi))
1479       nextToken();
1480     addUnwrappedLine();
1481   }
1482   Line->Level = OldLineLevel;
1483 }
1484
1485 void UnwrappedLineParser::parseCaseLabel() {
1486   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1487   // FIXME: fix handling of complex expressions here.
1488   do {
1489     nextToken();
1490   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1491   parseLabel();
1492 }
1493
1494 void UnwrappedLineParser::parseSwitch() {
1495   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1496   nextToken();
1497   if (FormatTok->Tok.is(tok::l_paren))
1498     parseParens();
1499   if (FormatTok->Tok.is(tok::l_brace)) {
1500     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1501     parseBlock(/*MustBeDeclaration=*/false);
1502     addUnwrappedLine();
1503   } else {
1504     addUnwrappedLine();
1505     ++Line->Level;
1506     parseStructuralElement();
1507     --Line->Level;
1508   }
1509 }
1510
1511 void UnwrappedLineParser::parseAccessSpecifier() {
1512   nextToken();
1513   // Understand Qt's slots.
1514   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1515     nextToken();
1516   // Otherwise, we don't know what it is, and we'd better keep the next token.
1517   if (FormatTok->Tok.is(tok::colon))
1518     nextToken();
1519   addUnwrappedLine();
1520 }
1521
1522 void UnwrappedLineParser::parseEnum() {
1523   // Won't be 'enum' for NS_ENUMs.
1524   if (FormatTok->Tok.is(tok::kw_enum))
1525     nextToken();
1526
1527   // Eat up enum class ...
1528   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1529     nextToken();
1530
1531   while (FormatTok->Tok.getIdentifierInfo() ||
1532          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1533                             tok::greater, tok::comma, tok::question)) {
1534     nextToken();
1535     // We can have macros or attributes in between 'enum' and the enum name.
1536     if (FormatTok->is(tok::l_paren))
1537       parseParens();
1538     if (FormatTok->is(tok::identifier)) {
1539       nextToken();
1540       // If there are two identifiers in a row, this is likely an elaborate
1541       // return type. In Java, this can be "implements", etc.
1542       if (Style.Language == FormatStyle::LK_Cpp &&
1543           FormatTok->is(tok::identifier))
1544         return;
1545     }
1546   }
1547
1548   // Just a declaration or something is wrong.
1549   if (FormatTok->isNot(tok::l_brace))
1550     return;
1551   FormatTok->BlockKind = BK_Block;
1552
1553   if (Style.Language == FormatStyle::LK_Java) {
1554     // Java enums are different.
1555     parseJavaEnumBody();
1556     return;
1557   }
1558
1559   // Parse enum body.
1560   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1561   if (HasError) {
1562     if (FormatTok->is(tok::semi))
1563       nextToken();
1564     addUnwrappedLine();
1565   }
1566
1567   // There is no addUnwrappedLine() here so that we fall through to parsing a
1568   // structural element afterwards. Thus, in "enum A {} n, m;",
1569   // "} n, m;" will end up in one unwrapped line.
1570 }
1571
1572 void UnwrappedLineParser::parseJavaEnumBody() {
1573   // Determine whether the enum is simple, i.e. does not have a semicolon or
1574   // constants with class bodies. Simple enums can be formatted like braced
1575   // lists, contracted to a single line, etc.
1576   unsigned StoredPosition = Tokens->getPosition();
1577   bool IsSimple = true;
1578   FormatToken *Tok = Tokens->getNextToken();
1579   while (Tok) {
1580     if (Tok->is(tok::r_brace))
1581       break;
1582     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1583       IsSimple = false;
1584       break;
1585     }
1586     // FIXME: This will also mark enums with braces in the arguments to enum
1587     // constants as "not simple". This is probably fine in practice, though.
1588     Tok = Tokens->getNextToken();
1589   }
1590   FormatTok = Tokens->setPosition(StoredPosition);
1591
1592   if (IsSimple) {
1593     parseBracedList();
1594     addUnwrappedLine();
1595     return;
1596   }
1597
1598   // Parse the body of a more complex enum.
1599   // First add a line for everything up to the "{".
1600   nextToken();
1601   addUnwrappedLine();
1602   ++Line->Level;
1603
1604   // Parse the enum constants.
1605   while (FormatTok) {
1606     if (FormatTok->is(tok::l_brace)) {
1607       // Parse the constant's class body.
1608       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1609                  /*MunchSemi=*/false);
1610     } else if (FormatTok->is(tok::l_paren)) {
1611       parseParens();
1612     } else if (FormatTok->is(tok::comma)) {
1613       nextToken();
1614       addUnwrappedLine();
1615     } else if (FormatTok->is(tok::semi)) {
1616       nextToken();
1617       addUnwrappedLine();
1618       break;
1619     } else if (FormatTok->is(tok::r_brace)) {
1620       addUnwrappedLine();
1621       break;
1622     } else {
1623       nextToken();
1624     }
1625   }
1626
1627   // Parse the class body after the enum's ";" if any.
1628   parseLevel(/*HasOpeningBrace=*/true);
1629   nextToken();
1630   --Line->Level;
1631   addUnwrappedLine();
1632 }
1633
1634 void UnwrappedLineParser::parseRecord() {
1635   const FormatToken &InitialToken = *FormatTok;
1636   nextToken();
1637
1638   // The actual identifier can be a nested name specifier, and in macros
1639   // it is often token-pasted.
1640   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1641                             tok::kw___attribute, tok::kw___declspec,
1642                             tok::kw_alignas) ||
1643          ((Style.Language == FormatStyle::LK_Java ||
1644            Style.Language == FormatStyle::LK_JavaScript) &&
1645           FormatTok->isOneOf(tok::period, tok::comma))) {
1646     bool IsNonMacroIdentifier =
1647         FormatTok->is(tok::identifier) &&
1648         FormatTok->TokenText != FormatTok->TokenText.upper();
1649     nextToken();
1650     // We can have macros or attributes in between 'class' and the class name.
1651     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1652       parseParens();
1653   }
1654
1655   // Note that parsing away template declarations here leads to incorrectly
1656   // accepting function declarations as record declarations.
1657   // In general, we cannot solve this problem. Consider:
1658   // class A<int> B() {}
1659   // which can be a function definition or a class definition when B() is a
1660   // macro. If we find enough real-world cases where this is a problem, we
1661   // can parse for the 'template' keyword in the beginning of the statement,
1662   // and thus rule out the record production in case there is no template
1663   // (this would still leave us with an ambiguity between template function
1664   // and class declarations).
1665   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1666     while (!eof()) {
1667       if (FormatTok->is(tok::l_brace)) {
1668         calculateBraceTypes(/*ExpectClassBody=*/true);
1669         if (!tryToParseBracedList())
1670           break;
1671       }
1672       if (FormatTok->Tok.is(tok::semi))
1673         return;
1674       nextToken();
1675     }
1676   }
1677   if (FormatTok->Tok.is(tok::l_brace)) {
1678     if (ShouldBreakBeforeBrace(Style, InitialToken))
1679       addUnwrappedLine();
1680
1681     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1682                /*MunchSemi=*/false);
1683   }
1684   // There is no addUnwrappedLine() here so that we fall through to parsing a
1685   // structural element afterwards. Thus, in "class A {} n, m;",
1686   // "} n, m;" will end up in one unwrapped line.
1687 }
1688
1689 void UnwrappedLineParser::parseObjCProtocolList() {
1690   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1691   do
1692     nextToken();
1693   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1694   nextToken(); // Skip '>'.
1695 }
1696
1697 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1698   do {
1699     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1700       nextToken();
1701       addUnwrappedLine();
1702       break;
1703     }
1704     if (FormatTok->is(tok::l_brace)) {
1705       parseBlock(/*MustBeDeclaration=*/false);
1706       // In ObjC interfaces, nothing should be following the "}".
1707       addUnwrappedLine();
1708     } else if (FormatTok->is(tok::r_brace)) {
1709       // Ignore stray "}". parseStructuralElement doesn't consume them.
1710       nextToken();
1711       addUnwrappedLine();
1712     } else {
1713       parseStructuralElement();
1714     }
1715   } while (!eof());
1716 }
1717
1718 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1719   nextToken();
1720   nextToken(); // interface name
1721
1722   // @interface can be followed by either a base class, or a category.
1723   if (FormatTok->Tok.is(tok::colon)) {
1724     nextToken();
1725     nextToken(); // base class name
1726   } else if (FormatTok->Tok.is(tok::l_paren))
1727     // Skip category, if present.
1728     parseParens();
1729
1730   if (FormatTok->Tok.is(tok::less))
1731     parseObjCProtocolList();
1732
1733   if (FormatTok->Tok.is(tok::l_brace)) {
1734     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1735         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1736       addUnwrappedLine();
1737     parseBlock(/*MustBeDeclaration=*/true);
1738   }
1739
1740   // With instance variables, this puts '}' on its own line.  Without instance
1741   // variables, this ends the @interface line.
1742   addUnwrappedLine();
1743
1744   parseObjCUntilAtEnd();
1745 }
1746
1747 void UnwrappedLineParser::parseObjCProtocol() {
1748   nextToken();
1749   nextToken(); // protocol name
1750
1751   if (FormatTok->Tok.is(tok::less))
1752     parseObjCProtocolList();
1753
1754   // Check for protocol declaration.
1755   if (FormatTok->Tok.is(tok::semi)) {
1756     nextToken();
1757     return addUnwrappedLine();
1758   }
1759
1760   addUnwrappedLine();
1761   parseObjCUntilAtEnd();
1762 }
1763
1764 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1765   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1766   nextToken();
1767
1768   // Consume the "default" in "export default class/function".
1769   if (FormatTok->is(tok::kw_default))
1770     nextToken();
1771
1772   // Consume "function" and "default function", so that these get parsed as
1773   // free-standing JS functions, i.e. do not require a trailing semicolon.
1774   if (FormatTok->is(Keywords.kw_function)) {
1775     nextToken();
1776     return;
1777   }
1778
1779   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1780                          Keywords.kw_var))
1781     return; // Fall through to parsing the corresponding structure.
1782
1783   if (FormatTok->is(tok::l_brace)) {
1784     FormatTok->BlockKind = BK_Block;
1785     parseBracedList();
1786   }
1787
1788   while (!eof() && FormatTok->isNot(tok::semi) &&
1789          FormatTok->isNot(tok::l_brace)) {
1790     nextToken();
1791   }
1792 }
1793
1794 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1795                                                  StringRef Prefix = "") {
1796   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1797                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1798   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1799                                                     E = Line.Tokens.end();
1800        I != E; ++I) {
1801     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1802   }
1803   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1804                                                     E = Line.Tokens.end();
1805        I != E; ++I) {
1806     const UnwrappedLineNode &Node = *I;
1807     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1808              I = Node.Children.begin(),
1809              E = Node.Children.end();
1810          I != E; ++I) {
1811       printDebugInfo(*I, "\nChild: ");
1812     }
1813   }
1814   llvm::dbgs() << "\n";
1815 }
1816
1817 void UnwrappedLineParser::addUnwrappedLine() {
1818   if (Line->Tokens.empty())
1819     return;
1820   DEBUG({
1821     if (CurrentLines == &Lines)
1822       printDebugInfo(*Line);
1823   });
1824   CurrentLines->push_back(std::move(*Line));
1825   Line->Tokens.clear();
1826   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1827     CurrentLines->append(
1828         std::make_move_iterator(PreprocessorDirectives.begin()),
1829         std::make_move_iterator(PreprocessorDirectives.end()));
1830     PreprocessorDirectives.clear();
1831   }
1832 }
1833
1834 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1835
1836 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1837   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1838          FormatTok.NewlinesBefore > 0;
1839 }
1840
1841 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1842   bool JustComments = Line->Tokens.empty();
1843   for (SmallVectorImpl<FormatToken *>::const_iterator
1844            I = CommentsBeforeNextToken.begin(),
1845            E = CommentsBeforeNextToken.end();
1846        I != E; ++I) {
1847     if (isOnNewLine(**I) && JustComments)
1848       addUnwrappedLine();
1849     pushToken(*I);
1850   }
1851   if (NewlineBeforeNext && JustComments)
1852     addUnwrappedLine();
1853   CommentsBeforeNextToken.clear();
1854 }
1855
1856 void UnwrappedLineParser::nextToken() {
1857   if (eof())
1858     return;
1859   flushComments(isOnNewLine(*FormatTok));
1860   pushToken(FormatTok);
1861   readToken();
1862 }
1863
1864 void UnwrappedLineParser::readToken() {
1865   bool CommentsInCurrentLine = true;
1866   do {
1867     FormatTok = Tokens->getNextToken();
1868     assert(FormatTok);
1869     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1870            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1871       // If there is an unfinished unwrapped line, we flush the preprocessor
1872       // directives only after that unwrapped line was finished later.
1873       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1874       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1875       // Comments stored before the preprocessor directive need to be output
1876       // before the preprocessor directive, at the same level as the
1877       // preprocessor directive, as we consider them to apply to the directive.
1878       flushComments(isOnNewLine(*FormatTok));
1879       parsePPDirective();
1880     }
1881     while (FormatTok->Type == TT_ConflictStart ||
1882            FormatTok->Type == TT_ConflictEnd ||
1883            FormatTok->Type == TT_ConflictAlternative) {
1884       if (FormatTok->Type == TT_ConflictStart) {
1885         conditionalCompilationStart(/*Unreachable=*/false);
1886       } else if (FormatTok->Type == TT_ConflictAlternative) {
1887         conditionalCompilationAlternative();
1888       } else if (FormatTok->Type == TT_ConflictEnd) {
1889         conditionalCompilationEnd();
1890       }
1891       FormatTok = Tokens->getNextToken();
1892       FormatTok->MustBreakBefore = true;
1893     }
1894
1895     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1896         !Line->InPPDirective) {
1897       continue;
1898     }
1899
1900     if (!FormatTok->Tok.is(tok::comment))
1901       return;
1902     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1903       CommentsInCurrentLine = false;
1904     }
1905     if (CommentsInCurrentLine) {
1906       pushToken(FormatTok);
1907     } else {
1908       CommentsBeforeNextToken.push_back(FormatTok);
1909     }
1910   } while (!eof());
1911 }
1912
1913 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1914   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1915   if (MustBreakBeforeNextToken) {
1916     Line->Tokens.back().Tok->MustBreakBefore = true;
1917     MustBreakBeforeNextToken = false;
1918   }
1919 }
1920
1921 } // end namespace format
1922 } // end namespace clang