contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #define DEBUG_TYPE "format-parser"
  22
  23 namespace clang {
  24 namespace format {
  25
  26 class FormatTokenSource {
  27 public:
  28   virtual ~FormatTokenSource() {}
  29   virtual FormatToken *getNextToken() = 0;
  30
  31   virtual unsigned getPosition() = 0;
  32   virtual FormatToken *setPosition(unsigned Position) = 0;
  33 };
  34
  35 namespace {
  36
  37 class ScopedDeclarationState {
  38 public:
  39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  40                          bool MustBeDeclaration)
  41       : Line(Line), Stack(Stack) {
  42     Line.MustBeDeclaration = MustBeDeclaration;
  43     Stack.push_back(MustBeDeclaration);
  44   }
  45   ~ScopedDeclarationState() {
  46     Stack.pop_back();
  47     if (!Stack.empty())
  48       Line.MustBeDeclaration = Stack.back();
  49     else
  50       Line.MustBeDeclaration = true;
  51   }
  52
  53 private:
  54   UnwrappedLine &Line;
  55   std::vector<bool> &Stack;
  56 };
  57
  58 class ScopedMacroState : public FormatTokenSource {
  59 public:
  60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  61                    FormatToken *&ResetToken)
  62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  64         Token(nullptr) {
  65     TokenSource = this;
  66     Line.Level = 0;
  67     Line.InPPDirective = true;
  68   }
  69
  70   ~ScopedMacroState() override {
  71     TokenSource = PreviousTokenSource;
  72     ResetToken = Token;
  73     Line.InPPDirective = false;
  74     Line.Level = PreviousLineLevel;
  75   }
  76
  77   FormatToken *getNextToken() override {
  78     // The \c UnwrappedLineParser guards against this by never calling
  79     // \c getNextToken() after it has encountered the first eof token.
  80     assert(!eof());
  81     Token = PreviousTokenSource->getNextToken();
  82     if (eof())
  83       return getFakeEOF();
  84     return Token;
  85   }
  86
  87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  88
  89   FormatToken *setPosition(unsigned Position) override {
  90     Token = PreviousTokenSource->setPosition(Position);
  91     return Token;
  92   }
  93
  94 private:
  95   bool eof() { return Token && Token->HasUnescapedNewline; }
  96
  97   FormatToken *getFakeEOF() {
  98     static bool EOFInitialized = false;
  99     static FormatToken FormatTok;
 100     if (!EOFInitialized) {
 101       FormatTok.Tok.startToken();
 102       FormatTok.Tok.setKind(tok::eof);
 103       EOFInitialized = true;
 104     }
 105     return &FormatTok;
 106   }
 107
 108   UnwrappedLine &Line;
 109   FormatTokenSource *&TokenSource;
 110   FormatToken *&ResetToken;
 111   unsigned PreviousLineLevel;
 112   FormatTokenSource *PreviousTokenSource;
 113
 114   FormatToken *Token;
 115 };
 116
 117 } // end anonymous namespace
 118
 119 class ScopedLineState {
 120 public:
 121   ScopedLineState(UnwrappedLineParser &Parser,
 122                   bool SwitchToPreprocessorLines = false)
 123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 124     if (SwitchToPreprocessorLines)
 125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 126     else if (!Parser.Line->Tokens.empty())
 127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 128     PreBlockLine = std::move(Parser.Line);
 129     Parser.Line = llvm::make_unique<UnwrappedLine>();
 130     Parser.Line->Level = PreBlockLine->Level;
 131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 132   }
 133
 134   ~ScopedLineState() {
 135     if (!Parser.Line->Tokens.empty()) {
 136       Parser.addUnwrappedLine();
 137     }
 138     assert(Parser.Line->Tokens.empty());
 139     Parser.Line = std::move(PreBlockLine);
 140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 141       Parser.MustBreakBeforeNextToken = true;
 142     Parser.CurrentLines = OriginalLines;
 143   }
 144
 145 private:
 146   UnwrappedLineParser &Parser;
 147
 148   std::unique_ptr<UnwrappedLine> PreBlockLine;
 149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 150 };
 151
 152 class CompoundStatementIndenter {
 153 public:
 154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 155                             const FormatStyle &Style, unsigned &LineLevel)
 156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
 158       Parser->addUnwrappedLine();
 159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
 160       Parser->addUnwrappedLine();
 161       ++LineLevel;
 162     }
 163   }
 164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 165
 166 private:
 167   unsigned &LineLevel;
 168   unsigned OldLineLevel;
 169 };
 170
 171 namespace {
 172
 173 class IndexedTokenSource : public FormatTokenSource {
 174 public:
 175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 176       : Tokens(Tokens), Position(-1) {}
 177
 178   FormatToken *getNextToken() override {
 179     ++Position;
 180     return Tokens[Position];
 181   }
 182
 183   unsigned getPosition() override {
 184     assert(Position >= 0);
 185     return Position;
 186   }
 187
 188   FormatToken *setPosition(unsigned P) override {
 189     Position = P;
 190     return Tokens[Position];
 191   }
 192
 193   void reset() { Position = -1; }
 194
 195 private:
 196   ArrayRef<FormatToken *> Tokens;
 197   int Position;
 198 };
 199
 200 } // end anonymous namespace
 201
 202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 203                                          const AdditionalKeywords &Keywords,
 204                                          ArrayRef<FormatToken *> Tokens,
 205                                          UnwrappedLineConsumer &Callback)
 206     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 207       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
 208       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 209
 210 void UnwrappedLineParser::reset() {
 211   PPBranchLevel = -1;
 212   Line.reset(new UnwrappedLine);
 213   CommentsBeforeNextToken.clear();
 214   FormatTok = nullptr;
 215   MustBreakBeforeNextToken = false;
 216   PreprocessorDirectives.clear();
 217   CurrentLines = &Lines;
 218   DeclarationScopeStack.clear();
 219   PPStack.clear();
 220 }
 221
 222 void UnwrappedLineParser::parse() {
 223   IndexedTokenSource TokenSource(AllTokens);
 224   do {
 225     DEBUG(llvm::dbgs() << "----\n");
 226     reset();
 227     Tokens = &TokenSource;
 228     TokenSource.reset();
 229
 230     readToken();
 231     parseFile();
 232     // Create line with eof token.
 233     pushToken(FormatTok);
 234     addUnwrappedLine();
 235
 236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 237                                                   E = Lines.end();
 238          I != E; ++I) {
 239       Callback.consumeUnwrappedLine(*I);
 240     }
 241     Callback.finishRun();
 242     Lines.clear();
 243     while (!PPLevelBranchIndex.empty() &&
 244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 247     }
 248     if (!PPLevelBranchIndex.empty()) {
 249       ++PPLevelBranchIndex.back();
 250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 252     }
 253   } while (!PPLevelBranchIndex.empty());
 254 }
 255
 256 void UnwrappedLineParser::parseFile() {
 257   // The top-level context in a file always has declarations, except for pre-
 258   // processor directives and JavaScript files.
 259   bool MustBeDeclaration =
 260       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 261   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 262                                           MustBeDeclaration);
 263   parseLevel(/*HasOpeningBrace=*/false);
 264   // Make sure to format the remaining tokens.
 265   flushComments(true);
 266   addUnwrappedLine();
 267 }
 268
 269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 270   bool SwitchLabelEncountered = false;
 271   do {
 272     switch (FormatTok->Tok.getKind()) {
 273     case tok::comment:
 274       nextToken();
 275       addUnwrappedLine();
 276       break;
 277     case tok::l_brace:
 278       // FIXME: Add parameter whether this can happen - if this happens, we must
 279       // be in a non-declaration context.
 280       parseBlock(/*MustBeDeclaration=*/false);
 281       addUnwrappedLine();
 282       break;
 283     case tok::r_brace:
 284       if (HasOpeningBrace)
 285         return;
 286       nextToken();
 287       addUnwrappedLine();
 288       break;
 289     case tok::kw_default:
 290     case tok::kw_case:
 291       if (!SwitchLabelEncountered &&
 292           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 293         ++Line->Level;
 294       SwitchLabelEncountered = true;
 295       parseStructuralElement();
 296       break;
 297     default:
 298       parseStructuralElement();
 299       break;
 300     }
 301   } while (!eof());
 302 }
 303
 304 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 305   // We'll parse forward through the tokens until we hit
 306   // a closing brace or eof - note that getNextToken() will
 307   // parse macros, so this will magically work inside macro
 308   // definitions, too.
 309   unsigned StoredPosition = Tokens->getPosition();
 310   FormatToken *Tok = FormatTok;
 311   // Keep a stack of positions of lbrace tokens. We will
 312   // update information about whether an lbrace starts a
 313   // braced init list or a different block during the loop.
 314   SmallVector<FormatToken *, 8> LBraceStack;
 315   assert(Tok->Tok.is(tok::l_brace));
 316   do {
 317     // Get next none-comment token.
 318     FormatToken *NextTok;
 319     unsigned ReadTokens = 0;
 320     do {
 321       NextTok = Tokens->getNextToken();
 322       ++ReadTokens;
 323     } while (NextTok->is(tok::comment));
 324
 325     switch (Tok->Tok.getKind()) {
 326     case tok::l_brace:
 327       Tok->BlockKind = BK_Unknown;
 328       LBraceStack.push_back(Tok);
 329       break;
 330     case tok::r_brace:
 331       if (!LBraceStack.empty()) {
 332         if (LBraceStack.back()->BlockKind == BK_Unknown) {
 333           bool ProbablyBracedList = false;
 334           if (Style.Language == FormatStyle::LK_Proto) {
 335             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 336           } else {
 337             // Using OriginalColumn to distinguish between ObjC methods and
 338             // binary operators is a bit hacky.
 339             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 340                                     NextTok->OriginalColumn == 0;
 341
 342             // If there is a comma, semicolon or right paren after the closing
 343             // brace, we assume this is a braced initializer list.  Note that
 344             // regardless how we mark inner braces here, we will overwrite the
 345             // BlockKind later if we parse a braced list (where all blocks
 346             // inside are by default braced lists), or when we explicitly detect
 347             // blocks (for example while parsing lambdas).
 348             //
 349             // We exclude + and - as they can be ObjC visibility modifiers.
 350             ProbablyBracedList =
 351                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 352                                  tok::r_paren, tok::r_square, tok::l_brace,
 353                                  tok::l_paren, tok::ellipsis) ||
 354                 (NextTok->is(tok::semi) &&
 355                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
 356                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 357           }
 358           if (ProbablyBracedList) {
 359             Tok->BlockKind = BK_BracedInit;
 360             LBraceStack.back()->BlockKind = BK_BracedInit;
 361           } else {
 362             Tok->BlockKind = BK_Block;
 363             LBraceStack.back()->BlockKind = BK_Block;
 364           }
 365         }
 366         LBraceStack.pop_back();
 367       }
 368       break;
 369     case tok::at:
 370     case tok::semi:
 371     case tok::kw_if:
 372     case tok::kw_while:
 373     case tok::kw_for:
 374     case tok::kw_switch:
 375     case tok::kw_try:
 376     case tok::kw___try:
 377       if (!LBraceStack.empty())
 378         LBraceStack.back()->BlockKind = BK_Block;
 379       break;
 380     default:
 381       break;
 382     }
 383     Tok = NextTok;
 384   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 385   // Assume other blocks for all unclosed opening braces.
 386   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 387     if (LBraceStack[i]->BlockKind == BK_Unknown)
 388       LBraceStack[i]->BlockKind = BK_Block;
 389   }
 390
 391   FormatTok = Tokens->setPosition(StoredPosition);
 392 }
 393
 394 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 395                                      bool MunchSemi) {
 396   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
 397   unsigned InitialLevel = Line->Level;
 398   nextToken();
 399
 400   addUnwrappedLine();
 401
 402   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 403                                           MustBeDeclaration);
 404   if (AddLevel)
 405     ++Line->Level;
 406   parseLevel(/*HasOpeningBrace=*/true);
 407
 408   if (!FormatTok->Tok.is(tok::r_brace)) {
 409     Line->Level = InitialLevel;
 410     return;
 411   }
 412
 413   nextToken(); // Munch the closing brace.
 414   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 415     nextToken();
 416   Line->Level = InitialLevel;
 417 }
 418
 419 static bool isGoogScope(const UnwrappedLine &Line) {
 420   // FIXME: Closure-library specific stuff should not be hard-coded but be
 421   // configurable.
 422   if (Line.Tokens.size() < 4)
 423     return false;
 424   auto I = Line.Tokens.begin();
 425   if (I->Tok->TokenText != "goog")
 426     return false;
 427   ++I;
 428   if (I->Tok->isNot(tok::period))
 429     return false;
 430   ++I;
 431   if (I->Tok->TokenText != "scope")
 432     return false;
 433   ++I;
 434   return I->Tok->is(tok::l_paren);
 435 }
 436
 437 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 438                                    const FormatToken &InitialToken) {
 439   switch (Style.BreakBeforeBraces) {
 440   case FormatStyle::BS_Linux:
 441     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
 442   case FormatStyle::BS_Allman:
 443   case FormatStyle::BS_GNU:
 444     return true;
 445   default:
 446     return false;
 447   }
 448 }
 449
 450 void UnwrappedLineParser::parseChildBlock() {
 451   FormatTok->BlockKind = BK_Block;
 452   nextToken();
 453   {
 454     bool GoogScope =
 455         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
 456     ScopedLineState LineState(*this);
 457     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 458                                             /*MustBeDeclaration=*/false);
 459     Line->Level += GoogScope ? 0 : 1;
 460     parseLevel(/*HasOpeningBrace=*/true);
 461     flushComments(isOnNewLine(*FormatTok));
 462     Line->Level -= GoogScope ? 0 : 1;
 463   }
 464   nextToken();
 465 }
 466
 467 void UnwrappedLineParser::parsePPDirective() {
 468   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 469   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 470   nextToken();
 471
 472   if (!FormatTok->Tok.getIdentifierInfo()) {
 473     parsePPUnknown();
 474     return;
 475   }
 476
 477   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 478   case tok::pp_define:
 479     parsePPDefine();
 480     return;
 481   case tok::pp_if:
 482     parsePPIf(/*IfDef=*/false);
 483     break;
 484   case tok::pp_ifdef:
 485   case tok::pp_ifndef:
 486     parsePPIf(/*IfDef=*/true);
 487     break;
 488   case tok::pp_else:
 489     parsePPElse();
 490     break;
 491   case tok::pp_elif:
 492     parsePPElIf();
 493     break;
 494   case tok::pp_endif:
 495     parsePPEndIf();
 496     break;
 497   default:
 498     parsePPUnknown();
 499     break;
 500   }
 501 }
 502
 503 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 504   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
 505     PPStack.push_back(PP_Unreachable);
 506   else
 507     PPStack.push_back(PP_Conditional);
 508 }
 509
 510 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 511   ++PPBranchLevel;
 512   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 513   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 514     PPLevelBranchIndex.push_back(0);
 515     PPLevelBranchCount.push_back(0);
 516   }
 517   PPChainBranchIndex.push(0);
 518   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 519   conditionalCompilationCondition(Unreachable || Skip);
 520 }
 521
 522 void UnwrappedLineParser::conditionalCompilationAlternative() {
 523   if (!PPStack.empty())
 524     PPStack.pop_back();
 525   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 526   if (!PPChainBranchIndex.empty())
 527     ++PPChainBranchIndex.top();
 528   conditionalCompilationCondition(
 529       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 530       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 531 }
 532
 533 void UnwrappedLineParser::conditionalCompilationEnd() {
 534   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 535   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 536     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 537       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 538     }
 539   }
 540   // Guard against #endif's without #if.
 541   if (PPBranchLevel > 0)
 542     --PPBranchLevel;
 543   if (!PPChainBranchIndex.empty())
 544     PPChainBranchIndex.pop();
 545   if (!PPStack.empty())
 546     PPStack.pop_back();
 547 }
 548
 549 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 550   nextToken();
 551   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
 552                          FormatTok->Tok.getLiteralData() != nullptr &&
 553                          StringRef(FormatTok->Tok.getLiteralData(),
 554                                    FormatTok->Tok.getLength()) == "0") ||
 555                         FormatTok->Tok.is(tok::kw_false);
 556   conditionalCompilationStart(!IfDef && IsLiteralFalse);
 557   parsePPUnknown();
 558 }
 559
 560 void UnwrappedLineParser::parsePPElse() {
 561   conditionalCompilationAlternative();
 562   parsePPUnknown();
 563 }
 564
 565 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 566
 567 void UnwrappedLineParser::parsePPEndIf() {
 568   conditionalCompilationEnd();
 569   parsePPUnknown();
 570 }
 571
 572 void UnwrappedLineParser::parsePPDefine() {
 573   nextToken();
 574
 575   if (FormatTok->Tok.getKind() != tok::identifier) {
 576     parsePPUnknown();
 577     return;
 578   }
 579   nextToken();
 580   if (FormatTok->Tok.getKind() == tok::l_paren &&
 581       FormatTok->WhitespaceRange.getBegin() ==
 582           FormatTok->WhitespaceRange.getEnd()) {
 583     parseParens();
 584   }
 585   addUnwrappedLine();
 586   Line->Level = 1;
 587
 588   // Errors during a preprocessor directive can only affect the layout of the
 589   // preprocessor directive, and thus we ignore them. An alternative approach
 590   // would be to use the same approach we use on the file level (no
 591   // re-indentation if there was a structural error) within the macro
 592   // definition.
 593   parseFile();
 594 }
 595
 596 void UnwrappedLineParser::parsePPUnknown() {
 597   do {
 598     nextToken();
 599   } while (!eof());
 600   addUnwrappedLine();
 601 }
 602
 603 // Here we blacklist certain tokens that are not usually the first token in an
 604 // unwrapped line. This is used in attempt to distinguish macro calls without
 605 // trailing semicolons from other constructs split to several lines.
 606 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 607   // Semicolon can be a null-statement, l_square can be a start of a macro or
 608   // a C++11 attribute, but this doesn't seem to be common.
 609   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 610          Tok.isNot(tok::l_square) &&
 611          // Tokens that can only be used as binary operators and a part of
 612          // overloaded operator names.
 613          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 614          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 615          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 616          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 617          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 618          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 619          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 620          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 621          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 622          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 623          Tok.isNot(tok::lesslessequal) &&
 624          // Colon is used in labels, base class lists, initializer lists,
 625          // range-based for loops, ternary operator, but should never be the
 626          // first token in an unwrapped line.
 627          Tok.isNot(tok::colon) &&
 628          // 'noexcept' is a trailing annotation.
 629          Tok.isNot(tok::kw_noexcept);
 630 }
 631
/// \brief Parses one structural element starting at the current token.
///
/// The first switch looks at the leading token and hands constructs that have
/// a dedicated parser (namespaces, control flow statements, access
/// specifiers, labels, ...) over to it. Everything else falls through to the
/// loop below, which consumes tokens until something ends the element (for
/// example a semicolon, a closing brace or a block) or eof is reached.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->Tok.is(tok::l_brace));
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the matching '}' is marked as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript these keywords are just consumed; otherwise they
    // are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern' followed by a string literal and '{' is parsed as a block of
    // declarations without an additional level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->is(Keywords.kw_import)) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (FormatTok->is(Keywords.kw_signals)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
      }
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // No dedicated parser took over: consume tokens until the element ends.
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      parseEnum();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is left for the caller to consume.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::caret:
      // A '^', optionally followed by a type or identifier and a parameter
      // list; a following '{' is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      // Parse function literal unless 'function' is the first token in a line
      // in which case this should be treated as a free-standing function.
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        parseRecord();
        addUnwrappedLine();
        break;
      }

      // Remember the identifier's text; it is needed for the macro heuristic
      // below after the token has been consumed.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        // If comments are pending, the first comment decides whether a
        // newline follows; otherwise the next token does.
        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro on its own line if it is all upper
        // case, is function-like or at least five characters long, is
        // followed by a newline, and the next token can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
 925
 926 bool UnwrappedLineParser::tryToParseLambda() {
 927   if (Style.Language != FormatStyle::LK_Cpp) {
 928     nextToken();
 929     return false;
 930   }
 931   // FIXME: This is a dirty way to access the previous token. Find a better
 932   // solution.
 933   if (!Line->Tokens.empty() &&
 934       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
 935                                         tok::kw_new, tok::kw_delete) ||
 936        Line->Tokens.back().Tok->closesScope() ||
 937        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
 938     nextToken();
 939     return false;
 940   }
 941   assert(FormatTok->is(tok::l_square));
 942   FormatToken &LSquare = *FormatTok;
 943   if (!tryToParseLambdaIntroducer())
 944     return false;
 945
 946   while (FormatTok->isNot(tok::l_brace)) {
 947     if (FormatTok->isSimpleTypeSpecifier()) {
 948       nextToken();
 949       continue;
 950     }
 951     switch (FormatTok->Tok.getKind()) {
 952     case tok::l_brace:
 953       break;
 954     case tok::l_paren:
 955       parseParens();
 956       break;
 957     case tok::amp:
 958     case tok::star:
 959     case tok::kw_const:
 960     case tok::comma:
 961     case tok::less:
 962     case tok::greater:
 963     case tok::identifier:
 964     case tok::coloncolon:
 965     case tok::kw_mutable:
 966       nextToken();
 967       break;
 968     case tok::arrow:
 969       FormatTok->Type = TT_LambdaArrow;
 970       nextToken();
 971       break;
 972     default:
 973       return true;
 974     }
 975   }
 976   LSquare.Type = TT_LambdaLSquare;
 977   parseChildBlock();
 978   return true;
 979 }
 980
 981 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
 982   nextToken();
 983   if (FormatTok->is(tok::equal)) {
 984     nextToken();
 985     if (FormatTok->is(tok::r_square)) {
 986       nextToken();
 987       return true;
 988     }
 989     if (FormatTok->isNot(tok::comma))
 990       return false;
 991     nextToken();
 992   } else if (FormatTok->is(tok::amp)) {
 993     nextToken();
 994     if (FormatTok->is(tok::r_square)) {
 995       nextToken();
 996       return true;
 997     }
 998     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
 999       return false;
1000     }
1001     if (FormatTok->is(tok::comma))
1002       nextToken();
1003   } else if (FormatTok->is(tok::r_square)) {
1004     nextToken();
1005     return true;
1006   }
1007   do {
1008     if (FormatTok->is(tok::amp))
1009       nextToken();
1010     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1011       return false;
1012     nextToken();
1013     if (FormatTok->is(tok::ellipsis))
1014       nextToken();
1015     if (FormatTok->is(tok::comma)) {
1016       nextToken();
1017     } else if (FormatTok->is(tok::r_square)) {
1018       nextToken();
1019       return true;
1020     } else {
1021       return false;
1022     }
1023   } while (!eof());
1024   return false;
1025 }
1026
1027 void UnwrappedLineParser::tryToParseJSFunction() {
1028   nextToken();
1029
1030   // Consume function name.
1031   if (FormatTok->is(tok::identifier))
1032     nextToken();
1033
1034   if (FormatTok->isNot(tok::l_paren))
1035     return;
1036
1037   // Parse formal parameter list.
1038   parseParens();
1039
1040   if (FormatTok->is(tok::colon)) {
1041     // Parse a type definition.
1042     nextToken();
1043
1044     // Eat the type declaration. For braced inline object types, balance braces,
1045     // otherwise just parse until finding an l_brace for the function body.
1046     if (FormatTok->is(tok::l_brace))
1047       tryToParseBracedList();
1048     else
1049       while (FormatTok->isNot(tok::l_brace) && !eof())
1050         nextToken();
1051   }
1052
1053   parseChildBlock();
1054 }
1055
1056 bool UnwrappedLineParser::tryToParseBracedList() {
1057   if (FormatTok->BlockKind == BK_Unknown)
1058     calculateBraceTypes();
1059   assert(FormatTok->BlockKind != BK_Unknown);
1060   if (FormatTok->BlockKind == BK_Block)
1061     return false;
1062   parseBracedList();
1063   return true;
1064 }
1065
1066 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1067   bool HasError = false;
1068   nextToken();
1069
1070   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1071   // replace this by using parseAssigmentExpression() inside.
1072   do {
1073     if (Style.Language == FormatStyle::LK_JavaScript) {
1074       if (FormatTok->is(Keywords.kw_function)) {
1075         tryToParseJSFunction();
1076         continue;
1077       }
1078       if (FormatTok->is(TT_JsFatArrow)) {
1079         nextToken();
1080         // Fat arrows can be followed by simple expressions or by child blocks
1081         // in curly braces.
1082         if (FormatTok->is(tok::l_brace)) {
1083           parseChildBlock();
1084           continue;
1085         }
1086       }
1087     }
1088     switch (FormatTok->Tok.getKind()) {
1089     case tok::caret:
1090       nextToken();
1091       if (FormatTok->is(tok::l_brace)) {
1092         parseChildBlock();
1093       }
1094       break;
1095     case tok::l_square:
1096       tryToParseLambda();
1097       break;
1098     case tok::l_brace:
1099       // Assume there are no blocks inside a braced init list apart
1100       // from the ones we explicitly parse out (like lambdas).
1101       FormatTok->BlockKind = BK_BracedInit;
1102       parseBracedList();
1103       break;
1104     case tok::r_paren:
1105       // JavaScript can just have free standing methods and getters/setters in
1106       // object literals. Detect them by a "{" following ")".
1107       if (Style.Language == FormatStyle::LK_JavaScript) {
1108         nextToken();
1109         if (FormatTok->is(tok::l_brace))
1110           parseChildBlock();
1111         break;
1112       }
1113       nextToken();
1114       break;
1115     case tok::r_brace:
1116       nextToken();
1117       return !HasError;
1118     case tok::semi:
1119       HasError = true;
1120       if (!ContinueOnSemicolons)
1121         return !HasError;
1122       nextToken();
1123       break;
1124     case tok::comma:
1125       nextToken();
1126       break;
1127     default:
1128       nextToken();
1129       break;
1130     }
1131   } while (!eof());
1132   return false;
1133 }
1134
1135 void UnwrappedLineParser::parseParens() {
1136   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1137   nextToken();
1138   do {
1139     switch (FormatTok->Tok.getKind()) {
1140     case tok::l_paren:
1141       parseParens();
1142       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1143         parseChildBlock();
1144       break;
1145     case tok::r_paren:
1146       nextToken();
1147       return;
1148     case tok::r_brace:
1149       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1150       return;
1151     case tok::l_square:
1152       tryToParseLambda();
1153       break;
1154     case tok::l_brace:
1155       if (!tryToParseBracedList())
1156         parseChildBlock();
1157       break;
1158     case tok::at:
1159       nextToken();
1160       if (FormatTok->Tok.is(tok::l_brace))
1161         parseBracedList();
1162       break;
1163     case tok::identifier:
1164       if (Style.Language == FormatStyle::LK_JavaScript &&
1165           FormatTok->is(Keywords.kw_function))
1166         tryToParseJSFunction();
1167       else
1168         nextToken();
1169       break;
1170     default:
1171       nextToken();
1172       break;
1173     }
1174   } while (!eof());
1175 }
1176
1177 void UnwrappedLineParser::parseSquare() {
1178   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1179   if (tryToParseLambda())
1180     return;
1181   do {
1182     switch (FormatTok->Tok.getKind()) {
1183     case tok::l_paren:
1184       parseParens();
1185       break;
1186     case tok::r_square:
1187       nextToken();
1188       return;
1189     case tok::r_brace:
1190       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1191       return;
1192     case tok::l_square:
1193       parseSquare();
1194       break;
1195     case tok::l_brace: {
1196       if (!tryToParseBracedList())
1197         parseChildBlock();
1198       break;
1199     }
1200     case tok::at:
1201       nextToken();
1202       if (FormatTok->Tok.is(tok::l_brace))
1203         parseBracedList();
1204       break;
1205     default:
1206       nextToken();
1207       break;
1208     }
1209   } while (!eof());
1210 }
1211
1212 void UnwrappedLineParser::parseIfThenElse() {
1213   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1214   nextToken();
1215   if (FormatTok->Tok.is(tok::l_paren))
1216     parseParens();
1217   bool NeedsUnwrappedLine = false;
1218   if (FormatTok->Tok.is(tok::l_brace)) {
1219     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1220     parseBlock(/*MustBeDeclaration=*/false);
1221     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1222         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1223       addUnwrappedLine();
1224     } else {
1225       NeedsUnwrappedLine = true;
1226     }
1227   } else {
1228     addUnwrappedLine();
1229     ++Line->Level;
1230     parseStructuralElement();
1231     --Line->Level;
1232   }
1233   if (FormatTok->Tok.is(tok::kw_else)) {
1234     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1235       addUnwrappedLine();
1236     nextToken();
1237     if (FormatTok->Tok.is(tok::l_brace)) {
1238       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1239       parseBlock(/*MustBeDeclaration=*/false);
1240       addUnwrappedLine();
1241     } else if (FormatTok->Tok.is(tok::kw_if)) {
1242       parseIfThenElse();
1243     } else {
1244       addUnwrappedLine();
1245       ++Line->Level;
1246       parseStructuralElement();
1247       --Line->Level;
1248     }
1249   } else if (NeedsUnwrappedLine) {
1250     addUnwrappedLine();
1251   }
1252 }
1253
1254 void UnwrappedLineParser::parseTryCatch() {
1255   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1256   nextToken();
1257   bool NeedsUnwrappedLine = false;
1258   if (FormatTok->is(tok::colon)) {
1259     // We are in a function try block, what comes is an initializer list.
1260     nextToken();
1261     while (FormatTok->is(tok::identifier)) {
1262       nextToken();
1263       if (FormatTok->is(tok::l_paren))
1264         parseParens();
1265       if (FormatTok->is(tok::comma))
1266         nextToken();
1267     }
1268   }
1269   // Parse try with resource.
1270   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1271     parseParens();
1272   }
1273   if (FormatTok->is(tok::l_brace)) {
1274     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1275     parseBlock(/*MustBeDeclaration=*/false);
1276     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1277         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1278         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1279       addUnwrappedLine();
1280     } else {
1281       NeedsUnwrappedLine = true;
1282     }
1283   } else if (!FormatTok->is(tok::kw_catch)) {
1284     // The C++ standard requires a compound-statement after a try.
1285     // If there's none, we try to assume there's a structuralElement
1286     // and try to continue.
1287     addUnwrappedLine();
1288     ++Line->Level;
1289     parseStructuralElement();
1290     --Line->Level;
1291   }
1292   while (1) {
1293     if (FormatTok->is(tok::at))
1294       nextToken();
1295     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1296                              tok::kw___finally) ||
1297           ((Style.Language == FormatStyle::LK_Java ||
1298             Style.Language == FormatStyle::LK_JavaScript) &&
1299            FormatTok->is(Keywords.kw_finally)) ||
1300           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1301            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1302       break;
1303     nextToken();
1304     while (FormatTok->isNot(tok::l_brace)) {
1305       if (FormatTok->is(tok::l_paren)) {
1306         parseParens();
1307         continue;
1308       }
1309       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1310         return;
1311       nextToken();
1312     }
1313     NeedsUnwrappedLine = false;
1314     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1315     parseBlock(/*MustBeDeclaration=*/false);
1316     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1317         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1318         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1319       addUnwrappedLine();
1320     } else {
1321       NeedsUnwrappedLine = true;
1322     }
1323   }
1324   if (NeedsUnwrappedLine) {
1325     addUnwrappedLine();
1326   }
1327 }
1328
1329 void UnwrappedLineParser::parseNamespace() {
1330   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1331
1332   const FormatToken &InitialToken = *FormatTok;
1333   nextToken();
1334   if (FormatTok->Tok.is(tok::identifier))
1335     nextToken();
1336   if (FormatTok->Tok.is(tok::l_brace)) {
1337     if (ShouldBreakBeforeBrace(Style, InitialToken))
1338       addUnwrappedLine();
1339
1340     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1341                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1342                      DeclarationScopeStack.size() > 1);
1343     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1344     // Munch the semicolon after a namespace. This is more common than one would
1345     // think. Puttin the semicolon into its own line is very ugly.
1346     if (FormatTok->Tok.is(tok::semi))
1347       nextToken();
1348     addUnwrappedLine();
1349   }
1350   // FIXME: Add error handling.
1351 }
1352
1353 void UnwrappedLineParser::parseNew() {
1354   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1355   nextToken();
1356   if (Style.Language != FormatStyle::LK_Java)
1357     return;
1358
1359   // In Java, we can parse everything up to the parens, which aren't optional.
1360   do {
1361     // There should not be a ;, { or } before the new's open paren.
1362     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1363       return;
1364
1365     // Consume the parens.
1366     if (FormatTok->is(tok::l_paren)) {
1367       parseParens();
1368
1369       // If there is a class body of an anonymous class, consume that as child.
1370       if (FormatTok->is(tok::l_brace))
1371         parseChildBlock();
1372       return;
1373     }
1374     nextToken();
1375   } while (!eof());
1376 }
1377
1378 void UnwrappedLineParser::parseForOrWhileLoop() {
1379   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1380          "'for', 'while' or foreach macro expected");
1381   nextToken();
1382   if (FormatTok->Tok.is(tok::l_paren))
1383     parseParens();
1384   if (FormatTok->Tok.is(tok::l_brace)) {
1385     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1386     parseBlock(/*MustBeDeclaration=*/false);
1387     addUnwrappedLine();
1388   } else {
1389     addUnwrappedLine();
1390     ++Line->Level;
1391     parseStructuralElement();
1392     --Line->Level;
1393   }
1394 }
1395
1396 void UnwrappedLineParser::parseDoWhile() {
1397   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1398   nextToken();
1399   if (FormatTok->Tok.is(tok::l_brace)) {
1400     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1401     parseBlock(/*MustBeDeclaration=*/false);
1402     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1403       addUnwrappedLine();
1404   } else {
1405     addUnwrappedLine();
1406     ++Line->Level;
1407     parseStructuralElement();
1408     --Line->Level;
1409   }
1410
1411   // FIXME: Add error handling.
1412   if (!FormatTok->Tok.is(tok::kw_while)) {
1413     addUnwrappedLine();
1414     return;
1415   }
1416
1417   nextToken();
1418   parseStructuralElement();
1419 }
1420
1421 void UnwrappedLineParser::parseLabel() {
1422   nextToken();
1423   unsigned OldLineLevel = Line->Level;
1424   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1425     --Line->Level;
1426   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1427     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1428     parseBlock(/*MustBeDeclaration=*/false);
1429     if (FormatTok->Tok.is(tok::kw_break)) {
1430       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1431       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1432           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1433         addUnwrappedLine();
1434       }
1435       parseStructuralElement();
1436     }
1437     addUnwrappedLine();
1438   } else {
1439     if (FormatTok->is(tok::semi))
1440       nextToken();
1441     addUnwrappedLine();
1442   }
1443   Line->Level = OldLineLevel;
1444 }
1445
1446 void UnwrappedLineParser::parseCaseLabel() {
1447   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1448   // FIXME: fix handling of complex expressions here.
1449   do {
1450     nextToken();
1451   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1452   parseLabel();
1453 }
1454
1455 void UnwrappedLineParser::parseSwitch() {
1456   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1457   nextToken();
1458   if (FormatTok->Tok.is(tok::l_paren))
1459     parseParens();
1460   if (FormatTok->Tok.is(tok::l_brace)) {
1461     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1462     parseBlock(/*MustBeDeclaration=*/false);
1463     addUnwrappedLine();
1464   } else {
1465     addUnwrappedLine();
1466     ++Line->Level;
1467     parseStructuralElement();
1468     --Line->Level;
1469   }
1470 }
1471
1472 void UnwrappedLineParser::parseAccessSpecifier() {
1473   nextToken();
1474   // Understand Qt's slots.
1475   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1476     nextToken();
1477   // Otherwise, we don't know what it is, and we'd better keep the next token.
1478   if (FormatTok->Tok.is(tok::colon))
1479     nextToken();
1480   addUnwrappedLine();
1481 }
1482
1483 void UnwrappedLineParser::parseEnum() {
1484   // Won't be 'enum' for NS_ENUMs.
1485   if (FormatTok->Tok.is(tok::kw_enum))
1486     nextToken();
1487
1488   // Eat up enum class ...
1489   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1490     nextToken();
1491
1492   while (FormatTok->Tok.getIdentifierInfo() ||
1493          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1494                             tok::greater, tok::comma, tok::question)) {
1495     nextToken();
1496     // We can have macros or attributes in between 'enum' and the enum name.
1497     if (FormatTok->is(tok::l_paren))
1498       parseParens();
1499     if (FormatTok->is(tok::identifier)) {
1500       nextToken();
1501       // If there are two identifiers in a row, this is likely an elaborate
1502       // return type. In Java, this can be "implements", etc.
1503       if (Style.Language == FormatStyle::LK_Cpp &&
1504           FormatTok->is(tok::identifier))
1505         return;
1506     }
1507   }
1508
1509   // Just a declaration or something is wrong.
1510   if (FormatTok->isNot(tok::l_brace))
1511     return;
1512   FormatTok->BlockKind = BK_Block;
1513
1514   if (Style.Language == FormatStyle::LK_Java) {
1515     // Java enums are different.
1516     parseJavaEnumBody();
1517     return;
1518   }
1519
1520   // Parse enum body.
1521   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1522   if (HasError) {
1523     if (FormatTok->is(tok::semi))
1524       nextToken();
1525     addUnwrappedLine();
1526   }
1527
1528   // There is no addUnwrappedLine() here so that we fall through to parsing a
1529   // structural element afterwards. Thus, in "enum A {} n, m;",
1530   // "} n, m;" will end up in one unwrapped line.
1531 }
1532
1533 void UnwrappedLineParser::parseJavaEnumBody() {
1534   // Determine whether the enum is simple, i.e. does not have a semicolon or
1535   // constants with class bodies. Simple enums can be formatted like braced
1536   // lists, contracted to a single line, etc.
1537   unsigned StoredPosition = Tokens->getPosition();
1538   bool IsSimple = true;
1539   FormatToken *Tok = Tokens->getNextToken();
1540   while (Tok) {
1541     if (Tok->is(tok::r_brace))
1542       break;
1543     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1544       IsSimple = false;
1545       break;
1546     }
1547     // FIXME: This will also mark enums with braces in the arguments to enum
1548     // constants as "not simple". This is probably fine in practice, though.
1549     Tok = Tokens->getNextToken();
1550   }
1551   FormatTok = Tokens->setPosition(StoredPosition);
1552
1553   if (IsSimple) {
1554     parseBracedList();
1555     addUnwrappedLine();
1556     return;
1557   }
1558
1559   // Parse the body of a more complex enum.
1560   // First add a line for everything up to the "{".
1561   nextToken();
1562   addUnwrappedLine();
1563   ++Line->Level;
1564
1565   // Parse the enum constants.
1566   while (FormatTok) {
1567     if (FormatTok->is(tok::l_brace)) {
1568       // Parse the constant's class body.
1569       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1570                  /*MunchSemi=*/false);
1571     } else if (FormatTok->is(tok::l_paren)) {
1572       parseParens();
1573     } else if (FormatTok->is(tok::comma)) {
1574       nextToken();
1575       addUnwrappedLine();
1576     } else if (FormatTok->is(tok::semi)) {
1577       nextToken();
1578       addUnwrappedLine();
1579       break;
1580     } else if (FormatTok->is(tok::r_brace)) {
1581       addUnwrappedLine();
1582       break;
1583     } else {
1584       nextToken();
1585     }
1586   }
1587
1588   // Parse the class body after the enum's ";" if any.
1589   parseLevel(/*HasOpeningBrace=*/true);
1590   nextToken();
1591   --Line->Level;
1592   addUnwrappedLine();
1593 }
1594
1595 void UnwrappedLineParser::parseRecord() {
1596   const FormatToken &InitialToken = *FormatTok;
1597   nextToken();
1598
1599   // The actual identifier can be a nested name specifier, and in macros
1600   // it is often token-pasted.
1601   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1602                             tok::kw___attribute, tok::kw___declspec,
1603                             tok::kw_alignas) ||
1604          ((Style.Language == FormatStyle::LK_Java ||
1605            Style.Language == FormatStyle::LK_JavaScript) &&
1606           FormatTok->isOneOf(tok::period, tok::comma))) {
1607     bool IsNonMacroIdentifier =
1608         FormatTok->is(tok::identifier) &&
1609         FormatTok->TokenText != FormatTok->TokenText.upper();
1610     nextToken();
1611     // We can have macros or attributes in between 'class' and the class name.
1612     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1613       parseParens();
1614   }
1615
1616   // Note that parsing away template declarations here leads to incorrectly
1617   // accepting function declarations as record declarations.
1618   // In general, we cannot solve this problem. Consider:
1619   // class A<int> B() {}
1620   // which can be a function definition or a class definition when B() is a
1621   // macro. If we find enough real-world cases where this is a problem, we
1622   // can parse for the 'template' keyword in the beginning of the statement,
1623   // and thus rule out the record production in case there is no template
1624   // (this would still leave us with an ambiguity between template function
1625   // and class declarations).
1626   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1627     while (!eof()) {
1628       if (FormatTok->is(tok::l_brace)) {
1629         calculateBraceTypes(/*ExpectClassBody=*/true);
1630         if (!tryToParseBracedList())
1631           break;
1632       }
1633       if (FormatTok->Tok.is(tok::semi))
1634         return;
1635       nextToken();
1636     }
1637   }
1638   if (FormatTok->Tok.is(tok::l_brace)) {
1639     if (ShouldBreakBeforeBrace(Style, InitialToken))
1640       addUnwrappedLine();
1641
1642     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1643                /*MunchSemi=*/false);
1644   }
1645   // There is no addUnwrappedLine() here so that we fall through to parsing a
1646   // structural element afterwards. Thus, in "class A {} n, m;",
1647   // "} n, m;" will end up in one unwrapped line.
1648 }
1649
1650 void UnwrappedLineParser::parseObjCProtocolList() {
1651   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1652   do
1653     nextToken();
1654   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1655   nextToken(); // Skip '>'.
1656 }
1657
1658 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1659   do {
1660     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1661       nextToken();
1662       addUnwrappedLine();
1663       break;
1664     }
1665     if (FormatTok->is(tok::l_brace)) {
1666       parseBlock(/*MustBeDeclaration=*/false);
1667       // In ObjC interfaces, nothing should be following the "}".
1668       addUnwrappedLine();
1669     } else if (FormatTok->is(tok::r_brace)) {
1670       // Ignore stray "}". parseStructuralElement doesn't consume them.
1671       nextToken();
1672       addUnwrappedLine();
1673     } else {
1674       parseStructuralElement();
1675     }
1676   } while (!eof());
1677 }
1678
1679 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1680   nextToken();
1681   nextToken(); // interface name
1682
1683   // @interface can be followed by either a base class, or a category.
1684   if (FormatTok->Tok.is(tok::colon)) {
1685     nextToken();
1686     nextToken(); // base class name
1687   } else if (FormatTok->Tok.is(tok::l_paren))
1688     // Skip category, if present.
1689     parseParens();
1690
1691   if (FormatTok->Tok.is(tok::less))
1692     parseObjCProtocolList();
1693
1694   if (FormatTok->Tok.is(tok::l_brace)) {
1695     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1696         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1697       addUnwrappedLine();
1698     parseBlock(/*MustBeDeclaration=*/true);
1699   }
1700
1701   // With instance variables, this puts '}' on its own line.  Without instance
1702   // variables, this ends the @interface line.
1703   addUnwrappedLine();
1704
1705   parseObjCUntilAtEnd();
1706 }
1707
1708 void UnwrappedLineParser::parseObjCProtocol() {
1709   nextToken();
1710   nextToken(); // protocol name
1711
1712   if (FormatTok->Tok.is(tok::less))
1713     parseObjCProtocolList();
1714
1715   // Check for protocol declaration.
1716   if (FormatTok->Tok.is(tok::semi)) {
1717     nextToken();
1718     return addUnwrappedLine();
1719   }
1720
1721   addUnwrappedLine();
1722   parseObjCUntilAtEnd();
1723 }
1724
1725 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1726   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1727   nextToken();
1728
1729   // Consume the "default" in "export default class/function".
1730   if (FormatTok->is(tok::kw_default))
1731     nextToken();
1732
1733   // Consume "function" and "default function", so that these get parsed as
1734   // free-standing JS functions, i.e. do not require a trailing semicolon.
1735   if (FormatTok->is(Keywords.kw_function)) {
1736     nextToken();
1737     return;
1738   }
1739
1740   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1741                          Keywords.kw_var))
1742     return; // Fall through to parsing the corresponding structure.
1743
1744   if (FormatTok->is(tok::l_brace)) {
1745     FormatTok->BlockKind = BK_Block;
1746     parseBracedList();
1747   }
1748
1749   while (!eof() && FormatTok->isNot(tok::semi) &&
1750          FormatTok->isNot(tok::l_brace)) {
1751     nextToken();
1752   }
1753 }
1754
1755 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1756                                                  StringRef Prefix = "") {
1757   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1758                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1759   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1760                                                     E = Line.Tokens.end();
1761        I != E; ++I) {
1762     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1763   }
1764   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1765                                                     E = Line.Tokens.end();
1766        I != E; ++I) {
1767     const UnwrappedLineNode &Node = *I;
1768     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1769              I = Node.Children.begin(),
1770              E = Node.Children.end();
1771          I != E; ++I) {
1772       printDebugInfo(*I, "\nChild: ");
1773     }
1774   }
1775   llvm::dbgs() << "\n";
1776 }
1777
1778 void UnwrappedLineParser::addUnwrappedLine() {
1779   if (Line->Tokens.empty())
1780     return;
1781   DEBUG({
1782     if (CurrentLines == &Lines)
1783       printDebugInfo(*Line);
1784   });
1785   CurrentLines->push_back(std::move(*Line));
1786   Line->Tokens.clear();
1787   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1788     CurrentLines->append(
1789         std::make_move_iterator(PreprocessorDirectives.begin()),
1790         std::make_move_iterator(PreprocessorDirectives.end()));
1791     PreprocessorDirectives.clear();
1792   }
1793 }
1794
1795 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1796
1797 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1798   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1799          FormatTok.NewlinesBefore > 0;
1800 }
1801
1802 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1803   bool JustComments = Line->Tokens.empty();
1804   for (SmallVectorImpl<FormatToken *>::const_iterator
1805            I = CommentsBeforeNextToken.begin(),
1806            E = CommentsBeforeNextToken.end();
1807        I != E; ++I) {
1808     if (isOnNewLine(**I) && JustComments)
1809       addUnwrappedLine();
1810     pushToken(*I);
1811   }
1812   if (NewlineBeforeNext && JustComments)
1813     addUnwrappedLine();
1814   CommentsBeforeNextToken.clear();
1815 }
1816
1817 void UnwrappedLineParser::nextToken() {
1818   if (eof())
1819     return;
1820   flushComments(isOnNewLine(*FormatTok));
1821   pushToken(FormatTok);
1822   readToken();
1823 }
1824
1825 void UnwrappedLineParser::readToken() {
1826   bool CommentsInCurrentLine = true;
1827   do {
1828     FormatTok = Tokens->getNextToken();
1829     assert(FormatTok);
1830     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1831            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1832       // If there is an unfinished unwrapped line, we flush the preprocessor
1833       // directives only after that unwrapped line was finished later.
1834       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1835       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1836       // Comments stored before the preprocessor directive need to be output
1837       // before the preprocessor directive, at the same level as the
1838       // preprocessor directive, as we consider them to apply to the directive.
1839       flushComments(isOnNewLine(*FormatTok));
1840       parsePPDirective();
1841     }
1842     while (FormatTok->Type == TT_ConflictStart ||
1843            FormatTok->Type == TT_ConflictEnd ||
1844            FormatTok->Type == TT_ConflictAlternative) {
1845       if (FormatTok->Type == TT_ConflictStart) {
1846         conditionalCompilationStart(/*Unreachable=*/false);
1847       } else if (FormatTok->Type == TT_ConflictAlternative) {
1848         conditionalCompilationAlternative();
1849       } else if (FormatTok->Type == TT_ConflictEnd) {
1850         conditionalCompilationEnd();
1851       }
1852       FormatTok = Tokens->getNextToken();
1853       FormatTok->MustBreakBefore = true;
1854     }
1855
1856     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1857         !Line->InPPDirective) {
1858       continue;
1859     }
1860
1861     if (!FormatTok->Tok.is(tok::comment))
1862       return;
1863     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1864       CommentsInCurrentLine = false;
1865     }
1866     if (CommentsInCurrentLine) {
1867       pushToken(FormatTok);
1868     } else {
1869       CommentsBeforeNextToken.push_back(FormatTok);
1870     }
1871   } while (!eof());
1872 }
1873
1874 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1875   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1876   if (MustBreakBeforeNextToken) {
1877     Line->Tokens.back().Tok->MustBreakBefore = true;
1878     MustBreakBeforeNextToken = false;
1879   }
1880 }
1881
1882 } // end namespace format
1883 } // end namespace clang