contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp

   1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file contains the implementation of the UnwrappedLineParser,
  12 /// which turns a stream of tokens into UnwrappedLines.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "UnwrappedLineParser.h"
  17 #include "llvm/ADT/STLExtras.h"
  18 #include "llvm/Support/Debug.h"
  19 #include "llvm/Support/raw_ostream.h"
  20
  21 #define DEBUG_TYPE "format-parser"
  22
  23 namespace clang {
  24 namespace format {
  25
  26 class FormatTokenSource {
  27 public:
  28   virtual ~FormatTokenSource() {}
  29   virtual FormatToken *getNextToken() = 0;
  30
  31   virtual unsigned getPosition() = 0;
  32   virtual FormatToken *setPosition(unsigned Position) = 0;
  33 };
  34
  35 namespace {
  36
  37 class ScopedDeclarationState {
  38 public:
  39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
  40                          bool MustBeDeclaration)
  41       : Line(Line), Stack(Stack) {
  42     Line.MustBeDeclaration = MustBeDeclaration;
  43     Stack.push_back(MustBeDeclaration);
  44   }
  45   ~ScopedDeclarationState() {
  46     Stack.pop_back();
  47     if (!Stack.empty())
  48       Line.MustBeDeclaration = Stack.back();
  49     else
  50       Line.MustBeDeclaration = true;
  51   }
  52
  53 private:
  54   UnwrappedLine &Line;
  55   std::vector<bool> &Stack;
  56 };
  57
  58 class ScopedMacroState : public FormatTokenSource {
  59 public:
  60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
  61                    FormatToken *&ResetToken)
  62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
  63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
  64         Token(nullptr) {
  65     TokenSource = this;
  66     Line.Level = 0;
  67     Line.InPPDirective = true;
  68   }
  69
  70   ~ScopedMacroState() override {
  71     TokenSource = PreviousTokenSource;
  72     ResetToken = Token;
  73     Line.InPPDirective = false;
  74     Line.Level = PreviousLineLevel;
  75   }
  76
  77   FormatToken *getNextToken() override {
  78     // The \c UnwrappedLineParser guards against this by never calling
  79     // \c getNextToken() after it has encountered the first eof token.
  80     assert(!eof());
  81     Token = PreviousTokenSource->getNextToken();
  82     if (eof())
  83       return getFakeEOF();
  84     return Token;
  85   }
  86
  87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
  88
  89   FormatToken *setPosition(unsigned Position) override {
  90     Token = PreviousTokenSource->setPosition(Position);
  91     return Token;
  92   }
  93
  94 private:
  95   bool eof() { return Token && Token->HasUnescapedNewline; }
  96
  97   FormatToken *getFakeEOF() {
  98     static bool EOFInitialized = false;
  99     static FormatToken FormatTok;
 100     if (!EOFInitialized) {
 101       FormatTok.Tok.startToken();
 102       FormatTok.Tok.setKind(tok::eof);
 103       EOFInitialized = true;
 104     }
 105     return &FormatTok;
 106   }
 107
 108   UnwrappedLine &Line;
 109   FormatTokenSource *&TokenSource;
 110   FormatToken *&ResetToken;
 111   unsigned PreviousLineLevel;
 112   FormatTokenSource *PreviousTokenSource;
 113
 114   FormatToken *Token;
 115 };
 116
 117 } // end anonymous namespace
 118
 119 class ScopedLineState {
 120 public:
 121   ScopedLineState(UnwrappedLineParser &Parser,
 122                   bool SwitchToPreprocessorLines = false)
 123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
 124     if (SwitchToPreprocessorLines)
 125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
 126     else if (!Parser.Line->Tokens.empty())
 127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
 128     PreBlockLine = std::move(Parser.Line);
 129     Parser.Line = llvm::make_unique<UnwrappedLine>();
 130     Parser.Line->Level = PreBlockLine->Level;
 131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
 132   }
 133
 134   ~ScopedLineState() {
 135     if (!Parser.Line->Tokens.empty()) {
 136       Parser.addUnwrappedLine();
 137     }
 138     assert(Parser.Line->Tokens.empty());
 139     Parser.Line = std::move(PreBlockLine);
 140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
 141       Parser.MustBreakBeforeNextToken = true;
 142     Parser.CurrentLines = OriginalLines;
 143   }
 144
 145 private:
 146   UnwrappedLineParser &Parser;
 147
 148   std::unique_ptr<UnwrappedLine> PreBlockLine;
 149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
 150 };
 151
 152 class CompoundStatementIndenter {
 153 public:
 154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
 155                             const FormatStyle &Style, unsigned &LineLevel)
 156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
 157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
 158       Parser->addUnwrappedLine();
 159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
 160       Parser->addUnwrappedLine();
 161       ++LineLevel;
 162     }
 163   }
 164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
 165
 166 private:
 167   unsigned &LineLevel;
 168   unsigned OldLineLevel;
 169 };
 170
 171 namespace {
 172
 173 class IndexedTokenSource : public FormatTokenSource {
 174 public:
 175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
 176       : Tokens(Tokens), Position(-1) {}
 177
 178   FormatToken *getNextToken() override {
 179     ++Position;
 180     return Tokens[Position];
 181   }
 182
 183   unsigned getPosition() override {
 184     assert(Position >= 0);
 185     return Position;
 186   }
 187
 188   FormatToken *setPosition(unsigned P) override {
 189     Position = P;
 190     return Tokens[Position];
 191   }
 192
 193   void reset() { Position = -1; }
 194
 195 private:
 196   ArrayRef<FormatToken *> Tokens;
 197   int Position;
 198 };
 199
 200 } // end anonymous namespace
 201
 202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
 203                                          const AdditionalKeywords &Keywords,
 204                                          ArrayRef<FormatToken *> Tokens,
 205                                          UnwrappedLineConsumer &Callback)
 206     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
 207       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
 208       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 209
 210 void UnwrappedLineParser::reset() {
 211   PPBranchLevel = -1;
 212   Line.reset(new UnwrappedLine);
 213   CommentsBeforeNextToken.clear();
 214   FormatTok = nullptr;
 215   MustBreakBeforeNextToken = false;
 216   PreprocessorDirectives.clear();
 217   CurrentLines = &Lines;
 218   DeclarationScopeStack.clear();
 219   PPStack.clear();
 220 }
 221
 222 void UnwrappedLineParser::parse() {
 223   IndexedTokenSource TokenSource(AllTokens);
 224   do {
 225     DEBUG(llvm::dbgs() << "----\n");
 226     reset();
 227     Tokens = &TokenSource;
 228     TokenSource.reset();
 229
 230     readToken();
 231     parseFile();
 232     // Create line with eof token.
 233     pushToken(FormatTok);
 234     addUnwrappedLine();
 235
 236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
 237                                                   E = Lines.end();
 238          I != E; ++I) {
 239       Callback.consumeUnwrappedLine(*I);
 240     }
 241     Callback.finishRun();
 242     Lines.clear();
 243     while (!PPLevelBranchIndex.empty() &&
 244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
 245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
 246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
 247     }
 248     if (!PPLevelBranchIndex.empty()) {
 249       ++PPLevelBranchIndex.back();
 250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
 251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
 252     }
 253   } while (!PPLevelBranchIndex.empty());
 254 }
 255
 256 void UnwrappedLineParser::parseFile() {
 257   // The top-level context in a file always has declarations, except for pre-
 258   // processor directives and JavaScript files.
 259   bool MustBeDeclaration =
 260       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
 261   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 262                                           MustBeDeclaration);
 263   parseLevel(/*HasOpeningBrace=*/false);
 264   // Make sure to format the remaining tokens.
 265   flushComments(true);
 266   addUnwrappedLine();
 267 }
 268
 269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
 270   bool SwitchLabelEncountered = false;
 271   do {
 272     switch (FormatTok->Tok.getKind()) {
 273     case tok::comment:
 274       nextToken();
 275       addUnwrappedLine();
 276       break;
 277     case tok::l_brace:
 278       // FIXME: Add parameter whether this can happen - if this happens, we must
 279       // be in a non-declaration context.
 280       parseBlock(/*MustBeDeclaration=*/false);
 281       addUnwrappedLine();
 282       break;
 283     case tok::r_brace:
 284       if (HasOpeningBrace)
 285         return;
 286       nextToken();
 287       addUnwrappedLine();
 288       break;
 289     case tok::kw_default:
 290     case tok::kw_case:
 291       if (!SwitchLabelEncountered &&
 292           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
 293         ++Line->Level;
 294       SwitchLabelEncountered = true;
 295       parseStructuralElement();
 296       break;
 297     default:
 298       parseStructuralElement();
 299       break;
 300     }
 301   } while (!eof());
 302 }
 303
 304 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
 305   // We'll parse forward through the tokens until we hit
 306   // a closing brace or eof - note that getNextToken() will
 307   // parse macros, so this will magically work inside macro
 308   // definitions, too.
 309   unsigned StoredPosition = Tokens->getPosition();
 310   FormatToken *Tok = FormatTok;
 311   // Keep a stack of positions of lbrace tokens. We will
 312   // update information about whether an lbrace starts a
 313   // braced init list or a different block during the loop.
 314   SmallVector<FormatToken *, 8> LBraceStack;
 315   assert(Tok->Tok.is(tok::l_brace));
 316   do {
 317     // Get next none-comment token.
 318     FormatToken *NextTok;
 319     unsigned ReadTokens = 0;
 320     do {
 321       NextTok = Tokens->getNextToken();
 322       ++ReadTokens;
 323     } while (NextTok->is(tok::comment));
 324
 325     switch (Tok->Tok.getKind()) {
 326     case tok::l_brace:
 327       Tok->BlockKind = BK_Unknown;
 328       LBraceStack.push_back(Tok);
 329       break;
 330     case tok::r_brace:
 331       if (!LBraceStack.empty()) {
 332         if (LBraceStack.back()->BlockKind == BK_Unknown) {
 333           bool ProbablyBracedList = false;
 334           if (Style.Language == FormatStyle::LK_Proto) {
 335             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
 336           } else {
 337             // Using OriginalColumn to distinguish between ObjC methods and
 338             // binary operators is a bit hacky.
 339             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
 340                                     NextTok->OriginalColumn == 0;
 341
 342             // If there is a comma, semicolon or right paren after the closing
 343             // brace, we assume this is a braced initializer list.  Note that
 344             // regardless how we mark inner braces here, we will overwrite the
 345             // BlockKind later if we parse a braced list (where all blocks
 346             // inside are by default braced lists), or when we explicitly detect
 347             // blocks (for example while parsing lambdas).
 348             //
 349             // We exclude + and - as they can be ObjC visibility modifiers.
 350             ProbablyBracedList =
 351                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
 352                                  tok::r_paren, tok::r_square, tok::l_brace,
 353                                  tok::l_paren, tok::ellipsis) ||
 354                 (NextTok->is(tok::semi) &&
 355                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
 356                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
 357           }
 358           if (ProbablyBracedList) {
 359             Tok->BlockKind = BK_BracedInit;
 360             LBraceStack.back()->BlockKind = BK_BracedInit;
 361           } else {
 362             Tok->BlockKind = BK_Block;
 363             LBraceStack.back()->BlockKind = BK_Block;
 364           }
 365         }
 366         LBraceStack.pop_back();
 367       }
 368       break;
 369     case tok::at:
 370     case tok::semi:
 371     case tok::kw_if:
 372     case tok::kw_while:
 373     case tok::kw_for:
 374     case tok::kw_switch:
 375     case tok::kw_try:
 376     case tok::kw___try:
 377       if (!LBraceStack.empty())
 378         LBraceStack.back()->BlockKind = BK_Block;
 379       break;
 380     default:
 381       break;
 382     }
 383     Tok = NextTok;
 384   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
 385   // Assume other blocks for all unclosed opening braces.
 386   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
 387     if (LBraceStack[i]->BlockKind == BK_Unknown)
 388       LBraceStack[i]->BlockKind = BK_Block;
 389   }
 390
 391   FormatTok = Tokens->setPosition(StoredPosition);
 392 }
 393
 394 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
 395                                      bool MunchSemi) {
 396   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
 397   unsigned InitialLevel = Line->Level;
 398   nextToken();
 399
 400   addUnwrappedLine();
 401
 402   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 403                                           MustBeDeclaration);
 404   if (AddLevel)
 405     ++Line->Level;
 406   parseLevel(/*HasOpeningBrace=*/true);
 407
 408   if (!FormatTok->Tok.is(tok::r_brace)) {
 409     Line->Level = InitialLevel;
 410     return;
 411   }
 412
 413   nextToken(); // Munch the closing brace.
 414   if (MunchSemi && FormatTok->Tok.is(tok::semi))
 415     nextToken();
 416   Line->Level = InitialLevel;
 417 }
 418
 419 static bool isGoogScope(const UnwrappedLine &Line) {
 420   // FIXME: Closure-library specific stuff should not be hard-coded but be
 421   // configurable.
 422   if (Line.Tokens.size() < 4)
 423     return false;
 424   auto I = Line.Tokens.begin();
 425   if (I->Tok->TokenText != "goog")
 426     return false;
 427   ++I;
 428   if (I->Tok->isNot(tok::period))
 429     return false;
 430   ++I;
 431   if (I->Tok->TokenText != "scope")
 432     return false;
 433   ++I;
 434   return I->Tok->is(tok::l_paren);
 435 }
 436
 437 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
 438                                    const FormatToken &InitialToken) {
 439   switch (Style.BreakBeforeBraces) {
 440   case FormatStyle::BS_Linux:
 441     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
 442   case FormatStyle::BS_Allman:
 443   case FormatStyle::BS_GNU:
 444     return true;
 445   default:
 446     return false;
 447   }
 448 }
 449
 450 void UnwrappedLineParser::parseChildBlock() {
 451   FormatTok->BlockKind = BK_Block;
 452   nextToken();
 453   {
 454     bool GoogScope =
 455         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
 456     ScopedLineState LineState(*this);
 457     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
 458                                             /*MustBeDeclaration=*/false);
 459     Line->Level += GoogScope ? 0 : 1;
 460     parseLevel(/*HasOpeningBrace=*/true);
 461     flushComments(isOnNewLine(*FormatTok));
 462     Line->Level -= GoogScope ? 0 : 1;
 463   }
 464   nextToken();
 465 }
 466
 467 void UnwrappedLineParser::parsePPDirective() {
 468   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
 469   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
 470   nextToken();
 471
 472   if (!FormatTok->Tok.getIdentifierInfo()) {
 473     parsePPUnknown();
 474     return;
 475   }
 476
 477   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
 478   case tok::pp_define:
 479     parsePPDefine();
 480     return;
 481   case tok::pp_if:
 482     parsePPIf(/*IfDef=*/false);
 483     break;
 484   case tok::pp_ifdef:
 485   case tok::pp_ifndef:
 486     parsePPIf(/*IfDef=*/true);
 487     break;
 488   case tok::pp_else:
 489     parsePPElse();
 490     break;
 491   case tok::pp_elif:
 492     parsePPElIf();
 493     break;
 494   case tok::pp_endif:
 495     parsePPEndIf();
 496     break;
 497   default:
 498     parsePPUnknown();
 499     break;
 500   }
 501 }
 502
 503 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
 504   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
 505     PPStack.push_back(PP_Unreachable);
 506   else
 507     PPStack.push_back(PP_Conditional);
 508 }
 509
 510 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
 511   ++PPBranchLevel;
 512   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
 513   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
 514     PPLevelBranchIndex.push_back(0);
 515     PPLevelBranchCount.push_back(0);
 516   }
 517   PPChainBranchIndex.push(0);
 518   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
 519   conditionalCompilationCondition(Unreachable || Skip);
 520 }
 521
 522 void UnwrappedLineParser::conditionalCompilationAlternative() {
 523   if (!PPStack.empty())
 524     PPStack.pop_back();
 525   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 526   if (!PPChainBranchIndex.empty())
 527     ++PPChainBranchIndex.top();
 528   conditionalCompilationCondition(
 529       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
 530       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
 531 }
 532
 533 void UnwrappedLineParser::conditionalCompilationEnd() {
 534   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
 535   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
 536     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
 537       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
 538     }
 539   }
 540   // Guard against #endif's without #if.
 541   if (PPBranchLevel > 0)
 542     --PPBranchLevel;
 543   if (!PPChainBranchIndex.empty())
 544     PPChainBranchIndex.pop();
 545   if (!PPStack.empty())
 546     PPStack.pop_back();
 547 }
 548
 549 void UnwrappedLineParser::parsePPIf(bool IfDef) {
 550   nextToken();
 551   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
 552                          FormatTok->Tok.getLiteralData() != nullptr &&
 553                          StringRef(FormatTok->Tok.getLiteralData(),
 554                                    FormatTok->Tok.getLength()) == "0") ||
 555                         FormatTok->Tok.is(tok::kw_false);
 556   conditionalCompilationStart(!IfDef && IsLiteralFalse);
 557   parsePPUnknown();
 558 }
 559
 560 void UnwrappedLineParser::parsePPElse() {
 561   conditionalCompilationAlternative();
 562   parsePPUnknown();
 563 }
 564
 565 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
 566
 567 void UnwrappedLineParser::parsePPEndIf() {
 568   conditionalCompilationEnd();
 569   parsePPUnknown();
 570 }
 571
 572 void UnwrappedLineParser::parsePPDefine() {
 573   nextToken();
 574
 575   if (FormatTok->Tok.getKind() != tok::identifier) {
 576     parsePPUnknown();
 577     return;
 578   }
 579   nextToken();
 580   if (FormatTok->Tok.getKind() == tok::l_paren &&
 581       FormatTok->WhitespaceRange.getBegin() ==
 582           FormatTok->WhitespaceRange.getEnd()) {
 583     parseParens();
 584   }
 585   addUnwrappedLine();
 586   Line->Level = 1;
 587
 588   // Errors during a preprocessor directive can only affect the layout of the
 589   // preprocessor directive, and thus we ignore them. An alternative approach
 590   // would be to use the same approach we use on the file level (no
 591   // re-indentation if there was a structural error) within the macro
 592   // definition.
 593   parseFile();
 594 }
 595
 596 void UnwrappedLineParser::parsePPUnknown() {
 597   do {
 598     nextToken();
 599   } while (!eof());
 600   addUnwrappedLine();
 601 }
 602
 603 // Here we blacklist certain tokens that are not usually the first token in an
 604 // unwrapped line. This is used in attempt to distinguish macro calls without
 605 // trailing semicolons from other constructs split to several lines.
 606 static bool tokenCanStartNewLine(const clang::Token &Tok) {
 607   // Semicolon can be a null-statement, l_square can be a start of a macro or
 608   // a C++11 attribute, but this doesn't seem to be common.
 609   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
 610          Tok.isNot(tok::l_square) &&
 611          // Tokens that can only be used as binary operators and a part of
 612          // overloaded operator names.
 613          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
 614          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
 615          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
 616          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
 617          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
 618          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
 619          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
 620          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
 621          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
 622          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
 623          Tok.isNot(tok::lesslessequal) &&
 624          // Colon is used in labels, base class lists, initializer lists,
 625          // range-based for loops, ternary operator, but should never be the
 626          // first token in an unwrapped line.
 627          Tok.isNot(tok::colon) &&
 628          // 'noexcept' is a trailing annotation.
 629          Tok.isNot(tok::kw_noexcept);
 630 }
 631
 632 void UnwrappedLineParser::parseStructuralElement() {
 633   assert(!FormatTok->Tok.is(tok::l_brace));
 634   switch (FormatTok->Tok.getKind()) {
 635   case tok::at:
 636     nextToken();
 637     if (FormatTok->Tok.is(tok::l_brace)) {
 638       parseBracedList();
 639       break;
 640     }
 641     switch (FormatTok->Tok.getObjCKeywordID()) {
 642     case tok::objc_public:
 643     case tok::objc_protected:
 644     case tok::objc_package:
 645     case tok::objc_private:
 646       return parseAccessSpecifier();
 647     case tok::objc_interface:
 648     case tok::objc_implementation:
 649       return parseObjCInterfaceOrImplementation();
 650     case tok::objc_protocol:
 651       return parseObjCProtocol();
 652     case tok::objc_end:
 653       return; // Handled by the caller.
 654     case tok::objc_optional:
 655     case tok::objc_required:
 656       nextToken();
 657       addUnwrappedLine();
 658       return;
 659     case tok::objc_autoreleasepool:
 660       nextToken();
 661       if (FormatTok->Tok.is(tok::l_brace)) {
 662         if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
 663             Style.BreakBeforeBraces == FormatStyle::BS_GNU)
 664           addUnwrappedLine();
 665         parseBlock(/*MustBeDeclaration=*/false);
 666       }
 667       addUnwrappedLine();
 668       return;
 669     case tok::objc_try:
 670       // This branch isn't strictly necessary (the kw_try case below would
 671       // do this too after the tok::at is parsed above).  But be explicit.
 672       parseTryCatch();
 673       return;
 674     default:
 675       break;
 676     }
 677     break;
 678   case tok::kw_asm:
 679     nextToken();
 680     if (FormatTok->is(tok::l_brace)) {
 681       FormatTok->Type = TT_InlineASMBrace;
 682       nextToken();
 683       while (FormatTok && FormatTok->isNot(tok::eof)) {
 684         if (FormatTok->is(tok::r_brace)) {
 685           FormatTok->Type = TT_InlineASMBrace;
 686           nextToken();
 687           addUnwrappedLine();
 688           break;
 689         }
 690         FormatTok->Finalized = true;
 691         nextToken();
 692       }
 693     }
 694     break;
 695   case tok::kw_namespace:
 696     parseNamespace();
 697     return;
 698   case tok::kw_inline:
 699     nextToken();
 700     if (FormatTok->Tok.is(tok::kw_namespace)) {
 701       parseNamespace();
 702       return;
 703     }
 704     break;
 705   case tok::kw_public:
 706   case tok::kw_protected:
 707   case tok::kw_private:
 708     if (Style.Language == FormatStyle::LK_Java ||
 709         Style.Language == FormatStyle::LK_JavaScript)
 710       nextToken();
 711     else
 712       parseAccessSpecifier();
 713     return;
 714   case tok::kw_if:
 715     parseIfThenElse();
 716     return;
 717   case tok::kw_for:
 718   case tok::kw_while:
 719     parseForOrWhileLoop();
 720     return;
 721   case tok::kw_do:
 722     parseDoWhile();
 723     return;
 724   case tok::kw_switch:
 725     parseSwitch();
 726     return;
 727   case tok::kw_default:
 728     nextToken();
 729     parseLabel();
 730     return;
 731   case tok::kw_case:
 732     parseCaseLabel();
 733     return;
 734   case tok::kw_try:
 735   case tok::kw___try:
 736     parseTryCatch();
 737     return;
 738   case tok::kw_extern:
 739     nextToken();
 740     if (FormatTok->Tok.is(tok::string_literal)) {
 741       nextToken();
 742       if (FormatTok->Tok.is(tok::l_brace)) {
 743         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
 744         addUnwrappedLine();
 745         return;
 746       }
 747     }
 748     break;
 749   case tok::kw_export:
 750     if (Style.Language == FormatStyle::LK_JavaScript) {
 751       parseJavaScriptEs6ImportExport();
 752       return;
 753     }
 754     break;
 755   case tok::identifier:
 756     if (FormatTok->is(TT_ForEachMacro)) {
 757       parseForOrWhileLoop();
 758       return;
 759     }
 760     if (Style.Language == FormatStyle::LK_JavaScript &&
 761         FormatTok->is(Keywords.kw_import)) {
 762       parseJavaScriptEs6ImportExport();
 763       return;
 764     }
 765     if (FormatTok->is(Keywords.kw_signals)) {
 766       nextToken();
 767       if (FormatTok->is(tok::colon)) {
 768         nextToken();
 769         addUnwrappedLine();
 770       }
 771       return;
 772     }
 773     // In all other cases, parse the declaration.
 774     break;
 775   default:
 776     break;
 777   }
 778   do {
 779     switch (FormatTok->Tok.getKind()) {
 780     case tok::at:
 781       nextToken();
 782       if (FormatTok->Tok.is(tok::l_brace))
 783         parseBracedList();
 784       break;
 785     case tok::kw_enum:
 786       // parseEnum falls through and does not yet add an unwrapped line as an
 787       // enum definition can start a structural element.
 788       parseEnum();
 789       // This does not apply for Java and JavaScript.
 790       if (Style.Language == FormatStyle::LK_Java ||
 791           Style.Language == FormatStyle::LK_JavaScript) {
 792         addUnwrappedLine();
 793         return;
 794       }
 795       break;
 796     case tok::kw_typedef:
 797       nextToken();
 798       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
 799                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
 800         parseEnum();
 801       break;
 802     case tok::kw_struct:
 803     case tok::kw_union:
 804     case tok::kw_class:
 805       // parseRecord falls through and does not yet add an unwrapped line as a
 806       // record declaration or definition can start a structural element.
 807       parseRecord();
 808       // This does not apply for Java and JavaScript.
 809       if (Style.Language == FormatStyle::LK_Java ||
 810           Style.Language == FormatStyle::LK_JavaScript) {
 811         addUnwrappedLine();
 812         return;
 813       }
 814       break;
 815     case tok::period:
 816       nextToken();
 817       // In Java, classes have an implicit static member "class".
 818       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
 819           FormatTok->is(tok::kw_class))
 820         nextToken();
 821       break;
 822     case tok::semi:
 823       nextToken();
 824       addUnwrappedLine();
 825       return;
 826     case tok::r_brace:
 827       addUnwrappedLine();
 828       return;
 829     case tok::l_paren:
 830       parseParens();
 831       break;
 832     case tok::caret:
 833       nextToken();
 834       if (FormatTok->Tok.isAnyIdentifier() ||
 835           FormatTok->isSimpleTypeSpecifier())
 836         nextToken();
 837       if (FormatTok->is(tok::l_paren))
 838         parseParens();
 839       if (FormatTok->is(tok::l_brace))
 840         parseChildBlock();
 841       break;
 842     case tok::l_brace:
 843       if (!tryToParseBracedList()) {
 844         // A block outside of parentheses must be the last part of a
 845         // structural element.
 846         // FIXME: Figure out cases where this is not true, and add projections
 847         // for them (the one we know is missing are lambdas).
 848         if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
 849           addUnwrappedLine();
 850         FormatTok->Type = TT_FunctionLBrace;
 851         parseBlock(/*MustBeDeclaration=*/false);
 852         addUnwrappedLine();
 853         return;
 854       }
 855       // Otherwise this was a braced init list, and the structural
 856       // element continues.
 857       break;
 858     case tok::kw_try:
 859       // We arrive here when parsing function-try blocks.
 860       parseTryCatch();
 861       return;
 862     case tok::identifier: {
 863       // Parse function literal unless 'function' is the first token in a line
 864       // in which case this should be treated as a free-standing function.
 865       if (Style.Language == FormatStyle::LK_JavaScript &&
 866           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
 867         tryToParseJSFunction();
 868         break;
 869       }
 870       if ((Style.Language == FormatStyle::LK_JavaScript ||
 871            Style.Language == FormatStyle::LK_Java) &&
 872           FormatTok->is(Keywords.kw_interface)) {
 873         parseRecord();
 874         addUnwrappedLine();
 875         break;
 876       }
 877
 878       StringRef Text = FormatTok->TokenText;
 879       nextToken();
 880       if (Line->Tokens.size() == 1 &&
 881           // JS doesn't have macros, and within classes colons indicate fields,
 882           // not labels.
 883           Style.Language != FormatStyle::LK_JavaScript) {
 884         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
 885           parseLabel();
 886           return;
 887         }
 888         // Recognize function-like macro usages without trailing semicolon as
 889         // well as free-standing macros like Q_OBJECT.
 890         bool FunctionLike = FormatTok->is(tok::l_paren);
 891         if (FunctionLike)
 892           parseParens();
 893
 894         bool FollowedByNewline =
 895             CommentsBeforeNextToken.empty()
 896                 ? FormatTok->NewlinesBefore > 0
 897                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
 898
 899         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
 900             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
 901           addUnwrappedLine();
 902           return;
 903         }
 904       }
 905       break;
 906     }
 907     case tok::equal:
 908       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
 909       // TT_JsFatArrow. The always start an expression or a child block if
 910       // followed by a curly.
 911       if (FormatTok->is(TT_JsFatArrow)) {
 912         nextToken();
 913         if (FormatTok->is(tok::l_brace))
 914           parseChildBlock();
 915         break;
 916       }
 917
 918       nextToken();
 919       if (FormatTok->Tok.is(tok::l_brace)) {
 920         parseBracedList();
 921       }
 922       break;
 923     case tok::l_square:
 924       parseSquare();
 925       break;
 926     case tok::kw_new:
 927       parseNew();
 928       break;
 929     default:
 930       nextToken();
 931       break;
 932     }
 933   } while (!eof());
 934 }
 935
 936 bool UnwrappedLineParser::tryToParseLambda() {
 937   if (Style.Language != FormatStyle::LK_Cpp) {
 938     nextToken();
 939     return false;
 940   }
 941   // FIXME: This is a dirty way to access the previous token. Find a better
 942   // solution.
 943   if (!Line->Tokens.empty() &&
 944       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
 945                                         tok::kw_new, tok::kw_delete) ||
 946        Line->Tokens.back().Tok->closesScope() ||
 947        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
 948     nextToken();
 949     return false;
 950   }
 951   assert(FormatTok->is(tok::l_square));
 952   FormatToken &LSquare = *FormatTok;
 953   if (!tryToParseLambdaIntroducer())
 954     return false;
 955
 956   while (FormatTok->isNot(tok::l_brace)) {
 957     if (FormatTok->isSimpleTypeSpecifier()) {
 958       nextToken();
 959       continue;
 960     }
 961     switch (FormatTok->Tok.getKind()) {
 962     case tok::l_brace:
 963       break;
 964     case tok::l_paren:
 965       parseParens();
 966       break;
 967     case tok::amp:
 968     case tok::star:
 969     case tok::kw_const:
 970     case tok::comma:
 971     case tok::less:
 972     case tok::greater:
 973     case tok::identifier:
 974     case tok::coloncolon:
 975     case tok::kw_mutable:
 976       nextToken();
 977       break;
 978     case tok::arrow:
 979       FormatTok->Type = TT_LambdaArrow;
 980       nextToken();
 981       break;
 982     default:
 983       return true;
 984     }
 985   }
 986   LSquare.Type = TT_LambdaLSquare;
 987   parseChildBlock();
 988   return true;
 989 }
 990
 991 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
 992   nextToken();
 993   if (FormatTok->is(tok::equal)) {
 994     nextToken();
 995     if (FormatTok->is(tok::r_square)) {
 996       nextToken();
 997       return true;
 998     }
 999     if (FormatTok->isNot(tok::comma))
1000       return false;
1001     nextToken();
1002   } else if (FormatTok->is(tok::amp)) {
1003     nextToken();
1004     if (FormatTok->is(tok::r_square)) {
1005       nextToken();
1006       return true;
1007     }
1008     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1009       return false;
1010     }
1011     if (FormatTok->is(tok::comma))
1012       nextToken();
1013   } else if (FormatTok->is(tok::r_square)) {
1014     nextToken();
1015     return true;
1016   }
1017   do {
1018     if (FormatTok->is(tok::amp))
1019       nextToken();
1020     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1021       return false;
1022     nextToken();
1023     if (FormatTok->is(tok::ellipsis))
1024       nextToken();
1025     if (FormatTok->is(tok::comma)) {
1026       nextToken();
1027     } else if (FormatTok->is(tok::r_square)) {
1028       nextToken();
1029       return true;
1030     } else {
1031       return false;
1032     }
1033   } while (!eof());
1034   return false;
1035 }
1036
1037 void UnwrappedLineParser::tryToParseJSFunction() {
1038   nextToken();
1039
1040   // Consume function name.
1041   if (FormatTok->is(tok::identifier))
1042     nextToken();
1043
1044   if (FormatTok->isNot(tok::l_paren))
1045     return;
1046
1047   // Parse formal parameter list.
1048   parseParens();
1049
1050   if (FormatTok->is(tok::colon)) {
1051     // Parse a type definition.
1052     nextToken();
1053
1054     // Eat the type declaration. For braced inline object types, balance braces,
1055     // otherwise just parse until finding an l_brace for the function body.
1056     if (FormatTok->is(tok::l_brace))
1057       tryToParseBracedList();
1058     else
1059       while (FormatTok->isNot(tok::l_brace) && !eof())
1060         nextToken();
1061   }
1062
1063   parseChildBlock();
1064 }
1065
1066 bool UnwrappedLineParser::tryToParseBracedList() {
1067   if (FormatTok->BlockKind == BK_Unknown)
1068     calculateBraceTypes();
1069   assert(FormatTok->BlockKind != BK_Unknown);
1070   if (FormatTok->BlockKind == BK_Block)
1071     return false;
1072   parseBracedList();
1073   return true;
1074 }
1075
1076 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1077   bool HasError = false;
1078   nextToken();
1079
1080   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1081   // replace this by using parseAssigmentExpression() inside.
1082   do {
1083     if (Style.Language == FormatStyle::LK_JavaScript) {
1084       if (FormatTok->is(Keywords.kw_function)) {
1085         tryToParseJSFunction();
1086         continue;
1087       }
1088       if (FormatTok->is(TT_JsFatArrow)) {
1089         nextToken();
1090         // Fat arrows can be followed by simple expressions or by child blocks
1091         // in curly braces.
1092         if (FormatTok->is(tok::l_brace)) {
1093           parseChildBlock();
1094           continue;
1095         }
1096       }
1097     }
1098     switch (FormatTok->Tok.getKind()) {
1099     case tok::caret:
1100       nextToken();
1101       if (FormatTok->is(tok::l_brace)) {
1102         parseChildBlock();
1103       }
1104       break;
1105     case tok::l_square:
1106       tryToParseLambda();
1107       break;
1108     case tok::l_brace:
1109       // Assume there are no blocks inside a braced init list apart
1110       // from the ones we explicitly parse out (like lambdas).
1111       FormatTok->BlockKind = BK_BracedInit;
1112       parseBracedList();
1113       break;
1114     case tok::l_paren:
1115       parseParens();
1116       // JavaScript can just have free standing methods and getters/setters in
1117       // object literals. Detect them by a "{" following ")".
1118       if (Style.Language == FormatStyle::LK_JavaScript) {
1119         if (FormatTok->is(tok::l_brace))
1120           parseChildBlock();
1121         break;
1122       }
1123       break;
1124     case tok::r_brace:
1125       nextToken();
1126       return !HasError;
1127     case tok::semi:
1128       HasError = true;
1129       if (!ContinueOnSemicolons)
1130         return !HasError;
1131       nextToken();
1132       break;
1133     case tok::comma:
1134       nextToken();
1135       break;
1136     default:
1137       nextToken();
1138       break;
1139     }
1140   } while (!eof());
1141   return false;
1142 }
1143
1144 void UnwrappedLineParser::parseParens() {
1145   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1146   nextToken();
1147   do {
1148     switch (FormatTok->Tok.getKind()) {
1149     case tok::l_paren:
1150       parseParens();
1151       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1152         parseChildBlock();
1153       break;
1154     case tok::r_paren:
1155       nextToken();
1156       return;
1157     case tok::r_brace:
1158       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1159       return;
1160     case tok::l_square:
1161       tryToParseLambda();
1162       break;
1163     case tok::l_brace:
1164       if (!tryToParseBracedList())
1165         parseChildBlock();
1166       break;
1167     case tok::at:
1168       nextToken();
1169       if (FormatTok->Tok.is(tok::l_brace))
1170         parseBracedList();
1171       break;
1172     case tok::identifier:
1173       if (Style.Language == FormatStyle::LK_JavaScript &&
1174           FormatTok->is(Keywords.kw_function))
1175         tryToParseJSFunction();
1176       else
1177         nextToken();
1178       break;
1179     default:
1180       nextToken();
1181       break;
1182     }
1183   } while (!eof());
1184 }
1185
1186 void UnwrappedLineParser::parseSquare() {
1187   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1188   if (tryToParseLambda())
1189     return;
1190   do {
1191     switch (FormatTok->Tok.getKind()) {
1192     case tok::l_paren:
1193       parseParens();
1194       break;
1195     case tok::r_square:
1196       nextToken();
1197       return;
1198     case tok::r_brace:
1199       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1200       return;
1201     case tok::l_square:
1202       parseSquare();
1203       break;
1204     case tok::l_brace: {
1205       if (!tryToParseBracedList())
1206         parseChildBlock();
1207       break;
1208     }
1209     case tok::at:
1210       nextToken();
1211       if (FormatTok->Tok.is(tok::l_brace))
1212         parseBracedList();
1213       break;
1214     default:
1215       nextToken();
1216       break;
1217     }
1218   } while (!eof());
1219 }
1220
1221 void UnwrappedLineParser::parseIfThenElse() {
1222   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1223   nextToken();
1224   if (FormatTok->Tok.is(tok::l_paren))
1225     parseParens();
1226   bool NeedsUnwrappedLine = false;
1227   if (FormatTok->Tok.is(tok::l_brace)) {
1228     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1229     parseBlock(/*MustBeDeclaration=*/false);
1230     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1231         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1232       addUnwrappedLine();
1233     } else {
1234       NeedsUnwrappedLine = true;
1235     }
1236   } else {
1237     addUnwrappedLine();
1238     ++Line->Level;
1239     parseStructuralElement();
1240     --Line->Level;
1241   }
1242   if (FormatTok->Tok.is(tok::kw_else)) {
1243     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1244       addUnwrappedLine();
1245     nextToken();
1246     if (FormatTok->Tok.is(tok::l_brace)) {
1247       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1248       parseBlock(/*MustBeDeclaration=*/false);
1249       addUnwrappedLine();
1250     } else if (FormatTok->Tok.is(tok::kw_if)) {
1251       parseIfThenElse();
1252     } else {
1253       addUnwrappedLine();
1254       ++Line->Level;
1255       parseStructuralElement();
1256       --Line->Level;
1257     }
1258   } else if (NeedsUnwrappedLine) {
1259     addUnwrappedLine();
1260   }
1261 }
1262
1263 void UnwrappedLineParser::parseTryCatch() {
1264   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1265   nextToken();
1266   bool NeedsUnwrappedLine = false;
1267   if (FormatTok->is(tok::colon)) {
1268     // We are in a function try block, what comes is an initializer list.
1269     nextToken();
1270     while (FormatTok->is(tok::identifier)) {
1271       nextToken();
1272       if (FormatTok->is(tok::l_paren))
1273         parseParens();
1274       if (FormatTok->is(tok::comma))
1275         nextToken();
1276     }
1277   }
1278   // Parse try with resource.
1279   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1280     parseParens();
1281   }
1282   if (FormatTok->is(tok::l_brace)) {
1283     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1284     parseBlock(/*MustBeDeclaration=*/false);
1285     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1286         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1287         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1288       addUnwrappedLine();
1289     } else {
1290       NeedsUnwrappedLine = true;
1291     }
1292   } else if (!FormatTok->is(tok::kw_catch)) {
1293     // The C++ standard requires a compound-statement after a try.
1294     // If there's none, we try to assume there's a structuralElement
1295     // and try to continue.
1296     addUnwrappedLine();
1297     ++Line->Level;
1298     parseStructuralElement();
1299     --Line->Level;
1300   }
1301   while (1) {
1302     if (FormatTok->is(tok::at))
1303       nextToken();
1304     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1305                              tok::kw___finally) ||
1306           ((Style.Language == FormatStyle::LK_Java ||
1307             Style.Language == FormatStyle::LK_JavaScript) &&
1308            FormatTok->is(Keywords.kw_finally)) ||
1309           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1310            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1311       break;
1312     nextToken();
1313     while (FormatTok->isNot(tok::l_brace)) {
1314       if (FormatTok->is(tok::l_paren)) {
1315         parseParens();
1316         continue;
1317       }
1318       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1319         return;
1320       nextToken();
1321     }
1322     NeedsUnwrappedLine = false;
1323     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1324     parseBlock(/*MustBeDeclaration=*/false);
1325     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1326         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1327         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1328       addUnwrappedLine();
1329     } else {
1330       NeedsUnwrappedLine = true;
1331     }
1332   }
1333   if (NeedsUnwrappedLine) {
1334     addUnwrappedLine();
1335   }
1336 }
1337
1338 void UnwrappedLineParser::parseNamespace() {
1339   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1340
1341   const FormatToken &InitialToken = *FormatTok;
1342   nextToken();
1343   if (FormatTok->Tok.is(tok::identifier))
1344     nextToken();
1345   if (FormatTok->Tok.is(tok::l_brace)) {
1346     if (ShouldBreakBeforeBrace(Style, InitialToken))
1347       addUnwrappedLine();
1348
1349     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1350                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1351                      DeclarationScopeStack.size() > 1);
1352     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1353     // Munch the semicolon after a namespace. This is more common than one would
1354     // think. Puttin the semicolon into its own line is very ugly.
1355     if (FormatTok->Tok.is(tok::semi))
1356       nextToken();
1357     addUnwrappedLine();
1358   }
1359   // FIXME: Add error handling.
1360 }
1361
1362 void UnwrappedLineParser::parseNew() {
1363   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1364   nextToken();
1365   if (Style.Language != FormatStyle::LK_Java)
1366     return;
1367
1368   // In Java, we can parse everything up to the parens, which aren't optional.
1369   do {
1370     // There should not be a ;, { or } before the new's open paren.
1371     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1372       return;
1373
1374     // Consume the parens.
1375     if (FormatTok->is(tok::l_paren)) {
1376       parseParens();
1377
1378       // If there is a class body of an anonymous class, consume that as child.
1379       if (FormatTok->is(tok::l_brace))
1380         parseChildBlock();
1381       return;
1382     }
1383     nextToken();
1384   } while (!eof());
1385 }
1386
1387 void UnwrappedLineParser::parseForOrWhileLoop() {
1388   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1389          "'for', 'while' or foreach macro expected");
1390   nextToken();
1391   if (FormatTok->Tok.is(tok::l_paren))
1392     parseParens();
1393   if (FormatTok->Tok.is(tok::l_brace)) {
1394     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1395     parseBlock(/*MustBeDeclaration=*/false);
1396     addUnwrappedLine();
1397   } else {
1398     addUnwrappedLine();
1399     ++Line->Level;
1400     parseStructuralElement();
1401     --Line->Level;
1402   }
1403 }
1404
1405 void UnwrappedLineParser::parseDoWhile() {
1406   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1407   nextToken();
1408   if (FormatTok->Tok.is(tok::l_brace)) {
1409     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1410     parseBlock(/*MustBeDeclaration=*/false);
1411     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1412       addUnwrappedLine();
1413   } else {
1414     addUnwrappedLine();
1415     ++Line->Level;
1416     parseStructuralElement();
1417     --Line->Level;
1418   }
1419
1420   // FIXME: Add error handling.
1421   if (!FormatTok->Tok.is(tok::kw_while)) {
1422     addUnwrappedLine();
1423     return;
1424   }
1425
1426   nextToken();
1427   parseStructuralElement();
1428 }
1429
1430 void UnwrappedLineParser::parseLabel() {
1431   nextToken();
1432   unsigned OldLineLevel = Line->Level;
1433   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1434     --Line->Level;
1435   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1436     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1437     parseBlock(/*MustBeDeclaration=*/false);
1438     if (FormatTok->Tok.is(tok::kw_break)) {
1439       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1440       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1441           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1442         addUnwrappedLine();
1443       }
1444       parseStructuralElement();
1445     }
1446     addUnwrappedLine();
1447   } else {
1448     if (FormatTok->is(tok::semi))
1449       nextToken();
1450     addUnwrappedLine();
1451   }
1452   Line->Level = OldLineLevel;
1453 }
1454
1455 void UnwrappedLineParser::parseCaseLabel() {
1456   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1457   // FIXME: fix handling of complex expressions here.
1458   do {
1459     nextToken();
1460   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1461   parseLabel();
1462 }
1463
1464 void UnwrappedLineParser::parseSwitch() {
1465   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1466   nextToken();
1467   if (FormatTok->Tok.is(tok::l_paren))
1468     parseParens();
1469   if (FormatTok->Tok.is(tok::l_brace)) {
1470     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1471     parseBlock(/*MustBeDeclaration=*/false);
1472     addUnwrappedLine();
1473   } else {
1474     addUnwrappedLine();
1475     ++Line->Level;
1476     parseStructuralElement();
1477     --Line->Level;
1478   }
1479 }
1480
1481 void UnwrappedLineParser::parseAccessSpecifier() {
1482   nextToken();
1483   // Understand Qt's slots.
1484   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1485     nextToken();
1486   // Otherwise, we don't know what it is, and we'd better keep the next token.
1487   if (FormatTok->Tok.is(tok::colon))
1488     nextToken();
1489   addUnwrappedLine();
1490 }
1491
1492 void UnwrappedLineParser::parseEnum() {
1493   // Won't be 'enum' for NS_ENUMs.
1494   if (FormatTok->Tok.is(tok::kw_enum))
1495     nextToken();
1496
1497   // Eat up enum class ...
1498   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1499     nextToken();
1500
1501   while (FormatTok->Tok.getIdentifierInfo() ||
1502          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1503                             tok::greater, tok::comma, tok::question)) {
1504     nextToken();
1505     // We can have macros or attributes in between 'enum' and the enum name.
1506     if (FormatTok->is(tok::l_paren))
1507       parseParens();
1508     if (FormatTok->is(tok::identifier)) {
1509       nextToken();
1510       // If there are two identifiers in a row, this is likely an elaborate
1511       // return type. In Java, this can be "implements", etc.
1512       if (Style.Language == FormatStyle::LK_Cpp &&
1513           FormatTok->is(tok::identifier))
1514         return;
1515     }
1516   }
1517
1518   // Just a declaration or something is wrong.
1519   if (FormatTok->isNot(tok::l_brace))
1520     return;
1521   FormatTok->BlockKind = BK_Block;
1522
1523   if (Style.Language == FormatStyle::LK_Java) {
1524     // Java enums are different.
1525     parseJavaEnumBody();
1526     return;
1527   }
1528
1529   // Parse enum body.
1530   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1531   if (HasError) {
1532     if (FormatTok->is(tok::semi))
1533       nextToken();
1534     addUnwrappedLine();
1535   }
1536
1537   // There is no addUnwrappedLine() here so that we fall through to parsing a
1538   // structural element afterwards. Thus, in "enum A {} n, m;",
1539   // "} n, m;" will end up in one unwrapped line.
1540 }
1541
1542 void UnwrappedLineParser::parseJavaEnumBody() {
1543   // Determine whether the enum is simple, i.e. does not have a semicolon or
1544   // constants with class bodies. Simple enums can be formatted like braced
1545   // lists, contracted to a single line, etc.
1546   unsigned StoredPosition = Tokens->getPosition();
1547   bool IsSimple = true;
1548   FormatToken *Tok = Tokens->getNextToken();
1549   while (Tok) {
1550     if (Tok->is(tok::r_brace))
1551       break;
1552     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1553       IsSimple = false;
1554       break;
1555     }
1556     // FIXME: This will also mark enums with braces in the arguments to enum
1557     // constants as "not simple". This is probably fine in practice, though.
1558     Tok = Tokens->getNextToken();
1559   }
1560   FormatTok = Tokens->setPosition(StoredPosition);
1561
1562   if (IsSimple) {
1563     parseBracedList();
1564     addUnwrappedLine();
1565     return;
1566   }
1567
1568   // Parse the body of a more complex enum.
1569   // First add a line for everything up to the "{".
1570   nextToken();
1571   addUnwrappedLine();
1572   ++Line->Level;
1573
1574   // Parse the enum constants.
1575   while (FormatTok) {
1576     if (FormatTok->is(tok::l_brace)) {
1577       // Parse the constant's class body.
1578       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1579                  /*MunchSemi=*/false);
1580     } else if (FormatTok->is(tok::l_paren)) {
1581       parseParens();
1582     } else if (FormatTok->is(tok::comma)) {
1583       nextToken();
1584       addUnwrappedLine();
1585     } else if (FormatTok->is(tok::semi)) {
1586       nextToken();
1587       addUnwrappedLine();
1588       break;
1589     } else if (FormatTok->is(tok::r_brace)) {
1590       addUnwrappedLine();
1591       break;
1592     } else {
1593       nextToken();
1594     }
1595   }
1596
1597   // Parse the class body after the enum's ";" if any.
1598   parseLevel(/*HasOpeningBrace=*/true);
1599   nextToken();
1600   --Line->Level;
1601   addUnwrappedLine();
1602 }
1603
1604 void UnwrappedLineParser::parseRecord() {
1605   const FormatToken &InitialToken = *FormatTok;
1606   nextToken();
1607
1608   // The actual identifier can be a nested name specifier, and in macros
1609   // it is often token-pasted.
1610   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1611                             tok::kw___attribute, tok::kw___declspec,
1612                             tok::kw_alignas) ||
1613          ((Style.Language == FormatStyle::LK_Java ||
1614            Style.Language == FormatStyle::LK_JavaScript) &&
1615           FormatTok->isOneOf(tok::period, tok::comma))) {
1616     bool IsNonMacroIdentifier =
1617         FormatTok->is(tok::identifier) &&
1618         FormatTok->TokenText != FormatTok->TokenText.upper();
1619     nextToken();
1620     // We can have macros or attributes in between 'class' and the class name.
1621     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1622       parseParens();
1623   }
1624
1625   // Note that parsing away template declarations here leads to incorrectly
1626   // accepting function declarations as record declarations.
1627   // In general, we cannot solve this problem. Consider:
1628   // class A<int> B() {}
1629   // which can be a function definition or a class definition when B() is a
1630   // macro. If we find enough real-world cases where this is a problem, we
1631   // can parse for the 'template' keyword in the beginning of the statement,
1632   // and thus rule out the record production in case there is no template
1633   // (this would still leave us with an ambiguity between template function
1634   // and class declarations).
1635   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1636     while (!eof()) {
1637       if (FormatTok->is(tok::l_brace)) {
1638         calculateBraceTypes(/*ExpectClassBody=*/true);
1639         if (!tryToParseBracedList())
1640           break;
1641       }
1642       if (FormatTok->Tok.is(tok::semi))
1643         return;
1644       nextToken();
1645     }
1646   }
1647   if (FormatTok->Tok.is(tok::l_brace)) {
1648     if (ShouldBreakBeforeBrace(Style, InitialToken))
1649       addUnwrappedLine();
1650
1651     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1652                /*MunchSemi=*/false);
1653   }
1654   // There is no addUnwrappedLine() here so that we fall through to parsing a
1655   // structural element afterwards. Thus, in "class A {} n, m;",
1656   // "} n, m;" will end up in one unwrapped line.
1657 }
1658
1659 void UnwrappedLineParser::parseObjCProtocolList() {
1660   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1661   do
1662     nextToken();
1663   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1664   nextToken(); // Skip '>'.
1665 }
1666
1667 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1668   do {
1669     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1670       nextToken();
1671       addUnwrappedLine();
1672       break;
1673     }
1674     if (FormatTok->is(tok::l_brace)) {
1675       parseBlock(/*MustBeDeclaration=*/false);
1676       // In ObjC interfaces, nothing should be following the "}".
1677       addUnwrappedLine();
1678     } else if (FormatTok->is(tok::r_brace)) {
1679       // Ignore stray "}". parseStructuralElement doesn't consume them.
1680       nextToken();
1681       addUnwrappedLine();
1682     } else {
1683       parseStructuralElement();
1684     }
1685   } while (!eof());
1686 }
1687
1688 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1689   nextToken();
1690   nextToken(); // interface name
1691
1692   // @interface can be followed by either a base class, or a category.
1693   if (FormatTok->Tok.is(tok::colon)) {
1694     nextToken();
1695     nextToken(); // base class name
1696   } else if (FormatTok->Tok.is(tok::l_paren))
1697     // Skip category, if present.
1698     parseParens();
1699
1700   if (FormatTok->Tok.is(tok::less))
1701     parseObjCProtocolList();
1702
1703   if (FormatTok->Tok.is(tok::l_brace)) {
1704     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1705         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1706       addUnwrappedLine();
1707     parseBlock(/*MustBeDeclaration=*/true);
1708   }
1709
1710   // With instance variables, this puts '}' on its own line.  Without instance
1711   // variables, this ends the @interface line.
1712   addUnwrappedLine();
1713
1714   parseObjCUntilAtEnd();
1715 }
1716
1717 void UnwrappedLineParser::parseObjCProtocol() {
1718   nextToken();
1719   nextToken(); // protocol name
1720
1721   if (FormatTok->Tok.is(tok::less))
1722     parseObjCProtocolList();
1723
1724   // Check for protocol declaration.
1725   if (FormatTok->Tok.is(tok::semi)) {
1726     nextToken();
1727     return addUnwrappedLine();
1728   }
1729
1730   addUnwrappedLine();
1731   parseObjCUntilAtEnd();
1732 }
1733
1734 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1735   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1736   nextToken();
1737
1738   // Consume the "default" in "export default class/function".
1739   if (FormatTok->is(tok::kw_default))
1740     nextToken();
1741
1742   // Consume "function" and "default function", so that these get parsed as
1743   // free-standing JS functions, i.e. do not require a trailing semicolon.
1744   if (FormatTok->is(Keywords.kw_function)) {
1745     nextToken();
1746     return;
1747   }
1748
1749   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1750                          Keywords.kw_var))
1751     return; // Fall through to parsing the corresponding structure.
1752
1753   if (FormatTok->is(tok::l_brace)) {
1754     FormatTok->BlockKind = BK_Block;
1755     parseBracedList();
1756   }
1757
1758   while (!eof() && FormatTok->isNot(tok::semi) &&
1759          FormatTok->isNot(tok::l_brace)) {
1760     nextToken();
1761   }
1762 }
1763
1764 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1765                                                  StringRef Prefix = "") {
1766   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1767                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1768   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1769                                                     E = Line.Tokens.end();
1770        I != E; ++I) {
1771     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1772   }
1773   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1774                                                     E = Line.Tokens.end();
1775        I != E; ++I) {
1776     const UnwrappedLineNode &Node = *I;
1777     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1778              I = Node.Children.begin(),
1779              E = Node.Children.end();
1780          I != E; ++I) {
1781       printDebugInfo(*I, "\nChild: ");
1782     }
1783   }
1784   llvm::dbgs() << "\n";
1785 }
1786
1787 void UnwrappedLineParser::addUnwrappedLine() {
1788   if (Line->Tokens.empty())
1789     return;
1790   DEBUG({
1791     if (CurrentLines == &Lines)
1792       printDebugInfo(*Line);
1793   });
1794   CurrentLines->push_back(std::move(*Line));
1795   Line->Tokens.clear();
1796   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1797     CurrentLines->append(
1798         std::make_move_iterator(PreprocessorDirectives.begin()),
1799         std::make_move_iterator(PreprocessorDirectives.end()));
1800     PreprocessorDirectives.clear();
1801   }
1802 }
1803
1804 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1805
1806 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1807   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1808          FormatTok.NewlinesBefore > 0;
1809 }
1810
1811 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1812   bool JustComments = Line->Tokens.empty();
1813   for (SmallVectorImpl<FormatToken *>::const_iterator
1814            I = CommentsBeforeNextToken.begin(),
1815            E = CommentsBeforeNextToken.end();
1816        I != E; ++I) {
1817     if (isOnNewLine(**I) && JustComments)
1818       addUnwrappedLine();
1819     pushToken(*I);
1820   }
1821   if (NewlineBeforeNext && JustComments)
1822     addUnwrappedLine();
1823   CommentsBeforeNextToken.clear();
1824 }
1825
1826 void UnwrappedLineParser::nextToken() {
1827   if (eof())
1828     return;
1829   flushComments(isOnNewLine(*FormatTok));
1830   pushToken(FormatTok);
1831   readToken();
1832 }
1833
1834 void UnwrappedLineParser::readToken() {
1835   bool CommentsInCurrentLine = true;
1836   do {
1837     FormatTok = Tokens->getNextToken();
1838     assert(FormatTok);
1839     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1840            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1841       // If there is an unfinished unwrapped line, we flush the preprocessor
1842       // directives only after that unwrapped line was finished later.
1843       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1844       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1845       // Comments stored before the preprocessor directive need to be output
1846       // before the preprocessor directive, at the same level as the
1847       // preprocessor directive, as we consider them to apply to the directive.
1848       flushComments(isOnNewLine(*FormatTok));
1849       parsePPDirective();
1850     }
1851     while (FormatTok->Type == TT_ConflictStart ||
1852            FormatTok->Type == TT_ConflictEnd ||
1853            FormatTok->Type == TT_ConflictAlternative) {
1854       if (FormatTok->Type == TT_ConflictStart) {
1855         conditionalCompilationStart(/*Unreachable=*/false);
1856       } else if (FormatTok->Type == TT_ConflictAlternative) {
1857         conditionalCompilationAlternative();
1858       } else if (FormatTok->Type == TT_ConflictEnd) {
1859         conditionalCompilationEnd();
1860       }
1861       FormatTok = Tokens->getNextToken();
1862       FormatTok->MustBreakBefore = true;
1863     }
1864
1865     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1866         !Line->InPPDirective) {
1867       continue;
1868     }
1869
1870     if (!FormatTok->Tok.is(tok::comment))
1871       return;
1872     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1873       CommentsInCurrentLine = false;
1874     }
1875     if (CommentsInCurrentLine) {
1876       pushToken(FormatTok);
1877     } else {
1878       CommentsBeforeNextToken.push_back(FormatTok);
1879     }
1880   } while (!eof());
1881 }
1882
1883 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1884   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1885   if (MustBreakBeforeNextToken) {
1886     Line->Tokens.back().Tok->MustBreakBefore = true;
1887     MustBreakBeforeNextToken = false;
1888   }
1889 }
1890
1891 } // end namespace format
1892 } // end namespace clang