1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
14 //===----------------------------------------------------------------------===//
16 #define DEBUG_TYPE "format-parser"
18 #include "UnwrappedLineParser.h"
19 #include "llvm/Support/Debug.h"
/// Abstract interface for a stream of FormatTokens whose read position can be
/// queried and set.
24 class FormatTokenSource {
26 virtual ~FormatTokenSource() {}
// Fetches the next token from the stream.
27 virtual FormatToken *getNextToken() = 0;
// Reports the current position in the stream.
29 virtual unsigned getPosition() = 0;
// Moves the stream to \p Position and returns a token pointer.
30 virtual FormatToken *setPosition(unsigned Position) = 0;
/// Scoped helper that records a MustBeDeclaration flag on the given line and
/// on a caller-owned stack of such flags.
35 class ScopedDeclarationState {
// Stores \p MustBeDeclaration on \p Line and pushes it onto \p Stack.
37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38 bool MustBeDeclaration)
39 : Line(Line), Stack(Stack) {
40 Line.MustBeDeclaration = MustBeDeclaration;
41 Stack.push_back(MustBeDeclaration);
// On destruction the line's flag is set either from the top of the stack or
// to true. NOTE(review): presumably the fallback covers an empty stack -
// confirm against the full source.
43 ~ScopedDeclarationState() {
46 Line.MustBeDeclaration = Stack.back();
48 Line.MustBeDeclaration = true;
53 std::vector<bool> &Stack;
/// Token source that wraps the parser's current token source while a
/// preprocessor directive is being handled. It remembers the previous token
/// source, line level and structural-error flag so they can be put back.
56 class ScopedMacroState : public FormatTokenSource {
// Captures the state to restore later and marks the line as being inside a
// preprocessor directive.
58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59 FormatToken *&ResetToken, bool &StructuralError)
60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62 StructuralError(StructuralError),
63 PreviousStructuralError(StructuralError), Token(NULL) {
66 Line.InPPDirective = true;
// Put back the token source, directive flag, level and error flag that were
// saved at construction.
70 TokenSource = PreviousTokenSource;
72 Line.InPPDirective = false;
73 Line.Level = PreviousLineLevel;
74 StructuralError = PreviousStructuralError;
// Reads the next token from the wrapped source and remembers it in Token.
77 virtual FormatToken *getNextToken() {
78 // The \c UnwrappedLineParser guards against this by never calling
79 // \c getNextToken() after it has encountered the first eof token.
81 Token = PreviousTokenSource->getNextToken();
// Position handling is delegated to the wrapped source.
87 virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
89 virtual FormatToken *setPosition(unsigned Position) {
90 Token = PreviousTokenSource->setPosition(Position);
// True once a token has been read and that token is flagged
// HasUnescapedNewline.
95 bool eof() { return Token && Token->HasUnescapedNewline; }
// Lazily sets up a function-local static token of kind tok::eof.
97 FormatToken *getFakeEOF() {
98 static bool EOFInitialized = false;
99 static FormatToken FormatTok;
100 if (!EOFInitialized) {
101 FormatTok.Tok.startToken();
102 FormatTok.Tok.setKind(tok::eof);
103 EOFInitialized = true;
// References into the parser's state plus the values saved at construction.
109 FormatTokenSource *&TokenSource;
110 FormatToken *&ResetToken;
111 unsigned PreviousLineLevel;
112 FormatTokenSource *PreviousTokenSource;
113 bool &StructuralError;
114 bool PreviousStructuralError;
119 } // end anonymous namespace
/// Scoped helper that sets the parser's current line aside, installs a fresh
/// UnwrappedLine, and optionally redirects where finished lines are collected.
121 class ScopedLineState {
// If \p SwitchToPreprocessorLines is set, finished lines go to the parser's
// PreprocessorDirectives; otherwise, when the current line already has
// tokens, they go to the Children of its last token.
123 ScopedLineState(UnwrappedLineParser &Parser,
124 bool SwitchToPreprocessorLines = false)
126 OriginalLines = Parser.CurrentLines;
127 if (SwitchToPreprocessorLines)
128 Parser.CurrentLines = &Parser.PreprocessorDirectives;
129 else if (!Parser.Line->Tokens.empty())
130 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
// Take ownership of the in-progress line and start a new one that inherits
// its level and directive flag.
131 PreBlockLine = Parser.Line.take();
132 Parser.Line.reset(new UnwrappedLine());
133 Parser.Line->Level = PreBlockLine->Level;
134 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
// Flush any tokens gathered meanwhile, then reinstate the saved line and the
// original line container.
138 if (!Parser.Line->Tokens.empty()) {
139 Parser.addUnwrappedLine();
141 assert(Parser.Line->Tokens.empty());
142 Parser.Line.reset(PreBlockLine);
// Lines were diverted to PreprocessorDirectives: request a break before the
// next token pushed onto the reinstated line.
143 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
144 Parser.MustBreakBeforeNextToken = true;
145 Parser.CurrentLines = OriginalLines;
149 UnwrappedLineParser &Parser;
// The line that was in progress when this object was created.
151 UnwrappedLine *PreBlockLine;
// The line container that was current when this object was created.
152 SmallVectorImpl<UnwrappedLine> *OriginalLines;
/// FormatTokenSource backed by an array of tokens and an index into it.
157 class IndexedTokenSource : public FormatTokenSource {
// Position starts at -1, i.e. before the first token.
159 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
160 : Tokens(Tokens), Position(-1) {}
// Returns the token at the current Position.
162 virtual FormatToken *getNextToken() {
164 return Tokens[Position];
// Only valid once Position is non-negative.
167 virtual unsigned getPosition() {
168 assert(Position >= 0);
// Returns the token at the resulting Position.
172 virtual FormatToken *setPosition(unsigned P) {
174 return Tokens[Position];
// Rewinds to the initial "before the first token" state.
177 void reset() { Position = -1; }
180 ArrayRef<FormatToken *> Tokens;
184 } // end anonymous namespace
// Builds a parser over \p Tokens. It starts with an empty line, collects into
// the top-level Lines container, has no token source installed yet
// (Tokens == NULL) and starts with PPBranchLevel at -1.
186 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
187 ArrayRef<FormatToken *> Tokens,
188 UnwrappedLineConsumer &Callback)
189 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
190 CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
191 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
// Returns the per-run parser state to its initial values: a fresh line, no
// pending comments or preprocessor directives, top-level line collection, an
// empty declaration scope stack and no structural error.
193 void UnwrappedLineParser::reset() {
195 Line.reset(new UnwrappedLine);
196 CommentsBeforeNextToken.clear();
198 MustBreakBeforeNextToken = false;
199 PreprocessorDirectives.clear();
200 CurrentLines = &Lines;
201 DeclarationScopeStack.clear();
202 StructuralError = false;
// Drives parsing over AllTokens and hands the collected lines to Callback.
// The enclosing loop repeats for as long as PPLevelBranchIndex is non-empty.
// Returns the StructuralError flag.
206 bool UnwrappedLineParser::parse() {
207 IndexedTokenSource TokenSource(AllTokens);
209 DEBUG(llvm::dbgs() << "----\n");
211 Tokens = &TokenSource;
216 // Create line with eof token.
217 pushToken(FormatTok);
// Deliver every collected line to the consumer, then signal the end of the
// run.
220 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
223 Callback.consumeUnwrappedLine(*I);
225 Callback.finishRun();
// Drop trailing levels whose branch index has reached the last branch, then
// advance the innermost remaining level to its next branch.
227 while (!PPLevelBranchIndex.empty() &&
228 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
229 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
230 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
232 if (!PPLevelBranchIndex.empty()) {
233 ++PPLevelBranchIndex.back();
234 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
235 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
237 } while (!PPLevelBranchIndex.empty());
239 return StructuralError;
// Parses the top level of the input. Outside of a preprocessor directive the
// top level is marked as a context that must contain declarations.
242 void UnwrappedLineParser::parseFile() {
243 ScopedDeclarationState DeclarationState(
244 *Line, DeclarationScopeStack,
245 /*MustBeDeclaration=*/ !Line->InPPDirective);
246 parseLevel(/*HasOpeningBrace=*/false);
247 // Make sure to format the remaining tokens.
// Parses the structural elements of one nesting level, dispatching on the
// kind of the current token. \p HasOpeningBrace tells whether this level was
// entered through a '{'.
252 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
// Becomes true once a switch label has been handled at this level.
253 bool SwitchLabelEncountered = false;
255 switch (FormatTok->Tok.getKind()) {
261 // FIXME: Add parameter whether this can happen - if this happens, we must
262 // be in a non-declaration context.
263 parseBlock(/*MustBeDeclaration=*/false);
269 StructuralError = true;
273 case tok::kw_default:
// Only the first label at this level is considered, and only when the style
// indents case labels or we are at level 1 inside a preprocessor directive.
275 if (!SwitchLabelEncountered &&
276 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
278 SwitchLabelEncountered = true;
279 parseStructuralElement();
282 parseStructuralElement();
// Looks ahead from the current '{' and assigns a BlockKind (BK_BracedInit or
// BK_Block) to brace tokens, then rewinds the token source to where it
// started.
288 void UnwrappedLineParser::calculateBraceTypes() {
289 // We'll parse forward through the tokens until we hit
290 // a closing brace or eof - note that getNextToken() will
291 // parse macros, so this will magically work inside macro
// Remember the starting position so that it can be restored at the end.
293 unsigned StoredPosition = Tokens->getPosition();
294 unsigned Position = StoredPosition;
295 FormatToken *Tok = FormatTok;
296 // Keep a stack of positions of lbrace tokens. We will
297 // update information about whether an lbrace starts a
298 // braced init list or a different block during the loop.
299 SmallVector<FormatToken *, 8> LBraceStack;
300 assert(Tok->Tok.is(tok::l_brace));
302 // Get next non-comment token.
303 FormatToken *NextTok;
304 unsigned ReadTokens = 0;
306 NextTok = Tokens->getNextToken();
308 } while (NextTok->is(tok::comment));
310 switch (Tok->Tok.getKind()) {
312 LBraceStack.push_back(Tok);
315 if (!LBraceStack.empty()) {
// Only classify the innermost open brace if nothing has decided its kind yet.
316 if (LBraceStack.back()->BlockKind == BK_Unknown) {
317 // If there is a comma, semicolon or right paren after the closing
318 // brace, we assume this is a braced initializer list. Note that
319 // regardless how we mark inner braces here, we will overwrite the
320 // BlockKind later if we parse a braced list (where all blocks inside
321 // are by default braced lists), or when we explicitly detect blocks
322 // (for example while parsing lambdas).
324 // We exclude + and - as they can be ObjC visibility modifiers.
325 if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
326 tok::r_square, tok::l_brace, tok::colon) ||
327 (NextTok->isBinaryOperator() &&
328 !NextTok->isOneOf(tok::plus, tok::minus))) {
// Both the current token and the matching opening brace get the same kind.
329 Tok->BlockKind = BK_BracedInit;
330 LBraceStack.back()->BlockKind = BK_BracedInit;
332 Tok->BlockKind = BK_Block;
333 LBraceStack.back()->BlockKind = BK_Block;
336 LBraceStack.pop_back();
345 if (!LBraceStack.empty())
346 LBraceStack.back()->BlockKind = BK_Block;
352 Position += ReadTokens;
// Stop at end of input or as soon as every opened brace has been closed.
353 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
354 // Assume other blocks for all unclosed opening braces.
355 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
356 if (LBraceStack[i]->BlockKind == BK_Unknown)
357 LBraceStack[i]->BlockKind = BK_Block;
// Rewind the token source to the position recorded on entry.
360 FormatTok = Tokens->setPosition(StoredPosition);
// Parses a brace-delimited block; the current token must be '{'. The line
// level seen on entry is restored before returning. If the nested level does
// not end at a '}', a structural error is recorded.
363 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
365 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
366 unsigned InitialLevel = Line->Level;
371 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
375 parseLevel(/*HasOpeningBrace=*/true);
// Missing closing brace: undo the level change and flag the error.
377 if (!FormatTok->Tok.is(tok::r_brace)) {
378 Line->Level = InitialLevel;
379 StructuralError = true;
383 nextToken(); // Munch the closing brace.
// A ';' directly after the block is only considered when MunchSemi is set.
384 if (MunchSemi && FormatTok->Tok.is(tok::semi))
386 Line->Level = InitialLevel;
// Parses a block whose lines are collected through a ScopedLineState instead
// of directly into the current line container. The current token is marked
// BK_Block and the contents are parsed as a non-declaration context.
389 void UnwrappedLineParser::parseChildBlock() {
390 FormatTok->BlockKind = BK_Block;
393 ScopedLineState LineState(*this);
394 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
395 /*MustBeDeclaration=*/false);
397 parseLevel(/*HasOpeningBrace=*/true);
// Parses one preprocessor directive; the current token must be '#'. A
// ScopedMacroState is installed as the token source for the duration, and the
// directive keyword selects the specific handler.
403 void UnwrappedLineParser::parsePPDirective() {
404 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
405 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
// The token has no identifier info, so it cannot name a directive keyword.
408 if (FormatTok->Tok.getIdentifierInfo() == NULL) {
413 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
418 parsePPIf(/*IfDef=*/false);
422 parsePPIf(/*IfDef=*/true);
// Pushes a new entry onto PPStack. The entry is PP_Unreachable when the
// current top of the stack is already PP_Unreachable; PP_Conditional is the
// other value pushed here.
439 void UnwrappedLineParser::pushPPConditional() {
440 if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
441 PPStack.push_back(PP_Unreachable);
443 PPStack.push_back(PP_Conditional);
// Handles the start of a preprocessor conditional. \p IfDef is passed as true
// or false by parsePPDirective depending on the directive keyword.
446 void UnwrappedLineParser::parsePPIf(bool IfDef) {
448 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
// First time this nesting level is seen: create its index/count bookkeeping.
449 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
450 PPLevelBranchIndex.push_back(0);
451 PPLevelBranchCount.push_back(0);
453 PPChainBranchIndex.push(0);
// The condition token is the literal "0" or the keyword 'false'.
455 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
456 StringRef(FormatTok->Tok.getLiteralData(),
457 FormatTok->Tok.getLength()) == "0") ||
458 FormatTok->Tok.is(tok::kw_false);
// Mark the branch unreachable for a literal-false condition (only when IfDef
// is false) or when a later branch of this level is selected.
459 if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) {
460 PPStack.push_back(PP_Unreachable);
// Handles the switch to the next branch of a preprocessor conditional chain:
// advances the chain's branch index and marks the new branch unreachable when
// it is not the branch selected for this level.
467 void UnwrappedLineParser::parsePPElse() {
468 if (!PPStack.empty())
470 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
471 if (!PPChainBranchIndex.empty())
472 ++PPChainBranchIndex.top();
473 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
474 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) {
475 PPStack.push_back(PP_Unreachable);
// Handled exactly like parsePPElse().
482 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
// Handles the end of a preprocessor conditional chain: records the number of
// branches seen for this level if it exceeds the stored count, then drops the
// chain's branch index.
484 void UnwrappedLineParser::parsePPEndIf() {
485 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
486 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
// Branch indices are zero-based, hence the + 1 when comparing with a count.
487 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
488 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
492 if (!PPChainBranchIndex.empty())
493 PPChainBranchIndex.pop();
494 if (!PPStack.empty())
// Parses a macro definition directive.
499 void UnwrappedLineParser::parsePPDefine() {
// A non-identifier token here gets separate handling.
502 if (FormatTok->Tok.getKind() != tok::identifier) {
// A '(' whose whitespace range is empty, i.e. one that directly follows the
// previous token. NOTE(review): presumably this detects a function-like
// macro's parameter list - confirm.
507 if (FormatTok->Tok.getKind() == tok::l_paren &&
508 FormatTok->WhitespaceRange.getBegin() ==
509 FormatTok->WhitespaceRange.getEnd()) {
515 // Errors during a preprocessor directive can only affect the layout of the
516 // preprocessor directive, and thus we ignore them. An alternative approach
517 // would be to use the same approach we use on the file level (no
518 // re-indentation if there was a structural error) within the macro
523 void UnwrappedLineParser::parsePPUnknown() {
530 // Here we blacklist certain tokens that are not usually the first token in an
531 // unwrapped line. This is used in attempt to distinguish macro calls without
532 // trailing semicolons from other constructs split to several lines.
// Returns true only if \p Tok is none of the token kinds listed below.
533 bool tokenCanStartNewLine(clang::Token Tok) {
534 // Semicolon can be a null-statement, l_square can be a start of a macro or
535 // a C++11 attribute, but this doesn't seem to be common.
536 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
537 Tok.isNot(tok::l_square) &&
538 // Tokens that can only be used as binary operators and a part of
539 // overloaded operator names.
540 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
541 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
542 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
543 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
544 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
545 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
546 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
547 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
548 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
549 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
550 Tok.isNot(tok::lesslessequal) &&
551 // Colon is used in labels, base class lists, initializer lists,
552 // range-based for loops, ternary operator, but should never be the
553 // first token in an unwrapped line.
554 Tok.isNot(tok::colon);
// Parses one structural element. The kind of the current token, which must
// not be '{', selects a specialised parser where one exists; other input is
// consumed token by token in the second switch below.
557 void UnwrappedLineParser::parseStructuralElement() {
558 assert(!FormatTok->Tok.is(tok::l_brace));
559 switch (FormatTok->Tok.getKind()) {
562 if (FormatTok->Tok.is(tok::l_brace)) {
// Objective-C '@' keywords are dispatched on their ObjC keyword id.
566 switch (FormatTok->Tok.getObjCKeywordID()) {
567 case tok::objc_public:
568 case tok::objc_protected:
569 case tok::objc_package:
570 case tok::objc_private:
571 return parseAccessSpecifier();
572 case tok::objc_interface:
573 case tok::objc_implementation:
574 return parseObjCInterfaceOrImplementation();
575 case tok::objc_protocol:
576 return parseObjCProtocol();
578 return; // Handled by the caller.
579 case tok::objc_optional:
580 case tok::objc_required:
588 case tok::kw_namespace:
593 if (FormatTok->Tok.is(tok::kw_namespace)) {
599 case tok::kw_protected:
600 case tok::kw_private:
601 parseAccessSpecifier();
608 parseForOrWhileLoop();
616 case tok::kw_default:
// A string literal followed by '{' introduces a declaration block that is
// parsed without adding an indentation level.
628 if (FormatTok->Tok.is(tok::string_literal)) {
630 if (FormatTok->Tok.is(tok::l_brace)) {
631 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
636 // In all other cases, parse the declaration.
642 switch (FormatTok->Tok.getKind()) {
645 if (FormatTok->Tok.is(tok::l_brace))
655 // A record declaration or definition is always the start of a structural
670 if (FormatTok->is(tok::l_brace)) {
675 if (!tryToParseBracedList()) {
676 // A block outside of parentheses must be the last part of a
677 // structural element.
678 // FIXME: Figure out cases where this is not true, and add projections
679 // for them (the one we know is missing are lambdas).
// Linux, Stroustrup and Allman brace styles get extra handling before the
// block is parsed.
680 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
681 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
682 Style.BreakBeforeBraces == FormatStyle::BS_Allman)
684 parseBlock(/*MustBeDeclaration=*/false);
688 // Otherwise this was a braced init list, and the structural
689 // element continues.
691 case tok::identifier: {
// Keep the identifier's text for the free-standing macro check below.
692 StringRef Text = FormatTok->TokenText;
// The checks below apply only when the line holds exactly one token.
694 if (Line->Tokens.size() == 1) {
695 if (FormatTok->Tok.is(tok::colon)) {
699 // Recognize function-like macro usages without trailing semicolon.
700 if (FormatTok->Tok.is(tok::l_paren)) {
702 if (FormatTok->HasUnescapedNewline &&
703 tokenCanStartNewLine(FormatTok->Tok)) {
// An identifier of at least five characters that equals its upper-cased form
// and is followed by a token on a new line.
707 } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
708 Text == Text.upper()) {
709 // Recognize free-standing macros like Q_OBJECT.
718 if (FormatTok->Tok.is(tok::l_brace)) {
// Attempts to parse a lambda expression starting at '['. On the path that
// reaches the end of this function the '[' token is typed TT_LambdaLSquare.
732 void UnwrappedLineParser::tryToParseLambda() {
733 // FIXME: This is a dirty way to access the previous token. Find a better
// A '[' directly after an identifier or 'operator' gets separate handling.
735 if (!Line->Tokens.empty() &&
736 Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator)) {
740 assert(FormatTok->is(tok::l_square));
// Keep a reference to the '[' so that its type can be set later.
741 FormatToken &LSquare = *FormatTok;
742 if (!tryToParseLambdaIntroducer())
// Walk the tokens between the introducer and the opening brace of the body.
745 while (FormatTok->isNot(tok::l_brace)) {
746 switch (FormatTok->Tok.getKind()) {
752 case tok::identifier:
753 case tok::kw_mutable:
760 LSquare.Type = TT_LambdaLSquare;
// Checks the tokens following '[' against the forms of a lambda introducer:
// a leading '=' or '&', then captures made of an optional '&' followed by an
// identifier or 'this', separated by ',' and ended by ']'.
// NOTE(review): presumably returns true when the introducer is accepted -
// confirm against the full source.
764 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
// Introducer starting with '='.
766 if (FormatTok->is(tok::equal)) {
768 if (FormatTok->is(tok::r_square)) {
772 if (FormatTok->isNot(tok::comma))
// Introducer starting with '&'.
775 } else if (FormatTok->is(tok::amp)) {
777 if (FormatTok->is(tok::r_square)) {
781 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
784 if (FormatTok->is(tok::comma))
// Empty introducer.
786 } else if (FormatTok->is(tok::r_square)) {
// A single capture: optional '&', then an identifier or 'this'.
791 if (FormatTok->is(tok::amp))
793 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
796 if (FormatTok->is(tok::comma)) {
798 } else if (FormatTok->is(tok::r_square)) {
// Decides whether the current brace starts a braced list. If its kind is
// still unknown, calculateBraceTypes() is run first; a brace classified as
// BK_Block is handled separately from one classified as a braced list.
808 bool UnwrappedLineParser::tryToParseBracedList() {
809 if (FormatTok->BlockKind == BK_Unknown)
810 calculateBraceTypes();
811 assert(FormatTok->BlockKind != BK_Unknown);
812 if (FormatTok->BlockKind == BK_Block)
// Parses the contents of a braced list. \p ContinueOnSemicolons controls
// whether a ';' inside the list is tolerated. parseEnum() treats a false
// return value as an error.
818 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
819 bool HasError = false;
822 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
823 // replace this by using parseAssignmentExpression() inside.
825 // FIXME: When we start to support lambdas, we'll want to parse them away
826 // here, otherwise our bail-out scenarios below break. The better solution
827 // might be to just implement a more or less complete expression parser.
828 switch (FormatTok->Tok.getKind()) {
831 if (FormatTok->is(tok::l_brace)) {
839 // Assume there are no blocks inside a braced init list apart
840 // from the ones we explicitly parse out (like lambdas).
841 FormatTok->BlockKind = BK_BracedInit;
849 if (!ContinueOnSemicolons)
// Parses a return statement, dispatching on the kind of each token. In two
// places a missing terminating ';' is assumed.
864 void UnwrappedLineParser::parseReturn() {
868 switch (FormatTok->Tok.getKind()) {
871 if (FormatTok->Tok.isNot(tok::semi)) {
872 // Assume missing ';'.
881 // Assume missing ';'.
// Parses a parenthesized token sequence; the current token must be '('.
// Braces met inside are first tried as braced lists.
898 void UnwrappedLineParser::parseParens() {
899 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
902 switch (FormatTok->Tok.getKind()) {
910 // A "}" inside parentheses is an error if there wasn't a matching "{".
916 if (!tryToParseBracedList()) {
923 if (FormatTok->Tok.is(tok::l_brace))
// Parses an 'if' statement with an optional 'else' part; the current token
// must be 'if'. Braced bodies go through parseBlock(), unbraced bodies
// through parseStructuralElement().
933 void UnwrappedLineParser::parseIfThenElse() {
934 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
936 if (FormatTok->Tok.is(tok::l_paren))
// Set after a braced 'then' body; consulted below when no 'else' follows.
938 bool NeedsUnwrappedLine = false;
939 if (FormatTok->Tok.is(tok::l_brace)) {
// The Allman style gets extra handling both before and after the block.
940 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
942 parseBlock(/*MustBeDeclaration=*/false);
943 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
946 NeedsUnwrappedLine = true;
950 parseStructuralElement();
953 if (FormatTok->Tok.is(tok::kw_else)) {
955 if (FormatTok->Tok.is(tok::l_brace)) {
956 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
958 parseBlock(/*MustBeDeclaration=*/false);
// 'else if' is detected as its own case.
960 } else if (FormatTok->Tok.is(tok::kw_if)) {
965 parseStructuralElement();
968 } else if (NeedsUnwrappedLine) {
// Parses a namespace definition; the current token must be 'namespace'.
973 void UnwrappedLineParser::parseNamespace() {
974 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
// The namespace name is optional.
976 if (FormatTok->Tok.is(tok::identifier))
978 if (FormatTok->Tok.is(tok::l_brace)) {
979 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
980 Style.BreakBeforeBraces == FormatStyle::BS_Allman)
// The body adds an indentation level for NI_All, and for NI_Inner only when
// the declaration scope stack holds more than one entry.
983 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
984 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
985 DeclarationScopeStack.size() > 1);
986 parseBlock(/*MustBeDeclaration=*/true, AddLevel);
987 // Munch the semicolon after a namespace. This is more common than one would
988 // think. Putting the semicolon into its own line is very ugly.
989 if (FormatTok->Tok.is(tok::semi))
993 // FIXME: Add error handling.
// Parses a 'for' or 'while' loop; the current token must be one of those two
// keywords. A braced body goes through parseBlock(), an unbraced one through
// parseStructuralElement().
996 void UnwrappedLineParser::parseForOrWhileLoop() {
997 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
998 "'for' or 'while' expected");
1000 if (FormatTok->Tok.is(tok::l_paren))
1002 if (FormatTok->Tok.is(tok::l_brace)) {
1003 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1005 parseBlock(/*MustBeDeclaration=*/false);
1010 parseStructuralElement();
// Parses a do-while loop; the current token must be 'do'. After the body a
// 'while' token is checked for.
1015 void UnwrappedLineParser::parseDoWhile() {
1016 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1018 if (FormatTok->Tok.is(tok::l_brace)) {
1019 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1021 parseBlock(/*MustBeDeclaration=*/false);
1025 parseStructuralElement();
1029 // FIXME: Add error handling.
1030 if (!FormatTok->Tok.is(tok::kw_while)) {
1036 parseStructuralElement();
// Parses what follows a label. The line level seen on entry is saved and
// restored at the end.
1039 void UnwrappedLineParser::parseLabel() {
1041 unsigned OldLineLevel = Line->Level;
// Applies at level > 1, or at level > 0 when not inside a preprocessor
// directive.
1042 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
// A '{' right after the label, with no comments pending before it, starts a
// block belonging to the label.
1044 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1045 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1047 parseBlock(/*MustBeDeclaration=*/false);
1048 if (FormatTok->Tok.is(tok::kw_break)) {
1049 // "break;" after "}" on its own line only for BS_Allman
1050 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1052 parseStructuralElement();
1056 Line->Level = OldLineLevel;
// Parses a 'case' label; the current token must be 'case'. The loop runs
// until a ':' or the end of input is reached.
1059 void UnwrappedLineParser::parseCaseLabel() {
1060 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1061 // FIXME: fix handling of complex expressions here.
1064 } while (!eof() && !FormatTok->Tok.is(tok::colon));
// Parses a 'switch' statement; the current token must be 'switch'. A braced
// body goes through parseBlock(), an unbraced one through
// parseStructuralElement().
1068 void UnwrappedLineParser::parseSwitch() {
1069 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1071 if (FormatTok->Tok.is(tok::l_paren))
1073 if (FormatTok->Tok.is(tok::l_brace)) {
1074 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1076 parseBlock(/*MustBeDeclaration=*/false);
1081 parseStructuralElement();
// Parses an access specifier; the token after it is only acted on here when
// it is a ':'.
1086 void UnwrappedLineParser::parseAccessSpecifier() {
1088 // Otherwise, we don't know what it is, and we'd better keep the next token.
1089 if (FormatTok->Tok.is(tok::colon))
// Parses an enum declaration, including the optional 'class'/'struct'
// keyword, the tokens before the body, and a braced enumerator list.
1094 void UnwrappedLineParser::parseEnum() {
1096 // Eat up enum class ...
1097 if (FormatTok->Tok.is(tok::kw_class) ||
1098 FormatTok->Tok.is(tok::kw_struct))
// Loop over tokens that carry identifier info, plus ':' and '::'.
1100 while (FormatTok->Tok.getIdentifierInfo() ||
1101 FormatTok->isOneOf(tok::colon, tok::coloncolon)) {
1103 // We can have macros or attributes in between 'enum' and the enum name.
1104 if (FormatTok->Tok.is(tok::l_paren)) {
1107 if (FormatTok->Tok.is(tok::identifier))
// The opening brace is marked BK_Block and the enumerators are parsed as a
// braced list that continues across semicolons.
1110 if (FormatTok->Tok.is(tok::l_brace)) {
1111 FormatTok->BlockKind = BK_Block;
1112 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1114 if (FormatTok->is(tok::semi))
1119 // We fall through to parsing a structural element afterwards, so that in
1121 // "} n, m;" will end up in one unwrapped line.
// Parses a record declaration or definition: the optional name and attribute
// tokens, an optional part introduced by ':' or '<', and an optional braced
// body.
1124 void UnwrappedLineParser::parseRecord() {
// The token after the record keyword may be a name or an attribute-like
// keyword.
1126 if (FormatTok->Tok.is(tok::identifier) ||
1127 FormatTok->Tok.is(tok::kw___attribute) ||
1128 FormatTok->Tok.is(tok::kw___declspec) ||
1129 FormatTok->Tok.is(tok::kw_alignas)) {
1131 // We can have macros or attributes in between 'class' and the class name.
1132 if (FormatTok->Tok.is(tok::l_paren)) {
1135 // The actual identifier can be a nested name specifier, and in macros
1136 // it is often token-pasted.
1137 while (FormatTok->Tok.is(tok::identifier) ||
1138 FormatTok->Tok.is(tok::coloncolon) ||
1139 FormatTok->Tok.is(tok::hashhash))
1142 // Note that parsing away template declarations here leads to incorrectly
1143 // accepting function declarations as record declarations.
1144 // In general, we cannot solve this problem. Consider:
1145 // class A<int> B() {}
1146 // which can be a function definition or a class definition when B() is a
1147 // macro. If we find enough real-world cases where this is a problem, we
1148 // can parse for the 'template' keyword in the beginning of the statement,
1149 // and thus rule out the record production in case there is no template
1150 // (this would still leave us with an ambiguity between template function
1151 // and class declarations).
// After ':' or '<', scan forward until '{' or the end of input; a ';' met on
// the way is handled specially.
1152 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1153 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1154 if (FormatTok->Tok.is(tok::semi))
1160 if (FormatTok->Tok.is(tok::l_brace)) {
1161 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
1162 Style.BreakBeforeBraces == FormatStyle::BS_Allman)
// The body is a declaration context with an added level; a ';' after the
// closing brace is not consumed by parseBlock().
1165 parseBlock(/*MustBeDeclaration=*/true, /*Addlevel=*/true,
1166 /*MunchSemi=*/false);
1168 // We fall through to parsing a structural element afterwards, so
1170 // will end up in one unwrapped line.
// Parses an Objective-C protocol list; the current token must be '<'. Tokens
// are consumed until '>' or the end of input, then one more token is skipped.
1173 void UnwrappedLineParser::parseObjCProtocolList() {
1174 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1177 while (!eof() && FormatTok->Tok.isNot(tok::greater));
1178 nextToken(); // Skip '>'.
// Parses Objective-C content up to the '@end' keyword. Brace blocks are
// parsed as non-declaration blocks; other content goes through
// parseStructuralElement().
1181 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1183 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1188 if (FormatTok->is(tok::l_brace)) {
1189 parseBlock(/*MustBeDeclaration=*/false);
1190 // In ObjC interfaces, nothing should be following the "}".
1193 parseStructuralElement();
// Parses an Objective-C @interface or @implementation: the name, an optional
// base class or category, an optional protocol list, an optional brace block
// of instance variables, and then everything up to '@end'.
1198 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1200 nextToken(); // interface name
1202 // @interface can be followed by either a base class, or a category.
1203 if (FormatTok->Tok.is(tok::colon)) {
1205 nextToken(); // base class name
1206 } else if (FormatTok->Tok.is(tok::l_paren))
1207 // Skip category, if present.
1210 if (FormatTok->Tok.is(tok::less))
1211 parseObjCProtocolList();
1213 // If instance variables are present, keep the '{' on the first line too.
1214 if (FormatTok->Tok.is(tok::l_brace))
1215 parseBlock(/*MustBeDeclaration=*/true);
1217 // With instance variables, this puts '}' on its own line. Without instance
1218 // variables, this ends the @interface line.
1221 parseObjCUntilAtEnd();
// Parses an Objective-C @protocol. A ';' after the name (and optional
// protocol list) ends the line right away; otherwise the content up to
// '@end' is parsed.
1224 void UnwrappedLineParser::parseObjCProtocol() {
1226 nextToken(); // protocol name
1228 if (FormatTok->Tok.is(tok::less))
1229 parseObjCProtocolList();
1231 // Check for protocol declaration.
1232 if (FormatTok->Tok.is(tok::semi)) {
1234 return addUnwrappedLine();
1238 parseObjCUntilAtEnd();
// Debug helper: writes \p Line to llvm::dbgs(), prefixed with \p Prefix,
// showing its level, a " MACRO" marker for preprocessor lines and the name
// and type of each token, then recurses into the child lines of every token.
1241 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1242 StringRef Prefix = "") {
1243 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1244 << (Line.InPPDirective ? " MACRO" : "") << ": ";
// First pass: print the line's own tokens.
1245 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1246 E = Line.Tokens.end();
1248 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
// Second pass: print the child lines attached to each token.
1250 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1251 E = Line.Tokens.end();
1253 const UnwrappedLineNode &Node = *I;
1254 for (SmallVectorImpl<UnwrappedLine>::const_iterator
1255 I = Node.Children.begin(),
1256 E = Node.Children.end();
1258 printDebugInfo(*I, "\nChild: ");
1261 llvm::dbgs() << "\n";
// Appends a copy of the current line to CurrentLines and clears its tokens.
// When collecting into the top-level Lines, any buffered preprocessor
// directive lines are appended afterwards and the buffer is emptied.
1264 void UnwrappedLineParser::addUnwrappedLine() {
1265 if (Line->Tokens.empty())
// Debug output is produced for top-level lines only.
1268 if (CurrentLines == &Lines)
1269 printDebugInfo(*Line);
1271 CurrentLines->push_back(*Line);
1272 Line->Tokens.clear();
1273 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1274 for (SmallVectorImpl<UnwrappedLine>::iterator
1275 I = PreprocessorDirectives.begin(),
1276 E = PreprocessorDirectives.end();
1278 CurrentLines->push_back(*I);
1280 PreprocessorDirectives.clear();
// True when the current token is the end-of-file token.
1284 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
// Processes the comments buffered in CommentsBeforeNextToken and then empties
// the buffer. \p NewlineBeforeNext tells whether the next token is preceded
// by a newline.
1286 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
// True while the current line holds no tokens yet.
1287 bool JustComments = Line->Tokens.empty();
1288 for (SmallVectorImpl<FormatToken *>::const_iterator
1289 I = CommentsBeforeNextToken.begin(),
1290 E = CommentsBeforeNextToken.end();
1292 if ((*I)->NewlinesBefore && JustComments) {
1297 if (NewlineBeforeNext && JustComments) {
1300 CommentsBeforeNextToken.clear();
// Flushes pending comments and then pushes the current token onto the
// current line.
1303 void UnwrappedLineParser::nextToken() {
1306 flushComments(FormatTok->NewlinesBefore > 0);
1307 pushToken(FormatTok);
// Fetches tokens from the token source into FormatTok, handling preprocessor
// directives and comments met along the way.
1311 void UnwrappedLineParser::readToken() {
// Stays true while comments read so far still belong to the current line.
1312 bool CommentsInCurrentLine = true;
1314 FormatTok = Tokens->getNextToken();
// A '#' that starts a line (or is the first token), seen while not already
// inside a directive, begins a preprocessor directive.
1315 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1316 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1317 // If there is an unfinished unwrapped line, we flush the preprocessor
1318 // directives only after that unwrapped line was finished later.
1319 bool SwitchToPreprocessorLines =
1320 !Line->Tokens.empty() && CurrentLines == &Lines;
1321 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1322 // Comments stored before the preprocessor directive need to be output
1323 // before the preprocessor directive, at the same level as the
1324 // preprocessor directive, as we consider them to apply to the directive.
1325 flushComments(FormatTok->NewlinesBefore > 0);
// Tokens read inside an unreachable preprocessor branch, outside of a
// directive, are handled separately.
1329 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1330 !Line->InPPDirective) {
1334 if (!FormatTok->Tok.is(tok::comment))
// A comment preceded by a newline (or the very first token) no longer counts
// as part of the current line.
1336 if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
1337 CommentsInCurrentLine = false;
// Comments on the current line are pushed directly; the others are buffered
// until the next token.
1339 if (CommentsInCurrentLine) {
1340 pushToken(FormatTok);
1342 CommentsBeforeNextToken.push_back(FormatTok);
// Appends \p Tok to the current line. If a break was requested via
// MustBreakBeforeNextToken, the token is flagged MustBreakBefore and the
// request is cleared.
1347 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1348 Line->Tokens.push_back(UnwrappedLineNode(Tok));
1349 if (MustBreakBeforeNextToken) {
1350 Line->Tokens.back().Tok->MustBreakBefore = true;
1351 MustBreakBeforeNextToken = false;
1355 } // end namespace format
1356 } // end namespace clang