1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
14 //===----------------------------------------------------------------------===//
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Lex/Lexer.h"
23 static bool isUnaryOperator(const AnnotatedToken &Tok) {
24 switch (Tok.FormatTok.Tok.getKind()) {
/// \brief Returns true if the precedence of \p Tok, as computed by
/// \c getPrecedence(), is strictly higher than that of a comma.
39 static bool isBinaryOperator(const AnnotatedToken &Tok) {
40 // Comma is a binary operator, but does not behave as such wrt. formatting.
41 return getPrecedence(Tok) > prec::Comma;
44 // Returns the previous token ignoring comments.
// Walks the Parent chain starting at Tok.Parent and stops at the first token
// that is not a comment, or at NULL when the chain is exhausted.
45 static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) {
46 AnnotatedToken *PrevToken = Tok.Parent;
47 while (PrevToken != NULL && PrevToken->is(tok::comment))
48 PrevToken = PrevToken->Parent;
// Const overload: forwards to the non-const getPreviousToken() after casting
// away the constness of Tok.
51 static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
52 return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
// Returns true if Tok is a non-NULL comment token that either has no children
// or whose first child is preceded by at least one newline.
55 static bool isTrailingComment(AnnotatedToken *Tok) {
56 return Tok != NULL && Tok->is(tok::comment) &&
57 (Tok->Children.empty() ||
58 Tok->Children[0].FormatTok.NewlinesBefore > 0);
61 // Returns the next token ignoring comments.
// Follows the first child (Children[0]) of each token for as long as that
// child is a comment.
// NOTE(review): the statements guarded by the two Children.empty() checks are
// not visible in this listing; presumably they return NULL -- confirm.
62 static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
63 if (Tok.Children.empty())
65 const AnnotatedToken *NextToken = &Tok.Children[0];
66 while (NextToken->is(tok::comment)) {
67 if (NextToken->Children.empty())
69 NextToken = &NextToken->Children[0];
// Returns true if Tok is ')', '}' or ']', or has been typed as a template
// closer.
74 static bool closesScope(const AnnotatedToken &Tok) {
75 return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
76 Tok.Type == TT_TemplateCloser;
// Returns true if Tok is '(', '{' or '[', or has been typed as a template
// opener.
79 static bool opensScope(const AnnotatedToken &Tok) {
80 return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
81 Tok.Type == TT_TemplateOpener;
84 /// \brief A parser that gathers additional information about tokens.
86 /// The \c TokenAnnotator tries to match parentheses and square brackets and
87 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
88 /// into template parameter lists.
89 class AnnotatingParser {
// Binds the parser to Line with the cursor on the line's first token, and
// pushes the outermost Context (kind tok::unknown, binding strength 1, not an
// expression) so that Contexts.back() is always valid.
91 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
92 IdentifierInfo &Ident_in)
93 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
94 KeywordVirtualFound(false), Ident_in(Ident_in) {
95 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
100 if (CurrentToken == NULL)
102 ScopedContextCreator ContextCreator(*this, tok::less, 10);
103 AnnotatedToken *Left = CurrentToken->Parent;
104 Contexts.back().IsExpression = false;
105 while (CurrentToken != NULL) {
106 if (CurrentToken->is(tok::greater)) {
107 Left->MatchingParen = CurrentToken;
108 CurrentToken->MatchingParen = Left;
109 CurrentToken->Type = TT_TemplateCloser;
113 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
114 tok::pipepipe, tok::ampamp, tok::question,
117 updateParameterCount(Left, CurrentToken);
// Annotates the tokens of a parenthesized group. A new Context of kind
// tok::l_paren is pushed for the duration of the call. Left is the token
// preceding CurrentToken on entry; when a ')' is reached it is linked with
// Left through MatchingParen. Every other token is passed to
// updateParameterCount().
// NOTE(review): several statements (early returns, token advancing, closing
// braces) are not visible in this listing.
124 bool parseParens(bool LookForDecls = false) {
125 if (CurrentToken == NULL)
127 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
129 // FIXME: This is a bit of a hack. Do better.
130 Contexts.back().ColonIsForRangeExpr =
131 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
133 bool StartsObjCMethodExpr = false;
134 AnnotatedToken *Left = CurrentToken->Parent;
// NOTE(review): the check below tests the token *after* Left for a caret
// (i.e. "(^"), while the comment says "^(" -- confirm which is intended.
135 if (CurrentToken->is(tok::caret)) {
136 // ^( starts a block.
137 Left->Type = TT_ObjCBlockLParen;
138 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
139 // @selector( starts a selector.
140 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
141 MaybeSel->Parent->is(tok::at)) {
142 StartsObjCMethodExpr = true;
146 if (StartsObjCMethodExpr) {
147 Contexts.back().ColonIsObjCMethodExpr = true;
148 Left->Type = TT_ObjCMethodExpr;
151 while (CurrentToken != NULL) {
152 // LookForDecls is set when "if (" has been seen. Check for
153 // 'identifier' '*' 'identifier' followed by not '=' -- this
154 // '*' has to be a binary operator but determineStarAmpUsage() will
155 // categorize it as an unary operator, so set the right type here.
156 if (LookForDecls && !CurrentToken->Children.empty()) {
157 AnnotatedToken &Prev = *CurrentToken->Parent;
158 AnnotatedToken &Next = CurrentToken->Children[0];
159 if (Prev.Parent->is(tok::identifier) &&
160 Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
161 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
162 Prev.Type = TT_BinaryOperator;
// Only the first such pattern inside the parentheses is retyped.
163 LookForDecls = false;
167 if (CurrentToken->is(tok::r_paren)) {
168 Left->MatchingParen = CurrentToken;
169 CurrentToken->MatchingParen = Left;
171 if (StartsObjCMethodExpr) {
172 CurrentToken->Type = TT_ObjCMethodExpr;
// Publish the longest selector-name length seen in this context on the first
// selector name token.
173 if (Contexts.back().FirstObjCSelectorName != NULL) {
174 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
175 Contexts.back().LongestObjCSelectorName;
// A ']' or '}' here does not match the opening '('.
182 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
184 updateParameterCount(Left, CurrentToken);
195 // A '[' could be an index subscript (after an identifier or after
196 // ')' or ']'), it could be the start of an Objective-C method
197 // expression, or it could be the start of an Objective-C array literal.
198 AnnotatedToken *Left = CurrentToken->Parent;
199 AnnotatedToken *Parent = getPreviousToken(*Left);
200 bool StartsObjCMethodExpr =
201 Contexts.back().CanBeExpression &&
202 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
203 tok::kw_return, tok::kw_throw) ||
204 isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn ||
205 Parent->Type == TT_CastRParen ||
206 getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
208 ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
209 Contexts.back().IsExpression = true;
210 bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
212 if (StartsObjCMethodExpr) {
213 Contexts.back().ColonIsObjCMethodExpr = true;
214 Left->Type = TT_ObjCMethodExpr;
215 } else if (StartsObjCArrayLiteral) {
216 Left->Type = TT_ObjCArrayLiteral;
219 while (CurrentToken != NULL) {
220 if (CurrentToken->is(tok::r_square)) {
221 if (!CurrentToken->Children.empty() &&
222 CurrentToken->Children[0].is(tok::l_paren)) {
223 // An ObjC method call is rarely followed by an open parenthesis.
224 // FIXME: Do we incorrectly label ":" with this?
225 StartsObjCMethodExpr = false;
226 Left->Type = TT_Unknown;
228 if (StartsObjCMethodExpr) {
229 CurrentToken->Type = TT_ObjCMethodExpr;
230 // determineStarAmpUsage() thinks that '*' '[' is allocating an
231 // array of pointers, but if '[' starts a selector then '*' is a
233 if (Parent != NULL && Parent->Type == TT_PointerOrReference)
234 Parent->Type = TT_BinaryOperator;
235 } else if (StartsObjCArrayLiteral) {
236 CurrentToken->Type = TT_ObjCArrayLiteral;
238 Left->MatchingParen = CurrentToken;
239 CurrentToken->MatchingParen = Left;
240 if (Contexts.back().FirstObjCSelectorName != NULL)
241 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
242 Contexts.back().LongestObjCSelectorName;
246 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
248 updateParameterCount(Left, CurrentToken);
256 // Lines are fine to end with '{'.
257 if (CurrentToken == NULL)
259 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
260 AnnotatedToken *Left = CurrentToken->Parent;
261 while (CurrentToken != NULL) {
262 if (CurrentToken->is(tok::r_brace)) {
263 Left->MatchingParen = CurrentToken;
264 CurrentToken->MatchingParen = Left;
268 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
270 updateParameterCount(Left, CurrentToken);
// Updates Left->ParameterCount for the token Current: a comma increments the
// count; otherwise the first non-comment token raises a zero count to 1.
277 void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
278 if (Current->is(tok::comma))
279 ++Left->ParameterCount;
280 else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
281 Left->ParameterCount = 1;
// Scans forward from CurrentToken; a ':' that is reached is typed
// TT_ConditionalExpr.
// NOTE(review): the remainder of the loop body and the return statements are
// not visible in this listing.
284 bool parseConditional() {
285 while (CurrentToken != NULL) {
286 if (CurrentToken->is(tok::colon)) {
287 CurrentToken->Type = TT_ConditionalExpr;
// Called from consumeToken() for tok::kw_template. A '<' at the cursor is
// typed TT_TemplateOpener; afterwards, if tokens remain, the token preceding
// the cursor is flagged with ClosesTemplateDeclaration.
// NOTE(review): the statements between these two steps are not visible in
// this listing.
297 bool parseTemplateDeclaration() {
298 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
299 CurrentToken->Type = TT_TemplateOpener;
303 if (CurrentToken != NULL)
304 CurrentToken->Parent->ClosesTemplateDeclaration = true;
// Dispatches on the kind of the current token (Tok) and assigns token types,
// delegating to the parse*() helpers for nested constructs.
// NOTE(review): most case labels, break/return statements and the token
// advancing are not visible in this listing; the branch descriptions below
// rely only on the statements and comments that are shown.
310 bool consumeToken() {
311 AnnotatedToken *Tok = CurrentToken;
313 switch (Tok->FormatTok.Tok.getKind()) {
// First token of a line that must be a declaration.
316 if (Tok->Parent == NULL && Line.MustBeDeclaration)
317 Tok->Type = TT_ObjCMethodSpecifier;
320 if (Tok->Parent == NULL)
322 // Colons from ?: are handled in parseConditional().
// The colon is classified by context, in this order: constructor initializer,
// ObjC method expression, range-based for, inheritance, inline asm.
323 if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) {
324 Tok->Type = TT_CtorInitializerColon;
325 } else if (Contexts.back().ColonIsObjCMethodExpr ||
326 Line.First.Type == TT_ObjCMethodSpecifier) {
327 Tok->Type = TT_ObjCMethodExpr;
328 Tok->Parent->Type = TT_ObjCSelectorName;
// Track the longest selector name and remember the first one in this context.
329 if (Tok->Parent->FormatTok.TokenLength >
330 Contexts.back().LongestObjCSelectorName)
331 Contexts.back().LongestObjCSelectorName =
332 Tok->Parent->FormatTok.TokenLength;
333 if (Contexts.back().FirstObjCSelectorName == NULL)
334 Contexts.back().FirstObjCSelectorName = Tok->Parent;
335 } else if (Contexts.back().ColonIsForRangeExpr) {
336 Tok->Type = TT_RangeBasedForLoopColon;
337 } else if (Contexts.size() == 1) {
338 Tok->Type = TT_InheritanceColon;
339 } else if (Contexts.back().ContextKind == tok::l_paren) {
340 Tok->Type = TT_InlineASMColon;
345 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
347 if (!parseParens(/*LookForDecls=*/ true))
352 Contexts.back().ColonIsForRangeExpr = true;
360 if (Line.MustBeDeclaration)
361 Line.MightBeFunctionDecl = true;
373 Tok->Type = TT_TemplateOpener;
375 Tok->Type = TT_BinaryOperator;
384 // Lines can start with '}'.
385 if (Tok->Parent != NULL)
389 Tok->Type = TT_BinaryOperator;
391 case tok::kw_operator:
// Up to the next '(' every '*' or '&' is typed as pointer/reference.
392 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
393 if (CurrentToken->isOneOf(tok::star, tok::amp))
394 CurrentToken->Type = TT_PointerOrReference;
398 CurrentToken->Type = TT_OverloadedOperatorLParen;
403 case tok::kw_template:
404 parseTemplateDeclaration();
406 case tok::identifier:
// In a line starting with 'for', the identifier matching Ident_in is typed
// TT_ObjCForIn.
407 if (Line.First.is(tok::kw_for) &&
408 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
409 Tok->Type = TT_ObjCForIn;
412 if (Contexts.back().FirstStartOfName)
413 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
// Annotates the tokens of an include directive. If the cursor is at '<', the
// following tokens are typed TT_ImplicitStringLiteral, except comments that
// have no children. In the other visible loop, string literals are typed the
// same way.
// NOTE(review): the token advancing and the else/brace structure between the
// two loops are not visible in this listing.
421 void parseIncludeDirective() {
423 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
425 while (CurrentToken != NULL) {
426 if (CurrentToken->isNot(tok::comment) ||
427 !CurrentToken->Children.empty())
428 CurrentToken->Type = TT_ImplicitStringLiteral;
432 while (CurrentToken != NULL) {
433 if (CurrentToken->is(tok::string_literal))
434 // Mark these string literals as "implicit" literals, too, so that
435 // they are not split or line-wrapped.
436 CurrentToken->Type = TT_ImplicitStringLiteral;
// Types the tokens visited by the loop as TT_ImplicitStringLiteral.
// NOTE(review): the statements before the loop and the token advancing are
// not visible in this listing.
442 void parseWarningOrError() {
444 // We still want to format the whitespace left of the first token of the
447 while (CurrentToken != NULL) {
448 CurrentToken->Type = TT_ImplicitStringLiteral;
// Dispatches on the preprocessor keyword ID of the current token: the
// pp_include case calls parseIncludeDirective() and the pp_warning case calls
// parseWarningOrError(). Tokens without identifier info are handled
// separately before the switch.
// NOTE(review): additional case labels, the early returns and the body of the
// final loop are not visible in this listing.
453 void parsePreprocessorDirective() {
455 if (CurrentToken == NULL)
457 // Hashes in the middle of a line can lead to any strange token
459 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
461 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
462 case tok::pp_include:
464 parseIncludeDirective();
467 case tok::pp_warning:
468 parseWarningOrError();
473 while (CurrentToken != NULL)
// Annotates all tokens of the line and returns its LineType. The visible
// return values are LT_PreprocessorDirective (line starts with '#'),
// LT_VirtualFunctionDecl ('virtual' was seen), LT_BuilderTypeCall and
// LT_ObjCMethodDecl.
// NOTE(review): the counter update for PeriodsAndArrows, the per-token
// consume call and the final return are not visible in this listing.
478 LineType parseLine() {
479 int PeriodsAndArrows = 0;
480 AnnotatedToken *LastPeriodOrArrow = NULL;
481 bool CanBeBuilderTypeStmt = true;
482 if (CurrentToken->is(tok::hash)) {
483 parsePreprocessorDirective();
484 return LT_PreprocessorDirective;
486 while (CurrentToken != NULL) {
487 if (CurrentToken->is(tok::kw_virtual))
488 KeywordVirtualFound = true;
489 if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
491 LastPeriodOrArrow = CurrentToken;
493 AnnotatedToken *TheToken = CurrentToken;
// A binary operator binding tighter than assignment rules out a builder-type
// statement.
496 if (getPrecedence(*TheToken) > prec::Assignment &&
497 TheToken->Type == TT_BinaryOperator)
498 CanBeBuilderTypeStmt = false;
500 if (KeywordVirtualFound)
501 return LT_VirtualFunctionDecl;
503 // Assume a builder-type call if there are 2 or more "." and "->".
504 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
505 LastPeriodOrArrow->LastInChainOfCalls = true;
506 return LT_BuilderTypeCall;
509 if (Line.First.Type == TT_ObjCMethodSpecifier) {
// Publish the longest selector-name length on the first selector name token.
510 if (Contexts.back().FirstObjCSelectorName != NULL)
511 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
512 Contexts.back().LongestObjCSelectorName;
513 return LT_ObjCMethodDecl;
521 if (CurrentToken != NULL) {
522 determineTokenType(*CurrentToken);
523 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
526 if (CurrentToken != NULL && !CurrentToken->Children.empty())
527 CurrentToken = &CurrentToken->Children[0];
531 // Reset token type in case we have already looked at it and then recovered
532 // from an error (e.g. failure to find the matching >).
533 if (CurrentToken != NULL)
534 CurrentToken->Type = TT_Unknown;
537 /// \brief A struct to hold information valid in a specific context, e.g.
538 /// a pair of parenthesis.
// The constructor takes the context kind, the binding strength and the
// initial IsExpression value; every other member starts out as 0, false or
// NULL, except CanBeExpression, which starts out as true.
540 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
542 : ContextKind(ContextKind), BindingStrength(BindingStrength),
543 LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
544 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
545 FirstStartOfName(NULL), IsExpression(IsExpression),
546 CanBeExpression(true) {}
// Token kind that opened this context (e.g. tok::l_paren, tok::l_square).
548 tok::TokenKind ContextKind;
// Copied to each token's BindingStrength while this context is innermost.
549 unsigned BindingStrength;
// Largest TokenLength of an ObjC selector name seen in this context.
550 unsigned LongestObjCSelectorName;
// When set, a ':' in this context is typed TT_RangeBasedForLoopColon.
551 bool ColonIsForRangeExpr;
// When set, a ':' in this context is typed TT_ObjCMethodExpr.
552 bool ColonIsObjCMethodExpr;
// First token typed as an ObjC selector name in this context, or NULL.
553 AnnotatedToken *FirstObjCSelectorName;
// Most recent token typed TT_StartOfName in this context, or NULL.
554 AnnotatedToken *FirstStartOfName;
// Cleared when 'new' is seen; consulted when deciding whether '[' starts an
// ObjC method expression.
556 bool CanBeExpression;
559 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
560 /// of each instance.
// The new context's binding strength is the enclosing context's strength plus
// Increase, and it inherits the enclosing IsExpression value. The destructor
// pops the context again.
561 struct ScopedContextCreator {
564 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
567 P.Contexts.push_back(
568 Context(ContextKind, P.Contexts.back().BindingStrength + Increase,
569 P.Contexts.back().IsExpression));
572 ~ScopedContextCreator() { P.Contexts.pop_back(); }
// Assigns a TokenType to Current if it is still TT_Unknown. Before that, the
// current Context is updated (IsExpression / CanBeExpression) based on
// Current, and in some cases earlier '*' / '&' tokens are retyped as
// TT_PointerOrReference.
// NOTE(review): some lines (else branches, breaks, closing braces) are not
// visible in this listing.
575 void determineTokenType(AnnotatedToken &Current) {
576 if (getPrecedence(Current) == prec::Assignment) {
577 Contexts.back().IsExpression = true;
// Walk backwards to the previous comma (or the line start), jumping over
// [...] groups, and retype '*' / '&' that were classified as binary operators.
578 for (AnnotatedToken *Previous = Current.Parent;
579 Previous && Previous->isNot(tok::comma);
580 Previous = Previous->Parent) {
581 if (Previous->is(tok::r_square))
582 Previous = Previous->MatchingParen;
583 if (Previous->Type == TT_BinaryOperator &&
584 Previous->isOneOf(tok::star, tok::amp)) {
585 Previous->Type = TT_PointerOrReference;
588 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
589 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
590 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
591 Contexts.back().IsExpression = true;
592 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
// A run of '*' / '&' directly before ')', '>' or ',' is typed as
// pointer/reference.
593 for (AnnotatedToken *Previous = Current.Parent;
594 Previous && Previous->isOneOf(tok::star, tok::amp);
595 Previous = Previous->Parent)
596 Previous->Type = TT_PointerOrReference;
597 } else if (Current.Parent &&
598 Current.Parent->Type == TT_CtorInitializerColon) {
599 Contexts.back().IsExpression = true;
600 } else if (Current.is(tok::kw_new)) {
601 Contexts.back().CanBeExpression = false;
604 if (Current.Type == TT_Unknown) {
// An identifier following another (non-preprocessor-keyword) identifier, a
// simple type specifier, a pointer/reference or a template closer starts a
// name.
605 if (Current.Parent && Current.is(tok::identifier) &&
606 ((Current.Parent->is(tok::identifier) &&
607 Current.Parent->FormatTok.Tok.getIdentifierInfo()
608 ->getPPKeywordID() == tok::pp_not_keyword) ||
609 isSimpleTypeSpecifier(*Current.Parent) ||
610 Current.Parent->Type == TT_PointerOrReference ||
611 Current.Parent->Type == TT_TemplateCloser)) {
612 Contexts.back().FirstStartOfName = &Current;
613 Current.Type = TT_StartOfName;
614 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
616 determineStarAmpUsage(Current, Contexts.back().IsExpression);
617 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
618 Current.Type = determinePlusMinusCaretUsage(Current);
619 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
620 Current.Type = determineIncrementUsage(Current);
621 } else if (Current.is(tok::exclaim)) {
622 Current.Type = TT_UnaryOperator;
623 } else if (isBinaryOperator(Current)) {
624 Current.Type = TT_BinaryOperator;
625 } else if (Current.is(tok::comment)) {
// Distinguish line comments from block comments by the "//" prefix of the
// token's spelling.
626 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
628 if (StringRef(Data).startswith("//"))
629 Current.Type = TT_LineComment;
631 Current.Type = TT_BlockComment;
632 } else if (Current.is(tok::r_paren)) {
// Heuristic for detecting the closing paren of a cast.
633 bool ParensNotExpr = !Current.Parent ||
634 Current.Parent->Type == TT_PointerOrReference ||
635 Current.Parent->Type == TT_TemplateCloser;
636 bool ParensCouldEndDecl =
637 !Current.Children.empty() &&
638 Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
639 bool IsSizeOfOrAlignOf =
640 Current.MatchingParen && Current.MatchingParen->Parent &&
641 Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
643 if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
644 Contexts.back().IsExpression)
645 // FIXME: We need to get smarter and understand more cases of casts.
646 Current.Type = TT_CastRParen;
647 } else if (Current.is(tok::at) && Current.Children.size()) {
// '@' followed by an ObjC keyword: classify by the keyword.
648 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
649 case tok::objc_interface:
650 case tok::objc_implementation:
651 case tok::objc_protocol:
652 Current.Type = TT_ObjCDecl;
654 case tok::objc_property:
655 Current.Type = TT_ObjCProperty;
664 /// \brief Return the type of the given token assuming it is * or &.
// The decision uses the nearest non-comment neighbours of Tok (PrevToken and
// NextToken) and whether the surrounding context is an expression. The checks
// are ordered; the first one that matches determines the result.
// NOTE(review): a few return statements and conditions are not visible in
// this listing.
666 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
667 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
668 if (PrevToken == NULL)
669 return TT_UnaryOperator;
671 const AnnotatedToken *NextToken = getNextToken(Tok);
672 if (NextToken == NULL)
675 if (PrevToken->is(tok::l_paren) && !IsExpression)
676 return TT_PointerOrReference;
678 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
679 tok::comma, tok::semi, tok::kw_return, tok::colon,
681 PrevToken->Type == TT_BinaryOperator ||
682 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
683 return TT_UnaryOperator;
685 if (NextToken->is(tok::l_square))
686 return TT_PointerOrReference;
688 if (PrevToken->FormatTok.Tok.isLiteral() ||
689 PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
690 NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken))
691 return TT_BinaryOperator;
693 // It is very unlikely that we are going to find a pointer or reference type
694 // definition on the RHS of an assignment.
696 return TT_BinaryOperator;
698 return TT_PointerOrReference;
// Classifies a '+', '-' or '^' token as unary or binary from the nearest
// preceding non-comment token: no such token, one of the listed opening or
// separator tokens, or a binary operator all yield TT_UnaryOperator;
// everything else yields TT_BinaryOperator.
701 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
702 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
703 if (PrevToken == NULL)
704 return TT_UnaryOperator;
706 // Use heuristics to recognize unary operators.
707 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
708 tok::question, tok::colon, tok::kw_return,
709 tok::kw_case, tok::at, tok::l_brace))
710 return TT_UnaryOperator;
712 // There can't be two consecutive binary operators.
713 if (PrevToken->Type == TT_BinaryOperator)
714 return TT_UnaryOperator;
716 // Fall back to marking the token as binary operator.
717 return TT_BinaryOperator;
720 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
// Returns TT_TrailingUnaryOperator when the nearest preceding non-comment
// token is ')', ']' or an identifier; otherwise (including when there is no
// such token) returns TT_UnaryOperator.
721 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
722 const AnnotatedToken *PrevToken = getPreviousToken(Tok);
723 if (PrevToken == NULL)
724 return TT_UnaryOperator;
725 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
726 return TT_TrailingUnaryOperator;
728 return TT_UnaryOperator;
731 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
733 /// \brief Determine whether the token kind starts a simple-type-specifier.
// For tok::kw_decltype the answer depends on whether the lexer's language
// options have C++ enabled.
// NOTE(review): several case labels and the return statements for the other
// cases are not visible in this listing.
734 bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const {
735 switch (Tok.FormatTok.Tok.getKind()) {
738 case tok::kw___int64:
739 case tok::kw___int128:
741 case tok::kw_unsigned:
748 case tok::kw_wchar_t:
750 case tok::kw___underlying_type:
752 case tok::annot_typename:
753 case tok::kw_char16_t:
754 case tok::kw_char32_t:
756 case tok::kw_decltype:
757 return Lex.getLangOpts().CPlusPlus;
// Stack of nested contexts; the constructor pushes the outermost entry and
// ScopedContextCreator pushes/pops the nested ones.
764 SmallVector<Context, 8> Contexts;
// Passed to Lexer::getSpelling() when classifying comments.
766 SourceManager &SourceMgr;
// Cursor into the token chain of the line being annotated; NULL at the end.
769 AnnotatedToken *CurrentToken;
// Set by parseLine() once a 'virtual' keyword has been seen.
770 bool KeywordVirtualFound;
// Identifier info compared against identifiers in 'for' lines to detect the
// ObjC for-in keyword.
771 IdentifierInfo &Ident_in;
774 /// \brief Parses binary expressions by inserting fake parenthesis based on
775 /// operator precedence.
// The parser keeps a cursor (Current) into the token chain of the line and
// records the fake parentheses as FakeLParens / FakeRParens counters on the
// tokens.
// NOTE(review): access specifiers, the loop construct inside parse(), the
// header of the cursor-advancing helper and several returns are not visible
// in this listing.
776 class ExpressionParser {
// Starts parsing at the first token of Line.
778 ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
780 /// \brief Parse expressions with the given operator precedence.
781 void parse(int Precedence = 0) {
782 if (Precedence > prec::PointerToMember || Current == NULL)
785 // Skip over "return" until we can properly parse it.
786 if (Current->is(tok::kw_return))
789 // Eagerly consume trailing comments.
790 while (isTrailingComment(Current)) {
794 AnnotatedToken *Start = Current;
795 bool OperatorFound = false;
798 // Consume operators with higher precedence.
799 parse(prec::Level(Precedence + 1));
// CurrentPrecedence is 0 for tokens that are not operators here; otherwise
// it is the operator's precedence shifted up by one.
801 int CurrentPrecedence = 0;
803 if (Current->Type == TT_ConditionalExpr)
804 CurrentPrecedence = 1 + (int) prec::Conditional;
805 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
806 Current->Type == TT_CtorInitializerColon)
807 CurrentPrecedence = 1;
808 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
809 CurrentPrecedence = 1 + (int) getPrecedence(*Current);
812 // At the end of the line or when an operator with higher precedence is
813 // found, insert fake parenthesis and return.
// NOTE(review): the condition below fires when CurrentPrecedence is *smaller*
// than Precedence; confirm the intended meaning of "higher" in the comment
// above.
814 if (Current == NULL || closesScope(*Current) ||
815 (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
817 ++Start->FakeLParens;
819 ++Current->Parent->FakeRParens;
824 // Consume scopes: (), [], <> and {}
825 if (opensScope(*Current)) {
826 AnnotatedToken *Left = Current;
827 while (Current && !closesScope(*Current)) {
831 // Remove fake parens that just duplicate the real parens.
832 if (Current && Left->Children[0].FakeLParens > 0 &&
833 Current->Parent->FakeRParens > 0) {
834 --Left->Children[0].FakeLParens;
835 --Current->Parent->FakeRParens;
840 if (CurrentPrecedence == Precedence)
841 OperatorFound = true;
// Advance the cursor to the first child, or to NULL at the end of the line.
851 Current = Current->Children.empty() ? NULL : &Current->Children[0];
// Token currently being looked at; NULL once the line is exhausted.
854 AnnotatedToken *Current;
/// \brief Annotates \p Line: runs the \c AnnotatingParser to type the tokens
/// and determine the line type, sets up an \c ExpressionParser, refines the
/// line type for Objective-C lines and initializes the formatting fields of
/// the first token.
// NOTE(review): the statement taken when the line is LT_Invalid and the call
// on ExprParser are not visible in this listing.
857 void TokenAnnotator::annotate(AnnotatedLine &Line) {
858 AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
859 Line.Type = Parser.parseLine();
860 if (Line.Type == LT_Invalid)
863 ExpressionParser ExprParser(Line);
// The type of the first token overrides the line type for ObjC constructs.
866 if (Line.First.Type == TT_ObjCMethodSpecifier)
867 Line.Type = LT_ObjCMethodDecl;
868 else if (Line.First.Type == TT_ObjCDecl)
869 Line.Type = LT_ObjCDecl;
870 else if (Line.First.Type == TT_ObjCProperty)
871 Line.Type = LT_ObjCProperty;
873 Line.First.SpacesRequiredBefore = 1;
874 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
875 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
877 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
/// \brief Computes, for every token after the first one of \p Line, the
/// required spacing, the break flags (must/can break before), the cumulative
/// length and the split penalty.
// NOTE(review): the else keywords between some branches and the closing
// braces are not visible in this listing.
880 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
881 if (Line.First.Children.empty())
883 AnnotatedToken *Current = &Line.First.Children[0];
884 while (Current != NULL) {
885 if (Current->Type == TT_LineComment)
886 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
888 Current->SpacesRequiredBefore =
889 spaceRequiredBefore(Line, *Current) ? 1 : 0;
// Forced breaks: requested by the token itself, a line comment that was on
// its own line, after a trailing comment, between adjacent string literals,
// and before a '<<' that sits between two string literals.
891 if (Current->FormatTok.MustBreakBefore) {
892 Current->MustBreakBefore = true;
893 } else if (Current->Type == TT_LineComment) {
894 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
895 } else if (isTrailingComment(Current->Parent) ||
896 (Current->is(tok::string_literal) &&
897 Current->Parent->is(tok::string_literal))) {
898 Current->MustBreakBefore = true;
899 } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
900 Current->Parent->is(tok::string_literal) &&
901 Current->Children[0].is(tok::string_literal)) {
902 Current->MustBreakBefore = true;
904 Current->MustBreakBefore = false;
906 Current->CanBreakBefore =
907 Current->MustBreakBefore || canBreakBefore(Line, *Current);
// A forced break adds a full column limit to the running length.
908 if (Current->MustBreakBefore)
909 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
911 Current->TotalLength =
912 Current->Parent->TotalLength + Current->FormatTok.TokenLength +
913 Current->SpacesRequiredBefore;
914 // FIXME: Only calculate this if CanBreakBefore is true once static
915 // initializers etc. are sorted out.
916 // FIXME: Move magic numbers to a better place.
917 Current->SplitPenalty =
918 20 * Current->BindingStrength + splitPenalty(Line, *Current);
// Advance to the next token, or to NULL at the end of the line.
920 Current = Current->Children.empty() ? NULL : &Current->Children[0];
/// \brief Returns the penalty for breaking the line before \p Tok, based on
/// \p Tok (Right) and its predecessor (Left).
// The checks are ordered; the first matching one determines the result.
// NOTE(review): most of the returned penalty values are not visible in this
// listing, so only the conditions are documented here.
924 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
925 const AnnotatedToken &Tok) {
926 const AnnotatedToken &Left = *Tok.Parent;
927 const AnnotatedToken &Right = Tok;
929 if (Right.Type == TT_StartOfName) {
930 if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
932 else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
933 // FIXME: Clean up hack of using BindingStrength to find top-level names.
934 return Style.PenaltyReturnTypeOnItsOwnLine;
938 if (Left.is(tok::equal) && Right.is(tok::l_brace))
940 if (Left.is(tok::coloncolon))
943 if (Left.Type == TT_RangeBasedForLoopColon ||
944 Left.Type == TT_InheritanceColon)
947 if (Right.isOneOf(tok::arrow, tok::period)) {
948 if (Line.Type == LT_BuilderTypeCall)
949 return prec::PointerToMember;
950 if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
951 Left.MatchingParen->ParameterCount > 0)
952 return 20; // Should be smaller than breaking at a nested comma.
956 // In for-loops, prefer breaking at ',' and ';'.
957 if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
960 if (Left.is(tok::semi))
962 if (Left.is(tok::comma))
965 // In Objective-C method expressions, prefer breaking before "param:" over
966 // breaking after it.
967 if (Right.Type == TT_ObjCSelectorName)
969 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
972 if (opensScope(Left))
973 return Left.ParameterCount > 1 ? prec::Comma : 20;
975 if (Right.is(tok::lessless)) {
976 if (Left.is(tok::string_literal)) {
// Inspect the literal's text without its first and last character, trimmed
// of surrounding whitespace, for a trailing ':' or '='.
977 StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(),
978 Left.FormatTok.TokenLength);
979 Content = Content.drop_back(1).drop_front(1).trim();
980 if (Content.size() > 1 &&
981 (Content.back() == ':' || Content.back() == '='))
986 if (Left.Type == TT_ConditionalExpr)
987 return prec::Conditional;
988 prec::Level Level = getPrecedence(Left);
990 if (Level != prec::Unknown)
/// \brief Decides whether a space is required between the adjacent tokens
/// \p Left and \p Right, using their kinds and annotated types.
// The checks are ordered; the first matching one determines the result.
// NOTE(review): the return statements of many branches are not visible in
// this listing, so only the conditions are documented here.
996 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
997 const AnnotatedToken &Left,
998 const AnnotatedToken &Right) {
999 if (Right.is(tok::hashhash))
1000 return Left.is(tok::hash);
1001 if (Left.isOneOf(tok::hashhash, tok::hash))
1002 return Right.is(tok::hash);
1003 if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
1005 if (Right.is(tok::less) &&
1006 (Left.is(tok::kw_template) ||
1007 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1009 if (Left.is(tok::arrow) || Right.is(tok::arrow))
1011 if (Left.isOneOf(tok::exclaim, tok::tilde))
1013 if (Left.is(tok::at) &&
1014 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1015 tok::numeric_constant, tok::l_paren, tok::l_brace,
1016 tok::kw_true, tok::kw_false))
1018 if (Left.is(tok::coloncolon))
1020 if (Right.is(tok::coloncolon))
1021 return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
1022 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
// Spacing around '*' / '&' used as pointer/reference depends on
// Style.PointerBindsToType.
1024 if (Right.Type == TT_PointerOrReference)
1025 return Left.FormatTok.Tok.isLiteral() ||
1026 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1027 !Style.PointerBindsToType);
1028 if (Left.Type == TT_PointerOrReference)
1029 return Right.FormatTok.Tok.isLiteral() ||
1030 ((Right.Type != TT_PointerOrReference) &&
1031 Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1032 Left.Parent && Left.Parent->isNot(tok::l_paren));
1033 if (Right.is(tok::star) && Left.is(tok::l_paren))
// Inside square brackets, only ObjC array literals get padding.
1035 if (Left.is(tok::l_square))
1036 return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
1037 if (Right.is(tok::r_square))
1038 return Right.Type == TT_ObjCArrayLiteral;
1039 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1041 if (Left.is(tok::period) || Right.is(tok::period))
// Colons of ObjC method expressions get no surrounding space.
1043 if (Left.is(tok::colon))
1044 return Left.Type != TT_ObjCMethodExpr;
1045 if (Right.is(tok::colon))
1046 return Right.Type != TT_ObjCMethodExpr;
1047 if (Left.is(tok::l_paren))
1049 if (Right.is(tok::l_paren)) {
1050 return Line.Type == LT_ObjCDecl ||
1051 Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1052 tok::kw_return, tok::kw_catch, tok::kw_new,
1055 if (Left.is(tok::at) &&
1056 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1058 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
/// \brief Decides whether a space is required before \p Tok, taking the line
/// type and the annotated token types into account; falls back to
/// \c spaceRequiredBetween() for the token pair when no rule here applies.
// The checks are ordered; the first matching one determines the result.
// NOTE(review): the return statements of many branches are not visible in
// this listing, so only the conditions are documented here.
1063 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1064 const AnnotatedToken &Tok) {
1065 if (Tok.FormatTok.Tok.getIdentifierInfo() &&
1066 Tok.Parent->FormatTok.Tok.getIdentifierInfo())
1067 return true; // Never ever merge two identifiers.
1068 if (Line.Type == LT_ObjCMethodDecl) {
1069 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1071 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1072 // Don't space between ')' and <id>
1075 if (Line.Type == LT_ObjCProperty &&
1076 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1079 if (Tok.Parent->is(tok::comma))
1081 if (Tok.is(tok::comma))
1083 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1085 if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
1087 if (Tok.Type == TT_OverloadedOperatorLParen)
1089 if (Tok.is(tok::colon))
1090 return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
1091 !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr;
// '(' directly followed by a pointer/reference token that is not itself
// followed by ')'.
1092 if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
1093 Tok.Children[0].Type == TT_PointerOrReference &&
1094 !Tok.Children[0].Children.empty() &&
1095 Tok.Children[0].Children[0].isNot(tok::r_paren) &&
1096 Tok.Parent->isNot(tok::l_paren) &&
1097 (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType))
1099 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
1101 if (Tok.Type == TT_UnaryOperator)
1102 return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1103 (Tok.Parent->isNot(tok::colon) ||
1104 Tok.Parent->Type != TT_ObjCMethodExpr);
// Two adjacent template closers are separated unless the style standard is
// C++11.
1105 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1106 return Tok.Type == TT_TemplateCloser &&
1107 Tok.Parent->Type == TT_TemplateCloser &&
1108 Style.Standard != FormatStyle::LS_Cpp11;
1110 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1111 Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar))
1113 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1115 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1117 if (Tok.is(tok::less) && Line.First.is(tok::hash))
1119 if (Tok.Type == TT_TrailingUnaryOperator)
1121 return spaceRequiredBetween(Line, *Tok.Parent, Tok);
/// \brief Decides whether a line break is allowed before \p Right, based on
/// \p Right and its predecessor (Left).
// The checks are ordered; the first matching one determines the result, and
// the final return expression lists the generic break opportunities.
// NOTE(review): the return statements of many branches are not visible in
// this listing, so only the conditions are documented here.
1124 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1125 const AnnotatedToken &Right) {
1126 const AnnotatedToken &Left = *Right.Parent;
1127 if (Right.Type == TT_StartOfName)
1129 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1131 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1133 if (Right.Type == TT_ObjCSelectorName)
1135 if (Left.ClosesTemplateDeclaration)
1137 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1139 if (Right.Type == TT_RangeBasedForLoopColon ||
1140 Right.Type == TT_InheritanceColon)
1142 if (Left.Type == TT_RangeBasedForLoopColon ||
1143 Left.Type == TT_InheritanceColon)
1145 if (Right.Type == TT_RangeBasedForLoopColon)
1147 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1148 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1149 Left.isOneOf(tok::question, tok::kw_operator))
1151 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1153 if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
1154 Left.Parent->is(tok::kw___attribute))
1157 if (Right.Type == TT_LineComment)
1158 // We rely on MustBreakBefore being set correctly here as we should not
1159 // change the "binding" behavior of a comment.
1162 // Allow breaking after a trailing 'const', e.g. after a method declaration,
1163 // unless it is followed by ';', '{' or '='.
1164 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1165 Left.Parent->is(tok::r_paren))
1166 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1168 if (Right.is(tok::kw___attribute))
1171 // We only break before r_brace if there was a corresponding break before
1172 // the l_brace, which is tracked by BreakBeforeClosingBrace.
1173 if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
1175 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1177 return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
1178 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) ||
1179 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
1180 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1181 Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
1182 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1183 (Left.is(tok::l_square) && !Right.is(tok::r_square));
1186 } // namespace format
1187 } // namespace clang