1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
14 //===----------------------------------------------------------------------===//
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Lex/Lexer.h"
19 #include "llvm/Support/Debug.h"
24 bool AnnotatedToken::isUnaryOperator() const {
25 switch (FormatTok.Tok.getKind()) {
40 bool AnnotatedToken::isBinaryOperator() const {
41 // Comma is a binary operator, but does not behave as such wrt. formatting.
42 return getPrecedence(*this) > prec::Comma;
45 bool AnnotatedToken::isTrailingComment() const {
46 return is(tok::comment) &&
47 (Children.empty() || Children[0].FormatTok.NewlinesBefore > 0);
50 AnnotatedToken *AnnotatedToken::getPreviousNoneComment() const {
51 AnnotatedToken *Tok = Parent;
52 while (Tok != NULL && Tok->is(tok::comment))
57 const AnnotatedToken *AnnotatedToken::getNextNoneComment() const {
58 const AnnotatedToken *Tok = Children.empty() ? NULL : &Children[0];
59 while (Tok != NULL && Tok->is(tok::comment))
60 Tok = Tok->Children.empty() ? NULL : &Tok->Children[0];
64 bool AnnotatedToken::closesScope() const {
65 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
66 Type == TT_TemplateCloser;
69 bool AnnotatedToken::opensScope() const {
70 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
71 Type == TT_TemplateOpener;
74 /// \brief A parser that gathers additional information about tokens.
76 /// The \c TokenAnnotator tries to match parentheses and square brackets and
77 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
78 /// into template parameter lists.
79 class AnnotatingParser {
81 AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
82 IdentifierInfo &Ident_in)
83 : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
84 KeywordVirtualFound(false), NameFound(false), Ident_in(Ident_in) {
85 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
90 if (CurrentToken == NULL)
92 ScopedContextCreator ContextCreator(*this, tok::less, 10);
93 AnnotatedToken *Left = CurrentToken->Parent;
94 Contexts.back().IsExpression = false;
95 while (CurrentToken != NULL) {
96 if (CurrentToken->is(tok::greater)) {
97 Left->MatchingParen = CurrentToken;
98 CurrentToken->MatchingParen = Left;
99 CurrentToken->Type = TT_TemplateCloser;
103 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
104 tok::pipepipe, tok::ampamp, tok::question,
107 updateParameterCount(Left, CurrentToken);
114 bool parseParens(bool LookForDecls = false) {
115 if (CurrentToken == NULL)
117 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
119 // FIXME: This is a bit of a hack. Do better.
120 Contexts.back().ColonIsForRangeExpr =
121 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
123 bool StartsObjCMethodExpr = false;
124 AnnotatedToken *Left = CurrentToken->Parent;
125 if (CurrentToken->is(tok::caret)) {
126 // ^( starts a block.
127 Left->Type = TT_ObjCBlockLParen;
128 } else if (AnnotatedToken *MaybeSel = Left->Parent) {
129 // @selector( starts a selector.
130 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
131 MaybeSel->Parent->is(tok::at)) {
132 StartsObjCMethodExpr = true;
136 if (StartsObjCMethodExpr) {
137 Contexts.back().ColonIsObjCMethodExpr = true;
138 Left->Type = TT_ObjCMethodExpr;
141 while (CurrentToken != NULL) {
142 // LookForDecls is set when "if (" has been seen. Check for
143 // 'identifier' '*' 'identifier' followed by not '=' -- this
144 // '*' has to be a binary operator but determineStarAmpUsage() will
145 // categorize it as a unary operator, so set the right type here.
146 if (LookForDecls && !CurrentToken->Children.empty()) {
147 AnnotatedToken &Prev = *CurrentToken->Parent;
148 AnnotatedToken &Next = CurrentToken->Children[0];
149 if (Prev.Parent->is(tok::identifier) &&
150 Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
151 CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
152 Prev.Type = TT_BinaryOperator;
153 LookForDecls = false;
157 if (CurrentToken->is(tok::r_paren)) {
158 if (CurrentToken->Parent->closesScope())
159 CurrentToken->Parent->MatchingParen->NoMoreTokensOnLevel = true;
160 Left->MatchingParen = CurrentToken;
161 CurrentToken->MatchingParen = Left;
163 if (StartsObjCMethodExpr) {
164 CurrentToken->Type = TT_ObjCMethodExpr;
165 if (Contexts.back().FirstObjCSelectorName != NULL) {
166 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
167 Contexts.back().LongestObjCSelectorName;
174 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
176 updateParameterCount(Left, CurrentToken);
187 // A '[' could be an index subscript (after an identifier or after
188 // ')' or ']'), it could be the start of an Objective-C method
189 // expression, or it could be the start of an Objective-C array literal.
190 AnnotatedToken *Left = CurrentToken->Parent;
191 AnnotatedToken *Parent = Left->getPreviousNoneComment();
192 bool StartsObjCMethodExpr =
193 Contexts.back().CanBeExpression &&
194 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
195 tok::kw_return, tok::kw_throw) ||
196 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
197 Parent->Type == TT_CastRParen ||
198 getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
200 ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
201 Contexts.back().IsExpression = true;
202 bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
204 if (StartsObjCMethodExpr) {
205 Contexts.back().ColonIsObjCMethodExpr = true;
206 Left->Type = TT_ObjCMethodExpr;
207 } else if (StartsObjCArrayLiteral) {
208 Left->Type = TT_ObjCArrayLiteral;
211 while (CurrentToken != NULL) {
212 if (CurrentToken->is(tok::r_square)) {
213 if (!CurrentToken->Children.empty() &&
214 CurrentToken->Children[0].is(tok::l_paren)) {
215 // An ObjC method call is rarely followed by an open parenthesis.
216 // FIXME: Do we incorrectly label ":" with this?
217 StartsObjCMethodExpr = false;
218 Left->Type = TT_Unknown;
220 if (StartsObjCMethodExpr) {
221 CurrentToken->Type = TT_ObjCMethodExpr;
222 // determineStarAmpUsage() thinks that '*' '[' is allocating an
223 // array of pointers, but if '[' starts a selector then '*' is a
225 if (Parent != NULL && Parent->Type == TT_PointerOrReference)
226 Parent->Type = TT_BinaryOperator;
227 } else if (StartsObjCArrayLiteral) {
228 CurrentToken->Type = TT_ObjCArrayLiteral;
230 Left->MatchingParen = CurrentToken;
231 CurrentToken->MatchingParen = Left;
232 if (Contexts.back().FirstObjCSelectorName != NULL)
233 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
234 Contexts.back().LongestObjCSelectorName;
238 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
240 updateParameterCount(Left, CurrentToken);
248 if (CurrentToken != NULL) {
249 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
250 AnnotatedToken *Left = CurrentToken->Parent;
251 while (CurrentToken != NULL) {
252 if (CurrentToken->is(tok::r_brace)) {
253 Left->MatchingParen = CurrentToken;
254 CurrentToken->MatchingParen = Left;
258 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
260 updateParameterCount(Left, CurrentToken);
265 // No closing "}" found, this probably starts a definition.
266 Line.StartsDefinition = true;
270 void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
271 if (Current->is(tok::comma))
272 ++Left->ParameterCount;
273 else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
274 Left->ParameterCount = 1;
277 bool parseConditional() {
278 while (CurrentToken != NULL) {
279 if (CurrentToken->is(tok::colon)) {
280 CurrentToken->Type = TT_ConditionalExpr;
290 bool parseTemplateDeclaration() {
291 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
292 CurrentToken->Type = TT_TemplateOpener;
296 if (CurrentToken != NULL)
297 CurrentToken->Parent->ClosesTemplateDeclaration = true;
303 bool consumeToken() {
304 AnnotatedToken *Tok = CurrentToken;
306 switch (Tok->FormatTok.Tok.getKind()) {
309 if (Tok->Parent == NULL && Line.MustBeDeclaration)
310 Tok->Type = TT_ObjCMethodSpecifier;
313 if (Tok->Parent == NULL)
315 // Colons from ?: are handled in parseConditional().
316 if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) {
317 Tok->Type = TT_CtorInitializerColon;
318 } else if (Contexts.back().ColonIsObjCMethodExpr ||
319 Line.First.Type == TT_ObjCMethodSpecifier) {
320 Tok->Type = TT_ObjCMethodExpr;
321 Tok->Parent->Type = TT_ObjCSelectorName;
322 if (Tok->Parent->FormatTok.TokenLength >
323 Contexts.back().LongestObjCSelectorName)
324 Contexts.back().LongestObjCSelectorName =
325 Tok->Parent->FormatTok.TokenLength;
326 if (Contexts.back().FirstObjCSelectorName == NULL)
327 Contexts.back().FirstObjCSelectorName = Tok->Parent;
328 } else if (Contexts.back().ColonIsForRangeExpr) {
329 Tok->Type = TT_RangeBasedForLoopColon;
330 } else if (Contexts.size() == 1) {
331 Tok->Type = TT_InheritanceColon;
332 } else if (Contexts.back().ContextKind == tok::l_paren) {
333 Tok->Type = TT_InlineASMColon;
338 if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
340 if (!parseParens(/*LookForDecls=*/ true))
345 Contexts.back().ColonIsForRangeExpr = true;
353 if (Line.MustBeDeclaration && NameFound && !Contexts.back().IsExpression)
354 Line.MightBeFunctionDecl = true;
366 Tok->Type = TT_TemplateOpener;
368 Tok->Type = TT_BinaryOperator;
377 // Lines can start with '}'.
378 if (Tok->Parent != NULL)
382 Tok->Type = TT_BinaryOperator;
384 case tok::kw_operator:
385 while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
386 if (CurrentToken->isOneOf(tok::star, tok::amp))
387 CurrentToken->Type = TT_PointerOrReference;
391 CurrentToken->Type = TT_OverloadedOperatorLParen;
396 case tok::kw_template:
397 parseTemplateDeclaration();
399 case tok::identifier:
400 if (Line.First.is(tok::kw_for) &&
401 Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
402 Tok->Type = TT_ObjCForIn;
405 if (Contexts.back().FirstStartOfName)
406 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
414 void parseIncludeDirective() {
416 if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
418 while (CurrentToken != NULL) {
419 if (CurrentToken->isNot(tok::comment) ||
420 !CurrentToken->Children.empty())
421 CurrentToken->Type = TT_ImplicitStringLiteral;
425 while (CurrentToken != NULL) {
426 if (CurrentToken->is(tok::string_literal))
427 // Mark these string literals as "implicit" literals, too, so that
428 // they are not split or line-wrapped.
429 CurrentToken->Type = TT_ImplicitStringLiteral;
435 void parseWarningOrError() {
437 // We still want to format the whitespace left of the first token of the
440 while (CurrentToken != NULL) {
441 CurrentToken->Type = TT_ImplicitStringLiteral;
446 void parsePreprocessorDirective() {
448 if (CurrentToken == NULL)
450 // Hashes in the middle of a line can lead to any strange token
452 if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
454 switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
455 case tok::pp_include:
457 parseIncludeDirective();
460 case tok::pp_warning:
461 parseWarningOrError();
470 while (CurrentToken != NULL)
475 LineType parseLine() {
476 int PeriodsAndArrows = 0;
477 AnnotatedToken *LastPeriodOrArrow = NULL;
478 bool CanBeBuilderTypeStmt = true;
479 if (CurrentToken->is(tok::hash)) {
480 parsePreprocessorDirective();
481 return LT_PreprocessorDirective;
483 while (CurrentToken != NULL) {
484 if (CurrentToken->is(tok::kw_virtual))
485 KeywordVirtualFound = true;
486 if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
488 LastPeriodOrArrow = CurrentToken;
490 AnnotatedToken *TheToken = CurrentToken;
493 if (getPrecedence(*TheToken) > prec::Assignment &&
494 TheToken->Type == TT_BinaryOperator)
495 CanBeBuilderTypeStmt = false;
497 if (KeywordVirtualFound)
498 return LT_VirtualFunctionDecl;
500 // Assume a builder-type call if there are 2 or more "." and "->".
501 if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
502 LastPeriodOrArrow->LastInChainOfCalls = true;
503 return LT_BuilderTypeCall;
506 if (Line.First.Type == TT_ObjCMethodSpecifier) {
507 if (Contexts.back().FirstObjCSelectorName != NULL)
508 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
509 Contexts.back().LongestObjCSelectorName;
510 return LT_ObjCMethodDecl;
518 if (CurrentToken != NULL) {
519 determineTokenType(*CurrentToken);
520 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
523 if (CurrentToken != NULL && !CurrentToken->Children.empty())
524 CurrentToken = &CurrentToken->Children[0];
528 // Reset token type in case we have already looked at it and then recovered
529 // from an error (e.g. failure to find the matching >).
530 if (CurrentToken != NULL)
531 CurrentToken->Type = TT_Unknown;
534 /// \brief A struct to hold information valid in a specific context, e.g.
535 /// a pair of parentheses.
537 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
539 : ContextKind(ContextKind), BindingStrength(BindingStrength),
540 LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
541 ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
542 FirstStartOfName(NULL), IsExpression(IsExpression),
543 CanBeExpression(true) {}
545 tok::TokenKind ContextKind;
546 unsigned BindingStrength;
547 unsigned LongestObjCSelectorName;
548 bool ColonIsForRangeExpr;
549 bool ColonIsObjCMethodExpr;
550 AnnotatedToken *FirstObjCSelectorName;
551 AnnotatedToken *FirstStartOfName;
553 bool CanBeExpression;
556 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
557 /// of each instance.
558 struct ScopedContextCreator {
561 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
564 P.Contexts.push_back(
565 Context(ContextKind, P.Contexts.back().BindingStrength + Increase,
566 P.Contexts.back().IsExpression));
569 ~ScopedContextCreator() { P.Contexts.pop_back(); }
572 void determineTokenType(AnnotatedToken &Current) {
573 if (getPrecedence(Current) == prec::Assignment &&
574 (!Current.Parent || Current.Parent->isNot(tok::kw_operator))) {
575 Contexts.back().IsExpression = true;
576 for (AnnotatedToken *Previous = Current.Parent;
577 Previous && Previous->isNot(tok::comma);
578 Previous = Previous->Parent) {
579 if (Previous->is(tok::r_square))
580 Previous = Previous->MatchingParen;
581 if (Previous->Type == TT_BinaryOperator &&
582 Previous->isOneOf(tok::star, tok::amp)) {
583 Previous->Type = TT_PointerOrReference;
586 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
587 (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
588 (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
589 Contexts.back().IsExpression = true;
590 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
591 for (AnnotatedToken *Previous = Current.Parent;
592 Previous && Previous->isOneOf(tok::star, tok::amp);
593 Previous = Previous->Parent)
594 Previous->Type = TT_PointerOrReference;
595 } else if (Current.Parent &&
596 Current.Parent->Type == TT_CtorInitializerColon) {
597 Contexts.back().IsExpression = true;
598 } else if (Current.is(tok::kw_new)) {
599 Contexts.back().CanBeExpression = false;
600 } else if (Current.is(tok::semi)) {
601 // This should be the condition or increment in a for-loop.
602 Contexts.back().IsExpression = true;
605 if (Current.Type == TT_Unknown) {
606 if (Current.Parent && Current.is(tok::identifier) &&
607 ((Current.Parent->is(tok::identifier) &&
608 Current.Parent->FormatTok.Tok.getIdentifierInfo()
609 ->getPPKeywordID() == tok::pp_not_keyword) ||
610 isSimpleTypeSpecifier(*Current.Parent) ||
611 Current.Parent->Type == TT_PointerOrReference ||
612 Current.Parent->Type == TT_TemplateCloser)) {
613 Contexts.back().FirstStartOfName = &Current;
614 Current.Type = TT_StartOfName;
616 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
618 determineStarAmpUsage(Current, Contexts.back().IsExpression);
619 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
620 Current.Type = determinePlusMinusCaretUsage(Current);
621 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
622 Current.Type = determineIncrementUsage(Current);
623 } else if (Current.is(tok::exclaim)) {
624 Current.Type = TT_UnaryOperator;
625 } else if (Current.isBinaryOperator()) {
626 Current.Type = TT_BinaryOperator;
627 } else if (Current.is(tok::comment)) {
628 std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
630 if (StringRef(Data).startswith("//"))
631 Current.Type = TT_LineComment;
633 Current.Type = TT_BlockComment;
634 } else if (Current.is(tok::r_paren)) {
635 bool ParensNotExpr = !Current.Parent ||
636 Current.Parent->Type == TT_PointerOrReference ||
637 Current.Parent->Type == TT_TemplateCloser;
638 bool ParensCouldEndDecl =
639 !Current.Children.empty() &&
640 Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
641 bool IsSizeOfOrAlignOf =
642 Current.MatchingParen && Current.MatchingParen->Parent &&
643 Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
645 if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
646 Contexts.back().IsExpression)
647 // FIXME: We need to get smarter and understand more cases of casts.
648 Current.Type = TT_CastRParen;
649 } else if (Current.is(tok::at) && Current.Children.size()) {
650 switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
651 case tok::objc_interface:
652 case tok::objc_implementation:
653 case tok::objc_protocol:
654 Current.Type = TT_ObjCDecl;
656 case tok::objc_property:
657 Current.Type = TT_ObjCProperty;
666 /// \brief Return the type of the given token assuming it is * or &.
668 determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
669 const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
670 if (PrevToken == NULL)
671 return TT_UnaryOperator;
673 const AnnotatedToken *NextToken = Tok.getNextNoneComment();
674 if (NextToken == NULL)
677 if (PrevToken->is(tok::l_paren) && !IsExpression)
678 return TT_PointerOrReference;
680 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
681 tok::comma, tok::semi, tok::kw_return, tok::colon,
683 PrevToken->Type == TT_BinaryOperator ||
684 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
685 return TT_UnaryOperator;
687 if (NextToken->is(tok::l_square))
688 return TT_PointerOrReference;
690 if (PrevToken->FormatTok.Tok.isLiteral() ||
691 PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
692 NextToken->FormatTok.Tok.isLiteral() || NextToken->isUnaryOperator())
693 return TT_BinaryOperator;
695 // It is very unlikely that we are going to find a pointer or reference type
696 // definition on the RHS of an assignment.
698 return TT_BinaryOperator;
700 return TT_PointerOrReference;
703 TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
704 const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
705 if (PrevToken == NULL)
706 return TT_UnaryOperator;
708 // Use heuristics to recognize unary operators.
709 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
710 tok::question, tok::colon, tok::kw_return,
711 tok::kw_case, tok::at, tok::l_brace))
712 return TT_UnaryOperator;
714 // There can't be two consecutive binary operators.
715 if (PrevToken->Type == TT_BinaryOperator)
716 return TT_UnaryOperator;
718 // Fall back to marking the token as binary operator.
719 return TT_BinaryOperator;
722 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
723 TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
724 const AnnotatedToken *PrevToken = Tok.getPreviousNoneComment();
725 if (PrevToken == NULL)
726 return TT_UnaryOperator;
727 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
728 return TT_TrailingUnaryOperator;
730 return TT_UnaryOperator;
733 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
735 /// \brief Determine whether the token kind starts a simple-type-specifier.
736 bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const {
737 switch (Tok.FormatTok.Tok.getKind()) {
740 case tok::kw___int64:
741 case tok::kw___int128:
743 case tok::kw_unsigned:
750 case tok::kw_wchar_t:
752 case tok::kw___underlying_type:
754 case tok::annot_typename:
755 case tok::kw_char16_t:
756 case tok::kw_char32_t:
758 case tok::kw_decltype:
759 return Lex.getLangOpts().CPlusPlus;
766 SmallVector<Context, 8> Contexts;
768 SourceManager &SourceMgr;
771 AnnotatedToken *CurrentToken;
772 bool KeywordVirtualFound;
774 IdentifierInfo &Ident_in;
777 /// \brief Parses binary expressions by inserting fake parentheses based on
778 /// operator precedence.
779 class ExpressionParser {
781 ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
783 /// \brief Parse expressions with the given operator precedence.
784 void parse(int Precedence = 0) {
785 if (Precedence > prec::PointerToMember || Current == NULL)
788 // Eagerly consume trailing comments.
789 while (Current && Current->isTrailingComment()) {
793 AnnotatedToken *Start = Current;
794 bool OperatorFound = false;
797 // Consume operators with higher precedence.
798 parse(Precedence + 1);
800 int CurrentPrecedence = 0;
802 if (Current->Type == TT_ConditionalExpr)
803 CurrentPrecedence = 1 + (int) prec::Conditional;
804 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon)
805 CurrentPrecedence = 1;
806 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
807 CurrentPrecedence = 1 + (int) getPrecedence(*Current);
810 // At the end of the line or when an operator with higher precedence is
811 // found, insert fake parentheses and return.
812 if (Current == NULL || Current->closesScope() ||
813 (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
815 Start->FakeLParens.push_back(prec::Level(Precedence - 1));
817 ++Current->Parent->FakeRParens;
822 // Consume scopes: (), [], <> and {}
823 if (Current->opensScope()) {
824 while (Current && !Current->closesScope()) {
831 if (CurrentPrecedence == Precedence)
832 OperatorFound = true;
842 Current = Current->Children.empty() ? NULL : &Current->Children[0];
845 AnnotatedToken *Current;
848 void TokenAnnotator::annotate(AnnotatedLine &Line) {
849 AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
850 Line.Type = Parser.parseLine();
851 if (Line.Type == LT_Invalid)
854 ExpressionParser ExprParser(Line);
857 if (Line.First.Type == TT_ObjCMethodSpecifier)
858 Line.Type = LT_ObjCMethodDecl;
859 else if (Line.First.Type == TT_ObjCDecl)
860 Line.Type = LT_ObjCDecl;
861 else if (Line.First.Type == TT_ObjCProperty)
862 Line.Type = LT_ObjCProperty;
864 Line.First.SpacesRequiredBefore = 1;
865 Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
866 Line.First.CanBreakBefore = Line.First.MustBreakBefore;
868 Line.First.TotalLength = Line.First.FormatTok.TokenLength;
871 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
872 if (Line.First.Children.empty())
874 AnnotatedToken *Current = &Line.First.Children[0];
875 while (Current != NULL) {
876 if (Current->Type == TT_LineComment)
877 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
879 Current->SpacesRequiredBefore =
880 spaceRequiredBefore(Line, *Current) ? 1 : 0;
882 if (Current->FormatTok.MustBreakBefore) {
883 Current->MustBreakBefore = true;
884 } else if (Current->Type == TT_LineComment) {
885 Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
886 } else if (Current->Parent->isTrailingComment() ||
887 (Current->is(tok::string_literal) &&
888 Current->Parent->is(tok::string_literal))) {
889 Current->MustBreakBefore = true;
890 } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
891 Current->Parent->is(tok::string_literal) &&
892 Current->Children[0].is(tok::string_literal)) {
893 Current->MustBreakBefore = true;
895 Current->MustBreakBefore = false;
897 Current->CanBreakBefore =
898 Current->MustBreakBefore || canBreakBefore(Line, *Current);
899 if (Current->MustBreakBefore)
900 Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
902 Current->TotalLength =
903 Current->Parent->TotalLength + Current->FormatTok.TokenLength +
904 Current->SpacesRequiredBefore;
905 // FIXME: Only calculate this if CanBreakBefore is true once static
906 // initializers etc. are sorted out.
907 // FIXME: Move magic numbers to a better place.
908 Current->SplitPenalty =
909 20 * Current->BindingStrength + splitPenalty(Line, *Current);
911 Current = Current->Children.empty() ? NULL : &Current->Children[0];
915 printDebugInfo(Line);
919 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
920 const AnnotatedToken &Tok) {
921 const AnnotatedToken &Left = *Tok.Parent;
922 const AnnotatedToken &Right = Tok;
924 if (Right.Type == TT_StartOfName) {
925 if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
927 else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
928 // FIXME: Clean up hack of using BindingStrength to find top-level names.
929 return Style.PenaltyReturnTypeOnItsOwnLine;
933 if (Left.is(tok::equal) && Right.is(tok::l_brace))
935 if (Left.is(tok::coloncolon))
937 if (Left.isOneOf(tok::kw_class, tok::kw_struct))
940 if (Left.Type == TT_RangeBasedForLoopColon ||
941 Left.Type == TT_InheritanceColon)
944 if (Right.isOneOf(tok::arrow, tok::period)) {
945 if (Line.Type == LT_BuilderTypeCall)
946 return prec::PointerToMember;
947 if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
948 Left.MatchingParen->ParameterCount > 0)
949 return 20; // Should be smaller than breaking at a nested comma.
953 // In for-loops, prefer breaking at ',' and ';'.
954 if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
957 if (Left.is(tok::semi))
959 if (Left.is(tok::comma))
962 // In Objective-C method expressions, prefer breaking before "param:" over
963 // breaking after it.
964 if (Right.Type == TT_ObjCSelectorName)
966 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
969 if (Left.is(tok::l_paren) && Line.MightBeFunctionDecl)
971 if (Left.opensScope())
972 return Left.ParameterCount > 1 ? prec::Comma : 20;
974 if (Right.is(tok::lessless)) {
975 if (Left.is(tok::string_literal)) {
976 StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(),
977 Left.FormatTok.TokenLength);
978 Content = Content.drop_back(1).drop_front(1).trim();
979 if (Content.size() > 1 &&
980 (Content.back() == ':' || Content.back() == '='))
985 if (Left.Type == TT_ConditionalExpr)
986 return prec::Conditional;
987 prec::Level Level = getPrecedence(Left);
989 if (Level != prec::Unknown)
995 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
996 const AnnotatedToken &Left,
997 const AnnotatedToken &Right) {
998 if (Right.is(tok::hashhash))
999 return Left.is(tok::hash);
1000 if (Left.isOneOf(tok::hashhash, tok::hash))
1001 return Right.is(tok::hash);
1002 if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
1004 if (Right.is(tok::less) &&
1005 (Left.is(tok::kw_template) ||
1006 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
1008 if (Left.is(tok::arrow) || Right.is(tok::arrow))
1010 if (Left.isOneOf(tok::exclaim, tok::tilde))
1012 if (Left.is(tok::at) &&
1013 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
1014 tok::numeric_constant, tok::l_paren, tok::l_brace,
1015 tok::kw_true, tok::kw_false))
1017 if (Left.is(tok::coloncolon))
1019 if (Right.is(tok::coloncolon))
1020 return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
1021 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
1023 if (Right.Type == TT_PointerOrReference)
1024 return Left.FormatTok.Tok.isLiteral() ||
1025 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
1026 !Style.PointerBindsToType);
1027 if (Left.Type == TT_PointerOrReference)
1028 return Right.FormatTok.Tok.isLiteral() ||
1029 ((Right.Type != TT_PointerOrReference) &&
1030 Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
1031 Left.Parent && Left.Parent->isNot(tok::l_paren));
1032 if (Right.is(tok::star) && Left.is(tok::l_paren))
1034 if (Left.is(tok::l_square))
1035 return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
1036 if (Right.is(tok::r_square))
1037 return Right.Type == TT_ObjCArrayLiteral;
1038 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
1040 if (Left.is(tok::period) || Right.is(tok::period))
1042 if (Left.is(tok::colon))
1043 return Left.Type != TT_ObjCMethodExpr;
1044 if (Right.is(tok::colon))
1045 return Right.Type != TT_ObjCMethodExpr;
1046 if (Left.is(tok::l_paren))
1048 if (Right.is(tok::l_paren)) {
1049 return Line.Type == LT_ObjCDecl ||
1050 Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
1051 tok::kw_return, tok::kw_catch, tok::kw_new,
1052 tok::kw_delete, tok::semi);
1054 if (Left.is(tok::at) &&
1055 Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
1057 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
1059 if (Right.is(tok::ellipsis))
1064 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
1065 const AnnotatedToken &Tok) {
1066 if (Tok.FormatTok.Tok.getIdentifierInfo() &&
1067 Tok.Parent->FormatTok.Tok.getIdentifierInfo())
1068 return true; // Never ever merge two identifiers.
1069 if (Line.Type == LT_ObjCMethodDecl) {
1070 if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
1072 if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
1073 // Don't space between ')' and <id>
1076 if (Line.Type == LT_ObjCProperty &&
1077 (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
1080 if (Tok.Parent->is(tok::comma))
1082 if (Tok.is(tok::comma))
1084 if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
1086 if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
1088 if (Tok.Type == TT_OverloadedOperatorLParen)
1090 if (Tok.is(tok::colon))
1091 return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
1092 Tok.getNextNoneComment() != NULL && Tok.Type != TT_ObjCMethodExpr;
1093 if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
1094 Tok.Children[0].Type == TT_PointerOrReference &&
1095 !Tok.Children[0].Children.empty() &&
1096 Tok.Children[0].Children[0].isNot(tok::r_paren) &&
1097 Tok.Parent->isNot(tok::l_paren) &&
1098 (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType))
1100 if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
1102 if (Tok.Type == TT_UnaryOperator)
1103 return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
1104 (Tok.Parent->isNot(tok::colon) ||
1105 Tok.Parent->Type != TT_ObjCMethodExpr);
1106 if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
1107 return Tok.Type == TT_TemplateCloser &&
1108 Tok.Parent->Type == TT_TemplateCloser &&
1109 Style.Standard != FormatStyle::LS_Cpp11;
1111 if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
1112 Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar))
1114 if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
1116 if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
1118 if (Tok.is(tok::less) && Line.First.is(tok::hash))
1120 if (Tok.Type == TT_TrailingUnaryOperator)
1122 return spaceRequiredBetween(Line, *Tok.Parent, Tok);
1125 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
1126 const AnnotatedToken &Right) {
1127 const AnnotatedToken &Left = *Right.Parent;
1128 if (Right.Type == TT_StartOfName)
1130 if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
1132 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
1134 if (Right.Type == TT_ObjCSelectorName)
1136 if (Left.ClosesTemplateDeclaration)
1138 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
1140 if (Right.Type == TT_RangeBasedForLoopColon ||
1141 Right.Type == TT_OverloadedOperatorLParen)
1143 if (Left.Type == TT_RangeBasedForLoopColon)
1145 if (Right.Type == TT_RangeBasedForLoopColon)
1147 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
1148 Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
1149 Left.isOneOf(tok::question, tok::kw_operator))
1151 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
1153 if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
1154 Left.Parent->is(tok::kw___attribute))
1157 if (Right.Type == TT_LineComment)
1158 // We rely on MustBreakBefore being set correctly here as we should not
1159 // change the "binding" behavior of a comment.
1162 // Allow breaking after a trailing 'const', e.g. after a method declaration,
1163 // unless it is followed by ';', '{' or '='.
1164 if (Left.is(tok::kw_const) && Left.Parent != NULL &&
1165 Left.Parent->is(tok::r_paren))
1166 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
1168 if (Right.is(tok::kw___attribute))
1171 // We only break before r_brace if there was a corresponding break before
1172 // the l_brace, which is tracked by BreakBeforeClosingBrace.
1173 if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
1175 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
1177 return (Left.isBinaryOperator() && Left.isNot(tok::lessless)) ||
1178 Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
1179 tok::kw_class, tok::kw_struct) ||
1180 Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
1181 (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
1182 Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
1183 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
1184 (Left.is(tok::l_square) && !Right.is(tok::r_square));
1187 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
1188 llvm::errs() << "AnnotatedTokens:\n";
1189 const AnnotatedToken *Tok = &Line.First;
1191 llvm::errs() << " M=" << Tok->MustBreakBefore
1192 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
1193 << " S=" << Tok->SpacesRequiredBefore
1194 << " P=" << Tok->SplitPenalty
1195 << " Name=" << Tok->FormatTok.Tok.getName() << " FakeLParens=";
1196 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
1197 llvm::errs() << Tok->FakeLParens[i] << "/";
1198 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
1199 Tok = Tok->Children.empty() ? NULL : &Tok->Children[0];
1201 llvm::errs() << "----\n";
1204 } // namespace format
1205 } // namespace clang