1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
14 //===----------------------------------------------------------------------===//
16 #include "TokenAnnotator.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/Support/Debug.h"
21 #define DEBUG_TYPE "format-token-annotator"
28 /// Returns \c true if the token can be used as an identifier in
29 /// an Objective-C \c @selector, \c false otherwise.
31 /// Because getFormattingLangOpts() always lexes source code as
32 /// Objective-C++, C++ keywords like \c new and \c delete are
33 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
35 /// For Objective-C and Objective-C++, both identifiers and keywords
36 /// are valid inside @selector(...) (or a macro which
37 /// invokes @selector(...)). So, we treat any identifier or
38 /// keyword as a potential Objective-C selector component.
39 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
// A token qualifies exactly when the lexer attached an IdentifierInfo to it;
// per the note above, that is expected to cover keywords as well as plain
// identifiers.
40 return Tok.Tok.getIdentifierInfo() != nullptr;
43 /// A parser that gathers additional information about tokens.
45 /// The \c TokenAnnotator tries to match parentheses and square brackets and
46 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
47 /// into template parameter lists.
48 class AnnotatingParser {
// Sets up the parser for one annotated line: parsing starts at Line.First,
// a single root context (kind tok::unknown, IsExpression = false) is pushed,
// and the metadata of the first token is reset.
// NOTE(review): the rest of the member-initializer list is not visible in
// this listing; confirm the remaining members against the full source.
50 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
51 const AdditionalKeywords &Keywords)
52 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
54 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
55 resetTokenMetadata(CurrentToken);
60 if (!CurrentToken || !CurrentToken->Previous)
62 if (NonTemplateLess.count(CurrentToken->Previous))
65 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
66 if (Previous.Previous) {
67 if (Previous.Previous->Tok.isLiteral())
69 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
70 (!Previous.Previous->MatchingParen ||
71 !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen)))
75 FormatToken *Left = CurrentToken->Previous;
76 Left->ParentBracket = Contexts.back().ContextKind;
77 ScopedContextCreator ContextCreator(*this, tok::less, 12);
79 // If this angle is in the context of an expression, we need to be more
80 // hesitant to detect it as opening template parameters.
81 bool InExprContext = Contexts.back().IsExpression;
83 Contexts.back().IsExpression = false;
84 // If there's a template keyword before the opening angle bracket, this is a
85 // template parameter, not an argument.
86 Contexts.back().InTemplateArgument =
87 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
89 if (Style.Language == FormatStyle::LK_Java &&
90 CurrentToken->is(tok::question))
93 while (CurrentToken) {
94 if (CurrentToken->is(tok::greater)) {
95 Left->MatchingParen = CurrentToken;
96 CurrentToken->MatchingParen = Left;
97 // In TT_Proto, we must distinguish between:
100 // msg: < item: data >
101 // In TT_TextProto, map<key, value> does not occur.
102 if (Style.Language == FormatStyle::LK_TextProto ||
103 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
104 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral)))
105 CurrentToken->Type = TT_DictLiteral;
107 CurrentToken->Type = TT_TemplateCloser;
111 if (CurrentToken->is(tok::question) &&
112 Style.Language == FormatStyle::LK_Java) {
116 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
117 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
118 Style.Language != FormatStyle::LK_Proto &&
119 Style.Language != FormatStyle::LK_TextProto))
121 // If a && or || is found and interpreted as a binary operator, this set
122 // of angles is likely part of something like "a < b && c > d". If the
123 // angles are inside an expression, the ||/&& might also be a binary
124 // operator that was misinterpreted because we are parsing template
126 // FIXME: This is getting out of hand, write a decent parser.
127 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
128 CurrentToken->Previous->is(TT_BinaryOperator) &&
129 Contexts[Contexts.size() - 2].IsExpression &&
130 !Line.startsWith(tok::kw_template))
132 updateParameterCount(Left, CurrentToken);
133 if (Style.Language == FormatStyle::LK_Proto) {
134 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
135 if (CurrentToken->is(tok::colon) ||
136 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
137 Previous->isNot(tok::colon)))
138 Previous->Type = TT_SelectorName;
/// Annotates the contents of a parenthesized group.
///
/// \c Left is taken from CurrentToken->Previous (presumably the '(' that was
/// just consumed). A new tok::l_paren context is pushed, its IsExpression
/// flag is chosen from what precedes the paren, and the tokens are scanned
/// until the matching ')' is reached. At that point the two parens are linked
/// through MatchingParen and Left->PackingKind is recorded.
///
/// \param LookForDecls when set, the first '*', '&' or '&&' that sits between
/// two identifiers and is not followed by '=' is retyped as TT_BinaryOperator.
///
/// NOTE(review): the return statements are not visible in this listing;
/// confirm the meaning of the bool result against the full source.
147 bool parseParens(bool LookForDecls = false) {
150 FormatToken *Left = CurrentToken->Previous;
151 Left->ParentBracket = Contexts.back().ContextKind;
152 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
154 // FIXME: This is a bit of a hack. Do better.
155 Contexts.back().ColonIsForRangeExpr =
156 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
158 bool StartsObjCMethodExpr = false;
159 if (FormatToken *MaybeSel = Left->Previous) {
160 // @selector( starts a selector.
161 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
162 MaybeSel->Previous->is(tok::at)) {
163 StartsObjCMethodExpr = true;
// Decide whether the paren contents should be treated as an expression,
// based on the token(s) in front of the opening paren.
167 if (Left->is(TT_OverloadedOperatorLParen)) {
168 Contexts.back().IsExpression = false;
169 } else if (Style.Language == FormatStyle::LK_JavaScript &&
170 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
171 Line.startsWith(tok::kw_export, Keywords.kw_type,
174 // export type X = (...);
175 Contexts.back().IsExpression = false;
176 } else if (Left->Previous &&
177 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype,
178 tok::kw_if, tok::kw_while, tok::l_paren,
180 Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) ||
181 Left->Previous->is(TT_BinaryOperator))) {
182 // static_assert, if and while usually contain expressions.
183 Contexts.back().IsExpression = true;
184 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
185 (Left->Previous->is(Keywords.kw_function) ||
186 (Left->Previous->endsSequence(tok::identifier,
187 Keywords.kw_function)))) {
188 // function(...) or function f(...)
189 Contexts.back().IsExpression = false;
190 } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
191 Left->Previous->is(TT_JsTypeColon)) {
192 // let x: (SomeType);
193 Contexts.back().IsExpression = false;
194 } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
195 Left->Previous->MatchingParen &&
196 Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
197 // This is a parameter list of a lambda expression.
198 Contexts.back().IsExpression = false;
199 } else if (Line.InPPDirective &&
200 (!Left->Previous || !Left->Previous->is(tok::identifier))) {
201 Contexts.back().IsExpression = true;
202 } else if (Contexts[Contexts.size() - 2].CaretFound) {
203 // This is the parameter list of an ObjC block.
204 Contexts.back().IsExpression = false;
205 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
206 Left->Type = TT_AttributeParen;
207 } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
208 // The first argument to a foreach macro is a declaration.
209 Contexts.back().IsForEachMacro = true;
210 Contexts.back().IsExpression = false;
211 } else if (Left->Previous && Left->Previous->MatchingParen &&
212 Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) {
213 Contexts.back().IsExpression = false;
214 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
216 Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch);
217 Contexts.back().IsExpression = !IsForOrCatch;
220 if (StartsObjCMethodExpr) {
221 Contexts.back().ColonIsObjCMethodExpr = true;
222 Left->Type = TT_ObjCMethodExpr;
225 // MightBeFunctionType and ProbablyFunctionType are used for
226 // function pointer and reference types as well as Objective-C
229 // void (*FunctionPointer)(void);
230 // void (&FunctionReference)(void);
231 // void (^ObjCBlock)(void);
232 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
233 bool ProbablyFunctionType =
234 CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
235 bool HasMultipleLines = false;
236 bool HasMultipleParametersOnALine = false;
237 bool MightBeObjCForRangeLoop =
238 Left->Previous && Left->Previous->is(tok::kw_for);
239 FormatToken *PossibleObjCForInToken = nullptr;
240 while (CurrentToken) {
241 // LookForDecls is set when "if (" has been seen. Check for
242 // 'identifier' '*' 'identifier' followed by not '=' -- this
243 // '*' has to be a binary operator but determineStarAmpUsage() will
244 // categorize it as an unary operator, so set the right type here.
245 if (LookForDecls && CurrentToken->Next) {
246 FormatToken *Prev = CurrentToken->getPreviousNonComment();
248 FormatToken *PrevPrev = Prev->getPreviousNonComment();
249 FormatToken *Next = CurrentToken->Next;
250 if (PrevPrev && PrevPrev->is(tok::identifier) &&
251 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
252 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
253 Prev->Type = TT_BinaryOperator;
254 LookForDecls = false;
259 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
260 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
262 ProbablyFunctionType = true;
263 if (CurrentToken->is(tok::comma))
264 MightBeFunctionType = false;
265 if (CurrentToken->Previous->is(TT_BinaryOperator))
266 Contexts.back().IsExpression = true;
// Closing paren: finalize the annotations on Left and on the ')' itself.
267 if (CurrentToken->is(tok::r_paren)) {
268 if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next &&
269 (CurrentToken->Next->is(tok::l_paren) ||
270 (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration)))
271 Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen
272 : TT_FunctionTypeLParen;
273 Left->MatchingParen = CurrentToken;
274 CurrentToken->MatchingParen = Left;
276 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
277 Left->Previous && Left->Previous->is(tok::l_paren)) {
278 // Detect the case where macros are used to generate lambdas or
279 // function bodies, e.g.:
280 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
281 for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) {
282 if (Tok->is(TT_BinaryOperator) &&
283 Tok->isOneOf(tok::star, tok::amp, tok::ampamp))
284 Tok->Type = TT_PointerOrReference;
288 if (StartsObjCMethodExpr) {
289 CurrentToken->Type = TT_ObjCMethodExpr;
290 if (Contexts.back().FirstObjCSelectorName) {
291 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
292 Contexts.back().LongestObjCSelectorName;
296 if (Left->is(TT_AttributeParen))
297 CurrentToken->Type = TT_AttributeParen;
298 if (Left->Previous && Left->Previous->is(TT_JavaAnnotation))
299 CurrentToken->Type = TT_JavaAnnotation;
300 if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
301 CurrentToken->Type = TT_LeadingJavaAnnotation;
// Record how the arguments were laid out in the input.
303 if (!HasMultipleLines)
304 Left->PackingKind = PPK_Inconclusive;
305 else if (HasMultipleParametersOnALine)
306 Left->PackingKind = PPK_BinPacked;
308 Left->PackingKind = PPK_OnePerLine;
313 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
316 if (CurrentToken->is(tok::l_brace))
317 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
318 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
319 !CurrentToken->Next->HasUnescapedNewline &&
320 !CurrentToken->Next->isTrailingComment())
321 HasMultipleParametersOnALine = true;
322 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
323 CurrentToken->Previous->isSimpleTypeSpecifier()) &&
324 !CurrentToken->is(tok::l_brace))
325 Contexts.back().IsExpression = false;
326 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
327 MightBeObjCForRangeLoop = false;
328 if (PossibleObjCForInToken) {
329 PossibleObjCForInToken->Type = TT_Unknown;
330 PossibleObjCForInToken = nullptr;
333 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
334 PossibleObjCForInToken = CurrentToken;
335 PossibleObjCForInToken->Type = TT_ObjCForIn;
337 // When we discover a 'new', we set CanBeExpression to 'false' in order to
338 // parse the type correctly. Reset that after a comma.
339 if (CurrentToken->is(tok::comma))
340 Contexts.back().CanBeExpression = true;
342 FormatToken *Tok = CurrentToken;
345 updateParameterCount(Left, Tok);
346 if (CurrentToken && CurrentToken->HasUnescapedNewline)
347 HasMultipleLines = true;
/// Heuristically decides whether \p Tok opens a C++11 attribute specifier
/// ('[[' ... ']]').
///
/// Only considered for C++-family styles and only when \p Tok begins a
/// '[' '[' sequence. The tokens after the second '[' are then scanned
/// forward; the final result is \c true when the scan stops on a ']' ']'
/// sequence.
///
/// NOTE(review): the early-exit statements guarded by the conditions below
/// are not visible in this listing; confirm their results against the full
/// source.
352 bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
353 if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
// First token after the two opening square brackets.
355 const FormatToken *AttrTok = Tok.Next->Next;
358 // C++17 '[[using ns: foo, bar(baz, blech)]]'
359 // We assume nobody will name an ObjC variable 'using'.
360 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
362 if (AttrTok->isNot(tok::identifier))
364 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
365 // ObjC message send. We assume nobody will use : in a C++11 attribute
366 // specifier parameter, although this is technically valid:
368 if (AttrTok->is(tok::colon) ||
369 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
370 AttrTok->startsSequence(tok::r_paren, tok::identifier))
372 if (AttrTok->is(tok::ellipsis))
374 AttrTok = AttrTok->Next;
// The scan either ran off the end of the token list (AttrTok is null) or
// stopped at the closing ']' ']'.
376 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
383 // A '[' could be an index subscript (after an identifier or after
384 // ')' or ']'), it could be the start of an Objective-C method
385 // expression, it could be the start of an Objective-C array literal,
386 // or it could be a C++ attribute specifier [[foo::bar]].
387 FormatToken *Left = CurrentToken->Previous;
388 Left->ParentBracket = Contexts.back().ContextKind;
389 FormatToken *Parent = Left->getPreviousNonComment();
391 // Cases where '>' is followed by '['.
392 // In C++, this can happen either in array of templates (foo<int>[10])
393 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
394 bool CppArrayTemplates =
395 Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) &&
396 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
397 Contexts.back().InTemplateArgument);
399 bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
400 Contexts.back().InCpp11AttributeSpecifier;
402 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
403 bool StartsObjCMethodExpr =
404 !InsideInlineASM && !CppArrayTemplates && Style.isCpp() &&
405 !IsCpp11AttributeSpecifier && Contexts.back().CanBeExpression &&
406 Left->isNot(TT_LambdaLSquare) &&
407 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
409 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
410 tok::kw_return, tok::kw_throw) ||
411 Parent->isUnaryOperator() ||
412 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
413 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
414 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
415 bool ColonFound = false;
417 unsigned BindingIncrease = 1;
418 if (Left->isCppStructuredBinding(Style)) {
419 Left->Type = TT_StructuredBindingLSquare;
420 } else if (Left->is(TT_Unknown)) {
421 if (StartsObjCMethodExpr) {
422 Left->Type = TT_ObjCMethodExpr;
423 } else if (IsCpp11AttributeSpecifier) {
424 Left->Type = TT_AttributeSquare;
425 } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
426 Contexts.back().ContextKind == tok::l_brace &&
427 Parent->isOneOf(tok::l_brace, tok::comma)) {
428 Left->Type = TT_JsComputedPropertyName;
429 } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace &&
430 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
431 Left->Type = TT_DesignatedInitializerLSquare;
432 } else if (CurrentToken->is(tok::r_square) && Parent &&
433 Parent->is(TT_TemplateCloser)) {
434 Left->Type = TT_ArraySubscriptLSquare;
435 } else if (Style.Language == FormatStyle::LK_Proto ||
436 Style.Language == FormatStyle::LK_TextProto) {
437 // Square braces in LK_Proto can either be message field attributes:
439 // optional Aaa aaa = 1 [
447 // or text proto extensions (in options):
449 // option (Aaa.options) = {
450 // [type.type/type] {
455 // or repeated fields (in options):
457 // option (Aaa.options) = {
461 // In the first and the third case we want to spread the contents inside
462 // the square braces; in the second we want to keep them inline.
463 Left->Type = TT_ArrayInitializerLSquare;
464 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
466 !Left->endsSequence(tok::l_square, tok::numeric_constant,
468 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
469 Left->Type = TT_ProtoExtensionLSquare;
470 BindingIncrease = 10;
472 } else if (!CppArrayTemplates && Parent &&
473 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
474 tok::comma, tok::l_paren, tok::l_square,
475 tok::question, tok::colon, tok::kw_return,
476 // Should only be relevant to JavaScript:
478 Left->Type = TT_ArrayInitializerLSquare;
480 BindingIncrease = 10;
481 Left->Type = TT_ArraySubscriptLSquare;
485 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
486 Contexts.back().IsExpression = true;
487 if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
488 Parent->is(TT_JsTypeColon))
489 Contexts.back().IsExpression = false;
491 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
492 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
494 while (CurrentToken) {
495 if (CurrentToken->is(tok::r_square)) {
496 if (IsCpp11AttributeSpecifier)
497 CurrentToken->Type = TT_AttributeSquare;
498 else if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
499 Left->is(TT_ObjCMethodExpr)) {
500 // An ObjC method call is rarely followed by an open parenthesis.
501 // FIXME: Do we incorrectly label ":" with this?
502 StartsObjCMethodExpr = false;
503 Left->Type = TT_Unknown;
505 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
506 CurrentToken->Type = TT_ObjCMethodExpr;
507 // If we haven't seen a colon yet, make sure the last identifier
508 // before the r_square is tagged as a selector name component.
509 if (!ColonFound && CurrentToken->Previous &&
510 CurrentToken->Previous->is(TT_Unknown) &&
511 canBeObjCSelectorComponent(*CurrentToken->Previous))
512 CurrentToken->Previous->Type = TT_SelectorName;
513 // determineStarAmpUsage() thinks that '*' '[' is allocating an
514 // array of pointers, but if '[' starts a selector then '*' is a
516 if (Parent && Parent->is(TT_PointerOrReference))
517 Parent->Type = TT_BinaryOperator;
519 Left->MatchingParen = CurrentToken;
520 CurrentToken->MatchingParen = Left;
521 // FirstObjCSelectorName is set when a colon is found. This does
522 // not work, however, when the method has no parameters.
523 // Here, we set FirstObjCSelectorName when the end of the method call is
524 // reached, in case it was not set already.
525 if (!Contexts.back().FirstObjCSelectorName) {
526 FormatToken* Previous = CurrentToken->getPreviousNonComment();
527 if (Previous && Previous->is(TT_SelectorName)) {
528 Previous->ObjCSelectorNameParts = 1;
529 Contexts.back().FirstObjCSelectorName = Previous;
532 Left->ParameterCount =
533 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
535 if (Contexts.back().FirstObjCSelectorName) {
536 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
537 Contexts.back().LongestObjCSelectorName;
538 if (Left->BlockParameterCount > 1)
539 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
544 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
546 if (CurrentToken->is(tok::colon)) {
547 if (IsCpp11AttributeSpecifier &&
548 CurrentToken->endsSequence(tok::colon, tok::identifier,
550 // Remember that this is a [[using ns: foo]] C++ attribute, so we
551 // don't add a space before the colon (unlike other colons).
552 CurrentToken->Type = TT_AttributeColon;
553 } else if (Left->isOneOf(TT_ArraySubscriptLSquare,
554 TT_DesignatedInitializerLSquare)) {
555 Left->Type = TT_ObjCMethodExpr;
556 StartsObjCMethodExpr = true;
557 Contexts.back().ColonIsObjCMethodExpr = true;
558 if (Parent && Parent->is(tok::r_paren))
559 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
560 Parent->Type = TT_CastRParen;
564 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
566 Left->Type = TT_ArrayInitializerLSquare;
567 FormatToken *Tok = CurrentToken;
570 updateParameterCount(Left, Tok);
577 FormatToken *Left = CurrentToken->Previous;
578 Left->ParentBracket = Contexts.back().ContextKind;
580 if (Contexts.back().CaretFound)
581 Left->Type = TT_ObjCBlockLBrace;
582 Contexts.back().CaretFound = false;
584 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
585 Contexts.back().ColonIsDictLiteral = true;
586 if (Left->BlockKind == BK_BracedInit)
587 Contexts.back().IsExpression = true;
588 if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous &&
589 Left->Previous->is(TT_JsTypeColon))
590 Contexts.back().IsExpression = false;
592 while (CurrentToken) {
593 if (CurrentToken->is(tok::r_brace)) {
594 Left->MatchingParen = CurrentToken;
595 CurrentToken->MatchingParen = Left;
599 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
601 updateParameterCount(Left, CurrentToken);
602 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
603 FormatToken *Previous = CurrentToken->getPreviousNonComment();
604 if (Previous->is(TT_JsTypeOptionalQuestion))
605 Previous = Previous->getPreviousNonComment();
606 if ((CurrentToken->is(tok::colon) &&
607 (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
608 Style.Language == FormatStyle::LK_Proto ||
609 Style.Language == FormatStyle::LK_TextProto) {
610 Left->Type = TT_DictLiteral;
611 if (Previous->Tok.getIdentifierInfo() ||
612 Previous->is(tok::string_literal))
613 Previous->Type = TT_SelectorName;
615 if (CurrentToken->is(tok::colon) ||
616 Style.Language == FormatStyle::LK_JavaScript)
617 Left->Type = TT_DictLiteral;
619 if (CurrentToken->is(tok::comma) &&
620 Style.Language == FormatStyle::LK_JavaScript)
621 Left->Type = TT_DictLiteral;
/// Updates the parameter bookkeeping stored on the opening bracket \p Left
/// for the token \p Current found inside that bracket.
///
/// - A '{' with BlockKind == BK_Block increments Left->BlockParameterCount.
/// - A ',' increments Left->ParameterCount and is reported to Left->Role.
/// - Otherwise, the first non-comment token sets Left->ParameterCount to 1.
629 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
630 // For ObjC methods, the number of parameters is calculated differently as
631 // method declarations have a different structure (the parameters are not
632 // inside a bracket scope).
633 if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block)
634 ++Left->BlockParameterCount;
635 if (Current->is(tok::comma)) {
636 ++Left->ParameterCount;
// NOTE(review): a line between these two statements is missing from this
// listing (presumably a guard so that Role is created only once) -- confirm.
638 Left->Role.reset(new CommaSeparatedList(Style));
639 Left->Role->CommaFound(Current);
640 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
641 Left->ParameterCount = 1;
/// Scans forward from the current token for the ':' of a conditional
/// ('?:') expression and tags that colon as TT_ConditionalExpr.
///
/// NOTE(review): the rest of the loop body and the return statements are not
/// visible in this listing; confirm the bool result against the full source.
645 bool parseConditional() {
646 while (CurrentToken) {
647 if (CurrentToken->is(tok::colon)) {
648 CurrentToken->Type = TT_ConditionalExpr;
/// Handles the tokens that follow a 'template' keyword (consumeToken() calls
/// this for tok::kw_template).
///
/// If the current token is '<', it is tagged TT_TemplateOpener. The token
/// preceding CurrentToken is later flagged with ClosesTemplateDeclaration.
///
/// NOTE(review): the statements between these steps and the return
/// statements are not visible in this listing; confirm against the full
/// source.
658 bool parseTemplateDeclaration() {
659 if (CurrentToken && CurrentToken->is(tok::less)) {
660 CurrentToken->Type = TT_TemplateOpener;
665 CurrentToken->Previous->ClosesTemplateDeclaration = true;
/// Handles the token at CurrentToken (captured as \c Tok): switches on its
/// token kind to assign a TokenType and, where the token opens a nested
/// construct, hands off to the matching parse routine (e.g. parseParens(),
/// parseTemplateDeclaration()).
///
/// NOTE(review): most \c case labels, the statement that advances past
/// \c Tok and the return statements are not visible in this listing; confirm
/// the control flow and the bool result against the full source.
671 bool consumeToken() {
672 FormatToken *Tok = CurrentToken;
674 switch (Tok->Tok.getKind()) {
677 if (!Tok->Previous && Line.MustBeDeclaration)
678 Tok->Type = TT_ObjCMethodSpecifier;
683 // Colons from ?: are handled in parseConditional().
684 if (Style.Language == FormatStyle::LK_JavaScript) {
685 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
686 (Contexts.size() == 1 && // switch/case labels
687 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
688 Contexts.back().ContextKind == tok::l_paren || // function params
689 Contexts.back().ContextKind == tok::l_square || // array type
690 (!Contexts.back().IsExpression &&
691 Contexts.back().ContextKind == tok::l_brace) || // object type
692 (Contexts.size() == 1 &&
693 Line.MustBeDeclaration)) { // method/property declaration
694 Contexts.back().IsExpression = false;
695 Tok->Type = TT_JsTypeColon;
699 if (Contexts.back().ColonIsDictLiteral ||
700 Style.Language == FormatStyle::LK_Proto ||
701 Style.Language == FormatStyle::LK_TextProto) {
702 Tok->Type = TT_DictLiteral;
703 if (Style.Language == FormatStyle::LK_TextProto) {
704 if (FormatToken *Previous = Tok->getPreviousNonComment())
705 Previous->Type = TT_SelectorName;
707 } else if (Contexts.back().ColonIsObjCMethodExpr ||
708 Line.startsWith(TT_ObjCMethodSpecifier)) {
709 Tok->Type = TT_ObjCMethodExpr;
710 const FormatToken *BeforePrevious = Tok->Previous->Previous;
711 // Ensure we tag all identifiers in method declarations as
713 bool UnknownIdentifierInMethodDeclaration =
714 Line.startsWith(TT_ObjCMethodSpecifier) &&
715 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
716 if (!BeforePrevious ||
717 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
718 !(BeforePrevious->is(TT_CastRParen) ||
719 (BeforePrevious->is(TT_ObjCMethodExpr) &&
720 BeforePrevious->is(tok::colon))) ||
721 BeforePrevious->is(tok::r_square) ||
722 Contexts.back().LongestObjCSelectorName == 0 ||
723 UnknownIdentifierInMethodDeclaration) {
724 Tok->Previous->Type = TT_SelectorName;
725 if (!Contexts.back().FirstObjCSelectorName)
726 Contexts.back().FirstObjCSelectorName = Tok->Previous;
727 else if (Tok->Previous->ColumnWidth >
728 Contexts.back().LongestObjCSelectorName)
729 Contexts.back().LongestObjCSelectorName =
730 Tok->Previous->ColumnWidth;
731 Tok->Previous->ParameterIndex =
732 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
733 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
735 } else if (Contexts.back().ColonIsForRangeExpr) {
736 Tok->Type = TT_RangeBasedForLoopColon;
737 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
738 Tok->Type = TT_BitFieldColon;
739 } else if (Contexts.size() == 1 &&
740 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
741 if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren,
743 Tok->Type = TT_CtorInitializerColon;
745 Tok->Type = TT_InheritanceColon;
746 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
747 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
748 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
749 Tok->Next->Next->is(tok::colon)))) {
750 // This handles a special macro in ObjC code where selectors including
751 // the colon are passed as macro arguments.
752 Tok->Type = TT_ObjCMethodExpr;
753 } else if (Contexts.back().ContextKind == tok::l_paren) {
754 Tok->Type = TT_InlineASMColon;
759 // | and & in declarations/type expressions represent union and
760 // intersection types, respectively.
761 if (Style.Language == FormatStyle::LK_JavaScript &&
762 !Contexts.back().IsExpression)
763 Tok->Type = TT_JsTypeOperator;
767 if (Tok->is(tok::kw_if) && CurrentToken &&
768 CurrentToken->is(tok::kw_constexpr))
770 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
772 if (!parseParens(/*LookForDecls=*/true))
777 if (Style.Language == FormatStyle::LK_JavaScript) {
778 // x.for and {for: ...}
779 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
780 (Tok->Next && Tok->Next->is(tok::colon)))
782 // JS' for await ( ...
783 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
786 Contexts.back().ColonIsForRangeExpr = true;
792 // When faced with 'operator()()', the kw_operator handler incorrectly
793 // marks the first l_paren as an OverloadedOperatorLParen. Here, we make
794 // the first two parens OverloadedOperators and the second l_paren an
795 // OverloadedOperatorLParen.
796 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
797 Tok->Previous->MatchingParen &&
798 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
799 Tok->Previous->Type = TT_OverloadedOperator;
800 Tok->Previous->MatchingParen->Type = TT_OverloadedOperator;
801 Tok->Type = TT_OverloadedOperatorLParen;
806 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
807 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
809 !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute,
810 TT_LeadingJavaAnnotation)))
811 Line.MightBeFunctionDecl = true;
818 if (Style.Language == FormatStyle::LK_TextProto) {
819 FormatToken *Previous = Tok->getPreviousNonComment();
820 if (Previous && Previous->Type != TT_DictLiteral)
821 Previous->Type = TT_SelectorName;
828 Tok->Type = TT_TemplateOpener;
829 // In TT_Proto, we must distinguish between:
831 // msg < item: data >
832 // msg: < item: data >
833 // In TT_TextProto, map<key, value> does not occur.
834 if (Style.Language == FormatStyle::LK_TextProto ||
835 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
836 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
837 Tok->Type = TT_DictLiteral;
838 FormatToken *Previous = Tok->getPreviousNonComment();
839 if (Previous && Previous->Type != TT_DictLiteral)
840 Previous->Type = TT_SelectorName;
843 Tok->Type = TT_BinaryOperator;
844 NonTemplateLess.insert(Tok);
853 // Lines can start with '}'.
858 if (Style.Language != FormatStyle::LK_TextProto)
859 Tok->Type = TT_BinaryOperator;
861 case tok::kw_operator:
862 if (Style.Language == FormatStyle::LK_TextProto ||
863 Style.Language == FormatStyle::LK_Proto)
865 while (CurrentToken &&
866 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
867 if (CurrentToken->isOneOf(tok::star, tok::amp))
868 CurrentToken->Type = TT_PointerOrReference;
871 CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator,
873 CurrentToken->Previous->Type = TT_OverloadedOperator;
876 CurrentToken->Type = TT_OverloadedOperatorLParen;
877 if (CurrentToken->Previous->is(TT_BinaryOperator))
878 CurrentToken->Previous->Type = TT_OverloadedOperator;
882 if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
883 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
885 // Question marks before semicolons, colons, etc. indicate optional
886 // types (fields, parameters), e.g.
887 // function(x?: string, y?) {...}
889 Tok->Type = TT_JsTypeOptionalQuestion;
892 // Declarations cannot be conditional expressions, this can only be part
893 // of a type declaration.
894 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
895 Style.Language == FormatStyle::LK_JavaScript)
899 case tok::kw_template:
900 parseTemplateDeclaration();
903 if (Contexts.back().InCtorInitializer)
904 Tok->Type = TT_CtorInitializerComma;
905 else if (Contexts.back().InInheritanceList)
906 Tok->Type = TT_InheritanceComma;
907 else if (Contexts.back().FirstStartOfName &&
908 (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) {
909 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
910 Line.IsMultiVariableDeclStmt = true;
912 if (Contexts.back().IsForEachMacro)
913 Contexts.back().IsExpression = true;
915 case tok::identifier:
916 if (Tok->isOneOf(Keywords.kw___has_include,
917 Keywords.kw___has_include_next)) {
/// Annotates the operand of an include-style directive.
///
/// When the current token is '<', the following tokens are tagged
/// TT_ImplicitStringLiteral, except tokens that are comments or whose text
/// starts with "//", which keep their type.
///
/// NOTE(review): the statements that advance CurrentToken are not visible in
/// this listing.
927 void parseIncludeDirective() {
928 if (CurrentToken && CurrentToken->is(tok::less)) {
930 while (CurrentToken) {
931 // Mark tokens up to the trailing line comments as implicit string
933 if (CurrentToken->isNot(tok::comment) &&
934 !CurrentToken->TokenText.startswith("//"))
935 CurrentToken->Type = TT_ImplicitStringLiteral;
/// Annotates the message of a warning/error directive
/// (parsePreprocessorDirective() calls this for tok::pp_warning): each token
/// visited by the loop is tagged TT_ImplicitStringLiteral.
///
/// NOTE(review): the statements that advance CurrentToken, and the end of the
/// comment below, are not visible in this listing.
941 void parseWarningOrError() {
943 // We still want to format the whitespace left of the first token of the
946 while (CurrentToken) {
947 CurrentToken->Type = TT_ImplicitStringLiteral;
// NOTE(review): the signature line of this routine (original line 952) is
// absent from this listing. Judging by the comments below it handles the
// tokens that follow "pragma" - confirm the name against the full file.
//
// Visible behaviour: if the token after "pragma" is the `mark` or `option`
// keyword, that keyword and the first token after it are consumed, and the
// rest of the line is walked. For `mark` every remaining token is marked
// TT_ImplicitStringLiteral; for `option` only tokens whose predecessor is a
// TT_BinaryOperator are marked.
953 next(); // Consume "pragma".
955 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
956 bool IsMark = CurrentToken->is(Keywords.kw_mark);
957 next(); // Consume "mark" (or "option").
958 next(); // Consume first token (so we fix leading whitespace).
959 while (CurrentToken) {
960 if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
961 CurrentToken->Type = TT_ImplicitStringLiteral;
/// Handles the argument list of a has-include style expression by delegating
/// to parseIncludeDirective().
///
/// The guard below tests for a missing token or a token other than '('.
// NOTE(review): original lines 969-970 and 972-973 are absent from this
// listing; presumably the guard returns early and the '(' is consumed before
// the delegation - confirm against the full file.
967 void parseHasInclude() {
968 if (!CurrentToken || !CurrentToken->is(tok::l_paren))
971 parseIncludeDirective();
/// Annotates a preprocessor-style line and returns its line type.
///
/// The result starts out as LT_PreprocessorDirective. As far as this listing
/// shows:
///  - In JavaScript, if the token current on entry is the first token of the
///    line, the line is treated as a shebang: its tokens are marked
///    TT_ImplicitStringLiteral and LT_ImportStatement is returned.
///  - A numeric constant at the inspected position gets
///    SpacesRequiredBefore = 1.
///  - include / include_next / import are handed to parseIncludeDirective()
///    and change the result to LT_ImportStatement.
///  - warning is handed to parseWarningOrError().
///  - The remaining tokens are scanned for '(' and for the
///    __has_include / __has_include_next keywords.
// NOTE(review): many original lines are absent from this listing (e.g.
// 978-981, 989-990, 1009-1010, 1015-1018, 1020-1024, 1032-1037). In
// particular the body of the pragma case, the case labels that lead to the
// IsExpression assignment, and the final return are not visible here.
975 LineType parsePreprocessorDirective() {
// Remember whether we started on the first token of the line; CurrentToken
// changes while parsing.
976 bool IsFirstToken = CurrentToken->IsFirst;
977 LineType Type = LT_PreprocessorDirective;
982 if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) {
983 // JavaScript files can contain shebang lines of the form:
984 // #!/usr/bin/env node
985 // Treat these like C++ #include directives.
986 while (CurrentToken) {
987 // Tokens cannot be comments here.
988 CurrentToken->Type = TT_ImplicitStringLiteral;
991 return LT_ImportStatement;
994 if (CurrentToken->Tok.is(tok::numeric_constant)) {
995 CurrentToken->SpacesRequiredBefore = 1;
998 // Hashes in the middle of a line can lead to any strange token
// Tokens without identifier info cannot name a directive; the check below
// also guards the getIdentifierInfo() dereference in the switch.
1000 if (!CurrentToken->Tok.getIdentifierInfo())
1002 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1003 case tok::pp_include:
1004 case tok::pp_include_next:
1005 case tok::pp_import:
1007 parseIncludeDirective();
1008 Type = LT_ImportStatement;
1011 case tok::pp_warning:
1012 parseWarningOrError();
1014 case tok::pp_pragma:
1019 Contexts.back().IsExpression = true;
1025 while (CurrentToken) {
1026 FormatToken *Tok = CurrentToken;
1028 if (Tok->is(tok::l_paren))
1030 else if (Tok->isOneOf(Keywords.kw___has_include,
1031 Keywords.kw___has_include_next))
/// Annotates all tokens of the current line and classifies the line.
///
/// Order of the checks visible in this listing:
///  1. NonTemplateLess is cleared.
///  2. A line starting with '#' is delegated to parsePreprocessorDirective().
///  3. A Java `package` line, or `import` followed by a string literal or
///     identifier, is handed to parseIncludeDirective() and classified as
///     LT_ImportStatement.
///  4. A line that starts with '<' and ends with '>' is treated the same way.
///  5. In .proto files, a level-0 line starting with `option` can be
///     classified as LT_ImportStatement.
///  6. Otherwise the tokens are consumed one by one via consumeToken(). While
///     doing so, `virtual` and the JavaScript import / re-export / Closure
///     import forms are recorded. After the loop, `virtual` wins over the
///     import classification, followed by the Objective-C method check.
// NOTE(review): original lines 1052-1053, 1069, 1100-1101 and 1112-1116 are
// absent from this listing, so the remaining token kinds accepted after
// `import`, the handling of a failing consumeToken() and the final
// fall-through result cannot be described from here.
1038 LineType parseLine() {
1039 NonTemplateLess.clear();
1040 if (CurrentToken->is(tok::hash))
1041 return parsePreprocessorDirective();
1043 // Directly allow to 'import <string-literal>' to support protocol buffer
1044 // definitions (github.com/google/protobuf) or missing "#" (either way we
1045 // should not break the line).
1046 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1047 if ((Style.Language == FormatStyle::LK_Java &&
1048 CurrentToken->is(Keywords.kw_package)) ||
1049 (Info && Info->getPPKeywordID() == tok::pp_import &&
1050 CurrentToken->Next &&
1051 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1054 parseIncludeDirective();
1055 return LT_ImportStatement;
1058 // If this line starts and ends in '<' and '>', respectively, it is likely
1059 // part of "#define <a/b.h>".
1060 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1061 parseIncludeDirective();
1062 return LT_ImportStatement;
1065 // In .proto files, top-level options are very similar to import statements
1066 // and should not be line-wrapped.
1067 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1068 CurrentToken->is(Keywords.kw_option)) {
1070 if (CurrentToken && CurrentToken->is(tok::identifier))
1071 return LT_ImportStatement;
// Facts collected while walking the tokens; they decide the result after the
// loop.
1074 bool KeywordVirtualFound = false;
1075 bool ImportStatement = false;
1077 // import {...} from '...';
1078 if (Style.Language == FormatStyle::LK_JavaScript &&
1079 CurrentToken->is(Keywords.kw_import))
1080 ImportStatement = true;
1082 while (CurrentToken) {
1083 if (CurrentToken->is(tok::kw_virtual))
1084 KeywordVirtualFound = true;
1085 if (Style.Language == FormatStyle::LK_JavaScript) {
1086 // export {...} from '...';
1087 // An export followed by "from 'some string';" is a re-export from
1088 // another module identified by a URI and is treated as a
1089 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1090 // Just "export {...};" or "export class ..." should not be treated as
1091 // an import in this sense.
1092 if (Line.First->is(tok::kw_export) &&
1093 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1094 CurrentToken->Next->isStringLiteral())
1095 ImportStatement = true;
1096 if (isClosureImportStatement(*CurrentToken))
1097 ImportStatement = true;
1099 if (!consumeToken())
// `virtual` is checked before the import flag, so it takes priority.
1102 if (KeywordVirtualFound)
1103 return LT_VirtualFunctionDecl;
1104 if (ImportStatement)
1105 return LT_ImportStatement;
1107 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
// Copy the longest selector-name length from the outermost remaining context
// onto the first selector-name token.
1108 if (Contexts.back().FirstObjCSelectorName)
1109 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
1110 Contexts.back().LongestObjCSelectorName;
1111 return LT_ObjCMethodDecl;
/// Returns whether \p Tok starts a Closure-library import-like call.
///
/// The visible conditions require the token text "goog", followed by a '.',
/// followed by one of "module", "provide", "require", "requireType" or
/// "forwardDeclare", followed by '('.
// NOTE(review): original lines 1120 and 1122 are absent from this listing;
// line 1122 presumably null-checks Tok.Next->Next before it is dereferenced
// below - confirm against the full file.
1118 bool isClosureImportStatement(const FormatToken &Tok) {
1119 // FIXME: Closure-library specific stuff should not be hard-coded but be
1121 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
1123 (Tok.Next->Next->TokenText == "module" ||
1124 Tok.Next->Next->TokenText == "provide" ||
1125 Tok.Next->Next->TokenText == "require" ||
1126 Tok.Next->Next->TokenText == "requireType" ||
1127 Tok.Next->Next->TokenText == "forwardDeclare") &&
1128 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
/// Clears per-token annotation state so that a token can be (re-)annotated.
///
/// The token type is reset to TT_Unknown unless it is one of the types listed
/// in the isOneOf() call below, which are left untouched. Role, MatchingParen,
/// FakeLParens and FakeRParens are always reset.
// NOTE(review): the visible statements operate on CurrentToken, not on the
// \p Token parameter. The call sites visible in this file pass CurrentToken,
// so both refer to the same token there. Original lines 1132-1134 are absent
// from this listing; presumably they use \p Token (e.g. for a null check).
1131 void resetTokenMetadata(FormatToken *Token) {
1135 // Reset token type in case we have already looked at it and then
1136 // recovered from an error (e.g. failure to find the matching >).
1137 if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
1138 TT_FunctionLBrace, TT_ImplicitStringLiteral,
1139 TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
1140 TT_OverloadedOperator, TT_RegexLiteral,
1141 TT_TemplateString, TT_ObjCStringLiteral))
1142 CurrentToken->Type = TT_Unknown;
1143 CurrentToken->Role.reset();
1144 CurrentToken->MatchingParen = nullptr;
1145 CurrentToken->FakeLParens.clear();
1146 CurrentToken->FakeRParens = 0;
// NOTE(review): the signature line of this routine (original lines
// 1148-1150) is absent from this listing; it appears to be the parser's
// token-advancing step - confirm the name against the full file.
//
// Visible behaviour, in order: record the nesting level (context stack depth
// minus one) and binding strength on the current token, let the token update
// the innermost context, determine the token's type, advance to the next
// token, and reset that token's metadata.
1151 CurrentToken->NestingLevel = Contexts.size() - 1;
1152 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
// The context is updated before the type is determined, so the type decision
// already sees the effect of this token on the context.
1153 modifyContext(*CurrentToken);
1154 determineTokenType(*CurrentToken);
1155 CurrentToken = CurrentToken->Next;
1158 resetTokenMetadata(CurrentToken);
1161 /// A struct to hold information valid in a specific context, e.g.
1162 /// a pair of parenthesis.
// NOTE(review): the struct header (original line 1163) and the lines 1165,
// 1168 and 1171 are absent from this listing. The constructor below
// initialises an IsExpression member whose declaration is therefore not
// visible here.
1164 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
1166 : ContextKind(ContextKind), BindingStrength(BindingStrength),
1167 IsExpression(IsExpression) {}
1169 tok::TokenKind ContextKind;
// Copied onto each token as it is visited.
1170 unsigned BindingStrength;
// Propagated to FirstObjCSelectorName once an ObjC method declaration line has
// been parsed.
1172 unsigned LongestObjCSelectorName = 0;
1173 bool ColonIsForRangeExpr = false;
1174 bool ColonIsDictLiteral = false;
1175 bool ColonIsObjCMethodExpr = false;
1176 FormatToken *FirstObjCSelectorName = nullptr;
// Set when a token is typed TT_StartOfName and cleared again on ';'.
1177 FormatToken *FirstStartOfName = nullptr;
// Cleared when a `new` keyword is seen; combined with IsExpression when
// classifying '*', '&' and '&&'.
1178 bool CanBeExpression = true;
1179 bool InTemplateArgument = false;
// Set for tokens following a constructor-initializer colon.
1180 bool InCtorInitializer = false;
// Set for tokens following an inheritance colon.
1181 bool InInheritanceList = false;
// Set once a unary '^' has been seen in this context.
1182 bool CaretFound = false;
1183 bool IsForEachMacro = false;
1184 bool InCpp11AttributeSpecifier = false;
1187 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
1188 /// of each instance.
///
/// The new context gets the given kind, a binding strength equal to that of
/// the enclosing context plus \c Increase, and inherits the enclosing
/// context's IsExpression value. The destructor pops the context again.
// NOTE(review): original lines 1191, 1193-1194 and 1198-1199 are absent from
// this listing, so the full constructor parameter list and initialiser are
// not visible here.
1189 struct ScopedContextCreator {
1190 AnnotatingParser &P;
1192 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
1195 P.Contexts.push_back(Context(ContextKind,
1196 P.Contexts.back().BindingStrength + Increase,
1197 P.Contexts.back().IsExpression));
1200 ~ScopedContextCreator() { P.Contexts.pop_back(); }
/// Updates the innermost Context based on the token \p Current.
///
/// Visible effects, checked in this order (first match wins):
///  - Assignment-precedence operator (with the exclusions listed in the
///    condition): IsExpression = true. Unless the line starts with a unary
///    operator, earlier tokens are walked backwards and '*', '&' and '&&' that
///    were typed as binary or unary operators are retyped as
///    TT_PointerOrReference.
///  - '<<' not preceded by `operator`: IsExpression = true.
///  - `return` / `throw`: IsExpression = true.
///  - TT_TrailingReturnArrow: IsExpression = false.
///  - TT_LambdaArrow or the `assert` keyword: IsExpression is true only for
///    Java.
///  - Token after a constructor-initializer colon: IsExpression and
///    InCtorInitializer = true.
///  - Token after an inheritance colon: InInheritanceList = true.
///  - ')', '>' or ',': directly preceding '*' / '&' tokens become
///    TT_PointerOrReference; IsExpression is cleared if the line must be a
///    declaration and the outermost context is not in a ctor initializer.
///  - `new`: CanBeExpression = false.
///  - ';' or '!': IsExpression = true.
// NOTE(review): original lines 1221-1223, 1225, 1230-1231 and 1259-1261 are
// absent from this listing; the exits of the backward walk (after the
// MatchingParen jump and after opensScope()) are not visible here.
1203 void modifyContext(const FormatToken &Current) {
1204 if (Current.getPrecedence() == prec::Assignment &&
1205 !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) &&
1206 // Type aliases use `type X = ...;` in TypeScript and can be exported
1207 // using `export type ...`.
1208 !(Style.Language == FormatStyle::LK_JavaScript &&
1209 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
1210 Line.startsWith(tok::kw_export, Keywords.kw_type,
1211 tok::identifier))) &&
1212 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
1213 Contexts.back().IsExpression = true;
1214 if (!Line.startsWith(TT_UnaryOperator)) {
// Walk backwards over the left-hand side; the walk stops once the token before
// `Previous` is a ',' or ';'.
1215 for (FormatToken *Previous = Current.Previous;
1216 Previous && Previous->Previous &&
1217 !Previous->Previous->isOneOf(tok::comma, tok::semi);
1218 Previous = Previous->Previous) {
1219 if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
// Jump from a closing bracket to its opening counterpart.
1220 Previous = Previous->MatchingParen;
1224 if (Previous->opensScope())
1226 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
1227 Previous->isOneOf(tok::star, tok::amp, tok::ampamp) &&
1228 Previous->Previous && Previous->Previous->isNot(tok::equal))
1229 Previous->Type = TT_PointerOrReference;
1232 } else if (Current.is(tok::lessless) &&
1233 (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
1234 Contexts.back().IsExpression = true;
1235 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
1236 Contexts.back().IsExpression = true;
1237 } else if (Current.is(TT_TrailingReturnArrow)) {
1238 Contexts.back().IsExpression = false;
1239 } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) {
1240 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
1241 } else if (Current.Previous &&
1242 Current.Previous->is(TT_CtorInitializerColon)) {
1243 Contexts.back().IsExpression = true;
1244 Contexts.back().InCtorInitializer = true;
1245 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
1246 Contexts.back().InInheritanceList = true;
1247 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
1248 for (FormatToken *Previous = Current.Previous;
1249 Previous && Previous->isOneOf(tok::star, tok::amp);
1250 Previous = Previous->Previous)
1251 Previous->Type = TT_PointerOrReference;
// Note: this reads the outermost context (front) but writes the innermost
// one (back).
1252 if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer)
1253 Contexts.back().IsExpression = false;
1254 } else if (Current.is(tok::kw_new)) {
1255 Contexts.back().CanBeExpression = false;
1256 } else if (Current.isOneOf(tok::semi, tok::exclaim)) {
1257 // This should be the condition or increment in a for-loop.
1258 Contexts.back().IsExpression = true;
/// Assigns a token type to \p Current if it does not have one yet.
///
/// Tokens whose type is not TT_Unknown are skipped by the first check. After
/// a JavaScript-specific pass for '!', the long else-if chain below picks the
/// first matching classification: `instanceof`, start of a declared name,
/// ';', `auto`, arrows, '*' / '&' / '&&', '+' / '-' / '^', '++' / '--',
/// '!' / '~', '?', other binary operators, comments, ')', ObjC '@',
/// '.', ObjC selector names, trailing annotations and Java/JavaScript
/// annotations.
// NOTE(review): a number of original lines are absent from this listing (e.g.
// 1265-1266, 1272, 1275-1277, 1280-1284, 1300, 1326, 1328, 1338, 1342, 1344,
// 1352, 1362-1363, 1374, 1377-1380, 1390, 1400, 1403, 1411, 1418,
// 1424-1427). Among other things the statement executed for `auto` and the
// early return for already-typed tokens are not visible here.
1262 void determineTokenType(FormatToken &Current) {
1263 if (!Current.is(TT_Unknown))
1264 // The token type is already known.
1267 if (Style.Language == FormatStyle::LK_JavaScript) {
// JavaScript / TypeScript: decide whether '!' is a postfix non-null assertion.
1268 if (Current.is(tok::exclaim)) {
1269 if (Current.Previous &&
1270 (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace,
1271 tok::r_paren, tok::r_square,
1273 Current.Previous->Tok.isLiteral())) {
1274 Current.Type = TT_JsNonNullAssertion;
1278 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
1279 Current.Type = TT_JsNonNullAssertion;
1285 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1286 // function declaration have been found. In this case, 'Current' is a
1287 // trailing token of this declaration and thus cannot be a name.
1288 if (Current.is(Keywords.kw_instanceof)) {
1289 Current.Type = TT_BinaryOperator;
1290 } else if (isStartOfName(Current) &&
1291 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
1292 Contexts.back().FirstStartOfName = &Current;
1293 Current.Type = TT_StartOfName;
1294 } else if (Current.is(tok::semi)) {
1295 // Reset FirstStartOfName after finding a semicolon so that a for loop
1296 // with multiple increment statements is not confused with a for loop
1297 // having multiple variable declarations.
1298 Contexts.back().FirstStartOfName = nullptr;
1299 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
1301 } else if (Current.is(tok::arrow) &&
1302 Style.Language == FormatStyle::LK_Java) {
1303 Current.Type = TT_LambdaArrow;
1304 } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration &&
1305 Current.NestingLevel == 0) {
1306 Current.Type = TT_TrailingReturnArrow;
1307 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
// The "is expression" argument is only true if the context both can be and is
// an expression.
1308 Current.Type = determineStarAmpUsage(Current,
1309 Contexts.back().CanBeExpression &&
1310 Contexts.back().IsExpression,
1311 Contexts.back().InTemplateArgument);
1312 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
1313 Current.Type = determinePlusMinusCaretUsage(Current);
// Remember a unary '^' in the context; determineStarAmpUsage() consults this
// flag.
1314 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
1315 Contexts.back().CaretFound = true;
1316 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
1317 Current.Type = determineIncrementUsage(Current);
1318 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
1319 Current.Type = TT_UnaryOperator;
1320 } else if (Current.is(tok::question)) {
1321 if (Style.Language == FormatStyle::LK_JavaScript &&
1322 Line.MustBeDeclaration && !Contexts.back().IsExpression) {
1323 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
1324 // on the interface, not a ternary expression.
1325 Current.Type = TT_JsTypeOptionalQuestion;
1327 Current.Type = TT_ConditionalExpr;
1329 } else if (Current.isBinaryOperator() &&
1330 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
1331 (!Current.is(tok::greater) &&
1332 Style.Language != FormatStyle::LK_TextProto)) {
1333 Current.Type = TT_BinaryOperator;
1334 } else if (Current.is(tok::comment)) {
1335 if (Current.TokenText.startswith("/*")) {
1336 if (Current.TokenText.endswith("*/"))
1337 Current.Type = TT_BlockComment;
1339 // The lexer has for some reason determined a comment here. But we
1340 // cannot really handle it, if it isn't properly terminated.
1341 Current.Tok.setKind(tok::unknown);
1343 Current.Type = TT_LineComment;
1345 } else if (Current.is(tok::r_paren)) {
1346 if (rParenEndsCast(Current))
1347 Current.Type = TT_CastRParen;
// Independently of the cast check: look for an all-upper-case identifier
// directly before the parentheses that starts the line or follows a template
// declaration, and treat the ')' as closing a function annotation.
1348 if (Current.MatchingParen && Current.Next &&
1349 !Current.Next->isBinaryOperator() &&
1350 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
1351 tok::comma, tok::period, tok::arrow,
1353 if (FormatToken *AfterParen = Current.MatchingParen->Next) {
1354 // Make sure this isn't the return type of an Obj-C block declaration
1355 if (AfterParen->Tok.isNot(tok::caret)) {
1356 if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
1357 if (BeforeParen->is(tok::identifier) &&
1358 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
1359 (!BeforeParen->Previous ||
1360 BeforeParen->Previous->ClosesTemplateDeclaration))
1361 Current.Type = TT_FunctionAnnotationRParen;
1364 } else if (Current.is(tok::at) && Current.Next &&
1365 Style.Language != FormatStyle::LK_JavaScript &&
1366 Style.Language != FormatStyle::LK_Java) {
1367 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
1368 // marks declarations and properties that need special formatting.
1369 switch (Current.Next->Tok.getObjCKeywordID()) {
1370 case tok::objc_interface:
1371 case tok::objc_implementation:
1372 case tok::objc_protocol:
1373 Current.Type = TT_ObjCDecl;
1375 case tok::objc_property:
1376 Current.Type = TT_ObjCProperty;
1381 } else if (Current.is(tok::period)) {
1382 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
1383 if (PreviousNoComment &&
1384 PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
1385 Current.Type = TT_DesignatedInitializerPeriod;
1386 else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
1387 Current.Previous->isOneOf(TT_JavaAnnotation,
1388 TT_LeadingJavaAnnotation)) {
// A '.' inside a Java annotation name inherits the annotation type of the
// preceding token.
1389 Current.Type = Current.Previous->Type;
1391 } else if (canBeObjCSelectorComponent(Current) &&
1392 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1393 Current.Previous && Current.Previous->is(TT_CastRParen) &&
1394 Current.Previous->MatchingParen &&
1395 Current.Previous->MatchingParen->Previous &&
1396 Current.Previous->MatchingParen->Previous->is(
1397 TT_ObjCMethodSpecifier)) {
1398 // This is the first part of an Objective-C selector name. (If there's no
1399 // colon after this, this is the only place which annotates the identifier
1401 Current.Type = TT_SelectorName;
1402 } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
1404 !Current.Previous->isOneOf(tok::equal, tok::at) &&
1405 Line.MightBeFunctionDecl && Contexts.size() == 1) {
1406 // Line.MightBeFunctionDecl can only be true after the parentheses of a
1407 // function declaration have been found.
1408 Current.Type = TT_TrailingAnnotation;
1409 } else if ((Style.Language == FormatStyle::LK_Java ||
1410 Style.Language == FormatStyle::LK_JavaScript) &&
1412 if (Current.Previous->is(tok::at) &&
1413 Current.isNot(Keywords.kw_interface)) {
// An annotation is "leading" if nothing but comments or other leading
// annotations precede its '@'.
1414 const FormatToken &AtToken = *Current.Previous;
1415 const FormatToken *Previous = AtToken.getPreviousNonComment();
1416 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
1417 Current.Type = TT_LeadingJavaAnnotation;
1419 Current.Type = TT_JavaAnnotation;
1420 } else if (Current.Previous->is(tok::period) &&
1421 Current.Previous->isOneOf(TT_JavaAnnotation,
1422 TT_LeadingJavaAnnotation)) {
1423 Current.Type = Current.Previous->Type;
1428 /// Take a guess at whether \p Tok starts a name of a function or
1429 /// variable declaration.
1431 /// This is a heuristic based on whether \p Tok is an identifier following
1432 /// something that is likely a type.
///
/// The decision is driven by the closest preceding non-comment token that is
/// not `const`: a template closer, a `decltype(...)` closing parenthesis, an
/// identifier or `auto` (unless it directly follows a '#'), a
/// TT_PointerOrReference, or a simple type specifier.
// NOTE(review): original lines 1435-1436, 1438-1439, 1442-1443, 1450-1451
// and 1465-1466 are absent from this listing, so the values returned by the
// early exits below are not visible here.
1433 bool isStartOfName(const FormatToken &Tok) {
1434 if (Tok.isNot(tok::identifier) || !Tok.Previous)
1437 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
1440 if (Style.Language == FormatStyle::LK_JavaScript &&
1441 Tok.Previous->is(Keywords.kw_in))
1444 // Skip "const" as it does not have an influence on whether this is a name.
1445 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
1446 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
1447 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
1449 if (!PreviousNotConst)
// An identifier directly after '#' is a preprocessor keyword, not a type.
1452 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
1453 PreviousNotConst->Previous &&
1454 PreviousNotConst->Previous->is(tok::hash);
// After a template closer: require a matched opener that is preceded by a
// token other than '.' or `template`.
1456 if (PreviousNotConst->is(TT_TemplateCloser))
1457 return PreviousNotConst && PreviousNotConst->MatchingParen &&
1458 PreviousNotConst->MatchingParen->Previous &&
1459 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
1460 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
1462 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
1463 PreviousNotConst->MatchingParen->Previous &&
1464 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
1467 return (!IsPPKeyword &&
1468 PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) ||
1469 PreviousNotConst->is(TT_PointerOrReference) ||
1470 PreviousNotConst->isSimpleTypeSpecifier();
1473 /// Determine whether ')' is ending a cast.
///
/// \p Tok is the ')' token. The function is a sequence of heuristics that
/// look at the token left of the matching '(', the token right of the ')'
/// and the tokens between the parentheses.
// NOTE(review): most of the `return` lines of this function, and some
// conditions (e.g. original lines 1477-1478, 1481-1482, 1484, 1490,
// 1498-1500, 1505-1507, 1509-1510, 1514-1515, 1520-1521, 1524, 1530-1531,
// 1535-1537, 1543-1544, 1548-1549, 1551-1552, 1555-1556, 1560, 1565-1569),
// are absent from this listing. The outcome of each check below therefore
// cannot be stated from here; only the tested conditions are documented.
1474 bool rParenEndsCast(const FormatToken &Tok) {
1475 // C-style casts are only used in C++ and Java.
1476 if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java)
1479 // Empty parens aren't casts and there are no casts at the end of the line.
1480 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
1483 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
1485 // If there is a closing parenthesis left of the current parentheses,
1486 // look past it as these might be chained casts.
1487 if (LeftOfParens->is(tok::r_paren)) {
1488 if (!LeftOfParens->MatchingParen ||
1489 !LeftOfParens->MatchingParen->Previous)
1491 LeftOfParens = LeftOfParens->MatchingParen->Previous;
1494 // If there is an identifier (or with a few exceptions a keyword) right
1495 // before the parentheses, this is unlikely to be a cast.
1496 if (LeftOfParens->Tok.getIdentifierInfo() &&
1497 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
1501 // Certain other tokens right before the parentheses are also signals that
1502 // this cannot be a cast.
1503 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
1504 TT_TemplateCloser, tok::ellipsis))
1508 if (Tok.Next->is(tok::question))
1511 // As Java has no function types, a "(" after the ")" likely means that this
1513 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
1516 // If a (non-string) literal follows, this is likely a cast.
1517 if (Tok.Next->isNot(tok::string_literal) &&
1518 (Tok.Next->Tok.isLiteral() ||
1519 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
1522 // Heuristically try to determine whether the parentheses contain a type.
1523 bool ParensAreType =
1525 Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) ||
1526 Tok.Previous->isSimpleTypeSpecifier();
// Tokens after ')' that could end a declaration instead of starting a cast
// operand.
1527 bool ParensCouldEndDecl =
1528 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
1529 if (ParensAreType && !ParensCouldEndDecl)
1532 // At this point, we heuristically assume that there are no casts at the
1533 // start of the line. We assume that we have found most cases where there
1534 // are by the logic above, e.g. "(void)x;".
1538 // Certain token types inside the parentheses mean that this can't be a
1540 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
1541 Token = Token->Next)
1542 if (Token->is(TT_BinaryOperator))
1545 // If the following token is an identifier or 'this', this is a cast. All
1546 // cases where this can be something else are handled above.
1547 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
// The checks below dereference Tok.Next->Next, so its absence is tested first.
1550 if (!Tok.Next->Next)
1553 // If the next token after the parenthesis is a unary operator, assume
1554 // that this is cast, unless there are unexpected tokens inside the
1557 Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star);
1558 if (!NextIsUnary || Tok.Next->is(tok::plus) ||
1559 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant))
1561 // Search for unexpected tokens.
1562 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
1563 Prev = Prev->Previous) {
1564 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
1570 /// Return the type of the given token assuming it is * or &.
///
/// \param Tok the '*', '&' or '&&' token to classify.
/// \param IsExpression whether the surrounding context is treated as an
///        expression; used as a late tie-breaker towards TT_BinaryOperator.
/// \param InTemplateArgument whether the token is inside a template argument,
///        where an identifier on the right-hand side indicates a binary
///        operator.
/// \returns one of TT_UnaryOperator, TT_BinaryOperator or
///          TT_PointerOrReference. For JavaScript the result is always
///          TT_BinaryOperator.
// NOTE(review): original lines 1577, 1581, 1592 and 1629 are absent from this
// listing; they presumably hold the null checks on PrevToken and NextToken
// that guard the dereferences below - confirm against the full file.
1571 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
1572 bool InTemplateArgument) {
1573 if (Style.Language == FormatStyle::LK_JavaScript)
1574 return TT_BinaryOperator;
1576 const FormatToken *PrevToken = Tok.getPreviousNonComment();
1578 return TT_UnaryOperator;
1580 const FormatToken *NextToken = Tok.getNextNonComment();
1582 NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) ||
1583 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
1584 return TT_PointerOrReference;
1586 if (PrevToken->is(tok::coloncolon))
1587 return TT_PointerOrReference;
// Tokens after which an operand is expected: the '*' / '&' must be a prefix
// operator.
1589 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
1590 tok::comma, tok::semi, tok::kw_return, tok::colon,
1591 tok::equal, tok::kw_delete, tok::kw_sizeof,
1593 PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr,
1594 TT_UnaryOperator, TT_CastRParen))
1595 return TT_UnaryOperator;
1597 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
1598 return TT_PointerOrReference;
1599 if (NextToken->is(tok::kw_operator) && !IsExpression)
1600 return TT_PointerOrReference;
1601 if (NextToken->isOneOf(tok::comma, tok::semi))
1602 return TT_PointerOrReference;
// `typeof(...)` / `decltype(...)` directly before the token name a type.
1604 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) {
1605 FormatToken *TokenBeforeMatchingParen =
1606 PrevToken->MatchingParen->getPreviousNonComment();
1607 if (TokenBeforeMatchingParen &&
1608 TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
1609 return TT_PointerOrReference;
// A literal or closing bracket on the left, or a literal / unary operator on
// the right, indicates operands on both sides.
1612 if (PrevToken->Tok.isLiteral() ||
1613 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
1614 tok::kw_false, tok::r_brace) ||
1615 NextToken->Tok.isLiteral() ||
1616 NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
1617 NextToken->isUnaryOperator() ||
1618 // If we know we're in a template argument, there are no named
1619 // declarations. Thus, having an identifier on the right-hand side
1620 // indicates a binary operator.
1621 (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
1622 return TT_BinaryOperator;
1624 // "&&(" is quite unlikely to be two successive unary "&".
1625 if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
1626 return TT_BinaryOperator;
1628 // This catches some cases where evaluation order is used as control flow:
1630 const FormatToken *NextNextToken = NextToken->getNextNonComment();
1631 if (NextNextToken && NextNextToken->is(tok::arrow))
1632 return TT_BinaryOperator;
1634 // It is very unlikely that we are going to find a pointer or reference type
1635 // definition on the RHS of an assignment.
1636 if (IsExpression && !Contexts.back().CaretFound)
1637 return TT_BinaryOperator;
// Default when no heuristic above matched.
1639 return TT_PointerOrReference;
/// Classifies a '+', '-' or '^' token as unary or binary operator.
///
/// \returns TT_UnaryOperator if the previous non-comment token is a cast's
///          closing parenthesis, another unary operator, one of the tokens
///          listed in the heuristic below, or a binary operator; otherwise
///          TT_BinaryOperator.
// NOTE(review): original line 1644 is absent from this listing; it presumably
// tests PrevToken for null before the first return - confirm against the full
// file.
1642 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
1643 const FormatToken *PrevToken = Tok.getPreviousNonComment();
1645 return TT_UnaryOperator;
1647 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
1648 // This must be a sequence of leading unary operators.
1649 return TT_UnaryOperator;
1651 // Use heuristics to recognize unary operators.
1652 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
1653 tok::question, tok::colon, tok::kw_return,
1654 tok::kw_case, tok::at, tok::l_brace))
1655 return TT_UnaryOperator;
1657 // There can't be two consecutive binary operators.
1658 if (PrevToken->is(TT_BinaryOperator))
1659 return TT_UnaryOperator;
1661 // Fall back to marking the token as binary operator.
1662 return TT_BinaryOperator;
1665 /// Determine whether ++/-- are pre- or post-increments/-decrements.
///
/// \returns TT_TrailingUnaryOperator if the previous non-comment token is
///          ')', ']' or an identifier; TT_UnaryOperator otherwise, including
///          when there is no previous token or it is a cast's closing
///          parenthesis.
1666 TokenType determineIncrementUsage(const FormatToken &Tok) {
1667 const FormatToken *PrevToken = Tok.getPreviousNonComment();
// The cast check comes first so that a TT_CastRParen ')' does not count as the
// end of an operand.
1668 if (!PrevToken || PrevToken->is(TT_CastRParen))
1669 return TT_UnaryOperator;
1670 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
1671 return TT_TrailingUnaryOperator;
1673 return TT_UnaryOperator;
// Stack of contexts; the innermost one is Contexts.back().
1676 SmallVector<Context, 8> Contexts;
1678 const FormatStyle &Style;
// The line being annotated.
1679 AnnotatedLine &Line;
// The token currently being processed; becomes null at the end of the line.
1680 FormatToken *CurrentToken;
1682 const AdditionalKeywords &Keywords;
1684 // Set of "<" tokens that do not open a template parameter list. If parseAngle
1685 // determines that a specific token can't be a template opener, it will make
1686 // the same decision irrespective of the decisions for tokens leading up to
// it.
1687 // Store this information to prevent this from causing exponential runtime.
1688 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
// Pseudo precedence levels placed directly above prec::PointerToMember.
// ExpressionParser::parse() dispatches to parseUnaryOperator() at the first
// level and stops recursing above the second; getCurrentPrecedence() reports
// the second level for '.' and '->'.
1691 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
1692 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
1694 /// Parses binary expressions by inserting fake parenthesis based on
1695 /// operator precedence.
///
/// The parser walks the tokens of one line starting at Line.First. parse()
/// recurses once per precedence level; addFakeParenthesis() records the
/// result on the tokens as FakeLParens / FakeRParens.
// NOTE(review): this listing omits many original lines of the class (closing
// braces, `return` statements, calls to next(), the access specifiers, and
// lines such as 1730-1731, 1746, 1760-1765, 1767, 1771, 1792, 1805, 1811,
// 1813, 1829-1833, 1880, 1882). Comments below describe only what the visible
// lines establish.
1696 class ExpressionParser {
1698 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
1699 AnnotatedLine &Line)
1700 : Style(Style), Keywords(Keywords), Current(Line.First) {}
1702 /// Parse expressions with the given operator precedence.
1703 void parse(int Precedence = 0) {
1704 // Skip 'return' and ObjC selector colons as they are not part of a binary
1706 while (Current && (Current->is(tok::kw_return) ||
1707 (Current->is(tok::colon) &&
1708 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))))
// Recursion ends at the end of the line or above the highest pseudo level.
1711 if (!Current || Precedence > PrecedenceArrowAndPeriod)
1714 // Conditional expressions need to be parsed separately for proper nesting.
1715 if (Precedence == prec::Conditional) {
1716 parseConditionalExpr();
1720 // Parse unary operators, which all have a higher precedence than binary
1722 if (Precedence == PrecedenceUnaryOperator) {
1723 parseUnaryOperator();
// Start marks the first token of the expression at this level; operators
// found at exactly this level are chained via NextOperator and numbered with
// OperatorIndex.
1727 FormatToken *Start = Current;
1728 FormatToken *LatestOperator = nullptr;
1729 unsigned OperatorIndex = 0;
1732 // Consume operators with higher precedence.
1733 parse(Precedence + 1);
1735 int CurrentPrecedence = getCurrentPrecedence();
1737 if (Current && Current->is(TT_SelectorName) &&
1738 Precedence == CurrentPrecedence) {
1740 addFakeParenthesis(Start, prec::Level(Precedence));
1744 // At the end of the line or when an operator with lower precedence is
1745 // found, insert fake parenthesis and return.
1747 (Current->closesScope() &&
1748 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
1749 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
1750 (CurrentPrecedence == prec::Conditional &&
1751 Precedence == prec::Assignment && Current->is(tok::colon))) {
1755 // Consume scopes: (), [], <> and {}
1756 if (Current->opensScope()) {
1757 // A fragment of a JavaScript template string can look like '}..${' and
1758 // thus close a scope and open a new one at the same time.
1759 while (Current && (!Current->closesScope() || Current->opensScope())) {
1766 if (CurrentPrecedence == Precedence) {
1768 LatestOperator->NextOperator = Current;
1769 LatestOperator = Current;
1770 Current->OperatorIndex = OperatorIndex;
1773 next(/*SkipPastLeadingComments=*/Precedence > 0);
// Only wrap the expression if at least one operator of this level was found.
1777 if (LatestOperator && (Current || Precedence > 0)) {
1778 // LatestOperator->LastOperator = true;
1779 if (Precedence == PrecedenceArrowAndPeriod) {
1780 // Call expressions don't have a binary operator precedence.
1781 addFakeParenthesis(Start, prec::Unknown);
1783 addFakeParenthesis(Start, prec::Level(Precedence));
1789 /// Gets the precedence (+1) of the given token for binary operators
1790 /// and other tokens that we treat like binary operators.
///
/// parse() compares the result against -1, which suggests -1 is the "not an
/// operator" value; the lines returning it are not part of this listing.
1791 int getCurrentPrecedence() {
1793 const FormatToken *NextNonComment = Current->getNextNonComment();
1794 if (Current->is(TT_ConditionalExpr))
1795 return prec::Conditional;
1796 if (NextNonComment && Current->is(TT_SelectorName) &&
1797 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
1798 ((Style.Language == FormatStyle::LK_Proto ||
1799 Style.Language == FormatStyle::LK_TextProto) &&
1800 NextNonComment->is(tok::less))))
1801 return prec::Assignment;
1802 if (Current->is(TT_JsComputedPropertyName))
1803 return prec::Assignment;
1804 if (Current->is(TT_LambdaArrow))
1806 if (Current->is(TT_JsFatArrow))
1807 return prec::Assignment;
1808 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
1809 (Current->is(tok::comment) && NextNonComment &&
1810 NextNonComment->is(TT_SelectorName)))
1812 if (Current->is(TT_RangeBasedForLoopColon))
1814 if ((Style.Language == FormatStyle::LK_Java ||
1815 Style.Language == FormatStyle::LK_JavaScript) &&
1816 Current->is(Keywords.kw_instanceof))
1817 return prec::Relational;
1818 if (Style.Language == FormatStyle::LK_JavaScript &&
1819 Current->isOneOf(Keywords.kw_in, Keywords.kw_as))
1820 return prec::Relational;
1821 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
1822 return Current->getPrecedence();
1823 if (Current->isOneOf(tok::period, tok::arrow))
1824 return PrecedenceArrowAndPeriod;
1825 if ((Style.Language == FormatStyle::LK_Java ||
1826 Style.Language == FormatStyle::LK_JavaScript) &&
1827 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
1828 Keywords.kw_throws))
/// Records a fake parenthesis pair of level \p Precedence that opens at
/// \p Start and closes at the last non-comment token before Current.
/// For levels above prec::Unknown the two ends are additionally flagged as
/// the start and end of a binary expression.
// NOTE(review): original line 1838 is absent from this listing; it presumably
// guards the Current->Previous dereference below - confirm.
1834 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
1835 Start->FakeLParens.push_back(Precedence);
1836 if (Precedence > prec::Unknown)
1837 Start->StartsBinaryExpression = true;
1839 FormatToken *Previous = Current->Previous;
// Do not close the fake parenthesis on a comment if a token precedes it.
1840 while (Previous->is(tok::comment) && Previous->Previous)
1841 Previous = Previous->Previous;
1842 ++Previous->FakeRParens;
1843 if (Precedence > prec::Unknown)
1844 Previous->EndsBinaryExpression = true;
1848 /// Parse unary operator expressions and surround them with fake
1849 /// parentheses if appropriate.
1850 void parseUnaryOperator() {
// Collect the run of leading unary operators, parse their operand, then wrap
// from the innermost (last) operator outwards.
1851 llvm::SmallVector<FormatToken *, 2> Tokens;
1852 while (Current && Current->is(TT_UnaryOperator)) {
1853 Tokens.push_back(Current);
1856 parse(PrecedenceArrowAndPeriod);
1857 for (FormatToken *Token : llvm::reverse(Tokens))
1858 // The actual precedence doesn't matter.
1859 addFakeParenthesis(Token, prec::Unknown);
/// Parses a conditional expression: the condition at prec::LogicalOr, then
/// two further sub-expressions at prec::Assignment, and finally wraps the
/// whole expression in a prec::Conditional fake parenthesis. The '?' and the
/// TT_ConditionalExpr tests below abort the sequence when they fail.
1862 void parseConditionalExpr() {
1863 while (Current && Current->isTrailingComment()) {
1866 FormatToken *Start = Current;
1867 parse(prec::LogicalOr);
1868 if (!Current || !Current->is(tok::question))
1871 parse(prec::Assignment);
1872 if (!Current || Current->isNot(TT_ConditionalExpr))
1875 parse(prec::Assignment);
1876 addFakeParenthesis(Start, prec::Conditional);
/// Advances Current to the next token and then past trailing comments.
/// A trailing comment is skipped if it has no newline before it or if
/// \p SkipPastLeadingComments is true.
1879 void next(bool SkipPastLeadingComments = true) {
1881 Current = Current->Next;
1883 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
1884 Current->isTrailingComment())
1885 Current = Current->Next;
1888 const FormatStyle &Style;
1889 const AdditionalKeywords &Keywords;
// Parse position; null once the end of the line has been reached.
1890 FormatToken *Current;
1893 } // end anonymous namespace
/// Adjusts the indentation level of comment-only lines in \p Lines.
///
/// The lines are visited from last to first so that the line following a
/// comment line is already known. A line counts as a comment line only if
/// every one of its tokens is a comment. Children of each line are processed
/// recursively.
1895 void TokenAnnotator::setCommentLineLevels(
1896 SmallVectorImpl<AnnotatedLine *> &Lines) {
// Reference line that comment lines above it may be aligned with.
1897 const AnnotatedLine *NextNonCommentLine = nullptr;
1898 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
// Assume a comment line until a non-comment token is found.
1901 bool CommentLine = true;
1902 for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
1903 if (!Tok->is(tok::comment)) {
1904 CommentLine = false;
1909 // If the comment is currently aligned with the line immediately following
1910 // it, that's probably intentional and we should keep it.
1911 if (NextNonCommentLine && CommentLine &&
1912 NextNonCommentLine->First->NewlinesBefore <= 1 &&
1913 NextNonCommentLine->First->OriginalColumn ==
1914 (*I)->First->OriginalColumn) {
1915 // Align comments for preprocessor lines with the # in column 0.
1916 // Otherwise, align with the next line.
1917 (*I)->Level = (NextNonCommentLine->Type == LT_PreprocessorDirective ||
1918 NextNonCommentLine->Type == LT_ImportStatement)
1920 : NextNonCommentLine->Level;
// A line that starts with '}' is never used as an alignment reference; the
// reference is cleared instead.
1922 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
1925 setCommentLineLevels((*I)->Children);
// Scans every token of 'Line' and tracks the largest NestingLevel seen,
// starting from 0.
1929 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
1930 unsigned Result = 0;
1931 for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next)
1932 Result = std::max(Result, Tok->NestingLevel);
/// Annotates \p Line: determines its line type with an \c AnnotatingParser,
/// runs an \c ExpressionParser over it, refines the type for Objective-C
/// lines and initializes spacing/breaking data of the first token.
1936 void TokenAnnotator::annotate(AnnotatedLine &Line) {
// Iterates over the child lines of Line first.
1937 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
1938 E = Line.Children.end();
1942 AnnotatingParser Parser(Style, Line, Keywords);
1943 Line.Type = Parser.parseLine();
1945 // With very deep nesting, ExpressionParser uses lots of stack and the
1946 // formatting algorithm is very slow. We're not going to do a good job here
1947 // anyway - it's probably generated code being formatted by mistake.
1948 // Just skip the whole line.
1949 if (maxNestingDepth(Line) > 50)
1950 Line.Type = LT_Invalid;
1952 if (Line.Type == LT_Invalid)
1955 ExpressionParser ExprParser(Style, Keywords, Line);
// Objective-C lines are re-typed according to the annotation of their first
// token.
1958 if (Line.startsWith(TT_ObjCMethodSpecifier))
1959 Line.Type = LT_ObjCMethodDecl;
1960 else if (Line.startsWith(TT_ObjCDecl))
1961 Line.Type = LT_ObjCDecl;
1962 else if (Line.startsWith(TT_ObjCProperty))
1963 Line.Type = LT_ObjCProperty;
// The first token requires one space before it and may be broken before only
// if it must be.
1965 Line.First->SpacesRequiredBefore = 1;
1966 Line.First->CanBreakBefore = Line.First->MustBreakBefore;
1969 // This function heuristically determines whether 'Current' starts the name of a
1970 // function declaration.
//
// The heuristic first locates the '(' that would open the parameter list
// (skipping template arguments, '::'-qualified names and operator names) and
// then inspects the tokens between the parentheses.
1971 static bool isFunctionDeclarationName(const FormatToken &Current,
1972 const AnnotatedLine &Line) {
// Helper that walks over the tokens forming an overloaded operator's name,
// starting at 'Next'.
1973 auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
1974 for (; Next; Next = Next->Next) {
1975 if (Next->is(TT_OverloadedOperatorLParen))
1977 if (Next->is(TT_OverloadedOperator))
1979 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
1980 // For 'new[]' and 'delete[]'.
1981 if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next &&
1982 Next->Next->Next->is(tok::r_square))
1983 Next = Next->Next->Next;
1992 // Find parentheses of parameter list.
1993 const FormatToken *Next = Current.Next;
1994 if (Current.is(tok::kw_operator)) {
1995 if (Current.Previous && Current.Previous->is(tok::coloncolon))
1997 Next = skipOperatorName(Next);
// Otherwise only a top-level (NestingLevel 0) token annotated as
// TT_StartOfName is considered.
1999 if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0)
2001 for (; Next; Next = Next->Next) {
2002 if (Next->is(TT_TemplateOpener)) {
// Jump over the template argument list.
2003 Next = Next->MatchingParen;
2004 } else if (Next->is(tok::coloncolon)) {
2008 if (Next->is(tok::kw_operator)) {
2009 Next = skipOperatorName(Next->Next);
2012 if (!Next->is(tok::identifier))
2014 } else if (Next->is(tok::l_paren)) {
2022 // Check whether parameter list can belong to a function declaration.
2023 if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen)
2025 // If the line ends with "{", this is likely a function definition.
2026 if (Line.Last->is(tok::l_brace))
2028 if (Next->Next == Next->MatchingParen)
2029 return true; // Empty parentheses.
2030 // If there is an &/&& after the r_paren, this is likely a function.
2031 if (Next->MatchingParen->Next &&
2032 Next->MatchingParen->Next->is(TT_PointerOrReference))
// Inspect the tokens between the parentheses; nested parenthesized groups
// are jumped over as a unit.
2034 for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
2036 if (Tok->is(tok::l_paren) && Tok->MatchingParen) {
2037 Tok = Tok->MatchingParen;
// Tokens that typically appear in parameter declarations.
2040 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
2041 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis))
// Tokens that typically appear in call arguments (braces, literals,
// Objective-C method expressions).
2043 if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
2044 Tok->Tok.isLiteral())
/// Decides, based on \c Style.AlwaysBreakAfterReturnType, whether a break is
/// forced for the return type of \p Line.
///
/// \pre \c Line.MightBeFunctionDecl is true (asserted).
2050 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
2051 assert(Line.MightBeFunctionDecl);
// The two "TopLevel" settings carry an additional condition that is checked
// before the per-setting switch.
2053 if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
2054 Style.AlwaysBreakAfterReturnType ==
2055 FormatStyle::RTBS_TopLevelDefinitions) &&
2059 switch (Style.AlwaysBreakAfterReturnType) {
2060 case FormatStyle::RTBS_None:
2062 case FormatStyle::RTBS_All:
2063 case FormatStyle::RTBS_TopLevel:
// For the "*Definitions" settings the answer depends on whether the line
// might be a function definition.
2065 case FormatStyle::RTBS_AllDefinitions:
2066 case FormatStyle::RTBS_TopLevelDefinitions:
2067 return Line.mightBeFunctionDefinition();
/// Computes per-token formatting data for \p Line and, recursively, for its
/// children.
///
/// For each token after the first this sets the token type for function
/// declaration names, \c SpacesRequiredBefore, \c MustBreakBefore,
/// \c CanBreakBefore, \c TotalLength and \c SplitPenalty. Afterwards the
/// unbreakable tail lengths are computed and an \c IndentLevel is assigned to
/// every token.
2073 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
2074 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
2075 E = Line.Children.end();
2077 calculateFormattingInformation(**I);
// The first token's length is the column limit if it spans multiple lines,
// otherwise the line's start column plus the token's width.
2080 Line.First->TotalLength =
2081 Line.First->IsMultiline ? Style.ColumnLimit
2082 : Line.FirstStartColumn + Line.First->ColumnWidth;
2083 FormatToken *Current = Line.First->Next;
2084 bool InFunctionDecl = Line.MightBeFunctionDecl;
2086 if (isFunctionDeclarationName(*Current, Line))
2087 Current->Type = TT_FunctionDeclarationName;
2088 if (Current->is(TT_LineComment)) {
// A line comment directly after the opener of a braced init list gets 0 or 1
// space depending on Cpp11BracedListStyle.
2089 if (Current->Previous->BlockKind == BK_BracedInit &&
2090 Current->Previous->opensScope())
2091 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
2093 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
2095 // If we find a trailing comment, iterate backwards to determine whether
2096 // it seems to relate to a specific parameter. If so, break before that
2097 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
2098 // to the previous line in:
2102 if (!Current->HasUnescapedNewline) {
2103 for (FormatToken *Parameter = Current->Previous; Parameter;
2104 Parameter = Parameter->Previous) {
2105 if (Parameter->isOneOf(tok::comment, tok::r_brace))
2107 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
// Constructor-initializer commas are exempt; otherwise a parameter that
// already started on its own line keeps its break.
2108 if (!Parameter->Previous->is(TT_CtorInitializerComma) &&
2109 Parameter->HasUnescapedNewline)
2110 Parameter->MustBreakBefore = true;
2115 } else if (Current->SpacesRequiredBefore == 0 &&
2116 spaceRequiredBefore(Line, *Current)) {
2117 Current->SpacesRequiredBefore = 1;
// An already-set MustBreakBefore is never cleared here.
2120 Current->MustBreakBefore =
2121 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
2123 if (!Current->MustBreakBefore && InFunctionDecl &&
2124 Current->is(TT_FunctionDeclarationName))
2125 Current->MustBreakBefore = mustBreakForReturnType(Line);
2127 Current->CanBreakBefore =
2128 Current->MustBreakBefore || canBreakBefore(Line, *Current);
// A single child line attached to the previous token contributes its length
// plus one, or the full column limit if it ends in a trailing comment.
2129 unsigned ChildSize = 0;
2130 if (Current->Previous->Children.size() == 1) {
2131 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
2132 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
2133 : LastOfChild.TotalLength + 1;
// Tokens that are forced onto a new line, follow multi-line children or are
// themselves multiline get a TotalLength inflated by the column limit.
2135 const FormatToken *Prev = Current->Previous;
2136 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
2137 (Prev->Children.size() == 1 &&
2138 Prev->Children[0]->First->MustBreakBefore) ||
2139 Current->IsMultiline)
2140 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
2142 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
2143 ChildSize + Current->SpacesRequiredBefore;
// Tokens from the constructor initializer colon onwards are no longer
// treated as part of the function declaration.
2145 if (Current->is(TT_CtorInitializerColon))
2146 InFunctionDecl = false;
2148 // FIXME: Only calculate this if CanBreakBefore is true once static
2149 // initializers etc. are sorted out.
2150 // FIXME: Move magic numbers to a better place.
2152 // Reduce penalty for aligning ObjC method arguments using the colon
2153 // alignment as this is the canonical way (still prefer fitting everything
2154 // into one line if possible). Trying to fit a whole expression into one
2155 // line should not force other line breaks (e.g. when ObjC method
2156 // expression is a part of other expression).
2157 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
2158 if (Style.Language == FormatStyle::LK_ObjC &&
2159 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
2160 if (Current->ParameterIndex == 1)
2161 Current->SplitPenalty += 5 * Current->BindingStrength;
2163 Current->SplitPenalty += 20 * Current->BindingStrength;
2166 Current = Current->Next;
// Second pass over all tokens: per-token roles precompute their data and
// every token is assigned an IndentLevel, starting from the line's Level.
2169 calculateUnbreakableTailLengths(Line);
2170 unsigned IndentLevel = Line.Level;
2171 for (Current = Line.First; Current != nullptr; Current = Current->Next) {
2173 Current->Role->precomputeFormattingInfos(Current);
// A token whose matching opener opens a block or block-type list requires a
// positive IndentLevel (asserted).
2174 if (Current->MatchingParen &&
2175 Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
2176 assert(IndentLevel > 0);
2179 Current->IndentLevel = IndentLevel;
2180 if (Current->opensBlockOrBlockTypeList(Style))
2184 LLVM_DEBUG({ printDebugInfo(Line); });
/// Walks \p Line from its last token backwards and stores in each token the
/// accumulated width of the tokens following it that cannot be split off.
///
/// The running total restarts from zero at tokens that can be broken before,
/// at comments and at string literals; for every other token its column width
/// and required leading spaces are added.
2187 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
2188 unsigned UnbreakableTailLength = 0;
2189 FormatToken *Current = Line.Last;
// The value stored excludes the current token itself; it is updated only
// after the assignment.
2191 Current->UnbreakableTailLength = UnbreakableTailLength;
2192 if (Current->CanBreakBefore ||
2193 Current->isOneOf(tok::comment, tok::string_literal)) {
2194 UnbreakableTailLength = 0;
2196 UnbreakableTailLength +=
2197 Current->ColumnWidth + Current->SpacesRequiredBefore;
2199 Current = Current->Previous;
/// Returns the penalty for breaking \p Line between \c Tok.Previous (called
/// \c Left below) and \p Tok (called \c Right below).
///
/// The rules are evaluated in order and the first applicable one decides.
/// \p InFunctionDecl tells whether \p Tok is still considered part of a
/// function declaration.
2203 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
2204 const FormatToken &Tok,
2205 bool InFunctionDecl) {
2206 const FormatToken &Left = *Tok.Previous;
2207 const FormatToken &Right = Tok;
2209 if (Left.is(tok::semi))
// Language-specific rules for Java and JavaScript.
2212 if (Style.Language == FormatStyle::LK_Java) {
2213 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
2215 if (Right.is(Keywords.kw_implements))
2217 if (Left.is(tok::comma) && Left.NestingLevel == 0)
2219 } else if (Style.Language == FormatStyle::LK_JavaScript) {
2220 if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
2222 if (Left.is(TT_JsTypeColon))
2224 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2225 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2227 // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
2228 if (Left.opensScope() && Right.closesScope())
// Rules that apply to all languages.
2232 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
2234 if (Right.is(tok::l_square)) {
2235 if (Style.Language == FormatStyle::LK_Proto)
2237 if (Left.is(tok::r_square))
2239 // Slightly prefer formatting local lambda definitions like functions.
2240 if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
2242 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2243 TT_ArrayInitializerLSquare,
2244 TT_DesignatedInitializerLSquare, TT_AttributeSquare))
// Breaking before a declared name, a function name or 'operator'.
2248 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
2249 Right.is(tok::kw_operator)) {
2250 if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
2252 if (Left.is(TT_StartOfName))
// At the top level of a function declaration this break separates the return
// type from the name, so the dedicated style penalty applies.
2254 if (InFunctionDecl && Right.NestingLevel == 0)
2255 return Style.PenaltyReturnTypeOnItsOwnLine;
2258 if (Right.is(TT_PointerOrReference))
2260 if (Right.is(TT_LambdaArrow))
2262 if (Left.is(tok::equal) && Right.is(tok::l_brace))
2264 if (Left.is(TT_CastRParen))
2266 if (Left.is(tok::coloncolon) ||
2267 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
2269 if (Left.isOneOf(tok::kw_class, tok::kw_struct))
2271 if (Left.is(tok::comment))
2274 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
2275 TT_CtorInitializerColon))
2278 if (Right.isMemberAccess()) {
2279 // Breaking before the "./->" of a chained call/member access is reasonably
2280 // cheap, as formatting those with one call per line is generally
2281 // desirable. In particular, it should be cheaper to break before the call
2282 // than it is to break inside a call's parameters, which could lead to weird
2283 // "hanging" indents. The exception is the very last "./->" to support this
2284 // frequent pattern:
2286 // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
2289 // which might otherwise be blown up onto many lines. Here, clang-format
2290 // won't produce "hanging" indents anyway as there is no other trailing
2293 // Also apply a higher penalty if it is not a call, as that might lead to a wrapping
2297 // .aaaaaaaaa.bbbbbbbb(cccccccc);
2298 return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
2303 if (Right.is(TT_TrailingAnnotation) &&
2304 (!Right.Next || Right.Next->isNot(tok::l_paren))) {
2305 // Moving trailing annotations to the next line is fine for ObjC method
2307 if (Line.startsWith(TT_ObjCMethodSpecifier))
2309 // Generally, breaking before a trailing annotation is bad unless it is
2310 // function-like. It seems to be especially preferable to keep standard
2311 // annotations (i.e. "const", "final" and "override") on the same line.
2312 // Use a slightly higher penalty after ")" so that annotations like
2313 // "const override" are kept together.
// Annotations shorter than 10 characters count as "short" and cost 50 more.
2314 bool is_short_annotation = Right.TokenText.size() < 10;
2315 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
2318 // In for-loops, prefer breaking at ',' and ';'.
2319 if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
2322 // In Objective-C method expressions, prefer breaking before "param:" over
2323 // breaking after it.
2324 if (Right.is(TT_SelectorName))
2326 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
2327 return Line.MightBeFunctionDecl ? 50 : 500;
2329 // In Objective-C type declarations, avoid breaking after the category's
2330 // open paren (we'll prefer breaking after the protocol list's opening
2331 // angle bracket, if present).
2332 if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
2333 Left.Previous->isOneOf(tok::identifier, tok::greater))
2336 if (Left.is(tok::l_paren) && InFunctionDecl &&
2337 Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign)
// Breaking right after the '(' of 'if', 'for' or 'constexpr if'.
2339 if (Left.is(tok::l_paren) && Left.Previous &&
2340 (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) ||
2341 Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if)))
2343 if (Left.is(tok::equal) && InFunctionDecl)
2345 if (Right.is(tok::r_brace))
2347 if (Left.is(TT_TemplateOpener))
2349 if (Left.opensScope()) {
2350 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
2352 if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
2354 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
2357 if (Left.is(TT_JavaAnnotation))
// Rules concerning label strings combined with '+', ',' or '<<'.
2360 if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
2361 Left.Previous->isLabelString() &&
2362 (Left.NextOperator || Left.OperatorIndex != 0))
2364 if (Right.is(tok::plus) && Left.isLabelString() &&
2365 (Right.NextOperator || Right.OperatorIndex != 0))
2367 if (Left.is(tok::comma))
2369 if (Right.is(tok::lessless) && Left.isLabelString() &&
2370 (Right.NextOperator || Right.OperatorIndex != 1))
2372 if (Right.is(tok::lessless)) {
2373 // Breaking at a << is really cheap.
2374 if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0)
2375 // Slightly prefer to break before the first one in log-like statements.
2379 if (Left.ClosesTemplateDeclaration)
2380 return Style.PenaltyBreakTemplateDeclaration;
2381 if (Left.is(TT_ConditionalExpr))
2382 return prec::Conditional;
// Fall back to operator precedence: Left's precedence is used, or Right's if
// Left has none.
2383 prec::Level Level = Left.getPrecedence();
2384 if (Level == prec::Unknown)
2385 Level = Right.getPrecedence();
2386 if (Level == prec::Assignment)
2387 return Style.PenaltyBreakAssignment;
2388 if (Level != prec::Unknown)
/// Decides whether a space is required between the adjacent tokens \p Left
/// and \p Right of \p Line.
///
/// The decision is based on token kinds, annotated token types, the line type
/// and \c Style options. The rules are evaluated in order and the first
/// applicable one decides.
2394 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
2395 const FormatToken &Left,
2396 const FormatToken &Right) {
2397 if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
2399 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
2401 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
2402 Left.Tok.getObjCKeywordID() == tok::objc_property)
// Before '##' a space is required only after '#'; after '#' or '##' a space
// is required only before another '#'.
2404 if (Right.is(tok::hashhash))
2405 return Left.is(tok::hash);
2406 if (Left.isOneOf(tok::hashhash, tok::hash))
2407 return Right.is(tok::hash);
// Parentheses: empty "()", then the inside of cast parentheses versus
// ordinary parentheses, each governed by its own style option.
2408 if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
2409 return Style.SpaceInEmptyParentheses;
2410 if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
2411 return (Right.is(TT_CastRParen) ||
2412 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
2413 ? Style.SpacesInCStyleCastParentheses
2414 : Style.SpacesInParentheses;
2415 if (Right.isOneOf(tok::semi, tok::comma))
2417 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
// A '<...>' whose closer is followed by ':' is treated as a lightweight
// generic, which never gets a space before it.
2418 bool IsLightweightGeneric =
2419 Right.MatchingParen && Right.MatchingParen->Next &&
2420 Right.MatchingParen->Next->is(tok::colon);
2421 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
2423 if (Right.is(tok::less) && Left.is(tok::kw_template))
2424 return Style.SpaceAfterTemplateKeyword;
2425 if (Left.isOneOf(tok::exclaim, tok::tilde))
2427 if (Left.is(tok::at) &&
2428 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
2429 tok::numeric_constant, tok::l_paren, tok::l_brace,
2430 tok::kw_true, tok::kw_false))
// After ':' a space is required unless the colon belongs to an Objective-C
// method expression.
2432 if (Left.is(tok::colon))
2433 return !Left.is(TT_ObjCMethodExpr);
2434 if (Left.is(tok::coloncolon))
2436 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
2437 if (Style.Language == FormatStyle::LK_TextProto ||
2438 (Style.Language == FormatStyle::LK_Proto &&
2439 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
2440 // Format empty list as `<>`.
2441 if (Left.is(tok::less) && Right.is(tok::greater))
2443 return !Style.Cpp11BracedListStyle;
// Before '...': a space is required after a literal, or after an identifier
// that directly follows 'case'.
2447 if (Right.is(tok::ellipsis))
2448 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous &&
2449 Left.Previous->is(tok::kw_case));
2450 if (Left.is(tok::l_square) && Right.is(tok::amp))
// Space before a pointer or reference token.
2452 if (Right.is(TT_PointerOrReference)) {
2453 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
2454 if (!Left.MatchingParen)
// Looks at the token before the matching '(' to detect typeof(...) and
// decltype(...).
2456 FormatToken *TokenBeforeMatchingParen =
2457 Left.MatchingParen->getPreviousNonComment();
2458 if (!TokenBeforeMatchingParen ||
2459 !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype))
2462 return (Left.Tok.isLiteral() ||
2463 (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
2464 (Style.PointerAlignment != FormatStyle::PAS_Left ||
2465 (Line.IsMultiVariableDeclStmt &&
2466 (Left.NestingLevel == 0 ||
2467 (Left.NestingLevel == 1 && Line.First->is(tok::kw_for)))))));
2469 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
2470 (!Left.is(TT_PointerOrReference) ||
2471 (Style.PointerAlignment != FormatStyle::PAS_Right &&
2472 !Line.IsMultiVariableDeclStmt)))
// Space after a pointer or reference token.
2474 if (Left.is(TT_PointerOrReference))
2475 return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
2476 (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
2477 !Right.is(TT_StartOfName)) ||
2478 (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
2479 (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
2481 (Style.PointerAlignment != FormatStyle::PAS_Right &&
2482 !Line.IsMultiVariableDeclStmt) &&
2484 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
2485 if (Right.is(tok::star) && Left.is(tok::l_paren))
// Helper for spaces inside array-initializer brackets: enabled by
// SpacesInContainerLiterals, or for Proto/TextProto without
// Cpp11BracedListStyle when the '[' token ends a particular token sequence
// (presumably a '[' that follows a ':' - confirm against endsSequence).
2487 const auto SpaceRequiredForArrayInitializerLSquare =
2488 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
2489 return Style.SpacesInContainerLiterals ||
2490 ((Style.Language == FormatStyle::LK_Proto ||
2491 Style.Language == FormatStyle::LK_TextProto) &&
2492 !Style.Cpp11BracedListStyle &&
2493 LSquareTok.endsSequence(tok::l_square, tok::colon,
// Space after '[': never before ']' for the bracket kinds handled here.
2496 if (Left.is(tok::l_square))
2497 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
2498 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
2499 (Left.isOneOf(TT_ArraySubscriptLSquare,
2500 TT_StructuredBindingLSquare) &&
2501 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
// Space before ']': decided by the kind of the matching opener.
// NOTE(review): the last alternative tests TT_AttributeParen on the token
// matching a ']'; confirm that TT_AttributeSquare was not intended.
2502 if (Right.is(tok::r_square))
2503 return Right.MatchingParen &&
2504 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
2505 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
2507 (Style.SpacesInSquareBrackets &&
2508 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
2509 TT_StructuredBindingLSquare)) ||
2510 Right.MatchingParen->is(TT_AttributeParen));
2511 if (Right.is(tok::l_square) &&
2512 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
2513 TT_DesignatedInitializerLSquare,
2514 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
2515 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral))
2517 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
2518 return !Left.Children.empty(); // No spaces in "{}".
// Inside braces that do not delimit a block, spacing follows
// Cpp11BracedListStyle.
2519 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
2520 (Right.is(tok::r_brace) && Right.MatchingParen &&
2521 Right.MatchingParen->BlockKind != BK_Block))
2522 return !Style.Cpp11BracedListStyle;
2523 if (Left.is(TT_BlockComment))
2524 // No whitespace in x(/*foo=*/1), except for JavaScript.
2525 return Style.Language == FormatStyle::LK_JavaScript ||
2526 !Left.TokenText.endswith("=*/");
// Space before '(': depends on the line type, the preceding token and
// Style.SpaceBeforeParens.
2527 if (Right.is(tok::l_paren)) {
2528 if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) ||
2529 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare)))
2531 return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
2532 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
2533 (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while,
2534 tok::kw_switch, tok::kw_case, TT_ForEachMacro,
2536 Left.endsSequence(tok::kw_constexpr, tok::kw_if) ||
2537 (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
2538 tok::kw_new, tok::kw_delete) &&
2539 (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
2540 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
2541 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() ||
2542 Left.is(tok::r_paren)) &&
2543 Line.Type != LT_PreprocessorDirective);
2545 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
// Before a unary operator: no space after '(', '[' or '@', nor after the
// colon of an Objective-C method expression.
2547 if (Right.is(TT_UnaryOperator))
2548 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
2549 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
2550 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
2552 Left.isSimpleTypeSpecifier()) &&
2553 Right.is(tok::l_brace) && Right.getNextNonComment() &&
2554 Right.BlockKind != BK_Block)
2556 if (Left.is(tok::period) || Right.is(tok::period))
2558 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
2560 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
2561 Left.MatchingParen->Previous &&
2562 (Left.MatchingParen->Previous->is(tok::period) ||
2563 Left.MatchingParen->Previous->is(tok::coloncolon)))
2564 // Java call to generic function with explicit type:
2565 // A.<B<C<...>>>DoSomething();
2566 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
2568 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
2570 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at))
2571 // Objective-C dictionary literal -> no space after opening brace.
2573 if (Right.is(tok::r_brace) && Right.MatchingParen &&
2574 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at))
2575 // Objective-C dictionary literal -> no space before closing brace.
2580 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
2581 const FormatToken &Right) {
2582 const FormatToken &Left = *Right.Previous;
2583 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
2584 return true; // Never ever merge two identifiers.
2585 if (Style.isCpp()) {
2586 if (Left.is(tok::kw_operator))
2587 return Right.is(tok::coloncolon);
2588 if (Right.is(tok::l_brace) && Right.BlockKind == BK_BracedInit &&
2589 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList)
2591 } else if (Style.Language == FormatStyle::LK_Proto ||
2592 Style.Language == FormatStyle::LK_TextProto) {
2593 if (Right.is(tok::period) &&
2594 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
2595 Keywords.kw_repeated, Keywords.kw_extend))
2597 if (Right.is(tok::l_paren) &&
2598 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
2600 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
2602 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
2603 if (Left.is(tok::slash) || Right.is(tok::slash))
2605 if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
2606 Right.isOneOf(tok::l_brace, tok::less))
2607 return !Style.Cpp11BracedListStyle;
2608 // A percent is probably part of a formatting specification, such as %lld.
2609 if (Left.is(tok::percent))
2611 // Preserve the existence of a space before a percent for cases like 0x%04x
2613 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
2614 return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin();
2615 } else if (Style.Language == FormatStyle::LK_JavaScript) {
2616 if (Left.is(TT_JsFatArrow))
2619 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
2620 Left.Previous->is(tok::kw_for))
2622 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
2623 Right.MatchingParen) {
2624 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
2625 // An async arrow function, for example: `x = async () => foo();`,
2626 // as opposed to calling a function called async: `x = async();`
2627 if (Next && Next->is(TT_JsFatArrow))
2630 if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) ||
2631 (Right.is(TT_TemplateString) && Right.TokenText.startswith("}")))
2633 // In tagged template literals ("html`bar baz`"), there is no space between
2634 // the tag identifier and the template string. getIdentifierInfo makes sure
2635 // that the identifier is not a pseudo keyword like `yield`, either.
2636 if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) &&
2637 Right.is(TT_TemplateString))
2639 if (Right.is(tok::star) &&
2640 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield))
2642 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
2643 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
2644 Keywords.kw_extends, Keywords.kw_implements))
2646 if (Right.is(tok::l_paren)) {
2647 // JS methods can use some keywords as names (e.g. `delete()`).
2648 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
2650 // Valid JS method names can include keywords, e.g. `foo.delete()` or
2651 // `bar.instanceof()`. Recognize call positions by preceding period.
2652 if (Left.Previous && Left.Previous->is(tok::period) &&
2653 Left.Tok.getIdentifierInfo())
2655 // Additional unary JavaScript operators that need a space after.
2656 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
2660 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
2662 // "of" is only a keyword if it appears after another identifier
2663 // (e.g. as "const x of y" in a for loop), or after a destructuring
2664 // operation (const [x, y] of z, const {a, b} of c).
2665 (Left.is(Keywords.kw_of) && Left.Previous &&
2666 (Left.Previous->Tok.is(tok::identifier) ||
2667 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
2668 (!Left.Previous || !Left.Previous->is(tok::period)))
2670 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
2671 Left.Previous->is(tok::period) && Right.is(tok::l_paren))
2673 if (Left.is(Keywords.kw_as) &&
2674 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren))
2676 if (Left.is(tok::kw_default) && Left.Previous &&
2677 Left.Previous->is(tok::kw_export))
2679 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
2681 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
2683 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
2685 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
2686 Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
2688 if (Left.is(tok::ellipsis))
2690 if (Left.is(TT_TemplateCloser) &&
2691 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
2692 Keywords.kw_implements, Keywords.kw_extends))
2693 // Type assertions ('<type>expr') are not followed by whitespace. Other
2694 // locations that should have whitespace following are identified by the
2695 // above set of follower tokens.
2697 if (Right.is(TT_JsNonNullAssertion))
2699 if (Left.is(TT_JsNonNullAssertion) &&
2700 Right.isOneOf(Keywords.kw_as, Keywords.kw_in))
2701 return true; // "x! as string", "x! in y"
2702 } else if (Style.Language == FormatStyle::LK_Java) {
2703 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
2705 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
2706 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
2707 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
2708 tok::kw_protected) ||
2709 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
2710 Keywords.kw_native)) &&
2711 Right.is(TT_TemplateOpener))
2714 if (Left.is(TT_ImplicitStringLiteral))
2715 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2716 if (Line.Type == LT_ObjCMethodDecl) {
2717 if (Left.is(TT_ObjCMethodSpecifier))
2719 if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right))
2720 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
2721 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
2722 // method declaration.
2725 if (Line.Type == LT_ObjCProperty &&
2726 (Right.is(tok::equal) || Left.is(tok::equal)))
2729 if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
2730 Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
2732 if (Right.is(TT_OverloadedOperatorLParen))
2733 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2734 if (Left.is(tok::comma))
2736 if (Right.is(tok::comma))
2738 if (Right.is(TT_ObjCBlockLParen))
2740 if (Right.is(TT_CtorInitializerColon))
2741 return Style.SpaceBeforeCtorInitializerColon;
2742 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
2744 if (Right.is(TT_RangeBasedForLoopColon) &&
2745 !Style.SpaceBeforeRangeBasedForLoopColon)
2747 if (Right.is(tok::colon)) {
2748 if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
2749 !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
2751 if (Right.is(TT_ObjCMethodExpr))
2753 if (Left.is(tok::question))
2755 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
2757 if (Right.is(TT_DictLiteral))
2758 return Style.SpacesInContainerLiterals;
2759 if (Right.is(TT_AttributeColon))
2763 if (Left.is(TT_UnaryOperator))
2764 return Right.is(TT_BinaryOperator);
2766 // If the next token is a binary operator or a selector name, we have
2767 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
2768 if (Left.is(TT_CastRParen))
2769 return Style.SpaceAfterCStyleCast ||
2770 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
2772 if (Left.is(tok::greater) && Right.is(tok::greater)) {
2773 if (Style.Language == FormatStyle::LK_TextProto ||
2774 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral)))
2775 return !Style.Cpp11BracedListStyle;
2776 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
2777 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
2779 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
2780 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
2781 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod)))
2783 if (!Style.SpaceBeforeAssignmentOperators &&
2784 Right.getPrecedence() == prec::Assignment)
2786 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
2787 (Left.is(tok::identifier) || Left.is(tok::kw_this)))
2789 if (Right.is(tok::coloncolon) && Left.is(tok::identifier))
2790 // Generally don't remove existing spaces between an identifier and "::".
2791 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
2792 // this turns out to be too lenient, add analysis of the identifier itself.
2793 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
2794 if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment))
2795 return (Left.is(TT_TemplateOpener) &&
2796 Style.Standard == FormatStyle::LS_Cpp03) ||
2797 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
2798 tok::kw___super, TT_TemplateCloser,
2799 TT_TemplateOpener)) ||
2800 (Left.is(tok ::l_paren) && Style.SpacesInParentheses);
2801 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
2802 return Style.SpacesInAngles;
2803 // Space before TT_StructuredBindingLSquare.
2804 if (Right.is(TT_StructuredBindingLSquare))
2805 return !Left.isOneOf(tok::amp, tok::ampamp) ||
2806 Style.PointerAlignment != FormatStyle::PAS_Right;
2807 // Space before & or && following a TT_StructuredBindingLSquare.
2808 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
2809 Right.isOneOf(tok::amp, tok::ampamp))
2810 return Style.PointerAlignment != FormatStyle::PAS_Left;
2811 if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) ||
2812 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
2813 !Right.is(tok::r_paren)))
2815 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) &&
2816 Right.isNot(TT_FunctionTypeLParen))
2817 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
2818 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
2819 Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen))
2821 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
2822 Line.startsWith(tok::hash))
2824 if (Right.is(TT_TrailingUnaryOperator))
2826 if (Left.is(TT_RegexLiteral))
2828 return spaceRequiredBetween(Line, Left, Right);
2831 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
// Concretely, all of the following must hold for 'Tok':
//   - it is a tok::l_brace,
//   - its BlockKind is BK_Block,
//   - its type is neither TT_ObjCBlockLBrace nor TT_DictLiteral.
2832 static bool isAllmanBrace(const FormatToken &Tok) {
2833 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
2834 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
// Decides whether a line break is mandatory directly before `Right`.
//
// `Line` is the annotated line that contains `Right`; `Left` (below) is the
// token that precedes `Right`. The rules are evaluated in order: retained
// empty lines, language-specific rules, trailing commas/comments before a
// closing bracket, comments, string literals, template declarations,
// constructor initializers and inheritance lists, raw string literals, proto
// scopes, brace wrapping, Java annotations, proto extensions, text proto
// submessages and C++/ObjC lambda arguments.
//
// NOTE(review): several result statements of this function are not part of
// this listing, so the comments below describe the tested conditions only.
2837 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
2838 const FormatToken &Right) {
// Right.Previous is dereferenced without a null check, so `Right` is assumed
// to have a preceding token.
2839 const FormatToken &Left = *Right.Previous;
// More than one newline precedes `Right` and the style keeps empty lines.
2840 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
// Language-specific rules.
2843 if (Style.Language == FormatStyle::LK_JavaScript) {
2844 // FIXME: This might apply to other languages and token kinds.
// A string literal that follows `<string literal> +`.
2845 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
2846 Left.Previous->is(tok::string_literal))
// A dict-literal l_brace after `=` on a level-0 line whose first token is one
// of the listed kinds but is not kw_var/kw_let.
2848 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
2849 Left.Previous && Left.Previous->is(tok::equal) &&
2850 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
2852 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
2854 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let))
2855 // Object literals on the top level of a file are treated as "enum-style".
2856 // Each key/value pair is put on a separate line, instead of bin-packing.
// An l_brace on a level-0 line that starts with `enum`, `const enum`,
// `export enum` or `export const enum`.
2858 if (Left.is(tok::l_brace) && Line.Level == 0 &&
2859 (Line.startsWith(tok::kw_enum) ||
2860 Line.startsWith(tok::kw_const, tok::kw_enum) ||
2861 Line.startsWith(tok::kw_export, tok::kw_enum) ||
2862 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum)))
2863 // JavaScript top-level enum key/value pairs are put on separate lines
2864 // instead of bin-packing.
// A `{` immediately followed by `}` where the l_brace carries child lines.
// The result depends on AllowShortFunctionsOnASingleLine: SFS_None and
// SFS_Empty always break; at nesting level 0 of a level-0 line the
// SFS_InlineOnly bit of the option value is tested with a bitwise AND.
2866 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
2867 !Left.Children.empty())
2868 // Support AllowShortFunctionsOnASingleLine for JavaScript.
2869 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
2870 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
2871 (Left.NestingLevel == 0 && Line.Level == 0 &&
2872 Style.AllowShortFunctionsOnASingleLine &
2873 FormatStyle::SFS_InlineOnly);
2874 } else if (Style.Language == FormatStyle::LK_Java) {
// A `+` that sits between two string literals.
2875 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
2876 Right.Next->is(tok::string_literal))
2878 } else if (Style.Language == FormatStyle::LK_Cpp ||
2879 Style.Language == FormatStyle::LK_ObjC ||
2880 Style.Language == FormatStyle::LK_Proto ||
2881 Style.Language == FormatStyle::LK_TableGen ||
2882 Style.Language == FormatStyle::LK_TextProto) {
// Two adjacent string literals.
2883 if (Left.isStringLiteral() && Right.isStringLiteral())
2887 // If the last token before a '}', ']', or ')' is a comma or a trailing
2888 // comment, the intention is to insert a line break after it in order to make
2889 // shuffling around entries easier. Import statements, especially in
2890 // JavaScript, can be an exception to this rule.
2891 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
// BeforeClosingBrace ends up as either
//   - the token before the closer matching `Left`, when `Left` opens a
//     non-block brace / array-initializer square (or a JavaScript paren), or
//   - `Left` itself, when `Right` is the closer of such a scope.
2892 const FormatToken *BeforeClosingBrace = nullptr;
2893 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
2894 (Style.Language == FormatStyle::LK_JavaScript &&
2895 Left.is(tok::l_paren))) &&
2896 Left.BlockKind != BK_Block && Left.MatchingParen)
2897 BeforeClosingBrace = Left.MatchingParen->Previous;
2898 else if (Right.MatchingParen &&
2899 (Right.MatchingParen->isOneOf(tok::l_brace,
2900 TT_ArrayInitializerLSquare) ||
2901 (Style.Language == FormatStyle::LK_JavaScript &&
2902 Right.MatchingParen->is(tok::l_paren))))
2903 BeforeClosingBrace = &Left;
2904 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
2905 BeforeClosingBrace->isTrailingComment()))
// Before a comment: break only where the input already had an unescaped
// newline, and never when Left.BlockKind is BK_BracedInit or `Left` is a
// constructor initializer colon.
2909 if (Right.is(tok::comment))
2910 return Left.BlockKind != BK_BracedInit &&
2911 Left.isNot(TT_CtorInitializerColon) &&
2912 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
// After a trailing comment.
2913 if (Left.isTrailingComment())
// After a literal flagged as unterminated.
2915 if (Right.Previous->IsUnterminatedLiteral)
// A `<<` that sits between two string literals.
2917 if (Right.is(tok::lessless) && Right.Next &&
2918 Right.Previous->is(tok::string_literal) &&
2919 Right.Next->is(tok::string_literal))
// The previous token closes a template declaration whose matching opener is
// at nesting level 0, and AlwaysBreakTemplateDeclarations is BTDS_Yes.
2921 if (Right.Previous->ClosesTemplateDeclaration &&
2922 Right.Previous->MatchingParen &&
2923 Right.Previous->MatchingParen->NestingLevel == 0 &&
2924 Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes)
// Constructor initializer comma, then colon, under BCIS_BeforeComma when
// ConstructorInitializerAllOnOneLineOrOnePerLine is off.
2926 if (Right.is(TT_CtorInitializerComma) &&
2927 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
2928 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2930 if (Right.is(TT_CtorInitializerColon) &&
2931 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
2932 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
2934 // Break only if we have multiple inheritance.
2935 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
2936 Right.is(TT_InheritanceComma))
// String literals whose text starts with R" (raw string literals).
2938 if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\""))
2939 // Multiline raw string literals are special wrt. line breaks. The author
2940 // has made a deliberate choice and might have aligned the contents of the
2941 // string literal accordingly. Thus, we try to keep existing line breaks.
2942 return Right.IsMultiline && Right.NewlinesBefore > 0;
// Proto only: first token after `{`, or after a `<` that follows `=`, at
// nesting level 1.
2943 if ((Right.Previous->is(tok::l_brace) ||
2944 (Right.Previous->is(tok::less) && Right.Previous->Previous &&
2945 Right.Previous->Previous->is(tok::equal))) &&
2946 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
2947 // Don't put enums or option definitions onto single lines in protocol
// Inline assembly brace: keep a break that was an unescaped newline in the
// input.
2951 if (Right.is(TT_InlineASMBrace))
2952 return Right.HasUnescapedNewline;
// Either neighbour is a brace accepted by isAllmanBrace(): break when the
// BraceWrapping option matching the start of the line (enum, typedef enum,
// class, struct) is enabled.
2953 if (isAllmanBrace(Left) || isAllmanBrace(Right))
2954 return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) ||
2955 (Line.startsWith(tok::kw_typedef, tok::kw_enum) &&
2956 Style.BraceWrapping.AfterEnum) ||
2957 (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) ||
2958 (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct);
// After an Objective-C block l_brace when short blocks are not allowed on a
// single line.
2959 if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
// Java/JavaScript: after the last leading annotation (the next token is
// neither another leading annotation nor `(`), provided the line ends in `{`
// or BreakAfterJavaFieldAnnotations is set.
2962 if ((Style.Language == FormatStyle::LK_Java ||
2963 Style.Language == FormatStyle::LK_JavaScript) &&
2964 Left.is(TT_LeadingJavaAnnotation) &&
2965 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
2966 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations))
// Before the `[` of a proto extension.
2969 if (Right.is(TT_ProtoExtensionLSquare))
2972 // In text proto instances if a submessage contains at least 2 entries and at
2973 // least one of them is a submessage, like A { ... B { ... } ... },
2974 // put all of the entries of A on separate lines by forcing the selector of
2975 // the submessage B to be put on a newline.
2977 // Example: these can stay on one line:
2978 // a { scalar_1: 1 scalar_2: 2 }
2979 // a { b { key: value } }
2981 // and these entries need to be on a new line even if putting them all in one
2982 // line is under the column limit:
2988 // We enforce this by breaking before a submessage field that has previous
2989 // siblings, *and* breaking before a field that follows a submessage field.
2991 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
2992 // the TT_SelectorName there, but we don't want to break inside the brackets.
2994 // Another edge case is @submessage { key: value }, which is a common
2995 // substitution placeholder. In this case we want to keep `@` and `submessage`
2998 // We ensure elsewhere that extensions are always on their own line.
2999 if ((Style.Language == FormatStyle::LK_Proto ||
3000 Style.Language == FormatStyle::LK_TextProto) &&
3001 Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) {
3002 // Keep `@submessage` together in:
3003 // @submessage { key: value }
3004 if (Right.Previous && Right.Previous->is(tok::at))
3006 // Look for the scope opener after selector in cases like:
3009 // selector: @base { ...
// LBrace starts at the token after the selector and is advanced past an
// optional `:` and an optional `@`-prefixed name.
3010 FormatToken *LBrace = Right.Next;
3011 if (LBrace && LBrace->is(tok::colon)) {
3012 LBrace = LBrace->Next;
3013 if (LBrace && LBrace->is(tok::at)) {
3014 LBrace = LBrace->Next;
3016 LBrace = LBrace->Next;
3020 // The scope opener is one of {, [, <:
3025 // In case of selector { ... }, the l_brace is TT_DictLiteral.
3026 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
3027 // so we check for immediately following r_brace.
3028 ((LBrace->is(tok::l_brace) &&
3029 (LBrace->is(TT_DictLiteral) ||
3030 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
3031 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
3032 // If Left.ParameterCount is 0, then this submessage entry is not the
3033 // first in its parent submessage, and we want to break before this entry.
3034 // If Left.ParameterCount is greater than 0, then its parent submessage
3035 // might contain 1 or more entries and we want to break before this entry
3036 // if it contains at least 2 entries. We deal with this case later by
3037 // detecting and breaking before the next entry in the parent submessage.
3038 if (Left.ParameterCount == 0)
3040 // However, if this submessage is the first entry in its parent
3041 // submessage, Left.ParameterCount might be 1 in some cases.
3042 // We deal with this case later by detecting an entry
3043 // following a closing paren of this submessage.
3046 // If this is an entry immediately following a submessage, it will be
3047 // preceded by a closing paren of that submessage, like in:
3048 // left---. .---right
3050 // sub: { ... } key: value
3051 // If there was a comment between `}` and `key` above, then `key` would be
3052 // put on a new line anyways.
3053 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
3057 // Deal with lambda arguments in C++ - we want consistent line breaks whether
3058 // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced
3059 // as aggressive line breaks are placed when the lambda is not the last arg.
3060 if ((Style.Language == FormatStyle::LK_Cpp ||
3061 Style.Language == FormatStyle::LK_ObjC) &&
3062 Left.is(tok::l_paren) && Left.BlockParameterCount > 0 &&
3063 !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) {
3064 // Multiple lambdas in the same function call force line breaks.
3065 if (Left.BlockParameterCount > 1)
3068 // A lambda followed by another arg forces a line break.
// Inspect the first non-comment token after the last comma recorded by the
// role attached to the opening paren.
3071 auto Comma = Left.Role->lastComma();
3074 auto Next = Comma->getNextNonComment();
3077 if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret))
// Decides whether a line break is permitted directly before `Right`.
//
// `Line` is the annotated line that contains `Right`; `Left` (below) is the
// token that precedes `Right`. Java- and JavaScript-specific rules are checked
// first, followed by rules keyed on token types and style options. If no
// earlier rule matched, the final return statement lists the remaining token
// combinations that allow a break.
//
// NOTE(review): several result statements of this function are not part of
// this listing, so the comments below describe the tested conditions only.
3084 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
3085 const FormatToken &Right) {
// Right.Previous is dereferenced without a null check, so `Right` is assumed
// to have a preceding token.
3086 const FormatToken &Left = *Right.Previous;
3088 // Language-specific stuff.
3089 if (Style.Language == FormatStyle::LK_Java) {
// `throws`, `extends` and `implements` are tested first on the left side and
// then on the right side of the potential break.
3090 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3091 Keywords.kw_implements))
3093 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
3094 Keywords.kw_implements))
3096 } else if (Style.Language == FormatStyle::LK_JavaScript) {
// NonComment is the result of Right.getPreviousNonComment(); it is compared
// against the keywords after which a newline would end the statement.
3097 const FormatToken *NonComment = Right.getPreviousNonComment();
3099 NonComment->isOneOf(
3100 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
3101 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
3102 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
3103 Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get,
3104 Keywords.kw_set, Keywords.kw_async, Keywords.kw_await))
3105 return false; // Otherwise automatic semicolon insertion would trigger.
// At nesting level 0: `[` or `(` directly after an identifier-like token,
// `]` or `)`.
3106 if (Right.NestingLevel == 0 &&
3107 (Left.Tok.getIdentifierInfo() ||
3108 Left.isOneOf(tok::r_square, tok::r_paren)) &&
3109 Right.isOneOf(tok::l_square, tok::l_paren))
3110 return false; // Otherwise automatic semicolon insertion would trigger.
// Between a fat arrow and the `{` that follows it.
3111 if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
// After a type-annotation colon.
3113 if (Left.is(TT_JsTypeColon))
3115 // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
3116 if (Left.is(tok::exclaim) && Right.is(tok::colon))
3118 // Look for `is` type annotations like:
3119 // function f(): a is B { ... }
3120 // Do not break before `is` in these cases.
3121 if (Right.is(Keywords.kw_is)) {
3122 const FormatToken* Next = Right.getNextNonComment();
3123 // If `is` is followed by a colon, it's likely that it's a dict key, so
3124 // ignore it for this check.
3125 // For example this is common in Polymer:
3130 if (!Next || !Next->is(tok::colon))
// `in` is handled like a binary operator: a break after it is allowed only
// with BOS_None, a break before it only with any other setting.
3133 if (Left.is(Keywords.kw_in))
3134 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
3135 if (Right.is(Keywords.kw_in))
3136 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
3137 if (Right.is(Keywords.kw_as))
3138 return false; // must not break before as in 'x as type' casts
3139 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
3140 // extends and infer can appear as keywords in conditional types:
3141 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
3142 // do not break before them, as the expressions are subject to ASI.
// After `as`.
3145 if (Left.is(Keywords.kw_as))
// After a non-null assertion operator.
3147 if (Left.is(TT_JsNonNullAssertion))
// Between `declare` and the declaration keyword that follows it.
3149 if (Left.is(Keywords.kw_declare) &&
3150 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
3151 Keywords.kw_function, tok::kw_class, tok::kw_enum,
3152 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
3153 Keywords.kw_let, tok::kw_const))
3154 // See grammar for 'declare' statements at:
3155 // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10
3157 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
3158 Right.isOneOf(tok::identifier, tok::string_literal))
3159 return false; // must not break in "module foo { ...}"
// Template string tokens that close, respectively open, a scope.
3160 if (Right.is(TT_TemplateString) && Right.closesScope())
3162 if (Left.is(TT_TemplateString) && Left.opensScope())
// Rules that apply regardless of language start here.
// After `@`.
3166 if (Left.is(tok::at))
// After the Objective-C `interface` keyword.
3168 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
// After a Java annotation: allowed unless the next token is `(`.
3170 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
3171 return !Right.is(tok::l_paren);
// Before a pointer/reference token: allowed in multi-variable declaration
// statements, or with PAS_Right alignment unless the token after it is a
// function declaration name.
3172 if (Right.is(TT_PointerOrReference))
3173 return Line.IsMultiVariableDeclStmt ||
3174 (Style.PointerAlignment == FormatStyle::PAS_Right &&
3175 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
// Before the start of a name, a function declaration name or `operator`.
3176 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
3177 Right.is(tok::kw_operator))
// After a pointer/reference token.
3179 if (Left.is(TT_PointerOrReference))
3181 if (Right.isTrailingComment())
3182 // We rely on MustBreakBefore being set correctly here as we should not
3183 // change the "binding" behavior of a comment.
3184 // The first comment in a braced list is always interpreted as belonging to
3185 // the first list element. Otherwise, it should be placed outside of the
3187 return Left.BlockKind == BK_BracedInit ||
3188 (Left.is(TT_CtorInitializerColon) &&
3189 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
// Directly between `?` and `:`.
3190 if (Left.is(tok::question) && Right.is(tok::colon))
// Ternary operators: a break before them follows
// BreakBeforeTernaryOperators, a break after them follows its negation.
3192 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
3193 return Style.BreakBeforeTernaryOperators;
3194 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
3195 return !Style.BreakBeforeTernaryOperators;
// Inheritance colon: a break after it requires BILS_AfterColon, a break
// before it requires any other BreakInheritanceList setting.
3196 if (Left.is(TT_InheritanceColon))
3197 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
3198 if (Right.is(TT_InheritanceColon))
3199 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
// An Objective-C method expression token other than `]` that does not follow
// a selector name.
3200 if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) &&
3201 Left.isNot(TT_SelectorName))
// Any colon except constructor initializer and inline assembly colons.
3204 if (Right.is(tok::colon) &&
3205 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon))
// After the colon of a dict literal entry or Objective-C method expression.
3207 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
3208 if (Style.Language == FormatStyle::LK_Proto ||
3209 Style.Language == FormatStyle::LK_TextProto) {
3210 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
3212 // Prevent cases like:
3215 // { key: valueeeeeeeeeeee }
3217 // when the snippet does not fit into one line.
3221 // key: valueeeeeeeeeeee
3224 // instead, even if it is longer by one line.
3226 // Note that this allows the "{" to go over the column limit
3227 // when the column limit is just between ":" and "{", but that does
3228 // not happen too often and alternative formattings in this case are
3231 // The code covers the cases:
3233 // submessage: { ... }
3234 // submessage: < ... >
3235 // repeated: [ ... ]
3236 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
3237 Right.is(TT_DictLiteral)) ||
3238 Right.is(TT_ArrayInitializerLSquare))
// Before the `]` that closes a proto extension.
3243 if (Right.is(tok::r_square) && Right.MatchingParen &&
3244 Right.MatchingParen->is(TT_ProtoExtensionLSquare))
// Before a selector name, or an identifier followed by an Objective-C method
// expression token: allowed unless preceded by a period.
3246 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
3247 Right.Next->is(TT_ObjCMethodExpr)))
3248 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
// After `)` on an Objective-C property line.
3249 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
// After the closer of a template declaration or of a function annotation.
3251 if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen))
// Range-based for loop colon and overloaded operator tokens.
3253 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
3254 TT_OverloadedOperator))
3256 if (Left.is(TT_RangeBasedForLoopColon))
3258 if (Right.is(TT_RangeBasedForLoopColon))
// Between a template closer and a following template opener.
3260 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
// After a template closer, a unary operator or `operator`.
3262 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
3263 Left.is(tok::kw_operator))
// After a top-level `=` on a virtual function declaration line, unless it is
// followed by `default` or `delete`.
3265 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
3266 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0)
// Between `=` and `{` when Cpp11BracedListStyle is off.
3268 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
3269 !Style.Cpp11BracedListStyle)
// After the `(` of an attribute.
3271 if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
// After a `(` that follows a binary operator or the `)` of a cast.
3273 if (Left.is(tok::l_paren) && Left.Previous &&
3274 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen)))
// Before an implicit string literal.
3276 if (Right.is(TT_ImplicitStringLiteral))
// Before `)` or a template closer.
3279 if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser))
// Before the `]` that closes a lambda introducer.
3281 if (Right.is(tok::r_square) && Right.MatchingParen &&
3282 Right.MatchingParen->is(TT_LambdaLSquare))
3285 // We only break before r_brace if there was a corresponding break before
3286 // the l_brace, which is tracked by BreakBeforeClosingBrace.
3287 if (Right.is(tok::r_brace))
3288 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
3290 // Allow breaking after a trailing annotation, e.g. after a method
3292 if (Left.is(TT_TrailingAnnotation))
3293 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
3294 tok::less, tok::coloncolon);
// Before `__attribute__` or the `[` of an attribute.
3296 if (Right.is(tok::kw___attribute) ||
3297 (Right.is(tok::l_square) && Right.is(TT_AttributeSquare)))
// Between an identifier and a following string literal.
3300 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
// Before an identifier that is followed by a dict literal token.
3303 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
// Constructor initializer colon: a break after it requires BCIS_AfterColon,
// a break before it requires any other setting.
3306 if (Left.is(TT_CtorInitializerColon))
3307 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
3308 if (Right.is(TT_CtorInitializerColon))
3309 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
// Constructor initializer and inheritance commas under the respective
// "before comma" style, tested on the left side and then on the right side.
3310 if (Left.is(TT_CtorInitializerComma) &&
3311 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
3313 if (Right.is(TT_CtorInitializerComma) &&
3314 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma)
3316 if (Left.is(TT_InheritanceComma) &&
3317 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
3319 if (Right.is(TT_InheritanceComma) &&
3320 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma)
// Between two consecutive `>` or two consecutive `<` tokens.
3322 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
3323 (Left.is(tok::less) && Right.is(tok::less)))
// Before a binary operator: requires a BreakBeforeBinaryOperators setting
// other than BOS_None; assignment-precedence operators additionally require
// BOS_All.
3325 if (Right.is(TT_BinaryOperator) &&
3326 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
3327 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
3328 Right.getPrecedence() != prec::Assignment))
// After the `[` of an array initializer.
3330 if (Left.is(TT_ArrayInitializerLSquare))
// Before `typename` that does not follow `const`.
3332 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
// After a binary operator other than `->*` and `<<`: requires a setting other
// than BOS_All, and either BOS_None or an assignment-precedence operator.
3334 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
3335 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
3336 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
3337 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
3338 Left.getPrecedence() == prec::Assignment))
// Between the two square brackets that open or close an attribute.
3340 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
3341 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare)))
// Fallback: token combinations that allow a break when no rule above decided.
3343 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
3344 tok::kw_class, tok::kw_struct, tok::comment) ||
3345 Right.isMemberAccess() ||
3346 Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless,
3347 tok::colon, tok::l_square, tok::at) ||
3348 (Left.is(tok::r_paren) &&
3349 Right.isOneOf(tok::identifier, tok::kw_const)) ||
3350 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
3351 (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser));
// Writes a textual dump of the annotations of `Line` to llvm::errs().
//
// The output starts with a header containing Line.Level, continues with one
// record of token fields and ends with a "----" separator line.
// Key to the abbreviations used in a token record:
//   M    = MustBreakBefore          C   = CanBreakBefore
//   T    = token type name          S   = SpacesRequiredBefore
//   B    = BlockParameterCount      BK  = BlockKind
//   P    = SplitPenalty             Name = Tok.getName()
//   L    = TotalLength              PPK = PackingKind
//   FakeLParens = entries of FakeLParens, each followed by "/"
//   FakeRParens = FakeRParens       II  = Tok.getIdentifierInfo()
//   Text = TokenText
//
// NOTE(review): the statements that advance `Tok` through the line are not
// part of this listing.
3354 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
3355 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n";
// Start at the first token of the line.
3356 const FormatToken *Tok = Line.First;
3358 llvm::errs() << " M=" << Tok->MustBreakBefore
3359 << " C=" << Tok->CanBreakBefore
3360 << " T=" << getTokenTypeName(Tok->Type)
3361 << " S=" << Tok->SpacesRequiredBefore
3362 << " B=" << Tok->BlockParameterCount
3363 << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
3364 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
3365 << " PPK=" << Tok->PackingKind << " FakeLParens=";
// Print every FakeLParens entry, each followed by a "/".
3366 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
3367 llvm::errs() << Tok->FakeLParens[i] << "/";
3368 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
3369 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
3370 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
// Consistency check against the last token recorded on the line.
3372 assert(Tok == Line.Last);
// Separator that terminates the dump of this line.
3375 llvm::errs() << "----\n";
3378 } // namespace format
3379 } // namespace clang