contrib/llvm/tools/clang/lib/Format/FormatToken.h

   1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// This file contains the declaration of the FormatToken, a wrapper
  12 /// around Token with additional information related to formatting.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
  17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
  18
  19 #include "clang/Basic/IdentifierTable.h"
  20 #include "clang/Basic/OperatorPrecedence.h"
  21 #include "clang/Format/Format.h"
  22 #include "clang/Lex/Lexer.h"
  23 #include <memory>
  24 #include <unordered_set>
  25
  26 namespace clang {
  27 namespace format {
  28
  29 #define LIST_TOKEN_TYPES                                                       \
  30   TYPE(ArrayInitializerLSquare)                                                \
  31   TYPE(ArraySubscriptLSquare)                                                  \
  32   TYPE(AttributeColon)                                                         \
  33   TYPE(AttributeParen)                                                         \
  34   TYPE(AttributeSquare)                                                        \
  35   TYPE(BinaryOperator)                                                         \
  36   TYPE(BitFieldColon)                                                          \
  37   TYPE(BlockComment)                                                           \
  38   TYPE(CastRParen)                                                             \
  39   TYPE(ConditionalExpr)                                                        \
  40   TYPE(ConflictAlternative)                                                    \
  41   TYPE(ConflictEnd)                                                            \
  42   TYPE(ConflictStart)                                                          \
  43   TYPE(CtorInitializerColon)                                                   \
  44   TYPE(CtorInitializerComma)                                                   \
  45   TYPE(DesignatedInitializerLSquare)                                           \
  46   TYPE(DesignatedInitializerPeriod)                                            \
  47   TYPE(DictLiteral)                                                            \
  48   TYPE(ForEachMacro)                                                           \
  49   TYPE(FunctionAnnotationRParen)                                               \
  50   TYPE(FunctionDeclarationName)                                                \
  51   TYPE(FunctionLBrace)                                                         \
  52   TYPE(FunctionTypeLParen)                                                     \
  53   TYPE(ImplicitStringLiteral)                                                  \
  54   TYPE(InheritanceColon)                                                       \
  55   TYPE(InheritanceComma)                                                       \
  56   TYPE(InlineASMBrace)                                                         \
  57   TYPE(InlineASMColon)                                                         \
  58   TYPE(JavaAnnotation)                                                         \
  59   TYPE(JsComputedPropertyName)                                                 \
  60   TYPE(JsExponentiation)                                                       \
  61   TYPE(JsExponentiationEqual)                                                  \
  62   TYPE(JsFatArrow)                                                             \
  63   TYPE(JsNonNullAssertion)                                                     \
  64   TYPE(JsTypeColon)                                                            \
  65   TYPE(JsTypeOperator)                                                         \
  66   TYPE(JsTypeOptionalQuestion)                                                 \
  67   TYPE(LambdaArrow)                                                            \
  68   TYPE(LambdaLSquare)                                                          \
  69   TYPE(LeadingJavaAnnotation)                                                  \
  70   TYPE(LineComment)                                                            \
  71   TYPE(MacroBlockBegin)                                                        \
  72   TYPE(MacroBlockEnd)                                                          \
  73   TYPE(ObjCBlockLBrace)                                                        \
  74   TYPE(ObjCBlockLParen)                                                        \
  75   TYPE(ObjCDecl)                                                               \
  76   TYPE(ObjCForIn)                                                              \
  77   TYPE(ObjCMethodExpr)                                                         \
  78   TYPE(ObjCMethodSpecifier)                                                    \
  79   TYPE(ObjCProperty)                                                           \
  80   TYPE(ObjCStringLiteral)                                                      \
  81   TYPE(OverloadedOperator)                                                     \
  82   TYPE(OverloadedOperatorLParen)                                               \
  83   TYPE(PointerOrReference)                                                     \
  84   TYPE(PureVirtualSpecifier)                                                   \
  85   TYPE(RangeBasedForLoopColon)                                                 \
  86   TYPE(RegexLiteral)                                                           \
  87   TYPE(SelectorName)                                                           \
  88   TYPE(StartOfName)                                                            \
  89   TYPE(StructuredBindingLSquare)                                               \
  90   TYPE(TemplateCloser)                                                         \
  91   TYPE(TemplateOpener)                                                         \
  92   TYPE(TemplateString)                                                         \
  93   TYPE(ProtoExtensionLSquare)                                                  \
  94   TYPE(TrailingAnnotation)                                                     \
  95   TYPE(TrailingReturnArrow)                                                    \
  96   TYPE(TrailingUnaryOperator)                                                  \
  97   TYPE(UnaryOperator)                                                          \
  98   TYPE(Unknown)
  99
     /// The formatter-specific token types.
     ///
     /// \c LIST_TOKEN_TYPES above is an X-macro: \c TYPE is temporarily defined
     /// so that every \c TYPE(X) entry expands to an enumerator \c TT_X, in list
     /// order. \c NUM_TOKEN_TYPES is declared after all of them and therefore
     /// equals the number of entries in the list.
 100 enum TokenType {
 101 #define TYPE(X) TT_##X,
 102   LIST_TOKEN_TYPES
 103 #undef TYPE
 104       NUM_TOKEN_TYPES
 105 };
 106
 107 /// Returns the name of the token type \p Type.
     ///
     /// Only declared here; the definition is not part of this header.
 108 const char *getTokenTypeName(TokenType Type);
 109
 110 // Represents what type of block a set of braces opens.
 111 enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };
 112
 113 // The packing kind of a function's parameters.
 114 enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
 115
     // The formatting decision stored per token (see FormatToken::Decision).
     // FD_Unformatted is the value a token has before any decision was made.
 116 enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
 117
     // Forward declarations. TokenRole is defined further down in this header;
     // AnnotatedLine is not defined in this header.
 118 class TokenRole;
 119 class AnnotatedLine;
 120
 121 /// A wrapper around a \c Token storing information about the
 122 /// whitespace characters preceding it.
 123 struct FormatToken {
 124   FormatToken() {}
 125
 126   /// The \c Token.
 127   Token Tok;
 128
 129   /// The number of newlines immediately before the \c Token.
 130   ///
 131   /// This can be used to determine what the user wrote in the original code
 132   /// and thereby e.g. leave an empty line between two function definitions.
 133   unsigned NewlinesBefore = 0;
 134
 135   /// Whether there is at least one unescaped newline before the \c
 136   /// Token.
 137   bool HasUnescapedNewline = false;
 138
 139   /// The range of the whitespace immediately preceding the \c Token.
 140   SourceRange WhitespaceRange;
 141
 142   /// The offset just past the last '\n' in this token's leading
 143   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
 144   unsigned LastNewlineOffset = 0;
 145
 146   /// The width of the non-whitespace parts of the token (or its first
 147   /// line for multi-line tokens) in columns.
 148   /// We need this to correctly measure number of columns a token spans.
 149   unsigned ColumnWidth = 0;
 150
 151   /// Contains the width in columns of the last line of a multi-line
 152   /// token.
 153   unsigned LastLineColumnWidth = 0;
 154
 155   /// Whether the token text contains newlines (escaped or not).
 156   bool IsMultiline = false;
 157
 158   /// Indicates that this is the first token of the file.
 159   bool IsFirst = false;
 160
 161   /// Whether there must be a line break before this token.
 162   ///
 163   /// This happens for example when a preprocessor directive ended directly
 164   /// before the token.
 165   bool MustBreakBefore = false;
 166
 167   /// The raw text of the token.
 168   ///
 169   /// Contains the raw token text without leading whitespace and without leading
 170   /// escaped newlines.
 171   StringRef TokenText;
 172
 173   /// Set to \c true if this token is an unterminated literal.
 174   bool IsUnterminatedLiteral = 0;
 175
 176   /// Contains the kind of block if this token is a brace.
 177   BraceBlockKind BlockKind = BK_Unknown;
 178
 179   TokenType Type = TT_Unknown;
 180
 181   /// The number of spaces that should be inserted before this token.
 182   unsigned SpacesRequiredBefore = 0;
 183
 184   /// \c true if it is allowed to break before this token.
 185   bool CanBreakBefore = false;
 186
 187   /// \c true if this is the ">" of "template<..>".
 188   bool ClosesTemplateDeclaration = false;
 189
 190   /// Number of parameters, if this is "(", "[" or "<".
 191   ///
 192   /// This is initialized to 1 as we don't need to distinguish functions with
 193   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
 194   /// the number of commas.
 195   unsigned ParameterCount = 0;
 196
 197   /// Number of parameters that are nested blocks,
 198   /// if this is "(", "[" or "<".
 199   unsigned BlockParameterCount = 0;
 200
 201   /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
 202   /// the surrounding bracket.
 203   tok::TokenKind ParentBracket = tok::unknown;
 204
 205   /// A token can have a special role that can carry extra information
 206   /// about the token's formatting.
 207   std::unique_ptr<TokenRole> Role;
 208
 209   /// If this is an opening parenthesis, how are the parameters packed?
 210   ParameterPackingKind PackingKind = PPK_Inconclusive;
 211
 212   /// The total length of the unwrapped line up to and including this
 213   /// token.
 214   unsigned TotalLength = 0;
 215
 216   /// The original 0-based column of this token, including expanded tabs.
 217   /// The configured TabWidth is used as tab width.
 218   unsigned OriginalColumn = 0;
 219
 220   /// The length of following tokens until the next natural split point,
 221   /// or the next token that can be broken.
 222   unsigned UnbreakableTailLength = 0;
 223
 224   // FIXME: Come up with a 'cleaner' concept.
 225   /// The binding strength of a token. This is a combined value of
 226   /// operator precedence, parenthesis nesting, etc.
 227   unsigned BindingStrength = 0;
 228
 229   /// The nesting level of this token, i.e. the number of surrounding (),
 230   /// [], {} or <>.
 231   unsigned NestingLevel = 0;
 232
 233   /// The indent level of this token. Copied from the surrounding line.
 234   unsigned IndentLevel = 0;
 235
 236   /// Penalty for inserting a line break before this token.
 237   unsigned SplitPenalty = 0;
 238
 239   /// If this is the first ObjC selector name in an ObjC method
 240   /// definition or call, this contains the length of the longest name.
 241   ///
 242   /// This being set to 0 means that the selectors should not be colon-aligned,
 243   /// e.g. because several of them are block-type.
 244   unsigned LongestObjCSelectorName = 0;
 245
 246   /// If this is the first ObjC selector name in an ObjC method
 247   /// definition or call, this contains the number of parts that the whole
 248   /// selector consist of.
 249   unsigned ObjCSelectorNameParts = 0;
 250
 251   /// The 0-based index of the parameter/argument. For ObjC it is set
 252   /// for the selector name token.
 253   /// For now calculated only for ObjC.
 254   unsigned ParameterIndex = 0;
 255
 256   /// Stores the number of required fake parentheses and the
 257   /// corresponding operator precedence.
 258   ///
 259   /// If multiple fake parentheses start at a token, this vector stores them in
 260   /// reverse order, i.e. inner fake parenthesis first.
 261   SmallVector<prec::Level, 4> FakeLParens;
 262   /// Insert this many fake ) after this token for correct indentation.
 263   unsigned FakeRParens = 0;
 264
 265   /// \c true if this token starts a binary expression, i.e. has at least
 266   /// one fake l_paren with a precedence greater than prec::Unknown.
 267   bool StartsBinaryExpression = false;
 268   /// \c true if this token ends a binary expression.
 269   bool EndsBinaryExpression = false;
 270
 271   /// Is this is an operator (or "."/"->") in a sequence of operators
 272   /// with the same precedence, contains the 0-based operator index.
 273   unsigned OperatorIndex = 0;
 274
 275   /// If this is an operator (or "."/"->") in a sequence of operators
 276   /// with the same precedence, points to the next operator.
 277   FormatToken *NextOperator = nullptr;
 278
 279   /// Is this token part of a \c DeclStmt defining multiple variables?
 280   ///
 281   /// Only set if \c Type == \c TT_StartOfName.
 282   bool PartOfMultiVariableDeclStmt = false;
 283
 284   /// Does this line comment continue a line comment section?
 285   ///
 286   /// Only set to true if \c Type == \c TT_LineComment.
 287   bool ContinuesLineCommentSection = false;
 288
 289   /// If this is a bracket, this points to the matching one.
 290   FormatToken *MatchingParen = nullptr;
 291
 292   /// The previous token in the unwrapped line.
 293   FormatToken *Previous = nullptr;
 294
 295   /// The next token in the unwrapped line.
 296   FormatToken *Next = nullptr;
 297
 298   /// If this token starts a block, this contains all the unwrapped lines
 299   /// in it.
 300   SmallVector<AnnotatedLine *, 1> Children;
 301
 302   /// Stores the formatting decision for the token once it was made.
 303   FormatDecision Decision = FD_Unformatted;
 304
 305   /// If \c true, this token has been fully formatted (indented and
 306   /// potentially re-formatted inside), and we do not allow further formatting
 307   /// changes.
 308   bool Finalized = false;
 309
 310   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
 311   bool is(TokenType TT) const { return Type == TT; }
 312   bool is(const IdentifierInfo *II) const {
 313     return II && II == Tok.getIdentifierInfo();
 314   }
 315   bool is(tok::PPKeywordKind Kind) const {
 316     return Tok.getIdentifierInfo() &&
 317            Tok.getIdentifierInfo()->getPPKeywordID() == Kind;
 318   }
 319   template <typename A, typename B> bool isOneOf(A K1, B K2) const {
 320     return is(K1) || is(K2);
 321   }
 322   template <typename A, typename B, typename... Ts>
 323   bool isOneOf(A K1, B K2, Ts... Ks) const {
 324     return is(K1) || isOneOf(K2, Ks...);
 325   }
 326   template <typename T> bool isNot(T Kind) const { return !is(Kind); }
 327
 328   /// \c true if this token starts a sequence with the given tokens in order,
 329   /// following the ``Next`` pointers, ignoring comments.
 330   template <typename A, typename... Ts>
 331   bool startsSequence(A K1, Ts... Tokens) const {
 332     return startsSequenceInternal(K1, Tokens...);
 333   }
 334
 335   /// \c true if this token ends a sequence with the given tokens in order,
 336   /// following the ``Previous`` pointers, ignoring comments.
 337   template <typename A, typename... Ts>
 338   bool endsSequence(A K1, Ts... Tokens) const {
 339     return endsSequenceInternal(K1, Tokens...);
 340   }
 341
 342   bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
 343
 344   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
 345     return Tok.isObjCAtKeyword(Kind);
 346   }
 347
 348   bool isAccessSpecifier(bool ColonRequired = true) const {
 349     return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
 350            (!ColonRequired || (Next && Next->is(tok::colon)));
 351   }
 352
 353   /// Determine whether the token is a simple-type-specifier.
 354   bool isSimpleTypeSpecifier() const;
 355
 356   bool isObjCAccessSpecifier() const {
 357     return is(tok::at) && Next &&
 358            (Next->isObjCAtKeyword(tok::objc_public) ||
 359             Next->isObjCAtKeyword(tok::objc_protected) ||
 360             Next->isObjCAtKeyword(tok::objc_package) ||
 361             Next->isObjCAtKeyword(tok::objc_private));
 362   }
 363
 364   /// Returns whether \p Tok is ([{ or an opening < of a template or in
 365   /// protos.
 366   bool opensScope() const {
 367     if (is(TT_TemplateString) && TokenText.endswith("${"))
 368       return true;
 369     if (is(TT_DictLiteral) && is(tok::less))
 370       return true;
 371     return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
 372                    TT_TemplateOpener);
 373   }
 374   /// Returns whether \p Tok is )]} or a closing > of a template or in
 375   /// protos.
 376   bool closesScope() const {
 377     if (is(TT_TemplateString) && TokenText.startswith("}"))
 378       return true;
 379     if (is(TT_DictLiteral) && is(tok::greater))
 380       return true;
 381     return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
 382                    TT_TemplateCloser);
 383   }
 384
 385   /// Returns \c true if this is a "." or "->" accessing a member.
 386   bool isMemberAccess() const {
 387     return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
 388            !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
 389                     TT_LambdaArrow);
 390   }
 391
 392   bool isUnaryOperator() const {
 393     switch (Tok.getKind()) {
 394     case tok::plus:
 395     case tok::plusplus:
 396     case tok::minus:
 397     case tok::minusminus:
 398     case tok::exclaim:
 399     case tok::tilde:
 400     case tok::kw_sizeof:
 401     case tok::kw_alignof:
 402       return true;
 403     default:
 404       return false;
 405     }
 406   }
 407
 408   bool isBinaryOperator() const {
 409     // Comma is a binary operator, but does not behave as such wrt. formatting.
 410     return getPrecedence() > prec::Comma;
 411   }
 412
 413   bool isTrailingComment() const {
 414     return is(tok::comment) &&
 415            (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
 416   }
 417
 418   /// Returns \c true if this is a keyword that can be used
 419   /// like a function call (e.g. sizeof, typeid, ...).
 420   bool isFunctionLikeKeyword() const {
 421     switch (Tok.getKind()) {
 422     case tok::kw_throw:
 423     case tok::kw_typeid:
 424     case tok::kw_return:
 425     case tok::kw_sizeof:
 426     case tok::kw_alignof:
 427     case tok::kw_alignas:
 428     case tok::kw_decltype:
 429     case tok::kw_noexcept:
 430     case tok::kw_static_assert:
 431     case tok::kw___attribute:
 432       return true;
 433     default:
 434       return false;
 435     }
 436   }
 437
 438   /// Returns \c true if this is a string literal that's like a label,
 439   /// e.g. ends with "=" or ":".
 440   bool isLabelString() const {
 441     if (!is(tok::string_literal))
 442       return false;
 443     StringRef Content = TokenText;
 444     if (Content.startswith("\"") || Content.startswith("'"))
 445       Content = Content.drop_front(1);
 446     if (Content.endswith("\"") || Content.endswith("'"))
 447       Content = Content.drop_back(1);
 448     Content = Content.trim();
 449     return Content.size() > 1 &&
 450            (Content.back() == ':' || Content.back() == '=');
 451   }
 452
 453   /// Returns actual token start location without leading escaped
 454   /// newlines and whitespace.
 455   ///
 456   /// This can be different to Tok.getLocation(), which includes leading escaped
 457   /// newlines.
 458   SourceLocation getStartOfNonWhitespace() const {
 459     return WhitespaceRange.getEnd();
 460   }
 461
 462   prec::Level getPrecedence() const {
 463     return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
 464                               /*CPlusPlus11=*/true);
 465   }
 466
 467   /// Returns the previous token ignoring comments.
 468   FormatToken *getPreviousNonComment() const {
 469     FormatToken *Tok = Previous;
 470     while (Tok && Tok->is(tok::comment))
 471       Tok = Tok->Previous;
 472     return Tok;
 473   }
 474
 475   /// Returns the next token ignoring comments.
 476   const FormatToken *getNextNonComment() const {
 477     const FormatToken *Tok = Next;
 478     while (Tok && Tok->is(tok::comment))
 479       Tok = Tok->Next;
 480     return Tok;
 481   }
 482
 483   /// Returns \c true if this tokens starts a block-type list, i.e. a
 484   /// list that should be indented with a block indent.
 485   bool opensBlockOrBlockTypeList(const FormatStyle &Style) const {
 486     if (is(TT_TemplateString) && opensScope())
 487       return true;
 488     return is(TT_ArrayInitializerLSquare) ||
 489            is(TT_ProtoExtensionLSquare) ||
 490            (is(tok::l_brace) &&
 491             (BlockKind == BK_Block || is(TT_DictLiteral) ||
 492              (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
 493            (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
 494                               Style.Language == FormatStyle::LK_TextProto));
 495   }
 496
 497   /// Returns whether the token is the left square bracket of a C++
 498   /// structured binding declaration.
 499   bool isCppStructuredBinding(const FormatStyle &Style) const {
 500     if (!Style.isCpp() || isNot(tok::l_square))
 501       return false;
 502     const FormatToken *T = this;
 503     do {
 504       T = T->getPreviousNonComment();
 505     } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
 506                              tok::ampamp));
 507     return T && T->is(tok::kw_auto);
 508   }
 509
 510   /// Same as opensBlockOrBlockTypeList, but for the closing token.
 511   bool closesBlockOrBlockTypeList(const FormatStyle &Style) const {
 512     if (is(TT_TemplateString) && closesScope())
 513       return true;
 514     return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
 515   }
 516
 517   /// Return the actual namespace token, if this token starts a namespace
 518   /// block.
 519   const FormatToken *getNamespaceToken() const {
 520     const FormatToken *NamespaceTok = this;
 521     if (is(tok::comment))
 522       NamespaceTok = NamespaceTok->getNextNonComment();
 523     // Detect "(inline)? namespace" in the beginning of a line.
 524     if (NamespaceTok && NamespaceTok->is(tok::kw_inline))
 525       NamespaceTok = NamespaceTok->getNextNonComment();
 526     return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok
 527                                                                : nullptr;
 528   }
 529
 530 private:
 531   // Disallow copying.
 532   FormatToken(const FormatToken &) = delete;
 533   void operator=(const FormatToken &) = delete;
 534
 535   template <typename A, typename... Ts>
 536   bool startsSequenceInternal(A K1, Ts... Tokens) const {
 537     if (is(tok::comment) && Next)
 538       return Next->startsSequenceInternal(K1, Tokens...);
 539     return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
 540   }
 541
 542   template <typename A> bool startsSequenceInternal(A K1) const {
 543     if (is(tok::comment) && Next)
 544       return Next->startsSequenceInternal(K1);
 545     return is(K1);
 546   }
 547
 548   template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
 549     if (is(tok::comment) && Previous)
 550       return Previous->endsSequenceInternal(K1);
 551     return is(K1);
 552   }
 553
 554   template <typename A, typename... Ts>
 555   bool endsSequenceInternal(A K1, Ts... Tokens) const {
 556     if (is(tok::comment) && Previous)
 557       return Previous->endsSequenceInternal(K1, Tokens...);
 558     return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
 559   }
 560 };
 561
 562 class ContinuationIndenter;
 563 struct LineState;
 564
     /// Base class for the role a \c FormatToken can carry in its \c Role
     /// member.
     ///
     /// The default \c formatFromToken and \c formatAfterToken return a penalty
     /// of 0 and the default \c CommaFound does nothing; the destructor and
     /// \c precomputeFormattingInfos are only declared in this header.
 565 class TokenRole {
 566 public:
 567   TokenRole(const FormatStyle &Style) : Style(Style) {}
 568   virtual ~TokenRole();
 569
 570   /// After the \c TokenAnnotator has finished annotating all the tokens,
 571   /// this function precomputes required information for formatting.
 572   virtual void precomputeFormattingInfos(const FormatToken *Token);
 573
 574   /// Apply the special formatting that the given role demands.
 575   ///
 576   /// Assumes that the token having this role is already formatted.
 577   ///
 578   /// Continues formatting from \p State leaving indentation to \p Indenter and
 579   /// returns the total penalty that this formatting incurs.
       ///
       /// The default implementation ignores its arguments and returns 0.
 580   virtual unsigned formatFromToken(LineState &State,
 581                                    ContinuationIndenter *Indenter,
 582                                    bool DryRun) {
 583     return 0;
 584   }
 585
 586   /// Same as \c formatFromToken, but assumes that the first token has
 587   /// already been set thereby deciding on the first line break.
       ///
       /// The default implementation ignores its arguments and returns 0.
 588   virtual unsigned formatAfterToken(LineState &State,
 589                                     ContinuationIndenter *Indenter,
 590                                     bool DryRun) {
 591     return 0;
 592   }
 593
 594   /// Notifies the \c Role that a comma was found.
 595   virtual void CommaFound(const FormatToken *Token) {}
 596
 597 protected:
       /// The style passed to the constructor. Stored by reference, so the
       /// referenced \c FormatStyle has to outlive this role.
 598   const FormatStyle &Style;
 599 };
 600
     /// A \c TokenRole for comma separated lists.
     ///
     /// It records the comma tokens reported through \c CommaFound and holds
     /// per-item lengths and precomputed column layouts (\c Formats). The
     /// formatting overrides are only declared in this header.
 601 class CommaSeparatedList : public TokenRole {
 602 public:
 603   CommaSeparatedList(const FormatStyle &Style)
 604       : TokenRole(Style), HasNestedBracedList(false) {}
 605
 606   void precomputeFormattingInfos(const FormatToken *Token) override;
 607
 608   unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
 609                             bool DryRun) override;
 610
 611   unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
 612                            bool DryRun) override;
 613
 614   /// Adds \p Token as the next comma to the \c CommaSeparated list.
 615   void CommaFound(const FormatToken *Token) override {
 616     Commas.push_back(Token);
 617   }
 618
 619 private:
 620   /// A struct that holds information on how to format a given list with
 621   /// a specific number of columns.
 622   struct ColumnFormat {
 623     /// The number of columns to use.
 624     unsigned Columns;
 625
 626     /// The total width in characters.
 627     unsigned TotalWidth;
 628
 629     /// The number of lines required for this format.
 630     unsigned LineCount;
 631
 632     /// The size of each column in characters.
 633     SmallVector<unsigned, 8> ColumnSizes;
 634   };
 635
 636   /// Calculate which \c ColumnFormat fits best into
 637   /// \p RemainingCharacters.
 638   const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
 639
 640   /// The ordered \c FormatTokens making up the commas of this list.
 641   SmallVector<const FormatToken *, 8> Commas;
 642
 643   /// The length of each of the list's items in characters including the
 644   /// trailing comma.
 645   SmallVector<unsigned, 8> ItemLengths;
 646
 647   /// Precomputed formats that can be used for this list.
 648   SmallVector<ColumnFormat, 4> Formats;
 649
       /// Initialized to \c false by the constructor; it is neither read nor
       /// modified anywhere else in this header.
 650   bool HasNestedBracedList;
 651 };
 652
 653 /// Encapsulates keywords that are context sensitive or for languages not
 654 /// properly supported by Clang's lexer.
 655 struct AdditionalKeywords {
       /// Obtains the \c IdentifierInfo for every keyword spelling from
       /// \p IdentTable (via \c IdentifierTable::get) and stores its address in
       /// the matching \c kw_* member, then builds \c JsExtraKeywords from those
       /// members. The stored pointers refer into \p IdentTable.
 656   AdditionalKeywords(IdentifierTable &IdentTable) {
 657     kw_final = &IdentTable.get("final");
 658     kw_override = &IdentTable.get("override");
 659     kw_in = &IdentTable.get("in");
 660     kw_of = &IdentTable.get("of");
 661     kw_CF_ENUM = &IdentTable.get("CF_ENUM");
 662     kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
 663     kw_NS_ENUM = &IdentTable.get("NS_ENUM");
 664     kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
 665
 666     kw_as = &IdentTable.get("as");
 667     kw_async = &IdentTable.get("async");
 668     kw_await = &IdentTable.get("await");
 669     kw_declare = &IdentTable.get("declare");
 670     kw_finally = &IdentTable.get("finally");
 671     kw_from = &IdentTable.get("from");
 672     kw_function = &IdentTable.get("function");
 673     kw_get = &IdentTable.get("get");
 674     kw_import = &IdentTable.get("import");
 675     kw_is = &IdentTable.get("is");
 676     kw_let = &IdentTable.get("let");
 677     kw_module = &IdentTable.get("module");
 678     kw_readonly = &IdentTable.get("readonly");
 679     kw_set = &IdentTable.get("set");
 680     kw_type = &IdentTable.get("type");
 681     kw_typeof = &IdentTable.get("typeof");
 682     kw_var = &IdentTable.get("var");
 683     kw_yield = &IdentTable.get("yield");
 684
 685     kw_abstract = &IdentTable.get("abstract");
 686     kw_assert = &IdentTable.get("assert");
 687     kw_extends = &IdentTable.get("extends");
 688     kw_implements = &IdentTable.get("implements");
 689     kw_instanceof = &IdentTable.get("instanceof");
 690     kw_interface = &IdentTable.get("interface");
 691     kw_native = &IdentTable.get("native");
 692     kw_package = &IdentTable.get("package");
 693     kw_synchronized = &IdentTable.get("synchronized");
 694     kw_throws = &IdentTable.get("throws");
 695     kw___except = &IdentTable.get("__except");
 696     kw___has_include = &IdentTable.get("__has_include");
 697     kw___has_include_next = &IdentTable.get("__has_include_next");
 698
 699     kw_mark = &IdentTable.get("mark");
 700
 701     kw_extend = &IdentTable.get("extend");
 702     kw_option = &IdentTable.get("option");
 703     kw_optional = &IdentTable.get("optional");
 704     kw_repeated = &IdentTable.get("repeated");
 705     kw_required = &IdentTable.get("required");
 706     kw_returns = &IdentTable.get("returns");
 707
 708     kw_signals = &IdentTable.get("signals");
 709     kw_qsignals = &IdentTable.get("Q_SIGNALS");
 710     kw_slots = &IdentTable.get("slots");
 711     kw_qslots = &IdentTable.get("Q_SLOTS");
 712
 713     // Keep this at the end of the constructor to make sure everything here is
 714     // already initialized.
         // The set holds copies of the kw_* pointer values, so every member
         // listed below must have been assigned above.
 715     JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
 716         {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
 717          kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
 718          kw_set, kw_type, kw_typeof, kw_var, kw_yield,
 719          // Keywords from the Java section.
 720          kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
 721   }
 722
 723   // Context sensitive keywords.
 724   IdentifierInfo *kw_final;
 725   IdentifierInfo *kw_override;
 726   IdentifierInfo *kw_in;
 727   IdentifierInfo *kw_of;
 728   IdentifierInfo *kw_CF_ENUM;
 729   IdentifierInfo *kw_CF_OPTIONS;
 730   IdentifierInfo *kw_NS_ENUM;
 731   IdentifierInfo *kw_NS_OPTIONS;
 732   IdentifierInfo *kw___except;
 733   IdentifierInfo *kw___has_include;
 734   IdentifierInfo *kw___has_include_next;
 735
 736   // JavaScript keywords.
 737   IdentifierInfo *kw_as;
 738   IdentifierInfo *kw_async;
 739   IdentifierInfo *kw_await;
 740   IdentifierInfo *kw_declare;
 741   IdentifierInfo *kw_finally;
 742   IdentifierInfo *kw_from;
 743   IdentifierInfo *kw_function;
 744   IdentifierInfo *kw_get;
 745   IdentifierInfo *kw_import;
 746   IdentifierInfo *kw_is;
 747   IdentifierInfo *kw_let;
 748   IdentifierInfo *kw_module;
 749   IdentifierInfo *kw_readonly;
 750   IdentifierInfo *kw_set;
 751   IdentifierInfo *kw_type;
 752   IdentifierInfo *kw_typeof;
 753   IdentifierInfo *kw_var;
 754   IdentifierInfo *kw_yield;
 755
 756   // Java keywords.
 757   IdentifierInfo *kw_abstract;
 758   IdentifierInfo *kw_assert;
 759   IdentifierInfo *kw_extends;
 760   IdentifierInfo *kw_implements;
 761   IdentifierInfo *kw_instanceof;
 762   IdentifierInfo *kw_interface;
 763   IdentifierInfo *kw_native;
 764   IdentifierInfo *kw_package;
 765   IdentifierInfo *kw_synchronized;
 766   IdentifierInfo *kw_throws;
 767
 768   // Pragma keywords.
 769   IdentifierInfo *kw_mark;
 770
 771   // Proto keywords.
 772   IdentifierInfo *kw_extend;
 773   IdentifierInfo *kw_option;
 774   IdentifierInfo *kw_optional;
 775   IdentifierInfo *kw_repeated;
 776   IdentifierInfo *kw_required;
 777   IdentifierInfo *kw_returns;
 778
 779   // Qt keywords.
 780   IdentifierInfo *kw_signals;
 781   IdentifierInfo *kw_qsignals;
 782   IdentifierInfo *kw_slots;
 783   IdentifierInfo *kw_qslots;
 784
 785   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
 786   /// \c false if it is a keyword or a pseudo keyword.
       ///
       /// Concretely: \p Tok must be a \c tok::identifier whose
       /// \c IdentifierInfo is not contained in \c JsExtraKeywords.
 787   bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
 788     return Tok.is(tok::identifier) &&
 789            JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
 790                JsExtraKeywords.end();
 791   }
 792
 793 private:
 794   /// The JavaScript keywords beyond the C++ keyword set.
       /// Filled once at the end of the constructor.
 795   std::unordered_set<IdentifierInfo *> JsExtraKeywords;
 796 };
 797
 798 } // namespace format
 799 } // namespace clang
 800
 801 #endif