contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h

   1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file implements a token annotator, i.e. creates
  12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
  17 #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
  18
  19 #include "UnwrappedLineParser.h"
  20 #include "clang/Basic/OperatorPrecedence.h"
  21 #include "clang/Format/Format.h"
  22 #include <string>
  23
  24 namespace clang {
  25 class Lexer;
  26 class SourceManager;
  27
  28 namespace format {
  29
/// \brief Classification of a token, stored in \c AnnotatedToken::Type.
///
/// \c AnnotatedToken's constructor starts every token out as \c TT_Unknown.
/// No enumerator has an explicit value, so the numeric values follow the
/// declaration order below; inserting or reordering entries changes them.
enum TokenType {
  TT_BinaryOperator,
  TT_BlockComment,
  TT_CastRParen,
  TT_ConditionalExpr,
  TT_CtorInitializerColon,
  TT_ImplicitStringLiteral,
  TT_InlineASMColon,
  TT_InheritanceColon,
  TT_LineComment,
  TT_ObjCArrayLiteral,
  TT_ObjCBlockLParen,
  TT_ObjCDecl,
  TT_ObjCForIn,
  TT_ObjCMethodExpr,
  TT_ObjCMethodSpecifier,
  TT_ObjCProperty,
  TT_ObjCSelectorName,
  TT_OverloadedOperatorLParen,
  TT_PointerOrReference,
  TT_PureVirtualSpecifier,
  TT_RangeBasedForLoopColon,
  TT_StartOfName,
  TT_TemplateCloser,
  TT_TemplateOpener,
  TT_TrailingUnaryOperator,
  TT_UnaryOperator,
  TT_Unknown // Initial value of AnnotatedToken::Type.
};
  59
/// \brief Classification of a whole line, stored in \c AnnotatedLine::Type.
///
/// No enumerator has an explicit value, so the numeric values follow the
/// declaration order below.
enum LineType {
  LT_Invalid,
  LT_Other,
  LT_BuilderTypeCall,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty // An @property line.
};
  70
  71 class AnnotatedToken {
  72 public:
  73   explicit AnnotatedToken(const FormatToken &FormatTok)
  74       : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
  75         CanBreakBefore(false), MustBreakBefore(false),
  76         ClosesTemplateDeclaration(false), MatchingParen(NULL),
  77         ParameterCount(0), BindingStrength(0), SplitPenalty(0),
  78         LongestObjCSelectorName(0), Parent(NULL),
  79         FakeRParens(0), LastInChainOfCalls(false),
  80         PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {}
  81
  82   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
  83
  84   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
  85     return is(K1) || is(K2);
  86   }
  87
  88   bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
  89     return is(K1) || is(K2) || is(K3);
  90   }
  91
  92   bool isOneOf(
  93       tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
  94       tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
  95       tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
  96       tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
  97       tok::TokenKind K10 = tok::NUM_TOKENS,
  98       tok::TokenKind K11 = tok::NUM_TOKENS,
  99       tok::TokenKind K12 = tok::NUM_TOKENS) const {
 100     return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
 101            is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
 102   }
 103
 104   bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
 105
 106   bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
 107     return FormatTok.Tok.isObjCAtKeyword(Kind);
 108   }
 109
 110   bool isAccessSpecifier(bool ColonRequired = true) const {
 111     return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
 112            (!ColonRequired ||
 113             (!Children.empty() && Children[0].is(tok::colon)));
 114   }
 115
 116   bool isObjCAccessSpecifier() const {
 117     return is(tok::at) && !Children.empty() &&
 118            (Children[0].isObjCAtKeyword(tok::objc_public) ||
 119             Children[0].isObjCAtKeyword(tok::objc_protected) ||
 120             Children[0].isObjCAtKeyword(tok::objc_package) ||
 121             Children[0].isObjCAtKeyword(tok::objc_private));
 122   }
 123
 124   /// \brief Returns whether \p Tok is ([{ or a template opening <.
 125   bool opensScope() const;
 126   /// \brief Returns whether \p Tok is )]} or a template opening >.
 127   bool closesScope() const;
 128
 129   bool isUnaryOperator() const;
 130   bool isBinaryOperator() const;
 131   bool isTrailingComment() const;
 132
 133   FormatToken FormatTok;
 134
 135   TokenType Type;
 136
 137   unsigned SpacesRequiredBefore;
 138   bool CanBreakBefore;
 139   bool MustBreakBefore;
 140
 141   bool ClosesTemplateDeclaration;
 142
 143   AnnotatedToken *MatchingParen;
 144
 145   /// \brief Number of parameters, if this is "(", "[" or "<".
 146   ///
 147   /// This is initialized to 1 as we don't need to distinguish functions with
 148   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
 149   /// the number of commas.
 150   unsigned ParameterCount;
 151
 152   /// \brief The total length of the line up to and including this token.
 153   unsigned TotalLength;
 154
 155   // FIXME: Come up with a 'cleaner' concept.
 156   /// \brief The binding strength of a token. This is a combined value of
 157   /// operator precedence, parenthesis nesting, etc.
 158   unsigned BindingStrength;
 159
 160   /// \brief Penalty for inserting a line break before this token.
 161   unsigned SplitPenalty;
 162
 163   /// \brief If this is the first ObjC selector name in an ObjC method
 164   /// definition or call, this contains the length of the longest name.
 165   unsigned LongestObjCSelectorName;
 166
 167   std::vector<AnnotatedToken> Children;
 168   AnnotatedToken *Parent;
 169
 170   /// \brief Stores the number of required fake parentheses and the
 171   /// corresponding operator precedence.
 172   ///
 173   /// If multiple fake parentheses start at a token, this vector stores them in
 174   /// reverse order, i.e. inner fake parenthesis first.
 175   SmallVector<prec::Level, 4>  FakeLParens;
 176   /// \brief Insert this many fake ) after this token for correct indentation.
 177   unsigned FakeRParens;
 178
 179   /// \brief Is this the last "." or "->" in a builder-type call?
 180   bool LastInChainOfCalls;
 181
 182   /// \brief Is this token part of a \c DeclStmt defining multiple variables?
 183   ///
 184   /// Only set if \c Type == \c TT_StartOfName.
 185   bool PartOfMultiVariableDeclStmt;
 186
 187   /// \brief Set to \c true for "("-tokens if this is the last token other than
 188   /// ")" in the next higher parenthesis level.
 189   ///
 190   /// If this is \c true, no more formatting decisions have to be made on the
 191   /// next higher parenthesis level, enabling optimizations.
 192   ///
 193   /// Example:
 194   /// \code
 195   /// aaaaaa(aaaaaa());
 196   ///              ^  // Set to true for this parenthesis.
 197   /// \endcode
 198   bool NoMoreTokensOnLevel;
 199
 200   /// \brief Returns the previous token ignoring comments.
 201   AnnotatedToken *getPreviousNoneComment() const;
 202
 203   /// \brief Returns the next token ignoring comments.
 204   const AnnotatedToken *getNextNoneComment() const;
 205 };
 206
 207 class AnnotatedLine {
 208 public:
 209   AnnotatedLine(const UnwrappedLine &Line)
 210       : First(Line.Tokens.front()), Level(Line.Level),
 211         InPPDirective(Line.InPPDirective),
 212         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
 213         StartsDefinition(false) {
 214     assert(!Line.Tokens.empty());
 215     AnnotatedToken *Current = &First;
 216     for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
 217                                                 E = Line.Tokens.end();
 218          I != E; ++I) {
 219       Current->Children.push_back(AnnotatedToken(*I));
 220       Current->Children[0].Parent = Current;
 221       Current = &Current->Children[0];
 222     }
 223     Last = Current;
 224   }
 225   AnnotatedLine(const AnnotatedLine &Other)
 226       : First(Other.First), Type(Other.Type), Level(Other.Level),
 227         InPPDirective(Other.InPPDirective),
 228         MustBeDeclaration(Other.MustBeDeclaration),
 229         MightBeFunctionDecl(Other.MightBeFunctionDecl),
 230         StartsDefinition(Other.StartsDefinition) {
 231     Last = &First;
 232     while (!Last->Children.empty()) {
 233       Last->Children[0].Parent = Last;
 234       Last = &Last->Children[0];
 235     }
 236   }
 237
 238   AnnotatedToken First;
 239   AnnotatedToken *Last;
 240
 241   LineType Type;
 242   unsigned Level;
 243   bool InPPDirective;
 244   bool MustBeDeclaration;
 245   bool MightBeFunctionDecl;
 246   bool StartsDefinition;
 247 };
 248
 249 inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
 250   return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
 251 }
 252
 253 /// \brief Determines extra information about the tokens comprising an
 254 /// \c UnwrappedLine.
 255 class TokenAnnotator {
 256 public:
 257   TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
 258                  IdentifierInfo &Ident_in)
 259       : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
 260   }
 261
 262   void annotate(AnnotatedLine &Line);
 263   void calculateFormattingInformation(AnnotatedLine &Line);
 264
 265 private:
 266   /// \brief Calculate the penalty for splitting before \c Tok.
 267   unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
 268
 269   bool spaceRequiredBetween(const AnnotatedLine &Line,
 270                             const AnnotatedToken &Left,
 271                             const AnnotatedToken &Right);
 272
 273   bool spaceRequiredBefore(const AnnotatedLine &Line,
 274                            const AnnotatedToken &Tok);
 275
 276   bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
 277
 278   void printDebugInfo(const AnnotatedLine &Line);
 279
 280   const FormatStyle &Style;
 281   SourceManager &SourceMgr;
 282   Lexer &Lex;
 283
 284   // Contextual keywords:
 285   IdentifierInfo &Ident_in;
 286 };
 287
 288 } // end namespace format
 289 } // end namespace clang
 290
 291 #endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H