1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements a token annotator, i.e. creates
12 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
17 #define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
19 #include "UnwrappedLineParser.h"
20 #include "clang/Basic/OperatorPrecedence.h"
21 #include "clang/Format/Format.h"
// NOTE(review): enumerators of an enum whose opening line is not visible in
// this excerpt. AnnotatedToken initializes its Type member with TT_Unknown,
// so these TT_* values are presumably the possible values of that member -
// confirm against the full enum.
35 TT_CtorInitializerColon,
36 TT_ImplicitStringLiteral,
45 TT_ObjCMethodSpecifier,
48 TT_OverloadedOperatorLParen,
49 TT_PointerOrReference,
50 TT_PureVirtualSpecifier,
51 TT_RangeBasedForLoopColon,
55 TT_TrailingUnaryOperator,
// NOTE(review): enumerators of an enum (LT_* prefix) whose opening line is not
// visible in this excerpt; presumably the kind of an AnnotatedLine - confirm.
64 LT_PreprocessorDirective,
65 LT_VirtualFunctionDecl,
66 LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
68 LT_ObjCProperty // An @property line.
/// \brief A \c FormatToken together with the annotation state attached to it.
///
/// Holds a copy of the underlying \c FormatToken plus per-token fields (type,
/// spacing and break flags, penalties, fake parentheses). Tokens are linked to
/// each other through \c Children and \c Parent.
71 class AnnotatedToken {
/// \brief Wraps \p FormatTok. The type starts as \c TT_Unknown, the counters
/// listed below start at 0, the flags at \c false and the pointers at \c NULL.
73 explicit AnnotatedToken(const FormatToken &FormatTok)
74 : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
75 CanBreakBefore(false), MustBreakBefore(false),
76 ClosesTemplateDeclaration(false), MatchingParen(NULL),
77 ParameterCount(0), BindingStrength(0), SplitPenalty(0),
78 LongestObjCSelectorName(0), Parent(NULL),
79 FakeRParens(0), LastInChainOfCalls(false),
80 PartOfMultiVariableDeclStmt(false), NoMoreTokensOnLevel(false) {}
/// \brief Returns whether the underlying token is of kind \p Kind.
82 bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
/// \brief Returns whether the underlying token is of kind \p K1 or \p K2.
84 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
85 return is(K1) || is(K2);
/// \brief Returns whether the underlying token is of kind \p K1, \p K2 or
/// \p K3.
88 bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
89 return is(K1) || is(K2) || is(K3);
// Parameter list of a further overload taking four to twelve kinds (its
// opening line is not shown in this excerpt). Omitted trailing arguments
// default to tok::NUM_TOKENS; presumably no real token has that kind, so the
// defaults never match - confirm.
93 tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
94 tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
95 tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
96 tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
97 tok::TokenKind K10 = tok::NUM_TOKENS,
98 tok::TokenKind K11 = tok::NUM_TOKENS,
99 tok::TokenKind K12 = tok::NUM_TOKENS) const {
100 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
101 is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
/// \brief Returns whether the underlying token is not of kind \p Kind.
104 bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
/// \brief Forwards to \c isObjCAtKeyword of the underlying token.
106 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
107 return FormatTok.Tok.isObjCAtKeyword(Kind);
/// \brief Returns whether this token is one of the keywords \c public,
/// \c protected or \c private.
///
/// The visible part of the condition additionally checks that the first child
/// is a colon. NOTE(review): how \p ColonRequired relaxes that check is on a
/// line not shown in this excerpt - confirm.
110 bool isAccessSpecifier(bool ColonRequired = true) const {
111 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
113 (!Children.empty() && Children[0].is(tok::colon)));
/// \brief Returns whether this token is an \c @ whose first child is one of
/// the Objective-C keywords public, protected, package or private.
116 bool isObjCAccessSpecifier() const {
117 return is(tok::at) && !Children.empty() &&
118 (Children[0].isObjCAtKeyword(tok::objc_public) ||
119 Children[0].isObjCAtKeyword(tok::objc_protected) ||
120 Children[0].isObjCAtKeyword(tok::objc_package) ||
121 Children[0].isObjCAtKeyword(tok::objc_private));
124 /// \brief Returns whether this token is ([{ or a template opening <.
125 bool opensScope() const;
126 /// \brief Returns whether this token is )]} or a template closing >.
127 bool closesScope() const;
129 bool isUnaryOperator() const;
130 bool isBinaryOperator() const;
131 bool isTrailingComment() const;
/// \brief The underlying token, stored by value.
133 FormatToken FormatTok;
137 unsigned SpacesRequiredBefore;
139 bool MustBreakBefore;
141 bool ClosesTemplateDeclaration;
143 AnnotatedToken *MatchingParen;
145 /// \brief Number of parameters, if this is "(", "[" or "<".
147 /// This is initialized to 1 as we don't need to distinguish functions with
148 /// 0 parameters from functions with 1 parameter. Thus, we can simply count
149 /// the number of commas.
// NOTE(review): the constructor of this class initializes this member to 0,
// not 1 - confirm which of the two the comment should describe.
150 unsigned ParameterCount;
152 /// \brief The total length of the line up to and including this token.
// NOTE(review): this member does not appear in the constructor initializer
// list - confirm it is always assigned before it is read.
153 unsigned TotalLength;
155 // FIXME: Come up with a 'cleaner' concept.
156 /// \brief The binding strength of a token. This is a combined value of
157 /// operator precedence, parenthesis nesting, etc.
158 unsigned BindingStrength;
160 /// \brief Penalty for inserting a line break before this token.
161 unsigned SplitPenalty;
163 /// \brief If this is the first ObjC selector name in an ObjC method
164 /// definition or call, this contains the length of the longest name.
165 unsigned LongestObjCSelectorName;
/// \brief Tokens following this one. The \c AnnotatedLine constructors in this
/// file only ever read and write element 0.
167 std::vector<AnnotatedToken> Children;
/// \brief The token whose \c Children contains this token, or \c NULL.
168 AnnotatedToken *Parent;
170 /// \brief Stores the number of required fake parentheses and the
171 /// corresponding operator precedence.
173 /// If multiple fake parentheses start at a token, this vector stores them in
174 /// reverse order, i.e. inner fake parenthesis first.
175 SmallVector<prec::Level, 4> FakeLParens;
176 /// \brief Insert this many fake ) after this token for correct indentation.
177 unsigned FakeRParens;
179 /// \brief Is this the last "." or "->" in a builder-type call?
180 bool LastInChainOfCalls;
182 /// \brief Is this token part of a \c DeclStmt defining multiple variables?
184 /// Only set if \c Type == \c TT_StartOfName.
185 bool PartOfMultiVariableDeclStmt;
187 /// \brief Set to \c true for "("-tokens if this is the last token other than
188 /// ")" in the next higher parenthesis level.
190 /// If this is \c true, no more formatting decisions have to be made on the
191 /// next higher parenthesis level, enabling optimizations.
195 /// aaaaaa(aaaaaa());
196 /// ^ // Set to true for this parenthesis.
198 bool NoMoreTokensOnLevel;
200 /// \brief Returns the previous token ignoring comments.
201 AnnotatedToken *getPreviousNoneComment() const;
203 /// \brief Returns the next token ignoring comments.
204 const AnnotatedToken *getNextNoneComment() const;
/// \brief An \c UnwrappedLine converted into a chain of \c AnnotatedTokens
/// starting at \c First, together with line-level flags.
207 class AnnotatedLine {
/// \brief Builds the token chain from \p Line.
///
/// \c First wraps the first token of \p Line; every later token is appended
/// to the \c Children of the token before it, with \c Parent pointing back.
// NOTE(review): Line.Tokens.front() in the initializer list is evaluated
// before the assert in the body runs, so the assert cannot guard it.
// NOTE(review): Type is not in this initializer list - confirm it is assigned
// before it is read.
209 AnnotatedLine(const UnwrappedLine &Line)
210 : First(Line.Tokens.front()), Level(Line.Level),
211 InPPDirective(Line.InPPDirective),
212 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
213 StartsDefinition(false) {
214 assert(!Line.Tokens.empty());
215 AnnotatedToken *Current = &First;
// Iteration starts at the second token; the first one already became First.
216 for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
217 E = Line.Tokens.end();
219 Current->Children.push_back(AnnotatedToken(*I));
220 Current->Children[0].Parent = Current;
221 Current = &Current->Children[0];
/// \brief Copies \p Other, then walks the copied chain through Children[0]
/// and points the \c Parent of each child at its predecessor.
// Presumably needed because the Parent pointers copied along with the tokens
// would otherwise still refer to tokens owned by Other - confirm. The initial
// value of Last is set on a line not shown in this excerpt.
225 AnnotatedLine(const AnnotatedLine &Other)
226 : First(Other.First), Type(Other.Type), Level(Other.Level),
227 InPPDirective(Other.InPPDirective),
228 MustBeDeclaration(Other.MustBeDeclaration),
229 MightBeFunctionDecl(Other.MightBeFunctionDecl),
230 StartsDefinition(Other.StartsDefinition) {
232 while (!Last->Children.empty()) {
233 Last->Children[0].Parent = Last;
234 Last = &Last->Children[0];
/// \brief Head of the token chain, stored by value.
238 AnnotatedToken First;
/// \brief Pointer into the chain; the copy constructor advances it until it
/// reaches a token without children.
239 AnnotatedToken *Last;
244 bool MustBeDeclaration;
245 bool MightBeFunctionDecl;
246 bool StartsDefinition;
/// \brief Returns the result of \c getBinOpPrecedence for the kind of the
/// token underlying \p Tok.
// NOTE(review): the two literal true arguments are positional flags of
// getBinOpPrecedence; see clang/Basic/OperatorPrecedence.h for their meaning.
249 inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
250 return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
253 /// \brief Determines extra information about the tokens comprising an
254 /// \c UnwrappedLine.
255 class TokenAnnotator {
/// \brief Stores the given style, source manager, lexer and identifier.
///
/// \c Style, \c SourceMgr and \c Ident_in are held as references (see the
/// members below), so the referenced objects must outlive this annotator.
257 TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
258 IdentifierInfo &Ident_in)
259 : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
// Both entry points take the line by non-const reference, so they may modify
// it; their definitions are not part of this header.
262 void annotate(AnnotatedLine &Line);
263 void calculateFormattingInformation(AnnotatedLine &Line);
266 /// \brief Calculate the penalty for splitting before \c Tok.
267 unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
269 bool spaceRequiredBetween(const AnnotatedLine &Line,
270 const AnnotatedToken &Left,
271 const AnnotatedToken &Right);
273 bool spaceRequiredBefore(const AnnotatedLine &Line,
274 const AnnotatedToken &Tok);
276 bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
278 void printDebugInfo(const AnnotatedLine &Line);
280 const FormatStyle &Style;
281 SourceManager &SourceMgr;
284 // Contextual keywords:
285 IdentifierInfo &Ident_in;
288 } // end namespace format
289 } // end namespace clang
291 #endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H