1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements an indenter that manages the indentation of
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
33 class WhitespaceManager;
35 class ContinuationIndenter {
37 /// \brief Constructs a \c ContinuationIndenter. The line to format and its
38 /// first indent are passed later to \c getInitialState.
39 ContinuationIndenter(const FormatStyle &Style,
40 const AdditionalKeywords &Keywords,
41 const SourceManager &SourceMgr,
42 WhitespaceManager &Whitespaces,
43 encoding::Encoding Encoding,
44 bool BinPackInconclusiveFunctions);
46 /// \brief Get the initial state, i.e. the state after placing \p Line's
47 /// first token at \p FirstIndent.
48 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
51 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
53 /// \brief Returns \c true, if a line break after \p State is allowed.
54 bool canBreak(const LineState &State);
56 /// \brief Returns \c true, if a line break after \p State is mandatory.
57 bool mustBreak(const LineState &State);
59 /// \brief Appends the next token to \p State and updates information
60 /// necessary for indentation.
62 /// Puts the token on the current line if \p Newline is \c false and adds a
63 /// line break and necessary indentation otherwise.
65 /// If \p DryRun is \c false, also creates and stores the required
67 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
68 unsigned ExtraSpaces = 0);
70 /// \brief Get the column limit for this line. This is the style's column
71 /// limit, potentially reduced for preprocessor definitions.
72 unsigned getColumnLimit(const LineState &State) const;
75 /// \brief Mark the next token as consumed in \p State and modify its stacks
77 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
79 /// \brief Update 'State' according to the next token's fake left parentheses.
80 void moveStatePastFakeLParens(LineState &State, bool Newline);
81 /// \brief Update 'State' according to the next token's fake r_parens.
82 void moveStatePastFakeRParens(LineState &State);
84 /// \brief Update 'State' according to the next token being one of "(<{[".
85 void moveStatePastScopeOpener(LineState &State, bool Newline);
86 /// \brief Update 'State' according to the next token being one of ")>}]".
87 void moveStatePastScopeCloser(LineState &State);
88 /// \brief Update 'State' with the next token opening a nested block.
89 void moveStateToNewBlock(LineState &State);
91 /// \brief If the current token sticks out over the end of the line, break
94 /// \returns An extra penalty if a token was broken, otherwise 0.
96 /// The returned penalty will cover the cost of the additional line breaks and
97 /// column limit violation in all lines except for the last one. The penalty
98 /// for the column limit violation in the last line (and in single line
99 /// tokens) is handled in \c addNextStateToQueue.
100 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
103 /// \brief Appends the next token to \p State and updates information
104 /// necessary for indentation.
106 /// Puts the token on the current line.
108 /// If \p DryRun is \c false, also creates and stores the required
110 void addTokenOnCurrentLine(LineState &State, bool DryRun,
111 unsigned ExtraSpaces);
113 /// \brief Appends the next token to \p State and updates information
114 /// necessary for indentation.
116 /// Adds a line break and necessary indentation.
118 /// If \p DryRun is \c false, also creates and stores the required
120 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
122 /// \brief Calculate the new column for a line wrap before the next token.
123 unsigned getNewLineColumn(const LineState &State);
125 /// \brief Adds a multiline token to the \p State.
127 /// \returns Extra penalty for the first line of the literal: last line is
128 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
129 /// matter, as we don't change them.
130 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
132 /// \brief Returns \c true if the next token starts a multiline string
135 /// This includes implicitly concatenated strings, strings that will be broken
136 /// by clang-format and string literals with escaped newlines.
137 bool nextIsMultilineString(const LineState &State);
// The members below match constructor parameters by type and name.
// NOTE(review): presumably each one is initialized from the same-named
// constructor argument; the constructor definition is not part of this
// header, so confirm in the implementation file.
140 const AdditionalKeywords &Keywords;
141 const SourceManager &SourceMgr;
142 WhitespaceManager &Whitespaces;
143 encoding::Encoding Encoding;
144 bool BinPackInconclusiveFunctions;
// NOTE(review): this member has no matching constructor parameter; where its
// pattern comes from is not visible here -- confirm in the implementation.
145 llvm::Regex CommentPragmasRegex;
/// \brief Constructs the state for a single parenthesis level.
///
/// \c NestedBlockIndent starts out equal to \p Indent. \c LastOperatorWrapped
/// and \c AlignColons start as \c true; every other flag that is not taken
/// from an argument starts as \c false.
// NOTE(review): NoLineBreak is used in the initializer list, but the line
// declaring that parameter is not visible in this excerpt -- confirm against
// the full file.
149 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
151 : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
152 BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
153 BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
154 NoLineBreakInOperand(false), LastOperatorWrapped(true),
155 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
156 AlignColons(true), ObjCSelectorNameFound(false),
157 HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
159 /// \brief The position to which a specific parenthesis level needs to be
163 /// \brief The position of the last space on each level.
165 /// Used e.g. to break like:
166 /// functionCall(Parameter, otherCall(
167 /// OtherParameter));
170 /// \brief If a block relative to this parenthesis level gets wrapped, indent
172 unsigned NestedBlockIndent;
174 /// \brief The position of the first "<<" operator encountered on each level.
176 /// Used to align "<<" operators. 0 if no such operator has been encountered
178 unsigned FirstLessLess = 0;
180 /// \brief The column of a \c ? in a conditional expression.
181 unsigned QuestionColumn = 0;
183 /// \brief The position of the colon in an ObjC method declaration/call.
184 unsigned ColonPos = 0;
186 /// \brief The start of the most recent function in a builder-type call.
187 unsigned StartOfFunctionCall = 0;
189 /// \brief Contains the start of array subscript expressions, so that they
191 unsigned StartOfArraySubscripts = 0;
193 /// \brief If a nested name specifier was broken over multiple lines, this
194 /// contains the start column of the second line. Otherwise 0.
195 unsigned NestedNameSpecifierContinuation = 0;
197 /// \brief If a call expression was broken over multiple lines, this
198 /// contains the start column of the second line. Otherwise 0.
199 unsigned CallContinuation = 0;
201 /// \brief The column of the first variable name in a variable declaration.
203 /// Used to align further variables if necessary.
204 unsigned VariablePos = 0;
206 /// \brief Whether a newline needs to be inserted before the block's closing
209 /// We only want to insert a newline before the closing brace if there also
210 /// was a newline after the beginning left brace.
211 bool BreakBeforeClosingBrace : 1;
213 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
214 /// lines, in this context.
215 bool AvoidBinPacking : 1;
217 /// \brief Break after the next comma (or all the commas in this context if
218 /// \c AvoidBinPacking is \c true).
219 bool BreakBeforeParameter : 1;
221 /// \brief Line breaking in this context would break a formatting rule.
222 bool NoLineBreak : 1;
224 /// \brief Same as \c NoLineBreak, but is restricted until the end of the
225 /// operand (including the next ",").
226 bool NoLineBreakInOperand : 1;
228 /// \brief True if the last binary operator on this level was wrapped to the
230 bool LastOperatorWrapped : 1;
232 /// \brief \c true if this \c ParenState already contains a line-break.
234 /// The first line break in a certain \c ParenState causes extra penalty so
235 /// that clang-format prefers similar breaks, i.e. breaks in the same
237 bool ContainsLineBreak : 1;
239 /// \brief \c true if this \c ParenState contains multiple segments of a
240 /// builder-type call on one line.
241 bool ContainsUnwrappedBuilder : 1;
243 /// \brief \c true if the colons of the current ObjC method expression should
246 /// Not considered for memoization as it will always have the same value at
248 bool AlignColons : 1;
250 /// \brief \c true if at least one selector name was found in the current
251 /// ObjC method expression.
253 /// Not considered for memoization as it will always have the same value at
255 bool ObjCSelectorNameFound : 1;
257 /// \brief \c true if there are multiple nested blocks inside these parens.
259 /// Not considered for memoization as it will always have the same value at
261 bool HasMultipleNestedBlocks : 1;
263 /// \brief The start of a nested block (e.g. lambda introducer in C++ or
264 /// "function" in JavaScript) is not wrapped to a new line.
265 bool NestedBlockInlined : 1;
/// \brief Orders two \c ParenState objects by comparing their fields one at a
/// time, in the sequence written below.
///
/// The first field that differs decides the result. Unsigned fields are
/// compared with \c <. For a differing flag the result is this object's own
/// flag value, so an object whose flag is \c true orders before one whose flag
/// is \c false.
// NOTE(review): the end of this function is not visible in this excerpt, so
// the result for objects that are equal in every field checked here cannot be
// confirmed from this view.
267 bool operator<(const ParenState &Other) const {
268 if (Indent != Other.Indent)
269 return Indent < Other.Indent;
270 if (LastSpace != Other.LastSpace)
271 return LastSpace < Other.LastSpace;
272 if (NestedBlockIndent != Other.NestedBlockIndent)
273 return NestedBlockIndent < Other.NestedBlockIndent;
274 if (FirstLessLess != Other.FirstLessLess)
275 return FirstLessLess < Other.FirstLessLess;
276 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
277 return BreakBeforeClosingBrace;
278 if (QuestionColumn != Other.QuestionColumn)
279 return QuestionColumn < Other.QuestionColumn;
280 if (AvoidBinPacking != Other.AvoidBinPacking)
281 return AvoidBinPacking;
282 if (BreakBeforeParameter != Other.BreakBeforeParameter)
283 return BreakBeforeParameter;
// NOTE(review): no return statement is visible for the NoLineBreak check in
// this excerpt. Every other flag check here is followed by
// `return <flag>;` -- confirm that `return NoLineBreak;` is present in the
// full file, otherwise the next `if` becomes this check's body.
284 if (NoLineBreak != Other.NoLineBreak)
286 if (LastOperatorWrapped != Other.LastOperatorWrapped)
287 return LastOperatorWrapped;
288 if (ColonPos != Other.ColonPos)
289 return ColonPos < Other.ColonPos;
290 if (StartOfFunctionCall != Other.StartOfFunctionCall)
291 return StartOfFunctionCall < Other.StartOfFunctionCall;
292 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
293 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
294 if (CallContinuation != Other.CallContinuation)
295 return CallContinuation < Other.CallContinuation;
296 if (VariablePos != Other.VariablePos)
297 return VariablePos < Other.VariablePos;
298 if (ContainsLineBreak != Other.ContainsLineBreak)
299 return ContainsLineBreak;
300 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
301 return ContainsUnwrappedBuilder;
302 if (NestedBlockInlined != Other.NestedBlockInlined)
303 return NestedBlockInlined;
308 /// \brief The current state when indenting an unwrapped line.
310 /// As the indenting tries different combinations this is copied by value.
312 /// \brief The number of used columns in the current line.
315 /// \brief The token that needs to be next formatted.
316 FormatToken *NextToken;
318 /// \brief \c true if this line contains a continued for-loop section.
319 bool LineContainsContinuedForLoopSection;
321 /// \brief The \c NestingLevel at the start of this line.
322 unsigned StartOfLineLevel;
324 /// \brief The lowest \c NestingLevel on the current line.
325 unsigned LowestLevelOnLine;
327 /// \brief The start column of the string literal, if we're in a string
328 /// literal sequence, 0 otherwise.
329 unsigned StartOfStringLiteral;
331 /// \brief A stack keeping track of properties applying to parenthesis
333 std::vector<ParenState> Stack;
335 /// \brief Ignore the stack of \c ParenStates for state comparison.
337 /// In long and deeply nested unwrapped lines, the current algorithm can
338 /// be insufficient for finding the best formatting with a reasonable amount
339 /// of time and memory. Setting this flag will effectively lead to the
340 /// algorithm not analyzing some combinations. However, these combinations
341 /// rarely contain the optimal solution: In short, accepting a higher
342 /// penalty early would need to lead to different values in the \c
343 /// ParenState stack (in an otherwise identical state) and these different
344 /// values would need to lead to a significant amount of avoided penalty
347 /// FIXME: Come up with a better algorithm instead.
348 bool IgnoreStackForComparison;
350 /// \brief The indent of the first token.
351 unsigned FirstIndent;
353 /// \brief The line that is being formatted.
355 /// Does not need to be considered for memoization because it doesn't change.
356 const AnnotatedLine *Line;
358 /// \brief Comparison operator to be able to use \c LineState in \c map.
///
/// Fields are compared in this order: \c NextToken, \c Column,
/// \c LineContainsContinuedForLoopSection, \c StartOfLineLevel,
/// \c LowestLevelOnLine, \c StartOfStringLiteral. The first field that differs
/// decides the result; for the boolean field the result is this object's own
/// value. The \c ParenState stack is considered last.
359 bool operator<(const LineState &Other) const {
// NextToken is a pointer, so this orders states by token address.
360 if (NextToken != Other.NextToken)
361 return NextToken < Other.NextToken;
362 if (Column != Other.Column)
363 return Column < Other.Column;
364 if (LineContainsContinuedForLoopSection !=
365 Other.LineContainsContinuedForLoopSection)
366 return LineContainsContinuedForLoopSection;
367 if (StartOfLineLevel != Other.StartOfLineLevel)
368 return StartOfLineLevel < Other.StartOfLineLevel;
369 if (LowestLevelOnLine != Other.LowestLevelOnLine)
370 return LowestLevelOnLine < Other.LowestLevelOnLine;
371 if (StartOfStringLiteral != Other.StartOfStringLiteral)
372 return StartOfStringLiteral < Other.StartOfStringLiteral;
// NOTE(review): the statement guarded by this condition is not visible in
// this excerpt. IgnoreStackForComparison is documented as ignoring the
// ParenState stack for comparison, so presumably the stack comparison below
// is skipped when either flag is set -- confirm against the full file. As
// shown here, the condition would instead guard the stack comparison itself.
373 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
375 return Stack < Other.Stack;
379 } // end namespace format
380 } // end namespace clang