1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements an indenter that manages the indentation of continuations.
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
33 class WhitespaceManager;
/// \brief Drives the layout of one line's tokens: reports whether a break is
/// allowed or mandatory at a given \c LineState, appends the next token to a
/// state, and computes column limits and wrap columns.
35 class ContinuationIndenter {
37 /// \brief Constructs a \c ContinuationIndenter from the formatting \p Style,
38 /// the additional \p Keywords, the \p SourceMgr, the \p Whitespaces manager,
/// the \p Encoding and the \p BinPackInconclusiveFunctions flag.
///
/// The line to format and its first indent are not passed here; they are
/// supplied to \c getInitialState.
39 ContinuationIndenter(const FormatStyle &Style,
40 const AdditionalKeywords &Keywords,
41 const SourceManager &SourceMgr,
42 WhitespaceManager &Whitespaces,
43 encoding::Encoding Encoding,
44 bool BinPackInconclusiveFunctions);
46 /// \brief Get the initial state, i.e. the state after placing \p Line's
47 /// first token at \p FirstIndent.
48 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
51 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
// better home for them.
53 /// \brief Returns \c true if a line break after \p State is allowed.
54 bool canBreak(const LineState &State);
56 /// \brief Returns \c true if a line break after \p State is mandatory.
57 bool mustBreak(const LineState &State);
59 /// \brief Appends the next token to \p State and updates information
60 /// necessary for indentation.
62 /// Puts the token on the current line if \p Newline is \c false and adds a
63 /// line break and necessary indentation otherwise.
65 /// If \p DryRun is \c false, also creates and stores the changes required
/// to apply this placement.
///
/// NOTE(review): the meaning of the returned value is not documented here;
/// presumably a penalty, as for \c breakProtrudingToken — confirm.
67 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
68 unsigned ExtraSpaces = 0);
70 /// \brief Get the column limit for this line. This is the style's column
71 /// limit, potentially reduced for preprocessor definitions.
72 unsigned getColumnLimit(const LineState &State) const;
75 /// \brief Mark the next token as consumed in \p State and modify its stacks
/// accordingly.
77 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
79 /// \brief Update \p State according to the next token's fake left parentheses.
80 void moveStatePastFakeLParens(LineState &State, bool Newline);
81 /// \brief Update \p State according to the next token's fake right parentheses.
82 void moveStatePastFakeRParens(LineState &State);
84 /// \brief Update \p State according to the next token being one of "(<{[".
85 void moveStatePastScopeOpener(LineState &State, bool Newline);
86 /// \brief Update \p State according to the next token being one of ")>}]".
87 void moveStatePastScopeCloser(LineState &State);
88 /// \brief Update \p State with the next token opening a nested block.
89 void moveStateToNewBlock(LineState &State);
91 /// \brief If the current token sticks out over the end of the line, break
/// it where possible.
94 /// \returns An extra penalty if a token was broken, otherwise 0.
96 /// The returned penalty will cover the cost of the additional line breaks and
97 /// column limit violation in all lines except for the last one. The penalty
98 /// for the column limit violation in the last line (and in single line
99 /// tokens) is handled in \c addNextStateToQueue.
100 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
103 /// \brief Appends the next token to \p State and updates information
104 /// necessary for indentation.
106 /// Puts the token on the current line.
108 /// If \p DryRun is \c false, also creates and stores the changes required
/// to apply this placement.
110 void addTokenOnCurrentLine(LineState &State, bool DryRun,
111 unsigned ExtraSpaces);
113 /// \brief Appends the next token to \p State and updates information
114 /// necessary for indentation.
116 /// Adds a line break and necessary indentation.
118 /// If \p DryRun is \c false, also creates and stores the changes required
/// to apply this placement.
120 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
122 /// \brief Calculate the new column for a line wrap before the next token.
123 unsigned getNewLineColumn(const LineState &State);
125 /// \brief Adds a multiline token to the \p State.
127 /// \returns Extra penalty for the first line of the literal: last line is
128 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
129 /// matter, as we don't change them.
130 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
132 /// \brief Returns \c true if the next token starts a multiline string
/// literal.
135 /// This includes implicitly concatenated strings, strings that will be broken
136 /// by clang-format and string literals with escaped newlines.
137 bool nextIsMultilineString(const LineState &State);
// Settings and collaborators. Members named like a constructor parameter
// presumably store that argument — confirm in the implementation.
140 const AdditionalKeywords &Keywords;
141 const SourceManager &SourceMgr;
142 WhitespaceManager &Whitespaces;
143 encoding::Encoding Encoding;
144 bool BinPackInconclusiveFunctions;
145 llvm::Regex CommentPragmasRegex;
/// \brief Creates the state for one parenthesis level.
///
/// \c Indent, \c IndentLevel, \c LastSpace, \c AvoidBinPacking and
/// \c NoLineBreak are taken from the arguments. \c NestedBlockIndent starts
/// out equal to \p Indent. \c LastOperatorWrapped and \c AlignColons start as
/// \c true; every other flag set here starts as \c false.
149 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
150 bool AvoidBinPacking, bool NoLineBreak)
151 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
152 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
153 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
154 NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
155 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
156 AlignColons(true), ObjCSelectorNameFound(false),
157 HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
159 /// \brief The position to which a specific parenthesis level needs to be
/// indented.
163 /// \brief The number of indentation levels of the block.
164 unsigned IndentLevel;
166 /// \brief The position of the last space on each level.
168 /// Used e.g. to break like:
169 /// functionCall(Parameter, otherCall(
170 /// OtherParameter));
173 /// \brief If a block relative to this parenthesis level gets wrapped, indent
/// it this much.
175 unsigned NestedBlockIndent;
177 /// \brief The position of the first "<<" operator encountered on each level.
179 /// Used to align "<<" operators. 0 if no such operator has been encountered
/// on this level.
181 unsigned FirstLessLess = 0;
183 /// \brief The column of a \c ? in a conditional expression.
184 unsigned QuestionColumn = 0;
186 /// \brief The position of the colon in an ObjC method declaration/call.
187 unsigned ColonPos = 0;
189 /// \brief The start of the most recent function in a builder-type call.
190 unsigned StartOfFunctionCall = 0;
192 /// \brief Contains the start of array subscript expressions, so that they
/// can be aligned.
194 unsigned StartOfArraySubscripts = 0;
196 /// \brief If a nested name specifier was broken over multiple lines, this
197 /// contains the start column of the second line. Otherwise 0.
198 unsigned NestedNameSpecifierContinuation = 0;
200 /// \brief If a call expression was broken over multiple lines, this
201 /// contains the start column of the second line. Otherwise 0.
202 unsigned CallContinuation = 0;
204 /// \brief The column of the first variable name in a variable declaration.
206 /// Used to align further variables if necessary.
207 unsigned VariablePos = 0;
209 /// \brief Whether a newline needs to be inserted before the block's closing
/// brace.
212 /// We only want to insert a newline before the closing brace if there also
213 /// was a newline after the beginning left brace.
214 bool BreakBeforeClosingBrace : 1;
216 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
217 /// lines, in this context.
218 bool AvoidBinPacking : 1;
220 /// \brief Break after the next comma (or all the commas in this context if
221 /// \c AvoidBinPacking is \c true).
222 bool BreakBeforeParameter : 1;
224 /// \brief Line breaking in this context would break a formatting rule.
225 bool NoLineBreak : 1;
227 /// \brief True if the last binary operator on this level was wrapped to the
/// next line.
229 bool LastOperatorWrapped : 1;
231 /// \brief \c true if this \c ParenState already contains a line-break.
233 /// The first line break in a certain \c ParenState causes extra penalty so
234 /// that clang-format prefers similar breaks, i.e. breaks in the same
/// \c ParenState.
236 bool ContainsLineBreak : 1;
238 /// \brief \c true if this \c ParenState contains multiple segments of a
239 /// builder-type call on one line.
240 bool ContainsUnwrappedBuilder : 1;
242 /// \brief \c true if the colons of the current ObjC method expression should
/// be aligned.
245 /// Not considered for memoization as it will always have the same value at
/// the same token.
247 bool AlignColons : 1;
249 /// \brief \c true if at least one selector name was found in the current
250 /// ObjC method expression.
252 /// Not considered for memoization as it will always have the same value at
/// the same token.
254 bool ObjCSelectorNameFound : 1;
256 /// \brief \c true if there are multiple nested blocks inside these parens.
258 /// Not considered for memoization as it will always have the same value at
/// the same token.
260 bool HasMultipleNestedBlocks : 1;
262 /// \brief The start of a nested block (e.g. lambda introducer in C++ or
263 /// "function" in JavaScript) is not wrapped to a new line.
264 bool NestedBlockInlined : 1;
/// \brief Ordering over \c ParenState, comparing one field at a time.
///
/// Fields are checked in the order written below and the first field that
/// differs decides the result. Unsigned fields order by value. For a differing
/// \c bool field the result is this object's own flag, i.e. the state with the
/// flag set orders before the one without it.
///
/// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks are
/// not compared.
///
/// NOTE(review): \c IndentLevel and \c NestedNameSpecifierContinuation are
/// not compared either — confirm that is intended.
/// NOTE(review): the \c NoLineBreak check below has no \c return of its own in
/// the lines shown here — verify against the upstream file.
266 bool operator<(const ParenState &Other) const {
267 if (Indent != Other.Indent)
268 return Indent < Other.Indent;
269 if (LastSpace != Other.LastSpace)
270 return LastSpace < Other.LastSpace;
271 if (NestedBlockIndent != Other.NestedBlockIndent)
272 return NestedBlockIndent < Other.NestedBlockIndent;
273 if (FirstLessLess != Other.FirstLessLess)
274 return FirstLessLess < Other.FirstLessLess;
275 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
276 return BreakBeforeClosingBrace;
277 if (QuestionColumn != Other.QuestionColumn)
278 return QuestionColumn < Other.QuestionColumn;
279 if (AvoidBinPacking != Other.AvoidBinPacking)
280 return AvoidBinPacking;
281 if (BreakBeforeParameter != Other.BreakBeforeParameter)
282 return BreakBeforeParameter;
283 if (NoLineBreak != Other.NoLineBreak)
285 if (LastOperatorWrapped != Other.LastOperatorWrapped)
286 return LastOperatorWrapped;
287 if (ColonPos != Other.ColonPos)
288 return ColonPos < Other.ColonPos;
289 if (StartOfFunctionCall != Other.StartOfFunctionCall)
290 return StartOfFunctionCall < Other.StartOfFunctionCall;
291 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
292 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
293 if (CallContinuation != Other.CallContinuation)
294 return CallContinuation < Other.CallContinuation;
295 if (VariablePos != Other.VariablePos)
296 return VariablePos < Other.VariablePos;
297 if (ContainsLineBreak != Other.ContainsLineBreak)
298 return ContainsLineBreak;
299 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
300 return ContainsUnwrappedBuilder;
301 if (NestedBlockInlined != Other.NestedBlockInlined)
302 return NestedBlockInlined;
307 /// \brief The current state when indenting an unwrapped line.
309 /// As the indenting tries different combinations this is copied by value.
311 /// \brief The number of used columns in the current line.
314 /// \brief The token that needs to be formatted next.
315 FormatToken *NextToken;
317 /// \brief \c true if this line contains a continued for-loop section.
318 bool LineContainsContinuedForLoopSection;
320 /// \brief The \c NestingLevel at the start of this line.
321 unsigned StartOfLineLevel;
323 /// \brief The lowest \c NestingLevel on the current line.
324 unsigned LowestLevelOnLine;
326 /// \brief The start column of the string literal, if we're in a string
327 /// literal sequence, 0 otherwise.
328 unsigned StartOfStringLiteral;
330 /// \brief A stack keeping track of properties applying to parenthesis
/// levels.
332 std::vector<ParenState> Stack;
334 /// \brief Ignore the stack of \c ParenStates for state comparison.
336 /// In long and deeply nested unwrapped lines, the current algorithm can
337 /// be insufficient for finding the best formatting with a reasonable amount
338 /// of time and memory. Setting this flag will effectively lead to the
339 /// algorithm not analyzing some combinations. However, these combinations
340 /// rarely contain the optimal solution: In short, accepting a higher
341 /// penalty early would need to lead to different values in the \c
342 /// ParenState stack (in an otherwise identical state) and these different
343 /// values would need to lead to a significant amount of avoided penalty
/// later.
346 /// FIXME: Come up with a better algorithm instead.
347 bool IgnoreStackForComparison;
349 /// \brief The indent of the first token.
350 unsigned FirstIndent;
352 /// \brief The line that is being formatted.
354 /// Does not need to be considered for memoization because it doesn't change.
355 const AnnotatedLine *Line;
357 /// \brief Comparison operator to be able to use \c LineState in \c map.
///
/// Compares \c NextToken, \c Column, \c LineContainsContinuedForLoopSection,
/// \c StartOfLineLevel, \c LowestLevelOnLine and \c StartOfStringLiteral in
/// that order; the first field that differs decides the result. For the
/// \c bool field the state with the flag set orders first. \c FirstIndent and
/// \c Line are not compared.
///
/// The comparison of \c Stack comes last and is gated on
/// \c IgnoreStackForComparison of either operand.
///
/// NOTE(review): the \c IgnoreStackForComparison check below has no \c return
/// of its own in the lines shown here — verify against the upstream file.
358 bool operator<(const LineState &Other) const {
359 if (NextToken != Other.NextToken)
360 return NextToken < Other.NextToken;
361 if (Column != Other.Column)
362 return Column < Other.Column;
363 if (LineContainsContinuedForLoopSection !=
364 Other.LineContainsContinuedForLoopSection)
365 return LineContainsContinuedForLoopSection;
366 if (StartOfLineLevel != Other.StartOfLineLevel)
367 return StartOfLineLevel < Other.StartOfLineLevel;
368 if (LowestLevelOnLine != Other.LowestLevelOnLine)
369 return LowestLevelOnLine < Other.LowestLevelOnLine;
370 if (StartOfStringLiteral != Other.StartOfStringLiteral)
371 return StartOfStringLiteral < Other.StartOfStringLiteral;
372 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
374 return Stack < Other.Stack;
378 } // end namespace format
379 } // end namespace clang