1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements an indenter that manages the indentation of
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
33 class WhitespaceManager;
/// \brief Interface for placing the tokens of a line one after another while
/// keeping the indentation bookkeeping in a \c LineState up to date.
35 class ContinuationIndenter {
37 /// \brief Constructs a \c ContinuationIndenter from the given \p Style,
38 /// \p Keywords, \p SourceMgr, \p Whitespaces and \p Encoding.
39 ContinuationIndenter(const FormatStyle &Style,
40 const AdditionalKeywords &Keywords,
41 SourceManager &SourceMgr, WhitespaceManager &Whitespaces,
42 encoding::Encoding Encoding,
43 bool BinPackInconclusiveFunctions);
45 /// \brief Get the initial state, i.e. the state after placing \p Line's
46 /// first token at \p FirstIndent.
47 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
// NOTE(review): the rest of this parameter list is not visible here.
50 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
52 /// \brief Returns \c true, if a line break after \p State is allowed.
53 bool canBreak(const LineState &State);
55 /// \brief Returns \c true, if a line break after \p State is mandatory.
56 bool mustBreak(const LineState &State);
58 /// \brief Appends the next token to \p State and updates information
59 /// necessary for indentation.
61 /// Puts the token on the current line if \p Newline is \c false and adds a
62 /// line break and necessary indentation otherwise.
64 /// If \p DryRun is \c false, also creates and stores the required
66 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
67 unsigned ExtraSpaces = 0);
69 /// \brief Get the column limit for this line. This is the style's column
70 /// limit, potentially reduced for preprocessor definitions.
71 unsigned getColumnLimit(const LineState &State) const;
74 /// \brief Mark the next token as consumed in \p State and modify its stacks
76 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
78 /// \brief Update \p State according to the next token's fake left parentheses.
79 void moveStatePastFakeLParens(LineState &State, bool Newline);
80 /// \brief Update \p State according to the next token's fake right parentheses.
81 void moveStatePastFakeRParens(LineState &State);
83 /// \brief Update \p State according to the next token being one of "(<{[".
84 void moveStatePastScopeOpener(LineState &State, bool Newline);
85 /// \brief Update \p State according to the next token being one of ")>}]".
86 void moveStatePastScopeCloser(LineState &State);
87 /// \brief Update \p State with the next token opening a nested block.
88 void moveStateToNewBlock(LineState &State);
90 /// \brief If the current token sticks out over the end of the line, break
93 /// \returns An extra penalty if a token was broken, otherwise 0.
95 /// The returned penalty will cover the cost of the additional line breaks and
96 /// column limit violation in all lines except for the last one. The penalty
97 /// for the column limit violation in the last line (and in single line
98 /// tokens) is handled in \c addNextStateToQueue.
99 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
// NOTE(review): the rest of this parameter list is not visible here.
102 /// \brief Appends the next token to \p State and updates information
103 /// necessary for indentation.
105 /// Puts the token on the current line.
107 /// If \p DryRun is \c false, also creates and stores the required
109 void addTokenOnCurrentLine(LineState &State, bool DryRun,
110 unsigned ExtraSpaces);
112 /// \brief Appends the next token to \p State and updates information
113 /// necessary for indentation.
115 /// Adds a line break and necessary indentation.
117 /// If \p DryRun is \c false, also creates and stores the required
119 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
121 /// \brief Calculate the new column for a line wrap before the next token.
122 unsigned getNewLineColumn(const LineState &State);
124 /// \brief Adds a multiline token to the \p State.
126 /// \returns Extra penalty for the first line of the literal: last line is
127 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
128 /// matter, as we don't change them.
129 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
131 /// \brief Returns \c true if the next token starts a multiline string
134 /// This includes implicitly concatenated strings, strings that will be broken
135 /// by clang-format and string literals with escaped newlines.
136 bool nextIsMultilineString(const LineState &State);
// Data members. The first five share their names and types with constructor
// parameters; presumably they hold those arguments -- confirm in the
// implementation file.
139 const AdditionalKeywords &Keywords;
140 SourceManager &SourceMgr;
141 WhitespaceManager &Whitespaces;
142 encoding::Encoding Encoding;
143 bool BinPackInconclusiveFunctions;
144 llvm::Regex CommentPragmasRegex;
/// \brief Creates the state for one parenthesis level.
///
/// \p Indent, \p IndentLevel, \p LastSpace, \p AvoidBinPacking and
/// \p NoLineBreak are stored as given. \c NestedBlockIndent starts out equal
/// to \p Indent, \c LastOperatorWrapped and \c AlignColons start out \c true,
/// and every other flag set here starts out \c false.
148 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
149 bool AvoidBinPacking, bool NoLineBreak)
150 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
151 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
152 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
153 NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
154 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
155 AlignColons(true), ObjCSelectorNameFound(false),
156 HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
158 /// \brief The position to which a specific parenthesis level needs to be
// NOTE(review): the member documented above (presumably Indent, which the
// constructor initializes) is not declared in the visible lines.
162 /// \brief The number of indentation levels of the block.
163 unsigned IndentLevel;
165 /// \brief The position of the last space on each level.
167 /// Used e.g. to break like:
168 /// functionCall(Parameter, otherCall(
169 /// OtherParameter));
// NOTE(review): the member documented above (presumably LastSpace, which the
// constructor initializes) is not declared in the visible lines.
172 /// \brief If a block relative to this parenthesis level gets wrapped, indent
174 unsigned NestedBlockIndent;
176 /// \brief The position of the first "<<" operator encountered on each level.
178 /// Used to align "<<" operators. 0 if no such operator has been encountered
180 unsigned FirstLessLess = 0;
182 /// \brief The column of a \c ? in a conditional expression;
183 unsigned QuestionColumn = 0;
185 /// \brief The position of the colon in an ObjC method declaration/call.
186 unsigned ColonPos = 0;
188 /// \brief The start of the most recent function in a builder-type call.
189 unsigned StartOfFunctionCall = 0;
191 /// \brief Contains the start of array subscript expressions, so that they
193 unsigned StartOfArraySubscripts = 0;
195 /// \brief If a nested name specifier was broken over multiple lines, this
196 /// contains the start column of the second line. Otherwise 0.
197 unsigned NestedNameSpecifierContinuation = 0;
199 /// \brief If a call expression was broken over multiple lines, this
200 /// contains the start column of the second line. Otherwise 0.
201 unsigned CallContinuation = 0;
203 /// \brief The column of the first variable name in a variable declaration.
205 /// Used to align further variables if necessary.
206 unsigned VariablePos = 0;
208 /// \brief Whether a newline needs to be inserted before the block's closing
211 /// We only want to insert a newline before the closing brace if there also
212 /// was a newline after the beginning left brace.
213 bool BreakBeforeClosingBrace : 1;
215 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
216 /// lines, in this context.
217 bool AvoidBinPacking : 1;
219 /// \brief Break after the next comma (or all the commas in this context if
220 /// \c AvoidBinPacking is \c true).
221 bool BreakBeforeParameter : 1;
223 /// \brief Line breaking in this context would break a formatting rule.
224 bool NoLineBreak : 1;
226 /// \brief True if the last binary operator on this level was wrapped to the
228 bool LastOperatorWrapped : 1;
230 /// \brief \c true if this \c ParenState already contains a line-break.
232 /// The first line break in a certain \c ParenState causes extra penalty so
233 /// that clang-format prefers similar breaks, i.e. breaks in the same
235 bool ContainsLineBreak : 1;
237 /// \brief \c true if this \c ParenState contains multiple segments of a
238 /// builder-type call on one line.
239 bool ContainsUnwrappedBuilder : 1;
241 /// \brief \c true if the colons of the current ObjC method expression should
244 /// Not considered for memoization as it will always have the same value at
246 bool AlignColons : 1;
248 /// \brief \c true if at least one selector name was found in the current
249 /// ObjC method expression.
251 /// Not considered for memoization as it will always have the same value at
253 bool ObjCSelectorNameFound : 1;
255 /// \brief \c true if there are multiple nested blocks inside these parens.
257 /// Not considered for memoization as it will always have the same value at
259 bool HasMultipleNestedBlocks : 1;
261 /// \brief The start of a nested block (e.g. lambda introducer in C++ or
262 /// "function" in JavaScript) is not wrapped to a new line.
263 bool NestedBlockInlined : 1;
265 bool operator<(const ParenState &Other) const {
266 if (Indent != Other.Indent)
267 return Indent < Other.Indent;
268 if (LastSpace != Other.LastSpace)
269 return LastSpace < Other.LastSpace;
270 if (NestedBlockIndent != Other.NestedBlockIndent)
271 return NestedBlockIndent < Other.NestedBlockIndent;
272 if (FirstLessLess != Other.FirstLessLess)
273 return FirstLessLess < Other.FirstLessLess;
274 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
275 return BreakBeforeClosingBrace;
276 if (QuestionColumn != Other.QuestionColumn)
277 return QuestionColumn < Other.QuestionColumn;
278 if (AvoidBinPacking != Other.AvoidBinPacking)
279 return AvoidBinPacking;
280 if (BreakBeforeParameter != Other.BreakBeforeParameter)
281 return BreakBeforeParameter;
282 if (NoLineBreak != Other.NoLineBreak)
284 if (LastOperatorWrapped != Other.LastOperatorWrapped)
285 return LastOperatorWrapped;
286 if (ColonPos != Other.ColonPos)
287 return ColonPos < Other.ColonPos;
288 if (StartOfFunctionCall != Other.StartOfFunctionCall)
289 return StartOfFunctionCall < Other.StartOfFunctionCall;
290 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
291 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
292 if (CallContinuation != Other.CallContinuation)
293 return CallContinuation < Other.CallContinuation;
294 if (VariablePos != Other.VariablePos)
295 return VariablePos < Other.VariablePos;
296 if (ContainsLineBreak != Other.ContainsLineBreak)
297 return ContainsLineBreak;
298 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
299 return ContainsUnwrappedBuilder;
300 if (NestedBlockInlined != Other.NestedBlockInlined)
301 return NestedBlockInlined;
306 /// \brief The current state when indenting an unwrapped line.
308 /// As the indenting tries different combinations this is copied by value.
310 /// \brief The number of used columns in the current line.
// NOTE(review): the member documented above (presumably Column, which the
// comparison operator reads) is not declared in the visible lines.
313 /// \brief The token that needs to be next formatted.
314 FormatToken *NextToken;
316 /// \brief \c true if this line contains a continued for-loop section.
317 bool LineContainsContinuedForLoopSection;
319 /// \brief The \c NestingLevel at the start of this line.
320 unsigned StartOfLineLevel;
322 /// \brief The lowest \c NestingLevel on the current line.
323 unsigned LowestLevelOnLine;
325 /// \brief The start column of the string literal, if we're in a string
326 /// literal sequence, 0 otherwise.
327 unsigned StartOfStringLiteral;
329 /// \brief A stack keeping track of properties applying to parenthesis
331 std::vector<ParenState> Stack;
333 /// \brief Ignore the stack of \c ParenStates for state comparison.
335 /// In long and deeply nested unwrapped lines, the current algorithm can
336 /// be insufficient for finding the best formatting with a reasonable amount
337 /// of time and memory. Setting this flag will effectively lead to the
338 /// algorithm not analyzing some combinations. However, these combinations
339 /// rarely contain the optimal solution: In short, accepting a higher
340 /// penalty early would need to lead to different values in the \c
341 /// ParenState stack (in an otherwise identical state) and these different
342 /// values would need to lead to a significant amount of avoided penalty
345 /// FIXME: Come up with a better algorithm instead.
346 bool IgnoreStackForComparison;
348 /// \brief The indent of the first token.
349 unsigned FirstIndent;
351 /// \brief The line that is being formatted.
353 /// Does not need to be considered for memoization because it doesn't change.
354 const AnnotatedLine *Line;
356 /// \brief Comparison operator to be able to used \c LineState in \c map.
357 bool operator<(const LineState &Other) const {
358 if (NextToken != Other.NextToken)
359 return NextToken < Other.NextToken;
360 if (Column != Other.Column)
361 return Column < Other.Column;
362 if (LineContainsContinuedForLoopSection !=
363 Other.LineContainsContinuedForLoopSection)
364 return LineContainsContinuedForLoopSection;
365 if (StartOfLineLevel != Other.StartOfLineLevel)
366 return StartOfLineLevel < Other.StartOfLineLevel;
367 if (LowestLevelOnLine != Other.LowestLevelOnLine)
368 return LowestLevelOnLine < Other.LowestLevelOnLine;
369 if (StartOfStringLiteral != Other.StartOfStringLiteral)
370 return StartOfStringLiteral < Other.StartOfStringLiteral;
371 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
373 return Stack < Other.Stack;
377 } // end namespace format
378 } // end namespace clang