1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements an indenter that manages the indentation of
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
19 #include "FormatToken.h"
20 #include "clang/Format/Format.h"
21 #include "llvm/Support/Regex.h"
35 struct RawStringFormatStyleManager;
36 class WhitespaceManager;
38 struct RawStringFormatStyleManager {
39 llvm::StringMap<FormatStyle> DelimiterStyle;
40 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
42 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
44 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
46 llvm::Optional<FormatStyle>
47 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
50 class ContinuationIndenter {
52 /// Constructs a \c ContinuationIndenter to format \p Line starting in
53 /// column \p FirstIndent.
54 ContinuationIndenter(const FormatStyle &Style,
55 const AdditionalKeywords &Keywords,
56 const SourceManager &SourceMgr,
57 WhitespaceManager &Whitespaces,
58 encoding::Encoding Encoding,
59 bool BinPackInconclusiveFunctions);
61 /// Get the initial state, i.e. the state after placing \p Line's
62 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
63 /// the case of formatting inside raw string literals, \p FirstStartColumn is
64 /// the column at which the state of the parent formatter is.
65 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
66 const AnnotatedLine *Line, bool DryRun);
68 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
70 /// Returns \c true, if a line break after \p State is allowed.
71 bool canBreak(const LineState &State);
73 /// Returns \c true, if a line break after \p State is mandatory.
74 bool mustBreak(const LineState &State);
76 /// Appends the next token to \p State and updates information
77 /// necessary for indentation.
79 /// Puts the token on the current line if \p Newline is \c false and adds a
80 /// line break and necessary indentation otherwise.
82 /// If \p DryRun is \c false, also creates and stores the required
84 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
85 unsigned ExtraSpaces = 0);
87 /// Get the column limit for this line. This is the style's column
88 /// limit, potentially reduced for preprocessor definitions.
89 unsigned getColumnLimit(const LineState &State) const;
92 /// Mark the next token as consumed in \p State and modify its stacks
94 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
96 /// Update 'State' according to the next token's fake left parentheses.
97 void moveStatePastFakeLParens(LineState &State, bool Newline);
98 /// Update 'State' according to the next token's fake r_parens.
99 void moveStatePastFakeRParens(LineState &State);
101 /// Update 'State' according to the next token being one of "(<{[".
102 void moveStatePastScopeOpener(LineState &State, bool Newline);
103 /// Update 'State' according to the next token being one of ")>}]".
104 void moveStatePastScopeCloser(LineState &State);
105 /// Update 'State' with the next token opening a nested block.
106 void moveStateToNewBlock(LineState &State);
108 /// Reformats a raw string literal.
110 /// \returns An extra penalty induced by reformatting the token.
111 unsigned reformatRawStringLiteral(const FormatToken &Current,
113 const FormatStyle &RawStringStyle,
114 bool DryRun, bool Newline);
116 /// If the current token is at the end of the current line, handle
117 /// the transition to the next line.
118 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
119 bool DryRun, bool AllowBreak, bool Newline);
121 /// If \p Current is a raw string that is configured to be reformatted,
122 /// return the style to be used.
123 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
124 const LineState &State);
126 /// If the current token sticks out over the end of the line, break
129 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
130 /// when tokens are broken or lines exceed the column limit, and exceeded
131 /// indicates whether the algorithm purposefully left lines exceeding the
134 /// The returned penalty will cover the cost of the additional line breaks
135 /// and column limit violation in all lines except for the last one. The
136 /// penalty for the column limit violation in the last line (and in single
137 /// line tokens) is handled in \c addNextStateToQueue.
139 /// \p Strict indicates whether reflowing is allowed to leave characters
140 /// protruding the column limit; if true, lines will be split strictly within
141 /// the column limit where possible; if false, words are allowed to protrude
142 /// over the column limit as long as the penalty is less than the penalty
144 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
146 bool AllowBreak, bool DryRun,
149 /// Returns the \c BreakableToken starting at \p Current, or nullptr
150 /// if the current token cannot be broken.
151 std::unique_ptr<BreakableToken>
152 createBreakableToken(const FormatToken &Current, LineState &State,
155 /// Appends the next token to \p State and updates information
156 /// necessary for indentation.
158 /// Puts the token on the current line.
160 /// If \p DryRun is \c false, also creates and stores the required
162 void addTokenOnCurrentLine(LineState &State, bool DryRun,
163 unsigned ExtraSpaces);
165 /// Appends the next token to \p State and updates information
166 /// necessary for indentation.
168 /// Adds a line break and necessary indentation.
170 /// If \p DryRun is \c false, also creates and stores the required
172 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
174 /// Calculate the new column for a line wrap before the next token.
175 unsigned getNewLineColumn(const LineState &State);
177 /// Adds a multiline token to the \p State.
179 /// \returns Extra penalty for the first line of the literal: last line is
180 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
181 /// matter, as we don't change them.
182 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
184 /// Returns \c true if the next token starts a multiline string
187 /// This includes implicitly concatenated strings, strings that will be broken
188 /// by clang-format and string literals with escaped newlines.
189 bool nextIsMultilineString(const LineState &State);
192 const AdditionalKeywords &Keywords;
193 const SourceManager &SourceMgr;
194 WhitespaceManager &Whitespaces;
195 encoding::Encoding Encoding;
196 bool BinPackInconclusiveFunctions;
197 llvm::Regex CommentPragmasRegex;
198 const RawStringFormatStyleManager RawStringFormats;
/// Constructs the state for one parenthesis level.
///
/// \p Tok, \p Indent, \p LastSpace, \p AvoidBinPacking and \p NoLineBreak are
/// stored as given. \c NestedBlockIndent starts out equal to \p Indent,
/// \c LastOperatorWrapped and \c AlignColons start out \c true, and every
/// other flag initialized here starts out \c false.
202 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
203 bool AvoidBinPacking, bool NoLineBreak)
204 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
205 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
206 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
207 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
208 LastOperatorWrapped(true), ContainsLineBreak(false),
209 ContainsUnwrappedBuilder(false), AlignColons(true),
210 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
211 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}
213 /// \brief The token opening this parenthesis level, or nullptr if this level
214 /// is opened by fake parenthesis.
216 /// Not considered for memoization as it will always have the same value at
218 const FormatToken *Tok;
220 /// The position to which a specific parenthesis level needs to be
224 /// The position of the last space on each level.
226 /// Used e.g. to break like:
227 /// functionCall(Parameter, otherCall(
228 /// OtherParameter));
231 /// If a block relative to this parenthesis level gets wrapped, indent
233 unsigned NestedBlockIndent;
235 /// The position of the first "<<" operator encountered on each level.
237 /// Used to align "<<" operators. 0 if no such operator has been encountered
239 unsigned FirstLessLess = 0;
241 /// The column of a \c ? in a conditional expression.
242 unsigned QuestionColumn = 0;
244 /// The position of the colon in an ObjC method declaration/call.
245 unsigned ColonPos = 0;
247 /// The start of the most recent function in a builder-type call.
248 unsigned StartOfFunctionCall = 0;
250 /// Contains the start of array subscript expressions, so that they
252 unsigned StartOfArraySubscripts = 0;
254 /// If a nested name specifier was broken over multiple lines, this
255 /// contains the start column of the second line. Otherwise 0.
256 unsigned NestedNameSpecifierContinuation = 0;
258 /// If a call expression was broken over multiple lines, this
259 /// contains the start column of the second line. Otherwise 0.
260 unsigned CallContinuation = 0;
262 /// The column of the first variable name in a variable declaration.
264 /// Used to align further variables if necessary.
265 unsigned VariablePos = 0;
267 /// Whether a newline needs to be inserted before the block's closing
270 /// We only want to insert a newline before the closing brace if there also
271 /// was a newline after the beginning left brace.
272 bool BreakBeforeClosingBrace : 1;
274 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
275 /// lines, in this context.
276 bool AvoidBinPacking : 1;
278 /// Break after the next comma (or all the commas in this context if
279 /// \c AvoidBinPacking is \c true).
280 bool BreakBeforeParameter : 1;
282 /// Line breaking in this context would break a formatting rule.
283 bool NoLineBreak : 1;
285 /// Same as \c NoLineBreak, but is restricted until the end of the
286 /// operand (including the next ",").
287 bool NoLineBreakInOperand : 1;
289 /// True if the last binary operator on this level was wrapped to the
291 bool LastOperatorWrapped : 1;
293 /// \c true if this \c ParenState already contains a line-break.
295 /// The first line break in a certain \c ParenState causes extra penalty so
296 /// that clang-format prefers similar breaks, i.e. breaks in the same
298 bool ContainsLineBreak : 1;
300 /// \c true if this \c ParenState contains multiple segments of a
301 /// builder-type call on one line.
302 bool ContainsUnwrappedBuilder : 1;
304 /// \c true if the colons of the current ObjC method expression should
307 /// Not considered for memoization as it will always have the same value at
309 bool AlignColons : 1;
311 /// \c true if at least one selector name was found in the current
312 /// ObjC method expression.
314 /// Not considered for memoization as it will always have the same value at
316 bool ObjCSelectorNameFound : 1;
318 /// \c true if there are multiple nested blocks inside these parens.
320 /// Not considered for memoization as it will always have the same value at
322 bool HasMultipleNestedBlocks : 1;
324 /// The start of a nested block (e.g. lambda introducer in C++ or
325 /// "function" in JavaScript) is not wrapped to a new line.
326 bool NestedBlockInlined : 1;
328 /// \c true if the current \c ParenState represents an Objective-C
330 bool IsInsideObjCArrayLiteral : 1;
332 bool operator<(const ParenState &Other) const {
333 if (Indent != Other.Indent)
334 return Indent < Other.Indent;
335 if (LastSpace != Other.LastSpace)
336 return LastSpace < Other.LastSpace;
337 if (NestedBlockIndent != Other.NestedBlockIndent)
338 return NestedBlockIndent < Other.NestedBlockIndent;
339 if (FirstLessLess != Other.FirstLessLess)
340 return FirstLessLess < Other.FirstLessLess;
341 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
342 return BreakBeforeClosingBrace;
343 if (QuestionColumn != Other.QuestionColumn)
344 return QuestionColumn < Other.QuestionColumn;
345 if (AvoidBinPacking != Other.AvoidBinPacking)
346 return AvoidBinPacking;
347 if (BreakBeforeParameter != Other.BreakBeforeParameter)
348 return BreakBeforeParameter;
349 if (NoLineBreak != Other.NoLineBreak)
351 if (LastOperatorWrapped != Other.LastOperatorWrapped)
352 return LastOperatorWrapped;
353 if (ColonPos != Other.ColonPos)
354 return ColonPos < Other.ColonPos;
355 if (StartOfFunctionCall != Other.StartOfFunctionCall)
356 return StartOfFunctionCall < Other.StartOfFunctionCall;
357 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
358 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
359 if (CallContinuation != Other.CallContinuation)
360 return CallContinuation < Other.CallContinuation;
361 if (VariablePos != Other.VariablePos)
362 return VariablePos < Other.VariablePos;
363 if (ContainsLineBreak != Other.ContainsLineBreak)
364 return ContainsLineBreak;
365 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
366 return ContainsUnwrappedBuilder;
367 if (NestedBlockInlined != Other.NestedBlockInlined)
368 return NestedBlockInlined;
373 /// The current state when indenting an unwrapped line.
375 /// As the indenting tries different combinations this is copied by value.
377 /// The number of used columns in the current line.
380 /// The token that needs to be next formatted.
381 FormatToken *NextToken;
383 /// \c true if this line contains a continued for-loop section.
384 bool LineContainsContinuedForLoopSection;
386 /// \c true if \p NextToken should not continue this line.
389 /// The \c NestingLevel at the start of this line.
390 unsigned StartOfLineLevel;
392 /// The lowest \c NestingLevel on the current line.
393 unsigned LowestLevelOnLine;
395 /// The start column of the string literal, if we're in a string
396 /// literal sequence, 0 otherwise.
397 unsigned StartOfStringLiteral;
399 /// A stack keeping track of properties applying to parenthesis
401 std::vector<ParenState> Stack;
403 /// Ignore the stack of \c ParenStates for state comparison.
405 /// In long and deeply nested unwrapped lines, the current algorithm can
406 /// be insufficient for finding the best formatting with a reasonable amount
407 /// of time and memory. Setting this flag will effectively lead to the
408 /// algorithm not analyzing some combinations. However, these combinations
409 /// rarely contain the optimal solution: In short, accepting a higher
410 /// penalty early would need to lead to different values in the \c
411 /// ParenState stack (in an otherwise identical state) and these different
412 /// values would need to lead to a significant amount of avoided penalty
415 /// FIXME: Come up with a better algorithm instead.
416 bool IgnoreStackForComparison;
418 /// The indent of the first token.
419 unsigned FirstIndent;
421 /// The line that is being formatted.
423 /// Does not need to be considered for memoization because it doesn't change.
424 const AnnotatedLine *Line;
426 /// Comparison operator to be able to use \c LineState in \c map.
427 bool operator<(const LineState &Other) const {
428 if (NextToken != Other.NextToken)
429 return NextToken < Other.NextToken;
430 if (Column != Other.Column)
431 return Column < Other.Column;
432 if (LineContainsContinuedForLoopSection !=
433 Other.LineContainsContinuedForLoopSection)
434 return LineContainsContinuedForLoopSection;
435 if (NoContinuation != Other.NoContinuation)
436 return NoContinuation;
437 if (StartOfLineLevel != Other.StartOfLineLevel)
438 return StartOfLineLevel < Other.StartOfLineLevel;
439 if (LowestLevelOnLine != Other.LowestLevelOnLine)
440 return LowestLevelOnLine < Other.LowestLevelOnLine;
441 if (StartOfStringLiteral != Other.StartOfStringLiteral)
442 return StartOfStringLiteral < Other.StartOfStringLiteral;
443 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
445 return Stack < Other.Stack;
449 } // end namespace format
450 } // end namespace clang