1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements an indenter that manages the indentation of
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
36 struct RawStringFormatStyleManager;
37 class WhitespaceManager;
39 struct RawStringFormatStyleManager {
40 llvm::StringMap<FormatStyle> DelimiterStyle;
42 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
44 llvm::Optional<FormatStyle> get(StringRef Delimiter) const;
47 class ContinuationIndenter {
49 /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
50 /// column \p FirstIndent.
51 ContinuationIndenter(const FormatStyle &Style,
52 const AdditionalKeywords &Keywords,
53 const SourceManager &SourceMgr,
54 WhitespaceManager &Whitespaces,
55 encoding::Encoding Encoding,
56 bool BinPackInconclusiveFunctions);
58 /// \brief Get the initial state, i.e. the state after placing \p Line's
59 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
60 /// the case of formatting inside raw string literals, \p FirstStartColumn is
61 /// the column at which the state of the parent formatter is.
62 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
63 const AnnotatedLine *Line, bool DryRun);
65 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
67 /// \brief Returns \c true, if a line break after \p State is allowed.
68 bool canBreak(const LineState &State);
70 /// \brief Returns \c true, if a line break after \p State is mandatory.
71 bool mustBreak(const LineState &State);
73 /// \brief Appends the next token to \p State and updates information
74 /// necessary for indentation.
76 /// Puts the token on the current line if \p Newline is \c false and adds a
77 /// line break and necessary indentation otherwise.
79 /// If \p DryRun is \c false, also creates and stores the required
81 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
82 unsigned ExtraSpaces = 0);
84 /// \brief Get the column limit for this line. This is the style's column
85 /// limit, potentially reduced for preprocessor definitions.
86 unsigned getColumnLimit(const LineState &State) const;
89 /// \brief Mark the next token as consumed in \p State and modify its stacks
91 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
93 /// \brief Update 'State' according to the next token's fake left parentheses.
94 void moveStatePastFakeLParens(LineState &State, bool Newline);
95 /// \brief Update 'State' according to the next token's fake r_parens.
96 void moveStatePastFakeRParens(LineState &State);
98 /// \brief Update 'State' according to the next token being one of "(<{[".
99 void moveStatePastScopeOpener(LineState &State, bool Newline);
100 /// \brief Update 'State' according to the next token being one of ")>}]".
101 void moveStatePastScopeCloser(LineState &State);
102 /// \brief Update 'State' with the next token opening a nested block.
103 void moveStateToNewBlock(LineState &State);
105 /// \brief Reformats a raw string literal.
107 /// \returns An extra penalty induced by reformatting the token.
108 unsigned reformatRawStringLiteral(const FormatToken &Current,
110 const FormatStyle &RawStringStyle,
113 /// \brief If the current token is at the end of the current line, handle
114 /// the transition to the next line.
115 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
116 bool DryRun, bool AllowBreak);
118 /// \brief If \p Current is a raw string that is configured to be reformatted,
119 /// return the style to be used.
120 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
121 const LineState &State);
123 /// \brief If the current token sticks out over the end of the line, break
126 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
127 /// when tokens are broken or lines exceed the column limit, and exceeded
128 /// indicates whether the algorithm purposefully left lines exceeding the
131 /// The returned penalty will cover the cost of the additional line breaks
132 /// and column limit violation in all lines except for the last one. The
133 /// penalty for the column limit violation in the last line (and in single
134 /// line tokens) is handled in \c addNextStateToQueue.
136 /// \p Strict indicates whether reflowing is allowed to leave characters
137 /// protruding the column limit; if true, lines will be split strictly within
138 /// the column limit where possible; if false, words are allowed to protrude
139 /// over the column limit as long as the penalty is less than the penalty
141 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
143 bool AllowBreak, bool DryRun,
146 /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
147 /// if the current token cannot be broken.
148 std::unique_ptr<BreakableToken>
149 createBreakableToken(const FormatToken &Current, LineState &State,
152 /// \brief Appends the next token to \p State and updates information
153 /// necessary for indentation.
155 /// Puts the token on the current line.
157 /// If \p DryRun is \c false, also creates and stores the required
159 void addTokenOnCurrentLine(LineState &State, bool DryRun,
160 unsigned ExtraSpaces);
162 /// \brief Appends the next token to \p State and updates information
163 /// necessary for indentation.
165 /// Adds a line break and necessary indentation.
167 /// If \p DryRun is \c false, also creates and stores the required
169 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
171 /// \brief Calculate the new column for a line wrap before the next token.
172 unsigned getNewLineColumn(const LineState &State);
174 /// \brief Adds a multiline token to the \p State.
176 /// \returns Extra penalty for the first line of the literal: last line is
177 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
178 /// matter, as we don't change them.
179 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
181 /// \brief Returns \c true if the next token starts a multiline string
184 /// This includes implicitly concatenated strings, strings that will be broken
185 /// by clang-format and string literals with escaped newlines.
186 bool nextIsMultilineString(const LineState &State);
189 const AdditionalKeywords &Keywords;
190 const SourceManager &SourceMgr;
191 WhitespaceManager &Whitespaces;
192 encoding::Encoding Encoding;
193 bool BinPackInconclusiveFunctions;
194 llvm::Regex CommentPragmasRegex;
195 const RawStringFormatStyleManager RawStringFormats;
199 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
201 : Indent(Indent), LastSpace(LastSpace), NestedBlockIndent(Indent),
202 BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking),
203 BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
204 NoLineBreakInOperand(false), LastOperatorWrapped(true),
205 ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
206 AlignColons(true), ObjCSelectorNameFound(false),
207 HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
209 /// \brief The position to which a specific parenthesis level needs to be
213 /// \brief The position of the last space on each level.
215 /// Used e.g. to break like:
216 /// functionCall(Parameter, otherCall(
217 /// OtherParameter));
220 /// \brief If a block relative to this parenthesis level gets wrapped, indent
222 unsigned NestedBlockIndent;
224 /// \brief The position of the first "<<" operator encountered on each level.
226 /// Used to align "<<" operators. 0 if no such operator has been encountered
228 unsigned FirstLessLess = 0;
230 /// \brief The column of a \c ? in a conditional expression;
231 unsigned QuestionColumn = 0;
233 /// \brief The position of the colon in an ObjC method declaration/call.
234 unsigned ColonPos = 0;
236 /// \brief The start of the most recent function in a builder-type call.
237 unsigned StartOfFunctionCall = 0;
239 /// \brief Contains the start of array subscript expressions, so that they
241 unsigned StartOfArraySubscripts = 0;
243 /// \brief If a nested name specifier was broken over multiple lines, this
244 /// contains the start column of the second line. Otherwise 0.
245 unsigned NestedNameSpecifierContinuation = 0;
247 /// \brief If a call expression was broken over multiple lines, this
248 /// contains the start column of the second line. Otherwise 0.
249 unsigned CallContinuation = 0;
251 /// \brief The column of the first variable name in a variable declaration.
253 /// Used to align further variables if necessary.
254 unsigned VariablePos = 0;
256 /// \brief Whether a newline needs to be inserted before the block's closing
259 /// We only want to insert a newline before the closing brace if there also
260 /// was a newline after the beginning left brace.
261 bool BreakBeforeClosingBrace : 1;
263 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
264 /// lines, in this context.
265 bool AvoidBinPacking : 1;
267 /// \brief Break after the next comma (or all the commas in this context if
268 /// \c AvoidBinPacking is \c true).
269 bool BreakBeforeParameter : 1;
271 /// \brief Line breaking in this context would break a formatting rule.
272 bool NoLineBreak : 1;
274 /// \brief Same as \c NoLineBreak, but is restricted until the end of the
275 /// operand (including the next ",").
276 bool NoLineBreakInOperand : 1;
278 /// \brief True if the last binary operator on this level was wrapped to the
280 bool LastOperatorWrapped : 1;
282 /// \brief \c true if this \c ParenState already contains a line-break.
284 /// The first line break in a certain \c ParenState causes extra penalty so
285 /// that clang-format prefers similar breaks, i.e. breaks in the same
287 bool ContainsLineBreak : 1;
289 /// \brief \c true if this \c ParenState contains multiple segments of a
290 /// builder-type call on one line.
291 bool ContainsUnwrappedBuilder : 1;
293 /// \brief \c true if the colons of the current ObjC method expression should
296 /// Not considered for memoization as it will always have the same value at
298 bool AlignColons : 1;
300 /// \brief \c true if at least one selector name was found in the current
301 /// ObjC method expression.
303 /// Not considered for memoization as it will always have the same value at
305 bool ObjCSelectorNameFound : 1;
307 /// \brief \c true if there are multiple nested blocks inside these parens.
309 /// Not considered for memoization as it will always have the same value at
311 bool HasMultipleNestedBlocks : 1;
313 /// \brief The start of a nested block (e.g. lambda introducer in C++ or
314 /// "function" in JavaScript) is not wrapped to a new line.
315 bool NestedBlockInlined : 1;
317 bool operator<(const ParenState &Other) const {
318 if (Indent != Other.Indent)
319 return Indent < Other.Indent;
320 if (LastSpace != Other.LastSpace)
321 return LastSpace < Other.LastSpace;
322 if (NestedBlockIndent != Other.NestedBlockIndent)
323 return NestedBlockIndent < Other.NestedBlockIndent;
324 if (FirstLessLess != Other.FirstLessLess)
325 return FirstLessLess < Other.FirstLessLess;
326 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
327 return BreakBeforeClosingBrace;
328 if (QuestionColumn != Other.QuestionColumn)
329 return QuestionColumn < Other.QuestionColumn;
330 if (AvoidBinPacking != Other.AvoidBinPacking)
331 return AvoidBinPacking;
332 if (BreakBeforeParameter != Other.BreakBeforeParameter)
333 return BreakBeforeParameter;
334 if (NoLineBreak != Other.NoLineBreak)
336 if (LastOperatorWrapped != Other.LastOperatorWrapped)
337 return LastOperatorWrapped;
338 if (ColonPos != Other.ColonPos)
339 return ColonPos < Other.ColonPos;
340 if (StartOfFunctionCall != Other.StartOfFunctionCall)
341 return StartOfFunctionCall < Other.StartOfFunctionCall;
342 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
343 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
344 if (CallContinuation != Other.CallContinuation)
345 return CallContinuation < Other.CallContinuation;
346 if (VariablePos != Other.VariablePos)
347 return VariablePos < Other.VariablePos;
348 if (ContainsLineBreak != Other.ContainsLineBreak)
349 return ContainsLineBreak;
350 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
351 return ContainsUnwrappedBuilder;
352 if (NestedBlockInlined != Other.NestedBlockInlined)
353 return NestedBlockInlined;
358 /// \brief The current state when indenting an unwrapped line.
360 /// As the indenting tries different combinations this is copied by value.
362 /// \brief The number of used columns in the current line.
365 /// \brief The token that needs to be next formatted.
366 FormatToken *NextToken;
368 /// \brief \c true if this line contains a continued for-loop section.
369 bool LineContainsContinuedForLoopSection;
371 /// \brief \c true if \p NextToken should not continue this line.
374 /// \brief The \c NestingLevel at the start of this line.
375 unsigned StartOfLineLevel;
377 /// \brief The lowest \c NestingLevel on the current line.
378 unsigned LowestLevelOnLine;
380 /// \brief The start column of the string literal, if we're in a string
381 /// literal sequence, 0 otherwise.
382 unsigned StartOfStringLiteral;
384 /// \brief A stack keeping track of properties applying to parenthesis
386 std::vector<ParenState> Stack;
388 /// \brief Ignore the stack of \c ParenStates for state comparison.
390 /// In long and deeply nested unwrapped lines, the current algorithm can
391 /// be insufficient for finding the best formatting with a reasonable amount
392 /// of time and memory. Setting this flag will effectively lead to the
393 /// algorithm not analyzing some combinations. However, these combinations
394 /// rarely contain the optimal solution: In short, accepting a higher
395 /// penalty early would need to lead to different values in the \c
396 /// ParenState stack (in an otherwise identical state) and these different
397 /// values would need to lead to a significant amount of avoided penalty
400 /// FIXME: Come up with a better algorithm instead.
401 bool IgnoreStackForComparison;
403 /// \brief The indent of the first token.
404 unsigned FirstIndent;
406 /// \brief The line that is being formatted.
408 /// Does not need to be considered for memoization because it doesn't change.
409 const AnnotatedLine *Line;
411 /// \brief Comparison operator to be able to used \c LineState in \c map.
412 bool operator<(const LineState &Other) const {
413 if (NextToken != Other.NextToken)
414 return NextToken < Other.NextToken;
415 if (Column != Other.Column)
416 return Column < Other.Column;
417 if (LineContainsContinuedForLoopSection !=
418 Other.LineContainsContinuedForLoopSection)
419 return LineContainsContinuedForLoopSection;
420 if (NoContinuation != Other.NoContinuation)
421 return NoContinuation;
422 if (StartOfLineLevel != Other.StartOfLineLevel)
423 return StartOfLineLevel < Other.StartOfLineLevel;
424 if (LowestLevelOnLine != Other.LowestLevelOnLine)
425 return LowestLevelOnLine < Other.LowestLevelOnLine;
426 if (StartOfStringLiteral != Other.StartOfStringLiteral)
427 return StartOfStringLiteral < Other.StartOfStringLiteral;
428 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
430 return Stack < Other.Stack;
434 } // end namespace format
435 } // end namespace clang