1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// This file implements an indenter that manages the indentation of
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
20 #include "FormatToken.h"
21 #include "clang/Format/Format.h"
22 #include "llvm/Support/Regex.h"
36 struct RawStringFormatStyleManager;
37 class WhitespaceManager;
39 struct RawStringFormatStyleManager {
40 llvm::StringMap<FormatStyle> DelimiterStyle;
41 llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
43 RawStringFormatStyleManager(const FormatStyle &CodeStyle);
45 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
47 llvm::Optional<FormatStyle>
48 getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
51 class ContinuationIndenter {
53 /// Constructs a \c ContinuationIndenter to format \p Line starting in
54 /// column \p FirstIndent.
55 ContinuationIndenter(const FormatStyle &Style,
56 const AdditionalKeywords &Keywords,
57 const SourceManager &SourceMgr,
58 WhitespaceManager &Whitespaces,
59 encoding::Encoding Encoding,
60 bool BinPackInconclusiveFunctions);
62 /// Get the initial state, i.e. the state after placing \p Line's
63 /// first token at \p FirstIndent. When reformatting a fragment of code, as in
64 /// the case of formatting inside raw string literals, \p FirstStartColumn is
65 /// the column at which the state of the parent formatter is.
66 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
67 const AnnotatedLine *Line, bool DryRun);
69 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
71 /// Returns \c true, if a line break after \p State is allowed.
72 bool canBreak(const LineState &State);
74 /// Returns \c true, if a line break after \p State is mandatory.
75 bool mustBreak(const LineState &State);
77 /// Appends the next token to \p State and updates information
78 /// necessary for indentation.
80 /// Puts the token on the current line if \p Newline is \c false and adds a
81 /// line break and necessary indentation otherwise.
83 /// If \p DryRun is \c false, also creates and stores the required
85 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
86 unsigned ExtraSpaces = 0);
88 /// Get the column limit for this line. This is the style's column
89 /// limit, potentially reduced for preprocessor definitions.
90 unsigned getColumnLimit(const LineState &State) const;
93 /// Mark the next token as consumed in \p State and modify its stacks
95 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
97 /// Update 'State' according to the next token's fake left parentheses.
98 void moveStatePastFakeLParens(LineState &State, bool Newline);
99 /// Update 'State' according to the next token's fake r_parens.
100 void moveStatePastFakeRParens(LineState &State);
102 /// Update 'State' according to the next token being one of "(<{[".
103 void moveStatePastScopeOpener(LineState &State, bool Newline);
104 /// Update 'State' according to the next token being one of ")>}]".
105 void moveStatePastScopeCloser(LineState &State);
106 /// Update 'State' with the next token opening a nested block.
107 void moveStateToNewBlock(LineState &State);
109 /// Reformats a raw string literal.
111 /// \returns An extra penalty induced by reformatting the token.
112 unsigned reformatRawStringLiteral(const FormatToken &Current,
114 const FormatStyle &RawStringStyle,
117 /// If the current token is at the end of the current line, handle
118 /// the transition to the next line.
119 unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
120 bool DryRun, bool AllowBreak);
122 /// If \p Current is a raw string that is configured to be reformatted,
123 /// return the style to be used.
124 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
125 const LineState &State);
127 /// If the current token sticks out over the end of the line, break
130 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
131 /// when tokens are broken or lines exceed the column limit, and exceeded
132 /// indicates whether the algorithm purposefully left lines exceeding the
135 /// The returned penalty will cover the cost of the additional line breaks
136 /// and column limit violation in all lines except for the last one. The
137 /// penalty for the column limit violation in the last line (and in single
138 /// line tokens) is handled in \c addNextStateToQueue.
140 /// \p Strict indicates whether reflowing is allowed to leave characters
141 /// protruding the column limit; if true, lines will be split strictly within
142 /// the column limit where possible; if false, words are allowed to protrude
143 /// over the column limit as long as the penalty is less than the penalty
145 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
147 bool AllowBreak, bool DryRun,
150 /// Returns the \c BreakableToken starting at \p Current, or nullptr
151 /// if the current token cannot be broken.
152 std::unique_ptr<BreakableToken>
153 createBreakableToken(const FormatToken &Current, LineState &State,
156 /// Appends the next token to \p State and updates information
157 /// necessary for indentation.
159 /// Puts the token on the current line.
161 /// If \p DryRun is \c false, also creates and stores the required
163 void addTokenOnCurrentLine(LineState &State, bool DryRun,
164 unsigned ExtraSpaces);
166 /// Appends the next token to \p State and updates information
167 /// necessary for indentation.
169 /// Adds a line break and necessary indentation.
171 /// If \p DryRun is \c false, also creates and stores the required
173 unsigned addTokenOnNewLine(LineState &State, bool DryRun);
175 /// Calculate the new column for a line wrap before the next token.
176 unsigned getNewLineColumn(const LineState &State);
178 /// Adds a multiline token to the \p State.
180 /// \returns Extra penalty for the first line of the literal: last line is
181 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
182 /// matter, as we don't change them.
183 unsigned addMultilineToken(const FormatToken &Current, LineState &State);
185 /// Returns \c true if the next token starts a multiline string
188 /// This includes implicitly concatenated strings, strings that will be broken
189 /// by clang-format and string literals with escaped newlines.
190 bool nextIsMultilineString(const LineState &State);
193 const AdditionalKeywords &Keywords;
194 const SourceManager &SourceMgr;
195 WhitespaceManager &Whitespaces;
196 encoding::Encoding Encoding;
197 bool BinPackInconclusiveFunctions;
198 llvm::Regex CommentPragmasRegex;
199 const RawStringFormatStyleManager RawStringFormats;
/// Creates the state for one parenthesis level.
///
/// \p Tok, \p Indent, \p LastSpace, \p AvoidBinPacking and \p NoLineBreak are
/// stored in the members of the same name, and \c NestedBlockIndent starts
/// out equal to \p Indent. \c LastOperatorWrapped and \c AlignColons start as
/// \c true; every other flag initialized here starts as \c false.
203 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
204 bool AvoidBinPacking, bool NoLineBreak)
205 : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
206 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
207 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
208 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
209 LastOperatorWrapped(true), ContainsLineBreak(false),
210 ContainsUnwrappedBuilder(false), AlignColons(true),
211 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
212 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {}
214 /// \brief The token opening this parenthesis level, or nullptr if this level
215 /// is opened by fake parenthesis.
217 /// Not considered for memoization as it will always have the same value at
219 const FormatToken *Tok;
221 /// The position to which a specific parenthesis level needs to be
225 /// The position of the last space on each level.
227 /// Used e.g. to break like:
228 /// functionCall(Parameter, otherCall(
229 /// OtherParameter));
232 /// If a block relative to this parenthesis level gets wrapped, indent
234 unsigned NestedBlockIndent;
236 /// The position of the first "<<" operator encountered on each level.
238 /// Used to align "<<" operators. 0 if no such operator has been encountered
240 unsigned FirstLessLess = 0;
242 /// The column of a \c ? in a conditional expression.
243 unsigned QuestionColumn = 0;
245 /// The position of the colon in an ObjC method declaration/call.
246 unsigned ColonPos = 0;
248 /// The start of the most recent function in a builder-type call.
249 unsigned StartOfFunctionCall = 0;
251 /// Contains the start of array subscript expressions, so that they
253 unsigned StartOfArraySubscripts = 0;
255 /// If a nested name specifier was broken over multiple lines, this
256 /// contains the start column of the second line. Otherwise 0.
257 unsigned NestedNameSpecifierContinuation = 0;
259 /// If a call expression was broken over multiple lines, this
260 /// contains the start column of the second line. Otherwise 0.
261 unsigned CallContinuation = 0;
263 /// The column of the first variable name in a variable declaration.
265 /// Used to align further variables if necessary.
266 unsigned VariablePos = 0;
268 /// Whether a newline needs to be inserted before the block's closing
271 /// We only want to insert a newline before the closing brace if there also
272 /// was a newline after the beginning left brace.
273 bool BreakBeforeClosingBrace : 1;
275 /// Avoid bin packing, i.e. multiple parameters/elements on multiple
276 /// lines, in this context.
277 bool AvoidBinPacking : 1;
279 /// Break after the next comma (or all the commas in this context if
280 /// \c AvoidBinPacking is \c true).
281 bool BreakBeforeParameter : 1;
283 /// Line breaking in this context would break a formatting rule.
284 bool NoLineBreak : 1;
286 /// Same as \c NoLineBreak, but is restricted until the end of the
287 /// operand (including the next ",").
288 bool NoLineBreakInOperand : 1;
290 /// True if the last binary operator on this level was wrapped to the
292 bool LastOperatorWrapped : 1;
294 /// \c true if this \c ParenState already contains a line-break.
296 /// The first line break in a certain \c ParenState causes extra penalty so
297 /// that clang-format prefers similar breaks, i.e. breaks in the same
299 bool ContainsLineBreak : 1;
301 /// \c true if this \c ParenState contains multiple segments of a
302 /// builder-type call on one line.
303 bool ContainsUnwrappedBuilder : 1;
305 /// \c true if the colons of the current ObjC method expression should
308 /// Not considered for memoization as it will always have the same value at
310 bool AlignColons : 1;
312 /// \c true if at least one selector name was found in the current
313 /// ObjC method expression.
315 /// Not considered for memoization as it will always have the same value at
317 bool ObjCSelectorNameFound : 1;
319 /// \c true if there are multiple nested blocks inside these parens.
321 /// Not considered for memoization as it will always have the same value at
323 bool HasMultipleNestedBlocks : 1;
325 /// The start of a nested block (e.g. lambda introducer in C++ or
326 /// "function" in JavaScript) is not wrapped to a new line.
327 bool NestedBlockInlined : 1;
329 /// \c true if the current \c ParenState represents an Objective-C
331 bool IsInsideObjCArrayLiteral : 1;
/// Orders two \c ParenState objects field by field: the first compared field
/// that differs decides the result.
///
/// Unsigned fields compare with \c <. For a boolean flag that differs, the
/// flag value of \c *this is returned, so the object with the flag set orders
/// first.
333 bool operator<(const ParenState &Other) const {
334 if (Indent != Other.Indent)
335 return Indent < Other.Indent;
336 if (LastSpace != Other.LastSpace)
337 return LastSpace < Other.LastSpace;
338 if (NestedBlockIndent != Other.NestedBlockIndent)
339 return NestedBlockIndent < Other.NestedBlockIndent;
340 if (FirstLessLess != Other.FirstLessLess)
341 return FirstLessLess < Other.FirstLessLess;
342 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
343 return BreakBeforeClosingBrace;
344 if (QuestionColumn != Other.QuestionColumn)
345 return QuestionColumn < Other.QuestionColumn;
346 if (AvoidBinPacking != Other.AvoidBinPacking)
347 return AvoidBinPacking;
348 if (BreakBeforeParameter != Other.BreakBeforeParameter)
349 return BreakBeforeParameter;
350 if (NoLineBreak != Other.NoLineBreak)
// NOTE(review): no statement for the NoLineBreak condition is visible here,
// so as written the next `if` becomes its body. Presumably
// `return NoLineBreak;` belongs on this line, matching the other boolean
// fields -- confirm against the upstream file.
352 if (LastOperatorWrapped != Other.LastOperatorWrapped)
353 return LastOperatorWrapped;
354 if (ColonPos != Other.ColonPos)
355 return ColonPos < Other.ColonPos;
356 if (StartOfFunctionCall != Other.StartOfFunctionCall)
357 return StartOfFunctionCall < Other.StartOfFunctionCall;
358 if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
359 return StartOfArraySubscripts < Other.StartOfArraySubscripts;
360 if (CallContinuation != Other.CallContinuation)
361 return CallContinuation < Other.CallContinuation;
362 if (VariablePos != Other.VariablePos)
363 return VariablePos < Other.VariablePos;
364 if (ContainsLineBreak != Other.ContainsLineBreak)
365 return ContainsLineBreak;
366 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
367 return ContainsUnwrappedBuilder;
368 if (NestedBlockInlined != Other.NestedBlockInlined)
369 return NestedBlockInlined;
// NOTE(review): the final return for the "all compared fields equal" case and
// the closing brace are not visible in this listing -- confirm they exist.
374 /// The current state when indenting an unwrapped line.
376 /// As the indenting tries different combinations this is copied by value.
378 /// The number of used columns in the current line.
381 /// The token that needs to be next formatted.
382 FormatToken *NextToken;
384 /// \c true if this line contains a continued for-loop section.
385 bool LineContainsContinuedForLoopSection;
387 /// \c true if \p NextToken should not continue this line.
390 /// The \c NestingLevel at the start of this line.
391 unsigned StartOfLineLevel;
393 /// The lowest \c NestingLevel on the current line.
394 unsigned LowestLevelOnLine;
396 /// The start column of the string literal, if we're in a string
397 /// literal sequence, 0 otherwise.
398 unsigned StartOfStringLiteral;
400 /// A stack keeping track of properties applying to parenthesis
402 std::vector<ParenState> Stack;
404 /// Ignore the stack of \c ParenStates for state comparison.
406 /// In long and deeply nested unwrapped lines, the current algorithm can
407 /// be insufficient for finding the best formatting with a reasonable amount
408 /// of time and memory. Setting this flag will effectively lead to the
409 /// algorithm not analyzing some combinations. However, these combinations
410 /// rarely contain the optimal solution: In short, accepting a higher
411 /// penalty early would need to lead to different values in the \c
412 /// ParenState stack (in an otherwise identical state) and these different
413 /// values would need to lead to a significant amount of avoided penalty
416 /// FIXME: Come up with a better algorithm instead.
417 bool IgnoreStackForComparison;
419 /// The indent of the first token.
420 unsigned FirstIndent;
422 /// The line that is being formatted.
424 /// Does not need to be considered for memoization because it doesn't change.
425 const AnnotatedLine *Line;
427 /// Comparison operator to be able to use \c LineState in \c map.
// Compares field by field, starting with NextToken and Column: the first
// compared field that differs decides the result. A boolean flag that differs
// returns the flag value of *this. The ParenState stack is compared last.
428 bool operator<(const LineState &Other) const {
429 if (NextToken != Other.NextToken)
430 return NextToken < Other.NextToken;
431 if (Column != Other.Column)
432 return Column < Other.Column;
433 if (LineContainsContinuedForLoopSection !=
434 Other.LineContainsContinuedForLoopSection)
435 return LineContainsContinuedForLoopSection;
436 if (NoContinuation != Other.NoContinuation)
437 return NoContinuation;
438 if (StartOfLineLevel != Other.StartOfLineLevel)
439 return StartOfLineLevel < Other.StartOfLineLevel;
440 if (LowestLevelOnLine != Other.LowestLevelOnLine)
441 return LowestLevelOnLine < Other.LowestLevelOnLine;
442 if (StartOfStringLiteral != Other.StartOfStringLiteral)
443 return StartOfStringLiteral < Other.StartOfStringLiteral;
444 if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
// NOTE(review): no statement for this condition is visible, so as written the
// stack comparison below runs only when one side ignores the stack. That is
// the opposite of what the IgnoreStackForComparison documentation describes.
// Presumably `return false;` belongs here -- confirm against the upstream file.
446 return Stack < Other.Stack;
// NOTE(review): the closing brace of this operator is not visible in this
// listing.
450 } // end namespace format
451 } // end namespace clang