1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements the WhitespaceManager class.
13 //===----------------------------------------------------------------------===//
15 #include "WhitespaceManager.h"
16 #include "llvm/ADT/STLExtras.h"
/// Comparator used to order Change objects by source position.
///
/// \param C1 First change to compare.
/// \param C2 Second change to compare.
/// \returns whatever SourceMgr.isBeforeInTranslationUnit() reports for the
/// begin locations of the two changes' OriginalWhitespaceRange, i.e. whether
/// C1's whitespace starts before C2's.
21 bool WhitespaceManager::Change::IsBeforeInFile::
22 operator()(const Change &C1, const Change &C2) const {
// Only the *begin* of each original whitespace range takes part in the
// comparison; the end locations are ignored.
23 return SourceMgr.isBeforeInTranslationUnit(
24 C1.OriginalWhitespaceRange.getBegin(),
25 C2.OriginalWhitespaceRange.getBegin());
/// Constructs a Change from the values supplied by the caller.
///
/// Every constructor argument is copied verbatim into the member of the same
/// name. The remaining members are given neutral defaults here
/// (IsTrailingComment = false, TokenLength = 0, PreviousEndOfTokenColumn = 0,
/// EscapedNewlineColumn = 0, StartOfBlockComment = nullptr,
/// IndentationOffset = 0); they are assigned their real values later by
/// calculateLineBreakInformation() and the alignEscapedNewlines() helpers.
///
/// Note that \p Spaces is a signed int, unlike the other numeric arguments.
28 WhitespaceManager::Change::Change(
29 bool CreateReplacement, SourceRange OriginalWhitespaceRange,
30 unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
31 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
32 StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective,
33 bool IsStartOfDeclName, bool IsInsideToken)
34 : CreateReplacement(CreateReplacement),
35 OriginalWhitespaceRange(OriginalWhitespaceRange),
36 StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
37 PreviousLinePostfix(PreviousLinePostfix),
38 CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
39 ContinuesPPDirective(ContinuesPPDirective),
40 IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel),
// Derived fields start out zeroed/null; see the function comment above.
41 Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false),
42 TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
43 StartOfBlockComment(nullptr), IndentationOffset(0) {}
/// Resets the manager.
// NOTE(review): the body of this function is not visible in this excerpt;
// presumably it clears the accumulated Changes and Replaces -- confirm.
45 void WhitespaceManager::reset() {
/// Describes the whitespace that should precede \p Tok.
///
/// \param Tok Token whose leading whitespace is being replaced; its Decision
/// field is updated as a side effect.
/// \param Newlines Number of line breaks to emit before the token.
/// \param IndentLevel Indentation level forwarded to the Change.
/// \param Spaces Number of spaces forwarded to the Change.
/// \param StartOfTokenColumn Column at which the token will start.
50 void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
51 unsigned IndentLevel, unsigned Spaces,
52 unsigned StartOfTokenColumn,
// Record on the token itself whether a line break precedes it.
56 Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
// The Change covers the token's whole WhitespaceRange, requests a replacement,
// and carries no line postfix/prefix text. It is flagged as continuing a
// preprocessor directive only when InPPDirective is set and the token is not
// the first one.
58 Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel,
59 Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(),
60 InPPDirective && !Tok.IsFirst,
61 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
62 /*IsInsideToken=*/false));
/// Registers \p Tok as a token whose leading whitespace must be left as is.
///
/// A Change with CreateReplacement == false is appended to Changes so that
/// the token still takes part in the column bookkeeping and alignment passes
/// while no replacement is generated for it (see generateChanges()).
65 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
// The token's original column and original newline count are recorded;
// IndentLevel and Spaces are both 0 because nothing will be emitted.
69 Changes.push_back(Change(
70 /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0,
71 /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
72 Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst,
73 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
74 /*IsInsideToken=*/false));
/// Records a whitespace replacement located inside the text of \p Tok.
///
/// \param Tok Token that contains the text to replace.
/// \param Offset Offset of the replaced text from the token's first
/// non-whitespace character.
/// \param ReplaceChars Number of characters covered by the replacement.
/// \param PreviousPostfix Text stored as the Change's PreviousLinePostfix.
/// \param CurrentPrefix Text stored as the Change's CurrentLinePrefix.
/// \param InPPDirective Whether the token is inside a preprocessor directive.
/// \param Newlines Number of line breaks to insert.
/// \param IndentLevel Indentation level forwarded to the Change.
/// \param Spaces Number of spaces to insert; may be negative.
77 void WhitespaceManager::replaceWhitespaceInToken(
78 const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
79 StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
80 unsigned Newlines, unsigned IndentLevel, int Spaces) {
// The replaced range is [Start, Start + ReplaceChars).
83 SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
// StartOfTokenColumn is max(0, Spaces), so a negative Spaces yields column 0.
// The Change is tagged tok::comment for line comments and tok::unknown for
// everything else, and counts as "inside a token" only when no newline is
// inserted.
84 Changes.push_back(Change(
85 true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
86 IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
87 CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown,
88 InPPDirective && !Tok.IsFirst,
89 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
90 /*IsInsideToken=*/Newlines == 0));
/// Runs the whitespace pipeline over all recorded Changes and returns the
/// resulting set of replacements by const reference.
93 const tooling::Replacements &WhitespaceManager::generateReplacements() {
// Changes are appended in arbitrary order by the callers, so bring them into
// source order first; every later pass indexes neighbours as i - 1 / i + 1.
97 std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
// Derive token lengths and columns, then run the alignment passes in this
// fixed order: declarations, assignments, trailing comments, escaped newlines.
98 calculateLineBreakInformation();
99 alignConsecutiveDeclarations();
100 alignConsecutiveAssignments();
101 alignTrailingComments();
102 alignEscapedNewlines();
/// Fills in the derived fields of every Change: TokenLength,
/// PreviousEndOfTokenColumn, IsTrailingComment, StartOfBlockComment and
/// IndentationOffset.
///
/// Changes[0] is accessed unconditionally, so Changes must be non-empty when
/// this is called.
108 void WhitespaceManager::calculateLineBreakInformation() {
109 Changes[0].PreviousEndOfTokenColumn = 0;
// Tracks the most recent change that is not inside a token; lengths of
// in-token changes are accumulated onto it below.
110 Change *LastOutsideTokenChange = &Changes[0];
111 for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
112 unsigned OriginalWhitespaceStart =
113 SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
114 unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
115 Changes[i - 1].OriginalWhitespaceRange.getEnd());
// Length of the text between the end of the previous whitespace range and the
// start of this one, plus the postfix/prefix text the changes add around it.
116 Changes[i - 1].TokenLength = OriginalWhitespaceStart -
117 PreviousOriginalWhitespaceEnd +
118 Changes[i].PreviousLinePostfix.size() +
119 Changes[i - 1].CurrentLinePrefix.size();
121 // If there are multiple changes in this token, sum up all the changes until
122 // the end of the line.
123 if (Changes[i - 1].IsInsideToken)
124 LastOutsideTokenChange->TokenLength +=
125 Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
127 LastOutsideTokenChange = &Changes[i - 1];
// The current change begins where the previous token ends.
129 Changes[i].PreviousEndOfTokenColumn =
130 Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
// The previous change is a trailing comment if it is a comment and the current
// change starts a new line, is the eof token, or is an in-token comment change.
132 Changes[i - 1].IsTrailingComment =
133 (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof ||
134 (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) &&
135 Changes[i - 1].Kind == tok::comment;
137 // FIXME: The last token is currently not always an eof token; in those
138 // cases, setting TokenLength of the last token to 0 is wrong.
139 Changes.back().TokenLength = 0;
140 Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
// Second pass: link tok::unknown changes to the comment change that precedes
// them and remember their column offset relative to that comment's start.
142 const WhitespaceManager::Change *LastBlockComment = nullptr;
143 for (auto &Change : Changes) {
144 // Reset the IsTrailingComment flag for changes inside of trailing comments
145 // so they don't get realigned later.
146 if (Change.IsInsideToken)
147 Change.IsTrailingComment = false;
148 Change.StartOfBlockComment = nullptr;
149 Change.IndentationOffset = 0;
150 if (Change.Kind == tok::comment) {
151 LastBlockComment = &Change;
152 } else if (Change.Kind == tok::unknown) {
// Intentional assignment inside the condition: the offset is only computed
// when a preceding comment change exists.
153 if ((Change.StartOfBlockComment = LastBlockComment))
154 Change.IndentationOffset =
155 Change.StartOfTokenColumn -
156 Change.StartOfBlockComment->StartOfTokenColumn;
158 LastBlockComment = nullptr;
163 // Align a single sequence of tokens, see AlignTokens below.
//
// Start/End delimit the half-open index range [Start, End) into Changes,
// Column is the target column for the first matching token on each line, and
// Matches is the predicate that identifies tokens to align.
164 template <typename F>
166 AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
167 SmallVector<WhitespaceManager::Change, 16> &Changes) {
168 bool FoundMatchOnLine = false;
170 for (unsigned i = Start; i != End; ++i) {
// A change with NewlinesBefore > 0 starts a new line, so matching restarts.
171 if (Changes[i].NewlinesBefore > 0) {
172 FoundMatchOnLine = false;
176 // If this is the first matching token to be aligned, remember by how many
177 // spaces it has to be shifted, so the rest of the changes on the line are
178 // shifted by the same amount
179 if (!FoundMatchOnLine && Matches(Changes[i])) {
180 FoundMatchOnLine = true;
181 Shift = Column - Changes[i].StartOfTokenColumn;
182 Changes[i].Spaces += Shift;
// Keep the column bookkeeping of this change and of its successor consistent
// with the shift; the size check avoids indexing past the last change.
186 Changes[i].StartOfTokenColumn += Shift;
187 if (i + 1 != Changes.size())
188 Changes[i + 1].PreviousEndOfTokenColumn += Shift;
192 // Walk through all of the changes and find sequences of matching tokens to
193 // align. To do so, keep track of the lines and whether or not a matching token
194 // was found on a line. If a matching token is found, extend the current
195 // sequence. If the current line cannot be part of a sequence, e.g. because
196 // there is an empty line before it or it contains only non-matching tokens,
197 // finalize the previous sequence.
//
// Style supplies ColumnLimit, Matches is the predicate selecting the tokens to
// align, and Changes is modified in place.
198 template <typename F>
199 static void AlignTokens(const FormatStyle &Style, F &&Matches,
200 SmallVector<WhitespaceManager::Change, 16> &Changes) {
// [MinColumn, MaxColumn] is the range of columns at which every token of the
// current sequence can still be placed.
201 unsigned MinColumn = 0;
202 unsigned MaxColumn = UINT_MAX;
204 // Line number of the start and the end of the current token sequence.
// A StartOfSequence of 0 doubles as the "no sequence open" marker (see the
// checks on StartOfSequence further down).
205 unsigned StartOfSequence = 0;
206 unsigned EndOfSequence = 0;
208 // Keep track of the nesting level of matching tokens, i.e. the number of
209 // surrounding (), [], or {}. We will only align a sequence of matching
210 // token that share the same scope depth.
212 // FIXME: This could use FormatToken::NestingLevel information, but there is
213 // an outstanding issue wrt the brace scopes.
214 unsigned NestingLevelOfLastMatch = 0;
215 unsigned NestingLevel = 0;
217 // Keep track of the number of commas before the matching tokens, we will only
218 // align a sequence of matching tokens if they are preceded by the same number
// of commas (tracked in the two counters declared next).
220 unsigned CommasBeforeLastMatch = 0;
221 unsigned CommasBeforeMatch = 0;
223 // Whether a matching token has been found on the current line.
224 bool FoundMatchOnLine = false;
226 // Aligns a sequence of matching tokens, on the MinColumn column.
228 // Sequences start from the first matching token to align, and end at the
229 // first token of the first line that doesn't need to be aligned.
231 // We need to adjust the StartOfTokenColumn of each Change that is on a line
232 // containing any matching token to be aligned and located after such token.
233 auto AlignCurrentSequence = [&] {
234 if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
235 AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
238 MaxColumn = UINT_MAX;
243 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
// Start of a new line: the comma count is per line.
244 if (Changes[i].NewlinesBefore != 0) {
245 CommasBeforeMatch = 0;
247 // If there is a blank line, or if the last line didn't contain any
248 // matching token, the sequence ends here.
249 if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
250 AlignCurrentSequence();
252 FoundMatchOnLine = false;
// Track commas and bracket nesting so that matches are only grouped when they
// sit in comparable positions.
255 if (Changes[i].Kind == tok::comma) {
257 } else if (Changes[i].Kind == tok::r_brace ||
258 Changes[i].Kind == tok::r_paren ||
259 Changes[i].Kind == tok::r_square) {
261 } else if (Changes[i].Kind == tok::l_brace ||
262 Changes[i].Kind == tok::l_paren ||
263 Changes[i].Kind == tok::l_square) {
264 // We want sequences to skip over child scopes if possible, but not the
266 NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel);
270 if (!Matches(Changes[i]))
273 // If there is more than one matching token per line, or if the number of
274 // preceding commas, or the scope depth, do not match anymore, end the
276 if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch ||
277 NestingLevel != NestingLevelOfLastMatch)
278 AlignCurrentSequence();
280 CommasBeforeLastMatch = CommasBeforeMatch;
281 NestingLevelOfLastMatch = NestingLevel;
282 FoundMatchOnLine = true;
284 if (StartOfSequence == 0)
// The token cannot move left of its current column, and cannot move so far
// right that the remainder of its line would exceed Style.ColumnLimit.
287 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
// Width of everything from this token to the end of its line, excluding the
// whitespace in front of the token itself (hence the initial -Spaces).
288 int LineLengthAfter = -Changes[i].Spaces;
289 for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
290 LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
// NOTE(review): the result is stored in an unsigned; if LineLengthAfter is
// larger than Style.ColumnLimit this wraps to a very large value -- confirm
// whether that case (or a ColumnLimit of 0) needs explicit handling.
291 unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
293 // If we are restricted by the maximum column width, end the sequence.
294 if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
295 CommasBeforeLastMatch != CommasBeforeMatch) {
296 AlignCurrentSequence();
// Narrow the feasible column range to include this token.
300 MinColumn = std::max(MinColumn, ChangeMinColumn);
301 MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
// Flush whatever sequence is still open once all changes have been visited.
304 EndOfSequence = Changes.size();
305 AlignCurrentSequence();
/// Aligns the '=' tokens of consecutive lines; the pass is gated on
/// Style.AlignConsecutiveAssignments.
308 void WhitespaceManager::alignConsecutiveAssignments() {
309 if (!Style.AlignConsecutiveAssignments)
// Predicate deciding which changes count as alignable assignment operators.
313 [&](const Change &C) {
314 // Do not align on equal signs that are first on a line.
315 if (C.NewlinesBefore > 0)
318 // Do not align on equal signs that are last on a line.
// (&C + 1) looks at the next element; this relies on C being an element of
// the contiguous Changes container, and the back() check keeps the access in
// bounds.
319 if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
322 return C.Kind == tok::equal;
/// Aligns the declared names of consecutive declarations; the pass is gated on
/// Style.AlignConsecutiveDeclarations.
327 void WhitespaceManager::alignConsecutiveDeclarations() {
328 if (!Style.AlignConsecutiveDeclarations)
331 // FIXME: Currently we don't handle properly the PointerAlignment: Right
332 // The * and & are not aligned and are left dangling. Something has to be done
333 // about it, but it raises the question of alignment of code like:
334 // const char* const* v1;
336 // SomeVeryLongType const& v3;
// A change is an alignment candidate exactly when it was flagged as the start
// of a declaration name at construction time.
338 AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; },
/// Groups trailing comments on consecutive lines into sequences and aligns
/// each sequence via the three-argument alignTrailingComments() overload.
342 void WhitespaceManager::alignTrailingComments() {
// [MinColumn, MaxColumn] is the column range in which every comment of the
// current sequence can still be placed.
343 unsigned MinColumn = 0;
344 unsigned MaxColumn = UINT_MAX;
345 unsigned StartOfSequence = 0;
346 bool BreakBeforeNext = false;
// Newlines accumulated from the changes visited since the last comment.
347 unsigned Newlines = 0;
348 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
349 if (Changes[i].StartOfBlockComment)
351 Newlines += Changes[i].NewlinesBefore;
352 if (!Changes[i].IsTrailingComment)
355 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
// NOTE(review): the result is stored in an unsigned; if TokenLength exceeds
// Style.ColumnLimit this wraps to a very large value -- confirm whether
// over-long comments (or a ColumnLimit of 0) need explicit handling.
356 unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
358 // If we don't create a replacement for this change, we have to consider
359 // it to be immovable.
360 if (!Changes[i].CreateReplacement)
361 ChangeMaxColumn = ChangeMinColumn;
// Reserve two columns when the next change continues a preprocessor
// directive.
// NOTE(review): presumably room for the " \" line continuation -- confirm.
// The subtraction is unsigned and would wrap if ChangeMaxColumn < 2.
363 if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
364 ChangeMaxColumn -= 2;
365 // If this comment follows an } in column 0, it probably documents the
366 // closing of a namespace and we don't want to align it.
367 bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
368 Changes[i - 1].Kind == tok::r_brace &&
369 Changes[i - 1].StartOfTokenColumn == 0;
370 bool WasAlignedWithStartOfNextLine = false;
371 if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
// Compare the comment's original column with that of the next non-comment
// token, both taken from the end of their original whitespace ranges.
372 unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
373 Changes[i].OriginalWhitespaceRange.getEnd());
374 for (unsigned j = i + 1; j != e; ++j) {
375 if (Changes[j].Kind != tok::comment) { // Skip over comments.
376 unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
377 Changes[j].OriginalWhitespaceRange.getEnd());
378 // The start of the next token was previously aligned with the
379 // start of this comment.
// A comment indented by exactly one IndentWidth more than the next token is
// treated the same way.
380 WasAlignedWithStartOfNextLine =
381 CommentColumn == NextColumn ||
382 CommentColumn == NextColumn + Style.IndentWidth;
// Alignment disabled or namespace-closing comment: flush the open sequence and
// pin this comment to its current column (MinColumn == MaxColumn).
387 if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
388 alignTrailingComments(StartOfSequence, i, MinColumn);
389 MinColumn = ChangeMinColumn;
390 MaxColumn = ChangeMinColumn;
// Otherwise flush the open sequence and start a new one at this comment when
// the comment cannot join it.
392 } else if (BreakBeforeNext || Newlines > 1 ||
393 (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
394 // Break the comment sequence if the previous line did not end
395 // in a trailing comment.
396 (Changes[i].NewlinesBefore == 1 && i > 0 &&
397 !Changes[i - 1].IsTrailingComment) ||
398 WasAlignedWithStartOfNextLine) {
399 alignTrailingComments(StartOfSequence, i, MinColumn);
400 MinColumn = ChangeMinColumn;
401 MaxColumn = ChangeMaxColumn;
// The comment joins the current sequence: narrow the feasible column range.
404 MinColumn = std::max(MinColumn, ChangeMinColumn);
405 MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
408 (i == 0) || (Changes[i].NewlinesBefore > 1) ||
409 // Never start a sequence with a comment at the beginning of
411 (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
// Flush the sequence that is still open after the last change.
414 alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
/// Shifts the trailing comments among the changes with index in [Start, End)
/// so that they begin at the target column (called with the sequence's
/// MinColumn by the parameterless overload above).
417 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
419 for (unsigned i = Start; i != End; ++i) {
// A trailing comment moves by the distance to the target column.
421 if (Changes[i].IsTrailingComment) {
422 Shift = Column - Changes[i].StartOfTokenColumn;
// A change that belongs to a block comment is instead repositioned relative
// to the (possibly already shifted) start of that comment, preserving its
// recorded IndentationOffset.
424 if (Changes[i].StartOfBlockComment) {
425 Shift = Changes[i].IndentationOffset +
426 Changes[i].StartOfBlockComment->StartOfTokenColumn -
427 Changes[i].StartOfTokenColumn;
// Apply the shift to the emitted spaces and to the column bookkeeping of this
// change and of its successor.
430 Changes[i].Spaces += Shift;
432 Changes[i + 1].PreviousEndOfTokenColumn += Shift;
433 Changes[i].StartOfTokenColumn += Shift;
/// Determines, per run of preprocessor continuation lines, the column for the
/// escaped newlines and applies it through the three-argument overload.
437 void WhitespaceManager::alignEscapedNewlines() {
// With AlignEscapedNewlinesLeft the column starts at 0 and grows to fit the
// longest line; otherwise it starts at Style.ColumnLimit.
438 unsigned MaxEndOfLine =
439 Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
440 unsigned StartOfMacro = 0;
// Starts at index 1: Changes[0] is never inspected by this loop.
441 for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
442 Change &C = Changes[i];
443 if (C.NewlinesBefore > 0) {
444 if (C.ContinuesPPDirective) {
// Leave two columns after the end of the previous token.
// NOTE(review): presumably for a space and the backslash -- confirm.
445 MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
// A line break that does not continue the directive ends the run: align what
// was collected and reset the column for the next run.
447 alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
448 MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
// Align the run that is still open after the last change.
453 alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
/// Assigns EscapedNewlineColumn to every line-breaking change with index in
/// [Start, End).
456 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
458 for (unsigned i = Start; i < End; ++i) {
459 Change &C = Changes[i];
460 if (C.NewlinesBefore > 0) {
// Every line break inside the range is expected to be a directive
// continuation.
461 assert(C.ContinuesPPDirective);
// If the previous token already reaches past the target column, store 0
// instead of the target column.
462 if (C.PreviousEndOfTokenColumn + 1 > Column)
463 C.EscapedNewlineColumn = 0;
465 C.EscapedNewlineColumn = Column;
/// Turns every Change that requests a replacement into replacement text and
/// hands it to storeReplacement().
470 void WhitespaceManager::generateChanges() {
471 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
472 const Change &C = Changes[i];
// Two changes starting at the same location would produce conflicting
// replacements.
474 assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
475 C.OriginalWhitespaceRange.getBegin() &&
476 "Generating two replacements for the same location");
// Changes with CreateReplacement == false are skipped.
478 if (C.CreateReplacement) {
// Replacement text layout: previous-line postfix, newlines (escaped when
// continuing a preprocessor directive), indentation, current-line prefix.
479 std::string ReplacementText = C.PreviousLinePostfix;
480 if (C.ContinuesPPDirective)
481 appendNewlineText(ReplacementText, C.NewlinesBefore,
482 C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
484 appendNewlineText(ReplacementText, C.NewlinesBefore);
// Spaces may be negative, so it is clamped to 0. The last argument is the
// column at which the emitted whitespace begins.
485 appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
486 C.StartOfTokenColumn - std::max(0, C.Spaces));
487 ReplacementText.append(C.CurrentLinePrefix);
488 storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
/// Inserts a replacement of the characters in \p Range by Text into Replaces,
/// unless the source already contains exactly that text.
493 void WhitespaceManager::storeReplacement(SourceRange Range,
// Length in characters of the range, computed from file offsets.
495 unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
496 SourceMgr.getFileOffset(Range.getBegin());
497 // Don't create a replacement, if it does not change anything.
498 if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
499 WhitespaceLength) == Text)
// Range is interpreted as a character range, not a token range.
501 Replaces.insert(tooling::Replacement(
502 SourceMgr, CharSourceRange::getCharRange(Range), Text));
/// Appends Newlines line breaks to \p Text, using "\r\n" when UseCRLF is set
/// and "\n" otherwise.
505 void WhitespaceManager::appendNewlineText(std::string &Text,
507 for (unsigned i = 0; i < Newlines; ++i)
508 Text.append(UseCRLF ? "\r\n" : "\n");
/// Appends \p Newlines escaped line breaks (a backslash followed by the line
/// ending) to \p Text, padding with spaces up to the escaped-newline column.
///
/// \param Text String that receives the output.
/// \param Newlines Number of escaped line breaks to append.
/// \param PreviousEndOfTokenColumn Column at which the preceding token ends.
/// \param EscapedNewlineColumn Target column for the escaped newline.
511 void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
512 unsigned PreviousEndOfTokenColumn,
513 unsigned EscapedNewlineColumn) {
// The padding starts from whichever is smaller: the end of the previous token
// or the column just before the target. The comparison is done as int, so an
// EscapedNewlineColumn of 0 yields -1 here rather than a huge unsigned value.
516 std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
517 for (unsigned i = 0; i < Newlines; ++i) {
518 Text.append(EscapedNewlineColumn - Offset - 1, ' ');
519 Text.append(UseCRLF ? "\\\r\n" : "\\\n");
/// Appends whitespace that is \p Spaces columns wide to \p Text, using tabs
/// and/or spaces as selected by Style.UseTab.
///
/// \param Text String that receives the whitespace.
/// \param IndentLevel Indentation level; only read in the UT_ForIndentation
/// case.
/// \param Spaces Width of the whitespace in columns.
/// \param WhitespaceStartColumn Column at which the whitespace begins.
525 void WhitespaceManager::appendIndentText(std::string &Text,
526 unsigned IndentLevel, unsigned Spaces,
527 unsigned WhitespaceStartColumn) {
528 switch (Style.UseTab) {
// Spaces only.
529 case FormatStyle::UT_Never:
530 Text.append(Spaces, ' ');
532 case FormatStyle::UT_Always: {
// Width of the first tab, which only has to reach the next tab stop.
// NOTE(review): the modulo and the divisions below assume Style.TabWidth is
// non-zero -- confirm this is guaranteed by the caller or by style validation.
533 unsigned FirstTabWidth =
534 Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
535 // Indent with tabs only when there's at least one full tab.
536 if (FirstTabWidth + Style.TabWidth <= Spaces) {
537 Spaces -= FirstTabWidth;
// Remaining width: as many full tabs as fit, then spaces for the rest.
540 Text.append(Spaces / Style.TabWidth, '\t');
541 Text.append(Spaces % Style.TabWidth, ' ');
544 case FormatStyle::UT_ForIndentation:
// Tabs are only used when the whitespace starts at column 0.
545 if (WhitespaceStartColumn == 0) {
546 unsigned Indentation = IndentLevel * Style.IndentWidth;
547 // This happens, e.g. when a line in a block comment is indented less than
// Never emit more indentation than the requested width.
549 if (Indentation > Spaces)
550 Indentation = Spaces;
551 unsigned Tabs = Indentation / Style.TabWidth;
552 Text.append(Tabs, '\t');
553 Spaces -= Tabs * Style.TabWidth;
// Whatever width is left is filled with spaces.
555 Text.append(Spaces, ' ');
560 } // namespace format