1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements WhitespaceManager class.
13 //===----------------------------------------------------------------------===//
15 #include "WhitespaceManager.h"
16 #include "llvm/ADT/STLExtras.h"
/// Comparator over two changes: C1 sorts before C2 when the begin location of
/// C1's original whitespace range precedes that of C2, as reported by
/// SourceMgr.isBeforeInTranslationUnit(). generateReplacements() passes this
/// functor to std::sort to put Changes into source order.
///
/// \param C1 left-hand change.
/// \param C2 right-hand change.
/// \returns the result of the SourceManager query described above.
21 bool WhitespaceManager::Change::IsBeforeInFile::
22 operator()(const Change &C1, const Change &C2) const {
23 return SourceMgr.isBeforeInTranslationUnit(
24 C1.OriginalWhitespaceRange.getBegin(),
25 C2.OriginalWhitespaceRange.getBegin());
/// Builds a Change record from the caller-supplied description of one
/// whitespace edit.
///
/// Every constructor argument is copied into the member of the same name.
/// The remaining members are not taken from the arguments: IsTrailingComment
/// starts as false, TokenLength, PreviousEndOfTokenColumn,
/// EscapedNewlineColumn and IndentationOffset start as 0, and
/// StartOfBlockComment starts as nullptr. Those fields are assigned later by
/// calculateLineBreakInformation() and the alignment passes in this file.
///
/// Note that Spaces is a signed int here, while replaceWhitespace() declares
/// its Spaces parameter as unsigned; consumers clamp with std::max(0, Spaces).
28 WhitespaceManager::Change::Change(
29 bool CreateReplacement, SourceRange OriginalWhitespaceRange,
30 unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
31 unsigned NewlinesBefore, StringRef PreviousLinePostfix,
32 StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective,
33 bool IsStartOfDeclName, bool IsInsideToken)
34 : CreateReplacement(CreateReplacement),
35 OriginalWhitespaceRange(OriginalWhitespaceRange),
36 StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
37 PreviousLinePostfix(PreviousLinePostfix),
38 CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
39 ContinuesPPDirective(ContinuesPPDirective),
40 IsStartOfDeclName(IsStartOfDeclName), IndentLevel(IndentLevel),
41 Spaces(Spaces), IsInsideToken(IsInsideToken), IsTrailingComment(false),
42 TokenLength(0), PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
43 StartOfBlockComment(nullptr), IndentationOffset(0) {}
/// Describes the whitespace that should precede \p Tok as a Change that will
/// produce a replacement (CreateReplacement is true).
///
/// \param Tok token whose leading whitespace (Tok.WhitespaceRange) is edited;
///        its Decision field is also updated here.
/// \param Newlines number of line breaks to place before the token.
/// \param IndentLevel indentation level forwarded to the Change.
/// \param Spaces number of spaces forwarded to the Change.
/// \param StartOfTokenColumn column forwarded to the Change.
///
/// NOTE(review): the end of the parameter list and the statement that
/// receives the Change built below are not visible in this view;
/// InPPDirective is presumably one of the hidden parameters and the Change is
/// presumably appended to Changes, as addUntouchableToken() does - confirm.
45 void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
46 unsigned IndentLevel, unsigned Spaces,
47 unsigned StartOfTokenColumn,
// Record on the token itself whether a line break precedes it.
51 Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
// No line postfix/prefix text is needed for whitespace between tokens, hence
// the two empty strings. The first token is never flagged as continuing a
// preprocessor directive.
53 Change(/*CreateReplacement=*/true, Tok.WhitespaceRange, IndentLevel,
54 Spaces, StartOfTokenColumn, Newlines, "", "", Tok.Tok.getKind(),
55 InPPDirective && !Tok.IsFirst,
56 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
57 /*IsInsideToken=*/false));
/// Registers \p Tok with a Change that must not generate a replacement
/// (CreateReplacement is false).
///
/// The Change keeps the token's existing layout: its column comes from
/// Tok.OriginalColumn and its newline count from Tok.NewlinesBefore, while
/// IndentLevel and Spaces are both 0.
///
/// NOTE(review): the rest of the parameter list (which presumably declares
/// InPPDirective) and any statements before the push_back are not visible in
/// this view.
60 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
64 Changes.push_back(Change(
65 /*CreateReplacement=*/false, Tok.WhitespaceRange, /*IndentLevel=*/0,
66 /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
67 Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst,
68 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
69 /*IsInsideToken=*/false));
/// Records a replacement for a character range that lies inside \p Tok
/// rather than in the whitespace in front of it.
///
/// \param Tok token that contains the range.
/// \param Offset distance from the token's first non-whitespace character to
///        the start of the replaced range.
/// \param ReplaceChars length of the replaced range.
/// \param PreviousPostfix text stored as the Change's PreviousLinePostfix.
/// \param CurrentPrefix text stored as the Change's CurrentLinePrefix.
/// \param InPPDirective combined with !Tok.IsFirst to set
///        ContinuesPPDirective.
/// \param Newlines number of line breaks in the replacement.
/// \param IndentLevel indentation level forwarded to the Change.
/// \param Spaces number of spaces; may be negative, in which case the
///        recorded start column is clamped to 0.
///
/// NOTE(review): statements between the signature and the first visible line
/// of the body are not visible in this view.
72 void WhitespaceManager::replaceWhitespaceInToken(
73 const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
74 StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
75 unsigned Newlines, unsigned IndentLevel, int Spaces) {
78 SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
// The Change covers [Start, Start + ReplaceChars). Its Kind is tok::comment
// for line comments and tok::unknown otherwise, and it only counts as being
// "inside the token" when the replacement contains no line break.
79 Changes.push_back(Change(
80 true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
81 IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
82 CurrentPrefix, Tok.is(TT_LineComment) ? tok::comment : tok::unknown,
83 InPPDirective && !Tok.IsFirst,
84 Tok.is(TT_StartOfName) || Tok.is(TT_FunctionDeclarationName),
85 /*IsInsideToken=*/Newlines == 0));
/// Processes all recorded Changes and returns a reference to a
/// tooling::Replacements object.
///
/// The visible steps are, in order: sort Changes into source order, derive
/// per-change line-break information, then run the alignment passes for
/// declarations, assignments, trailing comments and escaped newlines.
///
/// NOTE(review): lines before the sort and after the last alignment pass are
/// not visible in this view. calculateLineBreakInformation() indexes
/// Changes[0], so an emptiness guard presumably precedes the sort, and the
/// call that materialises the replacements plus the return statement
/// presumably follow - confirm.
88 const tooling::Replacements &WhitespaceManager::generateReplacements() {
// Later passes compare each change with its neighbour, so Changes must be in
// file order first.
92 std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
93 calculateLineBreakInformation();
94 alignConsecutiveDeclarations();
95 alignConsecutiveAssignments();
96 alignTrailingComments();
97 alignEscapedNewlines();
/// Fills in the per-change fields that depend on neighbouring changes:
/// TokenLength, PreviousEndOfTokenColumn, IsTrailingComment,
/// StartOfBlockComment and IndentationOffset.
///
/// Changes[0] is accessed unconditionally, so Changes must not be empty. The
/// offset subtraction in the first loop also relies on Changes already being
/// in file order; generateReplacements() sorts before calling this.
///
/// NOTE(review): some lines of this function (closing braces and at least one
/// branch keyword) are not visible in this view; see the inline notes.
103 void WhitespaceManager::calculateLineBreakInformation() {
104 Changes[0].PreviousEndOfTokenColumn = 0;
105 Change *LastOutsideTokenChange = &Changes[0];
106 for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
107 unsigned OriginalWhitespaceStart =
108 SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
109 unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
110 Changes[i - 1].OriginalWhitespaceRange.getEnd());
// Length of the text between the end of the previous whitespace range and the
// start of this one, plus any postfix this change appends to that line and
// any prefix the previous change put in front of it.
111 Changes[i - 1].TokenLength = OriginalWhitespaceStart -
112 PreviousOriginalWhitespaceEnd +
113 Changes[i].PreviousLinePostfix.size() +
114 Changes[i - 1].CurrentLinePrefix.size();
116 // If there are multiple changes in this token, sum up all the changes until
117 // the end of the line.
118 if (Changes[i - 1].IsInsideToken)
119 LastOutsideTokenChange->TokenLength +=
120 Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
// NOTE(review): the assignment below presumably belongs to an else branch
// whose keyword is on a line not visible here - confirm.
122 LastOutsideTokenChange = &Changes[i - 1];
124 Changes[i].PreviousEndOfTokenColumn =
125 Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
// The previous change is a trailing comment when it is a comment and this
// change starts a new line, is the eof token, or is itself a comment change
// located inside a token.
127 Changes[i - 1].IsTrailingComment =
128 (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof ||
129 (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) &&
130 Changes[i - 1].Kind == tok::comment;
132 // FIXME: The last token is currently not always an eof token; in those
133 // cases, setting TokenLength of the last token to 0 is wrong.
134 Changes.back().TokenLength = 0;
135 Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
// Second pass: link each tok::unknown change to the comment change that
// precedes it and remember its column offset relative to that comment.
137 const WhitespaceManager::Change *LastBlockComment = nullptr;
138 for (auto &Change : Changes) {
139 // Reset the IsTrailingComment flag for changes inside of trailing comments
140 // so they don't get realigned later.
141 if (Change.IsInsideToken)
142 Change.IsTrailingComment = false;
143 Change.StartOfBlockComment = nullptr;
144 Change.IndentationOffset = 0;
145 if (Change.Kind == tok::comment) {
146 LastBlockComment = &Change;
147 } else if (Change.Kind == tok::unknown) {
// The assignment inside the condition is intentional: the offset is only
// computed when a preceding comment change exists.
148 if ((Change.StartOfBlockComment = LastBlockComment))
149 Change.IndentationOffset =
150 Change.StartOfTokenColumn -
151 Change.StartOfBlockComment->StartOfTokenColumn;
// NOTE(review): presumably the final else branch (any other token kind ends
// the current block comment); the branch keyword is not visible here.
153 LastBlockComment = nullptr;
158 // Align a single sequence of tokens, see AlignTokens below.
//
// Walks Changes[Start, End). On each line, the first change accepted by
// Matches is moved to Column by adding the required shift to its Spaces; the
// same shift is then added to StartOfTokenColumn of the changes handled by
// the statements at the bottom of the loop and to PreviousEndOfTokenColumn
// of their successors.
//
// NOTE(review): the return type, the declaration of Shift, and the lines
// that end the two if-blocks are not visible in this view. Shift is
// presumably reset at each line start and the loop presumably skips to the
// next change there - confirm against the full file.
159 template <typename F>
161 AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
162 SmallVector<WhitespaceManager::Change, 16> &Changes) {
163 bool FoundMatchOnLine = false;
165 for (unsigned i = Start; i != End; ++i) {
// A new line begins: forget the match found on the previous line.
166 if (Changes[i].NewlinesBefore > 0) {
167 FoundMatchOnLine = false;
171 // If this is the first matching token to be aligned, remember by how many
172 // spaces it has to be shifted, so the rest of the changes on the line are
173 // shifted by the same amount
174 if (!FoundMatchOnLine && Matches(Changes[i])) {
175 FoundMatchOnLine = true;
176 Shift = Column - Changes[i].StartOfTokenColumn;
177 Changes[i].Spaces += Shift;
181 Changes[i].StartOfTokenColumn += Shift;
// Keep the successor's view of where this token ends consistent; the bound
// is checked against the whole vector, not against End.
182 if (i + 1 != Changes.size())
183 Changes[i + 1].PreviousEndOfTokenColumn += Shift;
187 // Walk through all of the changes and find sequences of matching tokens to
188 // align. To do so, keep track of the lines and whether or not a matching token
189 // was found on a line. If a matching token is found, extend the current
190 // sequence. If the current line cannot be part of a sequence, e.g. because
191 // there is an empty line before it or it contains only non-matching tokens,
192 // finalize the previous sequence.
//
// Style supplies ColumnLimit, Matches is the predicate selecting the tokens
// to align, and Changes is modified in place through AlignTokenSequence.
//
// NOTE(review): several lines of this function (counter updates, continue
// statements, closing braces, parts of the AlignCurrentSequence lambda) are
// not visible in this view; the added notes describe only what is visible.
193 template <typename F>
194 static void AlignTokens(const FormatStyle &Style, F &&Matches,
195 SmallVector<WhitespaceManager::Change, 16> &Changes) {
// [MinColumn, MaxColumn] is the range of columns at which every matching
// token of the current sequence could still be placed.
196 unsigned MinColumn = 0;
197 unsigned MaxColumn = UINT_MAX;
199 // Line number of the start and the end of the current token sequence.
200 unsigned StartOfSequence = 0;
201 unsigned EndOfSequence = 0;
203 // Keep track of the nesting level of matching tokens, i.e. the number of
204 // surrounding (), [], or {}. We will only align a sequence of matching
205 // token that share the same scope depth.
207 // FIXME: This could use FormatToken::NestingLevel information, but there is
208 // an outstanding issue wrt the brace scopes.
209 unsigned NestingLevelOfLastMatch = 0;
210 unsigned NestingLevel = 0;
212 // Keep track of the number of commas before the matching tokens, we will only
213 // align a sequence of matching tokens if they are preceded by the same number
215 unsigned CommasBeforeLastMatch = 0;
216 unsigned CommasBeforeMatch = 0;
218 // Whether a matching token has been found on the current line.
219 bool FoundMatchOnLine = false;
221 // Aligns a sequence of matching tokens, on the MinColumn column.
223 // Sequences start from the first matching token to align, and end at the
224 // first token of the first line that doesn't need to be aligned.
226 // We need to adjust the StartOfTokenColumn of each Change that is on a line
227 // containing any matching token to be aligned and located after such token.
228 auto AlignCurrentSequence = [&] {
// StartOfSequence == 0 doubles as "no sequence open", so a sequence can never
// be recorded as starting at index 0.
229 if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
230 AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
233 MaxColumn = UINT_MAX;
238 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
239 if (Changes[i].NewlinesBefore != 0) {
240 CommasBeforeMatch = 0;
242 // If there is a blank line, or if the last line didn't contain any
243 // matching token, the sequence ends here.
244 if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
245 AlignCurrentSequence();
247 FoundMatchOnLine = false;
// NOTE(review): the bodies of the comma / closing-bracket / opening-bracket
// branches below are mostly on lines not visible here; presumably they
// update CommasBeforeMatch and NestingLevel.
250 if (Changes[i].Kind == tok::comma) {
252 } else if (Changes[i].Kind == tok::r_brace ||
253 Changes[i].Kind == tok::r_paren ||
254 Changes[i].Kind == tok::r_square) {
256 } else if (Changes[i].Kind == tok::l_brace ||
257 Changes[i].Kind == tok::l_paren ||
258 Changes[i].Kind == tok::l_square) {
259 // We want sequences to skip over child scopes if possible, but not the
261 NestingLevelOfLastMatch = std::min(NestingLevelOfLastMatch, NestingLevel);
265 if (!Matches(Changes[i]))
268 // If there is more than one matching token per line, or if the number of
269 // preceding commas, or the scope depth, do not match anymore, end the
271 if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch ||
272 NestingLevel != NestingLevelOfLastMatch)
273 AlignCurrentSequence();
275 CommasBeforeLastMatch = CommasBeforeMatch;
276 NestingLevelOfLastMatch = NestingLevel;
277 FoundMatchOnLine = true;
279 if (StartOfSequence == 0)
// The matching token cannot move left of where it currently starts.
282 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
// Starting from -Spaces cancels the Spaces of change i added by the first
// loop iteration, so the sum is the length of this token plus everything
// that follows it up to the next line break.
283 int LineLengthAfter = -Changes[i].Spaces;
284 for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
285 LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
// Right-most column the token may start at without the rest of the line
// exceeding ColumnLimit. The result is stored as unsigned, so a tail longer
// than ColumnLimit yields a very large value rather than a negative one.
286 unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
288 // If we are restricted by the maximum column width, end the sequence.
// NOTE(review): CommasBeforeLastMatch was assigned from CommasBeforeMatch a
// few statements above and no visible line changes either since, so the
// last clause of this condition looks always-false here - confirm.
289 if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
290 CommasBeforeLastMatch != CommasBeforeMatch) {
291 AlignCurrentSequence();
295 MinColumn = std::max(MinColumn, ChangeMinColumn);
296 MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
// Flush whatever sequence is still open at the end of the input.
299 EndOfSequence = Changes.size();
300 AlignCurrentSequence();
/// Alignment pass for '=' tokens, controlled by
/// Style.AlignConsecutiveAssignments.
///
/// The visible predicate selects a change when its Kind is tok::equal and it
/// is neither the first nor the last change on its line.
///
/// NOTE(review): the statement under the style check, the call that receives
/// the lambda (presumably AlignTokens) and the values returned by the two
/// early-outs (presumably false) are on lines not visible here - confirm.
303 void WhitespaceManager::alignConsecutiveAssignments() {
304 if (!Style.AlignConsecutiveAssignments)
308 [&](const Change &C) {
309 // Do not align on equal signs that are first on a line.
310 if (C.NewlinesBefore > 0)
313 // Do not align on equal signs that are last on a line.
// (&C + 1) is the next element, which requires C to be an element of Changes
// itself (contiguous storage), not a copy; the back() check avoids reading
// past the end.
314 if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
317 return C.Kind == tok::equal;
/// Alignment pass for declaration names, controlled by
/// Style.AlignConsecutiveDeclarations. It runs AlignTokens with a predicate
/// that selects changes whose IsStartOfDeclName flag is set.
///
/// NOTE(review): the statement under the style check (presumably an early
/// return) and the remaining AlignTokens arguments are on lines not visible
/// here.
322 void WhitespaceManager::alignConsecutiveDeclarations() {
323 if (!Style.AlignConsecutiveDeclarations)
326 // FIXME: Currently we don't handle properly the PointerAlignment: Right
327 // The * and & are not aligned and are left dangling. Something has to be done
328 // about it, but it raises the question of alignment of code like:
329 // const char* const* v1;
331 // SomeVeryLongType const& v3;
333 AlignTokens(Style, [](Change const &C) { return C.IsStartOfDeclName; },
/// Groups consecutive trailing comments into sequences and aligns each
/// sequence through alignTrailingComments(Start, End, Column).
///
/// For every change flagged IsTrailingComment the loop computes the range of
/// columns the comment could occupy and either extends the running sequence
/// (narrowing [MinColumn, MaxColumn]) or flushes the sequence collected so
/// far and starts a new range at this comment.
///
/// NOTE(review): several lines of this function (continue statements, the
/// StartOfSequence updates, the target of the last assignment, the Newlines
/// reset and closing braces) are not visible in this view; the added notes
/// describe only what is visible.
337 void WhitespaceManager::alignTrailingComments() {
338 unsigned MinColumn = 0;
339 unsigned MaxColumn = UINT_MAX;
340 unsigned StartOfSequence = 0;
341 bool BreakBeforeNext = false;
342 unsigned Newlines = 0;
343 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
// NOTE(review): the statements guarded by this check and by the
// IsTrailingComment check below are not visible; presumably both skip to the
// next change.
344 if (Changes[i].StartOfBlockComment)
// Accumulates line breaks across changes; compared against 1 further down to
// detect a gap in the sequence.
346 Newlines += Changes[i].NewlinesBefore;
347 if (!Changes[i].IsTrailingComment)
// The comment may stay where it is or move right until its text would cross
// ColumnLimit.
350 unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
351 unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
353 // If we don't create a replacement for this change, we have to consider
354 // it to be immovable.
355 if (!Changes[i].CreateReplacement)
356 ChangeMaxColumn = ChangeMinColumn;
// When the next change continues a preprocessor directive, two more columns
// are reserved (presumably for the trailing " \" - confirm).
358 if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
359 ChangeMaxColumn -= 2;
360 // If this comment follows an } in column 0, it probably documents the
361 // closing of a namespace and we don't want to align it.
362 bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
363 Changes[i - 1].Kind == tok::r_brace &&
364 Changes[i - 1].StartOfTokenColumn == 0;
365 bool WasAlignedWithStartOfNextLine = false;
366 if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
// Compare the comment's column in the original source with the column of the
// next change that is neither a comment nor an unknown token.
367 unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
368 Changes[i].OriginalWhitespaceRange.getEnd());
369 for (unsigned j = i + 1; j != e; ++j) {
370 if (Changes[j].Kind == tok::comment ||
371 Changes[j].Kind == tok::unknown)
372 // Skip over comments and unknown tokens. "unknown" tokens are used for
373 // the continuation of multiline comments.
376 unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
377 Changes[j].OriginalWhitespaceRange.getEnd());
378 // The start of the next token was previously aligned with the
379 // start of this comment.
380 WasAlignedWithStartOfNextLine =
381 CommentColumn == NextColumn ||
382 CommentColumn == NextColumn + Style.IndentWidth;
// Alignment disabled, or a comment after a column-0 '}': flush the running
// sequence and pin this comment to its current column (Min == Max).
386 if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
387 alignTrailingComments(StartOfSequence, i, MinColumn);
388 MinColumn = ChangeMinColumn;
389 MaxColumn = ChangeMinColumn;
// Otherwise start a new sequence when this comment cannot share a column
// with the running one or when one of the break conditions below holds.
391 } else if (BreakBeforeNext || Newlines > 1 ||
392 (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
393 // Break the comment sequence if the previous line did not end
394 // in a trailing comment.
395 (Changes[i].NewlinesBefore == 1 && i > 0 &&
396 !Changes[i - 1].IsTrailingComment) ||
397 WasAlignedWithStartOfNextLine) {
398 alignTrailingComments(StartOfSequence, i, MinColumn);
399 MinColumn = ChangeMinColumn;
400 MaxColumn = ChangeMaxColumn;
// NOTE(review): presumably the else branch - the comment joins the running
// sequence and narrows the feasible column range.
403 MinColumn = std::max(MinColumn, ChangeMinColumn);
404 MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
// NOTE(review): the left-hand side of this expression is on a line not
// visible here; presumably it is assigned to BreakBeforeNext.
407 (i == 0) || (Changes[i].NewlinesBefore > 1) ||
408 // Never start a sequence with a comment at the beginning of
410 (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
// Flush the last open sequence.
413 alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
/// Applies the alignment for one sequence: walks Changes[Start, End) and
/// shifts entries so that trailing comments begin at the requested column.
///
/// \param Start index of the first change of the sequence.
/// \param End index one past the last change of the sequence.
///
/// NOTE(review): the last parameter (the Column used below) and the
/// declaration of Shift are on lines not visible here; Shift is presumably
/// re-initialised to 0 for every change - confirm.
416 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
418 for (unsigned i = Start; i != End; ++i) {
// A trailing comment moves to the target column.
420 if (Changes[i].IsTrailingComment) {
421 Shift = Column - Changes[i].StartOfTokenColumn;
// A change linked to a block comment is placed at that comment's (possibly
// already shifted) column plus its recorded offset from it.
423 if (Changes[i].StartOfBlockComment) {
424 Shift = Changes[i].IndentationOffset +
425 Changes[i].StartOfBlockComment->StartOfTokenColumn -
426 Changes[i].StartOfTokenColumn;
// Apply the shift and keep the next change's end-of-previous-token column in
// sync with it.
429 Changes[i].Spaces += Shift;
430 if (i + 1 != Changes.size())
431 Changes[i + 1].PreviousEndOfTokenColumn += Shift;
432 Changes[i].StartOfTokenColumn += Shift;
/// Computes, macro by macro, the column for escaped newlines and hands each
/// range to alignEscapedNewlines(Start, End, Column).
///
/// The running maximum starts at 0 when Style.AlignEscapedNewlinesLeft is
/// set and at Style.ColumnLimit otherwise. Every line break that continues a
/// preprocessor directive raises it to at least the previous token's end
/// column plus 2.
///
/// NOTE(review): the else keyword, the StartOfMacro update and the closing
/// braces are on lines not visible here; presumably a line break that does
/// not continue a directive ends the current macro and StartOfMacro is set
/// to i there - confirm.
436 void WhitespaceManager::alignEscapedNewlines() {
437 unsigned MaxEndOfLine =
438 Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
439 unsigned StartOfMacro = 0;
// Starts at 1: the first change is never examined as a continuation line.
440 for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
441 Change &C = Changes[i];
442 if (C.NewlinesBefore > 0) {
443 if (C.ContinuesPPDirective) {
444 MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
// Align the finished range, excluding its first change, then reset the
// maximum for the next one.
446 alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
447 MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
// Flush the range that is still open at the end of the input.
452 alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
/// Stores the escaped-newline column on every change in Changes[Start, End)
/// that begins a new line.
///
/// Each such change is asserted to continue a preprocessor directive. If the
/// previous token ends too far right for the requested column
/// (PreviousEndOfTokenColumn + 1 > Column) the stored column is 0; otherwise
/// it is Column.
///
/// NOTE(review): the last parameter (Column) and the else keyword between
/// the two assignments are on lines not visible here.
455 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
457 for (unsigned i = Start; i < End; ++i) {
458 Change &C = Changes[i];
459 if (C.NewlinesBefore > 0) {
460 assert(C.ContinuesPPDirective);
461 if (C.PreviousEndOfTokenColumn + 1 > Column)
462 C.EscapedNewlineColumn = 0;
464 C.EscapedNewlineColumn = Column;
/// Turns every Change that has CreateReplacement set into replacement text
/// and passes it to storeReplacement().
///
/// The text is assembled in this order: PreviousLinePostfix, the line breaks
/// (escaped when the change continues a preprocessor directive), the
/// indentation/spaces, and finally CurrentLinePrefix.
///
/// NOTE(review): the guard around the assert and the else keyword between
/// the two appendNewlineText calls are on lines not visible here. The assert
/// reads Changes[i - 1], so it is presumably guarded by i > 0 - confirm.
469 void WhitespaceManager::generateChanges() {
470 for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
471 const Change &C = Changes[i];
473 assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
474 C.OriginalWhitespaceRange.getBegin() &&
475 "Generating two replacements for the same location");
477 if (C.CreateReplacement) {
478 std::string ReplacementText = C.PreviousLinePostfix;
479 if (C.ContinuesPPDirective)
480 appendNewlineText(ReplacementText, C.NewlinesBefore,
481 C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
483 appendNewlineText(ReplacementText, C.NewlinesBefore);
// Spaces may be negative and is clamped to 0. The column at which the
// whitespace starts is the token's start column minus the clamped spaces.
484 appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
485 C.StartOfTokenColumn - std::max(0, C.Spaces));
486 ReplacementText.append(C.CurrentLinePrefix);
487 storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
/// Adds a replacement of \p Range by the new text to Replaces, unless the
/// source already contains exactly that text in the range.
///
/// \param Range source range to replace; treated as a character range.
///
/// NOTE(review): the second parameter (Text), the statement taken when the
/// text is unchanged (presumably a return) and the check on Err before it is
/// printed are on lines not visible here.
492 void WhitespaceManager::storeReplacement(SourceRange Range,
// Length in bytes of the text currently occupying Range.
494 unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
495 SourceMgr.getFileOffset(Range.getBegin());
496 // Don't create a replacement, if it does not change anything.
497 if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
498 WhitespaceLength) == Text)
500 auto Err = Replaces.add(tooling::Replacement(
501 SourceMgr, CharSourceRange::getCharRange(Range), Text));
502 // FIXME: better error handling. For now, just print an error message in the
// The error is consumed by converting it to a string and written to
// llvm::errs().
505 llvm::errs() << llvm::toString(std::move(Err)) << "\n";
/// Appends Newlines line endings to \p Text, using "\r\n" when UseCRLF is set
/// and "\n" otherwise.
///
/// NOTE(review): the second parameter (Newlines) is declared on a line not
/// visible here.
510 void WhitespaceManager::appendNewlineText(std::string &Text,
512 for (unsigned i = 0; i < Newlines; ++i)
513 Text.append(UseCRLF ? "\r\n" : "\n");
/// Appends Newlines escaped line endings to \p Text. Each one consists of
/// padding spaces followed by a backslash and the line ending ("\r\n" when
/// UseCRLF is set, "\n" otherwise).
///
/// \param Text string that receives the output.
/// \param Newlines number of escaped line endings to emit.
/// \param PreviousEndOfTokenColumn column at which the preceding token ends.
/// \param EscapedNewlineColumn column chosen for the escaped newline.
///
/// NOTE(review): the declaration of Offset and whatever encloses the loop
/// are on lines not visible here. Offset is presumably initialised from the
/// std::min expression below and presumably updated between iterations -
/// confirm.
516 void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
517 unsigned PreviousEndOfTokenColumn,
518 unsigned EscapedNewlineColumn) {
// The comparison is done on int, so EscapedNewlineColumn == 0 gives -1 here
// rather than a wrapped-around unsigned value.
521 std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
522 for (unsigned i = 0; i < Newlines; ++i) {
// Pad with EscapedNewlineColumn - Offset - 1 spaces before the backslash.
523 Text.append(EscapedNewlineColumn - Offset - 1, ' ');
524 Text.append(UseCRLF ? "\\\r\n" : "\\\n");
/// Appends whitespace worth \p Spaces columns to \p Text, as tabs and/or
/// spaces depending on Style.UseTab.
///
/// \param Text string that receives the whitespace.
/// \param IndentLevel indentation level; only read in the UT_ForIndentation
///        case.
/// \param Spaces total width of the whitespace in columns.
/// \param WhitespaceStartColumn column at which the whitespace begins.
///
/// Every case except UT_Never divides or takes a remainder by
/// Style.TabWidth, so TabWidth must be non-zero on those paths.
///
/// NOTE(review): the break statements, closing braces and the statement that
/// emits the first (partial-width) tab in the UT_Always case are on lines not
/// visible here.
530 void WhitespaceManager::appendIndentText(std::string &Text,
531 unsigned IndentLevel, unsigned Spaces,
532 unsigned WhitespaceStartColumn) {
533 switch (Style.UseTab) {
// Spaces only.
534 case FormatStyle::UT_Never:
535 Text.append(Spaces, ' ');
537 case FormatStyle::UT_Always: {
// Columns from the start of the whitespace to the next multiple of TabWidth.
538 unsigned FirstTabWidth =
539 Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
540 // Indent with tabs only when there's at least one full tab.
541 if (FirstTabWidth + Style.TabWidth <= Spaces) {
542 Spaces -= FirstTabWidth;
// The remaining width becomes whole tabs followed by leftover spaces.
545 Text.append(Spaces / Style.TabWidth, '\t');
546 Text.append(Spaces % Style.TabWidth, ' ');
549 case FormatStyle::UT_ForIndentation:
// Tabs are used only when the whitespace starts in column 0, and only for
// the part that corresponds to the indentation level.
550 if (WhitespaceStartColumn == 0) {
551 unsigned Indentation = IndentLevel * Style.IndentWidth;
552 // This happens, e.g. when a line in a block comment is indented less than
554 if (Indentation > Spaces)
555 Indentation = Spaces;
556 unsigned Tabs = Indentation / Style.TabWidth;
557 Text.append(Tabs, '\t');
558 Spaces -= Tabs * Style.TabWidth;
// Whatever is left is emitted as spaces.
560 Text.append(Spaces, ' ');
562 case FormatStyle::UT_ForContinuationAndIndentation:
// As above, but the whole width is converted to tabs, not just the
// indentation part.
563 if (WhitespaceStartColumn == 0) {
564 unsigned Tabs = Spaces / Style.TabWidth;
565 Text.append(Tabs, '\t');
566 Spaces -= Tabs * Style.TabWidth;
568 Text.append(Spaces, ' ');
573 } // namespace format