//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Contains implementation of BreakableToken class and classes derived
/// from it.
///
//===----------------------------------------------------------------------===//
16 #include "BreakableToken.h"
17 #include "ContinuationIndenter.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "clang/Format/Format.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/Support/Debug.h"
24 #define DEBUG_TYPE "format-token-breaker"
/// Characters treated as blanks when trimming and splitting token text.
/// Note that '\n' is not part of this set.
static const char *const Blanks = " \t\v\f\r";

/// Returns true if \p C is one of the characters listed in Blanks.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
43 static StringRef getLineCommentIndentPrefix(StringRef Comment) {
44 static const char *const KnownPrefixes[] = {"///", "//", "//!"};
45 StringRef LongestPrefix;
46 for (StringRef KnownPrefix : KnownPrefixes) {
47 if (Comment.startswith(KnownPrefix)) {
48 size_t PrefixLength = KnownPrefix.size();
49 while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
51 if (PrefixLength > LongestPrefix.size())
52 LongestPrefix = Comment.substr(0, PrefixLength);
58 static BreakableToken::Split getCommentSplit(StringRef Text,
59 unsigned ContentStartColumn,
62 encoding::Encoding Encoding) {
63 if (ColumnLimit <= ContentStartColumn + 1)
64 return BreakableToken::Split(StringRef::npos, 0);
66 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
67 unsigned MaxSplitBytes = 0;
69 for (unsigned NumChars = 0;
70 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
71 unsigned BytesInChar =
72 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
74 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
75 ContentStartColumn, TabWidth, Encoding);
76 MaxSplitBytes += BytesInChar;
79 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
80 if (SpaceOffset == StringRef::npos ||
81 // Don't break at leading whitespace.
82 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
83 // Make sure that we don't break at leading whitespace that
84 // reaches past MaxSplit.
85 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
86 if (FirstNonWhitespace == StringRef::npos)
87 // If the comment is only whitespace, we cannot split.
88 return BreakableToken::Split(StringRef::npos, 0);
89 SpaceOffset = Text.find_first_of(
90 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
92 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
93 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
94 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
95 return BreakableToken::Split(BeforeCut.size(),
96 AfterCut.begin() - BeforeCut.end());
98 return BreakableToken::Split(StringRef::npos, 0);
101 static BreakableToken::Split
102 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
103 unsigned TabWidth, encoding::Encoding Encoding) {
104 // FIXME: Reduce unit test case.
106 return BreakableToken::Split(StringRef::npos, 0);
107 if (ColumnLimit <= UsedColumns)
108 return BreakableToken::Split(StringRef::npos, 0);
109 unsigned MaxSplit = ColumnLimit - UsedColumns;
110 StringRef::size_type SpaceOffset = 0;
111 StringRef::size_type SlashOffset = 0;
112 StringRef::size_type WordStartOffset = 0;
113 StringRef::size_type SplitPoint = 0;
114 for (unsigned Chars = 0;;) {
116 if (Text[0] == '\\') {
117 Advance = encoding::getEscapeSequenceLength(Text);
120 Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
121 Chars += encoding::columnWidthWithTabs(
122 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
125 if (Chars > MaxSplit || Text.size() <= Advance)
128 if (IsBlank(Text[0]))
129 SpaceOffset = SplitPoint;
131 SlashOffset = SplitPoint;
132 if (Advance == 1 && !isAlphanumeric(Text[0]))
133 WordStartOffset = SplitPoint;
135 SplitPoint += Advance;
136 Text = Text.substr(Advance);
139 if (SpaceOffset != 0)
140 return BreakableToken::Split(SpaceOffset + 1, 0);
141 if (SlashOffset != 0)
142 return BreakableToken::Split(SlashOffset + 1, 0);
143 if (WordStartOffset != 0)
144 return BreakableToken::Split(WordStartOffset + 1, 0);
146 return BreakableToken::Split(SplitPoint, 0);
147 return BreakableToken::Split(StringRef::npos, 0);
150 bool switchesFormatting(const FormatToken &Token) {
151 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
152 "formatting regions are switched by comment tokens");
153 StringRef Content = Token.TokenText.substr(2).ltrim();
154 return Content.startswith("clang-format on") ||
155 Content.startswith("clang-format off");
159 BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
161 // Example: consider the content
163 // - RemainingTokenColumns is the original number of columns, 10;
164 // - Split is (4, 2), denoting the two spaces between the two words;
166 // We compute the number of columns when the split is compressed into a single
169 return RemainingTokenColumns + 1 - Split.second;
172 unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
174 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
175 unsigned LineIndex, unsigned TailOffset,
176 StringRef::size_type Length) const {
177 return StartColumn + Prefix.size() + Postfix.size() +
178 encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
179 StartColumn + Prefix.size(),
180 Style.TabWidth, Encoding);
183 BreakableSingleLineToken::BreakableSingleLineToken(
184 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
185 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
186 const FormatStyle &Style)
187 : BreakableToken(Tok, InPPDirective, Encoding, Style),
188 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
189 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
190 Line = Tok.TokenText.substr(
191 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
194 BreakableStringLiteral::BreakableStringLiteral(
195 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
196 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
197 const FormatStyle &Style)
198 : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
201 BreakableToken::Split
202 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
203 unsigned ColumnLimit,
204 llvm::Regex &CommentPragmasRegex) const {
205 return getStringSplit(Line.substr(TailOffset),
206 StartColumn + Prefix.size() + Postfix.size(),
207 ColumnLimit, Style.TabWidth, Encoding);
210 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
211 unsigned TailOffset, Split Split,
212 WhitespaceManager &Whitespaces) {
213 Whitespaces.replaceWhitespaceInToken(
214 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
215 Prefix, InPPDirective, 1, StartColumn);
218 BreakableComment::BreakableComment(const FormatToken &Token,
219 unsigned StartColumn,
221 encoding::Encoding Encoding,
222 const FormatStyle &Style)
223 : BreakableToken(Token, InPPDirective, Encoding, Style),
224 StartColumn(StartColumn) {}
226 unsigned BreakableComment::getLineCount() const { return Lines.size(); }
228 BreakableToken::Split
229 BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
230 unsigned ColumnLimit,
231 llvm::Regex &CommentPragmasRegex) const {
232 // Don't break lines matching the comment pragmas regex.
233 if (CommentPragmasRegex.match(Content[LineIndex]))
234 return Split(StringRef::npos, 0);
235 return getCommentSplit(Content[LineIndex].substr(TailOffset),
236 getContentStartColumn(LineIndex, TailOffset),
237 ColumnLimit, Style.TabWidth, Encoding);
240 void BreakableComment::compressWhitespace(unsigned LineIndex,
241 unsigned TailOffset, Split Split,
242 WhitespaceManager &Whitespaces) {
243 StringRef Text = Content[LineIndex].substr(TailOffset);
244 // Text is relative to the content line, but Whitespaces operates relative to
245 // the start of the corresponding token, so compute the start of the Split
246 // that needs to be compressed into a single space relative to the start of
248 unsigned BreakOffsetInToken =
249 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
250 unsigned CharsToRemove = Split.second;
251 Whitespaces.replaceWhitespaceInToken(
252 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
253 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
256 BreakableToken::Split
257 BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
258 unsigned PreviousEndColumn,
259 unsigned ColumnLimit) const {
260 unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
261 StringRef TrimmedText = Text.rtrim(Blanks);
262 // This is the width of the resulting line in case the full line of Text gets
263 // reflown up starting at ReflowStartColumn.
264 unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
265 TrimmedText, ReflowStartColumn,
266 Style.TabWidth, Encoding);
267 // If the full line fits up, we return a reflow split after it,
268 // otherwise we compute the largest piece of text that fits after
269 // ReflowStartColumn.
271 FullWidth <= ColumnLimit
272 ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
273 : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
274 Style.TabWidth, Encoding);
276 // We need to be extra careful here, because while it's OK to keep a long line
277 // if it can't be broken into smaller pieces (like when the first word of a
278 // long line is longer than the column limit), it's not OK to reflow that long
279 // word up. So we recompute the size of the previous line after reflowing and
280 // only return the reflow split if that's under the line limit.
281 if (ReflowSplit.first != StringRef::npos &&
282 // Check if the width of the newly reflown line is under the limit.
283 PreviousEndColumn + ReflowPrefix.size() +
284 encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
287 Style.TabWidth, Encoding) <=
291 return Split(StringRef::npos, 0);
294 const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
295 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
298 static bool mayReflowContent(StringRef Content) {
299 Content = Content.trim(Blanks);
300 // Lines starting with '@' commonly have special meaning.
301 static const SmallVector<StringRef, 4> kSpecialMeaningPrefixes = {
302 "@", "TODO", "FIXME", "XXX"};
303 bool hasSpecialMeaningPrefix = false;
304 for (StringRef Prefix : kSpecialMeaningPrefixes) {
305 if (Content.startswith(Prefix)) {
306 hasSpecialMeaningPrefix = true;
310 // Simple heuristic for what to reflow: content should contain at least two
311 // characters and either the first or second character must be
313 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
314 !Content.endswith("\\") &&
315 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
316 // true, then the first code point must be 1 byte long.
317 (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
320 BreakableBlockComment::BreakableBlockComment(
321 const FormatToken &Token, unsigned StartColumn,
322 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
323 encoding::Encoding Encoding, const FormatStyle &Style)
324 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
325 assert(Tok.is(TT_BlockComment) &&
326 "block comment section must start with a block comment");
328 StringRef TokenText(Tok.TokenText);
329 assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
330 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
332 int IndentDelta = StartColumn - OriginalStartColumn;
333 Content.resize(Lines.size());
334 Content[0] = Lines[0];
335 ContentColumn.resize(Lines.size());
336 // Account for the initial '/*'.
337 ContentColumn[0] = StartColumn + 2;
338 Tokens.resize(Lines.size());
339 for (size_t i = 1; i < Lines.size(); ++i)
340 adjustWhitespace(i, IndentDelta);
342 // Align decorations with the column of the star on the first line,
343 // that is one column after the start "/*".
344 DecorationColumn = StartColumn + 1;
346 // Account for comment decoration patterns like this:
351 if (Lines.size() >= 2 && Content[1].startswith("**") &&
352 static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
353 DecorationColumn = StartColumn;
357 if (Lines.size() == 1 && !FirstInLine) {
358 // Comments for which FirstInLine is false can start on arbitrary column,
359 // and available horizontal space can be too small to align consecutive
360 // lines with the first one.
361 // FIXME: We could, probably, align them to current indentation level, but
362 // now we just wrap them without stars.
365 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
366 // If the last line is empty, the closing "*/" will have a star.
367 if (i + 1 == e && Content[i].empty())
369 if (!Content[i].empty() && i + 1 != e &&
370 Decoration.startswith(Content[i]))
372 while (!Content[i].startswith(Decoration))
373 Decoration = Decoration.substr(0, Decoration.size() - 1);
376 LastLineNeedsDecoration = true;
377 IndentAtLineBreak = ContentColumn[0] + 1;
378 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
379 if (Content[i].empty()) {
381 // Empty last line means that we already have a star as a part of the
382 // trailing */. We also need to preserve whitespace, so that */ is
383 // correctly indented.
384 LastLineNeedsDecoration = false;
385 // Align the star in the last '*/' with the stars on the previous lines.
386 if (e >= 2 && !Decoration.empty()) {
387 ContentColumn[i] = DecorationColumn;
389 } else if (Decoration.empty()) {
390 // For all other lines, set the start column to 0 if they're empty, so
391 // we do not insert trailing whitespace anywhere.
392 ContentColumn[i] = 0;
397 // The first line already excludes the star.
398 // The last line excludes the star if LastLineNeedsDecoration is false.
399 // For all other lines, adjust the line to exclude the star and
400 // (optionally) the first whitespace.
401 unsigned DecorationSize = Decoration.startswith(Content[i])
404 if (DecorationSize) {
405 ContentColumn[i] = DecorationColumn + DecorationSize;
407 Content[i] = Content[i].substr(DecorationSize);
408 if (!Decoration.startswith(Content[i]))
410 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
413 std::max<unsigned>(IndentAtLineBreak, Decoration.size());
416 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
417 for (size_t i = 0; i < Lines.size(); ++i) {
418 llvm::dbgs() << i << " |" << Content[i] << "| "
419 << "CC=" << ContentColumn[i] << "| "
420 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
425 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
427 // When in a preprocessor directive, the trailing backslash in a block comment
428 // is not needed, but can serve a purpose of uniformity with necessary escaped
429 // newlines outside the comment. In this case we remove it here before
430 // trimming the trailing whitespace. The backslash will be re-added later when
431 // inserting a line break.
432 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
433 if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
436 // Calculate the end of the non-whitespace text in the previous line.
438 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
439 if (EndOfPreviousLine == StringRef::npos)
440 EndOfPreviousLine = 0;
443 // Calculate the start of the non-whitespace text in the current line.
444 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
445 if (StartOfLine == StringRef::npos)
446 StartOfLine = Lines[LineIndex].rtrim("\r\n").size();
448 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
449 // Adjust Lines to only contain relevant text.
450 size_t PreviousContentOffset =
451 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
452 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
453 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
454 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
456 // Adjust the start column uniformly across all lines.
457 ContentColumn[LineIndex] =
458 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
462 unsigned BreakableBlockComment::getLineLengthAfterSplit(
463 unsigned LineIndex, unsigned TailOffset,
464 StringRef::size_type Length) const {
465 unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
466 unsigned LineLength =
467 ContentStartColumn + encoding::columnWidthWithTabs(
468 Content[LineIndex].substr(TailOffset, Length),
469 ContentStartColumn, Style.TabWidth, Encoding);
470 // The last line gets a "*/" postfix.
471 if (LineIndex + 1 == Lines.size()) {
473 // We never need a decoration when breaking just the trailing "*/" postfix.
474 // Note that checking that Length == 0 is not enough, since Length could
475 // also be StringRef::npos.
476 if (Content[LineIndex].substr(TailOffset, Length).empty()) {
477 LineLength -= Decoration.size();
483 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
485 WhitespaceManager &Whitespaces) {
486 StringRef Text = Content[LineIndex].substr(TailOffset);
487 StringRef Prefix = Decoration;
488 // We need this to account for the case when we have a decoration "* " for all
489 // the lines except for the last one, where the star in "*/" acts as a
491 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
492 if (LineIndex + 1 == Lines.size() &&
493 Text.size() == Split.first + Split.second) {
494 // For the last line we need to break before "*/", but not to add "* ".
496 if (LocalIndentAtLineBreak >= 2)
497 LocalIndentAtLineBreak -= 2;
499 // The split offset is from the beginning of the line. Convert it to an offset
500 // from the beginning of the token text.
501 unsigned BreakOffsetInToken =
502 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
503 unsigned CharsToRemove = Split.second;
504 assert(LocalIndentAtLineBreak >= Prefix.size());
505 Whitespaces.replaceWhitespaceInToken(
506 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,
507 InPPDirective, /*Newlines=*/1,
508 /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
511 BreakableToken::Split BreakableBlockComment::getSplitBefore(
513 unsigned PreviousEndColumn,
514 unsigned ColumnLimit,
515 llvm::Regex &CommentPragmasRegex) const {
516 if (!mayReflow(LineIndex, CommentPragmasRegex))
517 return Split(StringRef::npos, 0);
518 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
519 return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
523 unsigned BreakableBlockComment::getReflownColumn(
526 unsigned PreviousEndColumn) const {
527 unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
528 // If this is the last line, it will carry around its '*/' postfix.
529 unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
530 // The line is composed of previous text, reflow prefix, reflown text and
532 unsigned ReflownColumn =
533 StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
534 Style.TabWidth, Encoding) +
536 return ReflownColumn;
539 unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
540 unsigned LineIndex, unsigned TailOffset,
541 unsigned PreviousEndColumn,
542 unsigned ColumnLimit,
543 Split SplitBefore) const {
544 if (SplitBefore.first == StringRef::npos ||
545 // Block comment line contents contain the trailing whitespace after the
546 // decoration, so the need of left trim. Note that this behavior is
547 // consistent with the breaking of block comments where the indentation of
548 // a broken line is uniform across all the lines of the block comment.
549 SplitBefore.first + SplitBefore.second <
550 Content[LineIndex].ltrim().size()) {
551 // A piece of line, not the whole, gets reflown.
552 return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
554 // The whole line gets reflown, need to check if we need to insert a break
555 // for the postfix or not.
556 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
557 unsigned ReflownColumn =
558 getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
559 if (ReflownColumn <= ColumnLimit) {
560 return ReflownColumn;
562 return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
565 void BreakableBlockComment::replaceWhitespaceBefore(
566 unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
567 Split SplitBefore, WhitespaceManager &Whitespaces) {
568 if (LineIndex == 0) return;
569 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
570 if (SplitBefore.first != StringRef::npos) {
571 // Here we need to reflow.
572 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
573 "Reflowing whitespace within a token");
574 // This is the offset of the end of the last line relative to the start of
575 // the token text in the token.
576 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
577 Content[LineIndex - 1].size() -
578 tokenAt(LineIndex).TokenText.data();
579 unsigned WhitespaceLength = TrimmedContent.data() -
580 tokenAt(LineIndex).TokenText.data() -
581 WhitespaceOffsetInToken;
582 Whitespaces.replaceWhitespaceInToken(
583 tokenAt(LineIndex), WhitespaceOffsetInToken,
584 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
585 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
587 // Check if we need to also insert a break at the whitespace range.
588 // For this we first adapt the reflow split relative to the beginning of the
590 // Note that we don't need a penalty for this break, since it doesn't change
591 // the total number of lines.
592 Split BreakSplit = SplitBefore;
593 BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
594 unsigned ReflownColumn =
595 getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
596 if (ReflownColumn > ColumnLimit) {
597 insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
602 // Here no reflow with the previous line will happen.
603 // Fix the decoration of the line at LineIndex.
604 StringRef Prefix = Decoration;
605 if (Content[LineIndex].empty()) {
606 if (LineIndex + 1 == Lines.size()) {
607 if (!LastLineNeedsDecoration) {
608 // If the last line was empty, we don't need a prefix, as the */ will
609 // line up with the decoration (if it exists).
612 } else if (!Decoration.empty()) {
613 // For other empty lines, if we do have a decoration, adapt it to not
614 // contain a trailing whitespace.
615 Prefix = Prefix.substr(0, 1);
618 if (ContentColumn[LineIndex] == 1) {
619 // This line starts immediately after the decorating *.
620 Prefix = Prefix.substr(0, 1);
623 // This is the offset of the end of the last line relative to the start of the
624 // token text in the token.
625 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
626 Content[LineIndex - 1].size() -
627 tokenAt(LineIndex).TokenText.data();
628 unsigned WhitespaceLength = Content[LineIndex].data() -
629 tokenAt(LineIndex).TokenText.data() -
630 WhitespaceOffsetInToken;
631 Whitespaces.replaceWhitespaceInToken(
632 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
633 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
636 bool BreakableBlockComment::mayReflow(unsigned LineIndex,
637 llvm::Regex &CommentPragmasRegex) const {
638 // Content[LineIndex] may exclude the indent after the '*' decoration. In that
639 // case, we compute the start of the comment pragma manually.
640 StringRef IndentContent = Content[LineIndex];
641 if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
642 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
644 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
645 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
646 !switchesFormatting(tokenAt(LineIndex));
650 BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
651 unsigned TailOffset) const {
652 // If we break, we always break at the predefined indent.
654 return IndentAtLineBreak;
655 return std::max(0, ContentColumn[LineIndex]);
658 BreakableLineCommentSection::BreakableLineCommentSection(
659 const FormatToken &Token, unsigned StartColumn,
660 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
661 encoding::Encoding Encoding, const FormatStyle &Style)
662 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
663 assert(Tok.is(TT_LineComment) &&
664 "line comment section must start with a line comment");
665 FormatToken *LineTok = nullptr;
666 for (const FormatToken *CurrentTok = &Tok;
667 CurrentTok && CurrentTok->is(TT_LineComment);
668 CurrentTok = CurrentTok->Next) {
669 LastLineTok = LineTok;
670 StringRef TokenText(CurrentTok->TokenText);
671 assert(TokenText.startswith("//"));
672 size_t FirstLineIndex = Lines.size();
673 TokenText.split(Lines, "\n");
674 Content.resize(Lines.size());
675 ContentColumn.resize(Lines.size());
676 OriginalContentColumn.resize(Lines.size());
677 Tokens.resize(Lines.size());
678 Prefix.resize(Lines.size());
679 OriginalPrefix.resize(Lines.size());
680 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
681 // We need to trim the blanks in case this is not the first line in a
682 // multiline comment. Then the indent is included in Lines[i].
683 StringRef IndentPrefix =
684 getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
685 assert(IndentPrefix.startswith("//"));
686 OriginalPrefix[i] = Prefix[i] = IndentPrefix;
687 if (Lines[i].size() > Prefix[i].size() &&
688 isAlphanumeric(Lines[i][Prefix[i].size()])) {
689 if (Prefix[i] == "//")
691 else if (Prefix[i] == "///")
693 else if (Prefix[i] == "//!")
698 Content[i] = Lines[i].substr(IndentPrefix.size());
699 OriginalContentColumn[i] =
701 encoding::columnWidthWithTabs(OriginalPrefix[i],
707 encoding::columnWidthWithTabs(Prefix[i],
712 // Calculate the end of the non-whitespace text in this line.
713 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
714 if (EndOfLine == StringRef::npos)
715 EndOfLine = Content[i].size();
718 Content[i] = Content[i].substr(0, EndOfLine);
720 LineTok = CurrentTok->Next;
721 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
722 // A line comment section needs to broken by a line comment that is
723 // preceded by at least two newlines. Note that we put this break here
724 // instead of breaking at a previous stage during parsing, since that
725 // would split the contents of the enum into two unwrapped lines in this
726 // example, which is undesirable:
728 // a, // comment about a
730 // // comment about b
734 // FIXME: Consider putting separate line comment sections as children to
735 // the unwrapped line instead.
741 unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
742 unsigned LineIndex, unsigned TailOffset,
743 StringRef::size_type Length) const {
744 unsigned ContentStartColumn =
745 (TailOffset == 0 ? ContentColumn[LineIndex]
746 : OriginalContentColumn[LineIndex]);
747 return ContentStartColumn + encoding::columnWidthWithTabs(
748 Content[LineIndex].substr(TailOffset, Length),
749 ContentStartColumn, Style.TabWidth, Encoding);
752 void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
753 unsigned TailOffset, Split Split,
754 WhitespaceManager &Whitespaces) {
755 StringRef Text = Content[LineIndex].substr(TailOffset);
756 // Compute the offset of the split relative to the beginning of the token
758 unsigned BreakOffsetInToken =
759 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
760 unsigned CharsToRemove = Split.second;
761 // Compute the size of the new indent, including the size of the new prefix of
762 // the newly broken line.
763 unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
764 Prefix[LineIndex].size() -
765 OriginalPrefix[LineIndex].size();
766 assert(IndentAtLineBreak >= Prefix[LineIndex].size());
767 Whitespaces.replaceWhitespaceInToken(
768 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
769 Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
770 /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
773 BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
774 unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
775 llvm::Regex &CommentPragmasRegex) const {
776 if (!mayReflow(LineIndex, CommentPragmasRegex))
777 return Split(StringRef::npos, 0);
778 return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
782 unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
783 unsigned LineIndex, unsigned TailOffset,
784 unsigned PreviousEndColumn,
785 unsigned ColumnLimit,
786 Split SplitBefore) const {
787 if (SplitBefore.first == StringRef::npos ||
788 SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
789 // A piece of line, not the whole line, gets reflown.
790 return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
792 // The whole line gets reflown.
793 unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
794 return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
801 void BreakableLineCommentSection::replaceWhitespaceBefore(
802 unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
803 Split SplitBefore, WhitespaceManager &Whitespaces) {
804 // If this is the first line of a token, we need to inform Whitespace Manager
805 // about it: either adapt the whitespace range preceding it, or mark it as an
806 // untouchable token.
807 // This happens for instance here:
810 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
811 if (SplitBefore.first != StringRef::npos) {
812 // Reflow happens between tokens. Replace the whitespace between the
813 // tokens by the empty string.
814 Whitespaces.replaceWhitespace(
815 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
816 /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
817 // Replace the indent and prefix of the token with the reflow prefix.
818 unsigned WhitespaceLength =
819 Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
820 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
822 /*ReplaceChars=*/WhitespaceLength,
823 /*PreviousPostfix=*/"",
824 /*CurrentPrefix=*/ReflowPrefix,
825 /*InPPDirective=*/false,
829 // This is the first line for the current token, but no reflow with the
830 // previous token is necessary. However, we still may need to adjust the
831 // start column. Note that ContentColumn[LineIndex] is the expected
832 // content column after a possible update to the prefix, hence the prefix
833 // length change is included.
834 unsigned LineColumn =
835 ContentColumn[LineIndex] -
836 (Content[LineIndex].data() - Lines[LineIndex].data()) +
837 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
839 // We always want to create a replacement instead of adding an untouchable
840 // token, even if LineColumn is the same as the original column of the
841 // token. This is because WhitespaceManager doesn't align trailing
842 // comments if they are untouchable.
843 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
845 /*Spaces=*/LineColumn,
846 /*StartOfTokenColumn=*/LineColumn,
847 /*InPPDirective=*/false);
850 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
851 // Adjust the prefix if necessary.
853 // Take care of the space possibly introduced after a decoration.
854 assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
855 "Expecting a line comment prefix to differ from original by at most "
857 Whitespaces.replaceWhitespaceInToken(
858 tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
859 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
861 // Add a break after a reflow split has been introduced, if necessary.
862 // Note that this break doesn't need to be penalized, since it doesn't change
863 // the number of lines.
864 if (SplitBefore.first != StringRef::npos &&
865 SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
866 insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
870 void BreakableLineCommentSection::updateNextToken(LineState& State) const {
872 State.NextToken = LastLineTok->Next;
876 bool BreakableLineCommentSection::mayReflow(
877 unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
878 // Line comments have the indent as part of the prefix, so we need to
879 // recompute the start of the line.
880 StringRef IndentContent = Content[LineIndex];
881 if (Lines[LineIndex].startswith("//")) {
882 IndentContent = Lines[LineIndex].substr(2);
884 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
885 mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
886 !switchesFormatting(tokenAt(LineIndex)) &&
887 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
891 BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
892 unsigned TailOffset) const {
893 if (TailOffset != 0) {
894 return OriginalContentColumn[LineIndex];
896 return ContentColumn[LineIndex];
899 } // namespace format