contrib/llvm/tools/clang/lib/Format/BreakableToken.cpp

   1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief Contains implementation of BreakableToken class and classes derived
  12 /// from it.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "BreakableToken.h"
  17 #include "Comments.h"
  18 #include "clang/Basic/CharInfo.h"
  19 #include "clang/Format/Format.h"
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/Support/Debug.h"
  22 #include <algorithm>
  23
  24 #define DEBUG_TYPE "format-token-breaker"
  25
  26 namespace clang {
  27 namespace format {
  28
// The set of characters this file treats as blanks when searching for split
// points and when trimming text: space, horizontal tab, vertical tab, form
// feed and carriage return. Note that '\n' is not part of the set.
static const char *const Blanks = " \t\v\f\r";
  30 static bool IsBlank(char C) {
  31   switch (C) {
  32   case ' ':
  33   case '\t':
  34   case '\v':
  35   case '\f':
  36   case '\r':
  37     return true;
  38   default:
  39     return false;
  40   }
  41 }
  42
  43 static BreakableToken::Split getCommentSplit(StringRef Text,
  44                                              unsigned ContentStartColumn,
  45                                              unsigned ColumnLimit,
  46                                              unsigned TabWidth,
  47                                              encoding::Encoding Encoding) {
  48   if (ColumnLimit <= ContentStartColumn + 1)
  49     return BreakableToken::Split(StringRef::npos, 0);
  50
  51   unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
  52   unsigned MaxSplitBytes = 0;
  53
  54   for (unsigned NumChars = 0;
  55        NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
  56     unsigned BytesInChar =
  57         encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
  58     NumChars +=
  59         encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
  60                                       ContentStartColumn, TabWidth, Encoding);
  61     MaxSplitBytes += BytesInChar;
  62   }
  63
  64   StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
  65   if (SpaceOffset == StringRef::npos ||
  66       // Don't break at leading whitespace.
  67       Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
  68     // Make sure that we don't break at leading whitespace that
  69     // reaches past MaxSplit.
  70     StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
  71     if (FirstNonWhitespace == StringRef::npos)
  72       // If the comment is only whitespace, we cannot split.
  73       return BreakableToken::Split(StringRef::npos, 0);
  74     SpaceOffset = Text.find_first_of(
  75         Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
  76   }
  77   if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
  78     StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
  79     StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
  80     return BreakableToken::Split(BeforeCut.size(),
  81                                  AfterCut.begin() - BeforeCut.end());
  82   }
  83   return BreakableToken::Split(StringRef::npos, 0);
  84 }
  85
  86 static BreakableToken::Split
  87 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
  88                unsigned TabWidth, encoding::Encoding Encoding) {
  89   // FIXME: Reduce unit test case.
  90   if (Text.empty())
  91     return BreakableToken::Split(StringRef::npos, 0);
  92   if (ColumnLimit <= UsedColumns)
  93     return BreakableToken::Split(StringRef::npos, 0);
  94   unsigned MaxSplit = ColumnLimit - UsedColumns;
  95   StringRef::size_type SpaceOffset = 0;
  96   StringRef::size_type SlashOffset = 0;
  97   StringRef::size_type WordStartOffset = 0;
  98   StringRef::size_type SplitPoint = 0;
  99   for (unsigned Chars = 0;;) {
 100     unsigned Advance;
 101     if (Text[0] == '\\') {
 102       Advance = encoding::getEscapeSequenceLength(Text);
 103       Chars += Advance;
 104     } else {
 105       Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
 106       Chars += encoding::columnWidthWithTabs(
 107           Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
 108     }
 109
 110     if (Chars > MaxSplit || Text.size() <= Advance)
 111       break;
 112
 113     if (IsBlank(Text[0]))
 114       SpaceOffset = SplitPoint;
 115     if (Text[0] == '/')
 116       SlashOffset = SplitPoint;
 117     if (Advance == 1 && !isAlphanumeric(Text[0]))
 118       WordStartOffset = SplitPoint;
 119
 120     SplitPoint += Advance;
 121     Text = Text.substr(Advance);
 122   }
 123
 124   if (SpaceOffset != 0)
 125     return BreakableToken::Split(SpaceOffset + 1, 0);
 126   if (SlashOffset != 0)
 127     return BreakableToken::Split(SlashOffset + 1, 0);
 128   if (WordStartOffset != 0)
 129     return BreakableToken::Split(WordStartOffset + 1, 0);
 130   if (SplitPoint != 0)
 131     return BreakableToken::Split(SplitPoint, 0);
 132   return BreakableToken::Split(StringRef::npos, 0);
 133 }
 134
 135 unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
 136
 137 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
 138     unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
 139   return StartColumn + Prefix.size() + Postfix.size() +
 140          encoding::columnWidthWithTabs(Line.substr(Offset, Length),
 141                                        StartColumn + Prefix.size(),
 142                                        Style.TabWidth, Encoding);
 143 }
 144
 145 BreakableSingleLineToken::BreakableSingleLineToken(
 146     const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
 147     StringRef Prefix, StringRef Postfix, bool InPPDirective,
 148     encoding::Encoding Encoding, const FormatStyle &Style)
 149     : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style),
 150       StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
 151   assert(Tok.TokenText.endswith(Postfix));
 152   Line = Tok.TokenText.substr(
 153       Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
 154 }
 155
 156 BreakableStringLiteral::BreakableStringLiteral(
 157     const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn,
 158     StringRef Prefix, StringRef Postfix, bool InPPDirective,
 159     encoding::Encoding Encoding, const FormatStyle &Style)
 160     : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix,
 161                                InPPDirective, Encoding, Style) {}
 162
 163 BreakableToken::Split
 164 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
 165                                  unsigned ColumnLimit) const {
 166   return getStringSplit(Line.substr(TailOffset),
 167                         StartColumn + Prefix.size() + Postfix.size(),
 168                         ColumnLimit, Style.TabWidth, Encoding);
 169 }
 170
 171 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
 172                                          unsigned TailOffset, Split Split,
 173                                          WhitespaceManager &Whitespaces) {
 174   unsigned LeadingSpaces = StartColumn;
 175   // The '@' of an ObjC string literal (@"Test") does not become part of the
 176   // string token.
 177   // FIXME: It might be a cleaner solution to merge the tokens as a
 178   // precomputation step.
 179   if (Prefix.startswith("@"))
 180     --LeadingSpaces;
 181   Whitespaces.replaceWhitespaceInToken(
 182       Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
 183       Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces);
 184 }
 185
 186 BreakableLineComment::BreakableLineComment(
 187     const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
 188     bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
 189     : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
 190                                getLineCommentIndentPrefix(Token.TokenText), "",
 191                                InPPDirective, Encoding, Style) {
 192   OriginalPrefix = Prefix;
 193   if (Token.TokenText.size() > Prefix.size() &&
 194       isAlphanumeric(Token.TokenText[Prefix.size()])) {
 195     if (Prefix == "//")
 196       Prefix = "// ";
 197     else if (Prefix == "///")
 198       Prefix = "/// ";
 199     else if (Prefix == "//!")
 200       Prefix = "//! ";
 201   }
 202 }
 203
 204 BreakableToken::Split
 205 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
 206                                unsigned ColumnLimit) const {
 207   return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
 208                          ColumnLimit, Style.TabWidth, Encoding);
 209 }
 210
 211 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
 212                                        Split Split,
 213                                        WhitespaceManager &Whitespaces) {
 214   Whitespaces.replaceWhitespaceInToken(
 215       Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
 216       Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
 217 }
 218
 219 void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
 220                                              unsigned TailOffset, Split Split,
 221                                              WhitespaceManager &Whitespaces) {
 222   Whitespaces.replaceWhitespaceInToken(
 223       Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
 224       "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
 225       /*Spaces=*/1);
 226 }
 227
 228 void BreakableLineComment::replaceWhitespaceBefore(
 229     unsigned LineIndex, WhitespaceManager &Whitespaces) {
 230   if (OriginalPrefix != Prefix) {
 231     Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
 232                                          /*InPPDirective=*/false,
 233                                          /*Newlines=*/0, /*IndentLevel=*/0,
 234                                          /*Spaces=*/1);
 235   }
 236 }
 237
/// \brief Splits the text of a block comment into lines and precomputes, for
/// every line, its content (Lines), the amount of whitespace preceding it
/// (LeadingWhitespace) and its start column (StartOfLineColumn).
///
/// Also determines the Decoration that starts continuation lines (a prefix of
/// "* ", possibly empty), whether the last line needs that decoration
/// (LastLineNeedsDecoration), and the column at which content continues after
/// an inserted line break (IndentAtLineBreak).
///
/// \param StartColumn The column at which the comment will start.
/// \param OriginalStartColumn The column used as the reference for the
/// comment's original position; the difference to \p StartColumn is applied to
/// all continuation lines.
/// \param FirstInLine If false, a single-line comment is wrapped without a
/// decoration.
BreakableBlockComment::BreakableBlockComment(
    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
  StringRef TokenText(Token.TokenText);
  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  // Drop the opening "/*" and the closing "*/" and split what remains at
  // newlines.
  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");

  // Horizontal shift of the comment relative to its original position.
  int IndentDelta = StartColumn - OriginalStartColumn;
  LeadingWhitespace.resize(Lines.size());
  StartOfLineColumn.resize(Lines.size());
  // The content of the first line starts right after the "/*".
  StartOfLineColumn[0] = StartColumn + 2;
  // Trim whitespace around the line breaks and compute the start column of
  // every continuation line.
  for (size_t i = 1; i < Lines.size(); ++i)
    adjustWhitespace(i, IndentDelta);

  Decoration = "* ";
  if (Lines.size() == 1 && !FirstInLine) {
    // Comments for which FirstInLine is false can start on arbitrary column,
    // and available horizontal space can be too small to align consecutive
    // lines with the first one.
    // FIXME: We could, probably, align them to current indentation level, but
    // now we just wrap them without stars.
    Decoration = "";
  }
  // Shorten Decoration until every continuation line starts with it. The loop
  // stops early once Decoration has become empty.
  for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
    // If the last line is empty, the closing "*/" will have a star.
    if (i + 1 == e && Lines[i].empty())
      break;
    // A non-empty line (other than the last one) that is itself a prefix of
    // the decoration, e.g. a lone "*", does not restrict the decoration.
    if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
      continue;
    // Every string starts with the empty string, so this terminates at the
    // latest when Decoration is empty.
    while (!Lines[i].startswith(Decoration))
      Decoration = Decoration.substr(0, Decoration.size() - 1);
  }

  LastLineNeedsDecoration = true;
  // Start with the column one past the start of the first line's content and
  // lower it below to the smallest start column of the continuation lines.
  IndentAtLineBreak = StartOfLineColumn[0] + 1;
  for (size_t i = 1; i < Lines.size(); ++i) {
    if (Lines[i].empty()) {
      if (i + 1 == Lines.size()) {
        // Empty last line means that we already have a star as a part of the
        // trailing */. We also need to preserve whitespace, so that */ is
        // correctly indented.
        LastLineNeedsDecoration = false;
      } else if (Decoration.empty()) {
        // For all other lines, set the start column to 0 if they're empty, so
        // we do not insert trailing whitespace anywhere.
        StartOfLineColumn[i] = 0;
      }
      continue;
    }

    // The first line already excludes the star.
    // For all other lines, adjust the line to exclude the star and
    // (optionally) the first whitespace.
    unsigned DecorationSize =
        Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
    StartOfLineColumn[i] += DecorationSize;
    Lines[i] = Lines[i].substr(DecorationSize);
    LeadingWhitespace[i] += DecorationSize;
    // Lines that still have content after removing the decoration take part
    // in determining the indent used after a line break.
    if (!Decoration.startswith(Lines[i]))
      IndentAtLineBreak =
          std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
  }
  // The indent must leave room for the decoration itself.
  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
  // Debug output: the chosen indent, then every line with its leading
  // whitespace count.
  DEBUG({
    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
    for (size_t i = 0; i < Lines.size(); ++i) {
      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
                   << "\n";
    }
  });
}
 311
 312 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
 313                                              int IndentDelta) {
 314   // When in a preprocessor directive, the trailing backslash in a block comment
 315   // is not needed, but can serve a purpose of uniformity with necessary escaped
 316   // newlines outside the comment. In this case we remove it here before
 317   // trimming the trailing whitespace. The backslash will be re-added later when
 318   // inserting a line break.
 319   size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
 320   if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
 321     --EndOfPreviousLine;
 322
 323   // Calculate the end of the non-whitespace text in the previous line.
 324   EndOfPreviousLine =
 325       Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
 326   if (EndOfPreviousLine == StringRef::npos)
 327     EndOfPreviousLine = 0;
 328   else
 329     ++EndOfPreviousLine;
 330   // Calculate the start of the non-whitespace text in the current line.
 331   size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
 332   if (StartOfLine == StringRef::npos)
 333     StartOfLine = Lines[LineIndex].rtrim("\r\n").size();
 334
 335   StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
 336   // Adjust Lines to only contain relevant text.
 337   Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
 338   Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
 339   // Adjust LeadingWhitespace to account all whitespace between the lines
 340   // to the current line.
 341   LeadingWhitespace[LineIndex] =
 342       Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
 343
 344   // Adjust the start column uniformly across all lines.
 345   StartOfLineColumn[LineIndex] =
 346       encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
 347       IndentDelta;
 348 }
 349
 350 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
 351
 352 unsigned BreakableBlockComment::getLineLengthAfterSplit(
 353     unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
 354   unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
 355   return ContentStartColumn +
 356          encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
 357                                        ContentStartColumn, Style.TabWidth,
 358                                        Encoding) +
 359          // The last line gets a "*/" postfix.
 360          (LineIndex + 1 == Lines.size() ? 2 : 0);
 361 }
 362
 363 BreakableToken::Split
 364 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
 365                                 unsigned ColumnLimit) const {
 366   return getCommentSplit(Lines[LineIndex].substr(TailOffset),
 367                          getContentStartColumn(LineIndex, TailOffset),
 368                          ColumnLimit, Style.TabWidth, Encoding);
 369 }
 370
 371 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
 372                                         Split Split,
 373                                         WhitespaceManager &Whitespaces) {
 374   StringRef Text = Lines[LineIndex].substr(TailOffset);
 375   StringRef Prefix = Decoration;
 376   if (LineIndex + 1 == Lines.size() &&
 377       Text.size() == Split.first + Split.second) {
 378     // For the last line we need to break before "*/", but not to add "* ".
 379     Prefix = "";
 380   }
 381
 382   unsigned BreakOffsetInToken =
 383       Text.data() - Tok.TokenText.data() + Split.first;
 384   unsigned CharsToRemove = Split.second;
 385   assert(IndentAtLineBreak >= Decoration.size());
 386   Whitespaces.replaceWhitespaceInToken(
 387       Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
 388       IndentLevel, IndentAtLineBreak - Decoration.size());
 389 }
 390
 391 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
 392                                               unsigned TailOffset, Split Split,
 393                                               WhitespaceManager &Whitespaces) {
 394   StringRef Text = Lines[LineIndex].substr(TailOffset);
 395   unsigned BreakOffsetInToken =
 396       Text.data() - Tok.TokenText.data() + Split.first;
 397   unsigned CharsToRemove = Split.second;
 398   Whitespaces.replaceWhitespaceInToken(
 399       Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
 400       /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
 401 }
 402
 403 void BreakableBlockComment::replaceWhitespaceBefore(
 404     unsigned LineIndex, WhitespaceManager &Whitespaces) {
 405   if (LineIndex == 0)
 406     return;
 407   StringRef Prefix = Decoration;
 408   if (Lines[LineIndex].empty()) {
 409     if (LineIndex + 1 == Lines.size()) {
 410       if (!LastLineNeedsDecoration) {
 411         // If the last line was empty, we don't need a prefix, as the */ will
 412         // line up with the decoration (if it exists).
 413         Prefix = "";
 414       }
 415     } else if (!Decoration.empty()) {
 416       // For other empty lines, if we do have a decoration, adapt it to not
 417       // contain a trailing whitespace.
 418       Prefix = Prefix.substr(0, 1);
 419     }
 420   } else {
 421     if (StartOfLineColumn[LineIndex] == 1) {
 422       // This line starts immediately after the decorating *.
 423       Prefix = Prefix.substr(0, 1);
 424     }
 425   }
 426
 427   unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
 428                                      Tok.TokenText.data() -
 429                                      LeadingWhitespace[LineIndex];
 430   Whitespaces.replaceWhitespaceInToken(
 431       Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
 432       InPPDirective, 1, IndentLevel,
 433       StartOfLineColumn[LineIndex] - Prefix.size());
 434 }
 435
 436 unsigned
 437 BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
 438                                              unsigned TailOffset) const {
 439   // If we break, we always break at the predefined indent.
 440   if (TailOffset != 0)
 441     return IndentAtLineBreak;
 442   return std::max(0, StartOfLineColumn[LineIndex]);
 443 }
 444
 445 } // namespace format
 446 } // namespace clang