contrib/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp

   1 //===--- FormatTokenLexer.cpp - Lex FormatTokens -------------*- C++ ----*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 ///
  10 /// \file
  11 /// \brief This file implements FormatTokenLexer, which tokenizes a source file
  12 /// into a FormatToken stream suitable for ClangFormat.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #include "FormatTokenLexer.h"
  17 #include "FormatToken.h"
  18 #include "clang/Basic/SourceLocation.h"
  19 #include "clang/Basic/SourceManager.h"
  20 #include "clang/Format/Format.h"
  21 #include "llvm/Support/Regex.h"
  22
  23 namespace clang {
  24 namespace format {
  25
  26 FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
  27                                    const FormatStyle &Style,
  28                                    encoding::Encoding Encoding)
  29     : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
  30       LessStashed(false), Column(0), TrailingWhitespace(0),
  31       SourceMgr(SourceMgr), ID(ID), Style(Style),
  32       IdentTable(getFormattingLangOpts(Style)), Keywords(IdentTable),
  33       Encoding(Encoding), FirstInLineIndex(0), FormattingDisabled(false),
  34       MacroBlockBeginRegex(Style.MacroBlockBegin),
  35       MacroBlockEndRegex(Style.MacroBlockEnd) {
  36   Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
  37                       getFormattingLangOpts(Style)));
  38   Lex->SetKeepWhitespaceMode(true);
  39
  40   for (const std::string &ForEachMacro : Style.ForEachMacros)
  41     ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
  42   std::sort(ForEachMacros.begin(), ForEachMacros.end());
  43 }
  44
  45 ArrayRef<FormatToken *> FormatTokenLexer::lex() {
  46   assert(Tokens.empty());
  47   assert(FirstInLineIndex == 0);
  48   do {
  49     Tokens.push_back(getNextToken());
  50     if (Style.Language == FormatStyle::LK_JavaScript) {
  51       tryParseJSRegexLiteral();
  52       tryParseTemplateString();
  53     }
  54     tryMergePreviousTokens();
  55     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
  56       FirstInLineIndex = Tokens.size() - 1;
  57   } while (Tokens.back()->Tok.isNot(tok::eof));
  58   return Tokens;
  59 }
  60
  61 void FormatTokenLexer::tryMergePreviousTokens() {
  62   if (tryMerge_TMacro())
  63     return;
  64   if (tryMergeConflictMarkers())
  65     return;
  66   if (tryMergeLessLess())
  67     return;
  68
  69   if (Style.Language == FormatStyle::LK_JavaScript) {
  70     static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
  71     static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
  72                                                    tok::equal};
  73     static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
  74                                                   tok::greaterequal};
  75     static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
  76     // FIXME: Investigate what token type gives the correct operator priority.
  77     if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
  78       return;
  79     if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
  80       return;
  81     if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
  82       return;
  83     if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
  84       return;
  85   }
  86 }
  87
  88 bool FormatTokenLexer::tryMergeLessLess() {
  89   // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
  90   if (Tokens.size() < 3)
  91     return false;
  92
  93   bool FourthTokenIsLess = false;
  94   if (Tokens.size() > 3)
  95     FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less);
  96
  97   auto First = Tokens.end() - 3;
  98   if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
  99       First[0]->isNot(tok::less) || FourthTokenIsLess)
 100     return false;
 101
 102   // Only merge if there currently is no whitespace between the two "<".
 103   if (First[1]->WhitespaceRange.getBegin() !=
 104       First[1]->WhitespaceRange.getEnd())
 105     return false;
 106
 107   First[0]->Tok.setKind(tok::lessless);
 108   First[0]->TokenText = "<<";
 109   First[0]->ColumnWidth += 1;
 110   Tokens.erase(Tokens.end() - 2);
 111   return true;
 112 }
 113
 114 bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
 115                                       TokenType NewType) {
 116   if (Tokens.size() < Kinds.size())
 117     return false;
 118
 119   SmallVectorImpl<FormatToken *>::const_iterator First =
 120       Tokens.end() - Kinds.size();
 121   if (!First[0]->is(Kinds[0]))
 122     return false;
 123   unsigned AddLength = 0;
 124   for (unsigned i = 1; i < Kinds.size(); ++i) {
 125     if (!First[i]->is(Kinds[i]) ||
 126         First[i]->WhitespaceRange.getBegin() !=
 127             First[i]->WhitespaceRange.getEnd())
 128       return false;
 129     AddLength += First[i]->TokenText.size();
 130   }
 131   Tokens.resize(Tokens.size() - Kinds.size() + 1);
 132   First[0]->TokenText = StringRef(First[0]->TokenText.data(),
 133                                   First[0]->TokenText.size() + AddLength);
 134   First[0]->ColumnWidth += AddLength;
 135   First[0]->Type = NewType;
 136   return true;
 137 }
 138
 139 // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
 140 bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
 141   // NB: This is not entirely correct, as an r_paren can introduce an operand
 142   // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough
 143   // corner case to not matter in practice, though.
 144   return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
 145                       tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
 146                       tok::colon, tok::question, tok::tilde) ||
 147          Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
 148                       tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
 149                       tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) ||
 150          Tok->isBinaryOperator();
 151 }
 152
 153 bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
 154   if (!Prev)
 155     return true;
 156
 157   // Regex literals can only follow after prefix unary operators, not after
 158   // postfix unary operators. If the '++' is followed by a non-operand
 159   // introducing token, the slash here is the operand and not the start of a
 160   // regex.
 161   if (Prev->isOneOf(tok::plusplus, tok::minusminus))
 162     return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]));
 163
 164   // The previous token must introduce an operand location where regex
 165   // literals can occur.
 166   if (!precedesOperand(Prev))
 167     return false;
 168
 169   return true;
 170 }
 171
 172 // Tries to parse a JavaScript Regex literal starting at the current token,
 173 // if that begins with a slash and is in a location where JavaScript allows
 174 // regex literals. Changes the current token to a regex literal and updates
 175 // its text if successful.
 176 void FormatTokenLexer::tryParseJSRegexLiteral() {
 177   FormatToken *RegexToken = Tokens.back();
 178   if (!RegexToken->isOneOf(tok::slash, tok::slashequal))
 179     return;
 180
 181   FormatToken *Prev = nullptr;
 182   for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
 183     // NB: Because previous pointers are not initialized yet, this cannot use
 184     // Token.getPreviousNonComment.
 185     if ((*I)->isNot(tok::comment)) {
 186       Prev = *I;
 187       break;
 188     }
 189   }
 190
 191   if (!canPrecedeRegexLiteral(Prev))
 192     return;
 193
 194   // 'Manually' lex ahead in the current file buffer.
 195   const char *Offset = Lex->getBufferLocation();
 196   const char *RegexBegin = Offset - RegexToken->TokenText.size();
 197   StringRef Buffer = Lex->getBuffer();
 198   bool InCharacterClass = false;
 199   bool HaveClosingSlash = false;
 200   for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {
 201     // Regular expressions are terminated with a '/', which can only be
 202     // escaped using '\' or a character class between '[' and ']'.
 203     // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5.
 204     switch (*Offset) {
 205     case '\\':
 206       // Skip the escaped character.
 207       ++Offset;
 208       break;
 209     case '[':
 210       InCharacterClass = true;
 211       break;
 212     case ']':
 213       InCharacterClass = false;
 214       break;
 215     case '/':
 216       if (!InCharacterClass)
 217         HaveClosingSlash = true;
 218       break;
 219     }
 220   }
 221
 222   RegexToken->Type = TT_RegexLiteral;
 223   // Treat regex literals like other string_literals.
 224   RegexToken->Tok.setKind(tok::string_literal);
 225   RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);
 226   RegexToken->ColumnWidth = RegexToken->TokenText.size();
 227
 228   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
 229 }
 230
 231 void FormatTokenLexer::tryParseTemplateString() {
 232   FormatToken *BacktickToken = Tokens.back();
 233   if (!BacktickToken->is(tok::unknown) || BacktickToken->TokenText != "`")
 234     return;
 235
 236   // 'Manually' lex ahead in the current file buffer.
 237   const char *Offset = Lex->getBufferLocation();
 238   const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`"
 239   for (; Offset != Lex->getBuffer().end() && *Offset != '`'; ++Offset) {
 240     if (*Offset == '\\')
 241       ++Offset; // Skip the escaped character.
 242   }
 243
 244   StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1);
 245   BacktickToken->Type = TT_TemplateString;
 246   BacktickToken->Tok.setKind(tok::string_literal);
 247   BacktickToken->TokenText = LiteralText;
 248
 249   // Adjust width for potentially multiline string literals.
 250   size_t FirstBreak = LiteralText.find('\n');
 251   StringRef FirstLineText = FirstBreak == StringRef::npos
 252                                 ? LiteralText
 253                                 : LiteralText.substr(0, FirstBreak);
 254   BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(
 255       FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
 256   size_t LastBreak = LiteralText.rfind('\n');
 257   if (LastBreak != StringRef::npos) {
 258     BacktickToken->IsMultiline = true;
 259     unsigned StartColumn = 0; // The template tail spans the entire line.
 260     BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs(
 261         LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn,
 262         Style.TabWidth, Encoding);
 263   }
 264
 265   resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
 266 }
 267
 268 bool FormatTokenLexer::tryMerge_TMacro() {
 269   if (Tokens.size() < 4)
 270     return false;
 271   FormatToken *Last = Tokens.back();
 272   if (!Last->is(tok::r_paren))
 273     return false;
 274
 275   FormatToken *String = Tokens[Tokens.size() - 2];
 276   if (!String->is(tok::string_literal) || String->IsMultiline)
 277     return false;
 278
 279   if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
 280     return false;
 281
 282   FormatToken *Macro = Tokens[Tokens.size() - 4];
 283   if (Macro->TokenText != "_T")
 284     return false;
 285
 286   const char *Start = Macro->TokenText.data();
 287   const char *End = Last->TokenText.data() + Last->TokenText.size();
 288   String->TokenText = StringRef(Start, End - Start);
 289   String->IsFirst = Macro->IsFirst;
 290   String->LastNewlineOffset = Macro->LastNewlineOffset;
 291   String->WhitespaceRange = Macro->WhitespaceRange;
 292   String->OriginalColumn = Macro->OriginalColumn;
 293   String->ColumnWidth = encoding::columnWidthWithTabs(
 294       String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
 295   String->NewlinesBefore = Macro->NewlinesBefore;
 296   String->HasUnescapedNewline = Macro->HasUnescapedNewline;
 297
 298   Tokens.pop_back();
 299   Tokens.pop_back();
 300   Tokens.pop_back();
 301   Tokens.back() = String;
 302   return true;
 303 }
 304
 305 bool FormatTokenLexer::tryMergeConflictMarkers() {
 306   if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
 307     return false;
 308
 309   // Conflict lines look like:
 310   // <marker> <text from the vcs>
 311   // For example:
 312   // >>>>>>> /file/in/file/system at revision 1234
 313   //
 314   // We merge all tokens in a line that starts with a conflict marker
 315   // into a single token with a special token type that the unwrapped line
 316   // parser will use to correctly rebuild the underlying code.
 317
 318   FileID ID;
 319   // Get the position of the first token in the line.
 320   unsigned FirstInLineOffset;
 321   std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
 322       Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
 323   StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
 324   // Calculate the offset of the start of the current line.
 325   auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
 326   if (LineOffset == StringRef::npos) {
 327     LineOffset = 0;
 328   } else {
 329     ++LineOffset;
 330   }
 331
 332   auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
 333   StringRef LineStart;
 334   if (FirstSpace == StringRef::npos) {
 335     LineStart = Buffer.substr(LineOffset);
 336   } else {
 337     LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
 338   }
 339
 340   TokenType Type = TT_Unknown;
 341   if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
 342     Type = TT_ConflictStart;
 343   } else if (LineStart == "|||||||" || LineStart == "=======" ||
 344              LineStart == "====") {
 345     Type = TT_ConflictAlternative;
 346   } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
 347     Type = TT_ConflictEnd;
 348   }
 349
 350   if (Type != TT_Unknown) {
 351     FormatToken *Next = Tokens.back();
 352
 353     Tokens.resize(FirstInLineIndex + 1);
 354     // We do not need to build a complete token here, as we will skip it
 355     // during parsing anyway (as we must not touch whitespace around conflict
 356     // markers).
 357     Tokens.back()->Type = Type;
 358     Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
 359
 360     Tokens.push_back(Next);
 361     return true;
 362   }
 363
 364   return false;
 365 }
 366
 367 FormatToken *FormatTokenLexer::getStashedToken() {
 368   // Create a synthesized second '>' or '<' token.
 369   Token Tok = FormatTok->Tok;
 370   StringRef TokenText = FormatTok->TokenText;
 371
 372   unsigned OriginalColumn = FormatTok->OriginalColumn;
 373   FormatTok = new (Allocator.Allocate()) FormatToken;
 374   FormatTok->Tok = Tok;
 375   SourceLocation TokLocation =
 376       FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);
 377   FormatTok->Tok.setLocation(TokLocation);
 378   FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
 379   FormatTok->TokenText = TokenText;
 380   FormatTok->ColumnWidth = 1;
 381   FormatTok->OriginalColumn = OriginalColumn + 1;
 382
 383   return FormatTok;
 384 }
 385
 386 FormatToken *FormatTokenLexer::getNextToken() {
 387   if (GreaterStashed) {
 388     GreaterStashed = false;
 389     return getStashedToken();
 390   }
 391   if (LessStashed) {
 392     LessStashed = false;
 393     return getStashedToken();
 394   }
 395
 396   FormatTok = new (Allocator.Allocate()) FormatToken;
 397   readRawToken(*FormatTok);
 398   SourceLocation WhitespaceStart =
 399       FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
 400   FormatTok->IsFirst = IsFirstToken;
 401   IsFirstToken = false;
 402
 403   // Consume and record whitespace until we find a significant token.
 404   unsigned WhitespaceLength = TrailingWhitespace;
 405   while (FormatTok->Tok.is(tok::unknown)) {
 406     StringRef Text = FormatTok->TokenText;
 407     auto EscapesNewline = [&](int pos) {
 408       // A '\r' here is just part of '\r\n'. Skip it.
 409       if (pos >= 0 && Text[pos] == '\r')
 410         --pos;
 411       // See whether there is an odd number of '\' before this.
 412       unsigned count = 0;
 413       for (; pos >= 0; --pos, ++count)
 414         if (Text[pos] != '\\')
 415           break;
 416       return count & 1;
 417     };
 418     // FIXME: This miscounts tok:unknown tokens that are not just
 419     // whitespace, e.g. a '`' character.
 420     for (int i = 0, e = Text.size(); i != e; ++i) {
 421       switch (Text[i]) {
 422       case '\n':
 423         ++FormatTok->NewlinesBefore;
 424         FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
 425         FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
 426         Column = 0;
 427         break;
 428       case '\r':
 429         FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
 430         Column = 0;
 431         break;
 432       case '\f':
 433       case '\v':
 434         Column = 0;
 435         break;
 436       case ' ':
 437         ++Column;
 438         break;
 439       case '\t':
 440         Column += Style.TabWidth - Column % Style.TabWidth;
 441         break;
 442       case '\\':
 443         if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
 444           FormatTok->Type = TT_ImplicitStringLiteral;
 445         break;
 446       default:
 447         FormatTok->Type = TT_ImplicitStringLiteral;
 448         break;
 449       }
 450       if (FormatTok->Type == TT_ImplicitStringLiteral)
 451         break;
 452     }
 453
 454     if (FormatTok->is(TT_ImplicitStringLiteral))
 455       break;
 456     WhitespaceLength += FormatTok->Tok.getLength();
 457
 458     readRawToken(*FormatTok);
 459   }
 460
 461   // In case the token starts with escaped newlines, we want to
 462   // take them into account as whitespace - this pattern is quite frequent
 463   // in macro definitions.
 464   // FIXME: Add a more explicit test.
 465   while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
 466          FormatTok->TokenText[1] == '\n') {
 467     ++FormatTok->NewlinesBefore;
 468     WhitespaceLength += 2;
 469     FormatTok->LastNewlineOffset = 2;
 470     Column = 0;
 471     FormatTok->TokenText = FormatTok->TokenText.substr(2);
 472   }
 473
 474   FormatTok->WhitespaceRange = SourceRange(
 475       WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
 476
 477   FormatTok->OriginalColumn = Column;
 478
 479   TrailingWhitespace = 0;
 480   if (FormatTok->Tok.is(tok::comment)) {
 481     // FIXME: Add the trimmed whitespace to Column.
 482     StringRef UntrimmedText = FormatTok->TokenText;
 483     FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
 484     TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
 485   } else if (FormatTok->Tok.is(tok::raw_identifier)) {
 486     IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
 487     FormatTok->Tok.setIdentifierInfo(&Info);
 488     FormatTok->Tok.setKind(Info.getTokenID());
 489     if (Style.Language == FormatStyle::LK_Java &&
 490         FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,
 491                            tok::kw_operator)) {
 492       FormatTok->Tok.setKind(tok::identifier);
 493       FormatTok->Tok.setIdentifierInfo(nullptr);
 494     } else if (Style.Language == FormatStyle::LK_JavaScript &&
 495                FormatTok->isOneOf(tok::kw_struct, tok::kw_union,
 496                                   tok::kw_operator)) {
 497       FormatTok->Tok.setKind(tok::identifier);
 498       FormatTok->Tok.setIdentifierInfo(nullptr);
 499     }
 500   } else if (FormatTok->Tok.is(tok::greatergreater)) {
 501     FormatTok->Tok.setKind(tok::greater);
 502     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
 503     GreaterStashed = true;
 504   } else if (FormatTok->Tok.is(tok::lessless)) {
 505     FormatTok->Tok.setKind(tok::less);
 506     FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
 507     LessStashed = true;
 508   }
 509
 510   // Now FormatTok is the next non-whitespace token.
 511
 512   StringRef Text = FormatTok->TokenText;
 513   size_t FirstNewlinePos = Text.find('\n');
 514   if (FirstNewlinePos == StringRef::npos) {
 515     // FIXME: ColumnWidth actually depends on the start column, we need to
 516     // take this into account when the token is moved.
 517     FormatTok->ColumnWidth =
 518         encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
 519     Column += FormatTok->ColumnWidth;
 520   } else {
 521     FormatTok->IsMultiline = true;
 522     // FIXME: ColumnWidth actually depends on the start column, we need to
 523     // take this into account when the token is moved.
 524     FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
 525         Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
 526
 527     // The last line of the token always starts in column 0.
 528     // Thus, the length can be precomputed even in the presence of tabs.
 529     FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
 530         Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);
 531     Column = FormatTok->LastLineColumnWidth;
 532   }
 533
 534   if (Style.Language == FormatStyle::LK_Cpp) {
 535     if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
 536           Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
 537               tok::pp_define) &&
 538         std::find(ForEachMacros.begin(), ForEachMacros.end(),
 539                   FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
 540       FormatTok->Type = TT_ForEachMacro;
 541     } else if (FormatTok->is(tok::identifier)) {
 542       if (MacroBlockBeginRegex.match(Text)) {
 543         FormatTok->Type = TT_MacroBlockBegin;
 544       } else if (MacroBlockEndRegex.match(Text)) {
 545         FormatTok->Type = TT_MacroBlockEnd;
 546       }
 547     }
 548   }
 549
 550   return FormatTok;
 551 }
 552
 553 void FormatTokenLexer::readRawToken(FormatToken &Tok) {
 554   Lex->LexFromRawLexer(Tok.Tok);
 555   Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
 556                             Tok.Tok.getLength());
 557   // For formatting, treat unterminated string literals like normal string
 558   // literals.
 559   if (Tok.is(tok::unknown)) {
 560     if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
 561       Tok.Tok.setKind(tok::string_literal);
 562       Tok.IsUnterminatedLiteral = true;
 563     } else if (Style.Language == FormatStyle::LK_JavaScript &&
 564                Tok.TokenText == "''") {
 565       Tok.Tok.setKind(tok::string_literal);
 566     }
 567   }
 568
 569   if (Style.Language == FormatStyle::LK_JavaScript &&
 570       Tok.is(tok::char_constant)) {
 571     Tok.Tok.setKind(tok::string_literal);
 572   }
 573
 574   if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
 575                                Tok.TokenText == "/* clang-format on */")) {
 576     FormattingDisabled = false;
 577   }
 578
 579   Tok.Finalized = FormattingDisabled;
 580
 581   if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
 582                                Tok.TokenText == "/* clang-format off */")) {
 583     FormattingDisabled = true;
 584   }
 585 }
 586
 587 void FormatTokenLexer::resetLexer(unsigned Offset) {
 588   StringRef Buffer = SourceMgr.getBufferData(ID);
 589   Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
 590                       getFormattingLangOpts(Style), Buffer.begin(),
 591                       Buffer.begin() + Offset, Buffer.end()));
 592   Lex->SetKeepWhitespaceMode(true);
 593   TrailingWhitespace = 0;
 594 }
 595
 596 } // namespace format
 597 } // namespace clang