contrib/llvm/lib/MC/MCParser/AsmLexer.cpp

   1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This class implements the lexer for assembly files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/MC/MCParser/AsmLexer.h"
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/StringExtras.h"
  18 #include "llvm/ADT/StringRef.h"
  19 #include "llvm/ADT/StringSwitch.h"
  20 #include "llvm/MC/MCAsmInfo.h"
  21 #include "llvm/MC/MCParser/MCAsmLexer.h"
  22 #include "llvm/Support/SMLoc.h"
  23 #include "llvm/Support/SaveAndRestore.h"
  24 #include <cassert>
  25 #include <cctype>
  26 #include <cstdio>
  27 #include <cstring>
  28 #include <string>
  29 #include <tuple>
  30 #include <utility>
  31
  32 using namespace llvm;
  33
  34 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
  35   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
  36 }
  37
// The destructor is defaulted here, out of line, in this translation unit.
AsmLexer::~AsmLexer() = default;
  39
  40 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
  41   CurBuf = Buf;
  42
  43   if (ptr)
  44     CurPtr = ptr;
  45   else
  46     CurPtr = CurBuf.begin();
  47
  48   TokStart = nullptr;
  49 }
  50
  51 /// ReturnError - Set the error to the specified string at the specified
  52 /// location.  This is defined to always return AsmToken::Error.
  53 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
  54   SetError(SMLoc::getFromPointer(Loc), Msg);
  55
  56   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
  57 }
  58
  59 int AsmLexer::getNextChar() {
  60   if (CurPtr == CurBuf.end())
  61     return EOF;
  62   return (unsigned char)*CurPtr++;
  63 }
  64
  65 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
  66 ///
  67 /// The leading integral digit sequence and dot should have already been
  68 /// consumed, some or all of the fractional digit sequence *can* have been
  69 /// consumed.
  70 AsmToken AsmLexer::LexFloatLiteral() {
  71   // Skip the fractional digit sequence.
  72   while (isDigit(*CurPtr))
  73     ++CurPtr;
  74
  75   // Check for exponent; we intentionally accept a slighlty wider set of
  76   // literals here and rely on the upstream client to reject invalid ones (e.g.,
  77   // "1e+").
  78   if (*CurPtr == 'e' || *CurPtr == 'E') {
  79     ++CurPtr;
  80     if (*CurPtr == '-' || *CurPtr == '+')
  81       ++CurPtr;
  82     while (isDigit(*CurPtr))
  83       ++CurPtr;
  84   }
  85
  86   return AsmToken(AsmToken::Real,
  87                   StringRef(TokStart, CurPtr - TokStart));
  88 }
  89
  90 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
  91 /// while making sure there are enough actual digits around for the constant to
  92 /// be valid.
  93 ///
  94 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
  95 /// before we get here.
  96 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
  97   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
  98          "unexpected parse state in floating hex");
  99   bool NoFracDigits = true;
 100
 101   // Skip the fractional part if there is one
 102   if (*CurPtr == '.') {
 103     ++CurPtr;
 104
 105     const char *FracStart = CurPtr;
 106     while (isHexDigit(*CurPtr))
 107       ++CurPtr;
 108
 109     NoFracDigits = CurPtr == FracStart;
 110   }
 111
 112   if (NoIntDigits && NoFracDigits)
 113     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 114                                  "expected at least one significand digit");
 115
 116   // Make sure we do have some kind of proper exponent part
 117   if (*CurPtr != 'p' && *CurPtr != 'P')
 118     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 119                                  "expected exponent part 'p'");
 120   ++CurPtr;
 121
 122   if (*CurPtr == '+' || *CurPtr == '-')
 123     ++CurPtr;
 124
 125   // N.b. exponent digits are *not* hex
 126   const char *ExpStart = CurPtr;
 127   while (isDigit(*CurPtr))
 128     ++CurPtr;
 129
 130   if (CurPtr == ExpStart)
 131     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 132                                  "expected at least one exponent digit");
 133
 134   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
 135 }
 136
 137 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
 138 static bool IsIdentifierChar(char c, bool AllowAt) {
 139   return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
 140          (c == '@' && AllowAt) || c == '?';
 141 }
 142
 143 AsmToken AsmLexer::LexIdentifier() {
 144   // Check for floating point literals.
 145   if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
 146     // Disambiguate a .1243foo identifier from a floating literal.
 147     while (isDigit(*CurPtr))
 148       ++CurPtr;
 149     if (*CurPtr == 'e' || *CurPtr == 'E' ||
 150         !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
 151       return LexFloatLiteral();
 152   }
 153
 154   while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
 155     ++CurPtr;
 156
 157   // Handle . as a special case.
 158   if (CurPtr == TokStart+1 && TokStart[0] == '.')
 159     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
 160
 161   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
 162 }
 163
 164 /// LexSlash: Slash: /
 165 ///           C-Style Comment: /* ... */
 166 AsmToken AsmLexer::LexSlash() {
 167   switch (*CurPtr) {
 168   case '*':
 169     IsAtStartOfStatement = false;
 170     break; // C style comment.
 171   case '/':
 172     ++CurPtr;
 173     return LexLineComment();
 174   default:
 175     IsAtStartOfStatement = false;
 176     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
 177   }
 178
 179   // C Style comment.
 180   ++CurPtr;  // skip the star.
 181   const char *CommentTextStart = CurPtr;
 182   while (CurPtr != CurBuf.end()) {
 183     switch (*CurPtr++) {
 184     case '*':
 185       // End of the comment?
 186       if (*CurPtr != '/')
 187         break;
 188       // If we have a CommentConsumer, notify it about the comment.
 189       if (CommentConsumer) {
 190         CommentConsumer->HandleComment(
 191             SMLoc::getFromPointer(CommentTextStart),
 192             StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
 193       }
 194       ++CurPtr;   // End the */.
 195       return AsmToken(AsmToken::Comment,
 196                       StringRef(TokStart, CurPtr - TokStart));
 197     }
 198   }
 199   return ReturnError(TokStart, "unterminated comment");
 200 }
 201
 202 /// LexLineComment: Comment: #[^\n]*
 203 ///                        : //[^\n]*
 204 AsmToken AsmLexer::LexLineComment() {
 205   // Mark This as an end of statement with a body of the
 206   // comment. While it would be nicer to leave this two tokens,
 207   // backwards compatability with TargetParsers makes keeping this in this form
 208   // better.
 209   const char *CommentTextStart = CurPtr;
 210   int CurChar = getNextChar();
 211   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
 212     CurChar = getNextChar();
 213   if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
 214     ++CurPtr;
 215
 216   // If we have a CommentConsumer, notify it about the comment.
 217   if (CommentConsumer) {
 218     CommentConsumer->HandleComment(
 219         SMLoc::getFromPointer(CommentTextStart),
 220         StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
 221   }
 222
 223   IsAtStartOfLine = true;
 224   // This is a whole line comment. leave newline
 225   if (IsAtStartOfStatement)
 226     return AsmToken(AsmToken::EndOfStatement,
 227                     StringRef(TokStart, CurPtr - TokStart));
 228   IsAtStartOfStatement = true;
 229
 230   return AsmToken(AsmToken::EndOfStatement,
 231                   StringRef(TokStart, CurPtr - 1 - TokStart));
 232 }
 233
 234 static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
 235   // Skip ULL, UL, U, L and LL suffices.
 236   if (CurPtr[0] == 'U')
 237     ++CurPtr;
 238   if (CurPtr[0] == 'L')
 239     ++CurPtr;
 240   if (CurPtr[0] == 'L')
 241     ++CurPtr;
 242 }
 243
 244 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
 245 // integer as a hexadecimal, possibly with leading zeroes.
 246 static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
 247   const char *FirstHex = nullptr;
 248   const char *LookAhead = CurPtr;
 249   while (true) {
 250     if (isDigit(*LookAhead)) {
 251       ++LookAhead;
 252     } else if (isHexDigit(*LookAhead)) {
 253       if (!FirstHex)
 254         FirstHex = LookAhead;
 255       ++LookAhead;
 256     } else {
 257       break;
 258     }
 259   }
 260   bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
 261   CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
 262   if (isHex)
 263     return 16;
 264   return DefaultRadix;
 265 }
 266
 267 static AsmToken intToken(StringRef Ref, APInt &Value)
 268 {
 269   if (Value.isIntN(64))
 270     return AsmToken(AsmToken::Integer, Ref, Value);
 271   return AsmToken(AsmToken::BigNum, Ref, Value);
 272 }
 273
 274 /// LexDigit: First character is [0-9].
 275 ///   Local Label: [0-9][:]
 276 ///   Forward/Backward Label: [0-9][fb]
 277 ///   Binary integer: 0b[01]+
 278 ///   Octal integer: 0[0-7]+
 279 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
 280 ///   Decimal integer: [1-9][0-9]*
 281 AsmToken AsmLexer::LexDigit() {
 282   // MASM-flavor binary integer: [01]+[bB]
 283   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
 284   if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
 285     const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
 286                                    CurPtr - 1 : nullptr;
 287     const char *OldCurPtr = CurPtr;
 288     while (isHexDigit(*CurPtr)) {
 289       if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
 290         FirstNonBinary = CurPtr;
 291       ++CurPtr;
 292     }
 293
 294     unsigned Radix = 0;
 295     if (*CurPtr == 'h' || *CurPtr == 'H') {
 296       // hexadecimal number
 297       ++CurPtr;
 298       Radix = 16;
 299     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
 300                (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
 301       Radix = 2;
 302
 303     if (Radix == 2 || Radix == 16) {
 304       StringRef Result(TokStart, CurPtr - TokStart);
 305       APInt Value(128, 0, true);
 306
 307       if (Result.drop_back().getAsInteger(Radix, Value))
 308         return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
 309                              "invalid hexdecimal number");
 310
 311       // MSVC accepts and ignores type suffices on integer literals.
 312       SkipIgnoredIntegerSuffix(CurPtr);
 313
 314       return intToken(Result, Value);
 315    }
 316
 317     // octal/decimal integers, or floating point numbers, fall through
 318     CurPtr = OldCurPtr;
 319   }
 320
 321   // Decimal integer: [1-9][0-9]*
 322   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
 323     unsigned Radix = doLookAhead(CurPtr, 10);
 324     bool isHex = Radix == 16;
 325     // Check for floating point literals.
 326     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
 327       ++CurPtr;
 328       return LexFloatLiteral();
 329     }
 330
 331     StringRef Result(TokStart, CurPtr - TokStart);
 332
 333     APInt Value(128, 0, true);
 334     if (Result.getAsInteger(Radix, Value))
 335       return ReturnError(TokStart, !isHex ? "invalid decimal number" :
 336                            "invalid hexdecimal number");
 337
 338     // Consume the [bB][hH].
 339     if (Radix == 2 || Radix == 16)
 340       ++CurPtr;
 341
 342     // The darwin/x86 (and x86-64) assembler accepts and ignores type
 343     // suffices on integer literals.
 344     SkipIgnoredIntegerSuffix(CurPtr);
 345
 346     return intToken(Result, Value);
 347   }
 348
 349   if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
 350     ++CurPtr;
 351     // See if we actually have "0b" as part of something like "jmp 0b\n"
 352     if (!isDigit(CurPtr[0])) {
 353       --CurPtr;
 354       StringRef Result(TokStart, CurPtr - TokStart);
 355       return AsmToken(AsmToken::Integer, Result, 0);
 356     }
 357     const char *NumStart = CurPtr;
 358     while (CurPtr[0] == '0' || CurPtr[0] == '1')
 359       ++CurPtr;
 360
 361     // Requires at least one binary digit.
 362     if (CurPtr == NumStart)
 363       return ReturnError(TokStart, "invalid binary number");
 364
 365     StringRef Result(TokStart, CurPtr - TokStart);
 366
 367     APInt Value(128, 0, true);
 368     if (Result.substr(2).getAsInteger(2, Value))
 369       return ReturnError(TokStart, "invalid binary number");
 370
 371     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 372     // suffixes on integer literals.
 373     SkipIgnoredIntegerSuffix(CurPtr);
 374
 375     return intToken(Result, Value);
 376   }
 377
 378   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
 379     ++CurPtr;
 380     const char *NumStart = CurPtr;
 381     while (isHexDigit(CurPtr[0]))
 382       ++CurPtr;
 383
 384     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
 385     // diagnosed by LexHexFloatLiteral).
 386     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
 387       return LexHexFloatLiteral(NumStart == CurPtr);
 388
 389     // Otherwise requires at least one hex digit.
 390     if (CurPtr == NumStart)
 391       return ReturnError(CurPtr-2, "invalid hexadecimal number");
 392
 393     APInt Result(128, 0);
 394     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
 395       return ReturnError(TokStart, "invalid hexadecimal number");
 396
 397     // Consume the optional [hH].
 398     if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
 399       ++CurPtr;
 400
 401     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 402     // suffixes on integer literals.
 403     SkipIgnoredIntegerSuffix(CurPtr);
 404
 405     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
 406   }
 407
 408   // Either octal or hexadecimal.
 409   APInt Value(128, 0, true);
 410   unsigned Radix = doLookAhead(CurPtr, 8);
 411   bool isHex = Radix == 16;
 412   StringRef Result(TokStart, CurPtr - TokStart);
 413   if (Result.getAsInteger(Radix, Value))
 414     return ReturnError(TokStart, !isHex ? "invalid octal number" :
 415                        "invalid hexdecimal number");
 416
 417   // Consume the [hH].
 418   if (Radix == 16)
 419     ++CurPtr;
 420
 421   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 422   // suffixes on integer literals.
 423   SkipIgnoredIntegerSuffix(CurPtr);
 424
 425   return intToken(Result, Value);
 426 }
 427
 428 /// LexSingleQuote: Integer: 'b'
 429 AsmToken AsmLexer::LexSingleQuote() {
 430   int CurChar = getNextChar();
 431
 432   if (CurChar == '\\')
 433     CurChar = getNextChar();
 434
 435   if (CurChar == EOF)
 436     return ReturnError(TokStart, "unterminated single quote");
 437
 438   CurChar = getNextChar();
 439
 440   if (CurChar != '\'')
 441     return ReturnError(TokStart, "single quote way too long");
 442
 443   // The idea here being that 'c' is basically just an integral
 444   // constant.
 445   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
 446   long long Value;
 447
 448   if (Res.startswith("\'\\")) {
 449     char theChar = Res[2];
 450     switch (theChar) {
 451       default: Value = theChar; break;
 452       case '\'': Value = '\''; break;
 453       case 't': Value = '\t'; break;
 454       case 'n': Value = '\n'; break;
 455       case 'b': Value = '\b'; break;
 456     }
 457   } else
 458     Value = TokStart[1];
 459
 460   return AsmToken(AsmToken::Integer, Res, Value);
 461 }
 462
 463 /// LexQuote: String: "..."
 464 AsmToken AsmLexer::LexQuote() {
 465   int CurChar = getNextChar();
 466   // TODO: does gas allow multiline string constants?
 467   while (CurChar != '"') {
 468     if (CurChar == '\\') {
 469       // Allow \", etc.
 470       CurChar = getNextChar();
 471     }
 472
 473     if (CurChar == EOF)
 474       return ReturnError(TokStart, "unterminated string constant");
 475
 476     CurChar = getNextChar();
 477   }
 478
 479   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
 480 }
 481
 482 StringRef AsmLexer::LexUntilEndOfStatement() {
 483   TokStart = CurPtr;
 484
 485   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
 486          !isAtStatementSeparator(CurPtr) && // End of statement marker.
 487          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
 488     ++CurPtr;
 489   }
 490   return StringRef(TokStart, CurPtr-TokStart);
 491 }
 492
 493 StringRef AsmLexer::LexUntilEndOfLine() {
 494   TokStart = CurPtr;
 495
 496   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
 497     ++CurPtr;
 498   }
 499   return StringRef(TokStart, CurPtr-TokStart);
 500 }
 501
 502 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
 503                             bool ShouldSkipSpace) {
 504   SaveAndRestore<const char *> SavedTokenStart(TokStart);
 505   SaveAndRestore<const char *> SavedCurPtr(CurPtr);
 506   SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
 507   SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
 508   SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
 509   SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
 510   std::string SavedErr = getErr();
 511   SMLoc SavedErrLoc = getErrLoc();
 512
 513   size_t ReadCount;
 514   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
 515     AsmToken Token = LexToken();
 516
 517     Buf[ReadCount] = Token;
 518
 519     if (Token.is(AsmToken::Eof))
 520       break;
 521   }
 522
 523   SetError(SavedErrLoc, SavedErr);
 524   return ReadCount;
 525 }
 526
 527 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
 528   StringRef CommentString = MAI.getCommentString();
 529
 530   if (CommentString.size() == 1)
 531     return CommentString[0] == Ptr[0];
 532
 533   // Allow # preprocessor commments also be counted as comments for "##" cases
 534   if (CommentString[1] == '#')
 535     return CommentString[0] == Ptr[0];
 536
 537   return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
 538 }
 539
 540 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
 541   return strncmp(Ptr, MAI.getSeparatorString(),
 542                  strlen(MAI.getSeparatorString())) == 0;
 543 }
 544
 545 AsmToken AsmLexer::LexToken() {
 546   TokStart = CurPtr;
 547   // This always consumes at least one character.
 548   int CurChar = getNextChar();
 549
 550   if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
 551     // If this starts with a '#', this may be a cpp
 552     // hash directive and otherwise a line comment.
 553     AsmToken TokenBuf[2];
 554     MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
 555     size_t num = peekTokens(Buf, true);
 556     // There cannot be a space preceeding this
 557     if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
 558         TokenBuf[1].is(AsmToken::String)) {
 559       CurPtr = TokStart; // reset curPtr;
 560       StringRef s = LexUntilEndOfLine();
 561       UnLex(TokenBuf[1]);
 562       UnLex(TokenBuf[0]);
 563       return AsmToken(AsmToken::HashDirective, s);
 564     }
 565     return LexLineComment();
 566   }
 567
 568   if (isAtStartOfComment(TokStart))
 569     return LexLineComment();
 570
 571   if (isAtStatementSeparator(TokStart)) {
 572     CurPtr += strlen(MAI.getSeparatorString()) - 1;
 573     IsAtStartOfLine = true;
 574     IsAtStartOfStatement = true;
 575     return AsmToken(AsmToken::EndOfStatement,
 576                     StringRef(TokStart, strlen(MAI.getSeparatorString())));
 577   }
 578
 579   // If we're missing a newline at EOF, make sure we still get an
 580   // EndOfStatement token before the Eof token.
 581   if (CurChar == EOF && !IsAtStartOfStatement) {
 582     IsAtStartOfLine = true;
 583     IsAtStartOfStatement = true;
 584     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
 585   }
 586   IsAtStartOfLine = false;
 587   bool OldIsAtStartOfStatement = IsAtStartOfStatement;
 588   IsAtStartOfStatement = false;
 589   switch (CurChar) {
 590   default:
 591     // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
 592     if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
 593       return LexIdentifier();
 594
 595     // Unknown character, emit an error.
 596     return ReturnError(TokStart, "invalid character in input");
 597   case EOF:
 598     IsAtStartOfLine = true;
 599     IsAtStartOfStatement = true;
 600     return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
 601   case 0:
 602   case ' ':
 603   case '\t':
 604     IsAtStartOfStatement = OldIsAtStartOfStatement;
 605     while (*CurPtr == ' ' || *CurPtr == '\t')
 606       CurPtr++;
 607     if (SkipSpace)
 608       return LexToken(); // Ignore whitespace.
 609     else
 610       return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
 611   case '\r': {
 612     IsAtStartOfLine = true;
 613     IsAtStartOfStatement = true;
 614     // If this is a CR followed by LF, treat that as one token.
 615     if (CurPtr != CurBuf.end() && *CurPtr == '\n')
 616       ++CurPtr;
 617     return AsmToken(AsmToken::EndOfStatement,
 618                     StringRef(TokStart, CurPtr - TokStart));
 619   }
 620   case '\n':
 621     IsAtStartOfLine = true;
 622     IsAtStartOfStatement = true;
 623     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
 624   case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
 625   case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
 626   case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
 627   case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
 628   case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
 629   case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
 630   case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
 631   case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
 632   case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
 633   case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
 634   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
 635   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
 636   case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
 637   case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
 638   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
 639   case '=':
 640     if (*CurPtr == '=') {
 641       ++CurPtr;
 642       return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
 643     }
 644     return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
 645   case '|':
 646     if (*CurPtr == '|') {
 647       ++CurPtr;
 648       return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
 649     }
 650     return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
 651   case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
 652   case '&':
 653     if (*CurPtr == '&') {
 654       ++CurPtr;
 655       return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
 656     }
 657     return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
 658   case '!':
 659     if (*CurPtr == '=') {
 660       ++CurPtr;
 661       return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
 662     }
 663     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
 664   case '%':
 665     if (MAI.hasMipsExpressions()) {
 666       AsmToken::TokenKind Operator;
 667       unsigned OperatorLength;
 668
 669       std::tie(Operator, OperatorLength) =
 670           StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
 671               StringRef(CurPtr))
 672               .StartsWith("call16", {AsmToken::PercentCall16, 7})
 673               .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
 674               .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
 675               .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
 676               .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
 677               .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
 678               .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
 679               .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
 680               .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
 681               .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
 682               .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
 683               .StartsWith("got", {AsmToken::PercentGot, 4})
 684               .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
 685               .StartsWith("higher", {AsmToken::PercentHigher, 7})
 686               .StartsWith("highest", {AsmToken::PercentHighest, 8})
 687               .StartsWith("hi", {AsmToken::PercentHi, 3})
 688               .StartsWith("lo", {AsmToken::PercentLo, 3})
 689               .StartsWith("neg", {AsmToken::PercentNeg, 4})
 690               .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
 691               .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
 692               .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
 693               .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
 694               .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
 695               .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
 696               .Default({AsmToken::Percent, 1});
 697
 698       if (Operator != AsmToken::Percent) {
 699         CurPtr += OperatorLength - 1;
 700         return AsmToken(Operator, StringRef(TokStart, OperatorLength));
 701       }
 702     }
 703     return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
 704   case '/':
 705     IsAtStartOfStatement = OldIsAtStartOfStatement;
 706     return LexSlash();
 707   case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
 708   case '\'': return LexSingleQuote();
 709   case '"': return LexQuote();
 710   case '0': case '1': case '2': case '3': case '4':
 711   case '5': case '6': case '7': case '8': case '9':
 712     return LexDigit();
 713   case '<':
 714     switch (*CurPtr) {
 715     case '<':
 716       ++CurPtr;
 717       return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
 718     case '=':
 719       ++CurPtr;
 720       return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
 721     case '>':
 722       ++CurPtr;
 723       return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
 724     default:
 725       return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
 726     }
 727   case '>':
 728     switch (*CurPtr) {
 729     case '>':
 730       ++CurPtr;
 731       return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
 732     case '=':
 733       ++CurPtr;
 734       return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
 735     default:
 736       return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
 737     }
 738
 739   // TODO: Quoted identifiers (objc methods etc)
 740   // local labels: [0-9][:]
 741   // Forward/backward labels: [0-9][fb]
 742   // Integers, fp constants, character constants.
 743   }
 744 }