contrib/llvm/lib/MC/MCParser/AsmLexer.cpp

   1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This class implements the lexer for assembly files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/MC/MCParser/AsmLexer.h"
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/StringExtras.h"
  18 #include "llvm/ADT/StringRef.h"
  19 #include "llvm/ADT/StringSwitch.h"
  20 #include "llvm/MC/MCAsmInfo.h"
  21 #include "llvm/MC/MCParser/MCAsmLexer.h"
  22 #include "llvm/Support/SMLoc.h"
  23 #include "llvm/Support/SaveAndRestore.h"
  24 #include <cassert>
  25 #include <cctype>
  26 #include <cstdio>
  27 #include <cstring>
  28 #include <string>
  29 #include <tuple>
  30 #include <utility>
  31
  32 using namespace llvm;
  33
  34 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
  35   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
  36 }
  37
// The destructor is defaulted, but defined out of line in this file.
AsmLexer::~AsmLexer() = default;
  39
  40 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
  41   CurBuf = Buf;
  42
  43   if (ptr)
  44     CurPtr = ptr;
  45   else
  46     CurPtr = CurBuf.begin();
  47
  48   TokStart = nullptr;
  49 }
  50
  51 /// ReturnError - Set the error to the specified string at the specified
  52 /// location.  This is defined to always return AsmToken::Error.
  53 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
  54   SetError(SMLoc::getFromPointer(Loc), Msg);
  55
  56   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
  57 }
  58
  59 int AsmLexer::getNextChar() {
  60   if (CurPtr == CurBuf.end())
  61     return EOF;
  62   return (unsigned char)*CurPtr++;
  63 }
  64
  65 /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
  66 ///
  67 /// The leading integral digit sequence and dot should have already been
  68 /// consumed, some or all of the fractional digit sequence *can* have been
  69 /// consumed.
  70 AsmToken AsmLexer::LexFloatLiteral() {
  71   // Skip the fractional digit sequence.
  72   while (isDigit(*CurPtr))
  73     ++CurPtr;
  74
  75   // Check for exponent; we intentionally accept a slighlty wider set of
  76   // literals here and rely on the upstream client to reject invalid ones (e.g.,
  77   // "1e+").
  78   if (*CurPtr == 'e' || *CurPtr == 'E') {
  79     ++CurPtr;
  80     if (*CurPtr == '-' || *CurPtr == '+')
  81       ++CurPtr;
  82     while (isDigit(*CurPtr))
  83       ++CurPtr;
  84   }
  85
  86   return AsmToken(AsmToken::Real,
  87                   StringRef(TokStart, CurPtr - TokStart));
  88 }
  89
  90 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
  91 /// while making sure there are enough actual digits around for the constant to
  92 /// be valid.
  93 ///
  94 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
  95 /// before we get here.
  96 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
  97   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
  98          "unexpected parse state in floating hex");
  99   bool NoFracDigits = true;
 100
 101   // Skip the fractional part if there is one
 102   if (*CurPtr == '.') {
 103     ++CurPtr;
 104
 105     const char *FracStart = CurPtr;
 106     while (isHexDigit(*CurPtr))
 107       ++CurPtr;
 108
 109     NoFracDigits = CurPtr == FracStart;
 110   }
 111
 112   if (NoIntDigits && NoFracDigits)
 113     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 114                                  "expected at least one significand digit");
 115
 116   // Make sure we do have some kind of proper exponent part
 117   if (*CurPtr != 'p' && *CurPtr != 'P')
 118     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 119                                  "expected exponent part 'p'");
 120   ++CurPtr;
 121
 122   if (*CurPtr == '+' || *CurPtr == '-')
 123     ++CurPtr;
 124
 125   // N.b. exponent digits are *not* hex
 126   const char *ExpStart = CurPtr;
 127   while (isDigit(*CurPtr))
 128     ++CurPtr;
 129
 130   if (CurPtr == ExpStart)
 131     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
 132                                  "expected at least one exponent digit");
 133
 134   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
 135 }
 136
 137 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
 138 static bool IsIdentifierChar(char c, bool AllowAt) {
 139   return isAlnum(c) || c == '_' || c == '$' || c == '.' ||
 140          (c == '@' && AllowAt) || c == '?';
 141 }
 142
 143 AsmToken AsmLexer::LexIdentifier() {
 144   // Check for floating point literals.
 145   if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
 146     // Disambiguate a .1243foo identifier from a floating literal.
 147     while (isDigit(*CurPtr))
 148       ++CurPtr;
 149     if (*CurPtr == 'e' || *CurPtr == 'E' ||
 150         !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
 151       return LexFloatLiteral();
 152   }
 153
 154   while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
 155     ++CurPtr;
 156
 157   // Handle . as a special case.
 158   if (CurPtr == TokStart+1 && TokStart[0] == '.')
 159     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
 160
 161   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
 162 }
 163
 164 /// LexSlash: Slash: /
 165 ///           C-Style Comment: /* ... */
 166 AsmToken AsmLexer::LexSlash() {
 167   switch (*CurPtr) {
 168   case '*':
 169     IsAtStartOfStatement = false;
 170     break; // C style comment.
 171   case '/':
 172     ++CurPtr;
 173     return LexLineComment();
 174   default:
 175     IsAtStartOfStatement = false;
 176     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
 177   }
 178
 179   // C Style comment.
 180   ++CurPtr;  // skip the star.
 181   const char *CommentTextStart = CurPtr;
 182   while (CurPtr != CurBuf.end()) {
 183     switch (*CurPtr++) {
 184     case '*':
 185       // End of the comment?
 186       if (*CurPtr != '/')
 187         break;
 188       // If we have a CommentConsumer, notify it about the comment.
 189       if (CommentConsumer) {
 190         CommentConsumer->HandleComment(
 191             SMLoc::getFromPointer(CommentTextStart),
 192             StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
 193       }
 194       ++CurPtr;   // End the */.
 195       return AsmToken(AsmToken::Comment,
 196                       StringRef(TokStart, CurPtr - TokStart));
 197     }
 198   }
 199   return ReturnError(TokStart, "unterminated comment");
 200 }
 201
 202 /// LexLineComment: Comment: #[^\n]*
 203 ///                        : //[^\n]*
 204 AsmToken AsmLexer::LexLineComment() {
 205   // Mark This as an end of statement with a body of the
 206   // comment. While it would be nicer to leave this two tokens,
 207   // backwards compatability with TargetParsers makes keeping this in this form
 208   // better.
 209   const char *CommentTextStart = CurPtr;
 210   int CurChar = getNextChar();
 211   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
 212     CurChar = getNextChar();
 213   if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
 214     ++CurPtr;
 215
 216   // If we have a CommentConsumer, notify it about the comment.
 217   if (CommentConsumer) {
 218     CommentConsumer->HandleComment(
 219         SMLoc::getFromPointer(CommentTextStart),
 220         StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
 221   }
 222
 223   IsAtStartOfLine = true;
 224   // This is a whole line comment. leave newline
 225   if (IsAtStartOfStatement)
 226     return AsmToken(AsmToken::EndOfStatement,
 227                     StringRef(TokStart, CurPtr - TokStart));
 228   IsAtStartOfStatement = true;
 229
 230   return AsmToken(AsmToken::EndOfStatement,
 231                   StringRef(TokStart, CurPtr - 1 - TokStart));
 232 }
 233
 234 static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
 235   // Skip ULL, UL, U, L and LL suffices.
 236   if (CurPtr[0] == 'U')
 237     ++CurPtr;
 238   if (CurPtr[0] == 'L')
 239     ++CurPtr;
 240   if (CurPtr[0] == 'L')
 241     ++CurPtr;
 242 }
 243
 244 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
 245 // integer as a hexadecimal, possibly with leading zeroes.
 246 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
 247                                bool LexHex) {
 248   const char *FirstNonDec = nullptr;
 249   const char *LookAhead = CurPtr;
 250   while (true) {
 251     if (isDigit(*LookAhead)) {
 252       ++LookAhead;
 253     } else {
 254       if (!FirstNonDec)
 255         FirstNonDec = LookAhead;
 256
 257       // Keep going if we are looking for a 'h' suffix.
 258       if (LexHex && isHexDigit(*LookAhead))
 259         ++LookAhead;
 260       else
 261         break;
 262     }
 263   }
 264   bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
 265   CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
 266   if (isHex)
 267     return 16;
 268   return DefaultRadix;
 269 }
 270
 271 static AsmToken intToken(StringRef Ref, APInt &Value)
 272 {
 273   if (Value.isIntN(64))
 274     return AsmToken(AsmToken::Integer, Ref, Value);
 275   return AsmToken(AsmToken::BigNum, Ref, Value);
 276 }
 277
 278 /// LexDigit: First character is [0-9].
 279 ///   Local Label: [0-9][:]
 280 ///   Forward/Backward Label: [0-9][fb]
 281 ///   Binary integer: 0b[01]+
 282 ///   Octal integer: 0[0-7]+
 283 ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
 284 ///   Decimal integer: [1-9][0-9]*
 285 AsmToken AsmLexer::LexDigit() {
 286   // MASM-flavor binary integer: [01]+[bB]
 287   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
 288   if (LexMasmIntegers && isdigit(CurPtr[-1])) {
 289     const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
 290                                    CurPtr - 1 : nullptr;
 291     const char *OldCurPtr = CurPtr;
 292     while (isHexDigit(*CurPtr)) {
 293       if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
 294         FirstNonBinary = CurPtr;
 295       ++CurPtr;
 296     }
 297
 298     unsigned Radix = 0;
 299     if (*CurPtr == 'h' || *CurPtr == 'H') {
 300       // hexadecimal number
 301       ++CurPtr;
 302       Radix = 16;
 303     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
 304                (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
 305       Radix = 2;
 306
 307     if (Radix == 2 || Radix == 16) {
 308       StringRef Result(TokStart, CurPtr - TokStart);
 309       APInt Value(128, 0, true);
 310
 311       if (Result.drop_back().getAsInteger(Radix, Value))
 312         return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
 313                              "invalid hexdecimal number");
 314
 315       // MSVC accepts and ignores type suffices on integer literals.
 316       SkipIgnoredIntegerSuffix(CurPtr);
 317
 318       return intToken(Result, Value);
 319    }
 320
 321     // octal/decimal integers, or floating point numbers, fall through
 322     CurPtr = OldCurPtr;
 323   }
 324
 325   // Decimal integer: [1-9][0-9]*
 326   if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
 327     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
 328     bool isHex = Radix == 16;
 329     // Check for floating point literals.
 330     if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
 331       ++CurPtr;
 332       return LexFloatLiteral();
 333     }
 334
 335     StringRef Result(TokStart, CurPtr - TokStart);
 336
 337     APInt Value(128, 0, true);
 338     if (Result.getAsInteger(Radix, Value))
 339       return ReturnError(TokStart, !isHex ? "invalid decimal number" :
 340                            "invalid hexdecimal number");
 341
 342     // Consume the [hH].
 343     if (LexMasmIntegers && Radix == 16)
 344       ++CurPtr;
 345
 346     // The darwin/x86 (and x86-64) assembler accepts and ignores type
 347     // suffices on integer literals.
 348     SkipIgnoredIntegerSuffix(CurPtr);
 349
 350     return intToken(Result, Value);
 351   }
 352
 353   if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
 354     ++CurPtr;
 355     // See if we actually have "0b" as part of something like "jmp 0b\n"
 356     if (!isDigit(CurPtr[0])) {
 357       --CurPtr;
 358       StringRef Result(TokStart, CurPtr - TokStart);
 359       return AsmToken(AsmToken::Integer, Result, 0);
 360     }
 361     const char *NumStart = CurPtr;
 362     while (CurPtr[0] == '0' || CurPtr[0] == '1')
 363       ++CurPtr;
 364
 365     // Requires at least one binary digit.
 366     if (CurPtr == NumStart)
 367       return ReturnError(TokStart, "invalid binary number");
 368
 369     StringRef Result(TokStart, CurPtr - TokStart);
 370
 371     APInt Value(128, 0, true);
 372     if (Result.substr(2).getAsInteger(2, Value))
 373       return ReturnError(TokStart, "invalid binary number");
 374
 375     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 376     // suffixes on integer literals.
 377     SkipIgnoredIntegerSuffix(CurPtr);
 378
 379     return intToken(Result, Value);
 380   }
 381
 382   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
 383     ++CurPtr;
 384     const char *NumStart = CurPtr;
 385     while (isHexDigit(CurPtr[0]))
 386       ++CurPtr;
 387
 388     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
 389     // diagnosed by LexHexFloatLiteral).
 390     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
 391       return LexHexFloatLiteral(NumStart == CurPtr);
 392
 393     // Otherwise requires at least one hex digit.
 394     if (CurPtr == NumStart)
 395       return ReturnError(CurPtr-2, "invalid hexadecimal number");
 396
 397     APInt Result(128, 0);
 398     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
 399       return ReturnError(TokStart, "invalid hexadecimal number");
 400
 401     // Consume the optional [hH].
 402     if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
 403       ++CurPtr;
 404
 405     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 406     // suffixes on integer literals.
 407     SkipIgnoredIntegerSuffix(CurPtr);
 408
 409     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
 410   }
 411
 412   // Either octal or hexadecimal.
 413   APInt Value(128, 0, true);
 414   unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
 415   bool isHex = Radix == 16;
 416   StringRef Result(TokStart, CurPtr - TokStart);
 417   if (Result.getAsInteger(Radix, Value))
 418     return ReturnError(TokStart, !isHex ? "invalid octal number" :
 419                        "invalid hexdecimal number");
 420
 421   // Consume the [hH].
 422   if (Radix == 16)
 423     ++CurPtr;
 424
 425   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
 426   // suffixes on integer literals.
 427   SkipIgnoredIntegerSuffix(CurPtr);
 428
 429   return intToken(Result, Value);
 430 }
 431
 432 /// LexSingleQuote: Integer: 'b'
 433 AsmToken AsmLexer::LexSingleQuote() {
 434   int CurChar = getNextChar();
 435
 436   if (CurChar == '\\')
 437     CurChar = getNextChar();
 438
 439   if (CurChar == EOF)
 440     return ReturnError(TokStart, "unterminated single quote");
 441
 442   CurChar = getNextChar();
 443
 444   if (CurChar != '\'')
 445     return ReturnError(TokStart, "single quote way too long");
 446
 447   // The idea here being that 'c' is basically just an integral
 448   // constant.
 449   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
 450   long long Value;
 451
 452   if (Res.startswith("\'\\")) {
 453     char theChar = Res[2];
 454     switch (theChar) {
 455       default: Value = theChar; break;
 456       case '\'': Value = '\''; break;
 457       case 't': Value = '\t'; break;
 458       case 'n': Value = '\n'; break;
 459       case 'b': Value = '\b'; break;
 460     }
 461   } else
 462     Value = TokStart[1];
 463
 464   return AsmToken(AsmToken::Integer, Res, Value);
 465 }
 466
 467 /// LexQuote: String: "..."
 468 AsmToken AsmLexer::LexQuote() {
 469   int CurChar = getNextChar();
 470   // TODO: does gas allow multiline string constants?
 471   while (CurChar != '"') {
 472     if (CurChar == '\\') {
 473       // Allow \", etc.
 474       CurChar = getNextChar();
 475     }
 476
 477     if (CurChar == EOF)
 478       return ReturnError(TokStart, "unterminated string constant");
 479
 480     CurChar = getNextChar();
 481   }
 482
 483   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
 484 }
 485
 486 StringRef AsmLexer::LexUntilEndOfStatement() {
 487   TokStart = CurPtr;
 488
 489   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
 490          !isAtStatementSeparator(CurPtr) && // End of statement marker.
 491          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
 492     ++CurPtr;
 493   }
 494   return StringRef(TokStart, CurPtr-TokStart);
 495 }
 496
 497 StringRef AsmLexer::LexUntilEndOfLine() {
 498   TokStart = CurPtr;
 499
 500   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
 501     ++CurPtr;
 502   }
 503   return StringRef(TokStart, CurPtr-TokStart);
 504 }
 505
 506 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
 507                             bool ShouldSkipSpace) {
 508   SaveAndRestore<const char *> SavedTokenStart(TokStart);
 509   SaveAndRestore<const char *> SavedCurPtr(CurPtr);
 510   SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
 511   SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
 512   SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
 513   SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
 514   std::string SavedErr = getErr();
 515   SMLoc SavedErrLoc = getErrLoc();
 516
 517   size_t ReadCount;
 518   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
 519     AsmToken Token = LexToken();
 520
 521     Buf[ReadCount] = Token;
 522
 523     if (Token.is(AsmToken::Eof))
 524       break;
 525   }
 526
 527   SetError(SavedErrLoc, SavedErr);
 528   return ReadCount;
 529 }
 530
 531 bool AsmLexer::isAtStartOfComment(const char *Ptr) {
 532   StringRef CommentString = MAI.getCommentString();
 533
 534   if (CommentString.size() == 1)
 535     return CommentString[0] == Ptr[0];
 536
 537   // Allow # preprocessor commments also be counted as comments for "##" cases
 538   if (CommentString[1] == '#')
 539     return CommentString[0] == Ptr[0];
 540
 541   return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
 542 }
 543
 544 bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
 545   return strncmp(Ptr, MAI.getSeparatorString(),
 546                  strlen(MAI.getSeparatorString())) == 0;
 547 }
 548
/// Lex and return the next token from the current position.
///
/// TokStart is set to the first character of the token. IsAtStartOfLine and
/// IsAtStartOfStatement are updated as a side effect: they are set after an
/// end-of-statement or end-of-file token and cleared after most other tokens.
/// When SkipSpace is set, runs of blanks are skipped instead of being returned
/// as Space tokens.
AsmToken AsmLexer::LexToken() {
  TokStart = CurPtr;
  // This always consumes at least one character.
  int CurChar = getNextChar();

  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
    // If this starts with a '#', this may be a cpp
    // hash directive and otherwise a line comment.
    AsmToken TokenBuf[2];
    MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
    size_t num = peekTokens(Buf, true);
    // There cannot be a space preceding this
    if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
        TokenBuf[1].is(AsmToken::String)) {
      // '#' followed by an integer and a string: return the whole line as a
      // HashDirective token and push the two peeked tokens back.
      CurPtr = TokStart; // reset curPtr;
      StringRef s = LexUntilEndOfLine();
      UnLex(TokenBuf[1]);
      UnLex(TokenBuf[0]);
      return AsmToken(AsmToken::HashDirective, s);
    }
    return LexLineComment();
  }

  if (isAtStartOfComment(TokStart))
    return LexLineComment();

  if (isAtStatementSeparator(TokStart)) {
    // One separator character was consumed by getNextChar() above; skip the
    // remaining ones.
    CurPtr += strlen(MAI.getSeparatorString()) - 1;
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::EndOfStatement,
                    StringRef(TokStart, strlen(MAI.getSeparatorString())));
  }

  // If we're missing a newline at EOF, make sure we still get an
  // EndOfStatement token before the Eof token.
  if (CurChar == EOF && !IsAtStartOfStatement) {
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  }
  // Any other token clears both flags; the cases below that must not change
  // the start-of-statement flag restore it from the saved value.
  IsAtStartOfLine = false;
  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
  IsAtStartOfStatement = false;
  switch (CurChar) {
  default:
    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
      return LexIdentifier();

    // Unknown character, emit an error.
    return ReturnError(TokStart, "invalid character in input");
  case EOF:
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
  case 0:
  case ' ':
  case '\t':
    // Blanks (and NUL) do not end the start-of-statement state.
    IsAtStartOfStatement = OldIsAtStartOfStatement;
    while (*CurPtr == ' ' || *CurPtr == '\t')
      CurPtr++;
    if (SkipSpace)
      return LexToken(); // Ignore whitespace.
    else
      return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
  case '\r': {
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    // If this is a CR followed by LF, treat that as one token.
    if (CurPtr != CurBuf.end() && *CurPtr == '\n')
      ++CurPtr;
    return AsmToken(AsmToken::EndOfStatement,
                    StringRef(TokStart, CurPtr - TokStart));
  }
  case '\n':
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
  case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
  case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
  case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
  case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
  case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
  case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
  case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
  case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
  case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
  case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
  case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
  // The operators below are either one or two characters long; the second
  // character is consumed only when it completes the longer operator.
  case '=':
    if (*CurPtr == '=') {
      ++CurPtr;
      return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
  case '-':
    if (*CurPtr == '>') {
      ++CurPtr;
      return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
  case '|':
    if (*CurPtr == '|') {
      ++CurPtr;
      return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
  case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
  case '&':
    if (*CurPtr == '&') {
      ++CurPtr;
      return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
  case '!':
    if (*CurPtr == '=') {
      ++CurPtr;
      return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
    }
    return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
  case '%':
    if (MAI.hasMipsExpressions()) {
      AsmToken::TokenKind Operator;
      unsigned OperatorLength;

      // Match the text after '%' against the known operator names. Each
      // length counts the '%' itself. Longer names are listed before the
      // shorter names they start with (e.g. "got_disp" before "got").
      std::tie(Operator, OperatorLength) =
          StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
              StringRef(CurPtr))
              .StartsWith("call16", {AsmToken::PercentCall16, 7})
              .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
              .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
              .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
              .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
              .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
              .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
              .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
              .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
              .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
              .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
              .StartsWith("got", {AsmToken::PercentGot, 4})
              .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
              .StartsWith("higher", {AsmToken::PercentHigher, 7})
              .StartsWith("highest", {AsmToken::PercentHighest, 8})
              .StartsWith("hi", {AsmToken::PercentHi, 3})
              .StartsWith("lo", {AsmToken::PercentLo, 3})
              .StartsWith("neg", {AsmToken::PercentNeg, 4})
              .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
              .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
              .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
              .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
              .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
              .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
              .Default({AsmToken::Percent, 1});

      if (Operator != AsmToken::Percent) {
        // The '%' is already consumed, hence the - 1.
        CurPtr += OperatorLength - 1;
        return AsmToken(Operator, StringRef(TokStart, OperatorLength));
      }
    }
    return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
  case '/':
    // LexSlash decides how the start-of-statement flag changes, so hand it
    // the value from before this token.
    IsAtStartOfStatement = OldIsAtStartOfStatement;
    return LexSlash();
  case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
  case '\'': return LexSingleQuote();
  case '"': return LexQuote();
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return LexDigit();
  case '<':
    switch (*CurPtr) {
    case '<':
      ++CurPtr;
      return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
    case '=':
      ++CurPtr;
      return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
    case '>':
      ++CurPtr;
      return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
    default:
      return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
    }
  case '>':
    switch (*CurPtr) {
    case '>':
      ++CurPtr;
      return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
    case '=':
      ++CurPtr;
      return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
    default:
      return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
    }

  // TODO: Quoted identifiers (objc methods etc)
  // local labels: [0-9][:]
  // Forward/backward labels: [0-9][fb]
  // Integers, fp constants, character constants.
  }
}