contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h

   1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
  11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
  12
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
#include <utility>
  23
  24 namespace llvm {
  25
  26 /// Target independent representation for an assembler token.
  27 class AsmToken {
  28 public:
  29   enum TokenKind {
  30     // Markers
  31     Eof, Error,
  32
  33     // String values.
  34     Identifier,
  35     String,
  36
  37     // Integer values.
  38     Integer,
  39     BigNum, // larger than 64 bits
  40
  41     // Real values.
  42     Real,
  43
  44     // Comments
  45     Comment,
  46     HashDirective,
  47     // No-value.
  48     EndOfStatement,
  49     Colon,
  50     Space,
  51     Plus, Minus, Tilde,
  52     Slash,     // '/'
  53     BackSlash, // '\'
  54     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
  55     Star, Dot, Comma, Dollar, Equal, EqualEqual,
  56
  57     Pipe, PipePipe, Caret,
  58     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
  59     Less, LessEqual, LessLess, LessGreater,
  60     Greater, GreaterEqual, GreaterGreater, At,
  61
  62     // MIPS unary expression operators such as %neg.
  63     PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
  64     PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
  65     PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
  66     PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
  67     PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
  68     PercentTprel_Lo
  69   };
  70
  71 private:
  72   TokenKind Kind;
  73
  74   /// A reference to the entire token contents; this is always a pointer into
  75   /// a memory buffer owned by the source manager.
  76   StringRef Str;
  77
  78   APInt IntVal;
  79
  80 public:
  81   AsmToken() = default;
  82   AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
  83       : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
  84   AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
  85       : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
  86
  87   TokenKind getKind() const { return Kind; }
  88   bool is(TokenKind K) const { return Kind == K; }
  89   bool isNot(TokenKind K) const { return Kind != K; }
  90
  91   SMLoc getLoc() const;
  92   SMLoc getEndLoc() const;
  93   SMRange getLocRange() const;
  94
  95   /// Get the contents of a string token (without quotes).
  96   StringRef getStringContents() const {
  97     assert(Kind == String && "This token isn't a string!");
  98     return Str.slice(1, Str.size() - 1);
  99   }
 100
 101   /// Get the identifier string for the current token, which should be an
 102   /// identifier or a string. This gets the portion of the string which should
 103   /// be used as the identifier, e.g., it does not include the quotes on
 104   /// strings.
 105   StringRef getIdentifier() const {
 106     if (Kind == Identifier)
 107       return getString();
 108     return getStringContents();
 109   }
 110
 111   /// Get the string for the current token, this includes all characters (for
 112   /// example, the quotes on strings) in the token.
 113   ///
 114   /// The returned StringRef points into the source manager's memory buffer, and
 115   /// is safe to store across calls to Lex().
 116   StringRef getString() const { return Str; }
 117
 118   // FIXME: Don't compute this in advance, it makes every token larger, and is
 119   // also not generally what we want (it is nicer for recovery etc. to lex 123br
 120   // as a single token, then diagnose as an invalid number).
 121   int64_t getIntVal() const {
 122     assert(Kind == Integer && "This token isn't an integer!");
 123     return IntVal.getZExtValue();
 124   }
 125
 126   APInt getAPIntVal() const {
 127     assert((Kind == Integer || Kind == BigNum) &&
 128            "This token isn't an integer!");
 129     return IntVal;
 130   }
 131 };
 132
 133 /// A callback class which is notified of each comment in an assembly file as
 134 /// it is lexed.
 135 class AsmCommentConsumer {
 136 public:
 137   virtual ~AsmCommentConsumer() = default;
 138
 139   /// Callback function for when a comment is lexed. Loc is the start of the
 140   /// comment text (excluding the comment-start marker). CommentText is the text
 141   /// of the comment, excluding the comment start and end markers, and the
 142   /// newline for single-line comments.
 143   virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
 144 };
 145
 146
 147 /// Generic assembler lexer interface, for use by target specific assembly
 148 /// lexers.
 149 class MCAsmLexer {
 150   /// The current token, stored in the base class for faster access.
 151   SmallVector<AsmToken, 1> CurTok;
 152
 153   /// The location and description of the current error
 154   SMLoc ErrLoc;
 155   std::string Err;
 156
 157 protected: // Can only create subclasses.
 158   const char *TokStart = nullptr;
 159   bool SkipSpace = true;
 160   bool AllowAtInIdentifier;
 161   bool IsAtStartOfStatement = true;
 162   AsmCommentConsumer *CommentConsumer = nullptr;
 163
 164   bool AltMacroMode;
 165   MCAsmLexer();
 166
 167   virtual AsmToken LexToken() = 0;
 168
 169   void SetError(SMLoc errLoc, const std::string &err) {
 170     ErrLoc = errLoc;
 171     Err = err;
 172   }
 173
 174 public:
 175   MCAsmLexer(const MCAsmLexer &) = delete;
 176   MCAsmLexer &operator=(const MCAsmLexer &) = delete;
 177   virtual ~MCAsmLexer();
 178
 179   bool IsaAltMacroMode() {
 180     return AltMacroMode;
 181   }
 182
 183   void SetAltMacroMode(bool AltMacroSet) {
 184     AltMacroMode = AltMacroSet;
 185   }
 186
 187   /// Consume the next token from the input stream and return it.
 188   ///
 189   /// The lexer will continuosly return the end-of-file token once the end of
 190   /// the main input file has been reached.
 191   const AsmToken &Lex() {
 192     assert(!CurTok.empty());
 193     // Mark if we parsing out a EndOfStatement.
 194     IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
 195     CurTok.erase(CurTok.begin());
 196     // LexToken may generate multiple tokens via UnLex but will always return
 197     // the first one. Place returned value at head of CurTok vector.
 198     if (CurTok.empty()) {
 199       AsmToken T = LexToken();
 200       CurTok.insert(CurTok.begin(), T);
 201     }
 202     return CurTok.front();
 203   }
 204
 205   void UnLex(AsmToken const &Token) {
 206     IsAtStartOfStatement = false;
 207     CurTok.insert(CurTok.begin(), Token);
 208   }
 209
 210   bool isAtStartOfStatement() { return IsAtStartOfStatement; }
 211
 212   virtual StringRef LexUntilEndOfStatement() = 0;
 213
 214   /// Get the current source location.
 215   SMLoc getLoc() const;
 216
 217   /// Get the current (last) lexed token.
 218   const AsmToken &getTok() const {
 219     return CurTok[0];
 220   }
 221
 222   /// Look ahead at the next token to be lexed.
 223   const AsmToken peekTok(bool ShouldSkipSpace = true) {
 224     AsmToken Tok;
 225
 226     MutableArrayRef<AsmToken> Buf(Tok);
 227     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
 228
 229     assert(ReadCount == 1);
 230     (void)ReadCount;
 231
 232     return Tok;
 233   }
 234
 235   /// Look ahead an arbitrary number of tokens.
 236   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
 237                             bool ShouldSkipSpace = true) = 0;
 238
 239   /// Get the current error location
 240   SMLoc getErrLoc() {
 241     return ErrLoc;
 242   }
 243
 244   /// Get the current error string
 245   const std::string &getErr() {
 246     return Err;
 247   }
 248
 249   /// Get the kind of current token.
 250   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
 251
 252   /// Check if the current token has kind \p K.
 253   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
 254
 255   /// Check if the current token has kind \p K.
 256   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
 257
 258   /// Set whether spaces should be ignored by the lexer
 259   void setSkipSpace(bool val) { SkipSpace = val; }
 260
 261   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
 262   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
 263
 264   void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
 265     this->CommentConsumer = CommentConsumer;
 266   }
 267 };
 268
 269 } // end namespace llvm
 270
 271 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H