1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/SMLoc.h"
26 /// Target independent representation for an assembler token.
39 BigNum, // larger than 64 bits
54 LParen, RParen, LBrac, RBrac, LCurly, RCurly,
55 Star, Dot, Comma, Dollar, Equal, EqualEqual,
57 Pipe, PipePipe, Caret,
58 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
59 Less, LessEqual, LessLess, LessGreater,
60 Greater, GreaterEqual, GreaterGreater, At,
62 // MIPS unary expression operators such as %neg.
63 PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
64 PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
65 PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
66 PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
67 PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
74 /// A reference to the entire token contents; this is always a pointer into
75 /// a memory buffer owned by the source manager.
82 AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
83 : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
84 AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
85 : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
87 TokenKind getKind() const { return Kind; }
88 bool is(TokenKind K) const { return Kind == K; }
89 bool isNot(TokenKind K) const { return Kind != K; }
92 SMLoc getEndLoc() const;
93 SMRange getLocRange() const;
95 /// Get the contents of a string token (without quotes).
96 StringRef getStringContents() const {
97 assert(Kind == String && "This token isn't a string!");
98 return Str.slice(1, Str.size() - 1);
101 /// Get the identifier string for the current token, which should be an
102 /// identifier or a string. This gets the portion of the string which should
103 /// be used as the identifier, e.g., it does not include the quotes on
105 StringRef getIdentifier() const {
106 if (Kind == Identifier)
108 return getStringContents();
111 /// Get the string for the current token, this includes all characters (for
112 /// example, the quotes on strings) in the token.
114 /// The returned StringRef points into the source manager's memory buffer, and
115 /// is safe to store across calls to Lex().
116 StringRef getString() const { return Str; }
118 // FIXME: Don't compute this in advance, it makes every token larger, and is
119 // also not generally what we want (it is nicer for recovery etc. to lex 123br
120 // as a single token, then diagnose as an invalid number).
121 int64_t getIntVal() const {
122 assert(Kind == Integer && "This token isn't an integer!");
123 return IntVal.getZExtValue();
126 APInt getAPIntVal() const {
127 assert((Kind == Integer || Kind == BigNum) &&
128 "This token isn't an integer!");
133 /// A callback class which is notified of each comment in an assembly file as
135 class AsmCommentConsumer {
137 virtual ~AsmCommentConsumer() = default;
139 /// Callback function for when a comment is lexed. Loc is the start of the
140 /// comment text (excluding the comment-start marker). CommentText is the text
141 /// of the comment, excluding the comment start and end markers, and the
142 /// newline for single-line comments.
143 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
147 /// Generic assembler lexer interface, for use by target specific assembly
150 /// The current token, stored in the base class for faster access.
151 SmallVector<AsmToken, 1> CurTok;
153 /// The location and description of the current error
157 protected: // Can only create subclasses.
158 const char *TokStart = nullptr;
159 bool SkipSpace = true;
160 bool AllowAtInIdentifier;
161 bool IsAtStartOfStatement = true;
162 AsmCommentConsumer *CommentConsumer = nullptr;
167 virtual AsmToken LexToken() = 0;
169 void SetError(SMLoc errLoc, const std::string &err) {
175 MCAsmLexer(const MCAsmLexer &) = delete;
176 MCAsmLexer &operator=(const MCAsmLexer &) = delete;
177 virtual ~MCAsmLexer();
179 bool IsaAltMacroMode() {
183 void SetAltMacroMode(bool AltMacroSet) {
184 AltMacroMode = AltMacroSet;
187 /// Consume the next token from the input stream and return it.
189 /// The lexer will continuosly return the end-of-file token once the end of
190 /// the main input file has been reached.
191 const AsmToken &Lex() {
192 assert(!CurTok.empty());
193 // Mark if we parsing out a EndOfStatement.
194 IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
195 CurTok.erase(CurTok.begin());
196 // LexToken may generate multiple tokens via UnLex but will always return
197 // the first one. Place returned value at head of CurTok vector.
198 if (CurTok.empty()) {
199 AsmToken T = LexToken();
200 CurTok.insert(CurTok.begin(), T);
202 return CurTok.front();
205 void UnLex(AsmToken const &Token) {
206 IsAtStartOfStatement = false;
207 CurTok.insert(CurTok.begin(), Token);
210 bool isAtStartOfStatement() { return IsAtStartOfStatement; }
212 virtual StringRef LexUntilEndOfStatement() = 0;
214 /// Get the current source location.
215 SMLoc getLoc() const;
217 /// Get the current (last) lexed token.
218 const AsmToken &getTok() const {
222 /// Look ahead at the next token to be lexed.
223 const AsmToken peekTok(bool ShouldSkipSpace = true) {
226 MutableArrayRef<AsmToken> Buf(Tok);
227 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
229 assert(ReadCount == 1);
235 /// Look ahead an arbitrary number of tokens.
236 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
237 bool ShouldSkipSpace = true) = 0;
239 /// Get the current error location
244 /// Get the current error string
245 const std::string &getErr() {
249 /// Get the kind of current token.
250 AsmToken::TokenKind getKind() const { return getTok().getKind(); }
252 /// Check if the current token has kind \p K.
253 bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
255 /// Check if the current token has kind \p K.
256 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
258 /// Set whether spaces should be ignored by the lexer
259 void setSkipSpace(bool val) { SkipSpace = val; }
261 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
262 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
264 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
265 this->CommentConsumer = CommentConsumer;
269 } // end namespace llvm
271 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H