1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
42 static const char OpPrecedence[] = {
61 class X86AsmParser : public MCTargetAsmParser {
62 const MCInstrInfo &MII;
63 ParseInstructionInfo *InstInfo;
64 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
68 SMLoc consumeToken() {
69 MCAsmParser &Parser = getParser();
70 SMLoc Result = Parser.getTok().getLoc();
/// Matches Operands against the instruction tables by forwarding every
/// argument to MatchInstructionImpl(). The parser mode is switched to
/// X86::Mode32Bit before that call and to X86::Mode16Bit after it.
/// NOTE(review): the conditions guarding the two SwitchMode() calls and the
/// return statement are not visible in this listing; per the comment below the
/// switching presumably applies only in Code16GCC mode -- confirm.
75 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
76 uint64_t &ErrorInfo, bool matchingInlineAsm,
77 unsigned VariantID = 0) {
78 // In Code16GCC mode, match as 32-bit.
80 SwitchMode(X86::Mode32Bit);
81 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
82 matchingInlineAsm, VariantID);
// Restore 16-bit mode once matching is done.
84 SwitchMode(X86::Mode16Bit);
88 enum InfixCalculatorTok {
107 enum IntelOperatorKind {
115 class InfixCalculator {
116 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
117 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
118 SmallVector<ICToken, 4> PostfixStack;
120 bool isUnaryOperator(const InfixCalculatorTok Op) {
121 return Op == IC_NEG || Op == IC_NOT;
125 int64_t popOperand() {
126 assert (!PostfixStack.empty() && "Poped an empty stack!");
127 ICToken Op = PostfixStack.pop_back_val();
128 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
129 && "Expected and immediate or register!");
132 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
133 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
134 "Unexpected operand!");
135 PostfixStack.push_back(std::make_pair(Op, Val));
138 void popOperator() { InfixOperatorStack.pop_back(); }
139 void pushOperator(InfixCalculatorTok Op) {
140 // Push the new operator if the stack is empty.
141 if (InfixOperatorStack.empty()) {
142 InfixOperatorStack.push_back(Op);
146 // Push the new operator if it has a higher precedence than the operator
147 // on the top of the stack or the operator on the top of the stack is a
149 unsigned Idx = InfixOperatorStack.size() - 1;
150 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
151 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
152 InfixOperatorStack.push_back(Op);
156 // The operator on the top of the stack has higher precedence than the
158 unsigned ParenCount = 0;
160 // Nothing to process.
161 if (InfixOperatorStack.empty())
164 Idx = InfixOperatorStack.size() - 1;
165 StackOp = InfixOperatorStack[Idx];
166 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
169 // If we have an even parentheses count and we see a left parentheses,
170 // then stop processing.
171 if (!ParenCount && StackOp == IC_LPAREN)
174 if (StackOp == IC_RPAREN) {
176 InfixOperatorStack.pop_back();
177 } else if (StackOp == IC_LPAREN) {
179 InfixOperatorStack.pop_back();
181 InfixOperatorStack.pop_back();
182 PostfixStack.push_back(std::make_pair(StackOp, 0));
185 // Push the new operator.
186 InfixOperatorStack.push_back(Op);
190 // Push any remaining operators onto the postfix stack.
191 while (!InfixOperatorStack.empty()) {
192 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
193 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
194 PostfixStack.push_back(std::make_pair(StackOp, 0));
197 if (PostfixStack.empty())
200 SmallVector<ICToken, 16> OperandStack;
201 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
202 ICToken Op = PostfixStack[i];
203 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
204 OperandStack.push_back(Op);
205 } else if (isUnaryOperator(Op.first)) {
206 assert (OperandStack.size() > 0 && "Too few operands.");
207 ICToken Operand = OperandStack.pop_back_val();
208 assert (Operand.first == IC_IMM &&
209 "Unary operation with a register!");
212 report_fatal_error("Unexpected operator!");
215 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
218 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
222 assert (OperandStack.size() > 1 && "Too few operands.");
224 ICToken Op2 = OperandStack.pop_back_val();
225 ICToken Op1 = OperandStack.pop_back_val();
228 report_fatal_error("Unexpected operator!");
231 Val = Op1.second + Op2.second;
232 OperandStack.push_back(std::make_pair(IC_IMM, Val));
235 Val = Op1.second - Op2.second;
236 OperandStack.push_back(std::make_pair(IC_IMM, Val));
239 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
240 "Multiply operation with an immediate and a register!");
241 Val = Op1.second * Op2.second;
242 OperandStack.push_back(std::make_pair(IC_IMM, Val));
245 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
246 "Divide operation with an immediate and a register!");
247 assert (Op2.second != 0 && "Division by zero!");
248 Val = Op1.second / Op2.second;
249 OperandStack.push_back(std::make_pair(IC_IMM, Val));
252 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
253 "Modulo operation with an immediate and a register!");
254 Val = Op1.second % Op2.second;
255 OperandStack.push_back(std::make_pair(IC_IMM, Val));
258 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
259 "Or operation with an immediate and a register!");
260 Val = Op1.second | Op2.second;
261 OperandStack.push_back(std::make_pair(IC_IMM, Val));
264 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
265 "Xor operation with an immediate and a register!");
266 Val = Op1.second ^ Op2.second;
267 OperandStack.push_back(std::make_pair(IC_IMM, Val));
270 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
271 "And operation with an immediate and a register!");
272 Val = Op1.second & Op2.second;
273 OperandStack.push_back(std::make_pair(IC_IMM, Val));
276 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
277 "Left shift operation with an immediate and a register!");
278 Val = Op1.second << Op2.second;
279 OperandStack.push_back(std::make_pair(IC_IMM, Val));
282 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
283 "Right shift operation with an immediate and a register!");
284 Val = Op1.second >> Op2.second;
285 OperandStack.push_back(std::make_pair(IC_IMM, Val));
290 assert (OperandStack.size() == 1 && "Expected a single result.");
291 return OperandStack.pop_back_val().second;
295 enum IntelExprState {
317 class IntelExprStateMachine {
318 IntelExprState State, PrevState;
319 unsigned BaseReg, IndexReg, TmpReg, Scale;
323 bool StopOnLBrac, AddImmPrefix;
325 InlineAsmIdentifierInfo Info;
328 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
329 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
330 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
331 AddImmPrefix(addimmprefix) { Info.clear(); }
333 unsigned getBaseReg() { return BaseReg; }
334 unsigned getIndexReg() { return IndexReg; }
335 unsigned getScale() { return Scale; }
336 const MCExpr *getSym() { return Sym; }
337 StringRef getSymName() { return SymName; }
338 int64_t getImm() { return Imm + IC.execute(); }
339 bool isValidEndState() {
340 return State == IES_RBRAC || State == IES_INTEGER;
342 bool getStopOnLBrac() { return StopOnLBrac; }
343 bool getAddImmPrefix() { return AddImmPrefix; }
344 bool hadError() { return State == IES_ERROR; }
346 InlineAsmIdentifierInfo &getIdentifierInfo() {
351 IntelExprState CurrState = State;
360 IC.pushOperator(IC_OR);
363 PrevState = CurrState;
366 IntelExprState CurrState = State;
375 IC.pushOperator(IC_XOR);
378 PrevState = CurrState;
381 IntelExprState CurrState = State;
390 IC.pushOperator(IC_AND);
393 PrevState = CurrState;
396 IntelExprState CurrState = State;
405 IC.pushOperator(IC_LSHIFT);
408 PrevState = CurrState;
411 IntelExprState CurrState = State;
420 IC.pushOperator(IC_RSHIFT);
423 PrevState = CurrState;
426 IntelExprState CurrState = State;
435 IC.pushOperator(IC_PLUS);
436 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
437 // If we already have a BaseReg, then assume this is the IndexReg with
442 assert (!IndexReg && "BaseReg/IndexReg already set!");
449 PrevState = CurrState;
452 IntelExprState CurrState = State;
474 // push minus operator if it is not a negate operator
475 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
476 CurrState == IES_INTEGER || CurrState == IES_RBRAC)
477 IC.pushOperator(IC_MINUS);
479 IC.pushOperator(IC_NEG);
480 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
481 // If we already have a BaseReg, then assume this is the IndexReg with
486 assert (!IndexReg && "BaseReg/IndexReg already set!");
493 PrevState = CurrState;
496 IntelExprState CurrState = State;
515 IC.pushOperator(IC_NOT);
518 PrevState = CurrState;
520 void onRegister(unsigned Reg) {
521 IntelExprState CurrState = State;
528 State = IES_REGISTER;
530 IC.pushOperand(IC_REGISTER);
533 // Index Register - Scale * Register
534 if (PrevState == IES_INTEGER) {
535 assert (!IndexReg && "IndexReg already set!");
536 State = IES_REGISTER;
538 // Get the scale and replace the 'Scale * Register' with '0'.
539 Scale = IC.popOperand();
540 IC.pushOperand(IC_IMM);
547 PrevState = CurrState;
549 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
560 SymName = SymRefName;
561 IC.pushOperand(IC_IMM);
565 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
566 IntelExprState CurrState = State;
584 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
585 // Index Register - Register * Scale
586 assert (!IndexReg && "IndexReg already set!");
589 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
590 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
593 // Get the scale and replace the 'Register * Scale' with '0'.
596 IC.pushOperand(IC_IMM, TmpInt);
600 PrevState = CurrState;
612 State = IES_MULTIPLY;
613 IC.pushOperator(IC_MULTIPLY);
626 IC.pushOperator(IC_DIVIDE);
639 IC.pushOperator(IC_MOD);
651 IC.pushOperator(IC_PLUS);
656 IntelExprState CurrState = State;
665 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
666 // If we already have a BaseReg, then assume this is the IndexReg with
671 assert (!IndexReg && "BaseReg/IndexReg already set!");
678 PrevState = CurrState;
681 IntelExprState CurrState = State;
699 IC.pushOperator(IC_LPAREN);
702 PrevState = CurrState;
714 IC.pushOperator(IC_RPAREN);
/// Reports the diagnostic Msg at location L (optionally covering Range).
/// When MatchingInlineAsm is set, the rest of the current statement is
/// consumed unless the lexer is already at the start of a statement;
/// otherwise the diagnostic is forwarded to the generic parser's Error().
/// NOTE(review): the tail of the MatchingInlineAsm branch (including what it
/// returns) is not visible in this listing.
720 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
721 bool MatchingInlineAsm = false) {
722 MCAsmParser &Parser = getParser();
723 if (MatchingInlineAsm) {
724 if (!getLexer().isAtStartOfStatement())
725 Parser.eatToEndOfStatement();
728 return Parser.Error(L, Msg, Range);
731 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
736 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
737 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
738 bool IsSIReg(unsigned Reg);
739 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
741 AddDefaultSrcDestOperands(OperandVector &Operands,
742 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
743 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
744 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
745 OperandVector &FinalOperands);
746 std::unique_ptr<X86Operand> ParseOperand();
747 std::unique_ptr<X86Operand> ParseATTOperand();
748 std::unique_ptr<X86Operand> ParseIntelOperand();
749 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
750 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
751 unsigned IdentifyIntelOperator(StringRef Name);
752 unsigned ParseIntelOperator(unsigned OpKind);
753 std::unique_ptr<X86Operand>
754 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
755 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
756 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
757 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
758 std::unique_ptr<X86Operand>
759 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
760 bool isSymbol, unsigned Size);
761 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
762 InlineAsmIdentifierInfo &Info,
763 bool IsUnevaluatedOperand, SMLoc &End);
765 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
767 std::unique_ptr<X86Operand>
768 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
769 unsigned IndexReg, unsigned Scale, SMLoc Start,
770 SMLoc End, unsigned Size, StringRef Identifier,
771 InlineAsmIdentifierInfo &Info,
772 bool AllowBetterSizeMatch = false);
774 bool parseDirectiveEven(SMLoc L);
775 bool ParseDirectiveWord(unsigned Size, SMLoc L);
776 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
778 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
780 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
781 /// instrumentation around Inst.
782 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
784 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
785 OperandVector &Operands, MCStreamer &Out,
787 bool MatchingInlineAsm) override;
789 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
790 MCStreamer &Out, bool MatchingInlineAsm);
792 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
793 bool MatchingInlineAsm);
795 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
796 OperandVector &Operands, MCStreamer &Out,
798 bool MatchingInlineAsm);
800 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
801 OperandVector &Operands, MCStreamer &Out,
803 bool MatchingInlineAsm);
805 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
807 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
808 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
809 /// return false if no parsing errors occurred, true otherwise.
810 bool HandleAVX512Operand(OperandVector &Operands,
811 const MCParsedAsmOperand &Op);
813 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
/// Returns the X86::Mode64Bit bit of the current subtarget's feature bits.
815 bool is64BitMode() const {
816 // FIXME: Can tablegen auto-generate this?
817 return getSTI().getFeatureBits()[X86::Mode64Bit];
/// Returns the X86::Mode32Bit bit of the current subtarget's feature bits.
819 bool is32BitMode() const {
820 // FIXME: Can tablegen auto-generate this?
821 return getSTI().getFeatureBits()[X86::Mode32Bit];
/// Returns the X86::Mode16Bit bit of the current subtarget's feature bits.
823 bool is16BitMode() const {
824 // FIXME: Can tablegen auto-generate this?
825 return getSTI().getFeatureBits()[X86::Mode16Bit];
/// Makes `mode` (one of X86::Mode64Bit / Mode32Bit / Mode16Bit) the only mode
/// feature bit set on the subtarget info returned by copySTI(), and refreshes
/// the matcher's available-feature mask to agree with it.
827 void SwitchMode(unsigned mode) {
828 MCSubtargetInfo &STI = copySTI();
829 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Isolate the mode bit(s) currently set.
830 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Toggle the old mode bit(s) together with the requested one, then recompute
// the available features from the resulting feature bits.
831 unsigned FB = ComputeAvailableFeatures(
832 STI.ToggleFeature(OldMode.flip(mode)));
833 setAvailableFeatures(FB);
// Sanity check: `mode` must now be the only mode bit that is set.
835 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Returns the pointer width in bits for the current mode: 16, 32 or 64.
/// The modes are tested in that order; reaching the end means no mode bit is
/// set, which is treated as unreachable.
838 unsigned getPointerWidth() {
839 if (is16BitMode()) return 16;
840 if (is32BitMode()) return 32;
841 if (is64BitMode()) return 64;
842 llvm_unreachable("invalid mode");
/// Returns true when the generic parser's assembler dialect is non-zero.
/// NOTE(review): presumably dialect 0 is AT&T and non-zero is Intel -- the
/// dialect numbering is not established in this listing; confirm.
845 bool isParsingIntelSyntax() {
846 return getParser().getAssemblerDialect();
849 /// @name Auto-generated Matcher Functions
852 #define GET_ASSEMBLER_HEADER
853 #include "X86GenAsmMatcher.inc"
859 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
860 const MCInstrInfo &mii, const MCTargetOptions &Options)
861 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
864 // Initialize the set of available features.
865 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
866 Instrumentation.reset(
867 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
870 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
872 void SetFrameRegister(unsigned RegNo) override;
874 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
875 SMLoc NameLoc, OperandVector &Operands) override;
877 bool ParseDirective(AsmToken DirectiveID) override;
879 } // end anonymous namespace
881 /// @name Auto-generated Match Functions
884 static unsigned MatchRegisterName(StringRef Name);
/// Validates a base/index register pair of a memory operand and stores a
/// description of the first problem found in ErrMsg.
/// Checks performed: RIP may not be combined with an index register and may
/// not be used as an index; when both registers are present, a 64-bit, 32-bit
/// or 16-bit base must not be paired with a general-purpose index of another
/// width (RIZ/EIZ are tested explicitly in the 64-/32-bit checks); a 16-bit
/// pair must combine BX or BP with SI or DI.
/// NOTE(review): the end of the signature and all return statements are not
/// visible in this listing, so the returned values are not documented here.
888 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
890 // If we have both a base register and an index register make sure they are
891 // both 64-bit or 32-bit registers.
892 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
894 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
895 ErrMsg = "invalid base+index expression";
898 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit general-purpose index.
899 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
900 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
901 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
902 IndexReg != X86::RIZ) {
903 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit general-purpose index.
906 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
907 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
908 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
909 IndexReg != X86::EIZ){
910 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must also be 16-bit, and only BX/BP with SI/DI
// (in either role) form a valid pair.
913 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
914 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
915 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
916 ErrMsg = "base register is 16-bit, but index register is not";
919 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
920 IndexReg != X86::SI && IndexReg != X86::DI) ||
921 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
922 IndexReg != X86::BX && IndexReg != X86::BP)) {
923 ErrMsg = "invalid 16-bit base/index register combination";
/// Parses a register name starting at the current token.
/// \param RegNo    receives the matched register number.
/// \param StartLoc receives the location of the first token examined.
/// \param EndLoc   receives the end location of the register text.
/// Outside Intel syntax a leading '%' is skipped. The name is looked up with
/// MatchRegisterName(), with a lowercase retry; "st"/"ST" (optionally followed
/// by a parenthesized stack index 0-7) and the "db0".."db7" aliases are
/// handled separately. Where a failure path is visible, Intel syntax returns
/// true without a diagnostic while other syntax reports "invalid register
/// name".
/// NOTE(review): several guards and the success return are not visible in
/// this listing.
931 bool X86AsmParser::ParseRegister(unsigned &RegNo,
932 SMLoc &StartLoc, SMLoc &EndLoc) {
933 MCAsmParser &Parser = getParser();
935 const AsmToken &PercentTok = Parser.getTok();
936 StartLoc = PercentTok.getLoc();
938 // If we encounter a %, ignore it. This code handles registers with and
939 // without the prefix, unprefixed registers can occur in cfi directives.
940 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
941 Parser.Lex(); // Eat percent token.
943 const AsmToken &Tok = Parser.getTok();
944 EndLoc = Tok.getEndLoc();
946 if (Tok.isNot(AsmToken::Identifier)) {
947 if (isParsingIntelSyntax()) return true;
948 return Error(StartLoc, "invalid register name",
949 SMRange(StartLoc, EndLoc));
952 RegNo = MatchRegisterName(Tok.getString());
954 // If the match failed, try the register name as lowercase.
956 RegNo = MatchRegisterName(Tok.getString().lower());
958 // The "flags" register cannot be referenced directly.
959 // Treat it as an identifier instead.
960 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
963 if (!is64BitMode()) {
964 // FIXME: This should be done using Requires<Not64BitMode> and
965 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
967 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
969 if (RegNo == X86::RIZ ||
970 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
971 X86II::isX86_64NonExtLowByteReg(RegNo) ||
972 X86II::isX86_64ExtendedReg(RegNo))
973 return Error(StartLoc, "register %"
974 + Tok.getString() + " is only available in 64-bit mode",
975 SMRange(StartLoc, EndLoc));
// In 64-bit mode without AVX512, reject registers for which
// X86II::is32ExtendedReg() is true.
976 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
977 if (X86II::is32ExtendedReg(RegNo))
978 return Error(StartLoc, "register %"
979 + Tok.getString() + " is only available with AVX512",
980 SMRange(StartLoc, EndLoc));
983 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
984 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
986 Parser.Lex(); // Eat 'st'
988 // Check to see if we have '(4)' after %st.
989 if (getLexer().isNot(AsmToken::LParen))
994 const AsmToken &IntTok = Parser.getTok();
995 if (IntTok.isNot(AsmToken::Integer))
996 return Error(IntTok.getLoc(), "expected stack index");
// Map the stack index 0-7 onto ST0-ST7; anything else is an error.
997 switch (IntTok.getIntVal()) {
998 case 0: RegNo = X86::ST0; break;
999 case 1: RegNo = X86::ST1; break;
1000 case 2: RegNo = X86::ST2; break;
1001 case 3: RegNo = X86::ST3; break;
1002 case 4: RegNo = X86::ST4; break;
1003 case 5: RegNo = X86::ST5; break;
1004 case 6: RegNo = X86::ST6; break;
1005 case 7: RegNo = X86::ST7; break;
1006 default: return Error(IntTok.getLoc(), "invalid stack index");
1009 if (getParser().Lex().isNot(AsmToken::RParen))
1010 return Error(Parser.getTok().getLoc(), "expected ')'");
1012 EndLoc = Parser.getTok().getEndLoc();
1013 Parser.Lex(); // Eat ')'
1017 EndLoc = Parser.getTok().getEndLoc();
1019 // If this is "db[0-7]", match it as an alias
1021 if (RegNo == 0 && Tok.getString().size() == 3 &&
1022 Tok.getString().startswith("db")) {
// "dbN" maps to debug register DRN.
1023 switch (Tok.getString()[2]) {
1024 case '0': RegNo = X86::DR0; break;
1025 case '1': RegNo = X86::DR1; break;
1026 case '2': RegNo = X86::DR2; break;
1027 case '3': RegNo = X86::DR3; break;
1028 case '4': RegNo = X86::DR4; break;
1029 case '5': RegNo = X86::DR5; break;
1030 case '6': RegNo = X86::DR6; break;
1031 case '7': RegNo = X86::DR7; break;
1035 EndLoc = Parser.getTok().getEndLoc();
1036 Parser.Lex(); // Eat it.
1042 if (isParsingIntelSyntax()) return true;
1043 return Error(StartLoc, "invalid register name",
1044 SMRange(StartLoc, EndLoc));
1047 Parser.Lex(); // Eat identifier token.
1051 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1052 Instrumentation->SetInitialFrameRegister(RegNo);
/// Creates a memory operand with a zero displacement, no segment or index
/// register, scale 1, and a mode-dependent base register: RSI in 64-bit mode,
/// ESI in 32-bit mode or when Code16GCC is set, SI otherwise.
/// NOTE(review): the trailing CreateMem() arguments are not visible in this
/// listing.
1055 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1056 bool Parse32 = is32BitMode() || Code16GCC;
1057 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1058 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1059 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1060 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Creates a memory operand with a zero displacement, no segment or index
/// register, scale 1, and a mode-dependent base register: RDI in 64-bit mode,
/// EDI in 32-bit mode or when Code16GCC is set, DI otherwise.
/// NOTE(review): the trailing CreateMem() arguments are not visible in this
/// listing.
1064 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1065 bool Parse32 = is32BitMode() || Code16GCC;
1066 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1067 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1068 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1069 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1073 bool X86AsmParser::IsSIReg(unsigned Reg) {
1075 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
/// Returns the SI-family or DI-family register whose width matches
/// RegClassID (GR64 -> RSI/RDI, GR32 -> ESI/EDI, GR16 -> SI/DI); IsSIReg
/// selects between the two families. Any other register class is treated as
/// unreachable.
1087 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1089 switch (RegClassID) {
1090 default: llvm_unreachable("Unexpected register class");
1091 case X86::GR64RegClassID:
1092 return IsSIReg ? X86::RSI : X86::RDI;
1093 case X86::GR32RegClassID:
1094 return IsSIReg ? X86::ESI : X86::EDI;
1095 case X86::GR16RegClassID:
1096 return IsSIReg ? X86::SI : X86::DI;
/// Appends the Src and Dst operands to Operands, taking ownership of both.
/// The order depends on the syntax being parsed: Dst then Src for Intel
/// syntax, Src then Dst otherwise.
1100 void X86AsmParser::AddDefaultSrcDestOperands(
1101 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1102 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1103 if (isParsingIntelSyntax()) {
1104 Operands.push_back(std::move(Dst));
1105 Operands.push_back(std::move(Src));
1108 Operands.push_back(std::move(Src));
1109 Operands.push_back(std::move(Dst));
/// Reconciles the operands the user wrote (OrigOperands, whose first entry is
/// the instruction name) with the operands in FinalOperands.
/// Each final operand is compared with the corresponding original operand.
/// For memory operands the final operand inherits the original's size and
/// segment register, and its base register is replaced by the SI/DI-family
/// register matching the original base's register class. A warning is queued
/// when the original base register differs from the register that will be
/// used. Queued warnings are emitted only after the whole loop has run, and
/// then the original operands are replaced by the final ones.
/// NOTE(review): the return statements are not visible in this listing; the
/// comments below say mismatches "return false" so that the normal operand
/// diagnostics fire.
1113 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1114 OperandVector &FinalOperands) {
1116 if (OrigOperands.size() > 1) {
1117 // Check if sizes match, OrigOperands also contains the instruction name
1118 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1119 "Operand size mismatch");
1121 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1122 // Verify types match
1123 int RegClassID = -1;
1124 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
// Index i + 1 skips the instruction name stored in OrigOperands[0].
1125 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1126 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1128 if (FinalOp.isReg() &&
1129 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1130 // Return false and let a normal complaint about bogus operands happen
1133 if (FinalOp.isMem()) {
1135 if (!OrigOp.isMem())
1136 // Return false and let a normal complaint about bogus operands happen
1139 unsigned OrigReg = OrigOp.Mem.BaseReg;
1140 unsigned FinalReg = FinalOp.Mem.BaseReg;
1142 // If we've already encountered a register class, make sure all register
1143 // bases are of the same register class
1144 if (RegClassID != -1 &&
1145 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1146 return Error(OrigOp.getStartLoc(),
1147 "mismatching source and destination index registers");
// Classify the original base register by width.
1150 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1151 RegClassID = X86::GR64RegClassID;
1152 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1153 RegClassID = X86::GR32RegClassID;
1154 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1155 RegClassID = X86::GR16RegClassID;
1157 // Unexpected register class type
1158 // Return false and let a normal complaint about bogus operands happen
// Pick the SI/DI-family register of the same width as the original base.
1161 bool IsSI = IsSIReg(FinalReg);
1162 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1164 if (FinalReg != OrigReg) {
1165 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1166 Warnings.push_back(std::make_pair(
1167 OrigOp.getStartLoc(),
1168 "memory operand is only for determining the size, " + RegName +
1169 " will be used for the location"));
1172 FinalOp.Mem.Size = OrigOp.Mem.Size;
1173 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1174 FinalOp.Mem.BaseReg = FinalReg;
1178 // Produce warnings only if all the operands passed the adjustment - prevent
1179 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1180 for (auto &WarningMsg : Warnings) {
1181 Warning(WarningMsg.first, WarningMsg.second);
1184 // Remove old operands
1185 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1186 OrigOperands.pop_back();
1188 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1189 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1190 OrigOperands.push_back(std::move(FinalOperands[i]));
/// Parses a single operand by dispatching on the active syntax:
/// ParseIntelOperand() for Intel syntax, ParseATTOperand() otherwise.
1195 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1196 if (isParsingIntelSyntax())
1197 return ParseIntelOperand();
1198 return ParseATTOperand();
1201 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword, spelled entirely in upper case or entirely in
/// lower case, to its width in bits (e.g. BYTE -> 8, XMMWORD -> 128).
/// OPAQUE maps to -1U.
/// NOTE(review): the default case and the return statement are not visible
/// in this listing.
1202 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1203 unsigned Size = StringSwitch<unsigned>(OpStr)
1204 .Cases("BYTE", "byte", 8)
1205 .Cases("WORD", "word", 16)
1206 .Cases("DWORD", "dword", 32)
1207 .Cases("FWORD", "fword", 48)
1208 .Cases("QWORD", "qword", 64)
1209 .Cases("MMWORD","mmword", 64)
1210 .Cases("XWORD", "xword", 80)
1211 .Cases("TBYTE", "tbyte", 80)
1212 .Cases("XMMWORD", "xmmword", 128)
1213 .Cases("YMMWORD", "ymmword", 256)
1214 .Cases("ZMMWORD", "zmmword", 512)
1215 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Builds the memory operand for an inline-asm reference.
/// Two shapes are produced:
///  - If Disp is a plain symbol reference to a declaration that is not a
///    variable (Info.OpDecl set, Info.IsVarDecl clear), an absolute memory
///    reference is created; on this path Size may be set to the pointer width
///    and an AOK_SizeDirective rewrite recorded.
///  - Otherwise a full SegReg/BaseReg/IndexReg/Scale operand is created. A
///    zero BaseReg is replaced by the placeholder value 1, and when no
///    explicit Size was given and Info.Type is non-zero, Info.Type * 8 is
///    passed on as FrontendSize.
/// NOTE(review): the condition under which the size directive is inserted
/// and the remaining emplace_back() arguments are not visible in this
/// listing.
1220 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1221 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1222 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1223 InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) {
1224 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1225 // some other label reference.
1226 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1227 // Insert an explicit size if the user didn't have one.
1229 Size = getPointerWidth();
1230 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1234 // Create an absolute memory reference in order to match against
1235 // instructions taking a PC relative operand.
1236 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1237 Identifier, Info.OpDecl);
1241 // We either have a direct symbol reference, or an offset from a symbol. The
1242 // parser always puts the symbol on the LHS, so look there for size
1243 // calculation purposes.
1244 unsigned FrontendSize = 0;
1245 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1247 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1248 if (IsSymRef && !Size && Info.Type)
1249 FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
1251 // When parsing inline assembly we set the base register to a non-zero value
1252 // if we don't know the actual value at this time. This is necessary to
1253 // get the matching correct in some cases.
1254 BaseReg = BaseReg ? BaseReg : 1;
1255 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1256 IndexReg, Scale, Start, End, Size, Identifier,
1257 Info.OpDecl, FrontendSize);
/// Records the AsmRewrite entries needed to turn an Intel bracketed memory
/// expression into its rewritten form:
///  - the '[' at BracLoc and the ']' at End are skipped;
///  - if the displacement computed by the state machine (FinalImmDisp)
///    differs from the one parsed before the bracket (ImmDisp), either the
///    existing immediate rewrite in front of the bracket is updated or a new
///    AOK_Imm rewrite is inserted at BracLoc;
///  - immediate rewrites inside the brackets are turned into AOK_Delete;
///  - the text between StartInBrac and the symbol, and between the symbol and
///    End, is skipped.
/// NOTE(review): several branch/loop-control lines are not visible in this
/// listing, so the exact conditions separating these cases are not
/// documented here.
1261 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
1262 StringRef SymName, int64_t ImmDisp,
1263 int64_t FinalImmDisp, SMLoc &BracLoc,
1264 SMLoc &StartInBrac, SMLoc &End) {
1265 // Remove the '[' and ']' from the IR string.
1266 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
1267 AsmRewrites.emplace_back(AOK_Skip, End, 1);
1269 // If ImmDisp is non-zero, then we parsed a displacement before the
1270 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1271 // If ImmDisp doesn't match the displacement computed by the state machine
1272 // then we have an additional displacement in the bracketed expression.
1273 if (ImmDisp != FinalImmDisp) {
1275 // We have an immediate displacement before the bracketed expression.
1276 // Adjust this to match the final immediate displacement.
1278 for (AsmRewrite &AR : AsmRewrites) {
1279 if (AR.Loc.getPointer() > BracLoc.getPointer())
1281 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
1282 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite up to the bracket and substitute the final value.
1284 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
1285 AR.Val = FinalImmDisp;
1290 assert (Found && "Unable to rewrite ImmDisp.");
1293 // We have a symbolic and an immediate displacement, but no displacement
1294 // before the bracketed expression. Put the immediate displacement
1295 // before the bracketed expression.
1296 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
1299 // Remove all the ImmPrefix rewrites within the brackets.
1300 // We may have some Imm rewrites as a result of an operator applying,
1301 // remove them as well
1302 for (AsmRewrite &AR : AsmRewrites) {
1303 if (AR.Loc.getPointer() < StartInBrac.getPointer())
1305 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm)
1306 AR.Kind = AOK_Delete;
// SymName's data pointer is used as the symbol's position in the source
// text, so the distances below are computed by pointer arithmetic.
1308 const char *SymLocPtr = SymName.data();
1309 // Skip everything before the symbol.
1310 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1311 assert(Len > 0 && "Expected a non-negative length.");
1312 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
1314 // Skip everything after the symbol.
1315 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1316 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1317 assert(Len > 0 && "Expected a non-negative length.");
1318 AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
1322 // Some operators also have a named synonym (not, or, shl, shr, xor, and,
1323 // mod). Query a candidate string for being such a named operator
1324 // and if so - invoke the appropriate handler
// NOTE(review): the handler calls inside each branch and the return
// statements are not visible in this listing.
1325 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1326 // A named operator should be either lower or upper case, but not a mix
// compare() yields non-zero on a mismatch, so this condition holds only when
// Name equals neither its all-lowercase nor its all-uppercase form.
1327 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1329 if (Name.equals_lower("not"))
1331 else if (Name.equals_lower("or"))
1333 else if (Name.equals_lower("shl"))
1335 else if (Name.equals_lower("shr"))
1337 else if (Name.equals_lower("xor"))
1339 else if (Name.equals_lower("and"))
1341 else if (Name.equals_lower("mod"))
/// Feed the tokens of an Intel-syntax expression into the state machine \p SM.
///
/// Each token is classified by kind: registers, identifiers, integers
/// (including directional labels such as 1f / 2b), arithmetic and bitwise
/// operators, brackets and parentheses are forwarded to the matching SM.on*()
/// handler.
/// \param SM   state machine that accumulates the expression.
/// \param End  updated with the location produced while consuming tokens.
/// Failure paths report a diagnostic through Error() and return its result.
1348 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1349 MCAsmParser &Parser = getParser();
1350 const AsmToken &Tok = Parser.getTok();
// Kind of the previously handled token; AsmToken::Error serves as the
// "nothing handled yet" marker.
1352 AsmToken::TokenKind PrevTK = AsmToken::Error;
// When true, the generic code at the bottom consumes the current token.
// Branches that consume tokens themselves clear it.
1355 bool UpdateLocLex = true;
1357 AsmToken::TokenKind TK = getLexer().getKind();
1358 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1359 // identifier. Don't try to parse it as a register.
1360 if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") &&
1361 TK != AsmToken::Identifier)
1364 // If we're parsing an immediate expression, we don't expect a '['.
1365 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1370 if (SM.isValidEndState()) {
1374 return Error(Tok.getLoc(), "unknown token in expression");
1376 case AsmToken::EndOfStatement: {
1380 case AsmToken::String:
1381 case AsmToken::Identifier: {
1382 // This could be a register or a symbolic displacement.
1385 SMLoc IdentLoc = Tok.getLoc();
1386 StringRef Identifier = Tok.getString();
1387 UpdateLocLex = false;
// Only a non-string token is tried as a register; ParseRegister() returning
// false means a register was parsed.
1388 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1389 SM.onRegister(TmpReg);
1390 } else if (ParseIntelNamedOperator(Identifier, SM)) {
// The named operator was only examined, not consumed; let the common code
// below eat the token.
1391 UpdateLocLex = true;
1392 } else if (!isParsingInlineAsm()) {
1393 if (getParser().parsePrimaryExpr(Val, End))
1394 return Error(Tok.getLoc(), "Unexpected identifier!");
1395 SM.onIdentifierExpr(Val, Identifier);
1396 } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) {
1397 if (OpKind == IOK_OFFSET)
// The two adjacent literals are concatenated by the compiler; the trailing
// space keeps "of" and "a" from running together in the diagnostic.
1398 return Error(IdentLoc, "Dealing OFFSET operator as part of "
1399 "a compound immediate expression is yet to be supported");
1400 int64_t Val = ParseIntelOperator(OpKind);
1404 if (SM.onInteger(Val, ErrMsg))
1405 return Error(IdentLoc, ErrMsg);
1406 } else if (Identifier.find('.') != StringRef::npos &&
1407 PrevTK == AsmToken::RBrac) {
1410 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1411 if (ParseIntelIdentifier(Val, Identifier, Info,
1412 /*Unevaluated=*/false, End))
1414 SM.onIdentifierExpr(Val, Identifier);
1418 case AsmToken::Integer: {
// In inline asm, record where an immediate prefix must be emitted when the
// state machine was created with AddImmPrefix.
1420 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1421 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1422 // Look for 'b' or 'f' following an Integer as a directional label
1423 SMLoc Loc = getTok().getLoc();
1424 int64_t IntVal = getTok().getIntVal();
1425 End = consumeToken();
1426 UpdateLocLex = false;
1427 if (getLexer().getKind() == AsmToken::Identifier) {
1428 StringRef IDVal = getTok().getString();
1429 if (IDVal == "f" || IDVal == "b") {
1431 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1432 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1434 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference must name a label that has already been defined.
1435 if (IDVal == "b" && Sym->isUndefined())
1436 return Error(Loc, "invalid reference to undefined symbol");
1437 StringRef Identifier = Sym->getName();
1438 SM.onIdentifierExpr(Val, Identifier);
1439 End = consumeToken();
// Integer followed by an identifier that is not a direction suffix.
1441 if (SM.onInteger(IntVal, ErrMsg))
1442 return Error(Loc, ErrMsg);
// Plain integer with no identifier after it.
1445 if (SM.onInteger(IntVal, ErrMsg))
1446 return Error(Loc, ErrMsg);
// Operators and grouping tokens map one-to-one onto state machine events.
1450 case AsmToken::Plus: SM.onPlus(); break;
1451 case AsmToken::Minus: SM.onMinus(); break;
1452 case AsmToken::Tilde: SM.onNot(); break;
1453 case AsmToken::Star: SM.onStar(); break;
1454 case AsmToken::Slash: SM.onDivide(); break;
1455 case AsmToken::Pipe: SM.onOr(); break;
1456 case AsmToken::Caret: SM.onXor(); break;
1457 case AsmToken::Amp: SM.onAnd(); break;
1458 case AsmToken::LessLess:
1459 SM.onLShift(); break;
1460 case AsmToken::GreaterGreater:
1461 SM.onRShift(); break;
1462 case AsmToken::LBrac: SM.onLBrac(); break;
1463 case AsmToken::RBrac: SM.onRBrac(); break;
1464 case AsmToken::LParen: SM.onLParen(); break;
1465 case AsmToken::RParen: SM.onRParen(); break;
1468 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token just handled unless the branch above already did so or
// the expression is finished.
1470 if (!Done && UpdateLocLex)
1471 End = consumeToken();
/// Parse a bracketed Intel-syntax memory expression, i.e. the
/// "[ BaseReg + Scale*IndexReg + Disp ]" or "[ Symbol + ImmDisp ]" part of an
/// operand, and build the matching memory operand.
/// \param SegReg   segment register already parsed by the caller (callers in
///                 this file pass 0 when there is none).
/// \param Start    start location used for the created operand.
/// \param ImmDisp  immediate displacement already parsed before the '['.
/// \param isSymbol NOTE(review): presumably true when the caller already
///                 parsed a symbol before the '['; confirm against the
///                 "more than one symbol" diagnostic below.
1478 std::unique_ptr<X86Operand>
1479 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1480 int64_t ImmDisp, bool isSymbol,
1482 MCAsmParser &Parser = getParser();
1483 const AsmToken &Tok = Parser.getTok();
1484 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1485 if (getLexer().isNot(AsmToken::LBrac))
1486 return ErrorOperand(BracLoc, "Expected '[' token!");
1487 Parser.Lex(); // Eat '['
// Location of the first token inside the brackets; used for the inline asm
// rewrites and for register-combination diagnostics below.
1489 SMLoc StartInBrac = Parser.getTok().getLoc();
1490 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1491 // may have already parsed an immediate displacement before the bracketed
// The state machine is seeded with ImmDisp so that the displacement parsed
// before the bracket is part of the final immediate.
1493 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1494 if (ParseIntelExpression(SM, End))
1497 const MCExpr *Disp = nullptr;
1498 if (const MCExpr *Sym = SM.getSym()) {
1499 // A symbolic displacement.
// For inline asm the bracketed text has to be rewritten so that only the
// symbol and the final immediate remain.
1501 if (isParsingInlineAsm())
1502 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
1503 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// An immediate is materialized when it is non-zero or when there is no
// symbolic displacement at all.
1507 if (SM.getImm() || !Disp) {
1508 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1510 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1512 Disp = Imm; // An immediate displacement only.
1515 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1516 // will in fact do global lookup the field name inside all global typedefs,
1517 // but we don't emulate that.
1518 if ((Parser.getTok().getKind() == AsmToken::Identifier ||
1519 Parser.getTok().getKind() == AsmToken::Dot ||
1520 Parser.getTok().getKind() == AsmToken::Real) &&
1521 Parser.getTok().getString().find('.') != StringRef::npos) {
1522 const MCExpr *NewDisp;
1523 if (ParseIntelDotOperator(Disp, NewDisp))
1526 End = Tok.getEndLoc();
1527 Parser.Lex(); // Eat the field.
1533 Error(Start, "cannot use more than one symbol in memory operand");
1536 if (SM.getBaseReg()) {
1537 Error(Start, "cannot use base register with variable reference");
1540 if (SM.getIndexReg()) {
1541 Error(Start, "cannot use index register with variable reference");
1546 int BaseReg = SM.getBaseReg();
1547 int IndexReg = SM.getIndexReg();
1548 int Scale = SM.getScale();
// Standalone assembly: build the operand directly from the parsed pieces.
1549 if (!isParsingInlineAsm()) {
1551 if (!BaseReg && !IndexReg) {
1553 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1554 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1558 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1559 Error(StartInBrac, ErrMsg);
1562 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1563 IndexReg, Scale, Start, End, Size);
// Inline assembly: defer to the helper that also receives the symbol name and
// the identifier info gathered by the state machine.
1566 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1567 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1568 End, Size, SM.getSymName(), Info,
1569 isParsingInlineAsm());
1572 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolve an inline asm identifier through the frontend callback and create a
// symbol reference for it.
// Val receives the created symbol reference expression. Identifier is the
// current token text on entry and is replaced by the text the frontend
// claimed. Info is filled in by the frontend lookup. IsUnevaluatedOperand is
// forwarded to that lookup. End is set to the end of the last token consumed.
1573 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1574 StringRef &Identifier,
1575 InlineAsmIdentifierInfo &Info,
1576 bool IsUnevaluatedOperand, SMLoc &End) {
1577 MCAsmParser &Parser = getParser();
1578 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Built from the raw character pointer, so LineBuf runs from the start of the
// identifier to the terminating NUL of the underlying buffer, not just to the
// end of the token.
1581 StringRef LineBuf(Identifier.data());
// NOTE(review): the callback presumably trims LineBuf down to the text it
// recognized, since LineBuf.size() is used as the claimed length below.
1583 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1585 const AsmToken &Tok = Parser.getTok();
1586 SMLoc Loc = Tok.getLoc();
1588 // Advance the token stream until the end of the current token is
1589 // after the end of what the frontend claimed.
1590 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1592 End = Tok.getEndLoc();
1594 } while (End.getPointer() < EndPtr);
1595 Identifier = LineBuf;
1597 // The frontend should end parsing on an assembler token boundary, unless it
1599 assert((End.getPointer() == EndPtr || !Result) &&
1600 "frontend claimed part of a token?");
1602 // If the identifier lookup was unsuccessful, assume that we are dealing with
1605 StringRef InternalName =
1606 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1608 assert(InternalName.size() && "We should have an internal name here.");
1609 // Push a rewrite for replacing the identifier name with the internal name.
1610 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1614 // Create the symbol reference.
1615 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1616 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1617 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1621 /// \brief Parse intel style segment override.
/// Called with the segment register already parsed and the lexer positioned
/// on the ':' that follows it.
/// \param SegReg segment register of the override; must be non-zero.
/// \param Start  start location used for the created operand.
1622 std::unique_ptr<X86Operand>
1623 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1625 MCAsmParser &Parser = getParser();
1626 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1627 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1628 if (Tok.isNot(AsmToken::Colon))
1629 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1630 Parser.Lex(); // Eat ':'
// Optional integer displacement directly after the colon, e.g. "seg:42" or
// "seg:42[...]".
1632 int64_t ImmDisp = 0;
1633 if (getLexer().is(AsmToken::Integer)) {
1634 ImmDisp = Tok.getIntVal();
1635 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1637 if (isParsingInlineAsm())
1638 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1640 if (getLexer().isNot(AsmToken::LBrac)) {
1641 // An immediate following a 'segment register', 'colon' token sequence can
1642 // be followed by a bracketed expression. If it isn't we know we have our
1643 // final segment override.
1644 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1645 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1646 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1647 Start, ImmDispToken.getEndLoc(), Size);
// A '[' here starts the bracketed part of the operand; any displacement
// parsed above is handed over as ImmDisp.
1651 if (getLexer().is(AsmToken::LBrac))
1652 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
// Otherwise the override is followed by a plain expression or identifier.
1656 if (!isParsingInlineAsm()) {
1657 if (getParser().parsePrimaryExpr(Val, End))
1658 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1660 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1663 InlineAsmIdentifierInfo Info;
1664 StringRef Identifier = Tok.getString();
1665 if (ParseIntelIdentifier(Val, Identifier, Info,
1666 /*Unevaluated=*/false, End))
// NOTE(review): this path passes SegReg=0 rather than the parsed SegReg.
// Confirm that dropping the override for inline asm identifiers is intended.
1668 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1669 /*Scale=*/1, Start, End, Size, Identifier, Info);
1672 // ParseRoundingModeOp - Parse an AVX-512 rounding mode operand.
// Handles the two curly-brace forms visible below: "{r?-sae}", which becomes
// an immediate operand holding the static rounding mode, and "{sae}", which
// becomes a "{sae}" token operand. The lexer must be positioned on the '{'.
// Start and End are used as the source range of the created immediate.
1673 std::unique_ptr<X86Operand>
1674 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1675 MCAsmParser &Parser = getParser();
1676 const AsmToken &Tok = Parser.getTok();
1677 // Eat "{" and mark the current place.
1678 const SMLoc consumedToken = consumeToken();
1679 if (Tok.getIdentifier().startswith("r")){
// Map the two-letter rounding mnemonic onto its static rounding value.
1680 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1681 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1682 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1683 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1684 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1687 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1688 Parser.Lex(); // Eat "r*" of r*-sae
1689 if (!getLexer().is(AsmToken::Minus))
1690 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1691 Parser.Lex(); // Eat "-"
// NOTE(review): the token eaten here is not checked to actually be "sae".
1692 Parser.Lex(); // Eat the sae
1693 if (!getLexer().is(AsmToken::RCurly))
1694 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1695 Parser.Lex(); // Eat "}"
1696 const MCExpr *RndModeOp =
1697 MCConstantExpr::create(rndMode, Parser.getContext());
1698 return X86Operand::CreateImm(RndModeOp, Start, End);
// "{sae}" on its own: suppress-all-exceptions marker without a rounding mode.
1700 if(Tok.getIdentifier().equals("sae")){
1701 Parser.Lex(); // Eat the sae
1702 if (!getLexer().is(AsmToken::RCurly))
1703 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1704 Parser.Lex(); // Eat "}"
1705 return X86Operand::CreateToken("{sae}", consumedToken);
1707 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1710 /// Parse the '.' operator.
/// Adds the offset denoted by the current ".field" / ".imm" token to the
/// constant displacement \p Disp.
/// \param Disp    displacement parsed so far; must be a constant expression.
/// \param NewDisp receives a new constant expression holding the sum of the
///                original displacement and the dot-operator offset.
/// Failure paths report a diagnostic through Error() and return its result.
1711 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1712 const MCExpr *&NewDisp) {
1713 MCAsmParser &Parser = getParser();
1714 const AsmToken &Tok = Parser.getTok();
1715 int64_t OrigDispVal, DotDispVal;
1717 // FIXME: Handle non-constant expressions.
1718 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1719 OrigDispVal = OrigDisp->getValue();
1721 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1723 // Drop the optional '.'.
1724 StringRef DotDispStr = Tok.getString();
1725 if (DotDispStr.startswith("."))
1726 DotDispStr = DotDispStr.drop_front(1);
1728 // .Imm gets lexed as a real.
1729 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is ignored on this line, so a
// malformed number is not diagnosed here.
1731 DotDispStr.getAsInteger(10, DotDisp);
1732 DotDispVal = DotDisp.getZExtValue();
1733 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the field
// offset.
1735 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1736 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1738 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1739 DotDispVal = DotDisp;
1741 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm, record a rewrite covering the field text (without the
// leading '.') together with the resolved offset.
1743 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1744 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1745 unsigned Len = DotDispStr.size();
1746 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, DotDispVal);
1749 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1753 /// Parse the 'offset' operator. This operator is used to specify the
1754 /// location rather than the content of a variable.
/// The lexer must be positioned on the operator itself. The result is a
/// register operand created with GetAddress=true that carries the identifier
/// and its declaration.
1755 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1756 MCAsmParser &Parser = getParser();
1757 const AsmToken &Tok = Parser.getTok();
1758 SMLoc OffsetOfLoc = Tok.getLoc();
1759 Parser.Lex(); // Eat offset.
1762 InlineAsmIdentifierInfo Info;
1763 SMLoc Start = Tok.getLoc(), End;
1764 StringRef Identifier = Tok.getString();
1765 if (ParseIntelIdentifier(Val, Identifier, Info,
1766 /*Unevaluated=*/false, End))
1769 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers the text "offset" plus the
// single character that follows it. Confirm for other separators.
1770 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1772 // The offset operator will have an 'r' constraint, thus we need to create
1773 // register operand to ensure proper matching. Just pick a GPR based on
1774 // the size of a pointer.
1775 bool Parse32 = is32BitMode() || Code16GCC;
1776 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1778 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1779 OffsetOfLoc, Identifier, Info.OpDecl);
1782 // Query a candidate string for being an Intel assembly operator.
1783 // Report back its kind, or IOK_INVALID if it is not a known one.
// Only the all-uppercase and all-lowercase spellings listed below match;
// any other spelling, including mixed case, yields IOK_INVALID.
1784 unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) {
1785 return StringSwitch<unsigned>(Name)
1786 .Cases("TYPE","type",IOK_TYPE)
1787 .Cases("SIZE","size",IOK_SIZE)
1788 .Cases("LENGTH","length",IOK_LENGTH)
1789 .Cases("OFFSET","offset",IOK_OFFSET)
1790 .Default(IOK_INVALID);
1793 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1794 /// returns the number of elements in an array. It returns the value 1 for
1795 /// non-array variables. The SIZE operator returns the size of a C or C++
1796 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1797 /// TYPE operator returns the size of a C or C++ type or variable. If the
1798 /// variable is an array, TYPE returns the size of a single element.
/// \param OpKind one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///               reaches llvm_unreachable below.
/// The operator and its operand are replaced by an immediate through an
/// AOK_Imm rewrite. The caller in ParseIntelExpression feeds the returned
/// value to the state machine as an integer.
1799 unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1800 MCAsmParser &Parser = getParser();
1801 const AsmToken &Tok = Parser.getTok();
1802 SMLoc TypeLoc = Tok.getLoc();
1803 Parser.Lex(); // Eat operator.
1805 const MCExpr *Val = nullptr;
1806 InlineAsmIdentifierInfo Info;
1807 SMLoc Start = Tok.getLoc(), End;
1808 StringRef Identifier = Tok.getString();
// The operand is only inspected for its type information, hence the
// unevaluated lookup.
1809 if (ParseIntelIdentifier(Val, Identifier, Info,
1810 /*Unevaluated=*/true, End))
1814 Error(Start, "unable to lookup expression");
1820 default: llvm_unreachable("Unexpected operand kind!");
1821 case IOK_LENGTH: CVal = Info.Length; break;
1822 case IOK_SIZE: CVal = Info.Size; break;
1823 case IOK_TYPE: CVal = Info.Type; break;
1826 // Rewrite the type operator and the C or C++ type or variable in terms of an
1827 // immediate. E.g. TYPE foo -> $$4
// The rewrite spans from the operator keyword to the end of its operand.
1828 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1829 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
/// Parse one operand in Intel syntax.
/// The visible paths handle, in order: the inline asm 'offset' operator, an
/// optional "<size> ptr" prefix, an AVX-512 rounding mode in curly braces, a
/// register or segment override, a bracketed memory expression, and finally
/// an immediate or symbol expression that may itself be followed by a
/// bracketed expression.
1834 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1835 MCAsmParser &Parser = getParser();
1836 const AsmToken &Tok = Parser.getTok();
1839 // FIXME: Offset operator
1840 // Should be handled as part of immediate expression, as other operators
1841 // Currently, only supported as a stand-alone operand
1842 if (isParsingInlineAsm())
1843 if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET)
1844 return ParseIntelOffsetOfOperator();
// Remember whether an explicit "<size> ptr" prefix was seen; a bare register
// is rejected after such a prefix further down.
1846 bool PtrInOperand = false;
1847 unsigned Size = getIntelMemOperandSize(Tok.getString());
1849 Parser.Lex(); // Eat operand size (e.g., byte, word).
1850 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1851 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1852 Parser.Lex(); // Eat ptr.
1853 PtrInOperand = true;
1856 Start = Tok.getLoc();
1858 // rounding mode token
1859 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1860 getLexer().is(AsmToken::LCurly))
1861 return ParseRoundingModeOp(Start, End);
// ParseRegister() returning false means the identifier was a register.
1865 if (getLexer().is(AsmToken::Identifier) &&
1866 !ParseRegister(RegNo, Start, End)) {
1867 // If this is a segment register followed by a ':', then this is the start
1868 // of a segment override, otherwise this is a normal register reference.
1869 // In case it is a normal register and there is ptr in the operand this
1871 if (RegNo == X86::RIP)
1872 return ErrorOperand(Start, "rip can only be used as a base register");
1873 if (getLexer().isNot(AsmToken::Colon)) {
1875 return ErrorOperand(Start, "expected memory operand after "
1876 "'ptr', found register operand instead");
1878 return X86Operand::CreateReg(RegNo, Start, End);
1880 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1883 // Immediates and Memory
1885 // Parse [ BaseReg + Scale*IndexReg + Disp ].
1886 if (getLexer().is(AsmToken::LBrac))
1887 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
// Copy the first token, since Tok refers to the parser's current token and
// changes as the expression is consumed.
1890 AsmToken StartTok = Tok;
1891 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1892 /*AddImmPrefix=*/false);
1893 if (ParseIntelExpression(SM, End))
// A constant "symbol" expression is folded into the immediate value; only a
// non-constant one counts as a real symbol.
1896 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1897 int64_t Imm = SM.getImm();
1898 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1899 SM.getSym()->evaluateAsAbsolute(Imm);
1901 if (StartTok.isNot(AsmToken::Identifier) &&
1902 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
// Number of characters consumed since Start; equal to the first token's
// length exactly when the expression was that single token.
1903 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1904 if (StartTok.getString().size() == Len)
1905 // Just add a prefix if this wasn't a complex immediate expression.
1906 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1908 // Otherwise, rewrite the complex expression as a single immediate.
1909 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1912 if (getLexer().isNot(AsmToken::LBrac)) {
1913 // If a directional label (ie. 1f or 2b) was parsed above from
1914 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1915 // to the MCExpr with the directional local symbol and this is a
1916 // memory operand not an immediate operand.
1918 if (isParsingInlineAsm())
1919 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1921 /*Scale=*/1, Start, End, Size,
1922 SM.getSymName(), SM.getIdentifierInfo());
1923 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1927 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1928 return X86Operand::CreateImm(ImmExpr, Start, End);
1931 // Only positive immediates are valid.
1933 return ErrorOperand(Start, "expected a positive immediate displacement "
1934 "before bracketed expr.");
// The parsed immediate becomes the displacement in front of the bracketed
// expression.
1936 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
/// Parse one operand in AT&T syntax, dispatching on the kind of the current
/// token: '%' starts a register or a segment-prefixed memory reference, '$'
/// starts an immediate, '{' starts an AVX-512 rounding mode operand, and the
/// remaining path parses a memory operand without a segment register.
1939 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1940 MCAsmParser &Parser = getParser();
1941 switch (getLexer().getKind()) {
1943 // Parse a memory operand with no segment register.
1944 return ParseMemOperand(0, Parser.getTok().getLoc());
1945 case AsmToken::Percent: {
1946 // Read the register.
1949 if (ParseRegister(RegNo, Start, End)) return nullptr;
1950 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1951 Error(Start, "%eiz and %riz can only be used as index registers",
1952 SMRange(Start, End));
1955 if (RegNo == X86::RIP) {
1956 Error(Start, "%rip can only be used as a base register",
1957 SMRange(Start, End));
1961 // If this is a segment register followed by a ':', then this is the start
1962 // of a memory reference, otherwise this is a normal register reference.
1963 if (getLexer().isNot(AsmToken::Colon))
1964 return X86Operand::CreateReg(RegNo, Start, End);
// Only registers of the segment register class may precede the ':'.
1966 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1967 return ErrorOperand(Start, "invalid segment register");
1969 getParser().Lex(); // Eat the colon.
1970 return ParseMemOperand(RegNo, Start);
1972 case AsmToken::Dollar: {
1973 // $42 -> immediate.
1974 SMLoc Start = Parser.getTok().getLoc(), End;
1977 if (getParser().parseExpression(Val, End))
1979 return X86Operand::CreateImm(Val, Start, End);
1981 case AsmToken::LCurly:{
1982 SMLoc Start = Parser.getTok().getLoc(), End;
// Curly-brace operands are only accepted when AVX-512 is available.
1983 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1984 return ParseRoundingModeOp(Start, End);
1985 return ErrorOperand(Start, "Unexpected '{' in expression");
1990 // true on failure, false otherwise
1991 // If no {z} mark was found - Parser doesn't advance
// Try to parse the remainder of a "{z}" mark, the opening '{' having been
// consumed by the caller. On success Z is set to a "{z}" token operand
// located at StartLoc; Z is left untouched when the next token is not 'z'.
1992 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1993 const SMLoc &StartLoc) {
1994 MCAsmParser &Parser = getParser();
1995 // Assuming we are just past the '{' mark, querying the next token
1996 // Searched for {z}, but none was found. Return false, as no parsing error was
1998 if (!(getLexer().is(AsmToken::Identifier) &&
1999 (getLexer().getTok().getIdentifier() == "z")))
2001 Parser.Lex(); // Eat z
2002 // Query and eat the '}' mark
2003 if (!getLexer().is(AsmToken::RCurly))
2004 return Error(getLexer().getLoc(), "Expected } at this point");
2005 Parser.Lex(); // Eat '}'
2006 // Assign Z with the {z} mark operand
2007 Z = X86Operand::CreateToken("{z}", StartLoc);
2011 // true on failure, false otherwise
// Parse the AVX-512 decorations that may follow an operand and append them
// to Operands: a memory broadcast "{1to<NUM>}", or an op-mask register
// "{%k<NUM>}" optionally combined with a zeroing mark "{z}" in either order.
// Nothing is parsed unless the subtarget has AVX-512 and the next token is
// '{'.
2012 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2013 const MCParsedAsmOperand &Op) {
2014 MCAsmParser &Parser = getParser();
2015 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
2016 if (getLexer().is(AsmToken::LCurly)) {
2017 // Eat "{" and mark the current place.
2018 const SMLoc consumedToken = consumeToken();
2019 // Distinguish {1to<NUM>} from {%k<NUM>}.
2020 if(getLexer().is(AsmToken::Integer)) {
2021 // Parse memory broadcasting ({1to<NUM>}).
2022 if (getLexer().getTok().getIntVal() != 1)
2023 return TokError("Expected 1to<NUM> at this point");
2024 Parser.Lex(); // Eat "1" of 1to8
// "1to8" is lexed as the integer 1 followed by the identifier "to8".
2025 if (!getLexer().is(AsmToken::Identifier) ||
2026 !getLexer().getTok().getIdentifier().startswith("to"))
2027 return TokError("Expected 1to<NUM> at this point");
2028 // Recognize only reasonable suffixes.
2029 const char *BroadcastPrimitive =
2030 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2031 .Case("to2", "{1to2}")
2032 .Case("to4", "{1to4}")
2033 .Case("to8", "{1to8}")
2034 .Case("to16", "{1to16}")
2036 if (!BroadcastPrimitive)
2037 return TokError("Invalid memory broadcast primitive.");
2038 Parser.Lex(); // Eat "toN" of 1toN
2039 if (!getLexer().is(AsmToken::RCurly))
2040 return TokError("Expected } at this point");
2041 Parser.Lex(); // Eat "}"
2042 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2044 // No AVX512 specific primitives can pass
2045 // after memory broadcasting, so return.
2048 // Parse either {k}{z}, {z}{k}, {k} or {z}
2049 // last one have no meaning, but GCC accepts it
2050 // Currently, we're just past a '{' mark
2051 std::unique_ptr<X86Operand> Z;
2052 if (ParseZ(Z, consumedToken))
2054 // Reaching here means that parsing of the allegedly '{z}' mark yielded
2056 // Query for the need of further parsing for a {%k<NUM>} mark
2057 if (!Z || getLexer().is(AsmToken::LCurly)) {
// If {z} was parsed first, a new '{' has to be consumed here; otherwise the
// '{' eaten at the top already belongs to the op-mask mark.
2058 const SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2059 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2061 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2062 if (!getLexer().is(AsmToken::RCurly))
2063 return Error(getLexer().getLoc(), "Expected } at this point");
// The mask is pushed as three operands: "{", the register, and "}".
2064 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2065 Operands.push_back(std::move(Op));
2066 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2068 return Error(getLexer().getLoc(),
2069 "Expected an op-mask register at this point");
2070 // {%k<NUM>} mark is found, inquire for {z}
2071 if (getLexer().is(AsmToken::LCurly) && !Z) {
2072 // If we have found a parsing error, or found no (expected) {z} mark
2073 // - report an error
2074 if (ParseZ(Z, consumeToken()) || !Z)
2078 // '{z}' on its own is meaningless, hence should be ignored.
2079 // on the contrary - have it been accompanied by a K register,
2082 Operands.push_back(std::move(Z));
2090 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2091 /// has already been parsed if present.
/// \param SegReg segment register parsed by the caller; ParseATTOperand
///               passes 0 when no prefix was present.
/// The second parameter is the start location used for the created operand.
/// The visible error paths that follow a diagnostic return nullptr where the
/// return is shown.
2092 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2095 MCAsmParser &Parser = getParser();
2096 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2097 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2098 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 and is overwritten when an
// explicit displacement expression is parsed.
2100 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2101 if (getLexer().isNot(AsmToken::LParen)) {
2103 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2105 // After parsing the base expression we could either have a parenthesized
2106 // memory address or not. If not, return now. If so, eat the (.
2107 if (getLexer().isNot(AsmToken::LParen)) {
2108 // Unless we have a segment register, treat this as an immediate.
2110 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2111 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2118 // Okay, we have a '('. We don't know if this is an expression or not, but
2119 // so we have to eat the ( to see beyond it.
2120 SMLoc LParenLoc = Parser.getTok().getLoc();
2121 Parser.Lex(); // Eat the '('.
2123 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2124 // Nothing to do here, fall into the code below with the '(' part of the
2125 // memory operand consumed.
2129 // It must be a parenthesized expression, parse it now.
2130 if (getParser().parseParenExpression(Disp, ExprEnd))
2133 // After parsing the base expression we could either have a parenthesized
2134 // memory address or not. If not, return now. If so, eat the (.
2135 if (getLexer().isNot(AsmToken::LParen)) {
2136 // Unless we have a segment register, treat this as an immediate.
2138 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2140 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2149 // If we reached here, then we just ate the ( of the memory operand. Process
2150 // the rest of the memory operand.
2151 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2152 SMLoc IndexLoc, BaseLoc;
2154 if (getLexer().is(AsmToken::Percent)) {
2155 SMLoc StartLoc, EndLoc;
2156 BaseLoc = Parser.getTok().getLoc();
2157 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2158 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2159 Error(StartLoc, "eiz and riz can only be used as index registers",
2160 SMRange(StartLoc, EndLoc));
2165 if (getLexer().is(AsmToken::Comma)) {
2166 Parser.Lex(); // Eat the comma.
2167 IndexLoc = Parser.getTok().getLoc();
2169 // Following the comma we should have either an index register, or a scale
2170 // value. We don't support the latter form, but we want to parse it
2173 // Note that even though it would be completely consistent to support syntax
2174 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2175 if (getLexer().is(AsmToken::Percent)) {
2177 if (ParseRegister(IndexReg, L, L))
2179 if (BaseReg == X86::RIP) {
2180 Error(IndexLoc, "%rip as base register can not have an index register");
2183 if (IndexReg == X86::RIP) {
2184 Error(IndexLoc, "%rip is not allowed as an index register");
2188 if (getLexer().isNot(AsmToken::RParen)) {
2189 // Parse the scale amount:
2190 // ::= ',' [scale-expression]
2191 if (getLexer().isNot(AsmToken::Comma)) {
2192 Error(Parser.getTok().getLoc(),
2193 "expected comma in scale expression");
2196 Parser.Lex(); // Eat the comma.
2198 if (getLexer().isNot(AsmToken::RParen)) {
2199 SMLoc Loc = Parser.getTok().getLoc();
2202 if (getParser().parseAbsoluteExpression(ScaleVal)){
2203 Error(Loc, "expected scale expression");
2207 // Validate the scale amount.
2208 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2210 Error(Loc, "scale factor in 16-bit address must be 1");
2213 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2215 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2218 Scale = (unsigned)ScaleVal;
2221 } else if (getLexer().isNot(AsmToken::RParen)) {
2222 // A scale amount without an index is ignored.
2224 SMLoc Loc = Parser.getTok().getLoc();
2227 if (getParser().parseAbsoluteExpression(Value))
2231 Warning(Loc, "scale factor without index register is ignored");
2236 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2237 if (getLexer().isNot(AsmToken::RParen)) {
2238 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2241 SMLoc MemEnd = Parser.getTok().getEndLoc();
2242 Parser.Lex(); // Eat the ')'.
2244 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2245 // and then only in non-64-bit modes. Except for DX, which is a special case
2246 // because an unofficial form of in/out instructions uses it.
2247 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2248 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2249 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2250 BaseReg != X86::DX) {
2251 Error(BaseLoc, "invalid 16-bit base register");
2255 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2256 Error(IndexLoc, "16-bit memory operand may not include only index register");
2261 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2262 Error(BaseLoc, ErrMsg);
// Use the full form when any of segment, base or index register is present;
// otherwise the operand is a bare displacement.
2266 if (SegReg || BaseReg || IndexReg)
2267 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2268 IndexReg, Scale, MemStart, MemEnd);
2269 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
/// Parse one instruction statement into \p Operands.
///
/// The mnemonic is pushed as a token operand (after a few mnemonic rewrites
/// such as the cmp/vcmp/vpcmp/vpcom comparison-code hacks, which split the
/// mnemonic into a base token, an immediate and a suffix token), then the
/// explicit operands are parsed, and finally a series of mnemonic-specific
/// canonicalizations is applied to the operand list (gas-compatible "p"
/// forms, segment-register moves, in/out "(%dx)" forms, default operands for
/// string instructions, shift-by-one, "int $3", "xlat mem8").
///
/// \param Info     Parse state for this instruction; its AsmRewrites list
///                 receives an AOK_EndOfStatement entry when a curly brace
///                 terminates the statement in MS inline asm.
/// \param Name     Mnemonic as written.
/// \param NameLoc  Location of the mnemonic; also used as the location of the
///                 synthesized tokens, immediates and default operands.
/// \param Operands Operand list being built; later code treats Operands[0] as
///                 the mnemonic token.
/// \returns true when operand verification for a string instruction failed
///          (HadVerifyError); an unexpected token in the argument list
///          returns the result of TokError().
2272 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2273 SMLoc NameLoc, OperandVector &Operands) {
2274 MCAsmParser &Parser = getParser();
2276 StringRef PatchedName = Name;
// Intel-syntax inline asm only: tolerate a "short" keyword right after jmp.
2278 if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) {
2279 StringRef NextTok = Parser.getTok().getString();
2280 if (NextTok == "short") {
2282 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2283 // Eat the short keyword
2285 // MS ignores the short keyword, it determines the jmp type based
2286 // on the distance of the label
2287 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2288 NextTok.size() + 1);
2292 // FIXME: Hack to recognize setneb as setne.
2293 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2294 PatchedName != "setb" && PatchedName != "setnb")
2295 PatchedName = PatchedName.substr(0, Name.size()-1);
2297 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2298 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2299 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2300 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2301 bool IsVCMP = PatchedName[0] == 'v';
// CCIdx is where the comparison-code text starts: after "vcmp" or "cmp".
2302 unsigned CCIdx = IsVCMP ? 4 : 3;
2303 unsigned ComparisonCode = StringSwitch<unsigned>(
2304 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2306 .Case("eq_oq", 0x00)
2308 .Case("lt_os", 0x01)
2310 .Case("le_os", 0x02)
2311 .Case("unord", 0x03)
2312 .Case("unord_q", 0x03)
2314 .Case("neq_uq", 0x04)
2316 .Case("nlt_us", 0x05)
2318 .Case("nle_us", 0x06)
2320 .Case("ord_q", 0x07)
2321 /* AVX only from here */
2322 .Case("eq_uq", 0x08)
2324 .Case("nge_us", 0x09)
2326 .Case("ngt_us", 0x0A)
2327 .Case("false", 0x0B)
2328 .Case("false_oq", 0x0B)
2329 .Case("neq_oq", 0x0C)
2331 .Case("ge_os", 0x0D)
2333 .Case("gt_os", 0x0E)
2335 .Case("true_uq", 0x0F)
2336 .Case("eq_os", 0x10)
2337 .Case("lt_oq", 0x11)
2338 .Case("le_oq", 0x12)
2339 .Case("unord_s", 0x13)
2340 .Case("neq_us", 0x14)
2341 .Case("nlt_uq", 0x15)
2342 .Case("nle_uq", 0x16)
2343 .Case("ord_s", 0x17)
2344 .Case("eq_us", 0x18)
2345 .Case("nge_uq", 0x19)
2346 .Case("ngt_uq", 0x1A)
2347 .Case("false_os", 0x1B)
2348 .Case("neq_os", 0x1C)
2349 .Case("ge_oq", 0x1D)
2350 .Case("gt_oq", 0x1E)
2351 .Case("true_us", 0x1F)
// ~0U is presumably the "no code matched" default of the switch above
// (TODO confirm). Codes 8 and above are accepted only for the 'v'-prefixed
// form.
2353 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2355 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2358 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2359 getParser().getContext());
2360 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
// Keep only the two-character type suffix (ss/sd/ps/pd) as the mnemonic.
2362 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2366 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2367 if (PatchedName.startswith("vpcmp") &&
2368 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2369 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Despite its name, CCIdx here is the suffix length: 2 when the character
// before the last one is 'u' (ub/uw/ud/uq), otherwise 1.
2370 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2371 unsigned ComparisonCode = StringSwitch<unsigned>(
2372 PatchedName.slice(5, PatchedName.size() - CCIdx))
2373 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2376 //.Case("false", 0x3) // Not a documented alias.
2380 //.Case("true", 0x7) // Not a documented alias.
// Code 0 ("eq") is accepted only together with a two-character (unsigned)
// suffix.
2382 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2383 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2385 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2386 getParser().getContext());
2387 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2389 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2393 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2394 if (PatchedName.startswith("vpcom") &&
2395 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2396 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// As for vpcmp above: CCIdx is the suffix length (2 for the 'u' forms).
2397 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2398 unsigned ComparisonCode = StringSwitch<unsigned>(
2399 PatchedName.slice(5, PatchedName.size() - CCIdx))
2409 if (ComparisonCode != ~0U) {
2410 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2412 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2413 getParser().getContext());
2414 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2416 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2420 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2422 // Determine whether this is an instruction prefix.
2424 Name == "lock" || Name == "rep" ||
2425 Name == "repe" || Name == "repz" ||
2426 Name == "repne" || Name == "repnz" ||
2427 Name == "rex64" || Name == "data16" || Name == "data32";
2429 bool CurlyAsEndOfStatement = false;
2430 // This does the actual operand parsing. Don't parse any more if we have a
2431 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2432 // just want to parse the "lock" as the first instruction and the "incl" as
2434 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2436 // Parse '*' modifier.
2437 if (getLexer().is(AsmToken::Star))
2438 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2440 // Read the operands.
2442 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2443 Operands.push_back(std::move(Op));
2444 if (HandleAVX512Operand(Operands, *Operands.back()))
2449 // check for comma and eat it
2450 if (getLexer().is(AsmToken::Comma))
2456 // In MS inline asm curly braces mark the beginning/end of a block,
2457 // therefore they should be interpreted as end of statement
2458 CurlyAsEndOfStatement =
2459 isParsingIntelSyntax() && isParsingInlineAsm() &&
2460 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2461 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2462 return TokError("unexpected token in argument list");
2465 // Consume the EndOfStatement or the prefix separator Slash
2466 if (getLexer().is(AsmToken::EndOfStatement) ||
2467 (isPrefix && getLexer().is(AsmToken::Slash)))
2469 else if (CurlyAsEndOfStatement)
2470 // Add an actual EndOfStatement before the curly brace
2471 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2472 getLexer().getTok().getLoc(), 0);
2474 // This is for gas compatibility and cannot be done in td.
2475 // Adding "p" for some floating point with no argument.
2476 // For example: fsub --> fsubp
2478 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
// Operands.size() == 1 means only the mnemonic token is present.
2479 if (IsFp && Operands.size() == 1) {
2480 const char *Repl = StringSwitch<const char *>(Name)
2481 .Case("fsub", "fsubp")
2482 .Case("fdiv", "fdivp")
2483 .Case("fsubr", "fsubrp")
2484 .Case("fdivr", "fdivrp");
2485 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2488 // Moving a 32 or 16 bit value into a segment register has the same
2489 // behavior. Modify such instructions to always take shorter form.
2490 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2491 (Operands.size() == 3)) {
2492 X86Operand &Op1 = (X86Operand &)*Operands[1];
2493 X86Operand &Op2 = (X86Operand &)*Operands[2];
2494 SMLoc Loc = Op1.getEndLoc();
2495 if (Op1.isReg() && Op2.isReg() &&
2496 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2498 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2499 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2500 // Change instruction name to match new instruction.
2501 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2502 Name = is16BitMode() ? "movw" : "movl";
2503 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2505 // Select the correct equivalent 16-/32-bit source register.
2507 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2508 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2512 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2513 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2514 // documented form in various unofficial manuals, so a lot of code uses it.
2515 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2516 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2517 Operands.size() == 3) {
2518 X86Operand &Op = (X86Operand &)*Operands.back();
// Only a bare "(%dx)" qualifies: no segment, zero constant displacement,
// base register dx and no index register.
2519 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2520 isa<MCConstantExpr>(Op.Mem.Disp) &&
2521 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2522 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2523 SMLoc Loc = Op.getEndLoc();
2524 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2527 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2528 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2529 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2530 Operands.size() == 3) {
2531 X86Operand &Op = (X86Operand &)*Operands[1];
2532 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2533 isa<MCConstantExpr>(Op.Mem.Disp) &&
2534 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2535 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2536 SMLoc Loc = Op.getEndLoc();
2537 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
// TmpOperands collects the default operands for the string instructions
// below; HadVerifyError records the result of VerifyAndAdjustOperands.
2541 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2542 bool HadVerifyError = false;
2544 // Append default arguments to "ins[bwld]"
2545 if (Name.startswith("ins") &&
2546 (Operands.size() == 1 || Operands.size() == 3) &&
2547 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2550 AddDefaultSrcDestOperands(TmpOperands,
2551 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2552 DefaultMemDIOperand(NameLoc));
2553 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2556 // Append default arguments to "outs[bwld]"
2557 if (Name.startswith("outs") &&
2558 (Operands.size() == 1 || Operands.size() == 3) &&
2559 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2560 Name == "outsd" || Name == "outs")) {
2561 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2562 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2563 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2566 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2567 // values of $SIREG according to the mode. It would be nice if this
2568 // could be achieved with InstAlias in the tables.
2569 if (Name.startswith("lods") &&
2570 (Operands.size() == 1 || Operands.size() == 2) &&
2571 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2572 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2573 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2574 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2577 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2578 // values of $DIREG according to the mode. It would be nice if this
2579 // could be achieved with InstAlias in the tables.
2580 if (Name.startswith("stos") &&
2581 (Operands.size() == 1 || Operands.size() == 2) &&
2582 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2583 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2584 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2585 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2588 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2589 // values of $DIREG according to the mode. It would be nice if this
2590 // could be achieved with InstAlias in the tables.
2591 if (Name.startswith("scas") &&
2592 (Operands.size() == 1 || Operands.size() == 2) &&
2593 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2594 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2595 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2596 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2599 // Add default SI and DI operands to "cmps[bwlq]".
2600 if (Name.startswith("cmps") &&
2601 (Operands.size() == 1 || Operands.size() == 3) &&
2602 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2603 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2604 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2605 DefaultMemSIOperand(NameLoc));
2606 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2609 // Add default SI and DI operands to "movs[bwlq]".
2610 if (((Name.startswith("movs") &&
2611 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2612 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2613 (Name.startswith("smov") &&
2614 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2615 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2616 (Operands.size() == 1 || Operands.size() == 3)) {
// An operand-less "movsd" outside Intel syntax is retokenized as "movsl".
2617 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2618 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2619 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2620 DefaultMemDIOperand(NameLoc));
2621 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2624 // Check if we encountered an error for one of the string instructions
2625 if (HadVerifyError) {
2626 return HadVerifyError;
2629 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize by dropping
// the constant-1 count operand (last operand in Intel syntax, first operand
// after the mnemonic otherwise).
2631 if ((Name.startswith("shr") || Name.startswith("sar") ||
2632 Name.startswith("shl") || Name.startswith("sal") ||
2633 Name.startswith("rcl") || Name.startswith("rcr") ||
2634 Name.startswith("rol") || Name.startswith("ror")) &&
2635 Operands.size() == 3) {
2636 if (isParsingIntelSyntax()) {
2638 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2639 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2640 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2641 Operands.pop_back();
2643 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2644 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2645 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2646 Operands.erase(Operands.begin() + 1);
2650 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2651 // instalias with an immediate operand yet.
2652 if (Name == "int" && Operands.size() == 2) {
2653 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2655 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2656 if (CE->getValue() == 3) {
2657 Operands.erase(Operands.begin() + 1);
2658 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2662 // Transforms "xlat mem8" into "xlatb"
2663 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2664 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2666 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2667 "size, (R|E)BX will be used for the location");
2668 Operands.pop_back();
2669 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
/// Post-processing hook for a matched instruction. The matchers in this file
/// call it in a loop ("while (processInstruction(Inst, Operands))"), so a true
/// result means "something changed, run again" and false ends the loop.
2676 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
// Forward declaration; used by ErrorMissingFeature to name a feature bit.
// NOTE(review): the definition presumably comes from X86GenAsmMatcher.inc via
// GET_SUBTARGET_FEATURE_NAME at the end of this file — confirm.
2680 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emit \p Inst by handing it, together with its parsed operands and the
/// parser context, to the X86AsmInstrumentation object.
2682 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2684 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching and emitting one parsed instruction. Dispatches
/// to the Intel-syntax matcher when isParsingIntelSyntax() is true and to the
/// AT&T-syntax matcher otherwise, forwarding all arguments and returning the
/// selected matcher's result.
2688 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2689 OperandVector &Operands,
2690 MCStreamer &Out, uint64_t &ErrorInfo,
2691 bool MatchingInlineAsm) {
2692 if (isParsingIntelSyntax())
2693 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2695 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expand the waiting x87 mnemonics listed in the table below (finit, fsave,
/// fstcw, ...) into a WAIT instruction followed by the corresponding
/// non-waiting "fn*" instruction.
///
/// The WAIT instruction is emitted here (skipped when \p MatchingInlineAsm is
/// set) and Operands[0] is replaced by a token holding the "fn*" mnemonic, so
/// the caller's normal matching handles the second instruction.
///
/// \param IDLoc  Location given to the replacement mnemonic token.
/// \param Op     Mnemonic operand whose token text selects the replacement.
2699 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2700 OperandVector &Operands, MCStreamer &Out,
2701 bool MatchingInlineAsm) {
2702 // FIXME: This should be replaced with a real .td file alias mechanism.
2703 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2705 const char *Repl = StringSwitch<const char *>(Op.getToken())
2706 .Case("finit", "fninit")
2707 .Case("fsave", "fnsave")
2708 .Case("fstcw", "fnstcw")
2709 .Case("fstcww", "fnstcw")
2710 .Case("fstenv", "fnstenv")
2711 .Case("fstsw", "fnstsw")
2712 .Case("fstsww", "fnstsw")
2713 .Case("fclex", "fnclex")
2717 Inst.setOpcode(X86::WAIT);
2719 if (!MatchingInlineAsm)
2720 EmitInstruction(Inst, Operands, Out);
2721 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Report a "missing subtarget feature" diagnostic at \p IDLoc.
///
/// Builds the message "instruction requires:" followed by the name of each
/// feature bit of \p ErrorInfo selected by Mask, then forwards it to Error().
///
/// \param ErrorInfo Bit mask of missing features; must be non-zero (asserted).
/// \returns the result of Error().
2725 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2726 bool MatchingInlineAsm) {
2727 assert(ErrorInfo && "Unknown missing feature!");
2728 SmallString<126> Msg;
2729 raw_svector_ostream OS(Msg);
2730 OS << "instruction requires:";
// NOTE(review): the bound sizeof(ErrorInfo)*8-1 gives 63 iterations for the
// 64-bit ErrorInfo; if Mask starts at bit 0 and advances one bit per
// iteration, the top bit is never examined — confirm this is intended.
2732 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2733 if (ErrorInfo & Mask)
2734 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2737 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
/// Match and emit an instruction written in AT&T syntax.
///
/// A direct match of the operand list is tried first. If that reports an
/// invalid operand or an unknown mnemonic, the mnemonic is retried with each
/// candidate size suffix appended ("b", "w", "l", "q", or "s", "l", "t" for
/// mnemonics starting with 'f'); the result is accepted only when exactly one
/// suffixed form matches. Otherwise a diagnostic is produced: ambiguous
/// suffix, invalid mnemonic, invalid operand, missing feature, or a generic
/// fallback.
///
/// \param IDLoc             Location used for diagnostics and for the
///                          expanded FPU wait alias token.
/// \param Opcode            Set to the matched instruction's opcode on the
///                          success paths.
/// \param Operands          Parsed operands; Operands[0] must be the mnemonic
///                          token (asserted).
/// \param Out               Streamer passed to EmitInstruction.
/// \param ErrorInfo         In/out matcher error information; compared with
///                          ~0ULL and used as an operand index when reporting
///                          an invalid operand.
/// \param MatchingInlineAsm When set, the matched instruction is neither
///                          post-processed nor emitted.
/// \returns on the failure paths, the result of Error() or
///          ErrorMissingFeature().
2740 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2741 OperandVector &Operands,
2743 uint64_t &ErrorInfo,
2744 bool MatchingInlineAsm) {
2745 assert(!Operands.empty() && "Unexpect empty operand list!");
2746 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2747 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2748 SMRange EmptyRange = None;
2750 // First, handle aliases that expand to multiple instructions.
2751 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2753 bool WasOriginallyInvalidOperand = false;
2756 // First, try a direct match.
2757 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2758 isParsingIntelSyntax())) {
2759 default: llvm_unreachable("Unexpected match result!");
2761 // Some instructions need post-processing to, for example, tweak which
2762 // encoding is selected. Loop on it while changes happen so the
2763 // individual transformations can chain off each other.
2764 if (!MatchingInlineAsm)
2765 while (processInstruction(Inst, Operands))
2769 if (!MatchingInlineAsm)
2770 EmitInstruction(Inst, Operands, Out);
2771 Opcode = Inst.getOpcode();
2773 case Match_MissingFeature:
2774 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2775 case Match_InvalidOperand:
2776 WasOriginallyInvalidOperand = true;
2778 case Match_MnemonicFail:
2782 // FIXME: Ideally, we would only attempt suffix matches for things which are
2783 // valid prefixes, and we could just infer the right unambiguous
2784 // type. However, that requires substantially more matcher support than the
2787 // Change the operand to point to a temporary token.
2788 StringRef Base = Op.getToken();
2789 SmallString<16> Tmp;
2792 Op.setTokenValue(Tmp);
2794 // If this instruction starts with an 'f', then it is a floating point stack
2795 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2796 // 80-bit floating point, which use the suffixes s,l,t respectively.
2798 // Otherwise, we assume that this may be an integer instruction, which comes
2799 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// The explicit "\0" makes the FP suffix list four characters long, the same
// length as "bwlq", so both lists can be indexed by the same loop.
2800 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2802 // Check for the various suffix matches.
2803 uint64_t ErrorInfoIgnore;
2804 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2807 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
// Overwrite the last character of the temporary mnemonic with this suffix.
2808 Tmp.back() = Suffixes[I];
2809 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2810 MatchingInlineAsm, isParsingIntelSyntax());
2811 // If this returned as a missing feature failure, remember that.
2812 if (Match[I] == Match_MissingFeature)
2813 ErrorInfoMissingFeature = ErrorInfoIgnore;
2816 // Restore the old token.
2817 Op.setTokenValue(Base);
2819 // If exactly one matched, then we treat that as a successful match (and the
2820 // instruction will already have been filled in correctly, since the failing
2821 // matches won't have modified it).
2822 unsigned NumSuccessfulMatches =
2823 std::count(std::begin(Match), std::end(Match), Match_Success);
2824 if (NumSuccessfulMatches == 1) {
2826 if (!MatchingInlineAsm)
2827 EmitInstruction(Inst, Operands, Out);
2828 Opcode = Inst.getOpcode();
2832 // Otherwise, the match failed, try to produce a decent error message.
2834 // If we had multiple suffix matches, then identify this as an ambiguous
2836 if (NumSuccessfulMatches > 1) {
2838 unsigned NumMatches = 0;
// Collect the suffix characters of every successful match for the message.
2839 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2840 if (Match[I] == Match_Success)
2841 MatchChars[NumMatches++] = Suffixes[I];
2843 SmallString<126> Msg;
2844 raw_svector_ostream OS(Msg);
2845 OS << "ambiguous instructions require an explicit suffix (could be ";
2846 for (unsigned i = 0; i != NumMatches; ++i) {
2849 if (i + 1 == NumMatches)
2851 OS << "'" << Base << MatchChars[i] << "'";
2854 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
2858 // Okay, we know that none of the variants matched successfully.
2860 // If all of the instructions reported an invalid mnemonic, then the original
2861 // mnemonic was invalid.
// NOTE(review): the literal 4 presumably equals the number of entries in
// Match (one per suffix) — confirm against the declaration of Match.
2862 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2863 if (!WasOriginallyInvalidOperand) {
2864 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2865 Op.getLocRange(), MatchingInlineAsm);
2868 // Recover location info for the operand if we know which was the problem.
2869 if (ErrorInfo != ~0ULL) {
2870 if (ErrorInfo >= Operands.size())
2871 return Error(IDLoc, "too few operands for instruction", EmptyRange,
2874 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2875 if (Operand.getStartLoc().isValid()) {
2876 SMRange OperandRange = Operand.getLocRange();
2877 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2878 OperandRange, MatchingInlineAsm);
2882 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2886 // If one instruction matched with a missing feature, report this as a
2888 if (std::count(std::begin(Match), std::end(Match),
2889 Match_MissingFeature) == 1) {
2890 ErrorInfo = ErrorInfoMissingFeature;
2891 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2895 // If one instruction matched with an invalid operand, report this as an
2897 if (std::count(std::begin(Match), std::end(Match),
2898 Match_InvalidOperand) == 1) {
2899 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2903 // If all of these were an outright failure, report it in a useless way.
2904 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2905 EmptyRange, MatchingInlineAsm);
/// Match and emit an instruction written in Intel syntax.
///
/// Intel syntax carries operand size on the memory operand rather than on the
/// mnemonic, so when an unsized memory operand is present the instruction is
/// matched once per candidate size (8 through 512 bits) and the results are
/// collected in Match. Special cases handled first: call/jmp/push get a
/// pointer-sized memory operand, and "push <constant>" is matched in AT&T
/// mode with a mode-dependent suffix. If several sizes match and the frontend
/// supplied a size for the operand, that size is used to disambiguate.
///
/// \param IDLoc             Location used for diagnostics.
/// \param Opcode            Set to the matched opcode when exactly one
///                          candidate matched.
/// \param Operands          Parsed operands; Operands[0] must be the mnemonic
///                          token (asserted).
/// \param Out               Streamer passed to EmitInstruction.
/// \param ErrorInfo         In/out matcher error information.
/// \param MatchingInlineAsm When set, the matched instruction is neither
///                          post-processed nor emitted.
/// \returns on the failure paths, the result of Error() or
///          ErrorMissingFeature().
2909 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2910 OperandVector &Operands,
2912 uint64_t &ErrorInfo,
2913 bool MatchingInlineAsm) {
2914 assert(!Operands.empty() && "Unexpect empty operand list!");
2915 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2916 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2917 StringRef Mnemonic = Op.getToken();
2918 SMRange EmptyRange = None;
// Base holds the original mnemonic so it can be restored after the
// temporary "push" rewrite below.
2919 StringRef Base = Op.getToken();
2921 // First, handle aliases that expand to multiple instructions.
2922 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2926 // Find one unsized memory operand, if present.
2927 X86Operand *UnsizedMemOp = nullptr;
// Note: this loop variable shadows the mnemonic operand Op declared above.
2928 for (const auto &Op : Operands) {
2929 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2930 if (X86Op->isMemUnsized()) {
2931 UnsizedMemOp = X86Op;
2932 // Have we found an unqualified memory operand,
2933 // break. IA allows only one memory operand.
2938 // Allow some instructions to have implicitly pointer-sized operands. This is
2939 // compatible with gas.
2941 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2942 for (const char *Instr : PtrSizedInstrs) {
2943 if (Mnemonic == Instr) {
2944 UnsizedMemOp->Mem.Size = getPointerWidth();
// Match collects one result per attempted match; the diagnostics below
// count how many entries have each result kind.
2950 SmallVector<unsigned, 8> Match;
2951 uint64_t ErrorInfoMissingFeature = 0;
2953 // If unsized push has immediate operand we should default the default pointer
2954 // size for the size.
2955 if (Mnemonic == "push" && Operands.size() == 2) {
2956 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
2957 if (X86Op->isImm()) {
2958 // If it's not a constant fall through and let remainder take care of it.
2959 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
2960 unsigned Size = getPointerWidth();
2962 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
2963 SmallString<16> Tmp;
2965 Tmp += (is64BitMode())
2967 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
2968 Op.setTokenValue(Tmp);
2969 // Do match in ATT mode to allow explicit suffix usage.
2970 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
2972 false /*isParsingIntelSyntax()*/));
2973 Op.setTokenValue(Base);
2978 // If an unsized memory operand is present, try to match with each memory
2979 // operand size. In Intel assembly, the size is not part of the instruction
2981 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2982 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2983 for (unsigned Size : MopSizes) {
2984 UnsizedMemOp->Mem.Size = Size;
2985 uint64_t ErrorInfoIgnore;
2986 unsigned LastOpcode = Inst.getOpcode();
2987 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2988 MatchingInlineAsm, isParsingIntelSyntax());
// A result is considered only for the first attempt or when the attempt
// changed the opcode stored in Inst.
2989 if (Match.empty() || LastOpcode != Inst.getOpcode())
2992 // If this returned as a missing feature failure, remember that.
2993 if (Match.back() == Match_MissingFeature)
2994 ErrorInfoMissingFeature = ErrorInfoIgnore;
2997 // Restore the size of the unsized memory operand if we modified it.
2998 UnsizedMemOp->Mem.Size = 0;
3001 // If we haven't matched anything yet, this is not a basic integer or FPU
3002 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
3003 // matching with the unsized operand.
3004 if (Match.empty()) {
3005 Match.push_back(MatchInstruction(
3006 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
3007 // If this returned as a missing feature failure, remember that.
3008 if (Match.back() == Match_MissingFeature)
3009 ErrorInfoMissingFeature = ErrorInfo;
3012 // Restore the size of the unsized memory operand if we modified it.
3014 UnsizedMemOp->Mem.Size = 0;
3016 // If it's a bad mnemonic, all results will be the same.
3017 if (Match.back() == Match_MnemonicFail) {
3018 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
3019 Op.getLocRange(), MatchingInlineAsm);
3022 unsigned NumSuccessfulMatches =
3023 std::count(std::begin(Match), std::end(Match), Match_Success);
3025 // If matching was ambiguous and we had size information from the frontend,
3026 // try again with that. This handles cases like "movzx eax, m8/m16".
3027 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
3028 UnsizedMemOp->getMemFrontendSize()) {
3029 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
3030 unsigned M = MatchInstruction(
3031 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
3032 if (M == Match_Success)
3033 NumSuccessfulMatches = 1;
3035 // Add a rewrite that encodes the size information we used from the
3037 InstInfo->AsmRewrites->emplace_back(
3038 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3039 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3042 // If exactly one matched, then we treat that as a successful match (and the
3043 // instruction will already have been filled in correctly, since the failing
3044 // matches won't have modified it).
3045 if (NumSuccessfulMatches == 1) {
3046 // Some instructions need post-processing to, for example, tweak which
3047 // encoding is selected. Loop on it while changes happen so the individual
3048 // transformations can chain off each other.
3049 if (!MatchingInlineAsm)
3050 while (processInstruction(Inst, Operands))
3053 if (!MatchingInlineAsm)
3054 EmitInstruction(Inst, Operands, Out);
3055 Opcode = Inst.getOpcode();
3057 } else if (NumSuccessfulMatches > 1) {
3058 assert(UnsizedMemOp &&
3059 "multiple matches only possible with unsized memory operands");
3060 return Error(UnsizedMemOp->getStartLoc(),
3061 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3062 UnsizedMemOp->getLocRange());
3065 // If one instruction matched with a missing feature, report this as a
3067 if (std::count(std::begin(Match), std::end(Match),
3068 Match_MissingFeature) == 1) {
3069 ErrorInfo = ErrorInfoMissingFeature;
3070 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3074 // If one instruction matched with an invalid operand, report this as an
3076 if (std::count(std::begin(Match), std::end(Match),
3077 Match_InvalidOperand) == 1) {
3078 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3082 // If all of these were an outright failure, report it in a useless way.
3083 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
/// Returns true when \p RegNo belongs to the segment register class; per the
/// function name, such registers are left out of clobber lists.
3087 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3088 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handle the X86-specific assembler directives, selected by the directive's
/// identifier text:
///   .word                -> ParseDirectiveWord with a size of 2 bytes
///   .code*               -> ParseDirectiveCode
///   .att_syntax [prefix] -> assembler dialect 0; "noprefix" is rejected
///   .intel_syntax [noprefix] -> assembler dialect 1; "prefix" is rejected
///   .even                -> parseDirectiveEven
/// For the first, second and last cases the helper's result is returned
/// directly; the two rejected forms return the result of Error().
3091 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3092 MCAsmParser &Parser = getParser();
3093 StringRef IDVal = DirectiveID.getIdentifier();
3094 if (IDVal == ".word")
3095 return ParseDirectiveWord(2, DirectiveID.getLoc());
3096 else if (IDVal.startswith(".code"))
3097 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3098 else if (IDVal.startswith(".att_syntax")) {
// NOTE(review): the syntax directives also toggle the parser's inline-asm
// flag (false here, true for .intel_syntax); the reason is not documented
// in this function — confirm before relying on it.
3099 getParser().setParsingInlineAsm(false);
3100 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3101 if (Parser.getTok().getString() == "prefix")
3103 else if (Parser.getTok().getString() == "noprefix")
3104 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3105 "supported: registers must have a "
3106 "'%' prefix in .att_syntax");
3108 getParser().setAssemblerDialect(0);
3110 } else if (IDVal.startswith(".intel_syntax")) {
3111 getParser().setAssemblerDialect(1);
3112 getParser().setParsingInlineAsm(true);
3113 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3114 if (Parser.getTok().getString() == "noprefix")
3116 else if (Parser.getTok().getString() == "prefix")
3117 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3118 "supported: registers must not have "
3119 "a '%' prefix in .intel_syntax");
3122 } else if (IDVal == ".even")
3123 return parseDirectiveEven(DirectiveID.getLoc());
3127 /// parseDirectiveEven
/// Handles the ".even" directive: any token other than end-of-statement is
/// diagnosed, then the current location is aligned to a 2-byte boundary.
/// Sections that report UseCodeAlign() are aligned with EmitCodeAlignment;
/// EmitValueToAlignment (fill value 0, value size 1) presumably covers the
/// remaining sections — TODO confirm.
3129 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3130 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3131 TokError("unexpected token in directive");
3134 const MCSection *Section = getStreamer().getCurrentSectionOnly();
// Presumably reached only when no section is current yet: initialize the
// default sections and re-read the current one — TODO confirm.
3136 getStreamer().InitSections(false);
3137 Section = getStreamer().getCurrentSectionOnly();
3139 if (Section->UseCodeAlign())
3140 getStreamer().EmitCodeAlignment(2, 0);
3142 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3145 /// ParseDirectiveWord
3146 /// ::= .word [ expression (, expression)* ]
///
/// Emits each expression as a \p Size-byte value. Constant expressions are
/// range-checked against 8 * Size bits (either the signed or the unsigned
/// interpretation must fit) and emitted with EmitIntValue; other expressions
/// are emitted with EmitValue.
///
/// \param Size Width in bytes of each emitted value; must be at most 8
///             (asserted for constant expressions).
/// \param L    Location of the directive, used for the "unexpected token"
///             diagnostic.
3147 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
3148 MCAsmParser &Parser = getParser();
3149 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3151 const MCExpr *Value;
3152 SMLoc ExprLoc = getLexer().getLoc();
3153 if (getParser().parseExpression(Value))
3156 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
3157 assert(Size <= 8 && "Invalid size");
3158 uint64_t IntValue = MCE->getValue();
3159 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3160 return Error(ExprLoc, "literal value out of range for directive");
3161 getStreamer().EmitIntValue(IntValue, Size);
3163 getStreamer().EmitValue(Value, Size, ExprLoc);
3166 if (getLexer().is(AsmToken::EndOfStatement))
3169 // FIXME: Improve diagnostic.
3170 if (getLexer().isNot(AsmToken::Comma)) {
3171 Error(L, "unexpected token in directive");
3182 /// ParseDirectiveCode
3183 /// ::= .code16 | .code32 | .code64
///
/// Also accepts ".code16gcc". For each recognized directive the parser
/// switches to the requested mode and emits the matching assembler flag, but
/// only when it is not already in that mode. Any other spelling is diagnosed
/// as "unknown directive".
///
/// \param IDVal Full directive text, e.g. ".code32".
/// \param L     Location used for the unknown-directive diagnostic.
3184 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3185 MCAsmParser &Parser = getParser();
3187 if (IDVal == ".code16") {
3189 if (!is16BitMode()) {
3190 SwitchMode(X86::Mode16Bit);
3191 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3193 } else if (IDVal == ".code16gcc") {
3194 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3197 if (!is16BitMode()) {
3198 SwitchMode(X86::Mode16Bit);
3199 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3201 } else if (IDVal == ".code32") {
3203 if (!is32BitMode()) {
3204 SwitchMode(X86::Mode32Bit);
3205 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3207 } else if (IDVal == ".code64") {
3209 if (!is64BitMode()) {
3210 SwitchMode(X86::Mode64Bit);
3211 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3214 Error(L, "unknown directive " + IDVal);
3221 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both the 32-bit and the 64-bit X86 targets.
3222 extern "C" void LLVMInitializeX86AsmParser() {
3223 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3224 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3227 #define GET_REGISTER_MATCHER
3228 #define GET_MATCHER_IMPLEMENTATION
3229 #define GET_SUBTARGET_FEATURE_NAME
3230 #include "X86GenAsmMatcher.inc"