1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
42 static const char OpPrecedence[] = {
/// X86 target assembly parser, layered on top of MCTargetAsmParser.
58 class X86AsmParser : public MCTargetAsmParser {
// Instruction info reference, bound from the constructor's `mii` argument.
59 const MCInstrInfo &MII;
// Set to nullptr by the constructor; CreateMemForInlineAsm appends inline-asm
// rewrites through it.
60 ParseInstructionInfo *InstInfo;
// Owned instrumentation object; the constructor fills it in with the result
// of CreateX86AsmInstrumentation.
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
/// Token helper that works on the parser's current token.
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
// Location of the token currently at the head of the parser.
67 SMLoc Result = Parser.getTok().getLoc();
/// Matches the parsed operands by delegating to MatchInstructionImpl.
/// \param Operands parsed operand list, forwarded to the matcher.
/// \param Inst instruction object, forwarded to the matcher by reference.
/// \param ErrorInfo forwarded to the matcher by reference.
/// \param matchingInlineAsm forwarded unchanged to the matcher.
/// \param VariantID forwarded unchanged to the matcher; defaults to 0.
72 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
73 uint64_t &ErrorInfo, bool matchingInlineAsm,
74 unsigned VariantID = 0) {
75 // In Code16GCC mode, match as 32-bit.
77 SwitchMode(X86::Mode32Bit);
// The matcher's result is kept in `rv`.
78 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
79 matchingInlineAsm, VariantID);
// Counterpart of the switch to 32-bit above: go back to 16-bit mode.
81 SwitchMode(X86::Mode16Bit);
85 enum InfixCalculatorTok {
101 enum IntelOperatorKind {
/// Infix expression evaluator. Operators wait on InfixOperatorStack while
/// operands (and operators, once ordered) accumulate on PostfixStack; the
/// postfix sequence is then evaluated with a stack of operands.
109 class InfixCalculator {
// (token kind, value) pair; operator tokens are stored with value 0.
110 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Operators that have not yet been moved to the postfix stack.
111 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// Operands and operators in postfix order.
112 SmallVector<ICToken, 4> PostfixStack;
/// Pops the last entry of the postfix stack, which must be an immediate or
/// a register operand.
115 int64_t popOperand() {
116 assert (!PostfixStack.empty() && "Poped an empty stack!");
117 ICToken Op = PostfixStack.pop_back_val();
118 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
119 && "Expected and immediate or register!");
/// Appends an immediate or register operand carrying \p Val (default 0) to
/// the postfix stack.
122 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
123 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
124 "Unexpected operand!");
125 PostfixStack.push_back(std::make_pair(Op, Val));
/// Drops the operator on top of the operator stack.
128 void popOperator() { InfixOperatorStack.pop_back(); }
/// Adds \p Op to the operator stack, first moving operators from the stack
/// to the postfix stack as dictated by OpPrecedence and parentheses.
129 void pushOperator(InfixCalculatorTok Op) {
130 // Push the new operator if the stack is empty.
131 if (InfixOperatorStack.empty()) {
132 InfixOperatorStack.push_back(Op);
136 // Push the new operator if it has a higher precedence than the operator
137 // on the top of the stack or the operator on the top of the stack is a
139 unsigned Idx = InfixOperatorStack.size() - 1;
140 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
141 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
142 InfixOperatorStack.push_back(Op);
146 // The operator on the top of the stack has higher precedence than the
148 unsigned ParenCount = 0;
150 // Nothing to process.
151 if (InfixOperatorStack.empty())
// Re-read the operator that is now on top of the stack.
154 Idx = InfixOperatorStack.size() - 1;
155 StackOp = InfixOperatorStack[Idx];
156 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
159 // If we have an even parentheses count and we see a left parentheses,
160 // then stop processing.
161 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are removed from the operator stack; any other operator is
// moved to the postfix stack with a value of 0.
164 if (StackOp == IC_RPAREN) {
166 InfixOperatorStack.pop_back();
167 } else if (StackOp == IC_LPAREN) {
169 InfixOperatorStack.pop_back();
171 InfixOperatorStack.pop_back();
172 PostfixStack.push_back(std::make_pair(StackOp, 0));
175 // Push the new operator.
176 InfixOperatorStack.push_back(Op);
180 // Push any remaining operators onto the postfix stack.
181 while (!InfixOperatorStack.empty()) {
182 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
// Parentheses never reach the postfix stack.
183 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
184 PostfixStack.push_back(std::make_pair(StackOp, 0));
187 if (PostfixStack.empty())
// Evaluate the postfix sequence with a stack of operands.
190 SmallVector<ICToken, 16> OperandStack;
191 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
192 ICToken Op = PostfixStack[i];
193 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
194 OperandStack.push_back(Op);
196 assert (OperandStack.size() > 1 && "Too few operands.");
// Binary operator: the right-hand operand is on top of the stack, the
// left-hand operand right below it.
198 ICToken Op2 = OperandStack.pop_back_val();
199 ICToken Op1 = OperandStack.pop_back_val();
202 report_fatal_error("Unexpected operator!");
// Every result below is pushed back as an immediate (IC_IMM).
// Sum.
205 Val = Op1.second + Op2.second;
206 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Difference.
209 Val = Op1.second - Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Product; both operands must be immediates.
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Multiply operation with an immediate and a register!");
215 Val = Op1.second * Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Quotient; both operands must be immediates and the divisor non-zero.
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Divide operation with an immediate and a register!");
221 assert (Op2.second != 0 && "Division by zero!");
222 Val = Op1.second / Op2.second;
223 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise OR of two immediates.
226 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
227 "Or operation with an immediate and a register!");
228 Val = Op1.second | Op2.second;
229 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise XOR of two immediates.
232 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
233 "Xor operation with an immediate and a register!");
234 Val = Op1.second ^ Op2.second;
235 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Bitwise AND of two immediates.
238 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
239 "And operation with an immediate and a register!");
240 Val = Op1.second & Op2.second;
241 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Left shift of an immediate by an immediate.
244 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
245 "Left shift operation with an immediate and a register!");
246 Val = Op1.second << Op2.second;
247 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Right shift of an immediate by an immediate.
250 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
251 "Right shift operation with an immediate and a register!");
252 Val = Op1.second >> Op2.second;
253 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one operand must remain; its value is the result.
258 assert (OperandStack.size() == 1 && "Expected a single result.");
259 return OperandStack.pop_back_val().second;
263 enum IntelExprState {
/// State machine driven while an Intel-syntax expression is parsed. It keeps
/// the current and previous state, the base/index register and scale of a
/// memory reference, and feeds operands and operators to the calculator IC.
284 class IntelExprStateMachine {
285 IntelExprState State, PrevState;
286 unsigned BaseReg, IndexReg, TmpReg, Scale;
290 bool StopOnLBrac, AddImmPrefix;
292 InlineAsmIdentifierInfo Info;
/// Starts in IES_PLUS with PrevState IES_ERROR, no registers, a scale of 1,
/// no symbol, and \p imm as the initial immediate.
295 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
296 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
297 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
298 AddImmPrefix(addimmprefix) { Info.clear(); }
// Plain accessors for the collected pieces of the expression.
300 unsigned getBaseReg() { return BaseReg; }
301 unsigned getIndexReg() { return IndexReg; }
302 unsigned getScale() { return Scale; }
303 const MCExpr *getSym() { return Sym; }
304 StringRef getSymName() { return SymName; }
// Initial immediate plus whatever the calculator evaluates to.
305 int64_t getImm() { return Imm + IC.execute(); }
// Only IES_RBRAC and IES_INTEGER count as valid end states.
306 bool isValidEndState() {
307 return State == IES_RBRAC || State == IES_INTEGER;
309 bool getStopOnLBrac() { return StopOnLBrac; }
310 bool getAddImmPrefix() { return AddImmPrefix; }
// The machine is in error once State is IES_ERROR.
311 bool hadError() { return State == IES_ERROR; }
313 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Handler that queues a bitwise-OR operator on the calculator.
318 IntelExprState CurrState = State;
327 IC.pushOperator(IC_OR);
330 PrevState = CurrState;
// Handler that queues a bitwise-XOR operator on the calculator.
333 IntelExprState CurrState = State;
342 IC.pushOperator(IC_XOR);
345 PrevState = CurrState;
// Handler that queues a bitwise-AND operator on the calculator.
348 IntelExprState CurrState = State;
357 IC.pushOperator(IC_AND);
360 PrevState = CurrState;
// Handler that queues a left-shift operator on the calculator.
363 IntelExprState CurrState = State;
372 IC.pushOperator(IC_LSHIFT);
375 PrevState = CurrState;
// Handler that queues a right-shift operator on the calculator.
378 IntelExprState CurrState = State;
387 IC.pushOperator(IC_RSHIFT);
390 PrevState = CurrState;
// Handler that queues a plus operator on the calculator.
393 IntelExprState CurrState = State;
402 IC.pushOperator(IC_PLUS);
403 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
404 // If we already have a BaseReg, then assume this is the IndexReg with
409 assert (!IndexReg && "BaseReg/IndexReg already set!");
416 PrevState = CurrState;
// Handler for a minus sign.
419 IntelExprState CurrState = State;
435 // Only push the minus operator if it is not a unary operator.
436 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
437 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
438 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
439 IC.pushOperator(IC_MINUS);
440 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
441 // If we already have a BaseReg, then assume this is the IndexReg with
446 assert (!IndexReg && "BaseReg/IndexReg already set!");
453 PrevState = CurrState;
456 IntelExprState CurrState = State;
466 PrevState = CurrState;
/// Handler for a register token \p Reg.
468 void onRegister(unsigned Reg) {
469 IntelExprState CurrState = State;
476 State = IES_REGISTER;
// The register enters the calculator as an operand with value 0.
478 IC.pushOperand(IC_REGISTER);
481 // Index Register - Scale * Register
482 if (PrevState == IES_INTEGER) {
483 assert (!IndexReg && "IndexReg already set!");
484 State = IES_REGISTER;
486 // Get the scale and replace the 'Scale * Register' with '0'.
487 Scale = IC.popOperand();
488 IC.pushOperand(IC_IMM);
495 PrevState = CurrState;
/// Handler for an identifier expression and its spelling.
497 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
// Keep the identifier's name and push a zero immediate as its operand.
508 SymName = SymRefName;
509 IC.pushOperand(IC_IMM);
/// Handler for an integer token; \p ErrMsg receives a message when the
/// integer is used as an invalid scale factor.
513 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
514 IntelExprState CurrState = State;
531 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
532 // Index Register - Register * Scale
533 assert (!IndexReg && "IndexReg already set!");
536 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
537 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
540 // Get the scale and replace the 'Register * Scale' with '0'.
542 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
543 PrevState == IES_OR || PrevState == IES_AND ||
544 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
545 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
546 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
547 PrevState == IES_NOT || PrevState == IES_XOR) &&
548 CurrState == IES_MINUS) {
549 // Unary minus. No need to pop the minus operand because it was never
551 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
552 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
553 PrevState == IES_OR || PrevState == IES_AND ||
554 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
555 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
556 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
557 PrevState == IES_NOT || PrevState == IES_XOR) &&
558 CurrState == IES_NOT) {
559 // Unary not. No need to pop the not operand because it was never
561 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Plain integer: pushed unchanged.
563 IC.pushOperand(IC_IMM, TmpInt);
567 PrevState = CurrState;
// Multiplication: enter IES_MULTIPLY and queue the operator.
579 State = IES_MULTIPLY;
580 IC.pushOperator(IC_MULTIPLY);
// Division: queue the operator.
593 IC.pushOperator(IC_DIVIDE);
605 IC.pushOperator(IC_PLUS);
610 IntelExprState CurrState = State;
619 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
620 // If we already have a BaseReg, then assume this is the IndexReg with
625 assert (!IndexReg && "BaseReg/IndexReg already set!");
632 PrevState = CurrState;
635 IntelExprState CurrState = State;
651 // FIXME: We don't handle this type of unary minus or not, yet.
652 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
653 PrevState == IES_OR || PrevState == IES_AND ||
654 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
655 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
656 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
657 PrevState == IES_NOT || PrevState == IES_XOR) &&
658 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
// Opening parenthesis goes onto the calculator's operator stack.
663 IC.pushOperator(IC_LPAREN);
666 PrevState = CurrState;
// Closing parenthesis goes onto the calculator's operator stack.
678 IC.pushOperator(IC_RPAREN);
/// Error-reporting helper.
/// \param L location the diagnostic refers to.
/// \param Msg diagnostic text.
/// \param Range source range passed along with the diagnostic; defaults to None.
/// \param MatchingInlineAsm selects the inline-asm branch below; defaults to false.
684 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
685 bool MatchingInlineAsm = false) {
686 MCAsmParser &Parser = getParser();
687 if (MatchingInlineAsm) {
// Skip the rest of the current statement unless the lexer already sits at
// the start of one.
688 if (!getLexer().isAtStartOfStatement())
689 Parser.eatToEndOfStatement();
// Hand the diagnostic to the generic parser and return its result.
692 return Parser.Error(L, Msg, Range);
/// Error helper taking a location and a message whose return type is
/// std::nullptr_t.
// NOTE(review): presumably reports Msg at Loc and yields nullptr so callers
// can return it in place of an operand pointer -- confirm.
695 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// --- Default operands and operand adjustment helpers ---
// DefaultMemSIOperand / DefaultMemDIOperand build a zero-displacement memory
// operand whose base register is the SI / DI register of the current mode.
700 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
701 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
702 bool IsSIReg(unsigned Reg);
703 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
// Appends Src and Dst to Operands in the order required by the active syntax.
705 AddDefaultSrcDestOperands(OperandVector &Operands,
706 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
707 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
708 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
709 OperandVector &FinalOperands);
// --- Operand parsing ---
// ParseOperand dispatches to ParseIntelOperand or ParseATTOperand depending
// on the active syntax.
710 std::unique_ptr<X86Operand> ParseOperand();
711 std::unique_ptr<X86Operand> ParseATTOperand();
712 std::unique_ptr<X86Operand> ParseIntelOperand();
713 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
714 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
715 unsigned IdentifyIntelOperator(StringRef Name);
716 unsigned ParseIntelOperator(unsigned OpKind);
717 std::unique_ptr<X86Operand>
718 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
719 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
720 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
721 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
722 std::unique_ptr<X86Operand>
723 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
724 bool isSymbol, unsigned Size);
725 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
726 InlineAsmIdentifierInfo &Info,
727 bool IsUnevaluatedOperand, SMLoc &End);
729 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds the memory operand used for inline assembly; AllowBetterSizeMatch
// defaults to false.
731 std::unique_ptr<X86Operand>
732 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
733 unsigned IndexReg, unsigned Scale, SMLoc Start,
734 SMLoc End, unsigned Size, StringRef Identifier,
735 InlineAsmIdentifierInfo &Info,
736 bool AllowBetterSizeMatch = false);
// --- Directive handling ---
738 bool parseDirectiveEven(SMLoc L);
739 bool ParseDirectiveWord(unsigned Size, SMLoc L);
740 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
742 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
744 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
745 /// instrumentation around Inst.
746 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// --- Instruction matching and emission ---
748 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
749 OperandVector &Operands, MCStreamer &Out,
751 bool MatchingInlineAsm) override;
753 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
754 MCStreamer &Out, bool MatchingInlineAsm);
756 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
757 bool MatchingInlineAsm);
759 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
760 OperandVector &Operands, MCStreamer &Out,
762 bool MatchingInlineAsm);
764 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
765 OperandVector &Operands, MCStreamer &Out,
767 bool MatchingInlineAsm);
769 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
771 /// Parses AVX512-specific operand primitives: masked registers ({%k<NUM>}, {z})
772 /// and memory broadcasting ({1to<NUM>}) primitives, updating the Operands vector if required.
773 /// Returns false if no parsing errors occurred, true otherwise.
774 bool HandleAVX512Operand(OperandVector &Operands,
775 const MCParsedAsmOperand &Op);
777 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
/// True when the X86::Mode64Bit feature bit is set on the subtarget.
779 bool is64BitMode() const {
780 // FIXME: Can tablegen auto-generate this?
781 return getSTI().getFeatureBits()[X86::Mode64Bit];
/// True when the X86::Mode32Bit feature bit is set on the subtarget.
783 bool is32BitMode() const {
784 // FIXME: Can tablegen auto-generate this?
785 return getSTI().getFeatureBits()[X86::Mode32Bit];
/// True when the X86::Mode16Bit feature bit is set on the subtarget.
787 bool is16BitMode() const {
788 // FIXME: Can tablegen auto-generate this?
789 return getSTI().getFeatureBits()[X86::Mode16Bit];
/// Switches the subtarget to \p mode, one of X86::Mode64Bit, X86::Mode32Bit
/// or X86::Mode16Bit, and refreshes the available-feature set.
791 void SwitchMode(unsigned mode) {
// Work on a copy of the subtarget info.
792 MCSubtargetInfo &STI = copySTI();
793 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Mode bits that are set right now.
794 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Flipping `mode` within the currently set mode bits gives the bits that have
// to change; toggle exactly those and recompute the feature set.
795 unsigned FB = ComputeAvailableFeatures(
796 STI.ToggleFeature(OldMode.flip(mode)));
797 setAvailableFeatures(FB);
// Afterwards `mode` must be the only mode bit that is set.
799 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Pointer width in bits for the active mode: 16, 32 or 64. Reaching the end
/// without any mode being active is treated as unreachable.
802 unsigned getPointerWidth() {
803 if (is16BitMode()) return 16;
804 if (is32BitMode()) return 32;
805 if (is64BitMode()) return 64;
806 llvm_unreachable("invalid mode");
/// True when the parser's assembler dialect is non-zero (the dialect number
/// is converted to bool).
809 bool isParsingIntelSyntax() {
810 return getParser().getAssemblerDialect();
813 /// @name Auto-generated Matcher Functions
816 #define GET_ASSEMBLER_HEADER
817 #include "X86GenAsmMatcher.inc"
/// Constructs the parser.
/// \param sti subtarget info passed to the MCTargetAsmParser base.
/// \param Parser generic parser; its context is used for the instrumentation.
/// \param mii instruction info stored in MII.
/// \param Options target options passed to the base class and to the
///        instrumentation factory.
823 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
824 const MCInstrInfo &mii, const MCTargetOptions &Options)
825 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
828 // Initialize the set of available features.
829 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
// Create the instrumentation object for this parser.
830 Instrumentation.reset(
831 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the MCTargetAsmParser interface.
// Parses a register; RegNo, StartLoc and EndLoc are output parameters.
834 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
// Forwards the frame register to the instrumentation object.
836 void SetFrameRegister(unsigned RegNo) override;
838 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
839 SMLoc NameLoc, OperandVector &Operands) override;
841 bool ParseDirective(AsmToken DirectiveID) override;
843 } // end anonymous namespace
845 /// @name Auto-generated Match Functions
848 static unsigned MatchRegisterName(StringRef Name);
/// Checks a base/index register pair of a memory operand. For each rejected
/// combination visible below a diagnostic string is stored in ErrMsg.
852 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
854 // If we have both a base register and an index register make sure they are
855 // both 64-bit or 32-bit registers.
856 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// RIP cannot be combined with an index register, nor be the index itself.
858 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
859 ErrMsg = "invalid base+index expression";
862 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16- or 32-bit index (RIZ excepted).
863 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
864 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
865 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
866 IndexReg != X86::RIZ) {
867 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16- or 64-bit index (EIZ excepted).
870 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
871 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
872 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
873 IndexReg != X86::EIZ){
874 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be a 32- or 64-bit register ...
877 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
878 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
879 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
880 ErrMsg = "base register is 16-bit, but index register is not";
// ... and BX/BP must pair with SI/DI, while SI/DI must pair with BX/BP.
883 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
884 IndexReg != X86::SI && IndexReg != X86::DI) ||
885 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
886 IndexReg != X86::BX && IndexReg != X86::BP)) {
887 ErrMsg = "invalid 16-bit base/index register combination";
/// Parses a register reference starting at the current token.
/// \param RegNo receives the matched register number.
/// \param StartLoc set to the location of the first token examined.
/// \param EndLoc set to the end location of the register token(s).
/// For a token that is not an identifier or names no register, Intel syntax
/// returns true without a diagnostic while AT&T syntax reports
/// "invalid register name".
895 bool X86AsmParser::ParseRegister(unsigned &RegNo,
896 SMLoc &StartLoc, SMLoc &EndLoc) {
897 MCAsmParser &Parser = getParser();
899 const AsmToken &PercentTok = Parser.getTok();
900 StartLoc = PercentTok.getLoc();
902 // If we encounter a %, ignore it. This code handles registers with and
903 // without the prefix, unprefixed registers can occur in cfi directives.
904 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
905 Parser.Lex(); // Eat percent token.
907 const AsmToken &Tok = Parser.getTok();
908 EndLoc = Tok.getEndLoc();
910 if (Tok.isNot(AsmToken::Identifier)) {
911 if (isParsingIntelSyntax()) return true;
912 return Error(StartLoc, "invalid register name",
913 SMRange(StartLoc, EndLoc));
// First attempt: match the spelling exactly as written.
916 RegNo = MatchRegisterName(Tok.getString());
918 // If the match failed, try the register name as lowercase.
920 RegNo = MatchRegisterName(Tok.getString().lower());
922 // The "flags" register cannot be referenced directly.
923 // Treat it as an identifier instead.
924 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
927 if (!is64BitMode()) {
928 // FIXME: This should be done using Requires<Not64BitMode> and
929 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
931 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// Outside 64-bit mode, reject registers that only exist in 64-bit mode.
933 if (RegNo == X86::RIZ ||
934 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
935 X86II::isX86_64NonExtLowByteReg(RegNo) ||
936 X86II::isX86_64ExtendedReg(RegNo))
937 return Error(StartLoc, "register %"
938 + Tok.getString() + " is only available in 64-bit mode",
939 SMRange(StartLoc, EndLoc));
// In 64-bit mode without AVX512, reject the registers for which
// X86II::is32ExtendedReg is true.
940 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
941 if (X86II::is32ExtendedReg(RegNo))
942 return Error(StartLoc, "register %"
943 + Tok.getString() + " is only available with AVX512",
944 SMRange(StartLoc, EndLoc));
947 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
948 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
950 Parser.Lex(); // Eat 'st'
952 // Check to see if we have '(4)' after %st.
953 if (getLexer().isNot(AsmToken::LParen))
// The stack index has to be an integer token in the range 0..7.
958 const AsmToken &IntTok = Parser.getTok();
959 if (IntTok.isNot(AsmToken::Integer))
960 return Error(IntTok.getLoc(), "expected stack index");
961 switch (IntTok.getIntVal()) {
962 case 0: RegNo = X86::ST0; break;
963 case 1: RegNo = X86::ST1; break;
964 case 2: RegNo = X86::ST2; break;
965 case 3: RegNo = X86::ST3; break;
966 case 4: RegNo = X86::ST4; break;
967 case 5: RegNo = X86::ST5; break;
968 case 6: RegNo = X86::ST6; break;
969 case 7: RegNo = X86::ST7; break;
970 default: return Error(IntTok.getLoc(), "invalid stack index");
// The token after the index must be the closing parenthesis.
973 if (getParser().Lex().isNot(AsmToken::RParen))
974 return Error(Parser.getTok().getLoc(), "expected ')'");
976 EndLoc = Parser.getTok().getEndLoc();
977 Parser.Lex(); // Eat ')'
981 EndLoc = Parser.getTok().getEndLoc();
983 // If this is "db[0-7]", match it as an alias
// "db0".."db7" map onto DR0..DR7.
985 if (RegNo == 0 && Tok.getString().size() == 3 &&
986 Tok.getString().startswith("db")) {
987 switch (Tok.getString()[2]) {
988 case '0': RegNo = X86::DR0; break;
989 case '1': RegNo = X86::DR1; break;
990 case '2': RegNo = X86::DR2; break;
991 case '3': RegNo = X86::DR3; break;
992 case '4': RegNo = X86::DR4; break;
993 case '5': RegNo = X86::DR5; break;
994 case '6': RegNo = X86::DR6; break;
995 case '7': RegNo = X86::DR7; break;
999 EndLoc = Parser.getTok().getEndLoc();
1000 Parser.Lex(); // Eat it.
1006 if (isParsingIntelSyntax()) return true;
1007 return Error(StartLoc, "invalid register name",
1008 SMRange(StartLoc, EndLoc));
1011 Parser.Lex(); // Eat identifier token.
/// Passes \p RegNo to the instrumentation object as the initial frame
/// register.
1015 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1016 Instrumentation->SetInitialFrameRegister(RegNo);
/// Builds a memory operand with a zero displacement whose base register is
/// RSI, ESI or SI depending on the current mode.
1019 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
// 32-bit registers are used in 32-bit mode and also when Code16GCC is set.
1020 bool Parse32 = is32BitMode() || Code16GCC;
1021 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1022 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
// No segment register, no index register, scale 1.
1023 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1024 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Builds a memory operand with a zero displacement whose base register is
/// RDI, EDI or DI depending on the current mode.
1028 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
// 32-bit registers are used in 32-bit mode and also when Code16GCC is set.
1029 bool Parse32 = is32BitMode() || Code16GCC;
1030 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1031 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
// No segment register, no index register, scale 1.
1032 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1033 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Classifies \p Reg as an SI-family or DI-family register. Any register other
/// than (R|E)SI / (R|E)DI reaches the unreachable default below.
1037 bool X86AsmParser::IsSIReg(unsigned Reg) {
1039 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
/// Returns the SI-family register (when IsSIReg is true) or the DI-family
/// register (otherwise) belonging to register class \p RegClassID. Only the
/// GR64, GR32 and GR16 classes are handled; any other class is unreachable.
1051 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1053 switch (RegClassID) {
1054 default: llvm_unreachable("Unexpected register class");
1055 case X86::GR64RegClassID:
1056 return IsSIReg ? X86::RSI : X86::RDI;
1057 case X86::GR32RegClassID:
1058 return IsSIReg ? X86::ESI : X86::EDI;
1059 case X86::GR16RegClassID:
1060 return IsSIReg ? X86::SI : X86::DI;
/// Appends the default source and destination operands to \p Operands,
/// taking ownership of both, in the order the active syntax expects.
1064 void X86AsmParser::AddDefaultSrcDestOperands(
1065 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1066 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
// Intel syntax: destination first, then source.
1067 if (isParsingIntelSyntax()) {
1068 Operands.push_back(std::move(Dst));
1069 Operands.push_back(std::move(Src));
// Source first, then destination.
1072 Operands.push_back(std::move(Src));
1073 Operands.push_back(std::move(Dst));
/// Compares the operands written by the user (\p OrigOperands, whose first
/// entry is the instruction name) against the default operands in
/// \p FinalOperands, adjusts the memory operands of the latter, and finally
/// replaces the user operands with the adjusted ones.
1077 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1078 OperandVector &FinalOperands) {
1080 if (OrigOperands.size() > 1) {
1081 // Check if sizes match, OrigOperands also contains the instruction name
1082 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1083 "Operand size mismatch");
// Warnings are collected here and only emitted after all operands passed.
1085 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1086 // Verify types match
// -1 means no register class has been seen yet.
1087 int RegClassID = -1;
1088 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
// Index i + 1 skips the instruction name at the front of OrigOperands.
1089 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1090 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1092 if (FinalOp.isReg() &&
1093 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1094 // Return false and let a normal complaint about bogus operands happen
1097 if (FinalOp.isMem()) {
1099 if (!OrigOp.isMem())
1100 // Return false and let a normal complaint about bogus operands happen
1103 unsigned OrigReg = OrigOp.Mem.BaseReg;
1104 unsigned FinalReg = FinalOp.Mem.BaseReg;
1106 // If we've already encountered a register class, make sure all register
1107 // bases are of the same register class
1108 if (RegClassID != -1 &&
1109 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1110 return Error(OrigOp.getStartLoc(),
1111 "mismatching source and destination index registers");
// Determine the register class of the user's base register.
1114 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1115 RegClassID = X86::GR64RegClassID;
1116 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1117 RegClassID = X86::GR32RegClassID;
1118 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1119 RegClassID = X86::GR16RegClassID;
1121 // Unexpected register class type
1122 // Return false and let a normal complaint about bogus operands happen
// Pick the SI/DI register of the same width as the user's base register.
1125 bool IsSI = IsSIReg(FinalReg);
1126 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
// The user named a different base register: queue a warning that it is
// only used for sizing.
1128 if (FinalReg != OrigReg) {
1129 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1130 Warnings.push_back(std::make_pair(
1131 OrigOp.getStartLoc(),
1132 "memory operand is only for determining the size, " + RegName +
1133 " will be used for the location"));
// Carry the user's size and segment register over to the final operand.
1136 FinalOp.Mem.Size = OrigOp.Mem.Size;
1137 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1138 FinalOp.Mem.BaseReg = FinalReg;
1142 // Produce warnings only if all the operands passed the adjustment - prevent
1143 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1144 for (auto &WarningMsg : Warnings) {
1145 Warning(WarningMsg.first, WarningMsg.second);
1148 // Remove old operands
1149 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1150 OrigOperands.pop_back();
1152 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
// Move the adjusted operands into OrigOperands one by one.
1153 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1154 OrigOperands.push_back(std::move(FinalOperands[i]));
/// Parses one operand with the parser that matches the active syntax:
/// ParseIntelOperand for Intel syntax, ParseATTOperand otherwise.
1159 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1160 if (isParsingIntelSyntax())
1161 return ParseIntelOperand();
1162 return ParseATTOperand();
1165 /// getIntelMemOperandSize - Return intel memory operand size.
/// The size is in bits. Each keyword is recognised in all-upper-case and
/// all-lower-case spelling only.
1166 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1167 unsigned Size = StringSwitch<unsigned>(OpStr)
1168 .Cases("BYTE", "byte", 8)
1169 .Cases("WORD", "word", 16)
1170 .Cases("DWORD", "dword", 32)
1171 .Cases("FWORD", "fword", 48)
1172 .Cases("QWORD", "qword", 64)
1173 .Cases("MMWORD","mmword", 64)
1174 .Cases("XWORD", "xword", 80)
1175 .Cases("TBYTE", "tbyte", 80)
1176 .Cases("XMMWORD", "xmmword", 128)
1177 .Cases("YMMWORD", "ymmword", 256)
1178 .Cases("ZMMWORD", "zmmword", 512)
1179 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Creates the memory operand for an inline-assembly reference.
/// \param SegReg,Disp,BaseReg,IndexReg,Scale components of the memory reference.
/// \param Start,End source locations of the operand.
/// \param Size explicit operand size, 0 if none was given.
/// \param Identifier spelling of the referenced identifier.
/// \param Info frontend information about the identifier (OpDecl, IsVarDecl,
///        Type are read here).
/// \param AllowBetterSizeMatch not referenced in the lines visible below.
1184 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1185 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1186 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1187 InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) {
1188 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1189 // some other label reference.
1190 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1191 // Insert an explicit size if the user didn't have one.
// The pointer width serves as the size, and a size-directive rewrite is
// recorded at Start.
1193 Size = getPointerWidth();
1194 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1198 // Create an absolute memory reference in order to match against
1199 // instructions taking a PC relative operand.
1200 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1201 Identifier, Info.OpDecl);
1205 // We either have a direct symbol reference, or an offset from a symbol. The
1206 // parser always puts the symbol on the LHS, so look there for size
1207 // calculation purposes.
1208 unsigned FrontendSize = 0;
1209 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1211 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Only fall back to the frontend's type size when no explicit size exists.
1212 if (IsSymRef && !Size && Info.Type)
1213 FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
1215 // When parsing inline assembly we set the base register to a non-zero value
1216 // if we don't know the actual value at this time. This is necessary to
1217 // get the matching correct in some cases.
1218 BaseReg = BaseReg ? BaseReg : 1;
1219 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1220 IndexReg, Scale, Start, End, Size, Identifier,
1221 Info.OpDecl, FrontendSize);
/// Records the inline-asm rewrites needed for a bracketed Intel expression.
/// \param AsmRewrites rewrite list that is extended and updated in place.
/// \param SymName spelling of the symbol inside the brackets; its data
///        pointer is used as a source position.
/// \param ImmDisp displacement parsed before the brackets.
/// \param FinalImmDisp displacement computed for the whole expression.
/// \param BracLoc location of the opening bracket.
/// \param StartInBrac location of the first token inside the brackets.
/// \param End location of the closing bracket.
1225 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
1226 StringRef SymName, int64_t ImmDisp,
1227 int64_t FinalImmDisp, SMLoc &BracLoc,
1228 SMLoc &StartInBrac, SMLoc &End) {
1229 // Remove the '[' and ']' from the IR string.
1230 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
1231 AsmRewrites.emplace_back(AOK_Skip, End, 1);
1233 // If ImmDisp is non-zero, then we parsed a displacement before the
1234 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1235 // If ImmDisp doesn't match the displacement computed by the state machine
1236 // then we have an additional displacement in the bracketed expression.
1237 if (ImmDisp != FinalImmDisp) {
1239 // We have an immediate displacement before the bracketed expression.
1240 // Adjust this to match the final immediate displacement.
// Look through the rewrites for the immediate recorded before the bracket.
1242 for (AsmRewrite &AR : AsmRewrites) {
1243 if (AR.Loc.getPointer() > BracLoc.getPointer())
1245 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
1246 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite up to the bracket and give it the final value.
1248 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
1249 AR.Val = FinalImmDisp;
1254 assert (Found && "Unable to rewrite ImmDisp.");
1257 // We have a symbolic and an immediate displacement, but no displacement
1258 // before the bracketed expression. Put the immediate displacement
1259 // before the bracketed expression.
1260 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
1263 // Remove all the ImmPrefix rewrites within the brackets.
1264 // We may have some Imm rewrites as a result of an operator applying,
1265 // remove them as well
1266 for (AsmRewrite &AR : AsmRewrites) {
1267 if (AR.Loc.getPointer() < StartInBrac.getPointer())
1269 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm)
1270 AR.Kind = AOK_Delete;
1272 const char *SymLocPtr = SymName.data();
1273 // Skip everything before the symbol.
1274 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1275 assert(Len > 0 && "Expected a non-negative length.");
1276 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
1278 // Skip everything after the symbol.
1279 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1280 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1281 assert(Len > 0 && "Expected a non-negative length.");
1282 AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
1286 // Some bitwise operators have a named synonym ("not", "or", "shl", "shr",
// "xor", "and").
1287 // Query a candidate string for being such a named operator
1288 // and, if so, invoke the appropriate handler.
1289 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1290 // A named operator should be either lower or upper case, but not a mix
// (compare() is non-zero when the strings differ).
1291 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1293 if (Name.equals_lower("not"))
1295 else if (Name.equals_lower("or"))
1297 else if (Name.equals_lower("shl"))
1299 else if (Name.equals_lower("shr"))
1301 else if (Name.equals_lower("xor"))
1303 else if (Name.equals_lower("and"))
// Feeds the tokens of an Intel-syntax expression to the state machine SM,
// one token kind per case below, and updates End as tokens are consumed.
// Failures are reported through Error() and its result is returned.
1310 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1311 MCAsmParser &Parser = getParser();
1312 const AsmToken &Tok = Parser.getTok();
1314 AsmToken::TokenKind PrevTK = AsmToken::Error;
1317 bool UpdateLocLex = true;
1319 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1320 // identifier. Don't try to parse it as a register.
1321 if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
1324 // If we're parsing an immediate expression, we don't expect a '['.
1325 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1328 AsmToken::TokenKind TK = getLexer().getKind();
1331 if (SM.isValidEndState()) {
1335 return Error(Tok.getLoc(), "unknown token in expression");
1337 case AsmToken::EndOfStatement: {
1341 case AsmToken::String:
1342 case AsmToken::Identifier: {
1343 // This could be a register or a symbolic displacement.
1346 SMLoc IdentLoc = Tok.getLoc();
1347 StringRef Identifier = Tok.getString();
1348 UpdateLocLex = false;
1349 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1350 SM.onRegister(TmpReg);
1351 } else if (ParseIntelNamedOperator(Identifier, SM)) {
1352 UpdateLocLex = true;
1353 } else if (!isParsingInlineAsm()) {
1354 if (getParser().parsePrimaryExpr(Val, End))
1355 return Error(Tok.getLoc(), "Unexpected identifier!");
1356 SM.onIdentifierExpr(Val, Identifier);
1357 } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) {
1358 if (OpKind == IOK_OFFSET)
// The two adjacent literals are concatenated, so the first one must end
// with a space to keep "of" and "a" apart in the diagnostic.
1359 return Error(IdentLoc, "Dealing OFFSET operator as part of "
1360 "a compound immediate expression is yet to be supported");
1361 int64_t Val = ParseIntelOperator(OpKind);
1365 if (SM.onInteger(Val, ErrMsg))
1366 return Error(IdentLoc, ErrMsg);
1367 } else if (Identifier.find('.') != StringRef::npos &&
1368 PrevTK == AsmToken::RBrac) {
1371 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1372 if (ParseIntelIdentifier(Val, Identifier, Info,
1373 /*Unevaluated=*/false, End))
1375 SM.onIdentifierExpr(Val, Identifier);
1379 case AsmToken::Integer: {
1381 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1382 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1383 // Look for 'b' or 'f' following an Integer as a directional label
1384 SMLoc Loc = getTok().getLoc();
1385 int64_t IntVal = getTok().getIntVal();
1386 End = consumeToken();
1387 UpdateLocLex = false;
1388 if (getLexer().getKind() == AsmToken::Identifier) {
1389 StringRef IDVal = getTok().getString();
1390 if (IDVal == "f" || IDVal == "b") {
1392 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1393 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1395 MCSymbolRefExpr::create(Sym, Variant, getContext());
1396 if (IDVal == "b" && Sym->isUndefined())
1397 return Error(Loc, "invalid reference to undefined symbol");
1398 StringRef Identifier = Sym->getName();
1399 SM.onIdentifierExpr(Val, Identifier);
1400 End = consumeToken();
1402 if (SM.onInteger(IntVal, ErrMsg))
1403 return Error(Loc, ErrMsg);
1406 if (SM.onInteger(IntVal, ErrMsg))
1407 return Error(Loc, ErrMsg);
1411 case AsmToken::Plus: SM.onPlus(); break;
1412 case AsmToken::Minus: SM.onMinus(); break;
1413 case AsmToken::Tilde: SM.onNot(); break;
1414 case AsmToken::Star: SM.onStar(); break;
1415 case AsmToken::Slash: SM.onDivide(); break;
1416 case AsmToken::Pipe: SM.onOr(); break;
1417 case AsmToken::Caret: SM.onXor(); break;
1418 case AsmToken::Amp: SM.onAnd(); break;
1419 case AsmToken::LessLess:
1420 SM.onLShift(); break;
1421 case AsmToken::GreaterGreater:
1422 SM.onRShift(); break;
1423 case AsmToken::LBrac: SM.onLBrac(); break;
1424 case AsmToken::RBrac: SM.onRBrac(); break;
1425 case AsmToken::LParen: SM.onLParen(); break;
1426 case AsmToken::RParen: SM.onRParen(); break;
1429 return Error(Tok.getLoc(), "unknown token in expression");
// Cases that already advanced the lexer themselves clear UpdateLocLex.
1431 if (!Done && UpdateLocLex)
1432 End = consumeToken();
// Parses a bracketed Intel memory expression. The current token must be '['
// (otherwise an "Expected '[' token!" error operand is returned). ImmDisp is
// a displacement already parsed in front of the bracket; it seeds the
// expression state machine.
1439 std::unique_ptr<X86Operand>
1440 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1441 int64_t ImmDisp, bool isSymbol,
1443 MCAsmParser &Parser = getParser();
1444 const AsmToken &Tok = Parser.getTok();
1445 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1446 if (getLexer().isNot(AsmToken::LBrac))
1447 return ErrorOperand(BracLoc, "Expected '[' token!");
1448 Parser.Lex(); // Eat '['
1450 SMLoc StartInBrac = Parser.getTok().getLoc();
1451 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1452 // may have already parsed an immediate displacement before the bracketed
1454 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1455 if (ParseIntelExpression(SM, End))
1458 const MCExpr *Disp = nullptr;
1459 if (const MCExpr *Sym = SM.getSym()) {
1460 // A symbolic displacement.
1462 if (isParsingInlineAsm())
1463 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
1464 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate part when it is non-zero, or when there is no symbolic
// displacement at all.
1468 if (SM.getImm() || !Disp) {
1469 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1471 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1473 Disp = Imm; // An immediate displacement only.
1476 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1477 // will in fact do a global lookup of the field name inside all global
1478 // typedefs, but we don't emulate that.
1479 if ((Parser.getTok().getKind() == AsmToken::Identifier ||
1480 Parser.getTok().getKind() == AsmToken::Dot ||
1481 Parser.getTok().getKind() == AsmToken::Real) &&
1482 Parser.getTok().getString().find('.') != StringRef::npos) {
1483 const MCExpr *NewDisp;
1484 if (ParseIntelDotOperator(Disp, NewDisp))
1487 End = Tok.getEndLoc();
1488 Parser.Lex(); // Eat the field.
1494 Error(Start, "cannot use more than one symbol in memory operand");
1497 if (SM.getBaseReg()) {
1498 Error(Start, "cannot use base register with variable reference");
1501 if (SM.getIndexReg()) {
1502 Error(Start, "cannot use index register with variable reference");
1507 int BaseReg = SM.getBaseReg();
1508 int IndexReg = SM.getIndexReg();
1509 int Scale = SM.getScale();
// Stand-alone assembly builds the operand directly; inline assembly goes
// through CreateMemForInlineAsm at the end of the function.
1510 if (!isParsingInlineAsm()) {
1512 if (!BaseReg && !IndexReg) {
1514 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1515 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1519 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1520 Error(StartInBrac, ErrMsg);
1523 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1524 IndexReg, Scale, Start, End, Size);
1527 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1528 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1529 End, Size, SM.getSymName(), Info,
1530 isParsingInlineAsm());
1533 // Inline assembly may use variable names with namespace alias qualifiers.
// Resolves Identifier through SemaCallback->LookupInlineAsmIdentifier, lexes
// past the text the frontend claimed, and stores a symbol reference for the
// resulting identifier in Val. Only valid while parsing inline assembly
// (asserted below). On return, Identifier holds the text claimed by the
// frontend and End is the end location of the last token consumed.
1534 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1535 StringRef &Identifier,
1536 InlineAsmIdentifierInfo &Info,
1537 bool IsUnevaluatedOperand, SMLoc &End) {
1538 MCAsmParser &Parser = getParser();
1539 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf starts at the identifier; its size after the lookup is what the
// frontend claimed (used for EndPtr below).
// NOTE(review): presumably the callback trims LineBuf -- confirm.
1542 StringRef LineBuf(Identifier.data());
1544 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1546 const AsmToken &Tok = Parser.getTok();
1547 SMLoc Loc = Tok.getLoc();
1549 // Advance the token stream until the end of the current token is
1550 // after the end of what the frontend claimed.
1551 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1553 End = Tok.getEndLoc();
1555 } while (End.getPointer() < EndPtr);
1556 Identifier = LineBuf;
1558 // The frontend should end parsing on an assembler token boundary, unless it
1560 assert((End.getPointer() == EndPtr || !Result) &&
1561 "frontend claimed part of a token?");
1563 // If the identifier lookup was unsuccessful, assume that we are dealing with
1566 StringRef InternalName =
1567 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1569 assert(InternalName.size() && "We should have an internal name here.");
1570 // Push a rewrite for replacing the identifier name with the internal name.
1571 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1575 // Create the symbol reference.
1576 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1577 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1578 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1582 /// \brief Parse intel style segment override.
/// The current token must be the ':' that follows the segment register
/// SegReg (non-zero, asserted). What follows may be an integer displacement,
/// a bracketed expression, or an identifier/primary expression.
1583 std::unique_ptr<X86Operand>
1584 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1586 MCAsmParser &Parser = getParser();
1587 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1588 const AsmToken &Tok = Parser.getTok(); // Current token; must be ':'.
1589 if (Tok.isNot(AsmToken::Colon))
1590 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1591 Parser.Lex(); // Eat ':'
1593 int64_t ImmDisp = 0;
1594 if (getLexer().is(AsmToken::Integer)) {
1595 ImmDisp = Tok.getIntVal();
1596 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1598 if (isParsingInlineAsm())
1599 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1601 if (getLexer().isNot(AsmToken::LBrac)) {
1602 // An immediate following a 'segment register', 'colon' token sequence can
1603 // be followed by a bracketed expression. If it isn't we know we have our
1604 // final segment override.
1605 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1606 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1607 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1608 Start, ImmDispToken.getEndLoc(), Size);
1612 if (getLexer().is(AsmToken::LBrac))
1613 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
// No bracket: the remainder is a primary expression (stand-alone assembly)
// or an identifier resolved through the frontend (inline assembly).
1617 if (!isParsingInlineAsm()) {
1618 if (getParser().parsePrimaryExpr(Val, End))
1619 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1621 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1624 InlineAsmIdentifierInfo Info;
1625 StringRef Identifier = Tok.getString();
1626 if (ParseIntelIdentifier(Val, Identifier, Info,
1627 /*Unevaluated=*/false, End))
// NOTE(review): SegReg is passed as 0 here although a segment override was
// parsed -- confirm this is intended.
1629 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1630 /*Scale=*/1, Start, End, Size, Identifier, Info);
1633 // ParseRoundingModeOp - Parse AVX-512 rounding mode operand.
// Accepts "{r?-sae}" with r? one of rn/rd/ru/rz (yielding an immediate
// operand holding the rounding mode) or "{sae}" (yielding a "{sae}" token
// operand). The current token must be the opening '{'.
1634 std::unique_ptr<X86Operand>
1635 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1636 MCAsmParser &Parser = getParser();
1637 const AsmToken &Tok = Parser.getTok();
1638 // Eat "{" and mark the current place.
1639 const SMLoc consumedToken = consumeToken();
// NOTE(review): getIdentifier() is called without first checking the token
// kind -- confirm it is safe when '{' is followed by a non-identifier.
1640 if (Tok.getIdentifier().startswith("r")){
1641 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1642 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1643 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1644 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1645 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1648 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1649 Parser.Lex(); // Eat "r*" of r*-sae
1650 if (!getLexer().is(AsmToken::Minus))
1651 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1652 Parser.Lex(); // Eat "-"
// NOTE(review): the next token is consumed without verifying it is "sae".
1653 Parser.Lex(); // Eat the sae
1654 if (!getLexer().is(AsmToken::RCurly))
1655 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1656 Parser.Lex(); // Eat "}"
1657 const MCExpr *RndModeOp =
1658 MCConstantExpr::create(rndMode, Parser.getContext());
1659 return X86Operand::CreateImm(RndModeOp, Start, End);
1661 if(Tok.getIdentifier().equals("sae")){
1662 Parser.Lex(); // Eat the sae
1663 if (!getLexer().is(AsmToken::RCurly))
1664 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1665 Parser.Lex(); // Eat "}"
1666 return X86Operand::CreateToken("{sae}", consumedToken);
1668 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1671 /// Parse the '.' operator.
/// Disp must be a constant expression; NewDisp receives a constant equal to
/// Disp's value plus the offset named by the current token (a ".Imm" lexed
/// as a real, or a field reference looked up through SemaCallback when
/// parsing inline assembly). The token itself is not consumed here.
1672 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1673 const MCExpr *&NewDisp) {
1674 MCAsmParser &Parser = getParser();
1675 const AsmToken &Tok = Parser.getTok();
1676 int64_t OrigDispVal, DotDispVal;
1678 // FIXME: Handle non-constant expressions.
1679 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1680 OrigDispVal = OrigDisp->getValue();
1682 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1684 // Drop the optional '.'.
1685 StringRef DotDispStr = Tok.getString();
1686 if (DotDispStr.startswith("."))
1687 DotDispStr = DotDispStr.drop_front(1);
1689 // .Imm gets lexed as a real.
1690 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is ignored, so a parse failure
// is not reported here -- confirm.
1692 DotDispStr.getAsInteger(10, DotDisp);
1693 DotDispVal = DotDisp.getZExtValue();
1694 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "base.member" at the first '.' for the frontend field lookup.
1696 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1697 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1699 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1700 DotDispVal = DotDisp;
1702 return Error(Tok.getLoc(), "Unexpected token type!");
1704 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1705 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1706 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned for the rewrite.
1707 unsigned Val = OrigDispVal + DotDispVal;
1708 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
1711 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1715 /// Parse the 'offset' operator. This operator is used to specify the
1716 /// location rather than the content of a variable.
/// The current token must be the operator itself; the identifier after it is
/// resolved with ParseIntelIdentifier, and a register operand carrying the
/// identifier and its declaration is returned.
1717 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1718 MCAsmParser &Parser = getParser();
1719 const AsmToken &Tok = Parser.getTok();
1720 SMLoc OffsetOfLoc = Tok.getLoc();
1721 Parser.Lex(); // Eat offset.
1724 InlineAsmIdentifierInfo Info;
1725 SMLoc Start = Tok.getLoc(), End;
1726 StringRef Identifier = Tok.getString();
1727 if (ParseIntelIdentifier(Val, Identifier, Info,
1728 /*Unevaluated=*/false, End))
1731 // Don't emit the offset operator.
// NOTE(review): 7 presumably covers "offset" plus one following character
// (whitespace) -- confirm.
1732 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1734 // The offset operator will have an 'r' constraint, thus we need to create
1735 // register operand to ensure proper matching. Just pick a GPR based on
1736 // the size of a pointer.
1737 bool Parse32 = is32BitMode() || Code16GCC;
1738 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1740 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1741 OffsetOfLoc, Identifier, Info.OpDecl);
1744 // Query a candidate string for being an Intel assembly operator.
1745 // Report back its kind, or IOK_INVALID if it is not a known one.
// Only the all-upper-case and all-lower-case spellings listed below match.
1746 unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) {
1747 return StringSwitch<unsigned>(Name)
1748 .Cases("TYPE","type",IOK_TYPE)
1749 .Cases("SIZE","size",IOK_SIZE)
1750 .Cases("LENGTH","length",IOK_LENGTH)
1751 .Cases("OFFSET","offset",IOK_OFFSET)
1752 .Default(IOK_INVALID);
1755 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1756 /// returns the number of elements in an array. It returns the value 1 for
1757 /// non-array variables. The SIZE operator returns the size of a C or C++
1758 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1759 /// TYPE operator returns the size of a C or C++ type or variable. If the
1760 /// variable is an array, TYPE returns the size of a single element.
/// The current token must be the operator; the identifier that follows is
/// looked up as an unevaluated operand, and the operator plus its operand
/// are rewritten as a single immediate in the inline-asm rewrite list.
1761 unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1762 MCAsmParser &Parser = getParser();
1763 const AsmToken &Tok = Parser.getTok();
1764 SMLoc TypeLoc = Tok.getLoc();
1765 Parser.Lex(); // Eat operator.
1767 const MCExpr *Val = nullptr;
1768 InlineAsmIdentifierInfo Info;
1769 SMLoc Start = Tok.getLoc(), End;
1770 StringRef Identifier = Tok.getString();
1771 if (ParseIntelIdentifier(Val, Identifier, Info,
1772 /*Unevaluated=*/true, End))
1776 Error(Start, "unable to lookup expression");
// Pick the value reported by the frontend for the requested operator.
1782 default: llvm_unreachable("Unexpected operand kind!");
1783 case IOK_LENGTH: CVal = Info.Length; break;
1784 case IOK_SIZE: CVal = Info.Size; break;
1785 case IOK_TYPE: CVal = Info.Type; break;
1788 // Rewrite the type operator and the C or C++ type or variable in terms of an
1789 // immediate. E.g. TYPE foo -> $$4
1790 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1791 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
// Parses one Intel-syntax operand: an optional size keyword followed by
// PTR/ptr, then a rounding-mode operand, a register, a segment override, a
// bracketed memory expression, or an immediate expression (which may itself
// be followed by a bracketed expression).
1796 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1797 MCAsmParser &Parser = getParser();
1798 const AsmToken &Tok = Parser.getTok();
1801 // FIXME: Offset operator
1802 // Should be handled as part of immediate expression, as other operators
1803 // Currently, only supported as a stand-alone operand
1804 if (isParsingInlineAsm())
1805 if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET)
1806 return ParseIntelOffsetOfOperator();
1808 bool PtrInOperand = false;
1809 unsigned Size = getIntelMemOperandSize(Tok.getString());
1811 Parser.Lex(); // Eat operand size (e.g., byte, word).
1812 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1813 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1814 Parser.Lex(); // Eat ptr.
1815 PtrInOperand = true;
1818 Start = Tok.getLoc();
1820 // rounding mode token
1821 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1822 getLexer().is(AsmToken::LCurly))
1823 return ParseRoundingModeOp(Start, End);
1827 if (getLexer().is(AsmToken::Identifier) &&
1828 !ParseRegister(RegNo, Start, End)) {
1829 // If this is a segment register followed by a ':', then this is the start
1830 // of a segment override, otherwise this is a normal register reference.
1831 // In case it is a normal register and there is ptr in the operand this
1833 if (RegNo == X86::RIP)
1834 return ErrorOperand(Start, "rip can only be used as a base register");
1835 if (getLexer().isNot(AsmToken::Colon)) {
1837 return ErrorOperand(Start, "expected memory operand after "
1838 "'ptr', found register operand instead");
1840 return X86Operand::CreateReg(RegNo, Start, End);
1842 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1845 // Immediates and Memory
1847 // Parse [ BaseReg + Scale*IndexReg + Disp ].
1848 if (getLexer().is(AsmToken::LBrac))
1849 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
// Otherwise parse an expression that stops at '[' (StopOnLBrac), so that a
// leading displacement can be handed to ParseIntelBracExpression below.
1852 AsmToken StartTok = Tok;
1853 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1854 /*AddImmPrefix=*/false);
1855 if (ParseIntelExpression(SM, End))
1858 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1859 int64_t Imm = SM.getImm();
1860 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1861 SM.getSym()->evaluateAsAbsolute(Imm);
1863 if (StartTok.isNot(AsmToken::Identifier) &&
1864 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
1865 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1866 if (StartTok.getString().size() == Len)
1867 // Just add a prefix if this wasn't a complex immediate expression.
1868 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1870 // Otherwise, rewrite the complex expression as a single immediate.
1871 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1874 if (getLexer().isNot(AsmToken::LBrac)) {
1875 // If a directional label (i.e. 1f or 2b) was parsed above from
1876 // ParseIntelExpression() then SM.getSym() was set to a pointer
1877 // to the MCExpr with the directional local symbol and this is a
1878 // memory operand not an immediate operand.
1880 if (isParsingInlineAsm())
1881 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1883 /*Scale=*/1, Start, End, Size,
1884 SM.getSymName(), SM.getIdentifierInfo());
1885 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1889 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1890 return X86Operand::CreateImm(ImmExpr, Start, End);
1893 // Only positive immediates are valid.
1895 return ErrorOperand(Start, "expected a positive immediate displacement "
1896 "before bracketed expr.");
1898 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
// Parses one AT&T-syntax operand, dispatching on the kind of the current
// token: '%' starts a register (or, when followed by ':', a segment-prefixed
// memory operand), '$' an immediate, and '{' an AVX-512 rounding-mode
// operand. The remaining case parses a memory operand without a segment.
1901 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1902 MCAsmParser &Parser = getParser();
1903 switch (getLexer().getKind()) {
1905 // Parse a memory operand with no segment register.
1906 return ParseMemOperand(0, Parser.getTok().getLoc());
1907 case AsmToken::Percent: {
1908 // Read the register.
1911 if (ParseRegister(RegNo, Start, End)) return nullptr;
1912 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1913 Error(Start, "%eiz and %riz can only be used as index registers",
1914 SMRange(Start, End));
1917 if (RegNo == X86::RIP) {
1918 Error(Start, "%rip can only be used as a base register",
1919 SMRange(Start, End));
1923 // If this is a segment register followed by a ':', then this is the start
1924 // of a memory reference, otherwise this is a normal register reference.
1925 if (getLexer().isNot(AsmToken::Colon))
1926 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1928 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1929 return ErrorOperand(Start, "invalid segment register");
1931 getParser().Lex(); // Eat the colon.
1932 return ParseMemOperand(RegNo, Start);
1934 case AsmToken::Dollar: {
1935 // $42 -> immediate.
1936 SMLoc Start = Parser.getTok().getLoc(), End;
1939 if (getParser().parseExpression(Val, End))
1941 return X86Operand::CreateImm(Val, Start, End);
1943 case AsmToken::LCurly:{
// '{' is only accepted when the AVX-512 feature bit is set.
1944 SMLoc Start = Parser.getTok().getLoc(), End;
1945 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1946 return ParseRoundingModeOp(Start, End);
1947 return ErrorOperand(Start, "Unexpected '{' in expression");
1952 // true on failure, false otherwise
1953 // If no {z} mark was found - Parser doesn't advance
// On success with a {z} mark present, Z receives a "{z}" token operand
// located at StartLoc.
1954 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1955 const SMLoc &StartLoc) {
1956 MCAsmParser &Parser = getParser();
1957 // Assuming we are just past the '{' mark, query the next token.
1958 // Searched for {z}, but none was found. Return false, as no parsing error was
1960 if (!(getLexer().is(AsmToken::Identifier) &&
1961 (getLexer().getTok().getIdentifier() == "z")))
1963 Parser.Lex(); // Eat z
1964 // Query and eat the '}' mark
1965 if (!getLexer().is(AsmToken::RCurly))
1966 return Error(getLexer().getLoc(), "Expected } at this point");
1967 Parser.Lex(); // Eat '}'
1968 // Assign Z with the {z} mark operand
1969 Z = X86Operand::CreateToken("{z}", StartLoc);
1973 // true on failure, false otherwise
// Parses the optional '{...}' decorations that may follow an operand when
// the AVX-512 feature bit is set: a {1to<NUM>} memory broadcast, or
// {%k<NUM>} and/or {z} marks. The resulting tokens are appended to Operands.
1974 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1975 const MCParsedAsmOperand &Op) {
1976 MCAsmParser &Parser = getParser();
1977 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1978 if (getLexer().is(AsmToken::LCurly)) {
1979 // Eat "{" and mark the current place.
1980 const SMLoc consumedToken = consumeToken();
1981 // Distinguish {1to<NUM>} from {%k<NUM>}.
1982 if(getLexer().is(AsmToken::Integer)) {
1983 // Parse memory broadcasting ({1to<NUM>}).
1984 if (getLexer().getTok().getIntVal() != 1)
1985 return TokError("Expected 1to<NUM> at this point");
1986 Parser.Lex(); // Eat "1" of 1to8
1987 if (!getLexer().is(AsmToken::Identifier) ||
1988 !getLexer().getTok().getIdentifier().startswith("to"))
1989 return TokError("Expected 1to<NUM> at this point");
1990 // Recognize only reasonable suffixes.
1991 const char *BroadcastPrimitive =
1992 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1993 .Case("to2", "{1to2}")
1994 .Case("to4", "{1to4}")
1995 .Case("to8", "{1to8}")
1996 .Case("to16", "{1to16}")
1998 if (!BroadcastPrimitive)
1999 return TokError("Invalid memory broadcast primitive.");
2000 Parser.Lex(); // Eat "toN" of 1toN
2001 if (!getLexer().is(AsmToken::RCurly))
2002 return TokError("Expected } at this point");
2003 Parser.Lex(); // Eat "}"
2004 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2006 // No AVX512 specific primitives can pass
2007 // after memory broadcasting, so return.
2010 // Parse either {k}{z}, {z}{k}, {k} or {z}.
2011 // The last one has no meaning, but GCC accepts it.
2012 // Currently, we're just past a '{' mark.
2013 std::unique_ptr<X86Operand> Z;
2014 if (ParseZ(Z, consumedToken))
2016 // Reaching here means that parsing of the allegedly '{z}' mark yielded
2018 // Query for the need of further parsing for a {%k<NUM>} mark
2019 if (!Z || getLexer().is(AsmToken::LCurly)) {
// If {z} was already parsed, a new '{' must be consumed first; otherwise
// the '{' eaten above is the start of the op-mask mark.
2020 const SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2021 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2023 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2024 if (!getLexer().is(AsmToken::RCurly))
2025 return Error(getLexer().getLoc(), "Expected } at this point");
2026 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2027 Operands.push_back(std::move(Op));
2028 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2030 return Error(getLexer().getLoc(),
2031 "Expected an op-mask register at this point");
2032 // {%k<NUM>} mark is found, inquire for {z}
2033 if (getLexer().is(AsmToken::LCurly) && !Z) {
2034 // If we've found a parsing error, or found no (expected) {z} mark
2035 // - report an error
2036 if (ParseZ(Z, consumeToken()) || !Z)
2040 // '{z}' on its own is meaningless, hence should be ignored.
2041 // On the contrary - had it been accompanied by a K register,
2044 Operands.push_back(std::move(Z));
2052 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2053 /// has already been parsed if present.
/// SegReg is the already-parsed segment register, or 0 when there is none.
2054 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2057 MCAsmParser &Parser = getParser();
2058 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2059 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2060 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 when none is written.
2062 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2063 if (getLexer().isNot(AsmToken::LParen)) {
2065 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2067 // After parsing the base expression we could either have a parenthesized
2068 // memory address or not. If not, return now. If so, eat the (.
2069 if (getLexer().isNot(AsmToken::LParen)) {
2070 // Unless we have a segment register, treat this as an immediate.
2072 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2073 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2080 // Okay, we have a '('. We don't know if this is an expression or not,
2081 // so we have to eat the ( to see beyond it.
2082 SMLoc LParenLoc = Parser.getTok().getLoc();
2083 Parser.Lex(); // Eat the '('.
2085 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2086 // Nothing to do here, fall into the code below with the '(' part of the
2087 // memory operand consumed.
2091 // It must be a parenthesized expression, parse it now.
2092 if (getParser().parseParenExpression(Disp, ExprEnd))
2095 // After parsing the base expression we could either have a parenthesized
2096 // memory address or not. If not, return now. If so, eat the (.
2097 if (getLexer().isNot(AsmToken::LParen)) {
2098 // Unless we have a segment register, treat this as an immediate.
2100 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2102 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2111 // If we reached here, then we just ate the ( of the memory operand. Process
2112 // the rest of the memory operand.
2113 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2114 SMLoc IndexLoc, BaseLoc;
2116 if (getLexer().is(AsmToken::Percent)) {
2117 SMLoc StartLoc, EndLoc;
2118 BaseLoc = Parser.getTok().getLoc();
2119 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2120 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2121 Error(StartLoc, "eiz and riz can only be used as index registers",
2122 SMRange(StartLoc, EndLoc));
2127 if (getLexer().is(AsmToken::Comma)) {
2128 Parser.Lex(); // Eat the comma.
2129 IndexLoc = Parser.getTok().getLoc();
2131 // Following the comma we should have either an index register, or a scale
2132 // value. We don't support the latter form, but we want to parse it
2135 // Note that even though it would be completely consistent to support syntax
2136 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2137 if (getLexer().is(AsmToken::Percent)) {
2139 if (ParseRegister(IndexReg, L, L))
2141 if (BaseReg == X86::RIP) {
2142 Error(IndexLoc, "%rip as base register can not have an index register");
2145 if (IndexReg == X86::RIP) {
2146 Error(IndexLoc, "%rip is not allowed as an index register");
2150 if (getLexer().isNot(AsmToken::RParen)) {
2151 // Parse the scale amount:
2152 // ::= ',' [scale-expression]
2153 if (getLexer().isNot(AsmToken::Comma)) {
2154 Error(Parser.getTok().getLoc(),
2155 "expected comma in scale expression");
2158 Parser.Lex(); // Eat the comma.
2160 if (getLexer().isNot(AsmToken::RParen)) {
2161 SMLoc Loc = Parser.getTok().getLoc();
2164 if (getParser().parseAbsoluteExpression(ScaleVal)){
2165 Error(Loc, "expected scale expression");
2169 // Validate the scale amount.
2170 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2172 Error(Loc, "scale factor in 16-bit address must be 1");
2175 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2177 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2180 Scale = (unsigned)ScaleVal;
2183 } else if (getLexer().isNot(AsmToken::RParen)) {
2184 // A scale amount without an index is ignored.
2186 SMLoc Loc = Parser.getTok().getLoc();
2189 if (getParser().parseAbsoluteExpression(Value))
2193 Warning(Loc, "scale factor without index register is ignored");
2198 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2199 if (getLexer().isNot(AsmToken::RParen)) {
2200 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2203 SMLoc MemEnd = Parser.getTok().getEndLoc();
2204 Parser.Lex(); // Eat the ')'.
2206 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2207 // and then only in non-64-bit modes. Except for DX, which is a special case
2208 // because an unofficial form of in/out instructions uses it.
2209 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2210 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2211 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2212 BaseReg != X86::DX) {
2213 Error(BaseLoc, "invalid 16-bit base register");
2217 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2218 Error(IndexLoc, "16-bit memory operand may not include only index register");
2223 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2224 Error(BaseLoc, ErrMsg);
// With no segment, base, or index register, the operand is built from the
// displacement alone.
2228 if (SegReg || BaseReg || IndexReg)
2229 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2230 IndexReg, Scale, MemStart, MemEnd);
2231 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
/// Parse one instruction statement. The mnemonic in Name is rewritten where one
/// of the hacks below applies (setCCb, cmp/vcmp/vpcmp/vpcom comparison codes),
/// pushed onto Operands as a token, and followed by the parsed operands. Several
/// special forms are then canonicalized: fsub/fdiv with no operands, moves into
/// segment registers, in/out through (%dx), the string instructions,
/// shifts/rotates by 1, "int $3" and "xlat".
/// NOTE(review): this listing omits some source lines (the embedded line
/// numbers skip), so statements between the visible lines are not shown here.
2234 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2235 SMLoc NameLoc, OperandVector &Operands) {
2236 MCAsmParser &Parser = getParser();
2238 StringRef PatchedName = Name;
2240 if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) {
2241 StringRef NextTok = Parser.getTok().getString();
2242 if (NextTok == "short") {
2244 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2245 // Eat the short keyword
2247 // MS ignores the short keyword, it determines the jmp type based
2248 // on the distance of the label
2249 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2250 NextTok.size() + 1);
2254 // FIXME: Hack to recognize setneb as setne.
2255 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2256 PatchedName != "setb" && PatchedName != "setnb")
2257 PatchedName = PatchedName.substr(0, Name.size()-1);
2259 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2260 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2261 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2262 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2263 bool IsVCMP = PatchedName[0] == 'v';
2264 unsigned CCIdx = IsVCMP ? 4 : 3;
2265 unsigned ComparisonCode = StringSwitch<unsigned>(
2266 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2268 .Case("eq_oq", 0x00)
2270 .Case("lt_os", 0x01)
2272 .Case("le_os", 0x02)
2273 .Case("unord", 0x03)
2274 .Case("unord_q", 0x03)
2276 .Case("neq_uq", 0x04)
2278 .Case("nlt_us", 0x05)
2280 .Case("nle_us", 0x06)
2282 .Case("ord_q", 0x07)
2283 /* AVX only from here */
2284 .Case("eq_uq", 0x08)
2286 .Case("nge_us", 0x09)
2288 .Case("ngt_us", 0x0A)
2289 .Case("false", 0x0B)
2290 .Case("false_oq", 0x0B)
2291 .Case("neq_oq", 0x0C)
2293 .Case("ge_os", 0x0D)
2295 .Case("gt_os", 0x0E)
2297 .Case("true_uq", 0x0F)
2298 .Case("eq_os", 0x10)
2299 .Case("lt_oq", 0x11)
2300 .Case("le_oq", 0x12)
2301 .Case("unord_s", 0x13)
2302 .Case("neq_us", 0x14)
2303 .Case("nlt_uq", 0x15)
2304 .Case("nle_uq", 0x16)
2305 .Case("ord_s", 0x17)
2306 .Case("eq_us", 0x18)
2307 .Case("nge_uq", 0x19)
2308 .Case("ngt_uq", 0x1A)
2309 .Case("false_os", 0x1B)
2310 .Case("neq_os", 0x1C)
2311 .Case("ge_oq", 0x1D)
2312 .Case("gt_oq", 0x1E)
2313 .Case("true_us", 0x1F)
// Codes 8 and above are accepted only for the 'v'-prefixed (vcmp) spellings.
2315 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2317 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2320 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2321 getParser().getContext());
2322 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2324 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2328 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2329 if (PatchedName.startswith("vpcmp") &&
2330 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2331 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// CCIdx is the suffix length: 2 when the next-to-last character is 'u', else 1.
2332 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2333 unsigned ComparisonCode = StringSwitch<unsigned>(
2334 PatchedName.slice(5, PatchedName.size() - CCIdx))
2335 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2338 //.Case("false", 0x3) // Not a documented alias.
2342 //.Case("true", 0x7) // Not a documented alias.
// Code 0 ("eq") is accepted only with a two-character ('u'-prefixed) suffix.
2344 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2345 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2347 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2348 getParser().getContext());
2349 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2351 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2355 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2356 if (PatchedName.startswith("vpcom") &&
2357 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2358 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2359 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2360 unsigned ComparisonCode = StringSwitch<unsigned>(
2361 PatchedName.slice(5, PatchedName.size() - CCIdx))
2371 if (ComparisonCode != ~0U) {
2372 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2374 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2375 getParser().getContext());
2376 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2378 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2382 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2384 // Determine whether this is an instruction prefix.
2386 Name == "lock" || Name == "rep" ||
2387 Name == "repe" || Name == "repz" ||
2388 Name == "repne" || Name == "repnz" ||
2389 Name == "rex64" || Name == "data16" || Name == "data32";
2391 bool CurlyAsEndOfStatement = false;
2392 // This does the actual operand parsing. Don't parse any more if we have a
2393 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2394 // just want to parse the "lock" as the first instruction and the "incl" as
2396 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2398 // Parse '*' modifier.
2399 if (getLexer().is(AsmToken::Star))
2400 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2402 // Read the operands.
2404 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2405 Operands.push_back(std::move(Op));
2406 if (HandleAVX512Operand(Operands, *Operands.back()))
2411 // check for comma and eat it
2412 if (getLexer().is(AsmToken::Comma))
2418 // In MS inline asm curly braces mark the beginning/end of a block, therefore
2419 // they should be interpreted as end of statement
2420 CurlyAsEndOfStatement =
2421 isParsingIntelSyntax() && isParsingInlineAsm() &&
2422 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2423 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2424 return TokError("unexpected token in argument list");
2427 // Consume the EndOfStatement or the prefix separator Slash
2428 if (getLexer().is(AsmToken::EndOfStatement) ||
2429 (isPrefix && getLexer().is(AsmToken::Slash)))
2431 else if (CurlyAsEndOfStatement)
2432 // Add an actual EndOfStatement before the curly brace
2433 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2434 getLexer().getTok().getLoc(), 0);
2436 // This is for gas compatibility and cannot be done in td.
2437 // Adding "p" for some floating point with no argument.
2438 // For example: fsub --> fsubp
2440 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2441 if (IsFp && Operands.size() == 1) {
2442 const char *Repl = StringSwitch<const char *>(Name)
2443 .Case("fsub", "fsubp")
2444 .Case("fdiv", "fdivp")
2445 .Case("fsubr", "fsubrp")
2446 .Case("fdivr", "fdivrp");
2447 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2450 // Moving a 32 or 16 bit value into a segment register has the same
2451 // behavior. Modify such instructions to always take shorter form.
2452 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2453 (Operands.size() == 3)) {
2454 X86Operand &Op1 = (X86Operand &)*Operands[1];
2455 X86Operand &Op2 = (X86Operand &)*Operands[2];
2456 SMLoc Loc = Op1.getEndLoc();
2457 if (Op1.isReg() && Op2.isReg() &&
2458 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2460 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2461 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2462 // Change instruction name to match new instruction.
2463 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2464 Name = is16BitMode() ? "movw" : "movl";
2465 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2467 // Select the correct equivalent 16-/32-bit source register.
2469 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2470 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2474 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2475 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2476 // documented form in various unofficial manuals, so a lot of code uses it.
2477 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2478 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2479 Operands.size() == 3) {
2480 X86Operand &Op = (X86Operand &)*Operands.back();
2481 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2482 isa<MCConstantExpr>(Op.Mem.Disp) &&
2483 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2484 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2485 SMLoc Loc = Op.getEndLoc();
2486 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2489 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2490 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2491 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2492 Operands.size() == 3) {
2493 X86Operand &Op = (X86Operand &)*Operands[1];
2494 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2495 isa<MCConstantExpr>(Op.Mem.Disp) &&
2496 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2497 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2498 SMLoc Loc = Op.getEndLoc();
2499 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
// TmpOperands collects the default operands for the string instructions below;
// each case passes it, together with Operands, to VerifyAndAdjustOperands.
2503 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2504 bool HadVerifyError = false;
2506 // Append default arguments to "ins[bwld]"
2507 if (Name.startswith("ins") &&
2508 (Operands.size() == 1 || Operands.size() == 3) &&
2509 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2512 AddDefaultSrcDestOperands(TmpOperands,
2513 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2514 DefaultMemDIOperand(NameLoc));
2515 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2518 // Append default arguments to "outs[bwld]"
2519 if (Name.startswith("outs") &&
2520 (Operands.size() == 1 || Operands.size() == 3) &&
2521 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2522 Name == "outsd" || Name == "outs")) {
2523 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2524 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2525 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2528 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2529 // values of $SIREG according to the mode. It would be nice if this
2530 // could be achieved with InstAlias in the tables.
2531 if (Name.startswith("lods") &&
2532 (Operands.size() == 1 || Operands.size() == 2) &&
2533 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2534 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2535 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2536 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2539 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2540 // values of $DIREG according to the mode. It would be nice if this
2541 // could be achieved with InstAlias in the tables.
2542 if (Name.startswith("stos") &&
2543 (Operands.size() == 1 || Operands.size() == 2) &&
2544 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2545 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2546 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2547 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2550 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2551 // values of $DIREG according to the mode. It would be nice if this
2552 // could be achieved with InstAlias in the tables.
2553 if (Name.startswith("scas") &&
2554 (Operands.size() == 1 || Operands.size() == 2) &&
2555 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2556 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2557 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2558 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2561 // Add default SI and DI operands to "cmps[bwlq]".
2562 if (Name.startswith("cmps") &&
2563 (Operands.size() == 1 || Operands.size() == 3) &&
2564 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2565 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2566 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2567 DefaultMemSIOperand(NameLoc));
2568 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2571 // Add default SI and DI operands to "movs[bwlq]".
2572 if (((Name.startswith("movs") &&
2573 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2574 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2575 (Name.startswith("smov") &&
2576 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2577 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2578 (Operands.size() == 1 || Operands.size() == 3)) {
// In AT&T syntax a bare "movsd" (no operands) is re-tokenized as "movsl".
2579 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2580 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2581 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2582 DefaultMemDIOperand(NameLoc));
2583 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2586 // Check if we encountered an error for one of the string instructions
2587 if (HadVerifyError) {
2588 return HadVerifyError;
2591 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2593 if ((Name.startswith("shr") || Name.startswith("sar") ||
2594 Name.startswith("shl") || Name.startswith("sal") ||
2595 Name.startswith("rcl") || Name.startswith("rcr") ||
2596 Name.startswith("rol") || Name.startswith("ror")) &&
2597 Operands.size() == 3) {
// The constant-1 count is the last operand in Intel syntax and the first
// operand after the mnemonic otherwise; it is dropped in either case.
2598 if (isParsingIntelSyntax()) {
2600 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2601 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2602 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2603 Operands.pop_back();
2605 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2606 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2607 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2608 Operands.erase(Operands.begin() + 1);
2612 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2613 // instalias with an immediate operand yet.
2614 if (Name == "int" && Operands.size() == 2) {
2615 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2617 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2618 if (CE->getValue() == 3) {
2619 Operands.erase(Operands.begin() + 1);
2620 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2624 // Transforms "xlat mem8" into "xlatb"
2625 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2626 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2628 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2629 "size, (R|E)BX will be used for the location");
2630 Operands.pop_back();
2631 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
// NOTE(review): only the signature is visible in this listing. Both matchers
// call this as `while (processInstruction(Inst, Operands))`, so a true result
// is treated as "something changed, run again".
2638 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2642 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emit Inst by handing it, with Operands and the current context, to the
/// Instrumentation object's InstrumentAndEmitInstruction.
/// NOTE(review): the remaining parameter/argument lines are not visible in
/// this listing.
2644 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2646 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching: forwards all arguments to the Intel-syntax
/// matcher when isParsingIntelSyntax() is true and to the AT&T-syntax matcher
/// otherwise, returning that matcher's result.
2650 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2651 OperandVector &Operands,
2652 MCStreamer &Out, uint64_t &ErrorInfo,
2653 bool MatchingInlineAsm) {
2654 if (isParsingIntelSyntax())
2655 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2657 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expand the FPU mnemonics listed in the table below (finit, fsave, fstcw,
/// ...): a WAIT instruction is built (and emitted unless MatchingInlineAsm),
/// and Operands[0] is replaced by a token for the corresponding "fn*"
/// mnemonic.
/// NOTE(review): the guard around the WAIT emission (presumably "mnemonic was
/// found in the table") is not visible in this listing — confirm.
2661 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2662 OperandVector &Operands, MCStreamer &Out,
2663 bool MatchingInlineAsm) {
2664 // FIXME: This should be replaced with a real .td file alias mechanism.
2665 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2667 const char *Repl = StringSwitch<const char *>(Op.getToken())
2668 .Case("finit", "fninit")
2669 .Case("fsave", "fnsave")
2670 .Case("fstcw", "fnstcw")
2671 .Case("fstcww", "fnstcw")
2672 .Case("fstenv", "fnstenv")
2673 .Case("fstsw", "fnstsw")
2674 .Case("fstsww", "fnstsw")
2675 .Case("fclex", "fnclex")
2679 Inst.setOpcode(X86::WAIT);
2681 if (!MatchingInlineAsm)
2682 EmitInstruction(Inst, Operands, Out);
2683 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Report an "instruction requires: <features>" error at IDLoc, appending the
/// name returned by getSubtargetFeatureName for each feature bit of ErrorInfo
/// that tests as set. ErrorInfo must be non-zero (asserted). Returns the
/// result of Error().
/// NOTE(review): the declaration and update of Mask are not visible in this
/// listing; the loop runs sizeof(ErrorInfo)*8-1 times.
2687 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2688 bool MatchingInlineAsm) {
2689 assert(ErrorInfo && "Unknown missing feature!");
2690 SmallString<126> Msg;
2691 raw_svector_ostream OS(Msg);
2692 OS << "instruction requires:";
2694 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2695 if (ErrorInfo & Mask)
2696 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2699 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
/// Match and emit an instruction whose mnemonic token is Operands[0]. A direct
/// match is tried first. If that fails, the mnemonic is retried with each
/// candidate size suffix appended ("bwlq", or "slt" when the mnemonic starts
/// with 'f'); exactly one successful suffixed match is accepted, and any other
/// outcome produces one of the diagnostics below. On success the instruction
/// is emitted unless MatchingInlineAsm, and Opcode receives its opcode.
/// NOTE(review): this listing omits some source lines (the embedded line
/// numbers skip), including the declarations of Inst, Match and MatchChars.
2702 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2703 OperandVector &Operands,
2705 uint64_t &ErrorInfo,
2706 bool MatchingInlineAsm) {
2707 assert(!Operands.empty() && "Unexpect empty operand list!");
2708 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2709 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2710 SMRange EmptyRange = None;
2712 // First, handle aliases that expand to multiple instructions.
2713 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2715 bool WasOriginallyInvalidOperand = false;
2718 // First, try a direct match.
2719 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2720 isParsingIntelSyntax())) {
2721 default: llvm_unreachable("Unexpected match result!");
2723 // Some instructions need post-processing to, for example, tweak which
2724 // encoding is selected. Loop on it while changes happen so the
2725 // individual transformations can chain off each other.
2726 if (!MatchingInlineAsm)
2727 while (processInstruction(Inst, Operands))
2731 if (!MatchingInlineAsm)
2732 EmitInstruction(Inst, Operands, Out);
2733 Opcode = Inst.getOpcode();
2735 case Match_MissingFeature:
2736 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2737 case Match_InvalidOperand:
2738 WasOriginallyInvalidOperand = true;
2740 case Match_MnemonicFail:
2744 // FIXME: Ideally, we would only attempt suffix matches for things which are
2745 // valid prefixes, and we could just infer the right unambiguous
2746 // type. However, that requires substantially more matcher support than the
2749 // Change the operand to point to a temporary token.
2750 StringRef Base = Op.getToken();
2751 SmallString<16> Tmp;
2754 Op.setTokenValue(Tmp);
2756 // If this instruction starts with an 'f', then it is a floating point stack
2757 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2758 // 80-bit floating point, which use the suffixes s,l,t respectively.
2760 // Otherwise, we assume that this may be an integer instruction, which comes
2761 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2762 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2764 // Check for the various suffix matches.
2765 uint64_t ErrorInfoIgnore;
2766 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2769 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2770 Tmp.back() = Suffixes[I];
2771 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2772 MatchingInlineAsm, isParsingIntelSyntax());
2773 // If this returned as a missing feature failure, remember that.
2774 if (Match[I] == Match_MissingFeature)
2775 ErrorInfoMissingFeature = ErrorInfoIgnore;
2778 // Restore the old token.
2779 Op.setTokenValue(Base);
2781 // If exactly one matched, then we treat that as a successful match (and the
2782 // instruction will already have been filled in correctly, since the failing
2783 // matches won't have modified it).
2784 unsigned NumSuccessfulMatches =
2785 std::count(std::begin(Match), std::end(Match), Match_Success);
2786 if (NumSuccessfulMatches == 1) {
2788 if (!MatchingInlineAsm)
2789 EmitInstruction(Inst, Operands, Out);
2790 Opcode = Inst.getOpcode();
2794 // Otherwise, the match failed, try to produce a decent error message.
2796 // If we had multiple suffix matches, then identify this as an ambiguous
2798 if (NumSuccessfulMatches > 1) {
2800 unsigned NumMatches = 0;
2801 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2802 if (Match[I] == Match_Success)
2803 MatchChars[NumMatches++] = Suffixes[I];
2805 SmallString<126> Msg;
2806 raw_svector_ostream OS(Msg);
2807 OS << "ambiguous instructions require an explicit suffix (could be ";
2808 for (unsigned i = 0; i != NumMatches; ++i) {
2811 if (i + 1 == NumMatches)
2813 OS << "'" << Base << MatchChars[i] << "'";
2816 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
2820 // Okay, we know that none of the variants matched successfully.
2822 // If all of the instructions reported an invalid mnemonic, then the original
2823 // mnemonic was invalid.
// NOTE(review): the literal 4 assumes Match has four entries; its declaration
// is not visible in this listing — confirm it agrees with array_lengthof(Match).
2824 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2825 if (!WasOriginallyInvalidOperand) {
2826 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2827 Op.getLocRange(), MatchingInlineAsm);
2830 // Recover location info for the operand if we know which was the problem.
2831 if (ErrorInfo != ~0ULL) {
2832 if (ErrorInfo >= Operands.size())
2833 return Error(IDLoc, "too few operands for instruction", EmptyRange,
2836 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2837 if (Operand.getStartLoc().isValid()) {
2838 SMRange OperandRange = Operand.getLocRange();
2839 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2840 OperandRange, MatchingInlineAsm);
2844 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2848 // If one instruction matched with a missing feature, report this as a
2850 if (std::count(std::begin(Match), std::end(Match),
2851 Match_MissingFeature) == 1) {
2852 ErrorInfo = ErrorInfoMissingFeature;
2853 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2857 // If one instruction matched with an invalid operand, report this as an
2859 if (std::count(std::begin(Match), std::end(Match),
2860 Match_InvalidOperand) == 1) {
2861 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2865 // If all of these were an outright failure, report it in a useless way.
2866 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2867 EmptyRange, MatchingInlineAsm);
/// Match and emit an instruction whose mnemonic token is Operands[0], trying
/// several candidate matches and collecting their results in Match: a
/// suffixed "push" with a constant immediate, each memory size in MopSizes
/// when an unsized memory operand is present, and finally the operands as
/// written if nothing has been tried yet. Exactly one success is accepted
/// (emitting unless MatchingInlineAsm and setting Opcode); several successes
/// are retried with the frontend-provided size if there is one, and otherwise
/// reported as ambiguous.
/// NOTE(review): this listing omits some source lines (the embedded line
/// numbers skip), including the declaration of Inst.
2871 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2872 OperandVector &Operands,
2874 uint64_t &ErrorInfo,
2875 bool MatchingInlineAsm) {
2876 assert(!Operands.empty() && "Unexpect empty operand list!");
2877 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2878 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2879 StringRef Mnemonic = Op.getToken();
2880 SMRange EmptyRange = None;
2881 StringRef Base = Op.getToken();
2883 // First, handle aliases that expand to multiple instructions.
2884 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2888 // Find one unsized memory operand, if present.
2889 X86Operand *UnsizedMemOp = nullptr;
2890 for (const auto &Op : Operands) {
2891 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2892 if (X86Op->isMemUnsized()) {
2893 UnsizedMemOp = X86Op;
2894 // Have we found an unqualified memory operand,
2895 // break. IA allows only one memory operand.
2900 // Allow some instructions to have implicitly pointer-sized operands. This is
2901 // compatible with gas.
2903 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2904 for (const char *Instr : PtrSizedInstrs) {
2905 if (Mnemonic == Instr) {
2906 UnsizedMemOp->Mem.Size = getPointerWidth();
2912 SmallVector<unsigned, 8> Match;
2913 uint64_t ErrorInfoMissingFeature = 0;
2915 // If an unsized push has an immediate operand, use the pointer width as its
2916 // size.
2917 if (Mnemonic == "push" && Operands.size() == 2) {
2918 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
2919 if (X86Op->isImm()) {
2920 // If it's not a constant fall through and let remainder take care of it.
2921 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
2922 unsigned Size = getPointerWidth();
2924 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
2925 SmallString<16> Tmp;
2927 Tmp += (is64BitMode())
2929 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
2930 Op.setTokenValue(Tmp);
2931 // Do match in ATT mode to allow explicit suffix usage.
2932 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
2934 false /*isParsingIntelSyntax()*/));
2935 Op.setTokenValue(Base);
2940 // If an unsized memory operand is present, try to match with each memory
2941 // operand size. In Intel assembly, the size is not part of the instruction
2943 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2944 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2945 for (unsigned Size : MopSizes) {
2946 UnsizedMemOp->Mem.Size = Size;
2947 uint64_t ErrorInfoIgnore;
2948 unsigned LastOpcode = Inst.getOpcode();
2949 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2950 MatchingInlineAsm, isParsingIntelSyntax());
2951 if (Match.empty() || LastOpcode != Inst.getOpcode())
2954 // If this returned as a missing feature failure, remember that.
2955 if (Match.back() == Match_MissingFeature)
2956 ErrorInfoMissingFeature = ErrorInfoIgnore;
2959 // Restore the size of the unsized memory operand if we modified it.
2960 UnsizedMemOp->Mem.Size = 0;
2963 // If we haven't matched anything yet, this is not a basic integer or FPU
2964 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2965 // matching with the unsized operand.
2966 if (Match.empty()) {
2967 Match.push_back(MatchInstruction(
2968 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
2969 // If this returned as a missing feature failure, remember that.
2970 if (Match.back() == Match_MissingFeature)
2971 ErrorInfoMissingFeature = ErrorInfo;
2974 // Restore the size of the unsized memory operand if we modified it.
2976 UnsizedMemOp->Mem.Size = 0;
2978 // If it's a bad mnemonic, all results will be the same.
2979 if (Match.back() == Match_MnemonicFail) {
2980 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2981 Op.getLocRange(), MatchingInlineAsm);
2984 unsigned NumSuccessfulMatches =
2985 std::count(std::begin(Match), std::end(Match), Match_Success);
2987 // If matching was ambiguous and we had size information from the frontend,
2988 // try again with that. This handles cases like "movzx eax, m8/m16".
2989 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
2990 UnsizedMemOp->getMemFrontendSize()) {
2991 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
2992 unsigned M = MatchInstruction(
2993 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
2994 if (M == Match_Success)
2995 NumSuccessfulMatches = 1;
2997 // Add a rewrite that encodes the size information we used from the
2999 InstInfo->AsmRewrites->emplace_back(
3000 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3001 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3004 // If exactly one matched, then we treat that as a successful match (and the
3005 // instruction will already have been filled in correctly, since the failing
3006 // matches won't have modified it).
3007 if (NumSuccessfulMatches == 1) {
3008 // Some instructions need post-processing to, for example, tweak which
3009 // encoding is selected. Loop on it while changes happen so the individual
3010 // transformations can chain off each other.
3011 if (!MatchingInlineAsm)
3012 while (processInstruction(Inst, Operands))
3015 if (!MatchingInlineAsm)
3016 EmitInstruction(Inst, Operands, Out);
3017 Opcode = Inst.getOpcode();
3019 } else if (NumSuccessfulMatches > 1) {
3020 assert(UnsizedMemOp &&
3021 "multiple matches only possible with unsized memory operands");
3022 return Error(UnsizedMemOp->getStartLoc(),
3023 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3024 UnsizedMemOp->getLocRange());
3027 // If one instruction matched with a missing feature, report this as a
3029 if (std::count(std::begin(Match), std::end(Match),
3030 Match_MissingFeature) == 1) {
3031 ErrorInfo = ErrorInfoMissingFeature;
3032 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3036 // If one instruction matched with an invalid operand, report this as an
3038 if (std::count(std::begin(Match), std::end(Match),
3039 Match_InvalidOperand) == 1) {
3040 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3044 // If all of these were an outright failure, report it in a useless way.
3045 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
/// Returns true when RegNo is a member of the SEGMENT_REG register class.
3049 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3050 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handle the X86-specific directives visible here: ".word", ".code*",
/// ".att_syntax", ".intel_syntax" and ".even". ".att_syntax" selects assembler
/// dialect 0 and clears the parsing-inline-asm flag; ".intel_syntax" selects
/// dialect 1 and sets that flag. "noprefix" after ".att_syntax" and "prefix"
/// after ".intel_syntax" are rejected with an error.
/// NOTE(review): the return statements and the handling of the accepted
/// "prefix"/"noprefix" tokens are not visible in this listing.
3053 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3054 MCAsmParser &Parser = getParser();
3055 StringRef IDVal = DirectiveID.getIdentifier();
3056 if (IDVal == ".word")
3057 return ParseDirectiveWord(2, DirectiveID.getLoc());
3058 else if (IDVal.startswith(".code"))
3059 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3060 else if (IDVal.startswith(".att_syntax")) {
3061 getParser().setParsingInlineAsm(false);
3062 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3063 if (Parser.getTok().getString() == "prefix")
3065 else if (Parser.getTok().getString() == "noprefix")
3066 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3067 "supported: registers must have a "
3068 "'%' prefix in .att_syntax");
3070 getParser().setAssemblerDialect(0);
3072 } else if (IDVal.startswith(".intel_syntax")) {
3073 getParser().setAssemblerDialect(1);
3074 getParser().setParsingInlineAsm(true);
3075 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3076 if (Parser.getTok().getString() == "noprefix")
3078 else if (Parser.getTok().getString() == "prefix")
3079 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3080 "supported: registers must not have "
3081 "a '%' prefix in .intel_syntax");
3084 } else if (IDVal == ".even")
3085 return parseDirectiveEven(DirectiveID.getLoc());
3089 /// parseDirectiveEven
/// Handles ".even": the directive takes no arguments (any further token is
/// reported as "unexpected token in directive"). Emits an alignment of 2 in
/// the current section, via EmitCodeAlignment when the section reports
/// UseCodeAlign().
/// NOTE(review): the EmitValueToAlignment call is presumably the else branch,
/// and InitSections presumably runs only when there is no current section;
/// neither guard is visible in this listing.
3091 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3092 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3093 TokError("unexpected token in directive");
3096 const MCSection *Section = getStreamer().getCurrentSectionOnly();
3098 getStreamer().InitSections(false);
3099 Section = getStreamer().getCurrentSectionOnly();
3101 if (Section->UseCodeAlign())
3102 getStreamer().EmitCodeAlignment(2, 0);
3104 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3107 /// ParseDirectiveWord
3108 /// ::= .word [ expression (, expression)* ]
/// Each parsed expression is emitted as a Size-byte value. A constant
/// expression must fit in 8 * Size bits as either an unsigned or a signed
/// value, otherwise "literal value out of range for directive" is reported;
/// non-constant expressions are passed to EmitValue.
/// NOTE(review): the loop construct and return statements are not visible in
/// this listing.
3109 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
3110 MCAsmParser &Parser = getParser();
3111 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3113 const MCExpr *Value;
3114 SMLoc ExprLoc = getLexer().getLoc();
3115 if (getParser().parseExpression(Value))
3118 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
3119 assert(Size <= 8 && "Invalid size");
3120 uint64_t IntValue = MCE->getValue();
3121 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3122 return Error(ExprLoc, "literal value out of range for directive");
3123 getStreamer().EmitIntValue(IntValue, Size);
3125 getStreamer().EmitValue(Value, Size, ExprLoc);
3128 if (getLexer().is(AsmToken::EndOfStatement))
3131 // FIXME: Improve diagnostic.
3132 if (getLexer().isNot(AsmToken::Comma)) {
3133 Error(L, "unexpected token in directive");
3144 /// ParseDirectiveCode
3145 /// ::= .code16 | .code32 | .code64
/// ".code16gcc" is also recognized. For each directive the parser mode is
/// switched and the matching MCAF_Code* assembler flag is emitted, but only
/// when the parser is not already in that mode. Any other spelling is
/// reported as "unknown directive".
/// NOTE(review): statements between the visible lines (e.g. whatever records
/// the Code16GCC state) are not shown in this listing.
3146 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3147 MCAsmParser &Parser = getParser();
3149 if (IDVal == ".code16") {
3151 if (!is16BitMode()) {
3152 SwitchMode(X86::Mode16Bit);
3153 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3155 } else if (IDVal == ".code16gcc") {
3156 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3159 if (!is16BitMode()) {
3160 SwitchMode(X86::Mode16Bit);
3161 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3163 } else if (IDVal == ".code32") {
3165 if (!is32BitMode()) {
3166 SwitchMode(X86::Mode32Bit);
3167 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3169 } else if (IDVal == ".code64") {
3171 if (!is64BitMode()) {
3172 SwitchMode(X86::Mode64Bit);
3173 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3176 Error(L, "unknown directive " + IDVal);
3183 // Force static initialization.
// Registers X86AsmParser as the MC assembly parser for both the 32-bit and
// the 64-bit X86 targets.
3184 extern "C" void LLVMInitializeX86AsmParser() {
3185 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3186 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3189 #define GET_REGISTER_MATCHER
3190 #define GET_MATCHER_IMPLEMENTATION
3191 #define GET_SUBTARGET_FEATURE_NAME
3192 #include "X86GenAsmMatcher.inc"