1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
43 static const char OpPrecedence[] = {
59 class X86AsmParser : public MCTargetAsmParser {
61 const MCInstrInfo &MII;
62 ParseInstructionInfo *InstInfo;
63 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
72 enum InfixCalculatorTok {
88 class InfixCalculator {
89 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
90 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
91 SmallVector<ICToken, 4> PostfixStack;
94 int64_t popOperand() {
95 assert (!PostfixStack.empty() && "Poped an empty stack!");
96 ICToken Op = PostfixStack.pop_back_val();
97 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
98 && "Expected and immediate or register!");
/// Append an operand token (Op, Val) to the postfix stack.
/// Only IC_IMM and IC_REGISTER are accepted as operand kinds; anything else
/// trips the assertion. Val defaults to 0 when the caller supplies none.
101 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
102 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
103 "Unexpected operand!");
104 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discard the operator currently on top of the infix operator stack.
107 void popOperator() { InfixOperatorStack.pop_back(); }
108 void pushOperator(InfixCalculatorTok Op) {
109 // Push the new operator if the stack is empty.
110 if (InfixOperatorStack.empty()) {
111 InfixOperatorStack.push_back(Op);
115 // Push the new operator if it has a higher precedence than the operator
116 // on the top of the stack or the operator on the top of the stack is a
118 unsigned Idx = InfixOperatorStack.size() - 1;
119 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
120 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
121 InfixOperatorStack.push_back(Op);
125 // The operator on the top of the stack has higher precedence than the
127 unsigned ParenCount = 0;
129 // Nothing to process.
130 if (InfixOperatorStack.empty())
133 Idx = InfixOperatorStack.size() - 1;
134 StackOp = InfixOperatorStack[Idx];
135 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
138 // If we have an even parentheses count and we see a left parenthesis,
139 // then stop processing.
140 if (!ParenCount && StackOp == IC_LPAREN)
143 if (StackOp == IC_RPAREN) {
145 InfixOperatorStack.pop_back();
146 } else if (StackOp == IC_LPAREN) {
148 InfixOperatorStack.pop_back();
150 InfixOperatorStack.pop_back();
151 PostfixStack.push_back(std::make_pair(StackOp, 0));
154 // Push the new operator.
155 InfixOperatorStack.push_back(Op);
158 // Push any remaining operators onto the postfix stack.
159 while (!InfixOperatorStack.empty()) {
160 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
161 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
162 PostfixStack.push_back(std::make_pair(StackOp, 0));
165 if (PostfixStack.empty())
168 SmallVector<ICToken, 16> OperandStack;
169 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
170 ICToken Op = PostfixStack[i];
171 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
172 OperandStack.push_back(Op);
174 assert (OperandStack.size() > 1 && "Too few operands.");
176 ICToken Op2 = OperandStack.pop_back_val();
177 ICToken Op1 = OperandStack.pop_back_val();
180 report_fatal_error("Unexpected operator!");
183 Val = Op1.second + Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
187 Val = Op1.second - Op2.second;
188 OperandStack.push_back(std::make_pair(IC_IMM, Val));
191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192 "Multiply operation with an immediate and a register!");
193 Val = Op1.second * Op2.second;
194 OperandStack.push_back(std::make_pair(IC_IMM, Val));
197 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
198 "Divide operation with an immediate and a register!");
199 assert (Op2.second != 0 && "Division by zero!");
200 Val = Op1.second / Op2.second;
201 OperandStack.push_back(std::make_pair(IC_IMM, Val));
204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205 "Or operation with an immediate and a register!");
206 Val = Op1.second | Op2.second;
207 OperandStack.push_back(std::make_pair(IC_IMM, Val));
210 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
211 "Xor operation with an immediate and a register!");
212 Val = Op1.second ^ Op2.second;
213 OperandStack.push_back(std::make_pair(IC_IMM, Val));
216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217 "And operation with an immediate and a register!");
218 Val = Op1.second & Op2.second;
219 OperandStack.push_back(std::make_pair(IC_IMM, Val));
222 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
223 "Left shift operation with an immediate and a register!");
224 Val = Op1.second << Op2.second;
225 OperandStack.push_back(std::make_pair(IC_IMM, Val));
228 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
229 "Right shift operation with an immediate and a register!");
230 Val = Op1.second >> Op2.second;
231 OperandStack.push_back(std::make_pair(IC_IMM, Val));
236 assert (OperandStack.size() == 1 && "Expected a single result.");
237 return OperandStack.pop_back_val().second;
241 enum IntelExprState {
262 class IntelExprStateMachine {
263 IntelExprState State, PrevState;
264 unsigned BaseReg, IndexReg, TmpReg, Scale;
268 bool StopOnLBrac, AddImmPrefix;
270 InlineAsmIdentifierInfo Info;
272 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
273 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
274 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
275 AddImmPrefix(addimmprefix) { Info.clear(); }
277 unsigned getBaseReg() { return BaseReg; }
278 unsigned getIndexReg() { return IndexReg; }
279 unsigned getScale() { return Scale; }
280 const MCExpr *getSym() { return Sym; }
281 StringRef getSymName() { return SymName; }
/// Return the stored immediate (Imm) plus the value produced by running the
/// infix calculator (IC.execute()).
282 int64_t getImm() { return Imm + IC.execute(); }
/// True when the state machine currently sits in IES_RBRAC or IES_INTEGER,
/// the only two states this predicate accepts as a valid end state.
283 bool isValidEndState() {
284 return State == IES_RBRAC || State == IES_INTEGER;
286 bool getStopOnLBrac() { return StopOnLBrac; }
287 bool getAddImmPrefix() { return AddImmPrefix; }
/// True when the state machine's current state is IES_ERROR.
288 bool hadError() { return State == IES_ERROR; }
290 InlineAsmIdentifierInfo &getIdentifierInfo() {
295 IntelExprState CurrState = State;
304 IC.pushOperator(IC_OR);
307 PrevState = CurrState;
310 IntelExprState CurrState = State;
319 IC.pushOperator(IC_XOR);
322 PrevState = CurrState;
325 IntelExprState CurrState = State;
334 IC.pushOperator(IC_AND);
337 PrevState = CurrState;
340 IntelExprState CurrState = State;
349 IC.pushOperator(IC_LSHIFT);
352 PrevState = CurrState;
355 IntelExprState CurrState = State;
364 IC.pushOperator(IC_RSHIFT);
367 PrevState = CurrState;
370 IntelExprState CurrState = State;
379 IC.pushOperator(IC_PLUS);
380 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
381 // If we already have a BaseReg, then assume this is the IndexReg with
386 assert (!IndexReg && "BaseReg/IndexReg already set!");
393 PrevState = CurrState;
396 IntelExprState CurrState = State;
412 // Only push the minus operator if it is not a unary operator.
413 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
414 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
415 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
416 IC.pushOperator(IC_MINUS);
417 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
418 // If we already have a BaseReg, then assume this is the IndexReg with
423 assert (!IndexReg && "BaseReg/IndexReg already set!");
430 PrevState = CurrState;
433 IntelExprState CurrState = State;
443 PrevState = CurrState;
445 void onRegister(unsigned Reg) {
446 IntelExprState CurrState = State;
453 State = IES_REGISTER;
455 IC.pushOperand(IC_REGISTER);
458 // Index Register - Scale * Register
459 if (PrevState == IES_INTEGER) {
460 assert (!IndexReg && "IndexReg already set!");
461 State = IES_REGISTER;
463 // Get the scale and replace the 'Scale * Register' with '0'.
464 Scale = IC.popOperand();
465 IC.pushOperand(IC_IMM);
472 PrevState = CurrState;
474 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
485 SymName = SymRefName;
486 IC.pushOperand(IC_IMM);
490 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
491 IntelExprState CurrState = State;
508 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
509 // Index Register - Register * Scale
510 assert (!IndexReg && "IndexReg already set!");
513 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
514 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
517 // Get the scale and replace the 'Register * Scale' with '0'.
519 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
520 PrevState == IES_OR || PrevState == IES_AND ||
521 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
522 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
523 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
524 PrevState == IES_NOT || PrevState == IES_XOR) &&
525 CurrState == IES_MINUS) {
526 // Unary minus. No need to pop the minus operand because it was never
528 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
529 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
530 PrevState == IES_OR || PrevState == IES_AND ||
531 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
532 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
533 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
534 PrevState == IES_NOT || PrevState == IES_XOR) &&
535 CurrState == IES_NOT) {
536 // Unary not. No need to pop the not operand because it was never
538 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
540 IC.pushOperand(IC_IMM, TmpInt);
544 PrevState = CurrState;
556 State = IES_MULTIPLY;
557 IC.pushOperator(IC_MULTIPLY);
570 IC.pushOperator(IC_DIVIDE);
582 IC.pushOperator(IC_PLUS);
587 IntelExprState CurrState = State;
596 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
597 // If we already have a BaseReg, then assume this is the IndexReg with
602 assert (!IndexReg && "BaseReg/IndexReg already set!");
609 PrevState = CurrState;
612 IntelExprState CurrState = State;
628 // FIXME: We don't handle this type of unary minus or not, yet.
629 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
630 PrevState == IES_OR || PrevState == IES_AND ||
631 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
632 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
633 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
634 PrevState == IES_NOT || PrevState == IES_XOR) &&
635 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
640 IC.pushOperator(IC_LPAREN);
643 PrevState = CurrState;
655 IC.pushOperator(IC_RPAREN);
/// Report an error at location L through the generic assembly parser.
/// \param L                 location the message refers to.
/// \param Msg               message text.
/// \param Ranges            optional source ranges forwarded to the parser.
/// \param MatchingInlineAsm when true, nothing is reported and the function
///                          returns true immediately.
/// \return true when MatchingInlineAsm is set; otherwise the result of
///         Parser.Error(L, Msg, Ranges).
661 bool Error(SMLoc L, const Twine &Msg,
662 ArrayRef<SMRange> Ranges = None,
663 bool MatchingInlineAsm = false) {
664 MCAsmParser &Parser = getParser();
// Suppress the message while matching inline asm, but still signal failure.
665 if (MatchingInlineAsm) return true;
666 return Parser.Error(L, Msg, Ranges);
/// Skip the remainder of the current statement, then report an error.
/// Parameters are forwarded unchanged to Error(); the return value is
/// whatever Error() returns.
669 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
670 ArrayRef<SMRange> Ranges = None,
671 bool MatchingInlineAsm = false) {
672 MCAsmParser &Parser = getParser();
// The statement is consumed before the error is emitted.
673 Parser.eatToEndOfStatement();
674 return Error(L, Msg, Ranges, MatchingInlineAsm);
677 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
682 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
683 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
684 std::unique_ptr<X86Operand> ParseOperand();
685 std::unique_ptr<X86Operand> ParseATTOperand();
686 std::unique_ptr<X86Operand> ParseIntelOperand();
687 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
688 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
689 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
690 std::unique_ptr<X86Operand>
691 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
692 std::unique_ptr<X86Operand>
693 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
694 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
695 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
696 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
700 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
701 InlineAsmIdentifierInfo &Info,
702 bool IsUnevaluatedOperand, SMLoc &End);
704 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
706 std::unique_ptr<X86Operand>
707 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
708 unsigned IndexReg, unsigned Scale, SMLoc Start,
709 SMLoc End, unsigned Size, StringRef Identifier,
710 InlineAsmIdentifierInfo &Info);
712 bool ParseDirectiveWord(unsigned Size, SMLoc L);
713 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
715 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
716 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
718 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
719 /// instrumentation around Inst.
720 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
722 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
723 OperandVector &Operands, MCStreamer &Out,
725 bool MatchingInlineAsm) override;
727 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
728 MCStreamer &Out, bool MatchingInlineAsm);
730 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
731 bool MatchingInlineAsm);
733 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
734 OperandVector &Operands, MCStreamer &Out,
736 bool MatchingInlineAsm);
738 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
739 OperandVector &Operands, MCStreamer &Out,
741 bool MatchingInlineAsm);
743 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
745 /// doSrcDstMatch - Returns true if operands are matching in their
746 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
747 /// the parsing mode (Intel vs. AT&T).
748 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
750 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
751 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
752 /// \return \c true if no parsing errors occurred, \c false otherwise.
753 bool HandleAVX512Operand(OperandVector &Operands,
754 const MCParsedAsmOperand &Op);
/// True when the subtarget's feature bits have X86::Mode64Bit set.
756 bool is64BitMode() const {
757 // FIXME: Can tablegen auto-generate this?
758 return STI.getFeatureBits()[X86::Mode64Bit];
/// True when the subtarget's feature bits have X86::Mode32Bit set.
760 bool is32BitMode() const {
761 // FIXME: Can tablegen auto-generate this?
762 return STI.getFeatureBits()[X86::Mode32Bit];
/// True when the subtarget's feature bits have X86::Mode16Bit set.
764 bool is16BitMode() const {
765 // FIXME: Can tablegen auto-generate this?
766 return STI.getFeatureBits()[X86::Mode16Bit];
/// Change the subtarget's mode feature to \p mode and refresh the matcher's
/// available-feature mask.
/// \param mode feature index of the desired mode; the trailing assert implies
///             it is expected to be one of X86::Mode64Bit, X86::Mode32Bit or
///             X86::Mode16Bit.
768 void SwitchMode(unsigned mode) {
769 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Mode bits that are set right now.
770 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Flip the bit for `mode` in that set and toggle the resulting features on
// the subtarget, then recompute the available features from the result.
771 unsigned FB = ComputeAvailableFeatures(
772 STI.ToggleFeature(OldMode.flip(mode)));
773 setAvailableFeatures(FB);
// Afterwards `mode` must be the only mode bit set on the subtarget.
775 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Return the pointer width for the current mode: 16, 32 or 64, chosen by
/// testing is16BitMode(), is32BitMode() and is64BitMode() in that order.
/// Reaching the end with no mode active is flagged as unreachable.
778 unsigned getPointerWidth() {
779 if (is16BitMode()) return 16;
780 if (is32BitMode()) return 32;
781 if (is64BitMode()) return 64;
782 llvm_unreachable("invalid mode");
/// Return the parser's assembler dialect converted to bool, so any non-zero
/// dialect value counts as Intel syntax here.
785 bool isParsingIntelSyntax() {
786 return getParser().getAssemblerDialect();
789 /// @name Auto-generated Matcher Functions
792 #define GET_ASSEMBLER_HEADER
793 #include "X86GenAsmMatcher.inc"
/// Construct the parser: keep references to the subtarget info and the
/// instruction info, start with no ParseInstructionInfo (InstInfo is null),
/// compute the initial available-feature mask, and create the
/// instrumentation object.
/// \param sti     subtarget info, stored in STI.
/// \param Parser  generic parser; only its context is used here.
/// \param mii     instruction info, stored in MII.
/// \param Options target options handed to CreateX86AsmInstrumentation.
798 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
799 const MCInstrInfo &mii, const MCTargetOptions &Options)
800 : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
802 // Initialize the set of available features.
803 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Instrumentation takes ownership of the object returned by the factory.
804 Instrumentation.reset(
805 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
808 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
810 void SetFrameRegister(unsigned RegNo) override;
812 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
813 SMLoc NameLoc, OperandVector &Operands) override;
815 bool ParseDirective(AsmToken DirectiveID) override;
817 } // end anonymous namespace
819 /// @name Auto-generated Match Functions
822 static unsigned MatchRegisterName(StringRef Name);
826 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
828 // If we have both a base register and an index register make sure they are
829 // both 64-bit or 32-bit registers.
830 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
831 if (BaseReg != 0 && IndexReg != 0) {
832 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
833 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
834 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
835 IndexReg != X86::RIZ) {
836 ErrMsg = "base register is 64-bit, but index register is not";
839 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
840 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
841 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
842 IndexReg != X86::EIZ){
843 ErrMsg = "base register is 32-bit, but index register is not";
846 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
847 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
848 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
849 ErrMsg = "base register is 16-bit, but index register is not";
852 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
853 IndexReg != X86::SI && IndexReg != X86::DI) ||
854 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
855 IndexReg != X86::BX && IndexReg != X86::BP)) {
856 ErrMsg = "invalid 16-bit base/index register combination";
864 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
866 // Return true and let a normal complaint about bogus operands happen.
867 if (!Op1.isMem() || !Op2.isMem())
870 // Actually these might be the other way round if Intel syntax is
871 // being used. It doesn't matter.
872 unsigned diReg = Op1.Mem.BaseReg;
873 unsigned siReg = Op2.Mem.BaseReg;
875 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
876 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
877 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
878 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
879 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
880 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
881 // Again, return true and let another error happen.
885 bool X86AsmParser::ParseRegister(unsigned &RegNo,
886 SMLoc &StartLoc, SMLoc &EndLoc) {
887 MCAsmParser &Parser = getParser();
889 const AsmToken &PercentTok = Parser.getTok();
890 StartLoc = PercentTok.getLoc();
892 // If we encounter a %, ignore it. This code handles registers with and
893 // without the prefix, unprefixed registers can occur in cfi directives.
894 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
895 Parser.Lex(); // Eat percent token.
897 const AsmToken &Tok = Parser.getTok();
898 EndLoc = Tok.getEndLoc();
900 if (Tok.isNot(AsmToken::Identifier)) {
901 if (isParsingIntelSyntax()) return true;
902 return Error(StartLoc, "invalid register name",
903 SMRange(StartLoc, EndLoc));
906 RegNo = MatchRegisterName(Tok.getString());
908 // If the match failed, try the register name as lowercase.
910 RegNo = MatchRegisterName(Tok.getString().lower());
912 if (!is64BitMode()) {
913 // FIXME: This should be done using Requires<Not64BitMode> and
914 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
916 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
918 if (RegNo == X86::RIZ ||
919 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
920 X86II::isX86_64NonExtLowByteReg(RegNo) ||
921 X86II::isX86_64ExtendedReg(RegNo))
922 return Error(StartLoc, "register %"
923 + Tok.getString() + " is only available in 64-bit mode",
924 SMRange(StartLoc, EndLoc));
927 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
928 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
930 Parser.Lex(); // Eat 'st'
932 // Check to see if we have '(4)' after %st.
933 if (getLexer().isNot(AsmToken::LParen))
938 const AsmToken &IntTok = Parser.getTok();
939 if (IntTok.isNot(AsmToken::Integer))
940 return Error(IntTok.getLoc(), "expected stack index");
941 switch (IntTok.getIntVal()) {
942 case 0: RegNo = X86::ST0; break;
943 case 1: RegNo = X86::ST1; break;
944 case 2: RegNo = X86::ST2; break;
945 case 3: RegNo = X86::ST3; break;
946 case 4: RegNo = X86::ST4; break;
947 case 5: RegNo = X86::ST5; break;
948 case 6: RegNo = X86::ST6; break;
949 case 7: RegNo = X86::ST7; break;
950 default: return Error(IntTok.getLoc(), "invalid stack index");
953 if (getParser().Lex().isNot(AsmToken::RParen))
954 return Error(Parser.getTok().getLoc(), "expected ')'");
956 EndLoc = Parser.getTok().getEndLoc();
957 Parser.Lex(); // Eat ')'
961 EndLoc = Parser.getTok().getEndLoc();
963 // If this is "db[0-7]", match it as an alias
965 if (RegNo == 0 && Tok.getString().size() == 3 &&
966 Tok.getString().startswith("db")) {
967 switch (Tok.getString()[2]) {
968 case '0': RegNo = X86::DR0; break;
969 case '1': RegNo = X86::DR1; break;
970 case '2': RegNo = X86::DR2; break;
971 case '3': RegNo = X86::DR3; break;
972 case '4': RegNo = X86::DR4; break;
973 case '5': RegNo = X86::DR5; break;
974 case '6': RegNo = X86::DR6; break;
975 case '7': RegNo = X86::DR7; break;
979 EndLoc = Parser.getTok().getEndLoc();
980 Parser.Lex(); // Eat it.
986 if (isParsingIntelSyntax()) return true;
987 return Error(StartLoc, "invalid register name",
988 SMRange(StartLoc, EndLoc));
991 Parser.Lex(); // Eat identifier token.
/// Forward \p RegNo to the instrumentation object as its initial frame
/// register.
995 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
996 Instrumentation->SetInitialFrameRegister(RegNo);
999 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1001 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
1002 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1003 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1004 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
1008 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1010 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1011 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1012 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1013 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Parse one operand using the routine for the active syntax:
/// ParseIntelOperand() when parsing Intel syntax, ParseATTOperand() otherwise.
/// \return the operand produced by the selected routine.
1017 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1018 if (isParsingIntelSyntax())
1019 return ParseIntelOperand();
1020 return ParseATTOperand();
1023 /// getIntelMemOperandSize - Return intel memory operand size.
1024 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1025 unsigned Size = StringSwitch<unsigned>(OpStr)
1026 .Cases("BYTE", "byte", 8)
1027 .Cases("WORD", "word", 16)
1028 .Cases("DWORD", "dword", 32)
1029 .Cases("QWORD", "qword", 64)
1030 .Cases("XWORD", "xword", 80)
1031 .Cases("XMMWORD", "xmmword", 128)
1032 .Cases("YMMWORD", "ymmword", 256)
1033 .Cases("ZMMWORD", "zmmword", 512)
1034 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1039 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1040 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1041 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1042 InlineAsmIdentifierInfo &Info) {
1043 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1044 // some other label reference.
1045 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1046 // Insert an explicit size if the user didn't have one.
1048 Size = getPointerWidth();
1049 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1053 // Create an absolute memory reference in order to match against
1054 // instructions taking a PC relative operand.
1055 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1056 Identifier, Info.OpDecl);
1059 // We either have a direct symbol reference, or an offset from a symbol. The
1060 // parser always puts the symbol on the LHS, so look there for size
1061 // calculation purposes.
1062 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1064 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1067 Size = Info.Type * 8; // Size is in terms of bits in this context.
1069 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1074 // When parsing inline assembly we set the base register to a non-zero value
1075 // if we don't know the actual value at this time. This is necessary to
1076 // get the matching correct in some cases.
1077 BaseReg = BaseReg ? BaseReg : 1;
1078 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1079 IndexReg, Scale, Start, End, Size, Identifier,
1084 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1085 StringRef SymName, int64_t ImmDisp,
1086 int64_t FinalImmDisp, SMLoc &BracLoc,
1087 SMLoc &StartInBrac, SMLoc &End) {
1088 // Remove the '[' and ']' from the IR string.
1089 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1090 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1092 // If ImmDisp is non-zero, then we parsed a displacement before the
1093 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1094 // If ImmDisp doesn't match the displacement computed by the state machine
1095 // then we have an additional displacement in the bracketed expression.
1096 if (ImmDisp != FinalImmDisp) {
1098 // We have an immediate displacement before the bracketed expression.
1099 // Adjust this to match the final immediate displacement.
1101 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1102 E = AsmRewrites->end(); I != E; ++I) {
1103 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1105 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1106 assert (!Found && "ImmDisp already rewritten.");
1107 (*I).Kind = AOK_Imm;
1108 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1109 (*I).Val = FinalImmDisp;
1114 assert (Found && "Unable to rewrite ImmDisp.");
1117 // We have a symbolic and an immediate displacement, but no displacement
1118 // before the bracketed expression. Put the immediate displacement
1119 // before the bracketed expression.
1120 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1123 // Remove all the ImmPrefix rewrites within the brackets.
1124 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1125 E = AsmRewrites->end(); I != E; ++I) {
1126 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1128 if ((*I).Kind == AOK_ImmPrefix)
1129 (*I).Kind = AOK_Delete;
1131 const char *SymLocPtr = SymName.data();
1132 // Skip everything before the symbol.
1133 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1134 assert(Len > 0 && "Expected a non-negative length.");
1135 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1137 // Skip everything after the symbol.
1138 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1139 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1140 assert(Len > 0 && "Expected a non-negative length.");
1141 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1145 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1146 MCAsmParser &Parser = getParser();
1147 const AsmToken &Tok = Parser.getTok();
1151 bool UpdateLocLex = true;
1153 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1154 // identifier. Don't try to parse it as a register.
1155 if (Tok.getString().startswith("."))
1158 // If we're parsing an immediate expression, we don't expect a '['.
1159 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1162 AsmToken::TokenKind TK = getLexer().getKind();
1165 if (SM.isValidEndState()) {
1169 return Error(Tok.getLoc(), "unknown token in expression");
1171 case AsmToken::EndOfStatement: {
1175 case AsmToken::String:
1176 case AsmToken::Identifier: {
1177 // This could be a register or a symbolic displacement.
1180 SMLoc IdentLoc = Tok.getLoc();
1181 StringRef Identifier = Tok.getString();
1182 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1183 SM.onRegister(TmpReg);
1184 UpdateLocLex = false;
1187 if (!isParsingInlineAsm()) {
1188 if (getParser().parsePrimaryExpr(Val, End))
1189 return Error(Tok.getLoc(), "Unexpected identifier!");
1191 // This is a dot operator, not an adjacent identifier.
1192 if (Identifier.find('.') != StringRef::npos) {
1195 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1196 if (ParseIntelIdentifier(Val, Identifier, Info,
1197 /*Unevaluated=*/false, End))
1201 SM.onIdentifierExpr(Val, Identifier);
1202 UpdateLocLex = false;
1205 return Error(Tok.getLoc(), "Unexpected identifier!");
1207 case AsmToken::Integer: {
1209 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1210 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1212 // Look for 'b' or 'f' following an Integer as a directional label
1213 SMLoc Loc = getTok().getLoc();
1214 int64_t IntVal = getTok().getIntVal();
1215 End = consumeToken();
1216 UpdateLocLex = false;
1217 if (getLexer().getKind() == AsmToken::Identifier) {
1218 StringRef IDVal = getTok().getString();
1219 if (IDVal == "f" || IDVal == "b") {
1221 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1222 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1224 MCSymbolRefExpr::create(Sym, Variant, getContext());
1225 if (IDVal == "b" && Sym->isUndefined())
1226 return Error(Loc, "invalid reference to undefined symbol");
1227 StringRef Identifier = Sym->getName();
1228 SM.onIdentifierExpr(Val, Identifier);
1229 End = consumeToken();
1231 if (SM.onInteger(IntVal, ErrMsg))
1232 return Error(Loc, ErrMsg);
1235 if (SM.onInteger(IntVal, ErrMsg))
1236 return Error(Loc, ErrMsg);
1240 case AsmToken::Plus: SM.onPlus(); break;
1241 case AsmToken::Minus: SM.onMinus(); break;
1242 case AsmToken::Tilde: SM.onNot(); break;
1243 case AsmToken::Star: SM.onStar(); break;
1244 case AsmToken::Slash: SM.onDivide(); break;
1245 case AsmToken::Pipe: SM.onOr(); break;
1246 case AsmToken::Caret: SM.onXor(); break;
1247 case AsmToken::Amp: SM.onAnd(); break;
1248 case AsmToken::LessLess:
1249 SM.onLShift(); break;
1250 case AsmToken::GreaterGreater:
1251 SM.onRShift(); break;
1252 case AsmToken::LBrac: SM.onLBrac(); break;
1253 case AsmToken::RBrac: SM.onRBrac(); break;
1254 case AsmToken::LParen: SM.onLParen(); break;
1255 case AsmToken::RParen: SM.onRParen(); break;
1258 return Error(Tok.getLoc(), "unknown token in expression");
1260 if (!Done && UpdateLocLex)
1261 End = consumeToken();
/// Parse an Intel-syntax bracketed memory expression, starting at the '['
/// token, and build the memory operand it describes.
///
/// \param SegReg   Segment register forwarded to the operand constructors.
/// \param Start    Start location recorded on the resulting operand.
/// \param ImmDisp  Displacement parsed before the bracket; it seeds the
///                 expression state machine.
/// \param Size     Operand size forwarded to the operand constructors.
///
/// Returns an error operand if the current token is not '['.
1266 std::unique_ptr<X86Operand>
1267 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1268 int64_t ImmDisp, unsigned Size) {
1269 MCAsmParser &Parser = getParser();
1270 const AsmToken &Tok = Parser.getTok();
1271 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1272 if (getLexer().isNot(AsmToken::LBrac))
1273 return ErrorOperand(BracLoc, "Expected '[' token!");
1274 Parser.Lex(); // Eat '['
// Tok is a reference obtained from Parser.getTok(), so after the Lex() above
// this records the location of the first token inside the brackets.
1276 SMLoc StartInBrac = Tok.getLoc();
1277 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1278 // may have already parsed an immediate displacement before the bracketed
1280 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1281 if (ParseIntelExpression(SM, End))
1284 const MCExpr *Disp = nullptr;
1285 if (const MCExpr *Sym = SM.getSym()) {
1286 // A symbolic displacement.
1288 if (isParsingInlineAsm())
1289 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1290 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Fold the state machine's immediate into the displacement. When no
// displacement has been set yet, the immediate alone (possibly zero) becomes
// the displacement, so Disp is never null past this point.
1294 if (SM.getImm() || !Disp) {
1295 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1297 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1299 Disp = Imm; // An immediate displacement only.
1302 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1303 // will in fact do a global lookup of the field name inside all global
1304 // typedefs, but we don't emulate that.
1305 if (Tok.getString().find('.') != StringRef::npos) {
1306 const MCExpr *NewDisp;
1307 if (ParseIntelDotOperator(Disp, NewDisp))
1310 End = Tok.getEndLoc();
1311 Parser.Lex(); // Eat the field.
1315 int BaseReg = SM.getBaseReg();
1316 int IndexReg = SM.getIndexReg();
1317 int Scale = SM.getScale();
// Standalone assembly: build the operand directly. With neither a base nor an
// index register the operand is displacement-only.
1318 if (!isParsingInlineAsm()) {
1320 if (!BaseReg && !IndexReg) {
1322 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1323 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1327 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1328 Error(StartInBrac, ErrMsg);
1331 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1332 IndexReg, Scale, Start, End, Size);
// Inline assembly: hand the pieces, together with the identifier info the
// state machine collected, to CreateMemForInlineAsm.
1335 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1336 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1337 End, Size, SM.getSymName(), Info);
/// Resolve an identifier in inline assembly through the frontend callback
/// (SemaCallback) and produce a symbol reference expression for it.
///
/// \param Val         Receives the MCSymbolRefExpr created for the identifier.
/// \param Identifier  In: text starting at the identifier. Out: the text the
///                    frontend claimed for it.
/// \param Info        Filled in by SemaCallback->LookupInlineAsmIdentifier.
/// \param IsUnevaluatedOperand  Forwarded to the frontend lookup.
/// \param End         Receives the end location of the last token consumed.
///
/// Must only be called while parsing inline assembly (asserted).
1340 // Inline assembly may use variable names with namespace alias qualifiers.
1341 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1342 StringRef &Identifier,
1343 InlineAsmIdentifierInfo &Info,
1344 bool IsUnevaluatedOperand, SMLoc &End) {
1345 MCAsmParser &Parser = getParser();
1346 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from Identifier.data() alone, i.e. it extends from the
// start of the identifier to the next NUL byte. The frontend lookup receives
// it by name and LineBuf.size() afterwards tells how much text was claimed.
1349 StringRef LineBuf(Identifier.data());
1351 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1353 const AsmToken &Tok = Parser.getTok();
1354 SMLoc Loc = Tok.getLoc();
1356 // Advance the token stream until the end of the current token is
1357 // after the end of what the frontend claimed.
1358 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1360 End = Tok.getEndLoc();
1363 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1364 if (End.getPointer() == EndPtr) break;
1366 Identifier = LineBuf;
1368 // If the identifier lookup was unsuccessful, assume that we are dealing with
// an assembly label: ask the frontend for its internal name and record a
// rewrite so the emitted text uses that name.
1371 StringRef InternalName =
1372 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1374 assert(InternalName.size() && "We should have an internal name here.");
1375 // Push a rewrite for replacing the identifier name with the internal name.
1376 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1381 // Create the symbol reference.
1382 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1383 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1384 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1388 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows segment register
/// SegReg (which must be non-zero; asserted). After the colon this accepts an
/// optional integer displacement. If an integer was present and no '[' follows,
/// the result is a plain SegReg:displacement memory operand. If a '[' follows,
/// parsing continues in ParseIntelBracExpression. Otherwise the remainder is
/// parsed as a primary expression (standalone assembly) or as a
/// frontend-resolved identifier (inline assembly).
1389 std::unique_ptr<X86Operand>
1390 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1392 MCAsmParser &Parser = getParser();
1393 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1394 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1395 if (Tok.isNot(AsmToken::Colon))
1396 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1397 Parser.Lex(); // Eat ':'
1399 int64_t ImmDisp = 0;
1400 if (getLexer().is(AsmToken::Integer)) {
1401 ImmDisp = Tok.getIntVal();
1402 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
// In inline assembly, record a rewrite so the integer gets an immediate prefix.
1404 if (isParsingInlineAsm())
1405 InstInfo->AsmRewrites->push_back(
1406 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1408 if (getLexer().isNot(AsmToken::LBrac)) {
1409 // An immediate following a 'segment register', 'colon' token sequence can
1410 // be followed by a bracketed expression. If it isn't we know we have our
1411 // final segment override.
1412 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1413 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1414 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1415 Start, ImmDispToken.getEndLoc(), Size);
1419 if (getLexer().is(AsmToken::LBrac))
1420 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
// NOTE(review): neither of the two paths below passes SegReg on to the operand
// (the CreateMem overload used takes no segment, and CreateMemForInlineAsm is
// given /*SegReg=*/0) -- confirm that dropping the override here is intended.
1424 if (!isParsingInlineAsm()) {
1425 if (getParser().parsePrimaryExpr(Val, End))
1426 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1428 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1431 InlineAsmIdentifierInfo Info;
1432 StringRef Identifier = Tok.getString();
1433 if (ParseIntelIdentifier(Val, Identifier, Info,
1434 /*Unevaluated=*/false, End))
1436 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1437 /*Scale=*/1, Start, End, Size, Identifier, Info);
1440 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1441 std::unique_ptr<X86Operand>
1442 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1443 MCAsmParser &Parser = getParser();
1444 const AsmToken &Tok = Parser.getTok();
1445 // Eat "{" and mark the current place.
1446 const SMLoc consumedToken = consumeToken();
1447 if (Tok.getIdentifier().startswith("r")){
1448 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1449 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1450 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1451 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1452 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1455 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1456 Parser.Lex(); // Eat "r*" of r*-sae
1457 if (!getLexer().is(AsmToken::Minus))
1458 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1459 Parser.Lex(); // Eat "-"
1460 Parser.Lex(); // Eat the sae
1461 if (!getLexer().is(AsmToken::RCurly))
1462 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1463 Parser.Lex(); // Eat "}"
1464 const MCExpr *RndModeOp =
1465 MCConstantExpr::create(rndMode, Parser.getContext());
1466 return X86Operand::CreateImm(RndModeOp, Start, End);
1468 if(Tok.getIdentifier().equals("sae")){
1469 Parser.Lex(); // Eat the sae
1470 if (!getLexer().is(AsmToken::RCurly))
1471 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1472 Parser.Lex(); // Eat "}"
1473 return X86Operand::CreateToken("{sae}", consumedToken);
1475 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1477 /// ParseIntelMemOperand - Parse intel style memory operand.
///
/// If the current token is '[', the whole operand is handled by
/// ParseIntelBracExpression with no segment register and ImmDisp as the
/// leading displacement. Otherwise ImmDisp must be 0 (asserted) and the
/// operand is either a primary expression (standalone assembly) or an
/// identifier resolved through the frontend (inline assembly), optionally
/// followed by a bracketed immediate: Identifier [ ImmDisp ].
1478 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1481 MCAsmParser &Parser = getParser();
1482 const AsmToken &Tok = Parser.getTok();
1485 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1486 if (getLexer().is(AsmToken::LBrac))
1487 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1488 assert(ImmDisp == 0);
1491 if (!isParsingInlineAsm()) {
1492 if (getParser().parsePrimaryExpr(Val, End))
1493 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1495 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1498 InlineAsmIdentifierInfo Info;
1499 StringRef Identifier = Tok.getString();
1500 if (ParseIntelIdentifier(Val, Identifier, Info,
1501 /*Unevaluated=*/false, End))
// A bare identifier with no trailing '[' is a complete operand.
1504 if (!getLexer().is(AsmToken::LBrac))
1505 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1506 /*Scale=*/1, Start, End, Size, Identifier, Info);
1508 Parser.Lex(); // Eat '['
1510 // Parse Identifier [ ImmDisp ]
1511 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1512 /*AddImmPrefix=*/false);
1513 if (ParseIntelExpression(SM, End))
// The bracketed part may contribute only an immediate: a second symbol, a
// base register or an index register are each diagnosed below.
1517 Error(Start, "cannot use more than one symbol in memory operand");
1520 if (SM.getBaseReg()) {
1521 Error(Start, "cannot use base register with variable reference");
1524 if (SM.getIndexReg()) {
1525 Error(Start, "cannot use index register with variable reference");
1529 const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
1530 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1531 // we're pointing to a local variable in memory, so the base register is
1532 // really the frame or stack pointer.
1533 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1534 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1535 Start, End, Size, Identifier, Info.OpDecl);
1538 /// Parse the '.' operator.
1539 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1540 const MCExpr *&NewDisp) {
1541 MCAsmParser &Parser = getParser();
1542 const AsmToken &Tok = Parser.getTok();
1543 int64_t OrigDispVal, DotDispVal;
1545 // FIXME: Handle non-constant expressions.
1546 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1547 OrigDispVal = OrigDisp->getValue();
1549 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1551 // Drop the optional '.'.
1552 StringRef DotDispStr = Tok.getString();
1553 if (DotDispStr.startswith("."))
1554 DotDispStr = DotDispStr.drop_front(1);
1556 // .Imm gets lexed as a real.
1557 if (Tok.is(AsmToken::Real)) {
1559 DotDispStr.getAsInteger(10, DotDisp);
1560 DotDispVal = DotDisp.getZExtValue();
1561 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1563 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1564 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1566 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1567 DotDispVal = DotDisp;
1569 return Error(Tok.getLoc(), "Unexpected token type!");
1571 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1572 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1573 unsigned Len = DotDispStr.size();
1574 unsigned Val = OrigDispVal + DotDispVal;
1575 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1579 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1583 /// Parse the 'offset' operator. This operator is used to specify the
1584 /// location rather than the content of a variable.
///
/// The current token must be the operator itself. The identifier that follows
/// is resolved with ParseIntelIdentifier, and the result is a register operand
/// created with GetAddress=true that carries the identifier and its
/// declaration.
1585 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1586 MCAsmParser &Parser = getParser();
1587 const AsmToken &Tok = Parser.getTok();
1588 SMLoc OffsetOfLoc = Tok.getLoc();
1589 Parser.Lex(); // Eat offset.
1592 InlineAsmIdentifierInfo Info;
1593 SMLoc Start = Tok.getLoc(), End;
1594 StringRef Identifier = Tok.getString();
1595 if (ParseIntelIdentifier(Val, Identifier, Info,
1596 /*Unevaluated=*/false, End))
1599 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// character (a single separating space) -- confirm.
1600 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1602 // The offset operator will have an 'r' constraint, thus we need to create
1603 // register operand to ensure proper matching. Just pick a GPR based on
1604 // the size of a pointer.
1606 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1607 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1608 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which Intel operator ParseIntelOperator evaluates; that function
// switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1611 enum IntelOperatorKind {
1617 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1618 /// returns the number of elements in an array. It returns the value 1 for
1619 /// non-array variables. The SIZE operator returns the size of a C or C++
1620 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1621 /// TYPE operator returns the size of a C or C++ type or variable. If the
1622 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind  One of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value
///                reaches llvm_unreachable.
/// \returns an immediate operand holding the selected value taken from the
///          identifier info filled in by ParseIntelIdentifier, or an error
///          operand ("unable to lookup expression").
1623 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1624 MCAsmParser &Parser = getParser();
1625 const AsmToken &Tok = Parser.getTok();
1626 SMLoc TypeLoc = Tok.getLoc();
1627 Parser.Lex(); // Eat operator.
1629 const MCExpr *Val = nullptr;
1630 InlineAsmIdentifierInfo Info;
1631 SMLoc Start = Tok.getLoc(), End;
1632 StringRef Identifier = Tok.getString();
// The identifier is looked up as an unevaluated operand.
1633 if (ParseIntelIdentifier(Val, Identifier, Info,
1634 /*Unevaluated=*/true, End))
1638 return ErrorOperand(Start, "unable to lookup expression");
1642 default: llvm_unreachable("Unexpected operand kind!");
1643 case IOK_LENGTH: CVal = Info.Length; break;
1644 case IOK_SIZE: CVal = Info.Size; break;
1645 case IOK_TYPE: CVal = Info.Type; break;
1648 // Rewrite the type operator and the C or C++ type or variable in terms of an
1649 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator keyword to the end of the
// identifier, so the rewrite replaces both.
1650 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1651 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1653 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1654 return X86Operand::CreateImm(Imm, Start, End);
/// Parse a single operand in Intel syntax.
///
/// In order, this handles: the inline-assembly-only operators (offset, length,
/// size, type); an optional operand-size keyword followed by PTR/ptr;
/// immediates and immediate displacements that precede a bracketed
/// expression; AVX-512 rounding-mode operands in braces; registers and segment
/// overrides; and finally plain memory operands.
1657 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1658 MCAsmParser &Parser = getParser();
1659 const AsmToken &Tok = Parser.getTok();
1662 // Offset, length, type and size operators.
// Only the all-lowercase and all-uppercase spellings are recognized.
1663 if (isParsingInlineAsm()) {
1664 StringRef AsmTokStr = Tok.getString();
1665 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1666 return ParseIntelOffsetOfOperator();
1667 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1668 return ParseIntelOperator(IOK_LENGTH);
1669 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1670 return ParseIntelOperator(IOK_SIZE);
1671 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1672 return ParseIntelOperator(IOK_TYPE);
1675 unsigned Size = getIntelMemOperandSize(Tok.getString());
1677 Parser.Lex(); // Eat operand size (e.g., byte, word).
1678 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1679 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1680 Parser.Lex(); // Eat ptr.
1682 Start = Tok.getLoc();
// An operand that begins with an integer, '-', '~' or '(' is an immediate
// expression; it may turn out to be a displacement in front of '['.
1685 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1686 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1687 AsmToken StartTok = Tok;
1688 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1689 /*AddImmPrefix=*/false);
1690 if (ParseIntelExpression(SM, End))
1693 int64_t Imm = SM.getImm();
1694 if (isParsingInlineAsm()) {
// Len is the length of the source text consumed by the expression.
1695 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1696 if (StartTok.getString().size() == Len)
1697 // Just add a prefix if this wasn't a complex immediate expression.
1698 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1700 // Otherwise, rewrite the complex expression as a single immediate.
1701 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1704 if (getLexer().isNot(AsmToken::LBrac)) {
1705 // If a directional label (ie. 1f or 2b) was parsed above from
1706 // ParseIntelExpression() then SM.getSym() was set to a pointer
1707 // to the MCExpr with the directional local symbol and this is a
1708 // memory operand not an immediate operand.
1710 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1713 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1714 return X86Operand::CreateImm(ImmExpr, Start, End);
1717 // Only positive immediates are valid.
1719 return ErrorOperand(Start, "expected a positive immediate displacement "
1720 "before bracketed expr.");
1722 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1723 return ParseIntelMemOperand(Imm, Start, Size);
1726 // rounding mode token
1727 if (STI.getFeatureBits()[X86::FeatureAVX512] &&
1728 getLexer().is(AsmToken::LCurly))
1729 return ParseRoundingModeOp(Start, End);
// ParseRegister returns false on success, so this branch handles a register.
1733 if (!ParseRegister(RegNo, Start, End)) {
1734 // If this is a segment register followed by a ':', then this is the start
1735 // of a segment override, otherwise this is a normal register reference.
1736 if (getLexer().isNot(AsmToken::Colon))
1737 return X86Operand::CreateReg(RegNo, Start, End);
1739 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
// Anything else is parsed as a memory operand with no leading displacement.
1743 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
/// Parse a single operand in AT&T syntax, dispatching on the kind of the
/// current token: '%' introduces a register (or a segment-prefixed memory
/// reference), '$' an immediate expression, '{' an AVX-512 rounding-mode
/// operand. The remaining path parses a memory operand with no segment
/// register (presumably the switch's default case).
1746 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1747 MCAsmParser &Parser = getParser();
1748 switch (getLexer().getKind()) {
1750 // Parse a memory operand with no segment register.
1751 return ParseMemOperand(0, Parser.getTok().getLoc());
1752 case AsmToken::Percent: {
1753 // Read the register.
// ParseRegister returns true on failure.
1756 if (ParseRegister(RegNo, Start, End)) return nullptr;
1757 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1758 Error(Start, "%eiz and %riz can only be used as index registers",
1759 SMRange(Start, End));
1763 // If this is a segment register followed by a ':', then this is the start
1764 // of a memory reference, otherwise this is a normal register reference.
1765 if (getLexer().isNot(AsmToken::Colon))
1766 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1768 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1769 return ErrorOperand(Start, "invalid segment register");
1771 getParser().Lex(); // Eat the colon.
1772 return ParseMemOperand(RegNo, Start);
1774 case AsmToken::Dollar: {
1775 // $42 -> immediate.
1776 SMLoc Start = Parser.getTok().getLoc(), End;
1779 if (getParser().parseExpression(Val, End))
1781 return X86Operand::CreateImm(Val, Start, End);
1783 case AsmToken::LCurly:{
// Braces are only meaningful here when AVX-512 is enabled. End is
// default-constructed when it is passed on.
1784 SMLoc Start = Parser.getTok().getLoc(), End;
1785 if (STI.getFeatureBits()[X86::FeatureAVX512])
1786 return ParseRoundingModeOp(Start, End);
1787 return ErrorOperand(Start, "unknown token in expression");
/// Parse the AVX-512 decorations that may follow an operand and append the
/// corresponding operands to \p Operands: a memory broadcast "{1to<NUM>}", or
/// a mask register in braces optionally followed by the zeroing marker "{z}".
/// Nothing is parsed unless the AVX512 feature bit is set and the current
/// token is '{'.
///
/// Note the return convention: every error path returns
/// !ErrorAndEatStatement(...), and the caller in ParseInstruction treats a
/// false result as failure.
1792 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1793 const MCParsedAsmOperand &Op) {
1794 MCAsmParser &Parser = getParser();
1795 if(STI.getFeatureBits()[X86::FeatureAVX512]) {
1796 if (getLexer().is(AsmToken::LCurly)) {
1797 // Eat "{" and mark the current place.
1798 const SMLoc consumedToken = consumeToken();
1799 // Distinguish {1to<NUM>} from {%k<NUM>}.
1800 if(getLexer().is(AsmToken::Integer)) {
1801 // Parse memory broadcasting ({1to<NUM>}).
1802 if (getLexer().getTok().getIntVal() != 1)
1803 return !ErrorAndEatStatement(getLexer().getLoc(),
1804 "Expected 1to<NUM> at this point");
1805 Parser.Lex(); // Eat "1" of 1to8
1806 if (!getLexer().is(AsmToken::Identifier) ||
1807 !getLexer().getTok().getIdentifier().startswith("to"))
1808 return !ErrorAndEatStatement(getLexer().getLoc(),
1809 "Expected 1to<NUM> at this point");
1810 // Recognize only reasonable suffixes.
1811 const char *BroadcastPrimitive =
1812 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1813 .Case("to2", "{1to2}")
1814 .Case("to4", "{1to4}")
1815 .Case("to8", "{1to8}")
1816 .Case("to16", "{1to16}")
1818 if (!BroadcastPrimitive)
1819 return !ErrorAndEatStatement(getLexer().getLoc(),
1820 "Invalid memory broadcast primitive.");
1821 Parser.Lex(); // Eat "toN" of 1toN
1822 if (!getLexer().is(AsmToken::RCurly))
1823 return !ErrorAndEatStatement(getLexer().getLoc(),
1824 "Expected } at this point");
1825 Parser.Lex(); // Eat "}"
1826 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1828 // No AVX512 specific primitives can pass
1829 // after memory broadcasting, so return.
1832 // Parse mask register {%k1}
// The braces are pushed as separate "{" and "}" token operands around the
// operand parsed between them.
1833 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1834 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1835 Operands.push_back(std::move(Op));
1836 if (!getLexer().is(AsmToken::RCurly))
1837 return !ErrorAndEatStatement(getLexer().getLoc(),
1838 "Expected } at this point");
1839 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1841 // Parse "zeroing non-masked" semantic {z}
// NOTE(review): the "{z}" token is appended as soon as '{' is seen, before
// the 'z' and '}' that follow are validated.
1842 if (getLexer().is(AsmToken::LCurly)) {
1843 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1844 if (!getLexer().is(AsmToken::Identifier) ||
1845 getLexer().getTok().getIdentifier() != "z")
1846 return !ErrorAndEatStatement(getLexer().getLoc(),
1847 "Expected z at this point");
1848 Parser.Lex(); // Eat the z
1849 if (!getLexer().is(AsmToken::RCurly))
1850 return !ErrorAndEatStatement(getLexer().getLoc(),
1851 "Expected } at this point");
1852 Parser.Lex(); // Eat the }
1861 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1862 /// has already been parsed if present.
///
/// \param SegReg  Segment register parsed by the caller, or 0 if there was
///                none.
///
/// Produces a memory operand. When no segment, base or index register is
/// involved, the displacement-only CreateMem overload is used.
1863 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1866 MCAsmParser &Parser = getParser();
1867 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1868 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1869 // only way to do this without lookahead is to eat the '(' and see what is
// The displacement defaults to the constant 0 and is replaced if an
// expression is parsed.
1871 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
1872 if (getLexer().isNot(AsmToken::LParen)) {
1874 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1876 // After parsing the base expression we could either have a parenthesized
1877 // memory address or not. If not, return now. If so, eat the (.
1878 if (getLexer().isNot(AsmToken::LParen)) {
1879 // Unless we have a segment register, treat this as an immediate.
1881 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1882 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1889 // Okay, we have a '('. We don't know if this is an expression or not,
1890 // so we have to eat the ( to see beyond it.
1891 SMLoc LParenLoc = Parser.getTok().getLoc();
1892 Parser.Lex(); // Eat the '('.
1894 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1895 // Nothing to do here, fall into the code below with the '(' part of the
1896 // memory operand consumed.
1900 // It must be a parenthesized expression, parse it now.
1901 if (getParser().parseParenExpression(Disp, ExprEnd))
1904 // After parsing the base expression we could either have a parenthesized
1905 // memory address or not. If not, return now. If so, eat the (.
1906 if (getLexer().isNot(AsmToken::LParen)) {
1907 // Unless we have a segment register, treat this as an immediate.
1909 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1911 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1920 // If we reached here, then we just ate the ( of the memory operand. Process
1921 // the rest of the memory operand.
1922 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1923 SMLoc IndexLoc, BaseLoc;
1925 if (getLexer().is(AsmToken::Percent)) {
1926 SMLoc StartLoc, EndLoc;
1927 BaseLoc = Parser.getTok().getLoc();
1928 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1929 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1930 Error(StartLoc, "eiz and riz can only be used as index registers",
1931 SMRange(StartLoc, EndLoc));
1936 if (getLexer().is(AsmToken::Comma)) {
1937 Parser.Lex(); // Eat the comma.
1938 IndexLoc = Parser.getTok().getLoc();
1940 // Following the comma we should have either an index register, or a scale
1941 // value. We don't support the latter form, but we want to parse it
1944 // Note that even though it would be completely consistent to support syntax
1945 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1946 if (getLexer().is(AsmToken::Percent)) {
1948 if (ParseRegister(IndexReg, L, L)) return nullptr;
1950 if (getLexer().isNot(AsmToken::RParen)) {
1951 // Parse the scale amount:
1952 // ::= ',' [scale-expression]
1953 if (getLexer().isNot(AsmToken::Comma)) {
1954 Error(Parser.getTok().getLoc(),
1955 "expected comma in scale expression");
1958 Parser.Lex(); // Eat the comma.
1960 if (getLexer().isNot(AsmToken::RParen)) {
1961 SMLoc Loc = Parser.getTok().getLoc();
1964 if (getParser().parseAbsoluteExpression(ScaleVal)){
1965 Error(Loc, "expected scale expression");
1969 // Validate the scale amount.
1970 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1972 Error(Loc, "scale factor in 16-bit address must be 1");
1975 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1976 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1979 Scale = (unsigned)ScaleVal;
1982 } else if (getLexer().isNot(AsmToken::RParen)) {
1983 // A scale amount without an index is ignored.
1985 SMLoc Loc = Parser.getTok().getLoc();
1988 if (getParser().parseAbsoluteExpression(Value))
1992 Warning(Loc, "scale factor without index register is ignored");
1997 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1998 if (getLexer().isNot(AsmToken::RParen)) {
1999 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2002 SMLoc MemEnd = Parser.getTok().getEndLoc();
2003 Parser.Lex(); // Eat the ')'.
2005 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2006 // and then only in non-64-bit modes. Except for DX, which is a special case
2007 // because an unofficial form of in/out instructions uses it.
2008 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2009 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2010 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2011 BaseReg != X86::DX) {
2012 Error(BaseLoc, "invalid 16-bit base register");
2016 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2017 Error(IndexLoc, "16-bit memory operand may not include only index register");
2022 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2023 Error(BaseLoc, ErrMsg);
// Use the full form whenever any register is involved; otherwise the operand
// is just a displacement.
2027 if (SegReg || BaseReg || IndexReg)
2028 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2029 IndexReg, Scale, MemStart, MemEnd);
2030 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2033 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2034 SMLoc NameLoc, OperandVector &Operands) {
2035 MCAsmParser &Parser = getParser();
2037 StringRef PatchedName = Name;
2039 // FIXME: Hack to recognize setneb as setne.
2040 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2041 PatchedName != "setb" && PatchedName != "setnb")
2042 PatchedName = PatchedName.substr(0, Name.size()-1);
2044 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2045 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2046 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2047 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2048 bool IsVCMP = PatchedName[0] == 'v';
2049 unsigned CCIdx = IsVCMP ? 4 : 3;
2050 unsigned ComparisonCode = StringSwitch<unsigned>(
2051 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2055 .Case("unord", 0x03)
2060 /* AVX only from here */
2061 .Case("eq_uq", 0x08)
2064 .Case("false", 0x0B)
2065 .Case("neq_oq", 0x0C)
2069 .Case("eq_os", 0x10)
2070 .Case("lt_oq", 0x11)
2071 .Case("le_oq", 0x12)
2072 .Case("unord_s", 0x13)
2073 .Case("neq_us", 0x14)
2074 .Case("nlt_uq", 0x15)
2075 .Case("nle_uq", 0x16)
2076 .Case("ord_s", 0x17)
2077 .Case("eq_us", 0x18)
2078 .Case("nge_uq", 0x19)
2079 .Case("ngt_uq", 0x1A)
2080 .Case("false_os", 0x1B)
2081 .Case("neq_os", 0x1C)
2082 .Case("ge_oq", 0x1D)
2083 .Case("gt_oq", 0x1E)
2084 .Case("true_us", 0x1F)
2086 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2088 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2091 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2092 getParser().getContext());
2093 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2095 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2099 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2100 if (PatchedName.startswith("vpcmp") &&
2101 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2102 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2103 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2104 unsigned ComparisonCode = StringSwitch<unsigned>(
2105 PatchedName.slice(5, PatchedName.size() - CCIdx))
2106 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2109 //.Case("false", 0x3) // Not a documented alias.
2113 //.Case("true", 0x7) // Not a documented alias.
2115 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2116 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2118 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2119 getParser().getContext());
2120 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2122 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2126 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2127 if (PatchedName.startswith("vpcom") &&
2128 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2129 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2130 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2131 unsigned ComparisonCode = StringSwitch<unsigned>(
2132 PatchedName.slice(5, PatchedName.size() - CCIdx))
2142 if (ComparisonCode != ~0U) {
2143 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2145 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2146 getParser().getContext());
2147 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2149 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2153 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2155 // Determine whether this is an instruction prefix.
2157 Name == "lock" || Name == "rep" ||
2158 Name == "repe" || Name == "repz" ||
2159 Name == "repne" || Name == "repnz" ||
2160 Name == "rex64" || Name == "data16";
2163 // This does the actual operand parsing. Don't parse any more if we have a
2164 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2165 // just want to parse the "lock" as the first instruction and the "incl" as
2167 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2169 // Parse '*' modifier.
2170 if (getLexer().is(AsmToken::Star))
2171 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2173 // Read the operands.
2175 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2176 Operands.push_back(std::move(Op));
2177 if (!HandleAVX512Operand(Operands, *Operands.back()))
2180 Parser.eatToEndOfStatement();
2183 // check for comma and eat it
2184 if (getLexer().is(AsmToken::Comma))
2190 if (getLexer().isNot(AsmToken::EndOfStatement))
2191 return ErrorAndEatStatement(getLexer().getLoc(),
2192 "unexpected token in argument list");
2195 // Consume the EndOfStatement or the prefix separator Slash
2196 if (getLexer().is(AsmToken::EndOfStatement) ||
2197 (isPrefix && getLexer().is(AsmToken::Slash)))
2200 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2201 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2202 // documented form in various unofficial manuals, so a lot of code uses it.
2203 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2204 Operands.size() == 3) {
2205 X86Operand &Op = (X86Operand &)*Operands.back();
2206 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2207 isa<MCConstantExpr>(Op.Mem.Disp) &&
2208 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2209 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2210 SMLoc Loc = Op.getEndLoc();
2211 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2214 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2215 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2216 Operands.size() == 3) {
2217 X86Operand &Op = (X86Operand &)*Operands[1];
2218 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2219 isa<MCConstantExpr>(Op.Mem.Disp) &&
2220 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2221 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2222 SMLoc Loc = Op.getEndLoc();
2223 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2227 // Append default arguments to "ins[bwld]"
2228 if (Name.startswith("ins") && Operands.size() == 1 &&
2229 (Name == "insb" || Name == "insw" || Name == "insl" ||
2231 if (isParsingIntelSyntax()) {
2232 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2233 Operands.push_back(DefaultMemDIOperand(NameLoc));
2235 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2236 Operands.push_back(DefaultMemDIOperand(NameLoc));
2240 // Append default arguments to "outs[bwld]"
2241 if (Name.startswith("outs") && Operands.size() == 1 &&
2242 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2243 Name == "outsd" )) {
2244 if (isParsingIntelSyntax()) {
2245 Operands.push_back(DefaultMemSIOperand(NameLoc));
2246 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2248 Operands.push_back(DefaultMemSIOperand(NameLoc));
2249 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2253 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2254 // values of $SIREG according to the mode. It would be nice if this
2255 // could be achieved with InstAlias in the tables.
2256 if (Name.startswith("lods") && Operands.size() == 1 &&
2257 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2258 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2259 Operands.push_back(DefaultMemSIOperand(NameLoc));
2261 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2262 // values of $DIREG according to the mode. It would be nice if this
2263 // could be achieved with InstAlias in the tables.
2264 if (Name.startswith("stos") && Operands.size() == 1 &&
2265 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2266 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2267 Operands.push_back(DefaultMemDIOperand(NameLoc));
2269 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2270 // values of $DIREG according to the mode. It would be nice if this
2271 // could be achieved with InstAlias in the tables.
2272 if (Name.startswith("scas") && Operands.size() == 1 &&
2273 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2274 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2275 Operands.push_back(DefaultMemDIOperand(NameLoc));
2277 // Add default SI and DI operands to "cmps[bwlq]".
2278 if (Name.startswith("cmps") &&
2279 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2280 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2281 if (Operands.size() == 1) {
2282 if (isParsingIntelSyntax()) {
2283 Operands.push_back(DefaultMemSIOperand(NameLoc));
2284 Operands.push_back(DefaultMemDIOperand(NameLoc));
2286 Operands.push_back(DefaultMemDIOperand(NameLoc));
2287 Operands.push_back(DefaultMemSIOperand(NameLoc));
2289 } else if (Operands.size() == 3) {
2290 X86Operand &Op = (X86Operand &)*Operands[1];
2291 X86Operand &Op2 = (X86Operand &)*Operands[2];
2292 if (!doSrcDstMatch(Op, Op2))
2293 return Error(Op.getStartLoc(),
2294 "mismatching source and destination index registers");
2298 // Add default SI and DI operands to "movs[bwlq]".
2299 if ((Name.startswith("movs") &&
2300 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2301 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2302 (Name.startswith("smov") &&
2303 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2304 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2305 if (Operands.size() == 1) {
2306 if (Name == "movsd")
2307 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2308 if (isParsingIntelSyntax()) {
2309 Operands.push_back(DefaultMemDIOperand(NameLoc));
2310 Operands.push_back(DefaultMemSIOperand(NameLoc));
2312 Operands.push_back(DefaultMemSIOperand(NameLoc));
2313 Operands.push_back(DefaultMemDIOperand(NameLoc));
2315 } else if (Operands.size() == 3) {
2316 X86Operand &Op = (X86Operand &)*Operands[1];
2317 X86Operand &Op2 = (X86Operand &)*Operands[2];
2318 if (!doSrcDstMatch(Op, Op2))
2319 return Error(Op.getStartLoc(),
2320 "mismatching source and destination index registers");
2324 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to the form without the explicit $1 operand.
2326 if ((Name.startswith("shr") || Name.startswith("sar") ||
2327 Name.startswith("shl") || Name.startswith("sal") ||
2328 Name.startswith("rcl") || Name.startswith("rcr") ||
2329 Name.startswith("rol") || Name.startswith("ror")) &&
2330 Operands.size() == 3) {
2331 if (isParsingIntelSyntax()) {
2333 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2334 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2335 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2336 Operands.pop_back();
2338 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2339 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2340 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2341 Operands.erase(Operands.begin() + 1);
2345 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2346 // instalias with an immediate operand yet.
2347 if (Name == "int" && Operands.size() == 2) {
2348 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2349 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2350 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2351 Operands.erase(Operands.begin() + 1);
2352 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
/// Build, in TmpInst, a replacement for \p Inst that uses opcode \p Opcode:
/// register operand(s) for \p Reg followed by operand 0 of \p Inst.
/// NOTE(review): the second signature line, the declaration of TmpInst, any
/// guard on the first register operand and the final assignment/return are
/// not visible in this listing. Callers pass a fourth `isCmp` argument --
/// confirm that it controls whether \p Reg is added once or twice.
2359 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2362 TmpInst.setOpcode(Opcode);
// Register operand(s) first, then the original instruction's operand 0.
2364 TmpInst.addOperand(MCOperand::createReg(Reg));
2365 TmpInst.addOperand(MCOperand::createReg(Reg));
2366 TmpInst.addOperand(Inst.getOperand(0));
/// Try to convert \p Inst to \p Opcode through convertToSExti8(), using AX as
/// the register. The conversion is attempted only when operand 0 is an
/// immediate for which isImmSExti16i8Value() holds.
/// \param isCmp forwarded unchanged to convertToSExti8().
/// \returns the result of convertToSExti8() when the conversion is attempted.
/// NOTE(review): the statement guarded by the `if` (the rejection path) is
/// not visible in this listing.
2371 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2372 bool isCmp = false) {
2373 if (!Inst.getOperand(0).isImm() ||
2374 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2377 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
/// Try to convert \p Inst to \p Opcode through convertToSExti8(), using EAX as
/// the register. The conversion is attempted only when operand 0 is an
/// immediate for which isImmSExti32i8Value() holds.
/// \param isCmp forwarded unchanged to convertToSExti8().
/// \returns the result of convertToSExti8() when the conversion is attempted.
/// NOTE(review): the statement guarded by the `if` (the rejection path) is
/// not visible in this listing.
2380 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2381 bool isCmp = false) {
2382 if (!Inst.getOperand(0).isImm() ||
2383 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2386 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
/// Try to convert \p Inst to \p Opcode through convertToSExti8(), using RAX as
/// the register. The conversion is attempted only when operand 0 is an
/// immediate for which isImmSExti64i8Value() holds.
/// \param isCmp forwarded unchanged to convertToSExti8().
/// \returns the result of convertToSExti8() when the conversion is attempted.
/// NOTE(review): the statement guarded by the `if` (the rejection path) is
/// not visible in this listing.
2389 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2390 bool isCmp = false) {
2391 if (!Inst.getOperand(0).isImm() ||
2392 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2395 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
/// Perform extra semantic checks on a successfully matched instruction.
/// Opcodes without a special case are accepted (the default returns true).
/// For the special-cased opcode, operand 1 of \p Ops must be an immediate that
/// evaluates to an absolute value no greater than 255; otherwise the
/// "interrupt vector" error below is reported at the operand's location.
/// NOTE(review): the case label, the declaration of Res and the returns of
/// the special case are not visible in this listing.
2398 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2399 switch (Inst.getOpcode()) {
2400 default: return true;
2402 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2403 assert(Op.isImm() && "expected immediate");
// NOTE(review): only the upper bound is tested here; if Res is a signed type,
// a negative vector would not be rejected by this condition -- confirm.
2405 if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
2406 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2411 llvm_unreachable("handle the instruction appropriately");
/// Post-process a matched instruction, possibly switching it to another
/// opcode. The matchers in this file call it in a loop for as long as it
/// returns true; opcodes without a special case return false.
///
/// Two groups of rewrites are visible:
///  - AND/XOR/OR/CMP/ADD/SUB/ADC/SBB opcodes named *16i16, *32i32 and *64i32
///    are handed to the convert*to*ri8() helpers together with the matching
///    *ri8 opcode (the CMP cases pass isCmp = true);
///  - the listed register-to-register VMOV* opcodes are switched to their
///    _REV counterparts, after a test on which operands are x86-64 extended
///    registers.
/// NOTE(review): the statements guarded by the extended-register tests, the
/// declarations of NewOpc and the returns of the VMOV cases are not visible
/// in this listing.
2414 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2415 switch (Inst.getOpcode()) {
2416 default: return false;
2417 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2418 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2419 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2420 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2421 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2422 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2423 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2424 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2425 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2426 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2427 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2428 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2429 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2430 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2431 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2432 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2433 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2434 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2435 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2436 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2437 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2438 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2439 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2440 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
// Two-operand register moves that have a _REV opcode.
2441 case X86::VMOVAPDrr:
2442 case X86::VMOVAPDYrr:
2443 case X86::VMOVAPSrr:
2444 case X86::VMOVAPSYrr:
2445 case X86::VMOVDQArr:
2446 case X86::VMOVDQAYrr:
2447 case X86::VMOVDQUrr:
2448 case X86::VMOVDQUYrr:
2449 case X86::VMOVUPDrr:
2450 case X86::VMOVUPDYrr:
2451 case X86::VMOVUPSrr:
2452 case X86::VMOVUPSYrr: {
// Tested condition: operand 0 is an extended register, or operand 1 is not.
// Presumably this leaves the instruction unchanged (guarded statement not
// visible) -- confirm.
2453 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2454 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
// Map each opcode to its _REV counterpart.
2458 switch (Inst.getOpcode()) {
2459 default: llvm_unreachable("Invalid opcode");
2460 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2461 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2462 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2463 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2464 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2465 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2466 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2467 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2468 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2469 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2470 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2471 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2473 Inst.setOpcode(NewOpc);
// Scalar moves: here the extended-register test looks at operands 0 and 2.
// NOTE(review): the inner switch also maps VMOVSDrr, whose outer case label
// is not visible in this listing.
2477 case X86::VMOVSSrr: {
2478 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2479 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2482 switch (Inst.getOpcode()) {
2483 default: llvm_unreachable("Invalid opcode");
2484 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2485 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2487 Inst.setOpcode(NewOpc);
// Forward declaration; used by ErrorMissingFeature() to name each missing
// feature bit.
// NOTE(review): presumably defined by X86GenAsmMatcher.inc under
// GET_SUBTARGET_FEATURE_NAME (included at the end of this file) -- confirm.
2493 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emit \p Inst by handing it, together with \p Operands and the parser's
/// context, to the Instrumentation object's InstrumentAndEmitInstruction().
/// NOTE(review): the remaining parameters and call arguments are on lines
/// not visible in this listing.
2495 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2497 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching and emitting one parsed instruction. Dispatches
/// to the Intel-syntax matcher when isParsingIntelSyntax() is true and to the
/// AT&T-syntax matcher otherwise, forwarding the arguments and returning the
/// chosen matcher's result.
2501 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2502 OperandVector &Operands,
2503 MCStreamer &Out, uint64_t &ErrorInfo,
2504 bool MatchingInlineAsm) {
2505 if (isParsingIntelSyntax())
2506 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2508 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Handle the x87 mnemonics that have an "fn"-prefixed counterpart.
/// The mnemonic token \p Op is looked up in the table below. The visible code
/// then emits a WAIT instruction (skipped when \p MatchingInlineAsm is set)
/// and replaces Operands[0] with a token for the counterpart mnemonic, so the
/// subsequent match sees the "fn" form.
/// NOTE(review): the StringSwitch default and the condition under which the
/// replacement happens are not visible in this listing; presumably nothing
/// is done when the mnemonic is not in the table -- confirm.
2512 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2513 OperandVector &Operands, MCStreamer &Out,
2514 bool MatchingInlineAsm) {
2515 // FIXME: This should be replaced with a real .td file alias mechanism.
2516 // Also, MatchInstructionImpl should actually *do* the EmitInstruction call.
// Mnemonic as written -> counterpart used for matching.
2518 const char *Repl = StringSwitch<const char *>(Op.getToken())
2519 .Case("finit", "fninit")
2520 .Case("fsave", "fnsave")
2521 .Case("fstcw", "fnstcw")
2522 .Case("fstcww", "fnstcw")
2523 .Case("fstenv", "fnstenv")
2524 .Case("fstsw", "fnstsw")
2525 .Case("fstsww", "fnstsw")
2526 .Case("fclex", "fnclex")
2530 Inst.setOpcode(X86::WAIT);
2532 if (!MatchingInlineAsm)
2533 EmitInstruction(Inst, Operands, Out);
2534 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Report a "missing subtarget feature" diagnostic at \p IDLoc.
///
/// \p ErrorInfo is treated as a bit mask and asserted to be non-zero. For each
/// bit found set, the name returned by getSubtargetFeatureName() is appended
/// to the message "instruction requires:".
/// \returns the result of Error() for the assembled message.
/// NOTE(review): the declaration and update of Mask are not visible in this
/// listing. The loop runs sizeof(ErrorInfo)*8-1 iterations, so if Mask
/// advances by one bit per iteration the top bit is never examined --
/// confirm that this is intended.
2538 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2539 bool MatchingInlineAsm) {
2540 assert(ErrorInfo && "Unknown missing feature!");
2541 ArrayRef<SMRange> EmptyRanges = None;
2542 SmallString<126> Msg;
2543 raw_svector_ostream OS(Msg);
2544 OS << "instruction requires:";
2546 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2547 if (ErrorInfo & Mask)
2548 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2551 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
/// Match and emit an instruction written in AT&T syntax.
///
/// Steps visible in this function:
///  - MatchFPUWaitAlias() is given the chance to expand a waiting x87 alias;
///  - MatchInstructionImpl() is tried on the mnemonic as written. On success
///    the instruction goes through validateInstruction(), then (outside
///    inline asm) processInstruction() in a loop and EmitInstruction(), and
///    the matched opcode is stored in \p Opcode;
///  - otherwise the match is retried once per candidate size suffix, using a
///    temporary copy of the mnemonic whose last character is set to that
///    suffix;
///  - exactly one successful suffix match is emitted; any other outcome
///    produces one of the diagnostics below.
/// When \p MatchingInlineAsm is true nothing is emitted through
/// EmitInstruction().
/// NOTE(review): several declarations (Inst, Match, MatchChars), the
/// construction of Tmp, case labels and plain return statements are on lines
/// not visible in this listing.
2554 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2555 OperandVector &Operands,
2557 uint64_t &ErrorInfo,
2558 bool MatchingInlineAsm) {
2559 assert(!Operands.empty() && "Unexpect empty operand list!");
2560 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2561 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2562 ArrayRef<SMRange> EmptyRanges = None;
2564 // First, handle aliases that expand to multiple instructions.
2565 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers that the direct match failed with Match_InvalidOperand, which
// selects the diagnostic used when every suffix attempt fails on the mnemonic.
2567 bool WasOriginallyInvalidOperand = false;
2570 // First, try a direct match.
2571 switch (MatchInstructionImpl(Operands, Inst,
2572 ErrorInfo, MatchingInlineAsm,
2573 isParsingIntelSyntax())) {
2574 default: llvm_unreachable("Unexpected match result!");
2576 if (!validateInstruction(Inst, Operands))
2579 // Some instructions need post-processing to, for example, tweak which
2580 // encoding is selected. Loop on it while changes happen so the
2581 // individual transformations can chain off each other.
2582 if (!MatchingInlineAsm)
2583 while (processInstruction(Inst, Operands))
2587 if (!MatchingInlineAsm)
2588 EmitInstruction(Inst, Operands, Out);
2589 Opcode = Inst.getOpcode();
2591 case Match_MissingFeature:
2592 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2593 case Match_InvalidOperand:
2594 WasOriginallyInvalidOperand = true;
2596 case Match_MnemonicFail:
2600 // FIXME: Ideally, we would only attempt suffix matches for things which are
2601 // valid prefixes, and we could just infer the right unambiguous
2602 // type. However, that requires substantially more matcher support than the following hack.
2605 // Change the operand to point to a temporary token.
2606 StringRef Base = Op.getToken();
2607 SmallString<16> Tmp;
2610 Op.setTokenValue(Tmp);
2612 // If this instruction starts with an 'f', then it is a floating point stack
2613 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2614 // 80-bit floating point, which use the suffixes s,l,t respectively.
2616 // Otherwise, we assume that this may be an integer instruction, which comes
2617 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2618 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2620 // Check for the various suffix matches.
2621 uint64_t ErrorInfoIgnore;
2622 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// One match attempt per entry of Match, each with the temporary mnemonic's
// last character replaced by the corresponding suffix.
2625 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2626 Tmp.back() = Suffixes[I];
2627 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2628 MatchingInlineAsm, isParsingIntelSyntax());
2629 // If this returned as a missing feature failure, remember that.
2630 if (Match[I] == Match_MissingFeature)
2631 ErrorInfoMissingFeature = ErrorInfoIgnore;
2634 // Restore the old token.
2635 Op.setTokenValue(Base);
2637 // If exactly one matched, then we treat that as a successful match (and the
2638 // instruction will already have been filled in correctly, since the failing
2639 // matches won't have modified it).
2640 unsigned NumSuccessfulMatches =
2641 std::count(std::begin(Match), std::end(Match), Match_Success);
2642 if (NumSuccessfulMatches == 1) {
2644 if (!MatchingInlineAsm)
2645 EmitInstruction(Inst, Operands, Out);
2646 Opcode = Inst.getOpcode();
2650 // Otherwise, the match failed, try to produce a decent error message.
2652 // If we had multiple suffix matches, then identify this as an ambiguous match.
2654 if (NumSuccessfulMatches > 1) {
2656 unsigned NumMatches = 0;
// Collect the suffix character of every attempt that succeeded.
2657 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2658 if (Match[I] == Match_Success)
2659 MatchChars[NumMatches++] = Suffixes[I];
2661 SmallString<126> Msg;
2662 raw_svector_ostream OS(Msg);
2663 OS << "ambiguous instructions require an explicit suffix (could be ";
// List each candidate as '<mnemonic><suffix>'; the separators between entries
// are written on lines not visible in this listing.
2664 for (unsigned i = 0; i != NumMatches; ++i) {
2667 if (i + 1 == NumMatches)
2669 OS << "'" << Base << MatchChars[i] << "'";
2672 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2676 // Okay, we know that none of the variants matched successfully.
2678 // If all of the instructions reported an invalid mnemonic, then the original
2679 // mnemonic was invalid.
2680 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2681 if (!WasOriginallyInvalidOperand) {
2682 ArrayRef<SMRange> Ranges =
2683 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2684 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2685 Ranges, MatchingInlineAsm);
2688 // Recover location info for the operand if we know which was the problem.
2689 if (ErrorInfo != ~0ULL) {
// An index past the end of the operand list means an operand is missing.
2690 if (ErrorInfo >= Operands.size())
2691 return Error(IDLoc, "too few operands for instruction",
2692 EmptyRanges, MatchingInlineAsm);
2694 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2695 if (Operand.getStartLoc().isValid()) {
2696 SMRange OperandRange = Operand.getLocRange();
2697 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2698 OperandRange, MatchingInlineAsm);
2702 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2706 // If one instruction matched with a missing feature, report this as a missing feature.
2708 if (std::count(std::begin(Match), std::end(Match),
2709 Match_MissingFeature) == 1) {
2710 ErrorInfo = ErrorInfoMissingFeature;
2711 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2715 // If one instruction matched with an invalid operand, report this as an operand failure.
2717 if (std::count(std::begin(Match), std::end(Match),
2718 Match_InvalidOperand) == 1) {
2719 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2723 // If all of these were an outright failure, report it in a useless way.
2724 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2725 EmptyRanges, MatchingInlineAsm);
/// Match and emit an instruction written in Intel syntax.
///
/// Steps visible in this function:
///  - MatchFPUWaitAlias() is given the chance to expand a waiting x87 alias;
///  - an operand reporting isMemUnsized() is looked for; for "call", "jmp"
///    and "push" its Mem.Size is set to getPointerWidth();
///  - if the operand is still unsized, the match is retried with Mem.Size set
///    to each entry of MopSizes in turn;
///  - if no result was recorded, one match with the operands as written is
///    performed;
///  - exactly one successful match is validated, post-processed (outside
///    inline asm) and emitted, and the opcode is stored in \p Opcode; several
///    successes are reported as an ambiguous operand size; other outcomes
///    produce the diagnostics below.
/// NOTE(review): the declaration of Inst, some guarding conditions, the
/// statement that records each sized match result and plain return
/// statements are on lines not visible in this listing.
2729 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2730 OperandVector &Operands,
2732 uint64_t &ErrorInfo,
2733 bool MatchingInlineAsm) {
2734 assert(!Operands.empty() && "Unexpect empty operand list!");
2735 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2736 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2737 StringRef Mnemonic = Op.getToken();
2738 ArrayRef<SMRange> EmptyRanges = None;
2740 // First, handle aliases that expand to multiple instructions.
2741 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2745 // Find one unsized memory operand, if present.
2746 X86Operand *UnsizedMemOp = nullptr;
// Note: the loop variable below shadows the mnemonic operand `Op` declared
// above.
2747 for (const auto &Op : Operands) {
2748 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2749 if (X86Op->isMemUnsized())
2750 UnsizedMemOp = X86Op;
2753 // Allow some instructions to have implicitly pointer-sized operands. This is
2754 // compatible with gas.
2756 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2757 for (const char *Instr : PtrSizedInstrs) {
2758 if (Mnemonic == Instr) {
2759 UnsizedMemOp->Mem.Size = getPointerWidth();
2765 // If an unsized memory operand is present, try to match with each memory
2766 // operand size. In Intel assembly, the size is not part of the instruction mnemonic.
2768 SmallVector<unsigned, 8> Match;
2769 uint64_t ErrorInfoMissingFeature = 0;
2770 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2771 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2772 for (unsigned Size : MopSizes) {
2773 UnsizedMemOp->Mem.Size = Size;
2774 uint64_t ErrorInfoIgnore;
2775 unsigned LastOpcode = Inst.getOpcode();
2777 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2778 MatchingInlineAsm, isParsingIntelSyntax());
// The guarded statement is not visible here; the condition holds for the
// first attempt or when this attempt changed the matched opcode.
2779 if (Match.empty() || LastOpcode != Inst.getOpcode())
2782 // If this returned as a missing feature failure, remember that.
2783 if (Match.back() == Match_MissingFeature)
2784 ErrorInfoMissingFeature = ErrorInfoIgnore;
2787 // Restore the size of the unsized memory operand if we modified it.
2789 UnsizedMemOp->Mem.Size = 0;
2792 // If we haven't matched anything yet, this is not a basic integer or FPU
2793 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2794 // matching with the unsized operand.
2795 if (Match.empty()) {
2796 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2798 isParsingIntelSyntax()));
2799 // If this returned as a missing feature failure, remember that.
2800 if (Match.back() == Match_MissingFeature)
2801 ErrorInfoMissingFeature = ErrorInfo;
2804 // Restore the size of the unsized memory operand if we modified it.
2806 UnsizedMemOp->Mem.Size = 0;
2808 // If it's a bad mnemonic, all results will be the same.
2809 if (Match.back() == Match_MnemonicFail) {
2810 ArrayRef<SMRange> Ranges =
2811 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2812 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2813 Ranges, MatchingInlineAsm);
2816 // If exactly one matched, then we treat that as a successful match (and the
2817 // instruction will already have been filled in correctly, since the failing
2818 // matches won't have modified it).
2819 unsigned NumSuccessfulMatches =
2820 std::count(std::begin(Match), std::end(Match), Match_Success);
2821 if (NumSuccessfulMatches == 1) {
2822 if (!validateInstruction(Inst, Operands))
2825 // Some instructions need post-processing to, for example, tweak which
2826 // encoding is selected. Loop on it while changes happen so the individual
2827 // transformations can chain off each other.
2828 if (!MatchingInlineAsm)
2829 while (processInstruction(Inst, Operands))
2832 if (!MatchingInlineAsm)
2833 EmitInstruction(Inst, Operands, Out);
2834 Opcode = Inst.getOpcode();
2836 } else if (NumSuccessfulMatches > 1) {
2837 assert(UnsizedMemOp &&
2838 "multiple matches only possible with unsized memory operands");
2839 ArrayRef<SMRange> Ranges =
2840 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2841 return Error(UnsizedMemOp->getStartLoc(),
2842 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2843 Ranges, MatchingInlineAsm);
2846 // If one instruction matched with a missing feature, report this as a missing feature.
2848 if (std::count(std::begin(Match), std::end(Match),
2849 Match_MissingFeature) == 1) {
2850 ErrorInfo = ErrorInfoMissingFeature;
2851 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2855 // If one instruction matched with an invalid operand, report this as an operand failure.
2857 if (std::count(std::begin(Match), std::end(Match),
2858 Match_InvalidOperand) == 1) {
2859 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2863 // If all of these were an outright failure, report it in a useless way.
2864 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
/// \returns true when \p RegNo is a member of the SEGMENT_REG register class,
/// i.e. the registers this hook asks to leave out of clobber lists.
2868 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2869 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handle the X86-specific assembler directives.
///
///  - ".word" is parsed by ParseDirectiveWord() with a size of 2;
///  - directives starting with ".code" go to ParseDirectiveCode();
///  - directives starting with ".att_syntax" select assembler dialect 0 and
///    reject a "noprefix" argument;
///  - directives starting with ".intel_syntax" select assembler dialect 1 and
///    reject a "prefix" argument.
/// NOTE(review): the handling of the accepted "prefix"/"noprefix" tokens and
/// the return values for the syntax directives and for unrecognized
/// directives are on lines not visible in this listing.
2872 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2873 MCAsmParser &Parser = getParser();
2874 StringRef IDVal = DirectiveID.getIdentifier();
2875 if (IDVal == ".word")
2876 return ParseDirectiveWord(2, DirectiveID.getLoc());
2877 else if (IDVal.startswith(".code"))
2878 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2879 else if (IDVal.startswith(".att_syntax")) {
// An optional argument may follow the directive.
2880 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2881 if (Parser.getTok().getString() == "prefix")
2883 else if (Parser.getTok().getString() == "noprefix")
2884 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2885 "supported: registers must have a "
2886 "'%' prefix in .att_syntax");
2888 getParser().setAssemblerDialect(0);
2890 } else if (IDVal.startswith(".intel_syntax")) {
// NOTE(review): unlike the .att_syntax branch, the dialect is switched before
// the argument is validated, so it appears to stay switched when the "prefix"
// error below is returned -- confirm that this is intended.
2891 getParser().setAssemblerDialect(1);
2892 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2893 if (Parser.getTok().getString() == "noprefix")
2895 else if (Parser.getTok().getString() == "prefix")
2896 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2897 "supported: registers must not have "
2898 "a '%' prefix in .intel_syntax");
2905 /// ParseDirectiveWord
2906 /// ::= .word [ expression (, expression)* ]
///
/// Each parsed expression is emitted to the streamer with EmitValue() using
/// \p Size as the value size. After an expression the statement must either
/// end or continue with a comma; anything else is reported at \p L as
/// "unexpected token in directive".
/// NOTE(review): the loop construct, the token-consuming calls and the return
/// statements are on lines not visible in this listing.
2907 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2908 MCAsmParser &Parser = getParser();
// An empty operand list is allowed by the grammar above.
2909 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2911 const MCExpr *Value;
2912 if (getParser().parseExpression(Value))
2915 getParser().getStreamer().EmitValue(Value, Size);
2917 if (getLexer().is(AsmToken::EndOfStatement))
2920 // FIXME: Improve diagnostic.
2921 if (getLexer().isNot(AsmToken::Comma)) {
2922 Error(L, "unexpected token in directive");
2933 /// ParseDirectiveCode
2934 /// ::= .code16 | .code32 | .code64
///
/// For each recognized directive, if the parser is not already in the
/// requested mode, SwitchMode() is called and the matching MCAF_Code*
/// assembler flag is emitted to the streamer. Any other \p IDVal is reported
/// at \p L as an unknown directive.
/// NOTE(review): the token-consuming calls and the return statements are on
/// lines not visible in this listing.
2935 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2936 MCAsmParser &Parser = getParser();
2937 if (IDVal == ".code16") {
// Switching is skipped when the requested mode is already active.
2939 if (!is16BitMode()) {
2940 SwitchMode(X86::Mode16Bit);
2941 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2943 } else if (IDVal == ".code32") {
2945 if (!is32BitMode()) {
2946 SwitchMode(X86::Mode32Bit);
2947 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2949 } else if (IDVal == ".code64") {
2951 if (!is64BitMode()) {
2952 SwitchMode(X86::Mode64Bit);
2953 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2956 Error(L, "unknown directive " + IDVal);
2963 // Force static initialization.
// Registers X86AsmParser as the assembly parser for both the 32-bit and the
// 64-bit X86 targets. Declared extern "C" so the symbol has C linkage.
2964 extern "C" void LLVMInitializeX86AsmParser() {
2965 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2966 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2969 #define GET_REGISTER_MATCHER
2970 #define GET_MATCHER_IMPLEMENTATION
2971 #define GET_SUBTARGET_FEATURE_NAME
2972 #include "X86GenAsmMatcher.inc"