1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
42 static const char OpPrecedence[] = {
58 class X86AsmParser : public MCTargetAsmParser {
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
/// Captures the source location of the current token in Result.
/// NOTE(review): the remainder of this method is not visible in this listing;
/// presumably it lexes past the token and returns Result -- confirm.
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
/// Wrapper around MatchInstructionImpl(). Operands, Inst, ErrorInfo,
/// matchingInlineAsm and VariantID are forwarded unchanged and the matcher's
/// result is kept in rv.
/// NOTE(review): the guards around the two SwitchMode() calls and the final
/// return are not visible in this listing; per the existing comment the mode
/// switch is meant to apply only in Code16GCC mode -- confirm.
72 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
73 uint64_t &ErrorInfo, bool matchingInlineAsm,
74 unsigned VariantID = 0) {
75 // In Code16GCC mode, match as 32-bit.
77 SwitchMode(X86::Mode32Bit);
78 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
79 matchingInlineAsm, VariantID);
// Switch back to 16-bit mode once matching is done.
81 SwitchMode(X86::Mode16Bit);
85 enum InfixCalculatorTok {
/// Small integer expression evaluator. Operators are buffered on
/// InfixOperatorStack according to OpPrecedence, the expression is accumulated
/// in postfix order on PostfixStack, and the evaluation code at the end of the
/// class folds that postfix sequence into a single int64_t.
/// NOTE(review): this listing omits lines (closing braces, case labels, some
/// headers and returns); comments below describe only what is visible.
101 class InfixCalculator {
// A token is an operator/operand kind paired with an integer payload.
102 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
103 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
104 SmallVector<ICToken, 4> PostfixStack;
/// Removes the top entry of PostfixStack; it must be an IC_IMM or
/// IC_REGISTER operand (checked by assert only).
107 int64_t popOperand() {
108 assert (!PostfixStack.empty() && "Poped an empty stack!");
109 ICToken Op = PostfixStack.pop_back_val();
110 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
111 && "Expected and immediate or register!");
/// Appends the operand (Op, Val) to PostfixStack. Op must be IC_IMM or
/// IC_REGISTER; Val defaults to 0.
114 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
115 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
116 "Unexpected operand!");
117 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discards the operator on top of InfixOperatorStack.
120 void popOperator() { InfixOperatorStack.pop_back(); }
/// Pushes operator Op, first moving operators of higher or equal precedence
/// from InfixOperatorStack onto PostfixStack.
121 void pushOperator(InfixCalculatorTok Op) {
122 // Push the new operator if the stack is empty.
123 if (InfixOperatorStack.empty()) {
124 InfixOperatorStack.push_back(Op);
128 // Push the new operator if it has a higher precedence than the operator
129 // on the top of the stack or the operator on the top of the stack is a
131 unsigned Idx = InfixOperatorStack.size() - 1;
132 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
133 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
134 InfixOperatorStack.push_back(Op);
138 // The operator on the top of the stack has higher precedence than the
140 unsigned ParenCount = 0;
142 // Nothing to process.
143 if (InfixOperatorStack.empty())
146 Idx = InfixOperatorStack.size() - 1;
147 StackOp = InfixOperatorStack[Idx];
// Keep draining only while the stacked operator binds at least as tightly
// as Op, or while ParenCount is non-zero.
148 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
151 // If we have an even parentheses count and we see a left parentheses,
152 // then stop processing.
153 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the operator stack; any other operator is
// moved to the postfix stack.
156 if (StackOp == IC_RPAREN) {
158 InfixOperatorStack.pop_back();
159 } else if (StackOp == IC_LPAREN) {
161 InfixOperatorStack.pop_back();
163 InfixOperatorStack.pop_back();
164 PostfixStack.push_back(std::make_pair(StackOp, 0));
167 // Push the new operator.
168 InfixOperatorStack.push_back(Op);
// Evaluation code (invoked elsewhere in this file as IC.execute(); the method
// header is not visible in this listing).
172 // Push any remaining operators onto the postfix stack.
173 while (!InfixOperatorStack.empty()) {
174 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
175 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
176 PostfixStack.push_back(std::make_pair(StackOp, 0));
179 if (PostfixStack.empty())
// Evaluate the postfix sequence: operands are stacked, and each operator
// consumes the two most recent operands (Op2 is the right-hand side).
182 SmallVector<ICToken, 16> OperandStack;
183 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
184 ICToken Op = PostfixStack[i];
185 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
186 OperandStack.push_back(Op);
188 assert (OperandStack.size() > 1 && "Too few operands.");
190 ICToken Op2 = OperandStack.pop_back_val();
191 ICToken Op1 = OperandStack.pop_back_val();
194 report_fatal_error("Unexpected operator!");
// Every result below is pushed back as an IC_IMM. Addition and subtraction
// carry no operand-kind assert; the remaining operations assert that both
// operands are immediates.
197 Val = Op1.second + Op2.second;
198 OperandStack.push_back(std::make_pair(IC_IMM, Val));
201 Val = Op1.second - Op2.second;
202 OperandStack.push_back(std::make_pair(IC_IMM, Val));
205 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
206 "Multiply operation with an immediate and a register!");
207 Val = Op1.second * Op2.second;
208 OperandStack.push_back(std::make_pair(IC_IMM, Val));
211 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
212 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is rejected by assert only; no other check is
// visible here.
213 assert (Op2.second != 0 && "Division by zero!");
214 Val = Op1.second / Op2.second;
215 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
219 "Or operation with an immediate and a register!");
220 Val = Op1.second | Op2.second;
221 OperandStack.push_back(std::make_pair(IC_IMM, Val));
224 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
225 "Xor operation with an immediate and a register!");
226 Val = Op1.second ^ Op2.second;
227 OperandStack.push_back(std::make_pair(IC_IMM, Val));
230 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
231 "And operation with an immediate and a register!");
232 Val = Op1.second & Op2.second;
233 OperandStack.push_back(std::make_pair(IC_IMM, Val));
236 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
237 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift count is not range-checked in the visible code for
// either shift.
238 Val = Op1.second << Op2.second;
239 OperandStack.push_back(std::make_pair(IC_IMM, Val));
242 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
243 "Right shift operation with an immediate and a register!");
244 Val = Op1.second >> Op2.second;
245 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// A well-formed expression leaves exactly one value, which is the result.
250 assert (OperandStack.size() == 1 && "Expected a single result.");
251 return OperandStack.pop_back_val().second;
255 enum IntelExprState {
/// State machine driven while parsing an Intel-syntax expression. It tracks
/// the current and previous state, the base/index registers and scale seen so
/// far, an immediate and an optional symbol, and forwards operators and
/// operands to the calculator object IC.
/// NOTE(review): this listing omits many lines, including most handler
/// headers, state checks and closing braces. The handler fragments below are
/// therefore labelled by the operator they push, not by method name.
276 class IntelExprStateMachine {
277 IntelExprState State, PrevState;
278 unsigned BaseReg, IndexReg, TmpReg, Scale;
282 bool StopOnLBrac, AddImmPrefix;
284 InlineAsmIdentifierInfo Info;
/// Starts in IES_PLUS with PrevState IES_ERROR, no registers, Scale 1,
/// Imm set to imm, no symbol, and a cleared Info.
287 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
288 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
289 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
290 AddImmPrefix(addimmprefix) { Info.clear(); }
// Plain accessors for the values collected so far.
292 unsigned getBaseReg() { return BaseReg; }
293 unsigned getIndexReg() { return IndexReg; }
294 unsigned getScale() { return Scale; }
295 const MCExpr *getSym() { return Sym; }
296 StringRef getSymName() { return SymName; }
// The immediate is the stored Imm plus whatever the calculator evaluates to.
297 int64_t getImm() { return Imm + IC.execute(); }
// An expression may only end in the IES_RBRAC or IES_INTEGER state.
298 bool isValidEndState() {
299 return State == IES_RBRAC || State == IES_INTEGER;
301 bool getStopOnLBrac() { return StopOnLBrac; }
302 bool getAddImmPrefix() { return AddImmPrefix; }
303 bool hadError() { return State == IES_ERROR; }
305 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Each handler fragment below snapshots State into CurrState on entry and
// stores that snapshot into PrevState on exit.
// Handler fragment that pushes IC_OR.
310 IntelExprState CurrState = State;
319 IC.pushOperator(IC_OR);
322 PrevState = CurrState;
// Handler fragment that pushes IC_XOR.
325 IntelExprState CurrState = State;
334 IC.pushOperator(IC_XOR);
337 PrevState = CurrState;
// Handler fragment that pushes IC_AND.
340 IntelExprState CurrState = State;
349 IC.pushOperator(IC_AND);
352 PrevState = CurrState;
// Handler fragment that pushes IC_LSHIFT.
355 IntelExprState CurrState = State;
364 IC.pushOperator(IC_LSHIFT);
367 PrevState = CurrState;
// Handler fragment that pushes IC_RSHIFT.
370 IntelExprState CurrState = State;
379 IC.pushOperator(IC_RSHIFT);
382 PrevState = CurrState;
// Handler fragment that pushes IC_PLUS and, when the preceding token was a
// register not preceded by a multiply, records that register.
385 IntelExprState CurrState = State;
394 IC.pushOperator(IC_PLUS);
395 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
396 // If we already have a BaseReg, then assume this is the IndexReg with
401 assert (!IndexReg && "BaseReg/IndexReg already set!");
408 PrevState = CurrState;
// Handler fragment for '-'; performs the same register bookkeeping as the
// IC_PLUS fragment.
411 IntelExprState CurrState = State;
427 // Only push the minus operator if it is not a unary operator.
428 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
429 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
430 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
431 IC.pushOperator(IC_MINUS);
432 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
433 // If we already have a BaseReg, then assume this is the IndexReg with
438 assert (!IndexReg && "BaseReg/IndexReg already set!");
445 PrevState = CurrState;
// Handler fragment whose body is not visible in this listing.
448 IntelExprState CurrState = State;
458 PrevState = CurrState;
/// Handles a register token: moves to IES_REGISTER and pushes a register
/// placeholder operand; in the "Scale * Register" form the scale is taken
/// from the operand that was pushed just before.
460 void onRegister(unsigned Reg) {
461 IntelExprState CurrState = State;
468 State = IES_REGISTER;
470 IC.pushOperand(IC_REGISTER);
473 // Index Register - Scale * Register
474 if (PrevState == IES_INTEGER) {
475 assert (!IndexReg && "IndexReg already set!");
476 State = IES_REGISTER;
478 // Get the scale and replace the 'Scale * Register' with '0'.
479 Scale = IC.popOperand();
480 IC.pushOperand(IC_IMM);
487 PrevState = CurrState;
/// Handles a symbolic identifier: remembers its name and pushes an IC_IMM
/// operand with the default value 0 in its place.
489 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
500 SymName = SymRefName;
501 IC.pushOperand(IC_IMM);
/// Handles an integer token. Depending on the two preceding states the value
/// is used as an index scale, pushed negated (unary minus), pushed
/// complemented (unary not), or pushed as is. ErrMsg is set when a scale is
/// not 1, 2, 4 or 8.
/// NOTE(review): the return statements are not visible in this listing.
505 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
506 IntelExprState CurrState = State;
523 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
524 // Index Register - Register * Scale
525 assert (!IndexReg && "IndexReg already set!");
528 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
529 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
532 // Get the scale and replace the 'Register * Scale' with '0'.
534 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
535 PrevState == IES_OR || PrevState == IES_AND ||
536 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
537 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
538 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
539 PrevState == IES_NOT || PrevState == IES_XOR) &&
540 CurrState == IES_MINUS) {
541 // Unary minus. No need to pop the minus operand because it was never
543 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
544 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
545 PrevState == IES_OR || PrevState == IES_AND ||
546 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
547 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
548 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
549 PrevState == IES_NOT || PrevState == IES_XOR) &&
550 CurrState == IES_NOT) {
551 // Unary not. No need to pop the not operand because it was never
553 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
555 IC.pushOperand(IC_IMM, TmpInt);
559 PrevState = CurrState;
// Handler fragment that moves to IES_MULTIPLY and pushes IC_MULTIPLY.
571 State = IES_MULTIPLY;
572 IC.pushOperator(IC_MULTIPLY);
// Handler fragment that pushes IC_DIVIDE.
585 IC.pushOperator(IC_DIVIDE);
// Handler fragment that pushes IC_PLUS (header not visible).
597 IC.pushOperator(IC_PLUS);
// Handler fragment performing the same register bookkeeping as the IC_PLUS
// fragment above (header not visible).
602 IntelExprState CurrState = State;
611 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
612 // If we already have a BaseReg, then assume this is the IndexReg with
617 assert (!IndexReg && "BaseReg/IndexReg already set!");
624 PrevState = CurrState;
// Handler fragment that pushes IC_LPAREN.
627 IntelExprState CurrState = State;
643 // FIXME: We don't handle this type of unary minus or not, yet.
644 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
645 PrevState == IES_OR || PrevState == IES_AND ||
646 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
647 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
648 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
649 PrevState == IES_NOT || PrevState == IES_XOR) &&
650 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
655 IC.pushOperator(IC_LPAREN);
658 PrevState = CurrState;
// Handler fragment that pushes IC_RPAREN.
670 IC.pushOperator(IC_RPAREN);
/// Reports Msg at location L (optionally with Range) through the generic
/// parser's Error(). When MatchingInlineAsm is set, the rest of the current
/// statement is skipped unless the lexer is already at a statement start.
/// NOTE(review): the end of the MatchingInlineAsm branch is not visible in
/// this listing; confirm whether that path returns before Parser.Error().
676 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
677 bool MatchingInlineAsm = false) {
678 MCAsmParser &Parser = getParser();
679 if (MatchingInlineAsm) {
680 if (!getLexer().isAtStartOfStatement())
681 Parser.eatToEndOfStatement();
684 return Parser.Error(L, Msg, Range);
687 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
692 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
693 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
694 bool IsSIReg(unsigned Reg);
695 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
697 AddDefaultSrcDestOperands(OperandVector &Operands,
698 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
699 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
700 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
701 OperandVector &FinalOperands);
702 std::unique_ptr<X86Operand> ParseOperand();
703 std::unique_ptr<X86Operand> ParseATTOperand();
704 std::unique_ptr<X86Operand> ParseIntelOperand();
705 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
706 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
707 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
708 std::unique_ptr<X86Operand>
709 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
710 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
711 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
712 std::unique_ptr<X86Operand>
713 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
714 bool isSymbol, unsigned Size);
715 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
716 InlineAsmIdentifierInfo &Info,
717 bool IsUnevaluatedOperand, SMLoc &End);
719 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
721 std::unique_ptr<X86Operand>
722 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
723 unsigned IndexReg, unsigned Scale, SMLoc Start,
724 SMLoc End, unsigned Size, StringRef Identifier,
725 InlineAsmIdentifierInfo &Info,
726 bool AllowBetterSizeMatch = false);
728 bool parseDirectiveEven(SMLoc L);
729 bool ParseDirectiveWord(unsigned Size, SMLoc L);
730 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
732 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
734 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
735 /// instrumentation around Inst.
736 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
738 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
739 OperandVector &Operands, MCStreamer &Out,
741 bool MatchingInlineAsm) override;
743 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
744 MCStreamer &Out, bool MatchingInlineAsm);
746 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
747 bool MatchingInlineAsm);
749 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
750 OperandVector &Operands, MCStreamer &Out,
752 bool MatchingInlineAsm);
754 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
755 OperandVector &Operands, MCStreamer &Out,
757 bool MatchingInlineAsm);
759 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
761 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
762 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
763 /// return false if no parsing errors occurred, true otherwise.
764 bool HandleAVX512Operand(OperandVector &Operands,
765 const MCParsedAsmOperand &Op);
767 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
769 /// MS-compatibility:
770 /// Obtain an appropriate size qualifier, when facing its absence,
771 /// upon AVX512 vector/broadcast memory operand
772 unsigned AdjustAVX512Mem(unsigned Size, X86Operand* UnsizedMemOpNext);
/// True when the subtarget's feature bits have X86::Mode64Bit set.
774 bool is64BitMode() const {
775 // FIXME: Can tablegen auto-generate this?
776 return getSTI().getFeatureBits()[X86::Mode64Bit];
/// True when the subtarget's feature bits have X86::Mode32Bit set.
778 bool is32BitMode() const {
779 // FIXME: Can tablegen auto-generate this?
780 return getSTI().getFeatureBits()[X86::Mode32Bit];
/// True when the subtarget's feature bits have X86::Mode16Bit set.
782 bool is16BitMode() const {
783 // FIXME: Can tablegen auto-generate this?
784 return getSTI().getFeatureBits()[X86::Mode16Bit];
/// Switches the parser's subtarget to the given mode feature (one of
/// X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit) and refreshes the set of
/// available features accordingly.
786 void SwitchMode(unsigned mode) {
// Work on a private copy of the subtarget info.
787 MCSubtargetInfo &STI = copySTI();
788 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// OldMode holds whichever mode bits are currently set.
789 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Flipping `mode` inside OldMode gives the set of bits handed to
// ToggleFeature(); the resulting feature bits feed ComputeAvailableFeatures().
790 unsigned FB = ComputeAvailableFeatures(
791 STI.ToggleFeature(OldMode.flip(mode)));
792 setAvailableFeatures(FB);
// Afterwards `mode` must be the only mode bit that is set.
794 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Returns 16, 32 or 64 according to the active mode. The modes are tested in
/// that order; reaching the end means no mode bit is set, which is treated as
/// unreachable.
797 unsigned getPointerWidth() {
798 if (is16BitMode()) return 16;
799 if (is32BitMode()) return 32;
800 if (is64BitMode()) return 64;
801 llvm_unreachable("invalid mode");
/// Returns the parser's assembler dialect converted to bool, i.e. any
/// non-zero dialect is treated as Intel syntax.
804 bool isParsingIntelSyntax() {
805 return getParser().getAssemblerDialect();
808 /// @name Auto-generated Matcher Functions
811 #define GET_ASSEMBLER_HEADER
812 #include "X86GenAsmMatcher.inc"
/// Constructs the parser: passes Options and sti to the MCTargetAsmParser
/// base, stores mii in MII and starts with no InstInfo.
/// NOTE(review): part of the initializer list is not visible in this listing.
817 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
818 const MCInstrInfo &mii, const MCTargetOptions &Options)
819 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
822 // Initialize the set of available features.
823 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
// Create the instrumentation object used by SetFrameRegister().
824 Instrumentation.reset(
825 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
828 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
830 void SetFrameRegister(unsigned RegNo) override;
832 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
833 SMLoc NameLoc, OperandVector &Operands) override;
835 bool ParseDirective(AsmToken DirectiveID) override;
837 } // end anonymous namespace
839 /// @name Auto-generated Match Functions
842 static unsigned MatchRegisterName(StringRef Name);
/// Validates a base/index register pair for a memory operand and stores a
/// diagnostic in ErrMsg for each rejected combination.
/// NOTE(review): the last parameter line and all return statements are not
/// visible in this listing; presumably a rejected pair returns true -- confirm.
846 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
848 // If we have both a base register and an index register make sure they are
849 // both 64-bit or 32-bit registers.
850 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// RIP may not be combined with an index register, nor be the index itself.
852 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
853 ErrMsg = "invalid base+index expression";
856 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit index is rejected, except for RIZ.
857 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
858 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
859 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
860 IndexReg != X86::RIZ) {
861 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit index is rejected, except for EIZ.
864 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
865 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
866 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
867 IndexReg != X86::EIZ){
868 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must not be 32- or 64-bit, and only the pairings
// BX/BP with SI/DI (in either role) are accepted.
871 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
872 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
873 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
874 ErrMsg = "base register is 16-bit, but index register is not";
877 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
878 IndexReg != X86::SI && IndexReg != X86::DI) ||
879 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
880 IndexReg != X86::BX && IndexReg != X86::BP)) {
881 ErrMsg = "invalid 16-bit base/index register combination";
/// Parses a register reference at the current token. The matched register is
/// written to RegNo and the covered source range to StartLoc/EndLoc. Outside
/// Intel syntax an optional leading '%' is accepted. For a non-identifier or
/// unrecognised name, Intel syntax returns true without a diagnostic, while
/// the other dialect reports "invalid register name" through Error().
/// NOTE(review): several guards and return statements are not visible in this
/// listing.
889 bool X86AsmParser::ParseRegister(unsigned &RegNo,
890 SMLoc &StartLoc, SMLoc &EndLoc) {
891 MCAsmParser &Parser = getParser();
893 const AsmToken &PercentTok = Parser.getTok();
894 StartLoc = PercentTok.getLoc();
896 // If we encounter a %, ignore it. This code handles registers with and
897 // without the prefix, unprefixed registers can occur in cfi directives.
898 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
899 Parser.Lex(); // Eat percent token.
901 const AsmToken &Tok = Parser.getTok();
902 EndLoc = Tok.getEndLoc();
904 if (Tok.isNot(AsmToken::Identifier)) {
905 if (isParsingIntelSyntax()) return true;
906 return Error(StartLoc, "invalid register name",
907 SMRange(StartLoc, EndLoc));
// First try the name exactly as written.
910 RegNo = MatchRegisterName(Tok.getString());
912 // If the match failed, try the register name as lowercase.
914 RegNo = MatchRegisterName(Tok.getString().lower());
916 // The "flags" register cannot be referenced directly.
917 // Treat it as an identifier instead.
918 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
921 if (!is64BitMode()) {
922 // FIXME: This should be done using Requires<Not64BitMode> and
923 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
925 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// Outside 64-bit mode, reject RIZ, the 64-bit GPRs and the registers that
// the X86II helpers classify as 64-bit-only.
927 if (RegNo == X86::RIZ ||
928 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
929 X86II::isX86_64NonExtLowByteReg(RegNo) ||
930 X86II::isX86_64ExtendedReg(RegNo))
931 return Error(StartLoc, "register %"
932 + Tok.getString() + " is only available in 64-bit mode",
933 SMRange(StartLoc, EndLoc));
// In 64-bit mode without AVX512, reject registers for which
// X86II::is32ExtendedReg() is true.
934 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
935 if (X86II::is32ExtendedReg(RegNo))
936 return Error(StartLoc, "register %"
937 + Tok.getString() + " is only available with AVX512",
938 SMRange(StartLoc, EndLoc));
941 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
942 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
944 Parser.Lex(); // Eat 'st'
946 // Check to see if we have '(4)' after %st.
947 if (getLexer().isNot(AsmToken::LParen))
// The stack index must be an integer in the range 0..7.
952 const AsmToken &IntTok = Parser.getTok();
953 if (IntTok.isNot(AsmToken::Integer))
954 return Error(IntTok.getLoc(), "expected stack index");
955 switch (IntTok.getIntVal()) {
956 case 0: RegNo = X86::ST0; break;
957 case 1: RegNo = X86::ST1; break;
958 case 2: RegNo = X86::ST2; break;
959 case 3: RegNo = X86::ST3; break;
960 case 4: RegNo = X86::ST4; break;
961 case 5: RegNo = X86::ST5; break;
962 case 6: RegNo = X86::ST6; break;
963 case 7: RegNo = X86::ST7; break;
964 default: return Error(IntTok.getLoc(), "invalid stack index");
967 if (getParser().Lex().isNot(AsmToken::RParen))
968 return Error(Parser.getTok().getLoc(), "expected ')'");
970 EndLoc = Parser.getTok().getEndLoc();
971 Parser.Lex(); // Eat ')'
975 EndLoc = Parser.getTok().getEndLoc();
977 // If this is "db[0-7]", match it as an alias
// "dbN" maps to the corresponding DRN register.
979 if (RegNo == 0 && Tok.getString().size() == 3 &&
980 Tok.getString().startswith("db")) {
981 switch (Tok.getString()[2]) {
982 case '0': RegNo = X86::DR0; break;
983 case '1': RegNo = X86::DR1; break;
984 case '2': RegNo = X86::DR2; break;
985 case '3': RegNo = X86::DR3; break;
986 case '4': RegNo = X86::DR4; break;
987 case '5': RegNo = X86::DR5; break;
988 case '6': RegNo = X86::DR6; break;
989 case '7': RegNo = X86::DR7; break;
993 EndLoc = Parser.getTok().getEndLoc();
994 Parser.Lex(); // Eat it.
1000 if (isParsingIntelSyntax()) return true;
1001 return Error(StartLoc, "invalid register name",
1002 SMRange(StartLoc, EndLoc));
1005 Parser.Lex(); // Eat identifier token.
/// Forwards RegNo to the instrumentation object as its initial frame register.
1009 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1010 Instrumentation->SetInitialFrameRegister(RegNo);
/// Builds a memory operand with a zero displacement, no segment or index
/// register and scale 1, based at RSI in 64-bit mode, ESI in 32-bit mode or
/// when Code16GCC is set, and SI otherwise.
/// NOTE(review): the trailing CreateMem() arguments are not visible in this
/// listing.
1013 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1014 bool Parse32 = is32BitMode() || Code16GCC;
1015 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1016 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1017 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1018 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Builds a memory operand with a zero displacement, no segment or index
/// register and scale 1, based at RDI in 64-bit mode, EDI in 32-bit mode or
/// when Code16GCC is set, and DI otherwise.
/// NOTE(review): the trailing CreateMem() arguments are not visible in this
/// listing.
1022 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1023 bool Parse32 = is32BitMode() || Code16GCC;
1024 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1025 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1026 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1027 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Classifies Reg as an SI-family or DI-family register. Any register other
/// than the (R|E)SI / (R|E)DI forms hits llvm_unreachable.
/// NOTE(review): the switch header and case labels are not visible in this
/// listing; presumably the SI forms return true -- confirm.
1031 bool X86AsmParser::IsSIReg(unsigned Reg) {
1033 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
/// Returns the SI-family register (when IsSIReg is true) or the DI-family
/// register (otherwise) belonging to the 64-, 32- or 16-bit GPR class named
/// by RegClassID. Any other class id is treated as unreachable.
1045 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1047 switch (RegClassID) {
1048 default: llvm_unreachable("Unexpected register class");
1049 case X86::GR64RegClassID:
1050 return IsSIReg ? X86::RSI : X86::RDI;
1051 case X86::GR32RegClassID:
1052 return IsSIReg ? X86::ESI : X86::EDI;
1053 case X86::GR16RegClassID:
1054 return IsSIReg ? X86::SI : X86::DI;
/// Appends Src and Dst to Operands in dialect order: destination first for
/// Intel syntax, source first otherwise. Ownership of both operands moves
/// into Operands.
/// NOTE(review): the else line between the two branches is not visible in
/// this listing.
1058 void X86AsmParser::AddDefaultSrcDestOperands(
1059 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1060 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1061 if (isParsingIntelSyntax()) {
1062 Operands.push_back(std::move(Dst));
1063 Operands.push_back(std::move(Src));
1066 Operands.push_back(std::move(Src));
1067 Operands.push_back(std::move(Dst));
/// Reconciles the operands the user wrote (OrigOperands, whose first entry is
/// the instruction name) with the default operands in FinalOperands. Each
/// memory operand in FinalOperands takes the size and segment register of the
/// user's operand and has its base register rewritten to the SI/DI form for
/// the user's register class. Afterwards the user's operands are replaced by
/// the adjusted ones. A warning is queued when the user's base register
/// differs from the register that will be used.
/// NOTE(review): most return statements are not visible in this listing.
1071 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1072 OperandVector &FinalOperands) {
1074 if (OrigOperands.size() > 1) {
1075 // Check if sizes match, OrigOperands also contains the instruction name
1076 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1077 "Operand size mismatch");
// Warnings are collected and only emitted once every operand has passed.
1079 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1080 // Verify types match
1081 int RegClassID = -1;
1082 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
// OrigOperands is offset by one because of the leading instruction name.
1083 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1084 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1086 if (FinalOp.isReg() &&
1087 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1088 // Return false and let a normal complaint about bogus operands happen
1091 if (FinalOp.isMem()) {
1093 if (!OrigOp.isMem())
1094 // Return false and let a normal complaint about bogus operands happen
1097 unsigned OrigReg = OrigOp.Mem.BaseReg;
1098 unsigned FinalReg = FinalOp.Mem.BaseReg;
1100 // If we've already encountered a register class, make sure all register
1101 // bases are of the same register class
1102 if (RegClassID != -1 &&
1103 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1104 return Error(OrigOp.getStartLoc(),
1105 "mismatching source and destination index registers");
// Determine the register class of the user's base register.
1108 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1109 RegClassID = X86::GR64RegClassID;
1110 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1111 RegClassID = X86::GR32RegClassID;
1112 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1113 RegClassID = X86::GR16RegClassID;
1115 // Unexpected register class type
1116 // Return false and let a normal complaint about bogus operands happen
// Pick the SI/DI register of that class matching the default operand.
1119 bool IsSI = IsSIReg(FinalReg);
1120 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1122 if (FinalReg != OrigReg) {
1123 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1124 Warnings.push_back(std::make_pair(
1125 OrigOp.getStartLoc(),
1126 "memory operand is only for determining the size, " + RegName +
1127 " will be used for the location"));
// Carry the user's size and segment over to the default operand.
1130 FinalOp.Mem.Size = OrigOp.Mem.Size;
1131 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1132 FinalOp.Mem.BaseReg = FinalReg;
1136 // Produce warnings only if all the operands passed the adjustment - prevent
1137 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1138 for (auto &WarningMsg : Warnings) {
1139 Warning(WarningMsg.first, WarningMsg.second);
1142 // Remove old operands
1143 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1144 OrigOperands.pop_back();
1146 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1147 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1148 OrigOperands.push_back(std::move(FinalOperands[i]));
/// Parses one operand, dispatching to the Intel-syntax parser when Intel
/// syntax is active and to the AT&T-syntax parser otherwise.
1153 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1154 if (isParsingIntelSyntax())
1155 return ParseIntelOperand();
1156 return ParseATTOperand();
1159 /// getIntelMemOperandSize - Return intel memory operand size.
/// Each keyword is accepted in all-uppercase or all-lowercase spelling only.
/// The mapped values (BYTE -> 8, WORD -> 16, ...) are sizes in bits.
/// NOTE(review): the fallback for unknown keywords and the return statement
/// are not visible in this listing.
1160 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1161 unsigned Size = StringSwitch<unsigned>(OpStr)
1162 .Cases("BYTE", "byte", 8)
1163 .Cases("WORD", "word", 16)
1164 .Cases("DWORD", "dword", 32)
1165 .Cases("FWORD", "fword", 48)
1166 .Cases("QWORD", "qword", 64)
1167 .Cases("MMWORD","mmword", 64)
1168 .Cases("XWORD", "xword", 80)
1169 .Cases("TBYTE", "tbyte", 80)
1170 .Cases("XMMWORD", "xmmword", 128)
1171 .Cases("YMMWORD", "ymmword", 256)
1172 .Cases("ZMMWORD", "zmmword", 512)
1173 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Creates the memory operand for an inline-asm reference. A symbol reference
/// whose declaration is not a variable becomes an absolute memory reference
/// of pointer width. Otherwise a full memory operand is built, with BaseReg
/// forced to a non-zero placeholder when none was given. In both paths a size
/// directive rewrite may be recorded in InstInfo->AsmRewrites.
/// NOTE(review): several guards (including the size checks) and trailing
/// call arguments are not visible in this listing.
1178 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1179 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1180 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1181 InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) {
1182 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1183 // some other label reference.
1184 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1185 // Insert an explicit size if the user didn't have one.
1187 Size = getPointerWidth();
1188 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1192 // Create an absolute memory reference in order to match against
1193 // instructions taking a PC relative operand.
1194 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1195 Identifier, Info.OpDecl);
1198 // We either have a direct symbol reference, or an offset from a symbol. The
1199 // parser always puts the symbol on the LHS, so look there for size
1200 // calculation purposes.
1201 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1203 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1206 Size = Info.Type * 8; // Size is in terms of bits in this context.
1208 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1210 if (AllowBetterSizeMatch)
1211 // Handle cases where size qualifier is absent, upon an indirect symbol
1212 // reference - e.g. "vaddps zmm1, zmm2, [var]"
1213 // set Size to zero to allow matching mechanism to try and find a better
1214 // size qualifier than our initial guess, based on available variants of
1215 // the given instruction
1220 // When parsing inline assembly we set the base register to a non-zero value
1221 // if we don't know the actual value at this time. This is necessary to
1222 // get the matching correct in some cases.
1223 BaseReg = BaseReg ? BaseReg : 1;
1224 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1225 IndexReg, Scale, Start, End, Size, Identifier,
/// Records the AsmRewrites needed to turn a bracketed Intel expression into
/// its rewritten form: the brackets are skipped, the immediate displacement
/// is reconciled with FinalImmDisp, AOK_ImmPrefix rewrites inside the
/// brackets are turned into AOK_Delete, and the text before and after the
/// symbol name is skipped.
/// NOTE(review): the return type line and several guards are not visible in
/// this listing.
1230 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
1231 StringRef SymName, int64_t ImmDisp,
1232 int64_t FinalImmDisp, SMLoc &BracLoc,
1233 SMLoc &StartInBrac, SMLoc &End) {
1234 // Remove the '[' and ']' from the IR string.
1235 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
1236 AsmRewrites.emplace_back(AOK_Skip, End, 1);
1238 // If ImmDisp is non-zero, then we parsed a displacement before the
1239 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1240 // If ImmDisp doesn't match the displacement computed by the state machine
1241 // then we have an additional displacement in the bracketed expression.
1242 if (ImmDisp != FinalImmDisp) {
1244 // We have an immediate displacement before the bracketed expression.
1245 // Adjust this to match the final immediate displacement.
// Only rewrites located at or before the opening bracket are candidates.
1247 for (AsmRewrite &AR : AsmRewrites) {
1248 if (AR.Loc.getPointer() > BracLoc.getPointer())
1250 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
1251 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite up to the bracket and substitute the final value.
1253 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
1254 AR.Val = FinalImmDisp;
1259 assert (Found && "Unable to rewrite ImmDisp.");
1262 // We have a symbolic and an immediate displacement, but no displacement
1263 // before the bracketed expression. Put the immediate displacement
1264 // before the bracketed expression.
1265 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
1268 // Remove all the ImmPrefix rewrites within the brackets.
1269 for (AsmRewrite &AR : AsmRewrites) {
1270 if (AR.Loc.getPointer() < StartInBrac.getPointer())
1272 if (AR.Kind == AOK_ImmPrefix)
1273 AR.Kind = AOK_Delete;
// SymName points into the source buffer, so its data pointer can be compared
// with the bracket locations.
1275 const char *SymLocPtr = SymName.data();
1276 // Skip everything before the symbol.
1277 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1278 assert(Len > 0 && "Expected a non-negative length.");
1279 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
1281 // Skip everything after the symbol.
1282 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1283 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1284 assert(Len > 0 && "Expected a non-negative length.");
1285 AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
// Feeds tokens from the lexer into the Intel expression state machine SM,
// dispatching on the token kind, and updates End as tokens are consumed.
// Identifiers are first tried as registers and are otherwise handed to SM as
// symbolic expressions; an integer followed by the identifier 'f' or 'b' is
// treated as a directional local label reference. Error paths return the
// result of Error(...).
1289 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1290 MCAsmParser &Parser = getParser();
1291 const AsmToken &Tok = Parser.getTok();
// PrevTK starts as AsmToken::Error, which the checks below use to mean
// "no previous token yet".
1293 AsmToken::TokenKind PrevTK = AsmToken::Error;
// Cleared by the cases that already advanced the lexer themselves, so the
// common consumeToken() at the bottom is skipped for them.
1296 bool UpdateLocLex = true;
1298 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1299 // identifier. Don't try to parse it as a register.
1300 if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
1303 // If we're parsing an immediate expression, we don't expect a '['.
1304 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1307 AsmToken::TokenKind TK = getLexer().getKind();
1310 if (SM.isValidEndState()) {
1314 return Error(Tok.getLoc(), "unknown token in expression");
1316 case AsmToken::EndOfStatement: {
1320 case AsmToken::String:
1321 case AsmToken::Identifier: {
1322 // This could be a register or a symbolic displacement.
1325 SMLoc IdentLoc = Tok.getLoc();
1326 StringRef Identifier = Tok.getString();
// String tokens are never tried as registers.
1327 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1328 SM.onRegister(TmpReg);
1329 UpdateLocLex = false;
1332 if (!isParsingInlineAsm()) {
1333 if (getParser().parsePrimaryExpr(Val, End))
1334 return Error(Tok.getLoc(), "Unexpected identifier!");
1336 // This is a dot operator, not an adjacent identifier.
1337 if (Identifier.find('.') != StringRef::npos &&
1338 PrevTK == AsmToken::RBrac) {
// Inline asm: let the frontend resolve the identifier.
1341 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1342 if (ParseIntelIdentifier(Val, Identifier, Info,
1343 /*Unevaluated=*/false, End))
1347 SM.onIdentifierExpr(Val, Identifier);
1348 UpdateLocLex = false;
1351 return Error(Tok.getLoc(), "Unexpected identifier!");
1353 case AsmToken::Integer: {
// In inline asm, record an AOK_ImmPrefix rewrite at the integer's location
// when the state machine asks for one.
1355 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1356 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1357 // Look for 'b' or 'f' following an Integer as a directional label
1358 SMLoc Loc = getTok().getLoc();
1359 int64_t IntVal = getTok().getIntVal();
1360 End = consumeToken();
1361 UpdateLocLex = false;
1362 if (getLexer().getKind() == AsmToken::Identifier) {
1363 StringRef IDVal = getTok().getString();
1364 if (IDVal == "f" || IDVal == "b") {
1366 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1367 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1369 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward ("b") reference must name a label that is already defined.
1370 if (IDVal == "b" && Sym->isUndefined())
1371 return Error(Loc, "invalid reference to undefined symbol");
1372 StringRef Identifier = Sym->getName();
1373 SM.onIdentifierExpr(Val, Identifier);
1374 End = consumeToken();
1376 if (SM.onInteger(IntVal, ErrMsg))
1377 return Error(Loc, ErrMsg);
1380 if (SM.onInteger(IntVal, ErrMsg))
1381 return Error(Loc, ErrMsg);
// Operators and bracket/paren tokens map one-to-one onto state machine
// events.
1385 case AsmToken::Plus: SM.onPlus(); break;
1386 case AsmToken::Minus: SM.onMinus(); break;
1387 case AsmToken::Tilde: SM.onNot(); break;
1388 case AsmToken::Star: SM.onStar(); break;
1389 case AsmToken::Slash: SM.onDivide(); break;
1390 case AsmToken::Pipe: SM.onOr(); break;
1391 case AsmToken::Caret: SM.onXor(); break;
1392 case AsmToken::Amp: SM.onAnd(); break;
1393 case AsmToken::LessLess:
1394 SM.onLShift(); break;
1395 case AsmToken::GreaterGreater:
1396 SM.onRShift(); break;
1397 case AsmToken::LBrac: SM.onLBrac(); break;
1398 case AsmToken::RBrac: SM.onRBrac(); break;
1399 case AsmToken::LParen: SM.onLParen(); break;
1400 case AsmToken::RParen: SM.onRParen(); break;
1403 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token just handled unless the case above already did so.
1405 if (!Done && UpdateLocLex)
1406 End = consumeToken();
// Parses a bracketed Intel memory expression whose '[' is the current token
// and builds the corresponding memory operand. ImmDisp is a displacement that
// was already parsed before the bracket; it seeds the expression state
// machine. A struct field access after the bracketed expression is folded
// into the displacement through ParseIntelDotOperator. Outside inline asm the
// operand is built with X86Operand::CreateMem; for inline asm it is built
// with CreateMemForInlineAsm.
1413 std::unique_ptr<X86Operand>
1414 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1415 int64_t ImmDisp, bool isSymbol,
1417 MCAsmParser &Parser = getParser();
1418 const AsmToken &Tok = Parser.getTok();
1419 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1420 if (getLexer().isNot(AsmToken::LBrac))
1421 return ErrorOperand(BracLoc, "Expected '[' token!");
1422 Parser.Lex(); // Eat '['
// Location of the first token inside the brackets; used for rewrites and
// for reporting base/index register errors.
1424 SMLoc StartInBrac = Parser.getTok().getLoc();
1425 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1426 // may have already parsed an immediate displacement before the bracketed
1428 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1429 if (ParseIntelExpression(SM, End))
1432 const MCExpr *Disp = nullptr;
1433 if (const MCExpr *Sym = SM.getSym()) {
1434 // A symbolic displacement.
1436 if (isParsingInlineAsm())
1437 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
1438 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Add the immediate part when it is non-zero, or use it alone when there is
// no symbolic displacement.
1442 if (SM.getImm() || !Disp) {
1443 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1445 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1447 Disp = Imm; // An immediate displacement only.
1450 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1451 // will in fact do global lookup the field name inside all global typedefs,
1452 // but we don't emulate that.
1453 if ((Parser.getTok().getKind() == AsmToken::Identifier ||
1454 Parser.getTok().getKind() == AsmToken::Dot ||
1455 Parser.getTok().getKind() == AsmToken::Real) &&
1456 Parser.getTok().getString().find('.') != StringRef::npos) {
1457 const MCExpr *NewDisp;
1458 if (ParseIntelDotOperator(Disp, NewDisp))
1461 End = Tok.getEndLoc();
1462 Parser.Lex(); // Eat the field.
1468 Error(Start, "cannot use more than one symbol in memory operand");
1471 if (SM.getBaseReg()) {
1472 Error(Start, "cannot use base register with variable reference");
1475 if (SM.getIndexReg()) {
1476 Error(Start, "cannot use index register with variable reference");
1481 int BaseReg = SM.getBaseReg();
1482 int IndexReg = SM.getIndexReg();
1483 int Scale = SM.getScale();
1484 if (!isParsingInlineAsm()) {
// Pure displacement: no base and no index register were parsed.
1486 if (!BaseReg && !IndexReg) {
1488 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1489 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1493 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1494 Error(StartInBrac, ErrMsg);
1497 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1498 IndexReg, Scale, Start, End, Size);
// Inline asm: the identifier info gathered by the state machine is forwarded
// to CreateMemForInlineAsm.
1501 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1502 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1503 End, Size, SM.getSymName(), Info,
1504 isParsingInlineAsm());
1507 // Inline assembly may use variable names with namespace alias qualifiers.
// Asks the frontend (SemaCallback) to resolve the identifier starting at
// Identifier, advances the token stream past the text the frontend claimed,
// updates Identifier to that text, and sets Val to a reference to a symbol
// with that name. End receives the end location of the last token covered.
// Asserts that inline assembly is being parsed.
1508 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1509 StringRef &Identifier,
1510 InlineAsmIdentifierInfo &Info,
1511 bool IsUnevaluatedOperand, SMLoc &End) {
1512 MCAsmParser &Parser = getParser();
1513 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from the bare data pointer, so it is not limited to
// Identifier's current length (presumably it runs to the end of the
// underlying buffer - confirm); its size after the lookup is used below as
// the extent of what the frontend claimed.
1516 StringRef LineBuf(Identifier.data());
1518 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1520 const AsmToken &Tok = Parser.getTok();
1521 SMLoc Loc = Tok.getLoc();
1523 // Advance the token stream until the end of the current token is
1524 // after the end of what the frontend claimed.
1525 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1527 End = Tok.getEndLoc();
1529 } while (End.getPointer() < EndPtr);
1530 Identifier = LineBuf;
1532 // The frontend should end parsing on an assembler token boundary, unless it
1534 assert((End.getPointer() == EndPtr || !Result) &&
1535 "frontend claimed part of a token?");
1537 // If the identifier lookup was unsuccessful, assume that we are dealing with
1540 StringRef InternalName =
1541 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1543 assert(InternalName.size() && "We should have an internal name here.");
1544 // Push a rewrite for replacing the identifier name with the internal name.
1545 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1549 // Create the symbol reference.
1550 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1551 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1552 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1556 /// \brief Parse intel style segment override.
/// Expects the current token to be the ':' that follows segment register
/// SegReg (which must be non-zero). After the colon it handles, in order: an
/// optional integer displacement, a bracketed expression (delegated to
/// ParseIntelBracExpression), and otherwise either a primary expression
/// (plain assembly) or an identifier resolved through ParseIntelIdentifier
/// (inline assembly).
1557 std::unique_ptr<X86Operand>
1558 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1560 MCAsmParser &Parser = getParser();
1561 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1562 const AsmToken &Tok = Parser.getTok(); // Current token; must be the colon.
1563 if (Tok.isNot(AsmToken::Colon))
1564 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1565 Parser.Lex(); // Eat ':'
1567 int64_t ImmDisp = 0;
1568 if (getLexer().is(AsmToken::Integer)) {
1569 ImmDisp = Tok.getIntVal();
1570 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1572 if (isParsingInlineAsm())
1573 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1575 if (getLexer().isNot(AsmToken::LBrac)) {
1576 // An immediate following a 'segment register', 'colon' token sequence can
1577 // be followed by a bracketed expression. If it isn't we know we have our
1578 // final segment override.
1579 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1580 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1581 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1582 Start, ImmDispToken.getEndLoc(), Size);
1586 if (getLexer().is(AsmToken::LBrac))
1587 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
// NOTE(review): neither operand created below receives SegReg (the
// plain-assembly CreateMem overload takes no segment and the inline-asm call
// passes SegReg=0) - confirm that dropping the override here is intended.
1591 if (!isParsingInlineAsm()) {
1592 if (getParser().parsePrimaryExpr(Val, End))
1593 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1595 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1598 InlineAsmIdentifierInfo Info;
1599 StringRef Identifier = Tok.getString();
1600 if (ParseIntelIdentifier(Val, Identifier, Info,
1601 /*Unevaluated=*/false, End))
1603 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1604 /*Scale=*/1, Start, End, Size, Identifier, Info);
1607 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
// Consumes the '{' and then accepts one of two forms:
//   r<n|d|u|z>-sae}  -> immediate operand holding the matching
//                       X86::STATIC_ROUNDING value
//   sae}             -> token operand "{sae}"
// Anything else yields an error operand.
1608 std::unique_ptr<X86Operand>
1609 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1610 MCAsmParser &Parser = getParser();
1611 const AsmToken &Tok = Parser.getTok();
1612 // Eat "{" and mark the current place.
1613 const SMLoc consumedToken = consumeToken();
1614 if (Tok.getIdentifier().startswith("r")){
1615 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1616 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1617 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1618 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1619 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1622 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1623 Parser.Lex(); // Eat "r*" of r*-sae
1624 if (!getLexer().is(AsmToken::Minus))
1625 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1626 Parser.Lex(); // Eat "-"
// NOTE(review): the token after '-' is consumed without being compared
// against "sae", so any single token is accepted in that position.
1627 Parser.Lex(); // Eat the sae
1628 if (!getLexer().is(AsmToken::RCurly))
1629 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1630 Parser.Lex(); // Eat "}"
1631 const MCExpr *RndModeOp =
1632 MCConstantExpr::create(rndMode, Parser.getContext());
1633 return X86Operand::CreateImm(RndModeOp, Start, End);
1635 if(Tok.getIdentifier().equals("sae")){
1636 Parser.Lex(); // Eat the sae
1637 if (!getLexer().is(AsmToken::RCurly))
1638 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1639 Parser.Lex(); // Eat "}"
// The token operand is located at the position returned when '{' was eaten.
1640 return X86Operand::CreateToken("{sae}", consumedToken);
1642 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1645 /// Parse the '.' operator.
/// Adds the offset named by the current token to the constant displacement
/// Disp and stores the sum in NewDisp. The token is either a Real
/// (".<imm>", read as a base-10 integer) or, in inline assembly, an
/// identifier of the form Base.Member whose offset comes from
/// SemaCallback->LookupInlineAsmField. A non-constant Disp is rejected.
/// The caller in ParseIntelBracExpression eats the field token afterwards.
1646 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1647 const MCExpr *&NewDisp) {
1648 MCAsmParser &Parser = getParser();
1649 const AsmToken &Tok = Parser.getTok();
1650 int64_t OrigDispVal, DotDispVal;
1652 // FIXME: Handle non-constant expressions.
1653 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1654 OrigDispVal = OrigDisp->getValue();
1656 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1658 // Drop the optional '.'.
1659 StringRef DotDispStr = Tok.getString();
1660 if (DotDispStr.startswith("."))
1661 DotDispStr = DotDispStr.drop_front(1);
1663 // .Imm gets lexed as a real.
1664 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is ignored, so a malformed
// number is not reported here.
1666 DotDispStr.getAsInteger(10, DotDisp);
1667 DotDispVal = DotDisp.getZExtValue();
1668 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the offset.
1670 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1671 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1673 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1674 DotDispVal = DotDisp;
1676 return Error(Tok.getLoc(), "Unexpected token type!");
// For inline asm, record a rewrite that replaces the field text with the
// combined displacement.
1678 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1679 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1680 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here.
1681 unsigned Val = OrigDispVal + DotDispVal;
1682 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
1685 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1689 /// Parse the 'offset' operator. This operator is used to specify the
1690 /// location rather than the content of a variable.
/// Eats the operator token, resolves the following identifier through
/// ParseIntelIdentifier, records a rewrite that skips the operator text, and
/// returns a register operand created with GetAddress=true that carries the
/// identifier and its Info.OpDecl.
1691 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1692 MCAsmParser &Parser = getParser();
1693 const AsmToken &Tok = Parser.getTok();
1694 SMLoc OffsetOfLoc = Tok.getLoc();
1695 Parser.Lex(); // Eat offset.
1698 InlineAsmIdentifierInfo Info;
1699 SMLoc Start = Tok.getLoc(), End;
1700 StringRef Identifier = Tok.getString();
1701 if (ParseIntelIdentifier(Val, Identifier, Info,
1702 /*Unevaluated=*/false, End))
1705 // Don't emit the offset operator.
// The length 7 is presumably the six characters of "offset" plus one
// following separator character - confirm.
1706 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1708 // The offset operator will have an 'r' constraint, thus we need to create
1709 // register operand to ensure proper matching. Just pick a GPR based on
1710 // the size of a pointer.
1711 bool Parse32 = is32BitMode() || Code16GCC;
1712 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1714 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1715 OffsetOfLoc, Identifier, Info.OpDecl);
// Selects which Intel operator ParseIntelOperator evaluates; that function
// switches on IOK_LENGTH, IOK_SIZE and IOK_TYPE.
1718 enum IntelOperatorKind {
1724 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1725 /// returns the number of elements in an array. It returns the value 1 for
1726 /// non-array variables. The SIZE operator returns the size of a C or C++
1727 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1728 /// TYPE operator returns the size of a C or C++ type or variable. If the
1729 /// variable is an array, TYPE returns the size of a single element.
///
/// Eats the operator token, resolves the following identifier with
/// ParseIntelIdentifier (as an unevaluated operand), picks the requested
/// value from the resulting InlineAsmIdentifierInfo, records an AOK_Imm
/// rewrite covering the operator and its operand, and returns the value as
/// an immediate operand.
1730 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1731 MCAsmParser &Parser = getParser();
1732 const AsmToken &Tok = Parser.getTok();
1733 SMLoc TypeLoc = Tok.getLoc();
1734 Parser.Lex(); // Eat operator.
1736 const MCExpr *Val = nullptr;
1737 InlineAsmIdentifierInfo Info;
1738 SMLoc Start = Tok.getLoc(), End;
1739 StringRef Identifier = Tok.getString();
1740 if (ParseIntelIdentifier(Val, Identifier, Info,
1741 /*Unevaluated=*/true, End))
1745 return ErrorOperand(Start, "unable to lookup expression");
1749 default: llvm_unreachable("Unexpected operand kind!");
1750 case IOK_LENGTH: CVal = Info.Length; break;
1751 case IOK_SIZE: CVal = Info.Size; break;
1752 case IOK_TYPE: CVal = Info.Type; break;
1755 // Rewrite the type operator and the C or C++ type or variable in terms of an
1756 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator token to the end of the operand.
1757 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1758 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
1760 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1761 return X86Operand::CreateImm(Imm, Start, End);
// Parses one Intel-syntax operand. In order, it tries: the inline-asm
// operators (offset/length/size/type), an optional "<size> ptr" qualifier,
// an AVX-512 rounding-mode operand, a register (possibly the start of a
// segment override), a bracketed memory expression, and finally a general
// expression that becomes an immediate, a memory reference, or the
// displacement that precedes a bracketed expression.
1764 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1765 MCAsmParser &Parser = getParser();
1766 const AsmToken &Tok = Parser.getTok();
1769 // Offset, length, type and size operators.
1770 if (isParsingInlineAsm()) {
// Only the all-lowercase and all-uppercase spellings are recognized.
1771 StringRef AsmTokStr = Tok.getString();
1772 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1773 return ParseIntelOffsetOfOperator();
1774 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1775 return ParseIntelOperator(IOK_LENGTH);
1776 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1777 return ParseIntelOperator(IOK_SIZE);
1778 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1779 return ParseIntelOperator(IOK_TYPE);
1782 bool PtrInOperand = false;
1783 unsigned Size = getIntelMemOperandSize(Tok.getString());
1785 Parser.Lex(); // Eat operand size (e.g., byte, word).
1786 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1787 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1788 Parser.Lex(); // Eat ptr.
1789 PtrInOperand = true;
1792 Start = Tok.getLoc();
1794 // rounding mode token
1795 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1796 getLexer().is(AsmToken::LCurly))
1797 return ParseRoundingModeOp(Start, End);
1801 if (getLexer().is(AsmToken::Identifier) &&
1802 !ParseRegister(RegNo, Start, End)) {
1803 // If this is a segment register followed by a ':', then this is the start
1804 // of a segment override, otherwise this is a normal register reference.
1805 // In case it is a normal register and there is ptr in the operand this
1807 if (RegNo == X86::RIP)
1808 return ErrorOperand(Start, "rip can only be used as a base register");
1809 if (getLexer().isNot(AsmToken::Colon)) {
1811 return ErrorOperand(Start, "expected memory operand after "
1812 "'ptr', found register operand instead");
1814 return X86Operand::CreateReg(RegNo, Start, End);
1816 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1819 // Immediates and Memory
1821 // Parse [ BaseReg + Scale*IndexReg + Disp ].
1822 if (getLexer().is(AsmToken::LBrac))
1823 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
// Remember the first token by value so it can be inspected after the
// expression parser has advanced the lexer.
1826 AsmToken StartTok = Tok;
1827 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1828 /*AddImmPrefix=*/false);
1829 if (ParseIntelExpression(SM, End))
// A symbol expression of kind Constant is folded into Imm instead of being
// treated as a symbol.
1832 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1833 int64_t Imm = SM.getImm();
1834 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1835 SM.getSym()->evaluateAsAbsolute(Imm);
1837 if (StartTok.isNot(AsmToken::Identifier) &&
1838 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
// Len is the amount of source text consumed by the expression.
1839 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1840 if (StartTok.getString().size() == Len)
1841 // Just add a prefix if this wasn't a complex immediate expression.
1842 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1844 // Otherwise, rewrite the complex expression as a single immediate.
1845 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1848 if (getLexer().isNot(AsmToken::LBrac)) {
1849 // If a directional label (ie. 1f or 2b) was parsed above from
1850 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1851 // to the MCExpr with the directional local symbol and this is a
1852 // memory operand not an immediate operand.
1854 if (isParsingInlineAsm())
1855 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1857 /*Scale=*/1, Start, End, Size,
1858 SM.getSymName(), SM.getIdentifierInfo());
1859 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1863 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1864 return X86Operand::CreateImm(ImmExpr, Start, End);
1867 // Only positive immediates are valid.
1869 return ErrorOperand(Start, "expected a positive immediate displacement "
1870 "before bracketed expr.");
// The expression parsed so far is the displacement in front of a '['.
1872 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
// Parses one AT&T-syntax operand by switching on the current token kind:
// '%' starts a register (or a segment-prefixed memory operand when followed
// by ':'), '$' starts an immediate expression, '{' starts an AVX-512
// rounding-mode operand, and the remaining path parses a memory operand
// without a segment register.
1875 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1876 MCAsmParser &Parser = getParser();
1877 switch (getLexer().getKind()) {
1879 // Parse a memory operand with no segment register.
1880 return ParseMemOperand(0, Parser.getTok().getLoc());
1881 case AsmToken::Percent: {
1882 // Read the register.
1885 if (ParseRegister(RegNo, Start, End)) return nullptr;
1886 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1887 Error(Start, "%eiz and %riz can only be used as index registers",
1888 SMRange(Start, End));
1891 if (RegNo == X86::RIP) {
1892 Error(Start, "%rip can only be used as a base register",
1893 SMRange(Start, End));
1897 // If this is a segment register followed by a ':', then this is the start
1898 // of a memory reference, otherwise this is a normal register reference.
1899 if (getLexer().isNot(AsmToken::Colon))
1900 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1902 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1903 return ErrorOperand(Start, "invalid segment register");
1905 getParser().Lex(); // Eat the colon.
1906 return ParseMemOperand(RegNo, Start);
1908 case AsmToken::Dollar: {
1909 // $42 -> immediate.
1910 SMLoc Start = Parser.getTok().getLoc(), End;
1913 if (getParser().parseExpression(Val, End))
1915 return X86Operand::CreateImm(Val, Start, End);
1917 case AsmToken::LCurly:{
1918 SMLoc Start = Parser.getTok().getLoc(), End;
// '{' is only meaningful here when the AVX-512 feature bit is set.
1919 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1920 return ParseRoundingModeOp(Start, End);
1921 return ErrorOperand(Start, "Unexpected '{' in expression");
1926 // true on failure, false otherwise
1927 // If no {z} mark was found - Parser doesn't advance
// On success with a {z} mark present, Z is set to a "{z}" token operand
// located at StartLoc; otherwise Z is left untouched.
1928 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1929 const SMLoc &StartLoc) {
1930 MCAsmParser &Parser = getParser();
1931 // Assuming we are just past the '{' mark, querying the next token
1932 // Searched for {z}, but none was found. Return false, as no parsing error was
1934 if (!(getLexer().is(AsmToken::Identifier) &&
1935 (getLexer().getTok().getIdentifier() == "z")))
1937 Parser.Lex(); // Eat z
1938 // Query and eat the '}' mark
1939 if (!getLexer().is(AsmToken::RCurly))
1940 return Error(getLexer().getLoc(), "Expected } at this point");
1941 Parser.Lex(); // Eat '}'
1942 // Assign Z with the {z} mark operand
1943 Z = X86Operand::CreateToken("{z}", StartLoc);
1947 // true on failure, false otherwise
// When the AVX-512 feature bit is set and the current token is '{', parses
// the decorations that may follow an operand and appends them to Operands:
// either a memory broadcast ({1to2}, {1to4}, {1to8}, {1to16}) or an op-mask
// register mark and/or a {z} mark in either order.
1948 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1949 const MCParsedAsmOperand &Op) {
1950 MCAsmParser &Parser = getParser();
1951 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1952 if (getLexer().is(AsmToken::LCurly)) {
1953 // Eat "{" and mark the current place.
1954 const SMLoc consumedToken = consumeToken();
1955 // Distinguish {1to<NUM>} from {%k<NUM>}.
1956 if(getLexer().is(AsmToken::Integer)) {
1957 // Parse memory broadcasting ({1to<NUM>}).
1958 if (getLexer().getTok().getIntVal() != 1)
1959 return TokError("Expected 1to<NUM> at this point");
1960 Parser.Lex(); // Eat "1" of 1to8
1961 if (!getLexer().is(AsmToken::Identifier) ||
1962 !getLexer().getTok().getIdentifier().startswith("to"))
1963 return TokError("Expected 1to<NUM> at this point");
1964 // Recognize only reasonable suffixes.
1965 const char *BroadcastPrimitive =
1966 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1967 .Case("to2", "{1to2}")
1968 .Case("to4", "{1to4}")
1969 .Case("to8", "{1to8}")
1970 .Case("to16", "{1to16}")
1972 if (!BroadcastPrimitive)
1973 return TokError("Invalid memory broadcast primitive.");
1974 Parser.Lex(); // Eat "toN" of 1toN
1975 if (!getLexer().is(AsmToken::RCurly))
1976 return TokError("Expected } at this point");
1977 Parser.Lex(); // Eat "}"
1978 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1980 // No AVX512 specific primitives can pass
1981 // after memory broadcasting, so return.
1984 // Parse either {k}{z}, {z}{k}, {k} or {z}
1985 // last one have no meaning, but GCC accepts it
1986 // Currently, we're just past a '{' mark
1987 std::unique_ptr<X86Operand> Z;
1988 if (ParseZ(Z, consumedToken))
1990 // Reaching here means that parsing of the allegedly '{z}' mark yielded
1992 // Query for the need of further parsing for a {%k<NUM>} mark
1993 if (!Z || getLexer().is(AsmToken::LCurly)) {
// If {z} was already parsed, a new '{' must be eaten first; otherwise the
// '{' eaten at the top is the one that opens the op-mask mark.
1994 const SMLoc StartLoc = Z ? consumeToken() : consumedToken;
1995 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
1997 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1998 if (!getLexer().is(AsmToken::RCurly))
1999 return Error(getLexer().getLoc(), "Expected } at this point");
// The mask is pushed as three operands: "{", the parsed operand, and "}".
2000 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2001 Operands.push_back(std::move(Op));
2002 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2004 return Error(getLexer().getLoc(),
2005 "Expected an op-mask register at this point");
2006 // {%k<NUM>} mark is found, inquire for {z}
2007 if (getLexer().is(AsmToken::LCurly) && !Z) {
2008 // Have we've found a parsing error, or found no (expected) {z} mark
2009 // - report an error
2010 if (ParseZ(Z, consumeToken()) || !Z)
2014 // '{z}' on its own is meaningless, hence should be ignored.
2015 // on the contrary - have it been accompanied by a K register,
2018 Operands.push_back(std::move(Z));
2026 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2027 /// has already been parsed if present.
/// SegReg is the already-parsed segment register (0 when there is none).
/// The displacement defaults to the constant 0 when the operand starts
/// directly with '('. The base register, index register and scale are
/// validated before the operand is created.
2028 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2031 MCAsmParser &Parser = getParser();
2032 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2033 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2034 // only way to do this without lookahead is to eat the '(' and see what is
2036 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2037 if (getLexer().isNot(AsmToken::LParen)) {
2039 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2041 // After parsing the base expression we could either have a parenthesized
2042 // memory address or not. If not, return now. If so, eat the (.
2043 if (getLexer().isNot(AsmToken::LParen)) {
2044 // Unless we have a segment register, treat this as an immediate.
2046 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2047 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2054 // Okay, we have a '('. We don't know if this is an expression or not, but
2055 // so we have to eat the ( to see beyond it.
2056 SMLoc LParenLoc = Parser.getTok().getLoc();
2057 Parser.Lex(); // Eat the '('.
2059 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2060 // Nothing to do here, fall into the code below with the '(' part of the
2061 // memory operand consumed.
2065 // It must be a parenthesized expression, parse it now.
2066 if (getParser().parseParenExpression(Disp, ExprEnd))
2069 // After parsing the base expression we could either have a parenthesized
2070 // memory address or not. If not, return now. If so, eat the (.
2071 if (getLexer().isNot(AsmToken::LParen)) {
2072 // Unless we have a segment register, treat this as an immediate.
2074 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2076 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2085 // If we reached here, then we just ate the ( of the memory operand. Process
2086 // the rest of the memory operand.
2087 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2088 SMLoc IndexLoc, BaseLoc;
2090 if (getLexer().is(AsmToken::Percent)) {
2091 SMLoc StartLoc, EndLoc;
2092 BaseLoc = Parser.getTok().getLoc();
2093 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2094 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2095 Error(StartLoc, "eiz and riz can only be used as index registers",
2096 SMRange(StartLoc, EndLoc));
2101 if (getLexer().is(AsmToken::Comma)) {
2102 Parser.Lex(); // Eat the comma.
2103 IndexLoc = Parser.getTok().getLoc();
2105 // Following the comma we should have either an index register, or a scale
2106 // value. We don't support the latter form, but we want to parse it
2109 // Note that even though it would be completely consistent to support syntax
2110 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2111 if (getLexer().is(AsmToken::Percent)) {
2113 if (ParseRegister(IndexReg, L, L))
2115 if (BaseReg == X86::RIP) {
2116 Error(IndexLoc, "%rip as base register can not have an index register");
2119 if (IndexReg == X86::RIP) {
2120 Error(IndexLoc, "%rip is not allowed as an index register");
2124 if (getLexer().isNot(AsmToken::RParen)) {
2125 // Parse the scale amount:
2126 // ::= ',' [scale-expression]
2127 if (getLexer().isNot(AsmToken::Comma)) {
2128 Error(Parser.getTok().getLoc(),
2129 "expected comma in scale expression");
2132 Parser.Lex(); // Eat the comma.
2134 if (getLexer().isNot(AsmToken::RParen)) {
2135 SMLoc Loc = Parser.getTok().getLoc();
2138 if (getParser().parseAbsoluteExpression(ScaleVal)){
2139 Error(Loc, "expected scale expression");
2143 // Validate the scale amount.
2144 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2146 Error(Loc, "scale factor in 16-bit address must be 1");
2149 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2151 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2154 Scale = (unsigned)ScaleVal;
2157 } else if (getLexer().isNot(AsmToken::RParen)) {
2158 // A scale amount without an index is ignored.
2160 SMLoc Loc = Parser.getTok().getLoc();
2163 if (getParser().parseAbsoluteExpression(Value))
2167 Warning(Loc, "scale factor without index register is ignored");
2172 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2173 if (getLexer().isNot(AsmToken::RParen)) {
2174 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2177 SMLoc MemEnd = Parser.getTok().getEndLoc();
2178 Parser.Lex(); // Eat the ')'.
2180 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2181 // and then only in non-64-bit modes. Except for DX, which is a special case
2182 // because an unofficial form of in/out instructions uses it.
2183 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2184 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2185 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2186 BaseReg != X86::DX) {
2187 Error(BaseLoc, "invalid 16-bit base register");
2191 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2192 Error(IndexLoc, "16-bit memory operand may not include only index register");
2197 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2198 Error(BaseLoc, ErrMsg);
// Use the full segment/base/index form only when at least one of them is
// present; otherwise create a displacement-only memory operand.
2202 if (SegReg || BaseReg || IndexReg)
2203 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2204 IndexReg, Scale, MemStart, MemEnd);
2205 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
/// Parse a single instruction statement whose mnemonic is \p Name.
///
/// Visible steps, in order: rewrite special mnemonic spellings (set*b,
/// [v]cmp<cc>*, vpcmp<cc>*, vpcom<cc>*) into a base mnemonic plus an immediate
/// condition code, push the mnemonic token, parse the operand list unless the
/// mnemonic is an instruction prefix, then apply mnemonic-specific operand
/// fixups (fsub/fdiv, segment-register moves, in/out with (%dx), string
/// instructions, shifts/rotates by 1, "int $3", xlat).
///
/// \param Info     Supplies the AsmRewrites list that receives an
///                 AOK_EndOfStatement entry when a curly brace ends the
///                 statement.
/// \param Name     Mnemonic as written in the source.
/// \param NameLoc  Source location of the mnemonic.
/// \param Operands Receives the mnemonic token followed by the parsed operands.
/// \returns The visible return statements yield the result of TokError() or
///          the HadVerifyError flag. NOTE(review): the final return and some
///          early-exit lines are not visible in this excerpt - confirm.
2208 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2209 SMLoc NameLoc, OperandVector &Operands) {
2210 MCAsmParser &Parser = getParser();
// PatchedName starts as a view of Name and is narrowed by the hacks below.
2212 StringRef PatchedName = Name;
// Intel-syntax inline asm only: a "short" keyword right after "jmp" is dropped
// from the output by recording an AOK_Skip rewrite.
2214 if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) {
2215 StringRef NextTok = Parser.getTok().getString();
2216 if (NextTok == "short") {
2218 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2219 // Eat the short keyword
2221 // MS ignores the short keyword, it determines the jmp type based
2222 // on the distance of the label
2223 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2224 NextTok.size() + 1);
2228 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b'; "setb" and "setnb" are excluded from the rewrite.
2229 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2230 PatchedName != "setb" && PatchedName != "setnb")
2231 PatchedName = PatchedName.substr(0, Name.size()-1);
2233 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2234 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2235 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2236 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2237 bool IsVCMP = PatchedName[0] == 'v';
// CCIdx is the offset at which the comparison-code text begins: just past
// "vcmp" (4) or "cmp" (3).
2238 unsigned CCIdx = IsVCMP ? 4 : 3;
// The text between the prefix and the two-character type suffix selects the
// immediate predicate value.
2239 unsigned ComparisonCode = StringSwitch<unsigned>(
2240 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2242 .Case("eq_oq", 0x00)
2244 .Case("lt_os", 0x01)
2246 .Case("le_os", 0x02)
2247 .Case("unord", 0x03)
2248 .Case("unord_q", 0x03)
2250 .Case("neq_uq", 0x04)
2252 .Case("nlt_us", 0x05)
2254 .Case("nle_us", 0x06)
2256 .Case("ord_q", 0x07)
2257 /* AVX only from here */
2258 .Case("eq_uq", 0x08)
2260 .Case("nge_us", 0x09)
2262 .Case("ngt_us", 0x0A)
2263 .Case("false", 0x0B)
2264 .Case("false_oq", 0x0B)
2265 .Case("neq_oq", 0x0C)
2267 .Case("ge_os", 0x0D)
2269 .Case("gt_os", 0x0E)
2271 .Case("true_uq", 0x0F)
2272 .Case("eq_os", 0x10)
2273 .Case("lt_oq", 0x11)
2274 .Case("le_oq", 0x12)
2275 .Case("unord_s", 0x13)
2276 .Case("neq_us", 0x14)
2277 .Case("nlt_uq", 0x15)
2278 .Case("nle_uq", 0x16)
2279 .Case("ord_s", 0x17)
2280 .Case("eq_us", 0x18)
2281 .Case("nge_uq", 0x19)
2282 .Case("ngt_uq", 0x1A)
2283 .Case("false_os", 0x1B)
2284 .Case("neq_os", 0x1C)
2285 .Case("ge_oq", 0x1D)
2286 .Case("gt_oq", 0x1E)
2287 .Case("true_us", 0x1F)
// Codes 8 and above are accepted only for the v-prefixed form.
// NOTE(review): ~0U is presumably the StringSwitch default for an unrecognized
// code; the .Default line is not visible in this excerpt.
2289 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
// Emit the bare "cmp"/"vcmp" prefix as a token, then the predicate as an
// immediate operand located at the mnemonic.
2291 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2294 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2295 getParser().getContext());
2296 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
// Only the trailing two-character type suffix remains in PatchedName.
2298 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2302 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2303 if (PatchedName.startswith("vpcmp") &&
2304 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2305 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Here CCIdx is the length of the type suffix: 2 when the character before
// the last one is 'u' (e.g. "ub"), otherwise 1.
2306 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2307 unsigned ComparisonCode = StringSwitch<unsigned>(
2308 PatchedName.slice(5, PatchedName.size() - CCIdx))
2309 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2312 //.Case("false", 0x3) // Not a documented alias.
2316 //.Case("true", 0x7) // Not a documented alias.
// Code 0 ("eq") is accepted only together with a two-character suffix.
2318 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2319 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2321 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2322 getParser().getContext());
2323 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
// Keep just the type suffix.
2325 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2329 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2330 if (PatchedName.startswith("vpcom") &&
2331 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2332 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Same suffix-length computation as for vpcmp above.
2333 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2334 unsigned ComparisonCode = StringSwitch<unsigned>(
2335 PatchedName.slice(5, PatchedName.size() - CCIdx))
2345 if (ComparisonCode != ~0U) {
2346 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2348 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2349 getParser().getContext());
2350 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2352 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
// Push whatever is left of the (possibly patched) mnemonic as a token.
2356 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2358 // Determine whether this is an instruction prefix.
2360 Name == "lock" || Name == "rep" ||
2361 Name == "repe" || Name == "repz" ||
2362 Name == "repne" || Name == "repnz" ||
2363 Name == "rex64" || Name == "data16";
2365 bool CurlyAsEndOfStatement = false;
2366 // This does the actual operand parsing. Don't parse any more if we have a
2367 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2368 // just want to parse the "lock" as the first instruction and the "incl" as
2370 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2372 // Parse '*' modifier.
2373 if (getLexer().is(AsmToken::Star))
2374 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2376 // Read the operands.
2378 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2379 Operands.push_back(std::move(Op));
2380 if (HandleAVX512Operand(Operands, *Operands.back()))
2385 // check for comma and eat it
2386 if (getLexer().is(AsmToken::Comma))
2392 // In MS inline asm curly braces mark the beginning/end of a block, therefore
2393 // they should be interpreted as end of statement
2394 CurlyAsEndOfStatement =
2395 isParsingIntelSyntax() && isParsingInlineAsm() &&
2396 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2397 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2398 return TokError("unexpected token in argument list");
2401 // Consume the EndOfStatement or the prefix separator Slash
2402 if (getLexer().is(AsmToken::EndOfStatement) ||
2403 (isPrefix && getLexer().is(AsmToken::Slash)))
2405 else if (CurlyAsEndOfStatement)
2406 // Add an actual EndOfStatement before the curly brace
2407 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2408 getLexer().getTok().getLoc(), 0);
2410 // This is for gas compatibility and cannot be done in td.
2411 // Adding "p" for some floating point with no argument.
2412 // For example: fsub --> fsubp
2414 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
// Operands.size() == 1 means only the mnemonic token was pushed.
2415 if (IsFp && Operands.size() == 1) {
2416 const char *Repl = StringSwitch<const char *>(Name)
2417 .Case("fsub", "fsubp")
2418 .Case("fdiv", "fdivp")
2419 .Case("fsubr", "fsubrp")
2420 .Case("fdivr", "fdivrp");
2421 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2424 // Moving a 32 or 16 bit value into a segment register has the same
2425 // behavior. Modify such instructions to always take shorter form.
2426 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2427 (Operands.size() == 3)) {
2428 X86Operand &Op1 = (X86Operand &)*Operands[1];
2429 X86Operand &Op2 = (X86Operand &)*Operands[2];
2430 SMLoc Loc = Op1.getEndLoc();
2431 if (Op1.isReg() && Op2.isReg() &&
2432 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2434 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2435 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2436 // Change instruction name to match new instruction.
// Name[3] is the size suffix of "movw"/"movl"; it is replaced when it does
// not match the current mode ('l' in 16-bit mode, 'w' otherwise).
2437 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2438 Name = is16BitMode() ? "movw" : "movl";
2439 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2441 // Select the correct equivalent 16-/32-bit source register.
2443 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2444 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2448 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2449 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2450 // documented form in various unofficial manuals, so a lot of code uses it.
2451 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2452 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2453 Operands.size() == 3) {
2454 X86Operand &Op = (X86Operand &)*Operands.back();
// Only a plain "(%dx)" qualifies: no segment, zero constant displacement,
// base register dx, no index register.
2455 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2456 isa<MCConstantExpr>(Op.Mem.Disp) &&
2457 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2458 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2459 SMLoc Loc = Op.getEndLoc();
2460 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2463 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2464 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2465 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2466 Operands.size() == 3) {
2467 X86Operand &Op = (X86Operand &)*Operands[1];
2468 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2469 isa<MCConstantExpr>(Op.Mem.Disp) &&
2470 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2471 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2472 SMLoc Loc = Op.getEndLoc();
2473 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
// TmpOperands collects the default operands of a string instruction;
// HadVerifyError records the result of VerifyAndAdjustOperands() and is
// checked once after all string-instruction cases below.
2477 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2478 bool HadVerifyError = false;
2480 // Append default arguments to "ins[bwld]"
2481 if (Name.startswith("ins") &&
2482 (Operands.size() == 1 || Operands.size() == 3) &&
2483 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2486 AddDefaultSrcDestOperands(TmpOperands,
2487 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2488 DefaultMemDIOperand(NameLoc));
2489 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2492 // Append default arguments to "outs[bwld]"
2493 if (Name.startswith("outs") &&
2494 (Operands.size() == 1 || Operands.size() == 3) &&
2495 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2496 Name == "outsd" || Name == "outs")) {
2497 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2498 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2499 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2502 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2503 // values of $SIREG according to the mode. It would be nice if this
2504 // could be achieved with InstAlias in the tables.
2505 if (Name.startswith("lods") &&
2506 (Operands.size() == 1 || Operands.size() == 2) &&
2507 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2508 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2509 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2510 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2513 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2514 // values of $DIREG according to the mode. It would be nice if this
2515 // could be achieved with InstAlias in the tables.
2516 if (Name.startswith("stos") &&
2517 (Operands.size() == 1 || Operands.size() == 2) &&
2518 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2519 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2520 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2521 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2524 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2525 // values of $DIREG according to the mode. It would be nice if this
2526 // could be achieved with InstAlias in the tables.
2527 if (Name.startswith("scas") &&
2528 (Operands.size() == 1 || Operands.size() == 2) &&
2529 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2530 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2531 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2532 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2535 // Add default SI and DI operands to "cmps[bwlq]".
2536 if (Name.startswith("cmps") &&
2537 (Operands.size() == 1 || Operands.size() == 3) &&
2538 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2539 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2540 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2541 DefaultMemSIOperand(NameLoc));
2542 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2545 // Add default SI and DI operands to "movs[bwlq]".
// The "smov*" spellings are handled identically to "movs*".
2546 if (((Name.startswith("movs") &&
2547 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2548 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2549 (Name.startswith("smov") &&
2550 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2551 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2552 (Operands.size() == 1 || Operands.size() == 3)) {
// Outside Intel syntax, an operand-less "movsd" is re-tokenized as "movsl".
2553 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2554 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2555 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2556 DefaultMemDIOperand(NameLoc));
2557 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2560 // Check if we encountered an error for one of the string instructions
2561 if (HadVerifyError) {
2562 return HadVerifyError;
2565 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2567 if ((Name.startswith("shr") || Name.startswith("sar") ||
2568 Name.startswith("shl") || Name.startswith("sal") ||
2569 Name.startswith("rcl") || Name.startswith("rcr") ||
2570 Name.startswith("rol") || Name.startswith("ror")) &&
2571 Operands.size() == 3) {
// The constant-1 count is the last operand in Intel syntax and the first
// operand after the mnemonic otherwise; in both cases it is removed.
2572 if (isParsingIntelSyntax()) {
2574 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2575 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2576 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2577 Operands.pop_back();
2579 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2580 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2581 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2582 Operands.erase(Operands.begin() + 1);
2586 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2587 // instalias with an immediate operand yet.
2588 if (Name == "int" && Operands.size() == 2) {
2589 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2591 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2592 if (CE->getValue() == 3) {
2593 Operands.erase(Operands.begin() + 1);
2594 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2598 // Transforms "xlat mem8" into "xlatb"
2599 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2600 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
// The explicit operand is dropped after warning that it is not used for the
// address.
2602 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2603 "size, (R|E)BX will be used for the location");
2604 Operands.pop_back();
2605 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
/// Post-processing hook applied to a successfully matched instruction.
/// NOTE(review): only the signature is visible in this excerpt. The matchers
/// in this file call it as `while (processInstruction(Inst, Operands))`, so it
/// presumably returns true when it changed \p Inst - confirm against the body.
2612 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
// Forward declaration; used by ErrorMissingFeature() to print the name of a
// feature bit. NOTE(review): the definition presumably comes from
// X86GenAsmMatcher.inc under GET_SUBTARGET_FEATURE_NAME (included at the end
// of this file) - confirm.
2616 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emit \p Inst by handing it, together with its parsed operands and the
/// parser's MCContext, to the X86AsmInstrumentation object.
/// NOTE(review): the remaining parameters and call arguments are not visible
/// in this excerpt.
2618 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2620 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Match the parsed operands against the instruction tables and emit the
/// result, dispatching on the active syntax: the Intel matcher when parsing
/// Intel syntax, the AT&T matcher otherwise. The callee's result is returned
/// unchanged.
2624 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2625 OperandVector &Operands,
2626 MCStreamer &Out, uint64_t &ErrorInfo,
2627 bool MatchingInlineAsm) {
2628 if (isParsingIntelSyntax())
2629 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2631 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expand FPU mnemonics that stand for "wait + no-wait form".
///
/// The mnemonic token \p Op is looked up in the table below. For a listed
/// mnemonic, a WAIT instruction is emitted (skipped when matching inline asm)
/// and Operands[0] is replaced with the corresponding "fn*" token, so that
/// the caller then matches the no-wait instruction.
/// NOTE(review): the guard that skips unlisted mnemonics and the declaration
/// of Inst are not visible in this excerpt.
2635 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2636 OperandVector &Operands, MCStreamer &Out,
2637 bool MatchingInlineAsm) {
2638 // FIXME: This should be replaced with a real .td file alias mechanism.
2639 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2641 const char *Repl = StringSwitch<const char *>(Op.getToken())
2642 .Case("finit", "fninit")
2643 .Case("fsave", "fnsave")
2644 .Case("fstcw", "fnstcw")
2645 .Case("fstcww", "fnstcw")
2646 .Case("fstenv", "fnstenv")
2647 .Case("fstsw", "fnstsw")
2648 .Case("fstsww", "fnstsw")
2649 .Case("fclex", "fnclex")
2653 Inst.setOpcode(X86::WAIT);
2655 if (!MatchingInlineAsm)
2656 EmitInstruction(Inst, Operands, Out);
// Swap the mnemonic for its no-wait form.
2657 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Report an "instruction requires: <features>" error at \p IDLoc.
///
/// \param ErrorInfo Bit set of missing features; must be non-zero (asserted).
/// \returns The result of Error().
2661 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2662 bool MatchingInlineAsm) {
2663 assert(ErrorInfo && "Unknown missing feature!");
2664 SmallString<126> Msg;
2665 raw_svector_ostream OS(Msg);
2666 OS << "instruction requires:";
// The loop runs sizeof(ErrorInfo)*8-1 (i.e. 63) times, appending the name of
// every feature whose bit is selected by Mask.
// NOTE(review): Mask's declaration and per-iteration update are not visible
// in this excerpt; if it starts at bit 0 and shifts by one, the top bit of
// ErrorInfo is never examined - confirm this is intended.
2668 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2669 if (ErrorInfo & Mask)
2670 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2673 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
/// Match and emit one instruction in AT&T syntax.
///
/// Strategy visible below: expand FPU wait aliases, try a direct match, and on
/// an invalid-operand or mnemonic failure retry with each candidate size
/// suffix written into the mnemonic. Exactly one successful suffix match is
/// emitted; otherwise the most specific diagnostic available is reported
/// (ambiguous suffix, invalid mnemonic, bad operand, missing feature).
///
/// \param IDLoc     Location used for diagnostics and replacement tokens.
/// \param Opcode    Set to the opcode of the matched instruction on success.
/// \param Operands  Parsed operands; Operands[0] must be the mnemonic token.
/// \param ErrorInfo In/out matcher error information (operand index or
///                  missing-feature bits, depending on the failure).
/// \param MatchingInlineAsm When true, nothing is emitted to the streamer.
/// NOTE(review): several lines (the Match array and Inst declarations, the
/// success returns) are not visible in this excerpt.
2676 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2677 OperandVector &Operands,
2679 uint64_t &ErrorInfo,
2680 bool MatchingInlineAsm) {
2681 assert(!Operands.empty() && "Unexpect empty operand list!");
2682 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2683 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2684 SMRange EmptyRange = None;
2686 // First, handle aliases that expand to multiple instructions.
2687 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2689 bool WasOriginallyInvalidOperand = false;
2692 // First, try a direct match.
// The last argument is MatchInstruction's VariantID; the bool returned by
// isParsingIntelSyntax() converts to 0 or 1.
2693 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2694 isParsingIntelSyntax())) {
2695 default: llvm_unreachable("Unexpected match result!");
2697 // Some instructions need post-processing to, for example, tweak which
2698 // encoding is selected. Loop on it while changes happen so the
2699 // individual transformations can chain off each other.
2700 if (!MatchingInlineAsm)
2701 while (processInstruction(Inst, Operands))
2705 if (!MatchingInlineAsm)
2706 EmitInstruction(Inst, Operands, Out);
2707 Opcode = Inst.getOpcode();
2709 case Match_MissingFeature:
2710 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2711 case Match_InvalidOperand:
// Remembered so the later "all mnemonic failures" path can report an operand
// problem instead of an invalid mnemonic.
2712 WasOriginallyInvalidOperand = true;
2714 case Match_MnemonicFail:
2718 // FIXME: Ideally, we would only attempt suffix matches for things which are
2719 // valid prefixes, and we could just infer the right unambiguous
2720 // type. However, that requires substantially more matcher support than the
2723 // Change the operand to point to a temporary token.
2724 StringRef Base = Op.getToken();
2725 SmallString<16> Tmp;
// NOTE(review): the lines that fill Tmp are not visible here; the loop below
// overwrites Tmp.back() with each candidate suffix character.
2728 Op.setTokenValue(Tmp);
2730 // If this instruction starts with an 'f', then it is a floating point stack
2731 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2732 // 80-bit floating point, which use the suffixes s,l,t respectively.
2734 // Otherwise, we assume that this may be an integer instruction, which comes
2735 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Both strings supply four characters; the FP one is padded with an explicit
// NUL as its fourth entry.
2736 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2738 // Check for the various suffix matches.
2739 uint64_t ErrorInfoIgnore;
2740 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2743 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2744 Tmp.back() = Suffixes[I];
2745 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2746 MatchingInlineAsm, isParsingIntelSyntax());
2747 // If this returned as a missing feature failure, remember that.
2748 if (Match[I] == Match_MissingFeature)
2749 ErrorInfoMissingFeature = ErrorInfoIgnore;
2752 // Restore the old token.
2753 Op.setTokenValue(Base);
2755 // If exactly one matched, then we treat that as a successful match (and the
2756 // instruction will already have been filled in correctly, since the failing
2757 // matches won't have modified it).
2758 unsigned NumSuccessfulMatches =
2759 std::count(std::begin(Match), std::end(Match), Match_Success);
2760 if (NumSuccessfulMatches == 1) {
2762 if (!MatchingInlineAsm)
2763 EmitInstruction(Inst, Operands, Out);
2764 Opcode = Inst.getOpcode();
2768 // Otherwise, the match failed, try to produce a decent error message.
2770 // If we had multiple suffix matches, then identify this as an ambiguous
2772 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2774 unsigned NumMatches = 0;
2775 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2776 if (Match[I] == Match_Success)
2777 MatchChars[NumMatches++] = Suffixes[I];
2779 SmallString<126> Msg;
2780 raw_svector_ostream OS(Msg);
2781 OS << "ambiguous instructions require an explicit suffix (could be ";
2782 for (unsigned i = 0; i != NumMatches; ++i) {
2785 if (i + 1 == NumMatches)
2787 OS << "'" << Base << MatchChars[i] << "'";
2790 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
2794 // Okay, we know that none of the variants matched successfully.
2796 // If all of the instructions reported an invalid mnemonic, then the original
2797 // mnemonic was invalid.
// NOTE(review): the literal 4 is compared against a count over Match, whose
// length is elsewhere taken via array_lengthof(Match); the declaration is not
// visible here - confirm the two agree.
2798 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2799 if (!WasOriginallyInvalidOperand) {
2800 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2801 Op.getLocRange(), MatchingInlineAsm);
2804 // Recover location info for the operand if we know which was the problem.
// ErrorInfo here is the index of the offending operand from the direct match;
// ~0ULL means the index is unknown.
2805 if (ErrorInfo != ~0ULL) {
2806 if (ErrorInfo >= Operands.size())
2807 return Error(IDLoc, "too few operands for instruction", EmptyRange,
2810 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2811 if (Operand.getStartLoc().isValid()) {
2812 SMRange OperandRange = Operand.getLocRange();
2813 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2814 OperandRange, MatchingInlineAsm);
2818 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2822 // If one instruction matched with a missing feature, report this as a
2824 if (std::count(std::begin(Match), std::end(Match),
2825 Match_MissingFeature) == 1) {
2826 ErrorInfo = ErrorInfoMissingFeature;
2827 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2831 // If one instruction matched with an invalid operand, report this as an
2833 if (std::count(std::begin(Match), std::end(Match),
2834 Match_InvalidOperand) == 1) {
2835 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2839 // If all of these were an outright failure, report it in a useless way.
2840 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2841 EmptyRange, MatchingInlineAsm);
/// Decide whether a matched memory size may be recorded for an unsized memory
/// operand on an AVX512 target.
///
/// \param Size             Memory size (in bits) that produced a match.
/// \param UnsizedMemOpNext Operand following the unsized memory operand, or
///                         null; consulted only for the 64/32 case.
/// \returns For Size 64 or 32: Size when the following operand is a token
///          beginning with "{1to", otherwise 0.
/// NOTE(review): the return statements of the other branches are not visible
/// in this excerpt.
2845 unsigned X86AsmParser::AdjustAVX512Mem(unsigned Size,
2846 X86Operand* UnsizedMemOpNext) {
2847 // Check for the existence of an AVX512 platform
2848 if (!getSTI().getFeatureBits()[X86::FeatureAVX512])
2850 // Allow adjusting upon a (x|y|z)mm
2851 if (Size == 512 || Size == 256 || Size == 128)
2853 // This is an allegedly broadcasting mem op adjustment,
2854 // allow some more inquiring to validate it
2855 if (Size == 64 || Size == 32)
2856 return UnsizedMemOpNext && UnsizedMemOpNext->isToken() &&
2857 UnsizedMemOpNext->getToken().substr(0, 4).equals("{1to") ? Size : 0;
2858 // Do not allow any other type of adjustments
/// Match and emit one instruction in Intel syntax.
///
/// Intel syntax carries operand size on the memory operand rather than on the
/// mnemonic, so when an unsized memory operand is present the matcher is tried
/// once per candidate size. Exactly one successful match is emitted; several
/// successes are reported as an ambiguous operand size; otherwise a
/// missing-feature, invalid-operand or unknown-mnemonic error is reported.
///
/// \param IDLoc     Location used for diagnostics.
/// \param Opcode    Set to the opcode of the matched instruction on success.
/// \param Operands  Parsed operands; Operands[0] must be the mnemonic token.
/// \param ErrorInfo In/out matcher error information.
/// \param MatchingInlineAsm When true, nothing is emitted to the streamer.
/// NOTE(review): several lines (Inst declaration, some conditions and the
/// success return) are not visible in this excerpt.
2862 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2863 OperandVector &Operands,
2865 uint64_t &ErrorInfo,
2866 bool MatchingInlineAsm) {
2867 assert(!Operands.empty() && "Unexpect empty operand list!");
2868 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2869 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2870 StringRef Mnemonic = Op.getToken();
2871 SMRange EmptyRange = None;
// Base keeps the original mnemonic so the token can be restored after the
// temporary suffixed "push" attempt below.
2872 StringRef Base = Op.getToken();
2874 // First, handle aliases that expand to multiple instructions.
2875 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2879 // Find one unsized memory operand, if present.
2880 X86Operand *UnsizedMemOp = nullptr;
2881 // If unsized memory operand was found - obtain following operand.
2882 // For use in AdjustAVX512Mem
2883 X86Operand *UnsizedMemOpNext = nullptr;
2884 for (const auto &Op : Operands) {
2885 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2887 UnsizedMemOpNext = X86Op;
2888 // Have we found an unqualified memory operand,
2889 // break. IA allows only one memory operand.
2892 if (X86Op->isMemUnsized())
2893 UnsizedMemOp = X86Op;
2896 // Allow some instructions to have implicitly pointer-sized operands. This is
2897 // compatible with gas.
2899 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2900 for (const char *Instr : PtrSizedInstrs) {
2901 if (Mnemonic == Instr) {
2902 UnsizedMemOp->Mem.Size = getPointerWidth();
// Match collects one result code per matching attempt.
2908 SmallVector<unsigned, 8> Match;
2909 uint64_t ErrorInfoMissingFeature = 0;
2911 // If an unsized push has an immediate operand, default its size to the
2912 // pointer width.
2913 if (Mnemonic == "push" && Operands.size() == 2) {
2914 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
2915 if (X86Op->isImm()) {
2916 // If it's not a constant fall through and let remainder take care of it.
2917 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
2918 unsigned Size = getPointerWidth();
2920 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
// Build a temporary mnemonic carrying an explicit size suffix chosen from
// the current mode.
2921 SmallString<16> Tmp;
2923 Tmp += (is64BitMode())
2925 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
2926 Op.setTokenValue(Tmp);
2927 // Do match in ATT mode to allow explicit suffix usage.
2928 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
2930 false /*isParsingIntelSyntax()*/));
2931 Op.setTokenValue(Base);
2936 // If an unsized memory operand is present, try to match with each memory
2937 // operand size. In Intel assembly, the size is not part of the instruction
2939 unsigned MatchedSize = 0;
2940 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
// Candidate memory operand sizes, in bits, tried in this order.
2941 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2942 for (unsigned Size : MopSizes) {
2943 UnsizedMemOp->Mem.Size = Size;
2944 uint64_t ErrorInfoIgnore;
2945 unsigned LastOpcode = Inst.getOpcode();
2946 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2947 MatchingInlineAsm, isParsingIntelSyntax());
// NOTE(review): the statement guarded by this condition is not visible here;
// the condition holds for the first attempt or when the matched opcode
// differs from the previous attempt's.
2948 if (Match.empty() || LastOpcode != Inst.getOpcode())
2951 // If this returned as a missing feature failure, remember that.
2952 if (Match.back() == Match_MissingFeature)
2953 ErrorInfoMissingFeature = ErrorInfoIgnore;
2954 if (M == Match_Success)
2955 // MS-compatibility:
2956 // Adjust AVX512 vector/broadcast memory operand,
2957 // when facing the absence of a size qualifier.
2958 // Match GCC behavior on respective cases.
2959 MatchedSize = AdjustAVX512Mem(Size, UnsizedMemOpNext);
2962 // Restore the size of the unsized memory operand if we modified it.
2964 UnsizedMemOp->Mem.Size = 0;
2967 // If we haven't matched anything yet, this is not a basic integer or FPU
2968 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2969 // matching with the unsized operand.
2970 if (Match.empty()) {
2971 Match.push_back(MatchInstruction(
2972 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
2973 // If this returned as a missing feature failure, remember that.
2974 if (Match.back() == Match_MissingFeature)
2975 ErrorInfoMissingFeature = ErrorInfo;
2978 // Restore the size of the unsized memory operand if we modified it.
2980 UnsizedMemOp->Mem.Size = 0;
2982 // If it's a bad mnemonic, all results will be the same.
2983 if (Match.back() == Match_MnemonicFail) {
2984 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2985 Op.getLocRange(), MatchingInlineAsm);
2988 // If exactly one matched, then we treat that as a successful match (and the
2989 // instruction will already have been filled in correctly, since the failing
2990 // matches won't have modified it).
2991 unsigned NumSuccessfulMatches =
2992 std::count(std::begin(Match), std::end(Match), Match_Success);
2993 if (NumSuccessfulMatches == 1) {
2994 if (MatchedSize && isParsingInlineAsm() && isParsingIntelSyntax())
2995 // MS compatibility -
2996 // Fix the rewrite according to the matched memory size
2997 // MS inline assembly only
// Only the AOK_SizeDirective rewrite located at the start of the unsized
// memory operand is updated.
2998 for (AsmRewrite &AR : *InstInfo->AsmRewrites)
2999 if ((AR.Loc.getPointer() == UnsizedMemOp->StartLoc.getPointer()) &&
3000 (AR.Kind == AOK_SizeDirective))
3001 AR.Val = MatchedSize;
3002 // Some instructions need post-processing to, for example, tweak which
3003 // encoding is selected. Loop on it while changes happen so the individual
3004 // transformations can chain off each other.
3005 if (!MatchingInlineAsm)
3006 while (processInstruction(Inst, Operands))
3009 if (!MatchingInlineAsm)
3010 EmitInstruction(Inst, Operands, Out);
3011 Opcode = Inst.getOpcode();
3013 } else if (NumSuccessfulMatches > 1) {
3014 assert(UnsizedMemOp &&
3015 "multiple matches only possible with unsized memory operands");
3016 return Error(UnsizedMemOp->getStartLoc(),
3017 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3018 UnsizedMemOp->getLocRange(), MatchingInlineAsm);
3021 // If one instruction matched with a missing feature, report this as a
3023 if (std::count(std::begin(Match), std::end(Match),
3024 Match_MissingFeature) == 1) {
3025 ErrorInfo = ErrorInfoMissingFeature;
3026 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3030 // If one instruction matched with an invalid operand, report this as an
3032 if (std::count(std::begin(Match), std::end(Match),
3033 Match_InvalidOperand) == 1) {
3034 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3038 // If all of these were an outright failure, report it in a useless way.
3039 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
/// Returns true when \p RegNo belongs to the segment-register class, i.e. for
/// exactly those registers this hook reports as omitted from clobber lists.
3043 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3044 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handle the X86-specific assembler directives visible here: ".word",
/// ".code*", ".att_syntax", ".intel_syntax" and ".even".
///
/// \param DirectiveID Token holding the directive name and its location.
/// \returns For ".word", ".code*" and ".even" the result of the dedicated
///          parse routine; an Error() result for the unsupported
///          "noprefix"/"prefix" arguments.
/// NOTE(review): the remaining return statements (including the one for an
/// unrecognized directive) are not visible in this excerpt.
3047 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3048 MCAsmParser &Parser = getParser();
3049 StringRef IDVal = DirectiveID.getIdentifier();
// .word is handled as a 2-byte value directive.
3050 if (IDVal == ".word")
3051 return ParseDirectiveWord(2, DirectiveID.getLoc());
3052 else if (IDVal.startswith(".code"))
3053 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3054 else if (IDVal.startswith(".att_syntax")) {
3055 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3056 if (Parser.getTok().getString() == "prefix")
3058 else if (Parser.getTok().getString() == "noprefix")
3059 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3060 "supported: registers must have a "
3061 "'%' prefix in .att_syntax");
// Assembler dialect 0 is selected for .att_syntax.
3063 getParser().setAssemblerDialect(0);
3065 } else if (IDVal.startswith(".intel_syntax")) {
// Assembler dialect 1 is selected for .intel_syntax, before its optional
// argument is examined.
3066 getParser().setAssemblerDialect(1);
3067 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3068 if (Parser.getTok().getString() == "noprefix")
3070 else if (Parser.getTok().getString() == "prefix")
3071 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3072 "supported: registers must not have "
3073 "a '%' prefix in .intel_syntax");
3076 } else if (IDVal == ".even")
3077 return parseDirectiveEven(DirectiveID.getLoc());
3081 /// parseDirectiveEven
/// Handles the ".even" directive: rejects any trailing token, then emits an
/// alignment of 2 in the current section, using code alignment when the
/// section reports UseCodeAlign() and value alignment otherwise.
/// NOTE(review): the condition guarding InitSections() and the return
/// statements are not visible in this excerpt.
3083 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3084 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3085 TokError("unexpected token in directive");
3088 const MCSection *Section = getStreamer().getCurrentSectionOnly();
// Sections are initialized and the current section is re-read before the
// alignment is emitted.
3090 getStreamer().InitSections(false);
3091 Section = getStreamer().getCurrentSectionOnly();
3093 if (Section->UseCodeAlign())
3094 getStreamer().EmitCodeAlignment(2, 0);
3096 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3099 /// ParseDirectiveWord
3100 /// ::= .word [ expression (, expression)* ]
///
/// Emits each parsed expression as a \p Size-byte value. Constant expressions
/// are range-checked first: a value is rejected when it fits neither as an
/// unsigned nor as a signed integer of 8 * Size bits.
///
/// \param Size Width of each emitted value in bytes; asserted to be <= 8.
/// \param L    Location of the directive, used for the "unexpected token"
///             diagnostic.
/// NOTE(review): the enclosing loop and the return statements are not visible
/// in this excerpt.
3101 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
3102 MCAsmParser &Parser = getParser();
3103 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3105 const MCExpr *Value;
// Remember where the expression starts for diagnostics and for EmitValue().
3106 SMLoc ExprLoc = getLexer().getLoc();
3107 if (getParser().parseExpression(Value))
3110 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
3111 assert(Size <= 8 && "Invalid size");
3112 uint64_t IntValue = MCE->getValue();
3113 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3114 return Error(ExprLoc, "literal value out of range for directive");
3115 getStreamer().EmitIntValue(IntValue, Size);
// Non-constant expressions are emitted symbolically.
3117 getStreamer().EmitValue(Value, Size, ExprLoc);
3120 if (getLexer().is(AsmToken::EndOfStatement))
3123 // FIXME: Improve diagnostic.
3124 if (getLexer().isNot(AsmToken::Comma)) {
3125 Error(L, "unexpected token in directive");
3136 /// ParseDirectiveCode
3137 /// ::= .code16 | .code32 | .code64
///
/// Also accepts ".code16gcc". For each recognized directive the parser mode
/// is switched and the matching assembler flag is emitted to the streamer,
/// but only when the parser is not already in that mode. Any other spelling
/// is reported as "unknown directive".
///
/// \param IDVal Full directive name, including the leading dot.
/// \param L     Location of the directive, used for the diagnostic.
/// NOTE(review): some statements in each branch and the return statements are
/// not visible in this excerpt.
3138 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3139 MCAsmParser &Parser = getParser();
3141 if (IDVal == ".code16") {
3143 if (!is16BitMode()) {
3144 SwitchMode(X86::Mode16Bit);
3145 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3147 } else if (IDVal == ".code16gcc") {
3148 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3151 if (!is16BitMode()) {
3152 SwitchMode(X86::Mode16Bit);
3153 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3155 } else if (IDVal == ".code32") {
3157 if (!is32BitMode()) {
3158 SwitchMode(X86::Mode32Bit);
3159 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3161 } else if (IDVal == ".code64") {
3163 if (!is64BitMode()) {
3164 SwitchMode(X86::Mode64Bit);
3165 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3168 Error(L, "unknown directive " + IDVal);
3175 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both the 32-bit and the 64-bit X86 targets.
3176 extern "C" void LLVMInitializeX86AsmParser() {
3177 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3178 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3181 #define GET_REGISTER_MATCHER
3182 #define GET_MATCHER_IMPLEMENTATION
3183 #define GET_SUBTARGET_FEATURE_NAME
3184 #include "X86GenAsmMatcher.inc"