1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table, indexed by InfixCalculatorTok values (see the
// OpPrecedence[Op] comparisons in InfixCalculator::pushOperator).
42 static const char OpPrecedence[] = {
// Target assembly parser for X86, derived from MCTargetAsmParser.
58 class X86AsmParser : public MCTargetAsmParser {
// Instruction info handed to the constructor.
59 const MCInstrInfo &MII;
// Initialised to nullptr by the constructor; CreateMemForInlineAsm records
// rewrites through InstInfo->AsmRewrites.
60 ParseInstructionInfo *InstInfo;
// Created in the constructor via CreateX86AsmInstrumentation(); used by
// SetFrameRegister.
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
// Captures the location of the current token.
// NOTE(review): presumably the token is then lexed and Result returned —
// confirm against the full body.
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
/// Runs the generated matcher (MatchInstructionImpl) on Operands, filling in
/// Inst and ErrorInfo.
/// NOTE(review): the two SwitchMode calls are presumably guarded by the
/// Code16GCC check the comment below refers to — confirm.
72 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
73 uint64_t &ErrorInfo, bool matchingInlineAsm,
74 unsigned VariantID = 0) {
75 // In Code16GCC mode, match as 32-bit.
77 SwitchMode(X86::Mode32Bit);
78 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
79 matchingInlineAsm, VariantID);
// Go back to 16-bit mode once matching is done.
81 SwitchMode(X86::Mode16Bit);
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER)
// and operators/parentheses (e.g. IC_PLUS, IC_LPAREN).
85 enum InfixCalculatorTok {
// Kinds of Intel-syntax operators.
// NOTE(review): presumably the values produced by IdentifyIntelOperator and
// consumed by ParseIntelOperator — confirm.
101 enum IntelOperatorKind {
// Small infix-expression evaluator. Operators wait on InfixOperatorStack until
// precedence allows them to be appended to PostfixStack; the postfix sequence
// is then folded into a single int64_t value.
109 class InfixCalculator {
// A token kind paired with its value (operators are stored with value 0).
110 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
111 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
112 SmallVector<ICToken, 4> PostfixStack;
// Pops the most recent postfix entry, which must be an operand.
115 int64_t popOperand() {
116 assert (!PostfixStack.empty() && "Poped an empty stack!");
117 ICToken Op = PostfixStack.pop_back_val();
118 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
119 && "Expected and immediate or register!");
// Appends an immediate or register operand, with its value, to the postfix
// stack.
122 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
123 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
124 "Unexpected operand!");
125 PostfixStack.push_back(std::make_pair(Op, Val));
// Drops the operator on top of the operator stack.
128 void popOperator() { InfixOperatorStack.pop_back(); }
// Pushes Op, first moving stacked operators of greater or equal precedence
// onto the postfix stack.
129 void pushOperator(InfixCalculatorTok Op) {
130 // Push the new operator if the stack is empty.
131 if (InfixOperatorStack.empty()) {
132 InfixOperatorStack.push_back(Op);
136 // Push the new operator if it has a higher precedence than the operator
137 // on the top of the stack or the operator on the top of the stack is a
139 unsigned Idx = InfixOperatorStack.size() - 1;
140 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
141 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
142 InfixOperatorStack.push_back(Op);
146 // The operator on the top of the stack has higher precedence than the
148 unsigned ParenCount = 0;
150 // Nothing to process.
151 if (InfixOperatorStack.empty())
154 Idx = InfixOperatorStack.size() - 1;
155 StackOp = InfixOperatorStack[Idx];
156 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
159 // If we have an even parentheses count and we see a left parentheses,
160 // then stop processing.
161 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are discarded; every other operator moves to the postfix stack.
164 if (StackOp == IC_RPAREN) {
166 InfixOperatorStack.pop_back();
167 } else if (StackOp == IC_LPAREN) {
169 InfixOperatorStack.pop_back();
171 InfixOperatorStack.pop_back();
172 PostfixStack.push_back(std::make_pair(StackOp, 0));
175 // Push the new operator.
176 InfixOperatorStack.push_back(Op);
// Evaluation: flush the pending operators, then fold the postfix sequence.
180 // Push any remaining operators onto the postfix stack.
181 while (!InfixOperatorStack.empty()) {
182 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
183 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
184 PostfixStack.push_back(std::make_pair(StackOp, 0));
187 if (PostfixStack.empty())
// Scratch stack holding operands while the postfix sequence is folded.
190 SmallVector<ICToken, 16> OperandStack;
191 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
192 ICToken Op = PostfixStack[i];
193 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
194 OperandStack.push_back(Op);
// Every operator is binary: it needs two operands on the scratch stack.
196 assert (OperandStack.size() > 1 && "Too few operands.");
// Op2 is the right-hand operand (pushed last), Op1 the left-hand one.
198 ICToken Op2 = OperandStack.pop_back_val();
199 ICToken Op1 = OperandStack.pop_back_val();
202 report_fatal_error("Unexpected operator!");
// Each result is pushed back as an immediate.
205 Val = Op1.second + Op2.second;
206 OperandStack.push_back(std::make_pair(IC_IMM, Val));
209 Val = Op1.second - Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Multiply operation with an immediate and a register!");
215 Val = Op1.second * Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Divide operation with an immediate and a register!");
221 assert (Op2.second != 0 && "Division by zero!");
222 Val = Op1.second / Op2.second;
223 OperandStack.push_back(std::make_pair(IC_IMM, Val));
226 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
227 "Or operation with an immediate and a register!");
228 Val = Op1.second | Op2.second;
229 OperandStack.push_back(std::make_pair(IC_IMM, Val));
232 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
233 "Xor operation with an immediate and a register!");
234 Val = Op1.second ^ Op2.second;
235 OperandStack.push_back(std::make_pair(IC_IMM, Val));
238 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
239 "And operation with an immediate and a register!");
240 Val = Op1.second & Op2.second;
241 OperandStack.push_back(std::make_pair(IC_IMM, Val));
244 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
245 "Left shift operation with an immediate and a register!");
246 Val = Op1.second << Op2.second;
247 OperandStack.push_back(std::make_pair(IC_IMM, Val));
250 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
251 "Right shift operation with an immediate and a register!");
252 Val = Op1.second >> Op2.second;
253 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one entry must remain; its value is the result.
258 assert (OperandStack.size() == 1 && "Expected a single result.");
259 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine (e.g. IES_PLUS, IES_REGISTER, IES_INTEGER,
// IES_RBRAC, IES_ERROR).
263 enum IntelExprState {
// Tracks the pieces of an Intel-syntax expression (base/index register, scale,
// symbol, immediate) as tokens arrive. Arithmetic is forwarded to the
// calculator member IC.
284 class IntelExprStateMachine {
// PrevState holds the state that was current when the previous handler began.
285 IntelExprState State, PrevState;
286 unsigned BaseReg, IndexReg, TmpReg, Scale;
290 bool StopOnLBrac, AddImmPrefix;
292 InlineAsmIdentifierInfo Info;
// Starts in IES_PLUS with no registers, a scale of 1 and the given initial
// immediate.
295 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
296 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
297 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
298 AddImmPrefix(addimmprefix) { Info.clear(); }
300 unsigned getBaseReg() { return BaseReg; }
301 unsigned getIndexReg() { return IndexReg; }
302 unsigned getScale() { return Scale; }
303 const MCExpr *getSym() { return Sym; }
304 StringRef getSymName() { return SymName; }
// Initial immediate plus whatever the calculator evaluates to.
305 int64_t getImm() { return Imm + IC.execute(); }
// An expression may end only after a closing bracket or an integer.
306 bool isValidEndState() {
307 return State == IES_RBRAC || State == IES_INTEGER;
309 bool getStopOnLBrac() { return StopOnLBrac; }
310 bool getAddImmPrefix() { return AddImmPrefix; }
311 bool hadError() { return State == IES_ERROR; }
313 InlineAsmIdentifierInfo &getIdentifierInfo() {
// Handler body: pushes IC_OR and records the entry state as PrevState.
318 IntelExprState CurrState = State;
327 IC.pushOperator(IC_OR);
330 PrevState = CurrState;
// Handler body: pushes IC_XOR.
333 IntelExprState CurrState = State;
342 IC.pushOperator(IC_XOR);
345 PrevState = CurrState;
// Handler body: pushes IC_AND.
348 IntelExprState CurrState = State;
357 IC.pushOperator(IC_AND);
360 PrevState = CurrState;
// Handler body: pushes IC_LSHIFT.
363 IntelExprState CurrState = State;
372 IC.pushOperator(IC_LSHIFT);
375 PrevState = CurrState;
// Handler body: pushes IC_RSHIFT.
378 IntelExprState CurrState = State;
387 IC.pushOperator(IC_RSHIFT);
390 PrevState = CurrState;
// Handler body: pushes IC_PLUS and handles a register seen just before it.
393 IntelExprState CurrState = State;
402 IC.pushOperator(IC_PLUS);
403 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
404 // If we already have a BaseReg, then assume this is the IndexReg with
409 assert (!IndexReg && "BaseReg/IndexReg already set!");
416 PrevState = CurrState;
// Handler body for '-': distinguishes binary minus from unary minus.
419 IntelExprState CurrState = State;
435 // Only push the minus operator if it is not a unary operator.
436 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
437 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
438 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
439 IC.pushOperator(IC_MINUS);
440 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
441 // If we already have a BaseReg, then assume this is the IndexReg with
446 assert (!IndexReg && "BaseReg/IndexReg already set!");
453 PrevState = CurrState;
456 IntelExprState CurrState = State;
466 PrevState = CurrState;
// Handles a register token.
468 void onRegister(unsigned Reg) {
469 IntelExprState CurrState = State;
476 State = IES_REGISTER;
478 IC.pushOperand(IC_REGISTER);
481 // Index Register - Scale * Register
482 if (PrevState == IES_INTEGER) {
483 assert (!IndexReg && "IndexReg already set!");
484 State = IES_REGISTER;
486 // Get the scale and replace the 'Scale * Register' with '0'.
487 Scale = IC.popOperand();
488 IC.pushOperand(IC_IMM);
495 PrevState = CurrState;
// Records the symbol's name and pushes a placeholder immediate (value 0).
497 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
508 SymName = SymRefName;
509 IC.pushOperand(IC_IMM);
// Handles an integer token. Problems such as an invalid scale factor are
// reported through ErrMsg.
513 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
514 IntelExprState CurrState = State;
531 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
532 // Index Register - Register * Scale
533 assert (!IndexReg && "IndexReg already set!");
536 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
537 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
540 // Get the scale and replace the 'Register * Scale' with '0'.
542 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
543 PrevState == IES_OR || PrevState == IES_AND ||
544 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
545 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
546 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
547 PrevState == IES_NOT || PrevState == IES_XOR) &&
548 CurrState == IES_MINUS) {
549 // Unary minus. No need to pop the minus operand because it was never
551 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
552 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
553 PrevState == IES_OR || PrevState == IES_AND ||
554 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
555 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
556 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
557 PrevState == IES_NOT || PrevState == IES_XOR) &&
558 CurrState == IES_NOT) {
559 // Unary not. No need to pop the not operand because it was never
561 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Otherwise the integer is pushed unchanged.
563 IC.pushOperand(IC_IMM, TmpInt);
567 PrevState = CurrState;
// Handler body: enters IES_MULTIPLY and pushes IC_MULTIPLY.
579 State = IES_MULTIPLY;
580 IC.pushOperator(IC_MULTIPLY);
// Handler body: pushes IC_DIVIDE.
593 IC.pushOperator(IC_DIVIDE);
// Handler body: pushes IC_PLUS.
605 IC.pushOperator(IC_PLUS);
610 IntelExprState CurrState = State;
619 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
620 // If we already have a BaseReg, then assume this is the IndexReg with
625 assert (!IndexReg && "BaseReg/IndexReg already set!");
632 PrevState = CurrState;
// Handler body: pushes IC_LPAREN, after screening out a preceding unary
// minus/not (see the FIXME).
635 IntelExprState CurrState = State;
651 // FIXME: We don't handle this type of unary minus or not, yet.
652 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
653 PrevState == IES_OR || PrevState == IES_AND ||
654 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
655 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
656 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
657 PrevState == IES_NOT || PrevState == IES_XOR) &&
658 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
663 IC.pushOperator(IC_LPAREN);
666 PrevState = CurrState;
// Handler body: pushes IC_RPAREN.
678 IC.pushOperator(IC_RPAREN);
/// Reports Msg at location L, with an optional source range, through the
/// underlying parser.
/// When MatchingInlineAsm is set and the lexer is not at the start of a
/// statement, the rest of the statement is skipped.
684 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
685 bool MatchingInlineAsm = false) {
686 MCAsmParser &Parser = getParser();
687 if (MatchingInlineAsm) {
688 if (!getLexer().isAtStartOfStatement())
689 Parser.eatToEndOfStatement();
692 return Parser.Error(L, Msg, Range);
// Error helper whose std::nullptr_t result can be returned where an operand
// pointer is expected.
// NOTE(review): presumably reports Msg at Loc and returns nullptr — confirm.
695 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// Operand-parsing helpers. These are declarations only; several are defined
// later in this file (e.g. DefaultMemSIOperand, VerifyAndAdjustOperands,
// ParseOperand, CreateMemForInlineAsm).
700 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
701 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
702 bool IsSIReg(unsigned Reg);
703 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
705 AddDefaultSrcDestOperands(OperandVector &Operands,
706 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
707 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
708 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
709 OperandVector &FinalOperands);
// Syntax-specific operand parsers; ParseOperand dispatches between the AT&T
// and Intel variants.
710 std::unique_ptr<X86Operand> ParseOperand();
711 std::unique_ptr<X86Operand> ParseATTOperand();
712 std::unique_ptr<X86Operand> ParseIntelOperand();
713 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
714 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
715 unsigned IdentifyIntelOperator(StringRef Name);
716 unsigned ParseIntelOperator(unsigned OpKind);
717 std::unique_ptr<X86Operand>
718 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
719 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
720 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
721 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
722 std::unique_ptr<X86Operand>
723 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
724 bool isSymbol, unsigned Size);
725 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
726 InlineAsmIdentifierInfo &Info,
727 bool IsUnevaluatedOperand, SMLoc &End);
729 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds a memory operand for inline assembly; AllowBetterSizeMatch defaults
// to false.
731 std::unique_ptr<X86Operand>
732 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
733 unsigned IndexReg, unsigned Scale, SMLoc Start,
734 SMLoc End, unsigned Size, StringRef Identifier,
735 InlineAsmIdentifierInfo &Info,
736 bool AllowBetterSizeMatch = false);
// Directive handlers.
738 bool parseDirectiveEven(SMLoc L);
739 bool ParseDirectiveWord(unsigned Size, SMLoc L);
740 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
742 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
744 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
745 /// instrumentation around Inst.
746 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// Matching entry point (an override) and its AT&T / Intel specific variants.
748 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
749 OperandVector &Operands, MCStreamer &Out,
751 bool MatchingInlineAsm) override;
753 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
754 MCStreamer &Out, bool MatchingInlineAsm);
756 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
757 bool MatchingInlineAsm);
759 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
760 OperandVector &Operands, MCStreamer &Out,
762 bool MatchingInlineAsm);
764 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
765 OperandVector &Operands, MCStreamer &Out,
767 bool MatchingInlineAsm);
769 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
771 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
772 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
773 /// return false if no parsing errors occurred, true otherwise.
774 bool HandleAVX512Operand(OperandVector &Operands,
775 const MCParsedAsmOperand &Op);
777 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
// True when the subtarget's feature bits include X86::Mode64Bit.
779 bool is64BitMode() const {
780 // FIXME: Can tablegen auto-generate this?
781 return getSTI().getFeatureBits()[X86::Mode64Bit];
// True when the subtarget's feature bits include X86::Mode32Bit.
783 bool is32BitMode() const {
784 // FIXME: Can tablegen auto-generate this?
785 return getSTI().getFeatureBits()[X86::Mode32Bit];
// True when the subtarget's feature bits include X86::Mode16Bit.
787 bool is16BitMode() const {
788 // FIXME: Can tablegen auto-generate this?
789 return getSTI().getFeatureBits()[X86::Mode16Bit];
// Switches the parser to `mode` (one of X86::Mode64Bit / Mode32Bit /
// Mode16Bit) and recomputes the available features.
791 void SwitchMode(unsigned mode) {
792 MCSubtargetInfo &STI = copySTI();
793 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// The mode bit(s) set right now.
794 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Toggle the old mode bit together with the requested one.
795 unsigned FB = ComputeAvailableFeatures(
796 STI.ToggleFeature(OldMode.flip(mode)));
797 setAvailableFeatures(FB);
// Afterwards `mode` must be the only mode bit set.
799 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
// Returns 16, 32 or 64 according to the active mode; reaching the end without
// any mode set is treated as unreachable.
802 unsigned getPointerWidth() {
803 if (is16BitMode()) return 16;
804 if (is32BitMode()) return 32;
805 if (is64BitMode()) return 64;
806 llvm_unreachable("invalid mode");
// True when the parser's assembler dialect is non-zero; the dialect value is
// converted to bool.
809 bool isParsingIntelSyntax() {
810 return getParser().getAssemblerDialect();
813 /// @name Auto-generated Matcher Functions
// Selects the GET_ASSEMBLER_HEADER section of the generated matcher include.
816 #define GET_ASSEMBLER_HEADER
817 #include "X86GenAsmMatcher.inc"
/// Constructs the parser: stores the instruction info, computes the initial
/// available-feature set from the subtarget, and creates the instrumentation
/// helper.
823 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
824 const MCInstrInfo &mii, const MCTargetOptions &Options)
825 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
828 // Initialize the set of available features.
829 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
830 Instrumentation.reset(
831 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the target-parser interface.
834 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
836 void SetFrameRegister(unsigned RegNo) override;
838 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
839 SMLoc NameLoc, OperandVector &Operands) override;
841 bool ParseDirective(AsmToken DirectiveID) override;
843 } // end anonymous namespace
845 /// @name Auto-generated Match Functions
// Forward declaration; ParseRegister treats a result of 0 as "no match".
// NOTE(review): presumably defined by the generated matcher include — confirm.
848 static unsigned MatchRegisterName(StringRef Name);
/// Validates a base/index register pair for a memory operand and stores a
/// diagnostic in ErrMsg when the combination is rejected.
/// NOTE(review): the return-value convention is not visible here — confirm.
852 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
854 // If we have both a base register and an index register make sure they are
855 // both 64-bit or 32-bit registers.
856 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// RIP cannot be combined with an index register, nor be one itself.
858 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
859 ErrMsg = "invalid base+index expression";
862 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base: a 16- or 32-bit index is rejected, except RIZ.
863 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
864 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
865 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
866 IndexReg != X86::RIZ) {
867 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base: a 16- or 64-bit index is rejected, except EIZ.
870 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
871 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
872 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
873 IndexReg != X86::EIZ){
874 ErrMsg = "base register is 32-bit, but index register is not";
877 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
878 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
879 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
880 ErrMsg = "base register is 16-bit, but index register is not";
// 16-bit addressing: BX/BP pair only with SI/DI, and SI/DI only with BX/BP.
883 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
884 IndexReg != X86::SI && IndexReg != X86::DI) ||
885 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
886 IndexReg != X86::BX && IndexReg != X86::BP)) {
887 ErrMsg = "invalid 16-bit base/index register combination";
/// Parses a register name into RegNo and sets StartLoc/EndLoc.
/// In AT&T syntax a leading '%' is skipped. Names are also retried in
/// lowercase, and "st" / "st(N)" and the "db0".."db7" aliases are recognised.
/// The failure paths visible here return true or the result of Error().
895 bool X86AsmParser::ParseRegister(unsigned &RegNo,
896 SMLoc &StartLoc, SMLoc &EndLoc) {
897 MCAsmParser &Parser = getParser();
899 const AsmToken &PercentTok = Parser.getTok();
900 StartLoc = PercentTok.getLoc();
902 // If we encounter a %, ignore it. This code handles registers with and
903 // without the prefix, unprefixed registers can occur in cfi directives.
904 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
905 Parser.Lex(); // Eat percent token.
907 const AsmToken &Tok = Parser.getTok();
908 EndLoc = Tok.getEndLoc();
// In Intel syntax a non-identifier fails without emitting a diagnostic.
910 if (Tok.isNot(AsmToken::Identifier)) {
911 if (isParsingIntelSyntax()) return true;
912 return Error(StartLoc, "invalid register name",
913 SMRange(StartLoc, EndLoc));
916 RegNo = MatchRegisterName(Tok.getString());
918 // If the match failed, try the register name as lowercase.
920 RegNo = MatchRegisterName(Tok.getString().lower());
922 // The "flags" register cannot be referenced directly.
923 // Treat it as an identifier instead.
924 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
927 if (!is64BitMode()) {
928 // FIXME: This should be done using Requires<Not64BitMode> and
929 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
931 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
933 if (RegNo == X86::RIZ ||
934 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
935 X86II::isX86_64NonExtLowByteReg(RegNo) ||
936 X86II::isX86_64ExtendedReg(RegNo))
937 return Error(StartLoc, "register %"
938 + Tok.getString() + " is only available in 64-bit mode",
939 SMRange(StartLoc, EndLoc));
940 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
941 if (X86II::is32ExtendedReg(RegNo))
942 return Error(StartLoc, "register %"
943 + Tok.getString() + " is only available with AVX512",
944 SMRange(StartLoc, EndLoc));
947 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
948 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
950 Parser.Lex(); // Eat 'st'
952 // Check to see if we have '(4)' after %st.
953 if (getLexer().isNot(AsmToken::LParen))
958 const AsmToken &IntTok = Parser.getTok();
959 if (IntTok.isNot(AsmToken::Integer))
960 return Error(IntTok.getLoc(), "expected stack index");
// Stack indices 0-7 map to ST0-ST7; any other value is an error.
961 switch (IntTok.getIntVal()) {
962 case 0: RegNo = X86::ST0; break;
963 case 1: RegNo = X86::ST1; break;
964 case 2: RegNo = X86::ST2; break;
965 case 3: RegNo = X86::ST3; break;
966 case 4: RegNo = X86::ST4; break;
967 case 5: RegNo = X86::ST5; break;
968 case 6: RegNo = X86::ST6; break;
969 case 7: RegNo = X86::ST7; break;
970 default: return Error(IntTok.getLoc(), "invalid stack index");
973 if (getParser().Lex().isNot(AsmToken::RParen))
974 return Error(Parser.getTok().getLoc(), "expected ')'");
976 EndLoc = Parser.getTok().getEndLoc();
977 Parser.Lex(); // Eat ')'
981 EndLoc = Parser.getTok().getEndLoc();
983 // If this is "db[0-7]", match it as an alias
985 if (RegNo == 0 && Tok.getString().size() == 3 &&
986 Tok.getString().startswith("db")) {
987 switch (Tok.getString()[2]) {
988 case '0': RegNo = X86::DR0; break;
989 case '1': RegNo = X86::DR1; break;
990 case '2': RegNo = X86::DR2; break;
991 case '3': RegNo = X86::DR3; break;
992 case '4': RegNo = X86::DR4; break;
993 case '5': RegNo = X86::DR5; break;
994 case '6': RegNo = X86::DR6; break;
995 case '7': RegNo = X86::DR7; break;
999 EndLoc = Parser.getTok().getEndLoc();
1000 Parser.Lex(); // Eat it.
1006 if (isParsingIntelSyntax()) return true;
1007 return Error(StartLoc, "invalid register name",
1008 SMRange(StartLoc, EndLoc));
1011 Parser.Lex(); // Eat identifier token.
// Forwards the initial frame register to the instrumentation object.
1015 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1016 Instrumentation->SetInitialFrameRegister(RegNo);
// Builds the default SI-based memory operand: zero displacement, no segment
// or index register, scale 1. The base is RSI in 64-bit mode, ESI in 32-bit
// mode or when Code16GCC is set, and SI otherwise.
1019 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1020 bool Parse32 = is32BitMode() || Code16GCC;
1021 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1022 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1023 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1024 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
// Builds the default DI-based memory operand: zero displacement, no segment
// or index register, scale 1. The base is RDI in 64-bit mode, EDI in 32-bit
// mode or when Code16GCC is set, and DI otherwise.
1028 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1029 bool Parse32 = is32BitMode() || Code16GCC;
1030 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1031 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1032 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1033 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
// Classifies Reg as an SI-family register or not; the default case shown
// below is llvm_unreachable.
// NOTE(review): presumably true for SI/ESI/RSI and false for DI/EDI/RDI —
// confirm.
1037 bool X86AsmParser::IsSIReg(unsigned Reg) {
1039 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
// Returns the SI-family or DI-family register of the requested width
// (GR64/GR32/GR16), chosen by IsSIReg. Any other register class is
// unreachable.
1051 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1053 switch (RegClassID) {
1054 default: llvm_unreachable("Unexpected register class");
1055 case X86::GR64RegClassID:
1056 return IsSIReg ? X86::RSI : X86::RDI;
1057 case X86::GR32RegClassID:
1058 return IsSIReg ? X86::ESI : X86::EDI;
1059 case X86::GR16RegClassID:
1060 return IsSIReg ? X86::SI : X86::DI;
// Appends Src and Dst to Operands, taking ownership of both. Intel syntax
// gets the destination first; the other ordering pushes the source first.
1064 void X86AsmParser::AddDefaultSrcDestOperands(
1065 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1066 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1067 if (isParsingIntelSyntax()) {
1068 Operands.push_back(std::move(Dst));
1069 Operands.push_back(std::move(Src));
1072 Operands.push_back(std::move(Src));
1073 Operands.push_back(std::move(Dst));
/// Checks the user-written operands in OrigOperands (which also holds the
/// instruction name at index 0) against the default operands in
/// FinalOperands. Size and segment are copied from the originals into the
/// final memory operands, and the originals are then replaced by the adjusted
/// final operands.
1077 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1078 OperandVector &FinalOperands) {
1080 if (OrigOperands.size() > 1) {
1081 // Check if sizes match, OrigOperands also contains the instruction name
1082 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1083 "Operand size mismatch");
// Warnings are collected first and emitted only after every operand passes.
1085 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1086 // Verify types match
1087 int RegClassID = -1;
1088 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
// i + 1 skips the instruction name stored at OrigOperands[0].
1089 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1090 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1092 if (FinalOp.isReg() &&
1093 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1094 // Return false and let a normal complaint about bogus operands happen
1097 if (FinalOp.isMem()) {
1099 if (!OrigOp.isMem())
1100 // Return false and let a normal complaint about bogus operands happen
1103 unsigned OrigReg = OrigOp.Mem.BaseReg;
1104 unsigned FinalReg = FinalOp.Mem.BaseReg;
1106 // If we've already encountered a register class, make sure all register
1107 // bases are of the same register class
1108 if (RegClassID != -1 &&
1109 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1110 return Error(OrigOp.getStartLoc(),
1111 "mismatching source and destination index registers");
1114 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1115 RegClassID = X86::GR64RegClassID;
1116 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1117 RegClassID = X86::GR32RegClassID;
1118 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1119 RegClassID = X86::GR16RegClassID;
1121 // Unexpected register class type
1122 // Return false and let a normal complaint about bogus operands happen
// Pick the SI/DI register whose width matches the user's base register.
1125 bool IsSI = IsSIReg(FinalReg);
1126 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1128 if (FinalReg != OrigReg) {
1129 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1130 Warnings.push_back(std::make_pair(
1131 OrigOp.getStartLoc(),
1132 "memory operand is only for determining the size, " + RegName +
1133 " will be used for the location"));
1136 FinalOp.Mem.Size = OrigOp.Mem.Size;
1137 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1138 FinalOp.Mem.BaseReg = FinalReg;
1142 // Produce warnings only if all the operands passed the adjustment - prevent
1143 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1144 for (auto &WarningMsg : Warnings) {
1145 Warning(WarningMsg.first, WarningMsg.second);
1148 // Remove old operands
1149 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1150 OrigOperands.pop_back();
1152 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1153 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1154 OrigOperands.push_back(std::move(FinalOperands[i]));
// Dispatches to the Intel or the AT&T operand parser, depending on the active
// syntax.
1159 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1160 if (isParsingIntelSyntax())
1161 return ParseIntelOperand();
1162 return ParseATTOperand();
1165 /// getIntelMemOperandSize - Return intel memory operand size.
/// Sizes are expressed in bits (e.g. "BYTE" -> 8). Each keyword is accepted
/// only in its all-uppercase or all-lowercase spelling.
1166 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1167 unsigned Size = StringSwitch<unsigned>(OpStr)
1168 .Cases("BYTE", "byte", 8)
1169 .Cases("WORD", "word", 16)
1170 .Cases("DWORD", "dword", 32)
1171 .Cases("FWORD", "fword", 48)
1172 .Cases("QWORD", "qword", 64)
1173 .Cases("MMWORD","mmword", 64)
1174 .Cases("XWORD", "xword", 80)
1175 .Cases("TBYTE", "tbyte", 80)
1176 .Cases("XMMWORD", "xmmword", 128)
1177 .Cases("YMMWORD", "ymmword", 256)
1178 .Cases("ZMMWORD", "zmmword", 512)
1179 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Creates the memory operand for an inline-assembly reference.
/// A symbol whose declaration is not a variable becomes an absolute memory
/// reference. Anything else becomes a full memory operand, carrying a size
/// taken from the frontend type when the user gave none.
1184 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1185 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1186 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1187 InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) {
1188 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1189 // some other label reference.
1190 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1191 // Insert an explicit size if the user didn't have one.
1193 Size = getPointerWidth();
1194 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1198 // Create an absolute memory reference in order to match against
1199 // instructions taking a PC relative operand.
1200 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1201 Identifier, Info.OpDecl);
1205 // We either have a direct symbol reference, or an offset from a symbol. The
1206 // parser always puts the symbol on the LHS, so look there for size
1207 // calculation purposes.
1208 unsigned FrontendSize = 0;
1209 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1211 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Info.Type is multiplied by 8 to obtain bits.
1212 if (IsSymRef && !Size && Info.Type)
1213 FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
1215 // When parsing inline assembly we set the base register to a non-zero value
1216 // if we don't know the actual value at this time. This is necessary to
1217 // get the matching correct in some cases.
1218 BaseReg = BaseReg ? BaseReg : 1;
1219 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1220 IndexReg, Scale, Start, End, Size, Identifier,
1221 Info.OpDecl, FrontendSize);
/// Records the inline-asm rewrites needed for a bracketed Intel expression:
/// the brackets are skipped, the immediate displacement is rewritten or
/// inserted, immediate rewrites inside the brackets are deleted, and the text
/// around the symbol name is skipped.
1225 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
1226 StringRef SymName, int64_t ImmDisp,
1227 int64_t FinalImmDisp, SMLoc &BracLoc,
1228 SMLoc &StartInBrac, SMLoc &End) {
1229 // Remove the '[' and ']' from the IR string.
1230 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
1231 AsmRewrites.emplace_back(AOK_Skip, End, 1);
1233 // If ImmDisp is non-zero, then we parsed a displacement before the
1234 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1235 // If ImmDisp doesn't match the displacement computed by the state machine
1236 // then we have an additional displacement in the bracketed expression.
1237 if (ImmDisp != FinalImmDisp) {
1239 // We have an immediate displacement before the bracketed expression.
1240 // Adjust this to match the final immediate displacement.
1242 for (AsmRewrite &AR : AsmRewrites) {
1243 if (AR.Loc.getPointer() > BracLoc.getPointer())
1245 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
1246 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite so it covers everything up to the '['.
1248 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
1249 AR.Val = FinalImmDisp;
1254 assert (Found && "Unable to rewrite ImmDisp.");
1257 // We have a symbolic and an immediate displacement, but no displacement
1258 // before the bracketed expression. Put the immediate displacement
1259 // before the bracketed expression.
1260 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
1263 // Remove all the ImmPrefix rewrites within the brackets.
1264 // We may have some Imm rewrites as a result of an operator applying,
1265 // remove them as well
1266 for (AsmRewrite &AR : AsmRewrites) {
1267 if (AR.Loc.getPointer() < StartInBrac.getPointer())
1269 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm)
1270 AR.Kind = AOK_Delete;
// SymName's data pointer gives the symbol's position in the text.
1272 const char *SymLocPtr = SymName.data();
1273 // Skip everything before the symbol.
1274 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
// NOTE(review): Len is unsigned and the enclosing `if` already requires it to
// be non-zero, so this assert cannot fire; a negative pointer difference
// would wrap to a large value instead.
1275 assert(Len > 0 && "Expected a non-negative length.");
1276 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
1278 // Skip everything after the symbol.
1279 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1280 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
// NOTE(review): same as above — the assert is always true for an unsigned Len.
1281 assert(Len > 0 && "Expected a non-negative length.");
1282 AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
1286 // Some bitwise operators also have a named synonym ("not", "or", "and", ...).
1287 // Query a candidate string for being such a named operator
1288 // and, if so, invoke the appropriate handler.
1289 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1290 // A named operator should be either lower or upper case, but not a mix
// compare() yields non-zero on a mismatch, so this condition holds only when
// Name is neither entirely lower-case nor entirely upper-case.
1291 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
// The spellings recognised below are matched case-insensitively.
1293 if (Name.equals_lower("not"))
1295 else if (Name.equals_lower("or"))
1297 else if (Name.equals_lower("shl"))
1299 else if (Name.equals_lower("shr"))
1301 else if (Name.equals_lower("xor"))
1303 else if (Name.equals_lower("and"))
// Drives the Intel-syntax expression state machine SM from the token stream.
// Each token kind is forwarded to the matching SM callback (onRegister,
// onInteger, onPlus, onLBrac, ...).
//   SM  - state machine that accumulates the expression.
//   End - updated with the location returned by consumeToken() (or by the
//         helper that parsed the token) as input is consumed.
// Failures are reported through Error(), whose result is returned.
1310 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1311 MCAsmParser &Parser = getParser();
1312 const AsmToken &Tok = Parser.getTok();
1314 AsmToken::TokenKind PrevTK = AsmToken::Error;
// Cleared by branches that advance the lexer themselves, so that the generic
// consumeToken() at the bottom does not eat a second token.
1317 bool UpdateLocLex = true;
1319 AsmToken::TokenKind TK = getLexer().getKind();
1320 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1321 // identifier. Don't try to parse it as a register.
1322 if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") &&
1323 TK != AsmToken::Identifier)
1326 // If we're parsing an immediate expression, we don't expect a '['.
1327 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1332 if (SM.isValidEndState()) {
1336 return Error(Tok.getLoc(), "unknown token in expression");
1338 case AsmToken::EndOfStatement: {
1342 case AsmToken::String:
1343 case AsmToken::Identifier: {
1344 // This could be a register or a symbolic displacement.
1347 SMLoc IdentLoc = Tok.getLoc();
1348 StringRef Identifier = Tok.getString();
1349 UpdateLocLex = false;
// Tried in order: register, named operator (not/or/shl/...), plain symbol
// expression when not in inline asm, Intel operator (TYPE/SIZE/LENGTH),
// and finally an inline-asm identifier looked up through the frontend.
1350 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1351 SM.onRegister(TmpReg);
1352 } else if (ParseIntelNamedOperator(Identifier, SM)) {
// The named operator did not consume its token; let the common path do it.
1353 UpdateLocLex = true;
1354 } else if (!isParsingInlineAsm()) {
1355 if (getParser().parsePrimaryExpr(Val, End))
1356 return Error(Tok.getLoc(), "Unexpected identifier!");
1357 SM.onIdentifierExpr(Val, Identifier);
1358 } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) {
1359 if (OpKind == IOK_OFFSET)
// The two literals are concatenated by the compiler; the trailing space keeps
// "of" and "a" from running together in the diagnostic.
1360 return Error(IdentLoc, "Dealing OFFSET operator as part of "
1361 "a compound immediate expression is yet to be supported");
1362 int64_t Val = ParseIntelOperator(OpKind);
1366 if (SM.onInteger(Val, ErrMsg))
1367 return Error(IdentLoc, ErrMsg);
1368 } else if (Identifier.find('.') != StringRef::npos &&
1369 PrevTK == AsmToken::RBrac) {
1372 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1373 if (ParseIntelIdentifier(Val, Identifier, Info,
1374 /*Unevaluated=*/false, End))
1376 SM.onIdentifierExpr(Val, Identifier);
1380 case AsmToken::Integer: {
1382 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1383 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1384 // Look for 'b' or 'f' following an Integer as a directional label
1385 SMLoc Loc = getTok().getLoc();
1386 int64_t IntVal = getTok().getIntVal();
1387 End = consumeToken();
1388 UpdateLocLex = false;
1389 if (getLexer().getKind() == AsmToken::Identifier) {
1390 StringRef IDVal = getTok().getString();
1391 if (IDVal == "f" || IDVal == "b") {
1393 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1394 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1396 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference ("Nb") must name a label that has already been defined.
1397 if (IDVal == "b" && Sym->isUndefined())
1398 return Error(Loc, "invalid reference to undefined symbol");
1399 StringRef Identifier = Sym->getName();
1400 SM.onIdentifierExpr(Val, Identifier);
1401 End = consumeToken();
1403 if (SM.onInteger(IntVal, ErrMsg))
1404 return Error(Loc, ErrMsg);
1407 if (SM.onInteger(IntVal, ErrMsg))
1408 return Error(Loc, ErrMsg);
1412 case AsmToken::Plus: SM.onPlus(); break;
1413 case AsmToken::Minus: SM.onMinus(); break;
1414 case AsmToken::Tilde: SM.onNot(); break;
1415 case AsmToken::Star: SM.onStar(); break;
1416 case AsmToken::Slash: SM.onDivide(); break;
1417 case AsmToken::Pipe: SM.onOr(); break;
1418 case AsmToken::Caret: SM.onXor(); break;
1419 case AsmToken::Amp: SM.onAnd(); break;
1420 case AsmToken::LessLess:
1421 SM.onLShift(); break;
1422 case AsmToken::GreaterGreater:
1423 SM.onRShift(); break;
1424 case AsmToken::LBrac: SM.onLBrac(); break;
1425 case AsmToken::RBrac: SM.onRBrac(); break;
1426 case AsmToken::LParen: SM.onLParen(); break;
1427 case AsmToken::RParen: SM.onRParen(); break;
1430 return Error(Tok.getLoc(), "unknown token in expression");
// Consume the token that was just handled unless the branch above already did.
1432 if (!Done && UpdateLocLex)
1433 End = consumeToken();
// Parses a bracketed Intel memory expression, starting at the '[' token, and
// builds the resulting memory operand.
//   SegReg   - segment register forwarded to the created operand.
//   Start    - start location recorded on the created operand.
//   ImmDisp  - displacement already parsed before the '['; it seeds the
//              expression state machine.
//   isSymbol - callers in this file pass true when a non-constant symbol
//              expression preceded the bracket.
// Errors are reported through ErrorOperand()/Error().
1440 std::unique_ptr<X86Operand>
1441 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1442 int64_t ImmDisp, bool isSymbol,
1444 MCAsmParser &Parser = getParser();
1445 const AsmToken &Tok = Parser.getTok();
1446 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1447 if (getLexer().isNot(AsmToken::LBrac))
1448 return ErrorOperand(BracLoc, "Expected '[' token!");
1449 Parser.Lex(); // Eat '['
1451 SMLoc StartInBrac = Parser.getTok().getLoc();
1452 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1453 // may have already parsed an immediate displacement before the bracketed
1455 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1456 if (ParseIntelExpression(SM, End))
1459 const MCExpr *Disp = nullptr;
1460 if (const MCExpr *Sym = SM.getSym()) {
1461 // A symbolic displacement.
// In inline asm the frontend text has to be patched so that only the symbol
// (plus the final immediate) survives.
1463 if (isParsingInlineAsm())
1464 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
1465 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Fold in the immediate when there is one, or use it alone when there is no
// symbolic displacement.
1469 if (SM.getImm() || !Disp) {
1470 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1472 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1474 Disp = Imm; // An immediate displacement only.
1477 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1478 // will in fact do a global lookup of the field name inside all global
1479 // typedefs, but we don't emulate that.
1480 if ((Parser.getTok().getKind() == AsmToken::Identifier ||
1481 Parser.getTok().getKind() == AsmToken::Dot ||
1482 Parser.getTok().getKind() == AsmToken::Real) &&
1483 Parser.getTok().getString().find('.') != StringRef::npos) {
1484 const MCExpr *NewDisp;
1485 if (ParseIntelDotOperator(Disp, NewDisp))
1488 End = Tok.getEndLoc();
1489 Parser.Lex(); // Eat the field.
1495 Error(Start, "cannot use more than one symbol in memory operand");
1498 if (SM.getBaseReg()) {
1499 Error(Start, "cannot use base register with variable reference");
1502 if (SM.getIndexReg()) {
1503 Error(Start, "cannot use index register with variable reference");
1508 int BaseReg = SM.getBaseReg();
1509 int IndexReg = SM.getIndexReg();
1510 int Scale = SM.getScale();
// Outside inline asm the operand is created directly; inline asm goes through
// CreateMemForInlineAsm with the identifier info gathered by the state machine.
1511 if (!isParsingInlineAsm()) {
1513 if (!BaseReg && !IndexReg) {
1515 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1516 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1520 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1521 Error(StartInBrac, ErrMsg);
1524 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1525 IndexReg, Scale, Start, End, Size);
1528 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1529 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1530 End, Size, SM.getSymName(), Info,
1531 isParsingInlineAsm());
// Resolves an identifier in MS-style inline assembly by asking the frontend
// (SemaCallback) and produces a symbol reference expression for it.
//   Val        - receives the MCSymbolRefExpr created for the identifier.
//   Identifier - in: text starting at the identifier; out: set to LineBuf after
//                the frontend lookup.
//   Info       - filled in by SemaCallback->LookupInlineAsmIdentifier().
//   IsUnevaluatedOperand - forwarded to the frontend lookup.
//   End        - receives the end location of the last token consumed.
// Only valid while parsing inline assembly (asserted below).
1534 // Inline assembly may use variable names with namespace alias qualifiers.
1535 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1536 StringRef &Identifier,
1537 InlineAsmIdentifierInfo &Info,
1538 bool IsUnevaluatedOperand, SMLoc &End) {
1539 MCAsmParser &Parser = getParser();
1540 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// Built from a bare char pointer, so LineBuf runs from the identifier to the
// terminating NUL of the buffer rather than just to the end of the token.
1543 StringRef LineBuf(Identifier.data());
// Presumably the callback trims LineBuf to the text it recognised - confirm
// against the MCAsmParserSemaCallback interface.
1545 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1547 const AsmToken &Tok = Parser.getTok();
1548 SMLoc Loc = Tok.getLoc();
1550 // Advance the token stream until the end of the current token is
1551 // after the end of what the frontend claimed.
1552 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1554 End = Tok.getEndLoc();
1556 } while (End.getPointer() < EndPtr);
1557 Identifier = LineBuf;
1559 // The frontend should end parsing on an assembler token boundary, unless it
1561 assert((End.getPointer() == EndPtr || !Result) &&
1562 "frontend claimed part of a token?");
1564 // If the identifier lookup was unsuccessful, assume that we are dealing with
1567 StringRef InternalName =
1568 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1570 assert(InternalName.size() && "We should have an internal name here.");
1571 // Push a rewrite for replacing the identifier name with the internal name.
1572 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1576 // Create the symbol reference.
1577 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1578 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1579 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1583 /// \brief Parse intel style segment override.
///
/// Expects the current token to be the ':' that follows the segment register.
/// After the colon it accepts an optional integer displacement, then either a
/// bracketed expression, or (when no '[' follows) a primary expression /
/// inline-asm identifier.
/// \param SegReg segment register that was parsed before the ':'; must be
///        non-zero.
/// \param Start start location recorded on the created operand.
1584 std::unique_ptr<X86Operand>
1585 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1587 MCAsmParser &Parser = getParser();
1588 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1589 const AsmToken &Tok = Parser.getTok(); // Current token; expected to be ':'.
1590 if (Tok.isNot(AsmToken::Colon))
1591 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1592 Parser.Lex(); // Eat ':'
1594 int64_t ImmDisp = 0;
1595 if (getLexer().is(AsmToken::Integer)) {
1596 ImmDisp = Tok.getIntVal();
1597 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1599 if (isParsingInlineAsm())
1600 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1602 if (getLexer().isNot(AsmToken::LBrac)) {
1603 // An immediate following a 'segment register', 'colon' token sequence can
1604 // be followed by a bracketed expression. If it isn't we know we have our
1605 // final segment override.
1606 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1607 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1608 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1609 Start, ImmDispToken.getEndLoc(), Size);
1613 if (getLexer().is(AsmToken::LBrac))
1614 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
// NOTE(review): neither operand created below receives SegReg (the inline-asm
// path passes 0 explicitly) - confirm that dropping the override is intended.
1618 if (!isParsingInlineAsm()) {
1619 if (getParser().parsePrimaryExpr(Val, End))
1620 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1622 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1625 InlineAsmIdentifierInfo Info;
1626 StringRef Identifier = Tok.getString();
1627 if (ParseIntelIdentifier(Val, Identifier, Info,
1628 /*Unevaluated=*/false, End))
1630 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1631 /*Scale=*/1, Start, End, Size, Identifier, Info);
1634 // ParseRoundingModeOp - Parse AVX-512 rounding mode operand.
// Called with the current token on '{'. Accepts "{rn-sae}", "{rd-sae}",
// "{ru-sae}" and "{rz-sae}", which produce an immediate operand holding the
// matching X86::STATIC_ROUNDING value, and "{sae}", which produces a "{sae}"
// token operand. Anything else is reported through ErrorOperand().
// Start and End are only used as the location range of the immediate operand;
// End is not updated by the code visible here.
1635 std::unique_ptr<X86Operand>
1636 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1637 MCAsmParser &Parser = getParser();
1638 const AsmToken &Tok = Parser.getTok();
1639 // Eat "{" and mark the current place.
1640 const SMLoc consumedToken = consumeToken();
1641 if (Tok.getIdentifier().startswith("r")){
1642 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1643 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1644 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1645 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1646 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1649 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1650 Parser.Lex(); // Eat "r*" of r*-sae
1651 if (!getLexer().is(AsmToken::Minus))
1652 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1653 Parser.Lex(); // Eat "-"
// NOTE(review): the token after '-' is consumed without checking that it
// really is "sae".
1654 Parser.Lex(); // Eat the sae
1655 if (!getLexer().is(AsmToken::RCurly))
1656 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1657 Parser.Lex(); // Eat "}"
1658 const MCExpr *RndModeOp =
1659 MCConstantExpr::create(rndMode, Parser.getContext());
1660 return X86Operand::CreateImm(RndModeOp, Start, End);
1662 if(Tok.getIdentifier().equals("sae")){
1663 Parser.Lex(); // Eat the sae
1664 if (!getLexer().is(AsmToken::RCurly))
1665 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1666 Parser.Lex(); // Eat "}"
1667 return X86Operand::CreateToken("{sae}", consumedToken);
1669 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1672 /// Parse the '.' operator.
///
/// Adds the offset named by the current token (".Imm" or, in inline asm, a
/// "Base.Member" field reference) to the constant displacement \p Disp.
/// \param Disp existing displacement; has to be an MCConstantExpr.
/// \param NewDisp receives a new constant expression holding the sum.
/// Problems are reported through Error(), whose result is returned.
1673 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1674 const MCExpr *&NewDisp) {
1675 MCAsmParser &Parser = getParser();
1676 const AsmToken &Tok = Parser.getTok();
1677 int64_t OrigDispVal, DotDispVal;
1679 // FIXME: Handle non-constant expressions.
1680 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1681 OrigDispVal = OrigDisp->getValue();
1683 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1685 // Drop the optional '.'.
1686 StringRef DotDispStr = Tok.getString();
1687 if (DotDispStr.startswith("."))
1688 DotDispStr = DotDispStr.drop_front(1);
1690 // .Imm gets lexed as a real.
1691 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger() is ignored, so a string that is
// not a valid base-10 integer is not diagnosed here.
1693 DotDispStr.getAsInteger(10, DotDisp);
1694 DotDispVal = DotDisp.getZExtValue();
1695 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and let the frontend resolve the field.
1697 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1698 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1700 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1701 DotDispVal = DotDisp;
1703 return Error(Tok.getLoc(), "Unexpected token type!");
1705 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1706 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1707 unsigned Len = DotDispStr.size();
// NOTE(review): the 64-bit sum is narrowed to unsigned here, while NewDisp
// below is built from the full-width sum.
1708 unsigned Val = OrigDispVal + DotDispVal;
1709 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
1712 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1716 /// Parse the 'offset' operator. This operator is used to specify the
1717 /// location rather than the content of a variable.
///
/// Expects the current token to be the operator itself. The identifier that
/// follows is resolved with ParseIntelIdentifier(), and the result is a
/// register operand created with GetAddress=true that carries the identifier
/// and its declaration.
1718 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1719 MCAsmParser &Parser = getParser();
1720 const AsmToken &Tok = Parser.getTok();
1721 SMLoc OffsetOfLoc = Tok.getLoc();
1722 Parser.Lex(); // Eat offset.
1725 InlineAsmIdentifierInfo Info;
1726 SMLoc Start = Tok.getLoc(), End;
1727 StringRef Identifier = Tok.getString();
1728 if (ParseIntelIdentifier(Val, Identifier, Info,
1729 /*Unevaluated=*/false, End))
1732 // Don't emit the offset operator.
// Presumably 7 covers the six letters of "offset" plus one separator
// character - confirm.
1733 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1735 // The offset operator will have an 'r' constraint, thus we need to create
1736 // register operand to ensure proper matching. Just pick a GPR based on
1737 // the size of a pointer.
1738 bool Parse32 = is32BitMode() || Code16GCC;
1739 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1741 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1742 OffsetOfLoc, Identifier, Info.OpDecl);
1745 // Query a candidate string for being an Intel assembly operator.
1746 // Report back its kind, or IOK_INVALID if it is not a known one.
// Only the all-upper-case and all-lower-case spellings listed below match;
// mixed-case names yield IOK_INVALID.
1747 unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) {
1748 return StringSwitch<unsigned>(Name)
1749 .Cases("TYPE","type",IOK_TYPE)
1750 .Cases("SIZE","size",IOK_SIZE)
1751 .Cases("LENGTH","length",IOK_LENGTH)
1752 .Cases("OFFSET","offset",IOK_OFFSET)
1753 .Default(IOK_INVALID);
1756 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1757 /// returns the number of elements in an array. It returns the value 1 for
1758 /// non-array variables. The SIZE operator returns the size of a C or C++
1759 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1760 /// TYPE operator returns the size of a C or C++ type or variable. If the
1761 /// variable is an array, TYPE returns the size of a single element.
///
/// \param OpKind one of IOK_LENGTH, IOK_SIZE or IOK_TYPE; any other value hits
///        llvm_unreachable.
/// The operator and its operand are replaced in the inline-asm text by an
/// immediate rewrite carrying the value taken from the frontend's Info.
1762 unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1763 MCAsmParser &Parser = getParser();
1764 const AsmToken &Tok = Parser.getTok();
1765 SMLoc TypeLoc = Tok.getLoc();
1766 Parser.Lex(); // Eat operator.
1768 const MCExpr *Val = nullptr;
1769 InlineAsmIdentifierInfo Info;
1770 SMLoc Start = Tok.getLoc(), End;
1771 StringRef Identifier = Tok.getString();
// The operand is looked up as an unevaluated operand.
1772 if (ParseIntelIdentifier(Val, Identifier, Info,
1773 /*Unevaluated=*/true, End))
1777 Error(Start, "unable to lookup expression");
1783 default: llvm_unreachable("Unexpected operand kind!");
1784 case IOK_LENGTH: CVal = Info.Length; break;
1785 case IOK_SIZE: CVal = Info.Size; break;
1786 case IOK_TYPE: CVal = Info.Type; break;
1789 // Rewrite the type operator and the C or C++ type or variable in terms of an
1790 // immediate. E.g. TYPE foo -> $$4
// Len spans from the start of the operator to the end of its operand.
1791 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1792 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
// Parses a single operand in Intel syntax. The visible cases are, in order:
// the inline-asm 'offset' operator, an optional "<size> ptr" prefix, an
// AVX-512 rounding-mode operand, a register or segment override, a bracketed
// memory expression, and finally an immediate / symbol expression that may be
// followed by a bracketed expression. Errors are reported through
// ErrorOperand().
1797 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1798 MCAsmParser &Parser = getParser();
1799 const AsmToken &Tok = Parser.getTok();
1802 // FIXME: Offset operator
1803 // Should be handled as part of immediate expression, as other operators
1804 // Currently, only supported as a stand-alone operand
1805 if (isParsingInlineAsm())
1806 if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET)
1807 return ParseIntelOffsetOfOperator();
1809 bool PtrInOperand = false;
1810 unsigned Size = getIntelMemOperandSize(Tok.getString());
1812 Parser.Lex(); // Eat operand size (e.g., byte, word).
1813 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1814 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1815 Parser.Lex(); // Eat ptr.
1816 PtrInOperand = true;
1819 Start = Tok.getLoc();
1821 // rounding mode token
1822 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1823 getLexer().is(AsmToken::LCurly))
1824 return ParseRoundingModeOp(Start, End);
1828 if (getLexer().is(AsmToken::Identifier) &&
1829 !ParseRegister(RegNo, Start, End)) {
1830 // If this is a segment register followed by a ':', then this is the start
1831 // of a segment override, otherwise this is a normal register reference.
1832 // In case it is a normal register and there is ptr in the operand this
1834 if (RegNo == X86::RIP)
1835 return ErrorOperand(Start, "rip can only be used as a base register");
1836 if (getLexer().isNot(AsmToken::Colon)) {
1838 return ErrorOperand(Start, "expected memory operand after "
1839 "'ptr', found register operand instead");
1841 return X86Operand::CreateReg(RegNo, Start, End);
1843 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1846 // Immediates and Memory
1848 // Parse [ BaseReg + Scale*IndexReg + Disp ].
1849 if (getLexer().is(AsmToken::LBrac))
1850 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
// Remember the first token (by value) so we can later tell whether the
// expression was a single token or a compound one.
1853 AsmToken StartTok = Tok;
1854 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1855 /*AddImmPrefix=*/false);
1856 if (ParseIntelExpression(SM, End))
1859 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1860 int64_t Imm = SM.getImm();
// A "symbol" that is really a constant expression is folded into Imm.
1861 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1862 SM.getSym()->evaluateAsAbsolute(Imm);
1864 if (StartTok.isNot(AsmToken::Identifier) &&
1865 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
// Len is the amount of source text the expression consumed.
1866 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1867 if (StartTok.getString().size() == Len)
1868 // Just add a prefix if this wasn't a complex immediate expression.
1869 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1871 // Otherwise, rewrite the complex expression as a single immediate.
1872 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1875 if (getLexer().isNot(AsmToken::LBrac)) {
1876 // If a directional label (ie. 1f or 2b) was parsed above from
1877 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1878 // the MCExpr with the directional local symbol and this is a
1879 // memory operand not an immediate operand.
1881 if (isParsingInlineAsm())
1882 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1884 /*Scale=*/1, Start, End, Size,
1885 SM.getSymName(), SM.getIdentifierInfo());
1886 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1890 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1891 return X86Operand::CreateImm(ImmExpr, Start, End);
1894 // Only positive immediates are valid.
1896 return ErrorOperand(Start, "expected a positive immediate displacement "
1897 "before bracketed expr.");
// The expression parsed so far acts as the displacement in front of the
// bracketed part.
1899 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
// Parses a single operand in AT&T syntax, dispatching on the current token:
// '%' starts a register (or a "%seg:" memory reference), '$' starts an
// immediate, '{' starts an AVX-512 rounding-mode operand, and other tokens are
// handed to ParseMemOperand() with no segment register.
1902 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1903 MCAsmParser &Parser = getParser();
1904 switch (getLexer().getKind()) {
1906 // Parse a memory operand with no segment register.
1907 return ParseMemOperand(0, Parser.getTok().getLoc());
1908 case AsmToken::Percent: {
1909 // Read the register.
1912 if (ParseRegister(RegNo, Start, End)) return nullptr;
1913 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1914 Error(Start, "%eiz and %riz can only be used as index registers",
1915 SMRange(Start, End));
1918 if (RegNo == X86::RIP) {
1919 Error(Start, "%rip can only be used as a base register",
1920 SMRange(Start, End));
1924 // If this is a segment register followed by a ':', then this is the start
1925 // of a memory reference, otherwise this is a normal register reference.
1926 if (getLexer().isNot(AsmToken::Colon))
1927 return X86Operand::CreateReg(RegNo, Start, End);
// Only registers in the SEGMENT_REG class may prefix a memory reference.
1929 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1930 return ErrorOperand(Start, "invalid segment register");
1932 getParser().Lex(); // Eat the colon.
1933 return ParseMemOperand(RegNo, Start);
1935 case AsmToken::Dollar: {
1936 // $42 -> immediate.
1937 SMLoc Start = Parser.getTok().getLoc(), End;
1940 if (getParser().parseExpression(Val, End))
1942 return X86Operand::CreateImm(Val, Start, End);
1944 case AsmToken::LCurly:{
1945 SMLoc Start = Parser.getTok().getLoc(), End;
// '{' is only meaningful here when AVX-512 is available.
1946 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1947 return ParseRoundingModeOp(Start, End);
1948 return ErrorOperand(Start, "Unexpected '{' in expression");
// Tries to parse the "z}" tail of a {z} mark; the opening '{' has already been
// consumed by the caller. On success Z is set to a "{z}" token operand located
// at StartLoc.
1953 // true on failure, false otherwise
1954 // If no {z} mark was found - Parser doesn't advance
1955 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1956 const SMLoc &StartLoc) {
1957 MCAsmParser &Parser = getParser();
1958 // Assuming we are just past the '{' mark, query the next token.
1959 // Searched for {z}, but none was found. Return false, as no parsing error was
1961 if (!(getLexer().is(AsmToken::Identifier) &&
1962 (getLexer().getTok().getIdentifier() == "z")))
1964 Parser.Lex(); // Eat z
1965 // Query and eat the '}' mark
1966 if (!getLexer().is(AsmToken::RCurly))
1967 return Error(getLexer().getLoc(), "Expected } at this point");
1968 Parser.Lex(); // Eat '}'
1969 // Assign Z with the {z} mark operand
1970 Z = X86Operand::CreateToken("{z}", StartLoc);
// Parses the AVX-512 decorations that may follow an operand and appends the
// resulting token/register operands to Operands: a memory broadcast
// ({1to2}, {1to4}, {1to8}, {1to16}), or an op-mask register mark ({%k<NUM>})
// and/or a zeroing mark ({z}) in either order. Nothing is parsed unless the
// subtarget has FeatureAVX512 and the current token is '{'.
1974 // true on failure, false otherwise
1975 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1976 const MCParsedAsmOperand &Op) {
1977 MCAsmParser &Parser = getParser();
1978 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1979 if (getLexer().is(AsmToken::LCurly)) {
1980 // Eat "{" and mark the current place.
1981 const SMLoc consumedToken = consumeToken();
1982 // Distinguish {1to<NUM>} from {%k<NUM>}.
1983 if(getLexer().is(AsmToken::Integer)) {
1984 // Parse memory broadcasting ({1to<NUM>}).
1985 if (getLexer().getTok().getIntVal() != 1)
1986 return TokError("Expected 1to<NUM> at this point");
1987 Parser.Lex(); // Eat "1" of 1to8
1988 if (!getLexer().is(AsmToken::Identifier) ||
1989 !getLexer().getTok().getIdentifier().startswith("to"))
1990 return TokError("Expected 1to<NUM> at this point");
1991 // Recognize only reasonable suffixes.
1992 const char *BroadcastPrimitive =
1993 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1994 .Case("to2", "{1to2}")
1995 .Case("to4", "{1to4}")
1996 .Case("to8", "{1to8}")
1997 .Case("to16", "{1to16}")
1999 if (!BroadcastPrimitive)
2000 return TokError("Invalid memory broadcast primitive.");
2001 Parser.Lex(); // Eat "toN" of 1toN
2002 if (!getLexer().is(AsmToken::RCurly))
2003 return TokError("Expected } at this point");
2004 Parser.Lex(); // Eat "}"
2005 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2007 // No AVX512 specific primitives can pass
2008 // after memory broadcasting, so return.
2011 // Parse either {k}{z}, {z}{k}, {k} or {z};
2012 // the last one has no meaning, but GCC accepts it.
2013 // Currently, we're just past a '{' mark.
2014 std::unique_ptr<X86Operand> Z;
2015 if (ParseZ(Z, consumedToken))
2017 // Reaching here means that parsing of the allegedly '{z}' mark yielded
2019 // Query for the need of further parsing for a {%k<NUM>} mark
2020 if (!Z || getLexer().is(AsmToken::LCurly)) {
// If {z} was already parsed, the '{' of the mask mark still has to be eaten;
// otherwise the '{' consumed at the top belongs to the mask mark.
2021 const SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2022 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2024 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2025 if (!getLexer().is(AsmToken::RCurly))
2026 return Error(getLexer().getLoc(), "Expected } at this point");
2027 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2028 Operands.push_back(std::move(Op));
2029 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2031 return Error(getLexer().getLoc(),
2032 "Expected an op-mask register at this point");
2033 // {%k<NUM>} mark is found, inquire for {z}
2034 if (getLexer().is(AsmToken::LCurly) && !Z) {
2035 // If we hit a parsing error, or found no (expected) {z} mark,
2036 // report an error.
2037 if (ParseZ(Z, consumeToken()) || !Z)
2041 // '{z}' on its own is meaningless, hence should be ignored.
2042 // On the contrary - had it been accompanied by a K register,
2045 Operands.push_back(std::move(Z));
2053 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2054 /// has already been parsed if present.
///
/// \param SegReg segment register parsed by the caller (callers in this file
///        pass 0 when there is none).
/// Builds the memory operand from an optional displacement expression followed
/// by an optional parenthesized base/index/scale part. Problems are reported
/// with Error(); the early exit visible below returns nullptr.
2055 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2058 MCAsmParser &Parser = getParser();
2059 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2060 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2061 // only way to do this without lookahead is to eat the '(' and see what is
2063 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2064 if (getLexer().isNot(AsmToken::LParen)) {
2066 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2068 // After parsing the base expression we could either have a parenthesized
2069 // memory address or not. If not, return now. If so, eat the (.
2070 if (getLexer().isNot(AsmToken::LParen)) {
2071 // Unless we have a segment register, treat this as an immediate.
2073 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2074 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2081 // Okay, we have a '('. We don't know if this is an expression or not,
2082 // so we have to eat the ( to see beyond it.
2083 SMLoc LParenLoc = Parser.getTok().getLoc();
2084 Parser.Lex(); // Eat the '('.
2086 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2087 // Nothing to do here, fall into the code below with the '(' part of the
2088 // memory operand consumed.
2092 // It must be a parenthesized expression, parse it now.
2093 if (getParser().parseParenExpression(Disp, ExprEnd))
2096 // After parsing the base expression we could either have a parenthesized
2097 // memory address or not. If not, return now. If so, eat the (.
2098 if (getLexer().isNot(AsmToken::LParen)) {
2099 // Unless we have a segment register, treat this as an immediate.
2101 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2103 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2112 // If we reached here, then we just ate the ( of the memory operand. Process
2113 // the rest of the memory operand.
2114 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2115 SMLoc IndexLoc, BaseLoc;
2117 if (getLexer().is(AsmToken::Percent)) {
2118 SMLoc StartLoc, EndLoc;
2119 BaseLoc = Parser.getTok().getLoc();
2120 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2121 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2122 Error(StartLoc, "eiz and riz can only be used as index registers",
2123 SMRange(StartLoc, EndLoc));
2128 if (getLexer().is(AsmToken::Comma)) {
2129 Parser.Lex(); // Eat the comma.
2130 IndexLoc = Parser.getTok().getLoc();
2132 // Following the comma we should have either an index register, or a scale
2133 // value. We don't support the latter form, but we want to parse it
2136 // Note that even though it would be completely consistent to support syntax
2137 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2138 if (getLexer().is(AsmToken::Percent)) {
2140 if (ParseRegister(IndexReg, L, L))
2142 if (BaseReg == X86::RIP) {
2143 Error(IndexLoc, "%rip as base register can not have an index register");
2146 if (IndexReg == X86::RIP) {
2147 Error(IndexLoc, "%rip is not allowed as an index register");
2151 if (getLexer().isNot(AsmToken::RParen)) {
2152 // Parse the scale amount:
2153 // ::= ',' [scale-expression]
2154 if (getLexer().isNot(AsmToken::Comma)) {
2155 Error(Parser.getTok().getLoc(),
2156 "expected comma in scale expression");
2159 Parser.Lex(); // Eat the comma.
2161 if (getLexer().isNot(AsmToken::RParen)) {
2162 SMLoc Loc = Parser.getTok().getLoc();
2165 if (getParser().parseAbsoluteExpression(ScaleVal)){
2166 Error(Loc, "expected scale expression");
2170 // Validate the scale amount.
2171 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2173 Error(Loc, "scale factor in 16-bit address must be 1");
2176 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2178 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2181 Scale = (unsigned)ScaleVal;
2184 } else if (getLexer().isNot(AsmToken::RParen)) {
2185 // A scale amount without an index is ignored.
2187 SMLoc Loc = Parser.getTok().getLoc();
2190 if (getParser().parseAbsoluteExpression(Value))
2194 Warning(Loc, "scale factor without index register is ignored");
2199 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2200 if (getLexer().isNot(AsmToken::RParen)) {
2201 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2204 SMLoc MemEnd = Parser.getTok().getEndLoc();
2205 Parser.Lex(); // Eat the ')'.
2207 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2208 // and then only in non-64-bit modes. Except for DX, which is a special case
2209 // because an unofficial form of in/out instructions uses it.
2210 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2211 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2212 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2213 BaseReg != X86::DX) {
2214 Error(BaseLoc, "invalid 16-bit base register");
2218 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2219 Error(IndexLoc, "16-bit memory operand may not include only index register");
2224 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2225 Error(BaseLoc, ErrMsg);
// With no segment, base or index register the operand is a plain
// displacement-only memory reference.
2229 if (SegReg || BaseReg || IndexReg)
2230 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2231 IndexReg, Scale, MemStart, MemEnd);
2232 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
/// Parse one instruction statement. Pushes the (possibly rewritten) mnemonic
/// token followed by the parsed operands onto \p Operands, then applies a
/// series of compatibility fix-ups keyed on the original \p Name.
///
/// \param Info     Per-statement parse info; its AsmRewrites list receives an
///                 AOK_EndOfStatement entry when a curly brace ends the
///                 statement in MS inline asm.
/// \param Name     The mnemonic as written in the source.
/// \param NameLoc  Source location of the mnemonic.
/// \param Operands Output operand list; a mnemonic token is pushed first.
/// \returns the result of TokError() on an unexpected token, and true when
///          VerifyAndAdjustOperands() reported an error.
///
/// NOTE(review): the embedded line numbers skip, i.e. some short source lines
/// (closing braces, returns, a few declarations) are not present in this
/// listing. In particular the success-path return is not visible here
/// (presumably `return false` -- confirm against the full file).
2235 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2236 SMLoc NameLoc, OperandVector &Operands) {
2237 MCAsmParser &Parser = getParser();
// PatchedName is the mnemonic text that finally gets pushed as a token; the
// hacks below may shorten it while Name keeps the original spelling.
2239 StringRef PatchedName = Name;
// MS inline asm only: "jmp short <label>". The rewrite recorded below skips
// NextTok.size() + 1 characters starting right after the mnemonic.
2241 if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) {
2242 StringRef NextTok = Parser.getTok().getString();
2243 if (NextTok == "short") {
2245 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2246 // Eat the short keyword
2248 // MS ignores the short keyword, it determines the jmp type based
2249 // on the distance of the label
2250 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2251 NextTok.size() + 1);
2255 // FIXME: Hack to recognize setneb as setne.
// Drops the trailing 'b' from set<cc>b, except for "setb" and "setnb" where
// the 'b' is part of the condition code.
2256 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2257 PatchedName != "setb" && PatchedName != "setnb")
2258 PatchedName = PatchedName.substr(0, Name.size()-1);
2260 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2261 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2262 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2263 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2264 bool IsVCMP = PatchedName[0] == 'v';
// CCIdx is the index where the comparison code starts: after "vcmp" (4) or
// after "cmp" (3). The code is the text between that prefix and the
// two-character type suffix.
2265 unsigned CCIdx = IsVCMP ? 4 : 3;
2266 unsigned ComparisonCode = StringSwitch<unsigned>(
2267 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2269 .Case("eq_oq", 0x00)
2271 .Case("lt_os", 0x01)
2273 .Case("le_os", 0x02)
2274 .Case("unord", 0x03)
2275 .Case("unord_q", 0x03)
2277 .Case("neq_uq", 0x04)
2279 .Case("nlt_us", 0x05)
2281 .Case("nle_us", 0x06)
2283 .Case("ord_q", 0x07)
2284 /* AVX only from here */
2285 .Case("eq_uq", 0x08)
2287 .Case("nge_us", 0x09)
2289 .Case("ngt_us", 0x0A)
2290 .Case("false", 0x0B)
2291 .Case("false_oq", 0x0B)
2292 .Case("neq_oq", 0x0C)
2294 .Case("ge_os", 0x0D)
2296 .Case("gt_os", 0x0E)
2298 .Case("true_uq", 0x0F)
2299 .Case("eq_os", 0x10)
2300 .Case("lt_oq", 0x11)
2301 .Case("le_oq", 0x12)
2302 .Case("unord_s", 0x13)
2303 .Case("neq_us", 0x14)
2304 .Case("nlt_uq", 0x15)
2305 .Case("nle_uq", 0x16)
2306 .Case("ord_s", 0x17)
2307 .Case("eq_us", 0x18)
2308 .Case("nge_uq", 0x19)
2309 .Case("ngt_uq", 0x1A)
2310 .Case("false_os", 0x1B)
2311 .Case("neq_os", 0x1C)
2312 .Case("ge_oq", 0x1D)
2313 .Case("gt_oq", 0x1E)
2314 .Case("true_us", 0x1F)
// ~0U is presumably the StringSwitch default for "no code recognized" (the
// .Default line is not visible here). Non-'v' forms accept only codes 0-7.
2316 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
// Push the bare "cmp"/"vcmp" prefix as a token, then the comparison code as
// an immediate operand.
2318 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2321 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2322 getParser().getContext());
2323 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
// Keep only the two-character type suffix; it is pushed as a token later.
2325 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2329 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2330 if (PatchedName.startswith("vpcmp") &&
2331 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2332 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Here CCIdx is the length of the type suffix: 2 when the next-to-last
// character is 'u' (ub/uw/ud/uq), otherwise 1.
2333 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2334 unsigned ComparisonCode = StringSwitch<unsigned>(
2335 PatchedName.slice(5, PatchedName.size() - CCIdx))
2336 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2339 //.Case("false", 0x3) // Not a documented alias.
2343 //.Case("true", 0x7) // Not a documented alias.
// Code 0 ("eq") is accepted only together with an unsigned suffix.
2345 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2346 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2348 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2349 getParser().getContext());
2350 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2352 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2356 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2357 if (PatchedName.startswith("vpcom") &&
2358 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2359 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Same suffix-length computation as for vpcmp above.
2360 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2361 unsigned ComparisonCode = StringSwitch<unsigned>(
2362 PatchedName.slice(5, PatchedName.size() - CCIdx))
2372 if (ComparisonCode != ~0U) {
2373 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2375 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2376 getParser().getContext());
2377 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2379 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
// Push whatever remains of the mnemonic (the full name, or only the type
// suffix if one of the comparison hacks above fired).
2383 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2385 // Determine whether this is an instruction prefix.
2387 Name == "lock" || Name == "rep" ||
2388 Name == "repe" || Name == "repz" ||
2389 Name == "repne" || Name == "repnz" ||
2390 Name == "rex64" || Name == "data16" || Name == "data32";
2392 bool CurlyAsEndOfStatement = false;
2393 // This does the actual operand parsing. Don't parse any more if we have a
2394 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2395 // just want to parse the "lock" as the first instruction and the "incl" as
2397 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2399 // Parse '*' modifier.
2400 if (getLexer().is(AsmToken::Star))
2401 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2403 // Read the operands.
2405 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2406 Operands.push_back(std::move(Op));
2407 if (HandleAVX512Operand(Operands, *Operands.back()))
2412 // check for comma and eat it
2413 if (getLexer().is(AsmToken::Comma))
2419 // In MS inline asm curly braces mark the beginning/end of a block, therefore
2420 // they should be interpreted as end of statement
2421 CurlyAsEndOfStatement =
2422 isParsingIntelSyntax() && isParsingInlineAsm() &&
2423 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2424 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2425 return TokError("unexpected token in argument list");
2428 // Consume the EndOfStatement or the prefix separator Slash
2429 if (getLexer().is(AsmToken::EndOfStatement) ||
2430 (isPrefix && getLexer().is(AsmToken::Slash)))
2432 else if (CurlyAsEndOfStatement)
2433 // Add an actual EndOfStatement before the curly brace
2434 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2435 getLexer().getTok().getLoc(), 0);
2437 // This is for gas compatibility and cannot be done in td.
2438 // Adding "p" for some floating point with no argument.
2439 // For example: fsub --> fsubp
2441 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
// Operands.size() == 1 means only the mnemonic token is present.
2442 if (IsFp && Operands.size() == 1) {
2443 const char *Repl = StringSwitch<const char *>(Name)
2444 .Case("fsub", "fsubp")
2445 .Case("fdiv", "fdivp")
2446 .Case("fsubr", "fsubrp")
2447 .Case("fdivr", "fdivrp");
2448 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2451 // Moving a 32 or 16 bit value into a segment register has the same
2452 // behavior. Modify such instructions to always take shorter form.
2453 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2454 (Operands.size() == 3)) {
2455 X86Operand &Op1 = (X86Operand &)*Operands[1];
2456 X86Operand &Op2 = (X86Operand &)*Operands[2];
2457 SMLoc Loc = Op1.getEndLoc();
2458 if (Op1.isReg() && Op2.isReg() &&
2459 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2461 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2462 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2463 // Change instruction name to match new instruction.
// Name != "mov" implies a suffixed spelling, so Name[3] is the size suffix:
// "movl" becomes "movw" in 16-bit mode, "movw" becomes "movl" otherwise.
2464 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2465 Name = is16BitMode() ? "movw" : "movl";
2466 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2468 // Select the correct equivalent 16-/32-bit source register.
2470 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2471 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2475 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2476 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2477 // documented form in various unofficial manuals, so a lot of code uses it.
2478 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2479 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2480 Operands.size() == 3) {
2481 X86Operand &Op = (X86Operand &)*Operands.back();
// Only a plain "(%dx)" qualifies: no segment, constant zero displacement,
// base register dx and no index register.
2482 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2483 isa<MCConstantExpr>(Op.Mem.Disp) &&
2484 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2485 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2486 SMLoc Loc = Op.getEndLoc();
2487 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2490 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2491 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2492 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2493 Operands.size() == 3) {
2494 X86Operand &Op = (X86Operand &)*Operands[1];
2495 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2496 isa<MCConstantExpr>(Op.Mem.Disp) &&
2497 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2498 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2499 SMLoc Loc = Op.getEndLoc();
2500 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
// String-instruction handling. TmpOperands collects the default operands for
// the instruction and VerifyAndAdjustOperands() reconciles them with what was
// written. The mnemonic prefixes tested below (ins/outs/lods/stos/scas/cmps/
// movs/smov) are mutually exclusive, so at most one of the assignments to
// HadVerifyError executes.
2504 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2505 bool HadVerifyError = false;
2507 // Append default arguments to "ins[bwld]"
2508 if (Name.startswith("ins") &&
2509 (Operands.size() == 1 || Operands.size() == 3) &&
2510 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2513 AddDefaultSrcDestOperands(TmpOperands,
2514 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2515 DefaultMemDIOperand(NameLoc));
2516 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2519 // Append default arguments to "outs[bwld]"
2520 if (Name.startswith("outs") &&
2521 (Operands.size() == 1 || Operands.size() == 3) &&
2522 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2523 Name == "outsd" || Name == "outs")) {
2524 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2525 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2526 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2529 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2530 // values of $SIREG according to the mode. It would be nice if this
2531 // could be achieved with InstAlias in the tables.
2532 if (Name.startswith("lods") &&
2533 (Operands.size() == 1 || Operands.size() == 2) &&
2534 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2535 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2536 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2537 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2540 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2541 // values of $DIREG according to the mode. It would be nice if this
2542 // could be achieved with InstAlias in the tables.
2543 if (Name.startswith("stos") &&
2544 (Operands.size() == 1 || Operands.size() == 2) &&
2545 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2546 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2547 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2548 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2551 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2552 // values of $DIREG according to the mode. It would be nice if this
2553 // could be achieved with InstAlias in the tables.
2554 if (Name.startswith("scas") &&
2555 (Operands.size() == 1 || Operands.size() == 2) &&
2556 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2557 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2558 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2559 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2562 // Add default SI and DI operands to "cmps[bwlq]".
2563 if (Name.startswith("cmps") &&
2564 (Operands.size() == 1 || Operands.size() == 3) &&
2565 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2566 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2567 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2568 DefaultMemSIOperand(NameLoc));
2569 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2572 // Add default SI and DI operands to "movs[bwlq]".
2573 if (((Name.startswith("movs") &&
2574 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2575 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2576 (Name.startswith("smov") &&
2577 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2578 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2579 (Operands.size() == 1 || Operands.size() == 3)) {
// A bare "movsd" outside Intel syntax is re-tokenized as "movsl"
// (presumably to keep it distinct from the SSE2 scalar-double move of the
// same name -- confirm).
2580 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2581 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2582 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2583 DefaultMemDIOperand(NameLoc));
2584 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2587 // Check if we encountered an error for one of the string instructions
2588 if (HadVerifyError) {
2589 return HadVerifyError;
2592 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2594 if ((Name.startswith("shr") || Name.startswith("sar") ||
2595 Name.startswith("shl") || Name.startswith("sal") ||
2596 Name.startswith("rcl") || Name.startswith("rcr") ||
2597 Name.startswith("rol") || Name.startswith("ror")) &&
2598 Operands.size() == 3) {
// The shift count is the last operand in Intel syntax (Operands[2]) and the
// first operand in AT&T syntax (Operands[1]); a constant count of 1 is
// removed in either case.
2599 if (isParsingIntelSyntax()) {
2601 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2602 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2603 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2604 Operands.pop_back();
2606 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2607 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2608 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2609 Operands.erase(Operands.begin() + 1);
2613 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2614 // instalias with an immediate operand yet.
2615 if (Name == "int" && Operands.size() == 2) {
2616 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2618 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2619 if (CE->getValue() == 3) {
2620 Operands.erase(Operands.begin() + 1);
2621 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2625 // Transforms "xlat mem8" into "xlatb"
2626 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2627 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
// The explicit operand is dropped after warning the user about it.
2629 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2630 "size, (R|E)BX will be used for the location");
2631 Operands.pop_back();
2632 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
/// Post-match hook. Both matchers call this in a loop
/// (`while (processInstruction(Inst, Operands))`) so that individual
/// transformations can chain; the return value therefore signals whether
/// \p Inst was changed.
/// NOTE(review): the body is not visible in this listing -- confirm what, if
/// anything, it rewrites.
2639 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
// Forward declaration; used by ErrorMissingFeature() to print one feature
// bit. The definition is presumably supplied by X86GenAsmMatcher.inc via
// GET_SUBTARGET_FEATURE_NAME at the end of this file -- confirm.
2643 static const char *getSubtargetFeatureName(uint64_t Val);
/// Emit \p Inst by handing it, together with its parsed \p Operands and the
/// MC context, to the Instrumentation object rather than writing to the
/// streamer directly.
/// NOTE(review): the remaining parameters and call arguments are on lines
/// not visible in this listing.
2645 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2647 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
/// Entry point for matching a parsed instruction. Dispatches to the Intel or
/// the AT&T matcher depending on the syntax currently being parsed and
/// returns that matcher's result, forwarding the arguments visible here
/// unchanged.
2651 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2652 OperandVector &Operands,
2653 MCStreamer &Out, uint64_t &ErrorInfo,
2654 bool MatchingInlineAsm) {
2655 if (isParsingIntelSyntax())
2656 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2658 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
/// Expand the waiting forms of x87 control instructions (finit, fsave,
/// fstcw, ...) into a WAIT instruction followed by the corresponding
/// non-waiting form (fninit, fnsave, fnstcw, ...).
///
/// The WAIT instruction is emitted here (unless matching inline asm) and
/// Operands[0] is replaced by a token holding the non-waiting mnemonic so the
/// caller's normal matching handles the rest.
///
/// \param IDLoc Location given to the replacement mnemonic token.
/// \param Op    The mnemonic token operand that is looked up.
/// NOTE(review): the StringSwitch default and the guard for mnemonics that
/// are not in the table are on lines not visible in this listing.
2662 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2663 OperandVector &Operands, MCStreamer &Out,
2664 bool MatchingInlineAsm) {
2665 // FIXME: This should be replaced with a real .td file alias mechanism.
2666 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2668 const char *Repl = StringSwitch<const char *>(Op.getToken())
2669 .Case("finit", "fninit")
2670 .Case("fsave", "fnsave")
2671 .Case("fstcw", "fnstcw")
2672 .Case("fstcww", "fnstcw")
2673 .Case("fstenv", "fnstenv")
2674 .Case("fstsw", "fnstsw")
2675 .Case("fstsww", "fnstsw")
2676 .Case("fclex", "fnclex")
2680 Inst.setOpcode(X86::WAIT);
2682 if (!MatchingInlineAsm)
2683 EmitInstruction(Inst, Operands, Out);
// Swap the mnemonic for its non-waiting spelling.
2684 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
/// Report an "instruction requires: <features>" diagnostic at \p IDLoc.
///
/// \param ErrorInfo Bit mask of missing subtarget features; must be non-zero.
/// \returns the result of Error().
2688 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2689 bool MatchingInlineAsm) {
2690 assert(ErrorInfo && "Unknown missing feature!");
2691 SmallString<126> Msg;
2692 raw_svector_ostream OS(Msg);
2693 OS << "instruction requires:";
// The loop runs sizeof(ErrorInfo)*8-1 = 63 times, appending the name of each
// feature bit selected by Mask. NOTE(review): Mask's declaration and update
// are on lines not visible in this listing.
2695 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2696 if (ErrorInfo & Mask)
2697 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2700 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
/// Match and emit an instruction written in AT&T syntax.
///
/// A direct match is tried first. If that fails with an invalid operand or an
/// unknown mnemonic, the mnemonic is retried with each size suffix appended
/// and the result is accepted only when exactly one suffixed form matches;
/// otherwise the most specific diagnostic available is reported.
///
/// \param IDLoc    Location used for diagnostics.
/// \param Opcode   Set to the matched instruction's opcode on success.
/// \param Operands Parsed operands; element 0 must be the mnemonic token.
/// \param ErrorInfo Matcher detail: an operand index for invalid-operand
///                  failures, a feature mask for missing-feature failures.
/// \param MatchingInlineAsm When true, nothing is emitted.
/// NOTE(review): several short lines (returns, the `Match` array declaration,
/// some case labels) are not visible in this listing.
2703 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2704 OperandVector &Operands,
2706 uint64_t &ErrorInfo,
2707 bool MatchingInlineAsm) {
2708 assert(!Operands.empty() && "Unexpect empty operand list!");
2709 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2710 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2711 SMRange EmptyRange = None;
2713 // First, handle aliases that expand to multiple instructions.
2714 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the direct match failed on an operand (as opposed to the
// mnemonic), which selects the diagnostic if no suffixed form matches either.
2716 bool WasOriginallyInvalidOperand = false;
2719 // First, try a direct match.
2720 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2721 isParsingIntelSyntax())) {
2722 default: llvm_unreachable("Unexpected match result!");
2724 // Some instructions need post-processing to, for example, tweak which
2725 // encoding is selected. Loop on it while changes happen so the
2726 // individual transformations can chain off each other.
2727 if (!MatchingInlineAsm)
2728 while (processInstruction(Inst, Operands))
2732 if (!MatchingInlineAsm)
2733 EmitInstruction(Inst, Operands, Out);
2734 Opcode = Inst.getOpcode();
2736 case Match_MissingFeature:
2737 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2738 case Match_InvalidOperand:
2739 WasOriginallyInvalidOperand = true;
2741 case Match_MnemonicFail:
2745 // FIXME: Ideally, we would only attempt suffix matches for things which are
2746 // valid prefixes, and we could just infer the right unambiguous
2747 // type. However, that requires substantially more matcher support than the
2750 // Change the operand to point to a temporary token.
2751 StringRef Base = Op.getToken();
2752 SmallString<16> Tmp;
2755 Op.setTokenValue(Tmp);
2757 // If this instruction starts with an 'f', then it is a floating point stack
2758 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2759 // 80-bit floating point, which use the suffixes s,l,t respectively.
2761 // Otherwise, we assume that this may be an integer instruction, which comes
2762 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
// Both tables have four entries; the floating-point one ends with an explicit
// NUL as its fourth suffix.
2763 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2765 // Check for the various suffix matches.
2766 uint64_t ErrorInfoIgnore;
2767 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Try each suffix in turn by overwriting the last character of Tmp.
2770 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2771 Tmp.back() = Suffixes[I];
2772 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2773 MatchingInlineAsm, isParsingIntelSyntax());
2774 // If this returned as a missing feature failure, remember that.
2775 if (Match[I] == Match_MissingFeature)
2776 ErrorInfoMissingFeature = ErrorInfoIgnore;
2779 // Restore the old token.
2780 Op.setTokenValue(Base);
2782 // If exactly one matched, then we treat that as a successful match (and the
2783 // instruction will already have been filled in correctly, since the failing
2784 // matches won't have modified it).
2785 unsigned NumSuccessfulMatches =
2786 std::count(std::begin(Match), std::end(Match), Match_Success);
2787 if (NumSuccessfulMatches == 1) {
2789 if (!MatchingInlineAsm)
2790 EmitInstruction(Inst, Operands, Out);
2791 Opcode = Inst.getOpcode();
2795 // Otherwise, the match failed, try to produce a decent error message.
2797 // If we had multiple suffix matches, then identify this as an ambiguous
2799 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so the message can list them.
2801 unsigned NumMatches = 0;
2802 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2803 if (Match[I] == Match_Success)
2804 MatchChars[NumMatches++] = Suffixes[I];
2806 SmallString<126> Msg;
2807 raw_svector_ostream OS(Msg);
2808 OS << "ambiguous instructions require an explicit suffix (could be ";
2809 for (unsigned i = 0; i != NumMatches; ++i) {
2812 if (i + 1 == NumMatches)
2814 OS << "'" << Base << MatchChars[i] << "'";
2817 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
2821 // Okay, we know that none of the variants matched successfully.
2823 // If all of the instructions reported an invalid mnemonic, then the original
2824 // mnemonic was invalid.
2825 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2826 if (!WasOriginallyInvalidOperand) {
2827 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2828 Op.getLocRange(), MatchingInlineAsm);
2831 // Recover location info for the operand if we know which was the problem.
// Here ErrorInfo is the index of the offending operand as reported by the
// direct match; ~0ULL means the matcher could not identify one.
2832 if (ErrorInfo != ~0ULL) {
2833 if (ErrorInfo >= Operands.size())
2834 return Error(IDLoc, "too few operands for instruction", EmptyRange,
2837 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2838 if (Operand.getStartLoc().isValid()) {
2839 SMRange OperandRange = Operand.getLocRange();
2840 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2841 OperandRange, MatchingInlineAsm);
2845 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2849 // If one instruction matched with a missing feature, report this as a
2851 if (std::count(std::begin(Match), std::end(Match),
2852 Match_MissingFeature) == 1) {
2853 ErrorInfo = ErrorInfoMissingFeature;
2854 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2858 // If one instruction matched with an invalid operand, report this as an
2860 if (std::count(std::begin(Match), std::end(Match),
2861 Match_InvalidOperand) == 1) {
2862 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2866 // If all of these were an outright failure, report it in a useless way.
2867 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2868 EmptyRange, MatchingInlineAsm);
/// Match and emit an instruction written in Intel syntax.
///
/// Intel syntax carries the operand size on the memory operand rather than on
/// the mnemonic, so when an unsized memory operand is present the instruction
/// is matched once per candidate size and accepted only if the outcome is
/// unambiguous (optionally disambiguated by the frontend-provided size).
///
/// \param IDLoc    Location used for diagnostics.
/// \param Opcode   Set to the matched instruction's opcode on success.
/// \param Operands Parsed operands; element 0 must be the mnemonic token.
/// \param ErrorInfo Matcher detail, overwritten with the missing-feature mask
///                  when that diagnostic is reported.
/// \param MatchingInlineAsm When true, nothing is emitted.
/// NOTE(review): several short lines (returns, guards such as the null check
/// on UnsizedMemOp, the statement recording each per-size result) are not
/// visible in this listing.
2872 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2873 OperandVector &Operands,
2875 uint64_t &ErrorInfo,
2876 bool MatchingInlineAsm) {
2877 assert(!Operands.empty() && "Unexpect empty operand list!");
2878 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2879 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2880 StringRef Mnemonic = Op.getToken();
2881 SMRange EmptyRange = None;
// Base keeps the original mnemonic so it can be restored after the temporary
// suffixed token used for the "push <imm>" case below.
2882 StringRef Base = Op.getToken();
2884 // First, handle aliases that expand to multiple instructions.
2885 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2889 // Find one unsized memory operand, if present.
2890 X86Operand *UnsizedMemOp = nullptr;
2891 for (const auto &Op : Operands) {
2892 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2893 if (X86Op->isMemUnsized()) {
2894 UnsizedMemOp = X86Op;
2895 // Have we found an unqualified memory operand,
2896 // break. IA allows only one memory operand.
2901 // Allow some instructions to have implicitly pointer-sized operands. This is
2902 // compatible with gas.
2904 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2905 for (const char *Instr : PtrSizedInstrs) {
2906 if (Mnemonic == Instr) {
2907 UnsizedMemOp->Mem.Size = getPointerWidth();
// One entry per match attempt; the counts taken over this vector below decide
// between success, ambiguity and the various diagnostics.
2913 SmallVector<unsigned, 8> Match;
2914 uint64_t ErrorInfoMissingFeature = 0;
2916 // If an unsized push has an immediate operand, default the operand size to
2917 // the pointer size.
2918 if (Mnemonic == "push" && Operands.size() == 2) {
2919 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
2920 if (X86Op->isImm()) {
2921 // If it's not a constant fall through and let remainder take care of it.
2922 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
2923 unsigned Size = getPointerWidth();
2925 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
// Temporarily append the AT&T size suffix for the current mode to the
// mnemonic token.
2926 SmallString<16> Tmp;
2928 Tmp += (is64BitMode())
2930 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
2931 Op.setTokenValue(Tmp);
2932 // Do match in ATT mode to allow explicit suffix usage.
2933 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
2935 false /*isParsingIntelSyntax()*/));
2936 Op.setTokenValue(Base);
2941 // If an unsized memory operand is present, try to match with each memory
2942 // operand size. In Intel assembly, the size is not part of the instruction
2944 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2945 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2946 for (unsigned Size : MopSizes) {
2947 UnsizedMemOp->Mem.Size = Size;
2948 uint64_t ErrorInfoIgnore;
2949 unsigned LastOpcode = Inst.getOpcode();
2950 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2951 MatchingInlineAsm, isParsingIntelSyntax());
// The condition selects the first attempt, or any attempt that changed the
// opcode stored in Inst (presumably M is then appended to Match -- the
// statement is not visible here).
2952 if (Match.empty() || LastOpcode != Inst.getOpcode())
2955 // If this returned as a missing feature failure, remember that.
2956 if (Match.back() == Match_MissingFeature)
2957 ErrorInfoMissingFeature = ErrorInfoIgnore;
2960 // Restore the size of the unsized memory operand if we modified it.
2961 UnsizedMemOp->Mem.Size = 0;
2964 // If we haven't matched anything yet, this is not a basic integer or FPU
2965 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2966 // matching with the unsized operand.
2967 if (Match.empty()) {
2968 Match.push_back(MatchInstruction(
2969 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
2970 // If this returned as a missing feature failure, remember that.
2971 if (Match.back() == Match_MissingFeature)
2972 ErrorInfoMissingFeature = ErrorInfo;
2975 // Restore the size of the unsized memory operand if we modified it.
2977 UnsizedMemOp->Mem.Size = 0;
2979 // If it's a bad mnemonic, all results will be the same.
2980 if (Match.back() == Match_MnemonicFail) {
2981 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2982 Op.getLocRange(), MatchingInlineAsm);
2985 unsigned NumSuccessfulMatches =
2986 std::count(std::begin(Match), std::end(Match), Match_Success);
2988 // If matching was ambiguous and we had size information from the frontend,
2989 // try again with that. This handles cases like "movzx eax, m8/m16".
2990 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
2991 UnsizedMemOp->getMemFrontendSize()) {
2992 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
2993 unsigned M = MatchInstruction(
2994 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
2995 if (M == Match_Success)
2996 NumSuccessfulMatches = 1;
2998 // Add a rewrite that encodes the size information we used from the
3000 InstInfo->AsmRewrites->emplace_back(
3001 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3002 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3005 // If exactly one matched, then we treat that as a successful match (and the
3006 // instruction will already have been filled in correctly, since the failing
3007 // matches won't have modified it).
3008 if (NumSuccessfulMatches == 1) {
3009 // Some instructions need post-processing to, for example, tweak which
3010 // encoding is selected. Loop on it while changes happen so the individual
3011 // transformations can chain off each other.
3012 if (!MatchingInlineAsm)
3013 while (processInstruction(Inst, Operands))
3016 if (!MatchingInlineAsm)
3017 EmitInstruction(Inst, Operands, Out);
3018 Opcode = Inst.getOpcode();
3020 } else if (NumSuccessfulMatches > 1) {
3021 assert(UnsizedMemOp &&
3022 "multiple matches only possible with unsized memory operands");
3023 return Error(UnsizedMemOp->getStartLoc(),
3024 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3025 UnsizedMemOp->getLocRange());
3028 // If one instruction matched with a missing feature, report this as a
3030 if (std::count(std::begin(Match), std::end(Match),
3031 Match_MissingFeature) == 1) {
3032 ErrorInfo = ErrorInfoMissingFeature;
3033 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3037 // If one instruction matched with an invalid operand, report this as an
3039 if (std::count(std::begin(Match), std::end(Match),
3040 Match_InvalidOperand) == 1) {
3041 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3045 // If all of these were an outright failure, report it in a useless way.
3046 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
/// Returns true when \p RegNo belongs to the SEGMENT_REG register class,
/// i.e. segment registers are the ones omitted from clobber lists.
3050 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3051 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
/// Handle the X86-specific assembler directives: .word, .code*,
/// .att_syntax, .intel_syntax and .even.
///
/// \param DirectiveID Token naming the directive.
/// \returns the handler's result for .word/.code*/.even and the result of
///          Error() for unsupported syntax options.
/// NOTE(review): the remaining returns (including the one for unrecognized
/// directives) are on lines not visible in this listing.
3054 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3055 MCAsmParser &Parser = getParser();
3056 StringRef IDVal = DirectiveID.getIdentifier();
3057 if (IDVal == ".word")
3058 return ParseDirectiveWord(2, DirectiveID.getLoc());
// Every directive starting with ".code" is forwarded; ParseDirectiveCode
// reports the ones it does not know.
3059 else if (IDVal.startswith(".code"))
3060 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3061 else if (IDVal.startswith(".att_syntax")) {
3062 getParser().setParsingInlineAsm(false);
// Optional argument: "prefix" is accepted, "noprefix" is rejected.
3063 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3064 if (Parser.getTok().getString() == "prefix")
3066 else if (Parser.getTok().getString() == "noprefix")
3067 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3068 "supported: registers must have a "
3069 "'%' prefix in .att_syntax");
// Dialect 0 is selected for .att_syntax, dialect 1 for .intel_syntax.
3071 getParser().setAssemblerDialect(0);
3073 } else if (IDVal.startswith(".intel_syntax")) {
3074 getParser().setAssemblerDialect(1);
3075 getParser().setParsingInlineAsm(true);
// Optional argument: "noprefix" is accepted, "prefix" is rejected.
3076 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3077 if (Parser.getTok().getString() == "noprefix")
3079 else if (Parser.getTok().getString() == "prefix")
3080 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3081 "supported: registers must not have "
3082 "a '%' prefix in .intel_syntax");
3085 } else if (IDVal == ".even")
3086 return parseDirectiveEven(DirectiveID.getLoc());
3090 /// parseDirectiveEven
/// Handles the ".even" directive: aligns the current section to a 2-byte
/// boundary, using code alignment when the section asks for it and value
/// alignment otherwise. Takes no arguments; any extra token is diagnosed.
/// NOTE(review): the return statements are on lines not visible in this
/// listing.
3092 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3093 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3094 TokError("unexpected token in directive");
3097 const MCSection *Section = getStreamer().getCurrentSectionOnly();
// Initialize sections and re-query the current one (presumably only when
// none exists yet -- the guarding condition is not visible here).
3099 getStreamer().InitSections(false);
3100 Section = getStreamer().getCurrentSectionOnly();
3102 if (Section->UseCodeAlign())
3103 getStreamer().EmitCodeAlignment(2, 0);
3105 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3108 /// ParseDirectiveWord
3109 /// ::= .word [ expression (, expression)* ]
/// Emits each expression as a \p Size-byte value. Constant expressions are
/// range-checked against 8 * Size bits (either the signed or the unsigned
/// range is accepted) and emitted with EmitIntValue; other expressions are
/// emitted with EmitValue.
/// \param Size Width of each emitted value in bytes (asserted to be <= 8).
/// \param L    Directive location, used for the "unexpected token" error.
/// NOTE(review): the loop construct and the returns are on lines not visible
/// in this listing.
3110 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
3111 MCAsmParser &Parser = getParser();
3112 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3114 const MCExpr *Value;
3115 SMLoc ExprLoc = getLexer().getLoc();
3116 if (getParser().parseExpression(Value))
3119 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
3120 assert(Size <= 8 && "Invalid size");
3121 uint64_t IntValue = MCE->getValue();
3122 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3123 return Error(ExprLoc, "literal value out of range for directive");
3124 getStreamer().EmitIntValue(IntValue, Size);
3126 getStreamer().EmitValue(Value, Size, ExprLoc);
3129 if (getLexer().is(AsmToken::EndOfStatement))
3132 // FIXME: Improve diagnostic.
3133 if (getLexer().isNot(AsmToken::Comma)) {
3134 Error(L, "unexpected token in directive");
3145 /// ParseDirectiveCode
3146 /// ::= .code16 | .code32 | .code64
/// Also accepts .code16gcc. When the requested mode differs from the current
/// one, the parser mode is switched and the matching MCAF_Code* assembler
/// flag is emitted to the streamer; any other ".code*" spelling is reported
/// as an unknown directive.
/// \param IDVal The directive name as written.
/// \param L     Directive location, used for the error.
/// NOTE(review): the token-consuming statements, the Code16GCC bookkeeping
/// and the returns are on lines not visible in this listing.
3147 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3148 MCAsmParser &Parser = getParser();
3150 if (IDVal == ".code16") {
3152 if (!is16BitMode()) {
3153 SwitchMode(X86::Mode16Bit);
3154 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3156 } else if (IDVal == ".code16gcc") {
3157 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3160 if (!is16BitMode()) {
3161 SwitchMode(X86::Mode16Bit);
3162 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3164 } else if (IDVal == ".code32") {
3166 if (!is32BitMode()) {
3167 SwitchMode(X86::Mode32Bit);
3168 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3170 } else if (IDVal == ".code64") {
3172 if (!is64BitMode()) {
3173 SwitchMode(X86::Mode64Bit);
3174 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3177 Error(L, "unknown directive " + IDVal);
3184 // Force static initialization.
// C-linkage entry point that registers X86AsmParser as the assembly parser
// for both the 32-bit and the 64-bit X86 targets.
3185 extern "C" void LLVMInitializeX86AsmParser() {
3186 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3187 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3190 #define GET_REGISTER_MATCHER
3191 #define GET_MATCHER_IMPLEMENTATION
3192 #define GET_SUBTARGET_FEATURE_NAME
3193 #include "X86GenAsmMatcher.inc"