1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
// Operator precedence table. InfixCalculator::pushOperator() indexes it with
// an InfixCalculatorTok value and compares entries to decide whether a new
// operator may be stacked on top of the current one.
42 static const char OpPrecedence[] = {
/// X86 target assembly parser, derived from MCTargetAsmParser.
58 class X86AsmParser : public MCTargetAsmParser {
// Instruction info reference bound from the constructor's `mii` argument.
59 const MCInstrInfo &MII;
// Initialised to nullptr by the constructor; CreateMemForInlineAsm() appends
// rewrites through InstInfo->AsmRewrites.
60 ParseInstructionInfo *InstInfo;
// Created in the constructor via CreateX86AsmInstrumentation(); also told
// about the frame register by SetFrameRegister().
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
/// Captures the source location of the parser's current token.
/// NOTE(review): the remaining statements are not visible here; presumably the
/// token is then consumed and the captured location returned - confirm.
65 SMLoc consumeToken() {
66 MCAsmParser &Parser = getParser();
67 SMLoc Result = Parser.getTok().getLoc();
/// Wrapper around the generated MatchInstructionImpl() that forwards all of
/// its arguments unchanged.
///
/// \param Operands           parsed operands to match.
/// \param Inst               receives the matched instruction.
/// \param ErrorInfo          matcher diagnostic payload (in/out).
/// \param matchingInlineAsm  forwarded to the matcher.
/// \param VariantID          assembler variant, defaults to 0.
72 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
73 uint64_t &ErrorInfo, bool matchingInlineAsm,
74 unsigned VariantID = 0) {
75 // In Code16GCC mode, match as 32-bit.
// The parser is switched to 32-bit mode for the duration of the match ...
77 SwitchMode(X86::Mode32Bit);
78 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
79 matchingInlineAsm, VariantID);
// ... and put back into 16-bit mode afterwards.
81 SwitchMode(X86::Mode16Bit);
// Token kinds understood by InfixCalculator: operands (IC_IMM, IC_REGISTER),
// binary operators (e.g. IC_PLUS, IC_MINUS, IC_OR, IC_LSHIFT) and the
// parentheses IC_LPAREN / IC_RPAREN. Values are also used as indices into
// OpPrecedence.
85 enum InfixCalculatorTok {
// NOTE(review): enumerators are not visible here; presumably these are the
// kinds produced by IdentifyIntelOperator() and consumed by
// ParseIntelOperator() - confirm.
101 enum IntelOperatorKind {
/// Small infix-to-postfix calculator. Operands are appended directly to
/// PostfixStack; operators wait on InfixOperatorStack until precedence rules
/// allow them to be moved to the postfix sequence, which is then evaluated.
109 class InfixCalculator {
// (token kind, value) pair; operators are stored with a value of 0.
110 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
// Pending operators (and parentheses) not yet emitted to the postfix sequence.
111 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
// The expression in postfix order.
112 SmallVector<ICToken, 4> PostfixStack;
/// Removes the last postfix entry, asserting that it is an operand
/// (IC_IMM or IC_REGISTER).
115 int64_t popOperand() {
116 assert (!PostfixStack.empty() && "Poped an empty stack!");
117 ICToken Op = PostfixStack.pop_back_val();
118 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
119 && "Expected and immediate or register!");
/// Appends an operand of kind \p Op (IC_IMM or IC_REGISTER) with value
/// \p Val (default 0) to the postfix sequence.
122 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
123 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
124 "Unexpected operand!");
125 PostfixStack.push_back(std::make_pair(Op, Val));
/// Discards the operator on top of the pending-operator stack.
128 void popOperator() { InfixOperatorStack.pop_back(); }
/// Adds operator \p Op, first moving any pending operators that must be
/// applied before it onto the postfix sequence.
129 void pushOperator(InfixCalculatorTok Op) {
130 // Push the new operator if the stack is empty.
131 if (InfixOperatorStack.empty()) {
132 InfixOperatorStack.push_back(Op);
136 // Push the new operator if it has a higher precedence than the operator
137 // on the top of the stack or the operator on the top of the stack is a
// left parenthesis.
139 unsigned Idx = InfixOperatorStack.size() - 1;
140 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
141 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
142 InfixOperatorStack.push_back(Op);
146 // The operator on the top of the stack has higher precedence than the
// (or equal precedence to the) new operator, so pending operators are drained.
// ParenCount tracks parenthesis nesting seen while draining.
148 unsigned ParenCount = 0;
150 // Nothing to process.
151 if (InfixOperatorStack.empty())
154 Idx = InfixOperatorStack.size() - 1;
155 StackOp = InfixOperatorStack[Idx];
// Stop once the top operator has lower precedence than Op, unless we are
// still inside parentheses (ParenCount != 0).
156 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
159 // If we have an even parentheses count and we see a left parentheses,
160 // then stop processing.
161 if (!ParenCount && StackOp == IC_LPAREN)
// Parentheses are dropped from the operator stack; any other operator is
// moved to the postfix sequence with a placeholder value of 0.
164 if (StackOp == IC_RPAREN) {
166 InfixOperatorStack.pop_back();
167 } else if (StackOp == IC_LPAREN) {
169 InfixOperatorStack.pop_back();
171 InfixOperatorStack.pop_back();
172 PostfixStack.push_back(std::make_pair(StackOp, 0));
175 // Push the new operator.
176 InfixOperatorStack.push_back(Op);
// --- Evaluation: flush pending operators, then run the postfix sequence. ---
180 // Push any remaining operators onto the postfix stack.
181 while (!InfixOperatorStack.empty()) {
182 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
183 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
184 PostfixStack.push_back(std::make_pair(StackOp, 0));
187 if (PostfixStack.empty())
// Standard postfix evaluation: operands are stacked; each operator pops two
// operands (Op2 first, then Op1) and pushes the result back as an IC_IMM.
190 SmallVector<ICToken, 16> OperandStack;
191 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
192 ICToken Op = PostfixStack[i];
193 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
194 OperandStack.push_back(Op);
196 assert (OperandStack.size() > 1 && "Too few operands.");
198 ICToken Op2 = OperandStack.pop_back_val();
199 ICToken Op1 = OperandStack.pop_back_val();
202 report_fatal_error("Unexpected operator!");
// Addition and subtraction accept register operands as well as immediates
// (no operand-kind assert, unlike the operators below).
205 Val = Op1.second + Op2.second;
206 OperandStack.push_back(std::make_pair(IC_IMM, Val));
209 Val = Op1.second - Op2.second;
210 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// All remaining operators require both operands to be immediates.
213 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
214 "Multiply operation with an immediate and a register!");
215 Val = Op1.second * Op2.second;
216 OperandStack.push_back(std::make_pair(IC_IMM, Val));
219 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
220 "Divide operation with an immediate and a register!");
// NOTE(review): a zero divisor is only caught by this assert; in builds
// without assertions the division below is reached unguarded.
221 assert (Op2.second != 0 && "Division by zero!");
222 Val = Op1.second / Op2.second;
223 OperandStack.push_back(std::make_pair(IC_IMM, Val));
226 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
227 "Or operation with an immediate and a register!");
228 Val = Op1.second | Op2.second;
229 OperandStack.push_back(std::make_pair(IC_IMM, Val));
232 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
233 "Xor operation with an immediate and a register!");
234 Val = Op1.second ^ Op2.second;
235 OperandStack.push_back(std::make_pair(IC_IMM, Val));
238 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
239 "And operation with an immediate and a register!");
240 Val = Op1.second & Op2.second;
241 OperandStack.push_back(std::make_pair(IC_IMM, Val));
244 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
245 "Left shift operation with an immediate and a register!");
// NOTE(review): the shift count (Op2.second) is not range-checked before the
// int64_t shift here or in the right shift below.
246 Val = Op1.second << Op2.second;
247 OperandStack.push_back(std::make_pair(IC_IMM, Val));
250 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
251 "Right shift operation with an immediate and a register!");
252 Val = Op1.second >> Op2.second;
253 OperandStack.push_back(std::make_pair(IC_IMM, Val));
// Exactly one value must remain: the result of the whole expression.
258 assert (OperandStack.size() == 1 && "Expected a single result.");
259 return OperandStack.pop_back_val().second;
// States of IntelExprStateMachine. Values referenced by the visible code
// include IES_PLUS, IES_MINUS, IES_MULTIPLY, IES_DIVIDE, IES_OR, IES_XOR,
// IES_AND, IES_NOT, IES_LSHIFT, IES_RSHIFT, IES_LBRAC, IES_RBRAC, IES_LPAREN,
// IES_REGISTER, IES_INTEGER and IES_ERROR.
263 enum IntelExprState {
/// State machine fed by the Intel-syntax expression parser. It tracks the
/// current and previous IntelExprState, collects base/index register and scale
/// for a memory reference, and forwards operands and operators to the
/// calculator member `IC`, whose result is combined with Imm in getImm().
284 class IntelExprStateMachine {
285 IntelExprState State, PrevState;
// TmpReg holds a register seen by onRegister() until a later handler decides
// whether it becomes the base or the index register.
286 unsigned BaseReg, IndexReg, TmpReg, Scale;
290 bool StopOnLBrac, AddImmPrefix;
292 InlineAsmIdentifierInfo Info;
/// Starts in IES_PLUS with no registers, a scale of 1, no symbol, and the
/// supplied initial immediate \p imm; Info is cleared.
295 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
296 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
297 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
298 AddImmPrefix(addimmprefix) { Info.clear(); }
// Plain accessors for the collected addressing components.
300 unsigned getBaseReg() { return BaseReg; }
301 unsigned getIndexReg() { return IndexReg; }
302 unsigned getScale() { return Scale; }
303 const MCExpr *getSym() { return Sym; }
304 StringRef getSymName() { return SymName; }
// The final displacement: the initial immediate plus the evaluated expression.
305 int64_t getImm() { return Imm + IC.execute(); }
// An expression may legally end only after ']' or after an integer.
306 bool isValidEndState() {
307 return State == IES_RBRAC || State == IES_INTEGER;
309 bool getStopOnLBrac() { return StopOnLBrac; }
310 bool getAddImmPrefix() { return AddImmPrefix; }
311 bool hadError() { return State == IES_ERROR; }
313 InlineAsmIdentifierInfo &getIdentifierInfo() {
// The handler bodies below share one shape: remember the state on entry in
// CurrState, perform the transition, then store CurrState into PrevState.
// Bitwise-or: forwards IC_OR to the calculator.
318 IntelExprState CurrState = State;
327 IC.pushOperator(IC_OR);
330 PrevState = CurrState;
// Bitwise-xor: forwards IC_XOR to the calculator.
333 IntelExprState CurrState = State;
342 IC.pushOperator(IC_XOR);
345 PrevState = CurrState;
// Bitwise-and: forwards IC_AND to the calculator.
348 IntelExprState CurrState = State;
357 IC.pushOperator(IC_AND);
360 PrevState = CurrState;
// Left shift: forwards IC_LSHIFT to the calculator.
363 IntelExprState CurrState = State;
372 IC.pushOperator(IC_LSHIFT);
375 PrevState = CurrState;
// Right shift: forwards IC_RSHIFT to the calculator.
378 IntelExprState CurrState = State;
387 IC.pushOperator(IC_RSHIFT);
390 PrevState = CurrState;
// Plus: forwards IC_PLUS. A register that was just seen and that did not
// follow a multiply is resolved into BaseReg/IndexReg at this point.
393 IntelExprState CurrState = State;
402 IC.pushOperator(IC_PLUS);
403 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
404 // If we already have a BaseReg, then assume this is the IndexReg with
409 assert (!IndexReg && "BaseReg/IndexReg already set!");
416 PrevState = CurrState;
// Minus: same register resolution as plus, but IC_MINUS is pushed only for
// the binary form (see the condition below).
419 IntelExprState CurrState = State;
435 // Only push the minus operator if it is not a unary operator.
436 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
437 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
438 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
439 IC.pushOperator(IC_MINUS);
440 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
441 // If we already have a BaseReg, then assume this is the IndexReg with
446 assert (!IndexReg && "BaseReg/IndexReg already set!");
453 PrevState = CurrState;
456 IntelExprState CurrState = State;
466 PrevState = CurrState;
/// Handles a register token \p Reg. A register operand is pushed to the
/// calculator; when the register follows "integer *" the preceding integer
/// is taken as the scale.
468 void onRegister(unsigned Reg) {
469 IntelExprState CurrState = State;
476 State = IES_REGISTER;
478 IC.pushOperand(IC_REGISTER);
481 // Index Register - Scale * Register
482 if (PrevState == IES_INTEGER) {
483 assert (!IndexReg && "IndexReg already set!");
484 State = IES_REGISTER;
486 // Get the scale and replace the 'Scale * Register' with '0'.
487 Scale = IC.popOperand();
488 IC.pushOperand(IC_IMM);
495 PrevState = CurrState;
/// Handles a symbolic identifier: records its name and pushes a zero
/// immediate as its placeholder in the arithmetic expression.
497 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
508 SymName = SymRefName;
509 IC.pushOperand(IC_IMM);
/// Handles an integer token \p TmpInt. On an invalid scale factor \p ErrMsg
/// is set to a diagnostic string.
513 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
514 IntelExprState CurrState = State;
531 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
532 // Index Register - Register * Scale
533 assert (!IndexReg && "IndexReg already set!");
536 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
537 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
540 // Get the scale and replace the 'Register * Scale' with '0'.
542 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
543 PrevState == IES_OR || PrevState == IES_AND ||
544 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
545 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
546 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
547 PrevState == IES_NOT || PrevState == IES_XOR) &&
548 CurrState == IES_MINUS) {
549 // Unary minus. No need to pop the minus operand because it was never
551 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
552 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
553 PrevState == IES_OR || PrevState == IES_AND ||
554 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
555 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
556 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
557 PrevState == IES_NOT || PrevState == IES_XOR) &&
558 CurrState == IES_NOT) {
559 // Unary not. No need to pop the not operand because it was never
561 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
// Ordinary integer operand.
563 IC.pushOperand(IC_IMM, TmpInt);
567 PrevState = CurrState;
// Multiply: enters IES_MULTIPLY and forwards IC_MULTIPLY.
579 State = IES_MULTIPLY;
580 IC.pushOperator(IC_MULTIPLY);
// Divide: forwards IC_DIVIDE.
593 IC.pushOperator(IC_DIVIDE);
605 IC.pushOperator(IC_PLUS);
// A pending register not consumed by a multiply is resolved into
// BaseReg/IndexReg here as well.
610 IntelExprState CurrState = State;
619 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
620 // If we already have a BaseReg, then assume this is the IndexReg with
625 assert (!IndexReg && "BaseReg/IndexReg already set!");
632 PrevState = CurrState;
// Left parenthesis: forwards IC_LPAREN; a preceding unary minus/not is a
// recognised but unsupported case (see FIXME).
635 IntelExprState CurrState = State;
651 // FIXME: We don't handle this type of unary minus or not, yet.
652 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
653 PrevState == IES_OR || PrevState == IES_AND ||
654 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
655 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
656 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
657 PrevState == IES_NOT || PrevState == IES_XOR) &&
658 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
663 IC.pushOperator(IC_LPAREN);
666 PrevState = CurrState;
// Right parenthesis: forwards IC_RPAREN.
678 IC.pushOperator(IC_RPAREN);
/// Reports a parse error at \p L with message \p Msg and optional \p Range.
/// In the normal case this forwards to the generic parser's Error(). When
/// \p MatchingInlineAsm is set, the rest of the current statement is skipped
/// unless the lexer is already at the start of a statement.
/// NOTE(review): the value returned on the inline-asm path is not visible
/// here - confirm before relying on it.
684 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
685 bool MatchingInlineAsm = false) {
686 MCAsmParser &Parser = getParser();
687 if (MatchingInlineAsm) {
688 if (!getLexer().isAtStartOfStatement())
689 Parser.eatToEndOfStatement();
692 return Parser.Error(L, Msg, Range);
/// Error helper returning std::nullptr_t, a type that converts implicitly to
/// an empty std::unique_ptr.
/// NOTE(review): body not visible; presumably reports \p Msg at \p Loc so that
/// operand parsers can `return ErrorOperand(...)` - confirm.
695 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
// --- Default SI/DI memory operands and operand fix-up helpers ---
700 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
701 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
702 bool IsSIReg(unsigned Reg);
703 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
// Appends Src and Dst to Operands in the order required by the active syntax.
705 AddDefaultSrcDestOperands(OperandVector &Operands,
706 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
707 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
708 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
709 OperandVector &FinalOperands);
// --- Operand parsing; ParseOperand() dispatches on the active syntax ---
710 std::unique_ptr<X86Operand> ParseOperand();
711 std::unique_ptr<X86Operand> ParseATTOperand();
712 std::unique_ptr<X86Operand> ParseIntelOperand();
713 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
714 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
715 unsigned IdentifyIntelOperator(StringRef Name);
716 unsigned ParseIntelOperator(unsigned OpKind);
717 std::unique_ptr<X86Operand>
718 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
719 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
// --- Intel expression parsing, driven through IntelExprStateMachine ---
720 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
721 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
722 std::unique_ptr<X86Operand>
723 ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
724 bool isSymbol, unsigned Size);
725 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
726 InlineAsmIdentifierInfo &Info,
727 bool IsUnevaluatedOperand, SMLoc &End);
729 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
// Builds a memory operand for inline assembly; may record size-directive
// rewrites through InstInfo->AsmRewrites.
731 std::unique_ptr<X86Operand>
732 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
733 unsigned IndexReg, unsigned Scale, SMLoc Start,
734 SMLoc End, unsigned Size, StringRef Identifier,
735 InlineAsmIdentifierInfo &Info,
736 bool AllowBetterSizeMatch = false);
// --- Directive handlers ---
738 bool parseDirectiveEven(SMLoc L);
739 bool ParseDirectiveWord(unsigned Size, SMLoc L);
740 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
742 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
744 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
745 /// instrumentation around Inst.
746 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
// --- Instruction matching and emission ---
748 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
749 OperandVector &Operands, MCStreamer &Out,
751 bool MatchingInlineAsm) override;
753 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
754 MCStreamer &Out, bool MatchingInlineAsm);
756 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
757 bool MatchingInlineAsm);
759 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
760 OperandVector &Operands, MCStreamer &Out,
762 bool MatchingInlineAsm);
764 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
765 OperandVector &Operands, MCStreamer &Out,
767 bool MatchingInlineAsm);
769 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
771 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
772 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
773 /// return false if no parsing errors occurred, true otherwise.
774 bool HandleAVX512Operand(OperandVector &Operands,
775 const MCParsedAsmOperand &Op);
777 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
779 /// MS-compatibility:
780 /// Obtain an appropriate size qualifier, when facing its absence,
781 /// upon AVX512 vector/broadcast memory operand
782 unsigned AdjustAVX512Mem(unsigned Size, X86Operand* UnsizedMemOpNext);
/// True when the subtarget's feature bits have X86::Mode64Bit set.
784 bool is64BitMode() const {
785 // FIXME: Can tablegen auto-generate this?
786 return getSTI().getFeatureBits()[X86::Mode64Bit];
/// True when the subtarget's feature bits have X86::Mode32Bit set.
788 bool is32BitMode() const {
789 // FIXME: Can tablegen auto-generate this?
790 return getSTI().getFeatureBits()[X86::Mode32Bit];
/// True when the subtarget's feature bits have X86::Mode16Bit set.
792 bool is16BitMode() const {
793 // FIXME: Can tablegen auto-generate this?
794 return getSTI().getFeatureBits()[X86::Mode16Bit];
/// Switches the parser to the operating mode \p mode (one of X86::Mode64Bit,
/// X86::Mode32Bit, X86::Mode16Bit) and refreshes the available-feature mask.
796 void SwitchMode(unsigned mode) {
// Work on a copy of the subtarget info obtained from copySTI().
797 MCSubtargetInfo &STI = copySTI();
798 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
// Isolate the currently active mode bit(s).
799 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
// Flipping `mode` inside OldMode yields the set of bits that must be toggled:
// the old mode goes off and the new one comes on.
800 unsigned FB = ComputeAvailableFeatures(
801 STI.ToggleFeature(OldMode.flip(mode)));
802 setAvailableFeatures(FB);
// Post-condition: exactly the requested mode bit is set among the three.
804 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
/// Returns the pointer width in bits for the active mode: 16, 32 or 64.
/// Reaching the end means no mode bit is set, which is treated as unreachable.
807 unsigned getPointerWidth() {
808 if (is16BitMode()) return 16;
809 if (is32BitMode()) return 32;
810 if (is64BitMode()) return 64;
811 llvm_unreachable("invalid mode");
/// True when the parser's assembler dialect is non-zero; the dialect number is
/// converted to bool, so dialect 0 is the non-Intel case.
814 bool isParsingIntelSyntax() {
815 return getParser().getAssemblerDialect();
818 /// @name Auto-generated Matcher Functions
821 #define GET_ASSEMBLER_HEADER
822 #include "X86GenAsmMatcher.inc"
/// Constructs the parser.
/// \param sti      subtarget info, passed to the MCTargetAsmParser base.
/// \param Parser   generic assembly parser; its context is handed to the
///                 instrumentation factory.
/// \param mii      instruction info, bound to MII.
/// \param Options  target options, passed to the base and to the
///                 instrumentation factory.
828 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
829 const MCInstrInfo &mii, const MCTargetOptions &Options)
830 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
833 // Initialize the set of available features.
834 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
// Take ownership of the instrumentation object created for these options.
835 Instrumentation.reset(
836 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
// Overrides of the MCTargetAsmParser interface.
// Parses a register at the current token into RegNo with its source range.
839 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
// Forwards the frame register to the instrumentation object.
841 void SetFrameRegister(unsigned RegNo) override;
843 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
844 SMLoc NameLoc, OperandVector &Operands) override;
846 bool ParseDirective(AsmToken DirectiveID) override;
848 } // end anonymous namespace
850 /// @name Auto-generated Match Functions
// Maps a register name to its register number. ParseRegister() treats a
// result of 0 as "no match".
853 static unsigned MatchRegisterName(StringRef Name);
/// Validates the base/index register pairing of a memory operand. When a
/// combination is rejected, ErrMsg is set to the diagnostic text.
/// NOTE(review): the return statements are not visible here; presumably true
/// signals an invalid combination - confirm.
857 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
859 // If we have both a base register and an index register make sure they are
860 // both 64-bit or 32-bit registers.
861 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
// RIP can never be an index, and cannot be a base when an index is present.
863 if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
864 ErrMsg = "invalid base+index expression";
// The width checks below only apply when both registers are present.
867 if (BaseReg != 0 && IndexReg != 0) {
// 64-bit base with a 16/32-bit index; the RIZ pseudo-register is exempt.
868 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
869 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
870 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
871 IndexReg != X86::RIZ) {
872 ErrMsg = "base register is 64-bit, but index register is not";
// 32-bit base with a 16/64-bit index; the EIZ pseudo-register is exempt.
875 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
876 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
877 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
878 IndexReg != X86::EIZ){
879 ErrMsg = "base register is 32-bit, but index register is not";
// 16-bit base: the index must also be 16-bit ...
882 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
883 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
884 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
885 ErrMsg = "base register is 16-bit, but index register is not";
// ... and the pair must combine one of BX/BP with one of SI/DI.
888 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
889 IndexReg != X86::SI && IndexReg != X86::DI) ||
890 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
891 IndexReg != X86::BX && IndexReg != X86::BP)) {
892 ErrMsg = "invalid 16-bit base/index register combination";
/// Parses a register reference starting at the current token.
///
/// \param RegNo     receives the matched register number.
/// \param StartLoc  set to the location of the first token examined.
/// \param EndLoc    set to the end of the last token belonging to the register.
/// \returns the result of Error() when a diagnostic is issued; in Intel syntax
///          an unrecognised name returns true without emitting a diagnostic.
900 bool X86AsmParser::ParseRegister(unsigned &RegNo,
901 SMLoc &StartLoc, SMLoc &EndLoc) {
902 MCAsmParser &Parser = getParser();
904 const AsmToken &PercentTok = Parser.getTok();
905 StartLoc = PercentTok.getLoc();
907 // If we encounter a %, ignore it. This code handles registers with and
908 // without the prefix, unprefixed registers can occur in cfi directives.
909 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
910 Parser.Lex(); // Eat percent token.
912 const AsmToken &Tok = Parser.getTok();
913 EndLoc = Tok.getEndLoc();
// Anything other than an identifier cannot name a register.
915 if (Tok.isNot(AsmToken::Identifier)) {
916 if (isParsingIntelSyntax()) return true;
917 return Error(StartLoc, "invalid register name",
918 SMRange(StartLoc, EndLoc));
921 RegNo = MatchRegisterName(Tok.getString());
923 // If the match failed, try the register name as lowercase.
925 RegNo = MatchRegisterName(Tok.getString().lower());
927 // The "flags" register cannot be referenced directly.
928 // Treat it as an identifier instead.
929 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
// Outside 64-bit mode, reject registers that exist only in 64-bit mode.
932 if (!is64BitMode()) {
933 // FIXME: This should be done using Requires<Not64BitMode> and
934 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
936 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
938 if (RegNo == X86::RIZ ||
939 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
940 X86II::isX86_64NonExtLowByteReg(RegNo) ||
941 X86II::isX86_64ExtendedReg(RegNo))
942 return Error(StartLoc, "register %"
943 + Tok.getString() + " is only available in 64-bit mode",
944 SMRange(StartLoc, EndLoc));
// In 64-bit mode without AVX512, reject the registers AVX512 adds.
945 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
946 if (X86II::is32ExtendedReg(RegNo))
947 return Error(StartLoc, "register %"
948 + Tok.getString() + " is only available with AVX512",
949 SMRange(StartLoc, EndLoc));
952 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
953 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
955 Parser.Lex(); // Eat 'st'
957 // Check to see if we have '(4)' after %st.
958 if (getLexer().isNot(AsmToken::LParen))
// The parenthesised stack index must be an integer in the range 0-7.
963 const AsmToken &IntTok = Parser.getTok();
964 if (IntTok.isNot(AsmToken::Integer))
965 return Error(IntTok.getLoc(), "expected stack index");
966 switch (IntTok.getIntVal()) {
967 case 0: RegNo = X86::ST0; break;
968 case 1: RegNo = X86::ST1; break;
969 case 2: RegNo = X86::ST2; break;
970 case 3: RegNo = X86::ST3; break;
971 case 4: RegNo = X86::ST4; break;
972 case 5: RegNo = X86::ST5; break;
973 case 6: RegNo = X86::ST6; break;
974 case 7: RegNo = X86::ST7; break;
975 default: return Error(IntTok.getLoc(), "invalid stack index");
978 if (getParser().Lex().isNot(AsmToken::RParen))
979 return Error(Parser.getTok().getLoc(), "expected ')'");
981 EndLoc = Parser.getTok().getEndLoc();
982 Parser.Lex(); // Eat ')'
986 EndLoc = Parser.getTok().getEndLoc();
988 // If this is "db[0-7]", match it as an alias
// The alias maps dbN onto the debug register DRN.
990 if (RegNo == 0 && Tok.getString().size() == 3 &&
991 Tok.getString().startswith("db")) {
992 switch (Tok.getString()[2]) {
993 case '0': RegNo = X86::DR0; break;
994 case '1': RegNo = X86::DR1; break;
995 case '2': RegNo = X86::DR2; break;
996 case '3': RegNo = X86::DR3; break;
997 case '4': RegNo = X86::DR4; break;
998 case '5': RegNo = X86::DR5; break;
999 case '6': RegNo = X86::DR6; break;
1000 case '7': RegNo = X86::DR7; break;
1004 EndLoc = Parser.getTok().getEndLoc();
1005 Parser.Lex(); // Eat it.
// Same failure handling as for a non-identifier token: silent in Intel
// syntax, diagnosed otherwise.
1011 if (isParsingIntelSyntax()) return true;
1012 return Error(StartLoc, "invalid register name",
1013 SMRange(StartLoc, EndLoc));
1016 Parser.Lex(); // Eat identifier token.
/// Passes \p RegNo to the instrumentation object as its initial frame
/// register.
1020 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1021 Instrumentation->SetInitialFrameRegister(RegNo);
/// Builds a memory operand addressed through the source-index register with a
/// zero displacement, no segment, no index and a scale of 1.
/// The base is RSI in 64-bit mode, ESI in 32-bit mode or when Code16GCC is
/// set, and SI otherwise.
1024 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1025 bool Parse32 = is32BitMode() || Code16GCC;
1026 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1027 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1028 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1029 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Builds a memory operand addressed through the destination-index register
/// with a zero displacement, no segment, no index and a scale of 1.
/// The base is RDI in 64-bit mode, EDI in 32-bit mode or when Code16GCC is
/// set, and DI otherwise.
1033 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1034 bool Parse32 = is32BitMode() || Code16GCC;
1035 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1036 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1037 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1038 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
/// Classifies \p Reg as an SI-family or DI-family register. Any register
/// outside the expected set hits llvm_unreachable.
/// NOTE(review): the case labels are not visible here; per the message below
/// only (R|E)SI and (R|E)DI are expected - confirm which values return true.
1042 bool X86AsmParser::IsSIReg(unsigned Reg) {
1044 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
/// Returns the SI or DI register of the width given by \p RegClassID:
/// RSI/RDI for GR64, ESI/EDI for GR32, SI/DI for GR16. \p IsSIReg selects the
/// SI variant when true. Any other register class is unreachable.
1056 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1058 switch (RegClassID) {
1059 default: llvm_unreachable("Unexpected register class");
1060 case X86::GR64RegClassID:
1061 return IsSIReg ? X86::RSI : X86::RDI;
1062 case X86::GR32RegClassID:
1063 return IsSIReg ? X86::ESI : X86::EDI;
1064 case X86::GR16RegClassID:
1065 return IsSIReg ? X86::SI : X86::DI;
/// Appends \p Src and \p Dst to \p Operands, taking ownership of both.
/// The two are pushed destination-first in Intel syntax; the other order
/// (source, then destination) is the remaining pair of pushes below.
1069 void X86AsmParser::AddDefaultSrcDestOperands(
1070 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1071 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1072 if (isParsingIntelSyntax()) {
1073 Operands.push_back(std::move(Dst));
1074 Operands.push_back(std::move(Src));
1077 Operands.push_back(std::move(Src));
1078 Operands.push_back(std::move(Dst));
/// Checks user-written operands (\p OrigOperands, whose first element is the
/// instruction name) against the canonical operands in \p FinalOperands, and
/// on success replaces the user operands with the canonical ones.
///
/// For memory operands the canonical operand inherits the user's size and
/// segment register, and its base register is adjusted to the SI/DI register
/// of the same width class as the user's base. A warning is queued when the
/// user's base register differs from the one that will actually be used.
1082 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1083 OperandVector &FinalOperands) {
// Nothing to verify when only the instruction name is present.
1085 if (OrigOperands.size() > 1) {
1086 // Check if sizes match, OrigOperands also contains the instruction name
1087 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1088 "Operand size mismatch");
// Warnings are collected first and only emitted if every operand passes.
1090 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1091 // Verify types match
// -1 means no base-register class has been seen yet.
1092 int RegClassID = -1;
1093 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
// i + 1 skips the instruction-name entry in OrigOperands.
1094 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1095 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1097 if (FinalOp.isReg() &&
1098 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1099 // Return false and let a normal complaint about bogus operands happen
1102 if (FinalOp.isMem()) {
1104 if (!OrigOp.isMem())
1105 // Return false and let a normal complaint about bogus operands happen
1108 unsigned OrigReg = OrigOp.Mem.BaseReg;
1109 unsigned FinalReg = FinalOp.Mem.BaseReg;
1111 // If we've already encountered a register class, make sure all register
1112 // bases are of the same register class
1113 if (RegClassID != -1 &&
1114 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1115 return Error(OrigOp.getStartLoc(),
1116 "mismatching source and destination index registers");
// Determine the width class of the user's base register.
1119 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1120 RegClassID = X86::GR64RegClassID;
1121 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1122 RegClassID = X86::GR32RegClassID;
1123 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1124 RegClassID = X86::GR16RegClassID;
1126 // Unexpected register class type
1127 // Return false and let a normal complaint about bogus operands happen
// Re-express the canonical SI/DI base in the user's width class.
1130 bool IsSI = IsSIReg(FinalReg);
1131 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1133 if (FinalReg != OrigReg) {
1134 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1135 Warnings.push_back(std::make_pair(
1136 OrigOp.getStartLoc(),
1137 "memory operand is only for determining the size, " + RegName +
1138 " will be used for the location"));
// Carry the user's size and segment override over to the canonical operand.
1141 FinalOp.Mem.Size = OrigOp.Mem.Size;
1142 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1143 FinalOp.Mem.BaseReg = FinalReg;
1147 // Produce warnings only if all the operands passed the adjustment - prevent
1148 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1149 for (auto &WarningMsg : Warnings) {
1150 Warning(WarningMsg.first, WarningMsg.second);
1153 // Remove old operands
1154 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1155 OrigOperands.pop_back();
1157 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
// Elements are moved one by one because they are move-only owners.
1158 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1159 OrigOperands.push_back(std::move(FinalOperands[i]));
/// Parses one operand, dispatching to the Intel-syntax parser when Intel
/// syntax is active and to the AT&T parser otherwise.
1164 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1165 if (isParsingIntelSyntax())
1166 return ParseIntelOperand();
1167 return ParseATTOperand();
1170 /// getIntelMemOperandSize - Return intel memory operand size.
/// Maps an Intel size keyword to its width in bits. Only the all-uppercase and
/// all-lowercase spellings listed below are recognised. "OPAQUE" maps to -1U
/// as a non-zero placeholder.
1171 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1172 unsigned Size = StringSwitch<unsigned>(OpStr)
1173 .Cases("BYTE", "byte", 8)
1174 .Cases("WORD", "word", 16)
1175 .Cases("DWORD", "dword", 32)
1176 .Cases("FWORD", "fword", 48)
1177 .Cases("QWORD", "qword", 64)
1178 .Cases("MMWORD","mmword", 64)
1179 .Cases("XWORD", "xword", 80)
1180 .Cases("TBYTE", "tbyte", 80)
1181 .Cases("XMMWORD", "xmmword", 128)
1182 .Cases("YMMWORD", "ymmword", 256)
1183 .Cases("ZMMWORD", "zmmword", 512)
1184 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
/// Creates a memory operand while parsing inline assembly.
///
/// References to declarations that are not variables (functions, labels) are
/// turned into absolute memory references sized to the pointer width. Other
/// symbol references take their size from the identifier's type information.
/// A size-directive rewrite may be recorded through InstInfo->AsmRewrites on
/// either path.
///
/// \param AllowBetterSizeMatch  lets the instruction matcher pick a size for an
///                              unsized indirect symbol reference.
1189 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1190 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1191 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1192 InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) {
1193 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1194 // some other label reference.
1195 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1196 // Insert an explicit size if the user didn't have one.
1198 Size = getPointerWidth();
1199 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1203 // Create an absolute memory reference in order to match against
1204 // instructions taking a PC relative operand.
1205 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1206 Identifier, Info.OpDecl);
1209 // We either have a direct symbol reference, or an offset from a symbol. The
1210 // parser always puts the symbol on the LHS, so look there for size
1211 // calculation purposes.
1212 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1214 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
// Info.Type is multiplied by 8 to obtain the size in bits.
1217 Size = Info.Type * 8; // Size is in terms of bits in this context.
1219 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1221 if (AllowBetterSizeMatch)
1222 // Handle cases where size qualifier is absent, upon an indirect symbol
1223 // reference - e.g. "vaddps zmm1, zmm2, [var]"
1224 // set Size to zero to allow matching mechanism to try and find a better
1225 // size qualifier than our initial guess, based on available variants of
1226 // the given instruction
1231 // When parsing inline assembly we set the base register to a non-zero value
1232 // if we don't know the actual value at this time. This is necessary to
1233 // get the matching correct in some cases.
1234 BaseReg = BaseReg ? BaseReg : 1;
1235 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1236 IndexReg, Scale, Start, End, Size, Identifier,
// Records the rewrites needed to turn an Intel bracketed memory expression
// into the form expected in the IR string: the brackets are skipped, the
// immediate displacement is normalised to FinalImmDisp, immediate rewrites
// inside the brackets are deleted, and everything inside the brackets other
// than the symbol itself is skipped.
//
// AsmRewrites   list that receives or has its entries updated with rewrites.
// SymName       the symbol text; its data pointer locates it in the source.
// ImmDisp       displacement parsed before the '[' (0 if none).
// FinalImmDisp  displacement computed by the expression state machine.
// BracLoc       location of '['; StartInBrac: first location inside it;
// End           location of ']'.
1241 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
1242 StringRef SymName, int64_t ImmDisp,
1243 int64_t FinalImmDisp, SMLoc &BracLoc,
1244 SMLoc &StartInBrac, SMLoc &End) {
1245 // Remove the '[' and ']' from the IR string.
1246 AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
1247 AsmRewrites.emplace_back(AOK_Skip, End, 1);
1249 // If ImmDisp is non-zero, then we parsed a displacement before the
1250 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1251 // If ImmDisp doesn't match the displacement computed by the state machine
1252 // then we have an additional displacement in the bracketed expression.
1253 if (ImmDisp != FinalImmDisp) {
1255 // We have an immediate displacement before the bracketed expression.
1256 // Adjust this to match the final immediate displacement.
// Only rewrites located at or before the '[' are candidates.
1258 for (AsmRewrite &AR : AsmRewrites) {
1259 if (AR.Loc.getPointer() > BracLoc.getPointer())
1261 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
1262 assert (!Found && "ImmDisp already rewritten.");
// Stretch the rewrite up to the '[' and substitute the final value.
1264 AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
1265 AR.Val = FinalImmDisp;
1270 assert (Found && "Unable to rewrite ImmDisp.");
1273 // We have a symbolic and an immediate displacement, but no displacement
1274 // before the bracketed expression. Put the immediate displacement
1275 // before the bracketed expression.
1276 AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
1279 // Remove all the ImmPrefix rewrites within the brackets.
1280 // We may have some Imm rewrites as a result of an operator applying,
1281 // remove them as well
1282 for (AsmRewrite &AR : AsmRewrites) {
1283 if (AR.Loc.getPointer() < StartInBrac.getPointer())
1285 if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm)
1286 AR.Kind = AOK_Delete;
1288 const char *SymLocPtr = SymName.data();
1289 // Skip everything before the symbol.
// NOTE(review): Len is unsigned and this branch is only entered when it is
// non-zero, so the assert below can never fire; the same holds for the
// second assert further down.
1290 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1291 assert(Len > 0 && "Expected a non-negative length.");
1292 AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
1294 // Skip everything after the symbol.
1295 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1296 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1297 assert(Len > 0 && "Expected a non-negative length.");
1298 AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
1302 // Some binary bitwise operators have a named synonym.
1303 // Query a candidate string for being such a named operator
1304 // and if so - invoke the appropriate handler
1305 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1306 // A named operator should be either lower or upper case, but not a mix
// (i.e. Name must equal its all-lower or its all-upper spelling).
1307 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
// Recognized names: not, or, shl, shr, xor, and.
1309 if (Name.equals_lower("not"))
1311 else if (Name.equals_lower("or"))
1313 else if (Name.equals_lower("shl"))
1315 else if (Name.equals_lower("shr"))
1317 else if (Name.equals_lower("xor"))
1319 else if (Name.equals_lower("and"))
// Feeds the tokens of an Intel-syntax expression into the state machine SM,
// dispatching on the current token kind, and updates End as tokens are
// consumed. Error paths return the result of Error().
1326 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1327 MCAsmParser &Parser = getParser();
1328 const AsmToken &Tok = Parser.getTok();
1330 AsmToken::TokenKind PrevTK = AsmToken::Error;
1333 bool UpdateLocLex = true;
1335 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1336 // identifier. Don't try to parse it as a register.
1337 if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
1340 // If we're parsing an immediate expression, we don't expect a '['.
1341 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1344 AsmToken::TokenKind TK = getLexer().getKind();
1347 if (SM.isValidEndState()) {
1351 return Error(Tok.getLoc(), "unknown token in expression");
1353 case AsmToken::EndOfStatement: {
1357 case AsmToken::String:
1358 case AsmToken::Identifier: {
1359 // This could be a register or a symbolic displacement.
1362 SMLoc IdentLoc = Tok.getLoc();
1363 StringRef Identifier = Tok.getString();
1364 UpdateLocLex = false;
// Try, in order: register, named operator (not/or/shl/...), plain
// (non-inline-asm) primary expression, Intel operator, dot-field after ']',
// and finally an inline-asm identifier lookup.
1365 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1366 SM.onRegister(TmpReg);
1367 } else if (ParseIntelNamedOperator(Identifier, SM)) {
1368 UpdateLocLex = true;
1369 } else if (!isParsingInlineAsm()) {
1370 if (getParser().parsePrimaryExpr(Val, End))
1371 return Error(Tok.getLoc(), "Unexpected identifier!");
1372 SM.onIdentifierExpr(Val, Identifier);
1373 } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) {
1374 if (OpKind == IOK_OFFSET)
1375 return Error(IdentLoc, "Dealing OFFSET operator as part of"
1376 "a compound immediate expression is yet to be supported");
1377 int64_t Val = ParseIntelOperator(OpKind);
1381 if (SM.onInteger(Val, ErrMsg))
1382 return Error(IdentLoc, ErrMsg);
1383 } else if (Identifier.find('.') != StringRef::npos &&
1384 PrevTK == AsmToken::RBrac) {
1387 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1388 if (ParseIntelIdentifier(Val, Identifier, Info,
1389 /*Unevaluated=*/false, End))
1391 SM.onIdentifierExpr(Val, Identifier);
1395 case AsmToken::Integer: {
1397 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1398 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1399 // Look for 'b' or 'f' following an Integer as a directional label
1400 SMLoc Loc = getTok().getLoc();
1401 int64_t IntVal = getTok().getIntVal();
1402 End = consumeToken();
1403 UpdateLocLex = false;
1404 if (getLexer().getKind() == AsmToken::Identifier) {
1405 StringRef IDVal = getTok().getString();
1406 if (IDVal == "f" || IDVal == "b") {
1408 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1409 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1411 MCSymbolRefExpr::create(Sym, Variant, getContext());
// A backward reference ("Nb") must name a symbol that is already defined.
1412 if (IDVal == "b" && Sym->isUndefined())
1413 return Error(Loc, "invalid reference to undefined symbol");
1414 StringRef Identifier = Sym->getName();
1415 SM.onIdentifierExpr(Val, Identifier);
1416 End = consumeToken();
1418 if (SM.onInteger(IntVal, ErrMsg))
1419 return Error(Loc, ErrMsg);
1422 if (SM.onInteger(IntVal, ErrMsg))
1423 return Error(Loc, ErrMsg);
// Punctuation tokens map one-to-one onto state machine events.
1427 case AsmToken::Plus: SM.onPlus(); break;
1428 case AsmToken::Minus: SM.onMinus(); break;
1429 case AsmToken::Tilde: SM.onNot(); break;
1430 case AsmToken::Star: SM.onStar(); break;
1431 case AsmToken::Slash: SM.onDivide(); break;
1432 case AsmToken::Pipe: SM.onOr(); break;
1433 case AsmToken::Caret: SM.onXor(); break;
1434 case AsmToken::Amp: SM.onAnd(); break;
1435 case AsmToken::LessLess:
1436 SM.onLShift(); break;
1437 case AsmToken::GreaterGreater:
1438 SM.onRShift(); break;
1439 case AsmToken::LBrac: SM.onLBrac(); break;
1440 case AsmToken::RBrac: SM.onRBrac(); break;
1441 case AsmToken::LParen: SM.onLParen(); break;
1442 case AsmToken::RParen: SM.onRParen(); break;
1445 return Error(Tok.getLoc(), "unknown token in expression");
// Cases that already consumed their token clear UpdateLocLex above.
1447 if (!Done && UpdateLocLex)
1448 End = consumeToken();
// Parses a bracketed Intel memory expression. The current token must be '[';
// ImmDisp is a displacement already parsed before the bracket. The result is
// a memory operand built from the state machine's base/index/scale and the
// computed displacement.
1455 std::unique_ptr<X86Operand>
1456 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1457 int64_t ImmDisp, bool isSymbol,
1459 MCAsmParser &Parser = getParser();
1460 const AsmToken &Tok = Parser.getTok();
1461 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1462 if (getLexer().isNot(AsmToken::LBrac))
1463 return ErrorOperand(BracLoc, "Expected '[' token!");
1464 Parser.Lex(); // Eat '['
1466 SMLoc StartInBrac = Parser.getTok().getLoc();
1467 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1468 // may have already parsed an immediate displacement before the bracketed
1470 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1471 if (ParseIntelExpression(SM, End))
1474 const MCExpr *Disp = nullptr;
1475 if (const MCExpr *Sym = SM.getSym()) {
1476 // A symbolic displacement.
1478 if (isParsingInlineAsm())
1479 RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
1480 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
// Fold the immediate part in; with no symbol the immediate is the whole
// displacement.
1484 if (SM.getImm() || !Disp) {
1485 const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
1487 Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
1489 Disp = Imm; // An immediate displacement only.
1492 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1493 // will in fact do global lookup the field name inside all global typedefs,
1494 // but we don't emulate that.
1495 if ((Parser.getTok().getKind() == AsmToken::Identifier ||
1496 Parser.getTok().getKind() == AsmToken::Dot ||
1497 Parser.getTok().getKind() == AsmToken::Real) &&
1498 Parser.getTok().getString().find('.') != StringRef::npos) {
1499 const MCExpr *NewDisp;
1500 if (ParseIntelDotOperator(Disp, NewDisp))
1503 End = Tok.getEndLoc();
1504 Parser.Lex(); // Eat the field.
1510 Error(Start, "cannot use more than one symbol in memory operand");
1513 if (SM.getBaseReg()) {
1514 Error(Start, "cannot use base register with variable reference");
1517 if (SM.getIndexReg()) {
1518 Error(Start, "cannot use index register with variable reference");
1523 int BaseReg = SM.getBaseReg();
1524 int IndexReg = SM.getIndexReg();
1525 int Scale = SM.getScale();
// Outside inline asm the operand is created directly from the parsed parts.
1526 if (!isParsingInlineAsm()) {
1528 if (!BaseReg && !IndexReg) {
1530 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1531 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1535 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1536 Error(StartInBrac, ErrMsg);
1539 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1540 IndexReg, Scale, Start, End, Size);
1543 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1544 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1545 End, Size, SM.getSymName(), Info,
1546 isParsingInlineAsm());
1549 // Inline assembly may use variable names with namespace alias qualifiers.
// The identifier is resolved through SemaCallback; on return Identifier holds
// the text the frontend claimed, End is advanced past it, and Val is a symbol
// reference to it. Only valid while parsing inline assembly (asserted below).
1550 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1551 StringRef &Identifier,
1552 InlineAsmIdentifierInfo &Info,
1553 bool IsUnevaluatedOperand, SMLoc &End) {
1554 MCAsmParser &Parser = getParser();
1555 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
// LineBuf is built from Identifier.data() without a length.
// NOTE(review): this presumably lets the frontend see (and trim) the rest of
// the line rather than just the current token - confirm.
1558 StringRef LineBuf(Identifier.data());
1560 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1562 const AsmToken &Tok = Parser.getTok();
1563 SMLoc Loc = Tok.getLoc();
1565 // Advance the token stream until the end of the current token is
1566 // after the end of what the frontend claimed.
1567 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1569 End = Tok.getEndLoc();
1571 } while (End.getPointer() < EndPtr);
1572 Identifier = LineBuf;
1574 // The frontend should end parsing on an assembler token boundary, unless it
1576 assert((End.getPointer() == EndPtr || !Result) &&
1577 "frontend claimed part of a token?");
1579 // If the identifier lookup was unsuccessful, assume that we are dealing with
1582 StringRef InternalName =
1583 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1585 assert(InternalName.size() && "We should have an internal name here.");
1586 // Push a rewrite for replacing the identifier name with the internal name.
1587 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1591 // Create the symbol reference.
1592 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1593 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1594 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1598 /// \brief Parse intel style segment override.
/// Expects the current token to be the ':' that follows the segment register
/// SegReg. Accepts an optional integer displacement, then either a bracketed
/// expression or a bare expression / identifier.
1599 std::unique_ptr<X86Operand>
1600 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1602 MCAsmParser &Parser = getParser();
1603 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1604 const AsmToken &Tok = Parser.getTok(); // Current token; must be the ':'.
1605 if (Tok.isNot(AsmToken::Colon))
1606 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1607 Parser.Lex(); // Eat ':'
1609 int64_t ImmDisp = 0;
1610 if (getLexer().is(AsmToken::Integer)) {
1611 ImmDisp = Tok.getIntVal();
1612 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1614 if (isParsingInlineAsm())
1615 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1617 if (getLexer().isNot(AsmToken::LBrac)) {
1618 // An immediate following a 'segment register', 'colon' token sequence can
1619 // be followed by a bracketed expression. If it isn't we know we have our
1620 // final segment override.
1621 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1622 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1623 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1624 Start, ImmDispToken.getEndLoc(), Size);
1628 if (getLexer().is(AsmToken::LBrac))
1629 return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
1633 if (!isParsingInlineAsm()) {
1634 if (getParser().parsePrimaryExpr(Val, End))
1635 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1637 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1640 InlineAsmIdentifierInfo Info;
1641 StringRef Identifier = Tok.getString();
1642 if (ParseIntelIdentifier(Val, Identifier, Info,
1643 /*Unevaluated=*/false, End))
// NOTE(review): SegReg is passed as 0 here although a segment override was
// parsed - confirm this is intended for the inline-asm path.
1645 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1646 /*Scale=*/1, Start, End, Size, Identifier, Info);
1649 // ParseRoundingModeOp - Parse AVX-512 rounding mode operand
// Accepts "{rn-sae}", "{rd-sae}", "{ru-sae}", "{rz-sae}" (returned as an
// immediate holding the rounding mode) or "{sae}" (returned as a token).
// The current token on entry is the '{'.
1650 std::unique_ptr<X86Operand>
1651 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1652 MCAsmParser &Parser = getParser();
1653 const AsmToken &Tok = Parser.getTok();
1654 // Eat "{" and mark the current place.
1655 const SMLoc consumedToken = consumeToken();
1656 if (Tok.getIdentifier().startswith("r")){
1657 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1658 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1659 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1660 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1661 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1664 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1665 Parser.Lex(); // Eat "r*" of r*-sae
1666 if (!getLexer().is(AsmToken::Minus))
1667 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1668 Parser.Lex(); // Eat "-"
// NOTE(review): the next token is consumed without checking that it
// actually spells "sae".
1669 Parser.Lex(); // Eat the sae
1670 if (!getLexer().is(AsmToken::RCurly))
1671 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1672 Parser.Lex(); // Eat "}"
1673 const MCExpr *RndModeOp =
1674 MCConstantExpr::create(rndMode, Parser.getContext());
1675 return X86Operand::CreateImm(RndModeOp, Start, End);
1677 if(Tok.getIdentifier().equals("sae")){
1678 Parser.Lex(); // Eat the sae
1679 if (!getLexer().is(AsmToken::RCurly))
1680 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1681 Parser.Lex(); // Eat "}"
1682 return X86Operand::CreateToken("{sae}", consumedToken);
1684 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1687 /// Parse the '.' operator.
/// Disp must be a constant expression; NewDisp receives a new constant equal
/// to Disp plus the offset denoted by the current token (either a literal
/// ".Imm" or, in inline asm, a field reference resolved via SemaCallback).
1688 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1689 const MCExpr *&NewDisp) {
1690 MCAsmParser &Parser = getParser();
1691 const AsmToken &Tok = Parser.getTok();
1692 int64_t OrigDispVal, DotDispVal;
1694 // FIXME: Handle non-constant expressions.
1695 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1696 OrigDispVal = OrigDisp->getValue();
1698 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1700 // Drop the optional '.'.
1701 StringRef DotDispStr = Tok.getString();
1702 if (DotDispStr.startswith("."))
1703 DotDispStr = DotDispStr.drop_front(1);
1705 // .Imm gets lexed as a real.
1706 if (Tok.is(AsmToken::Real)) {
// NOTE(review): the result of getAsInteger is not checked on this line.
1708 DotDispStr.getAsInteger(10, DotDisp);
1709 DotDispVal = DotDisp.getZExtValue();
1710 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
// Split "Base.Member" at the first '.' and ask the frontend for the offset.
1712 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1713 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1715 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1716 DotDispVal = DotDisp;
1718 return Error(Tok.getLoc(), "Unexpected token type!");
1720 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1721 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1722 unsigned Len = DotDispStr.size();
// NOTE(review): the int64_t sum is narrowed to unsigned here, while NewDisp
// below is built from the full 64-bit sum.
1723 unsigned Val = OrigDispVal + DotDispVal;
1724 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
1727 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1731 /// Parse the 'offset' operator. This operator is used to specify the
1732 /// location rather than the content of a variable.
1733 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1734 MCAsmParser &Parser = getParser();
1735 const AsmToken &Tok = Parser.getTok();
1736 SMLoc OffsetOfLoc = Tok.getLoc();
1737 Parser.Lex(); // Eat offset.
1740 InlineAsmIdentifierInfo Info;
1741 SMLoc Start = Tok.getLoc(), End;
1742 StringRef Identifier = Tok.getString();
1743 if (ParseIntelIdentifier(Val, Identifier, Info,
1744 /*Unevaluated=*/false, End))
1747 // Don't emit the offset operator.
// NOTE(review): the length 7 presumably covers "offset" plus one following
// separator character - confirm.
1748 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1750 // The offset operator will have an 'r' constraint, thus we need to create
1751 // register operand to ensure proper matching. Just pick a GPR based on
1752 // the size of a pointer.
1753 bool Parse32 = is32BitMode() || Code16GCC;
1754 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1756 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1757 OffsetOfLoc, Identifier, Info.OpDecl);
1760 // Query a candidate string for being an Intel assembly operator.
1761 // Report back its kind, or IOK_INVALID if it is not a known one.
// Only the all-upper-case and all-lower-case spellings are recognized.
1762 unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) {
1763 return StringSwitch<unsigned>(Name)
1764 .Cases("TYPE","type",IOK_TYPE)
1765 .Cases("SIZE","size",IOK_SIZE)
1766 .Cases("LENGTH","length",IOK_LENGTH)
1767 .Cases("OFFSET","offset",IOK_OFFSET)
1768 .Default(IOK_INVALID);
1771 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1772 /// returns the number of elements in an array. It returns the value 1 for
1773 /// non-array variables. The SIZE operator returns the size of a C or C++
1774 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1775 /// TYPE operator returns the size of a C or C++ type or variable. If the
1776 /// variable is an array, TYPE returns the size of a single element.
///
/// OpKind selects which field of the looked-up identifier info is used
/// (IOK_LENGTH, IOK_SIZE or IOK_TYPE); any other kind is unreachable.
1777 unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1778 MCAsmParser &Parser = getParser();
1779 const AsmToken &Tok = Parser.getTok();
1780 SMLoc TypeLoc = Tok.getLoc();
1781 Parser.Lex(); // Eat operator.
1783 const MCExpr *Val = nullptr;
1784 InlineAsmIdentifierInfo Info;
1785 SMLoc Start = Tok.getLoc(), End;
1786 StringRef Identifier = Tok.getString();
// The operand is only inspected for its type info, hence Unevaluated=true.
1787 if (ParseIntelIdentifier(Val, Identifier, Info,
1788 /*Unevaluated=*/true, End))
1792 Error(Start, "unable to lookup expression");
1798 default: llvm_unreachable("Unexpected operand kind!");
1799 case IOK_LENGTH: CVal = Info.Length; break;
1800 case IOK_SIZE: CVal = Info.Size; break;
1801 case IOK_TYPE: CVal = Info.Type; break;
1804 // Rewrite the type operator and the C or C++ type or variable in terms of an
1805 // immediate. E.g. TYPE foo -> $$4
1806 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1807 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
// Parses one Intel-syntax operand: a stand-alone OFFSET operator (inline asm
// only), an AVX-512 rounding-mode token, a register or segment override, or
// an immediate / memory expression, optionally preceded by "<size> ptr".
1812 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1813 MCAsmParser &Parser = getParser();
1814 const AsmToken &Tok = Parser.getTok();
1817 // FIXME: Offset operator
1818 // Should be handled as part of immediate expression, as other operators
1819 // Currently, only supported as a stand-alone operand
1820 if (isParsingInlineAsm())
1821 if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET)
1822 return ParseIntelOffsetOfOperator();
1824 bool PtrInOperand = false;
1825 unsigned Size = getIntelMemOperandSize(Tok.getString());
1827 Parser.Lex(); // Eat operand size (e.g., byte, word).
// Only the exact spellings "PTR" and "ptr" are accepted here.
1828 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1829 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1830 Parser.Lex(); // Eat ptr.
1831 PtrInOperand = true;
1834 Start = Tok.getLoc();
1836 // rounding mode token
1837 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1838 getLexer().is(AsmToken::LCurly))
1839 return ParseRoundingModeOp(Start, End);
1843 if (getLexer().is(AsmToken::Identifier) &&
1844 !ParseRegister(RegNo, Start, End)) {
1845 // If this is a segment register followed by a ':', then this is the start
1846 // of a segment override, otherwise this is a normal register reference.
1847 // In case it is a normal register and there is ptr in the operand this
1849 if (RegNo == X86::RIP)
1850 return ErrorOperand(Start, "rip can only be used as a base register");
1851 if (getLexer().isNot(AsmToken::Colon)) {
1853 return ErrorOperand(Start, "expected memory operand after "
1854 "'ptr', found register operand instead");
1856 return X86Operand::CreateReg(RegNo, Start, End);
1858 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1861 // Immediates and Memory
1863 // Parse [ BaseReg + Scale*IndexReg + Disp ].
1864 if (getLexer().is(AsmToken::LBrac))
1865 return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
// Otherwise parse an expression that stops at a '[' (StopOnLBrac), so that a
// leading displacement can be handed to ParseIntelBracExpression below.
1868 AsmToken StartTok = Tok;
1869 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1870 /*AddImmPrefix=*/false);
1871 if (ParseIntelExpression(SM, End))
1874 bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
1875 int64_t Imm = SM.getImm();
1876 if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
1877 SM.getSym()->evaluateAsAbsolute(Imm);
1879 if (StartTok.isNot(AsmToken::Identifier) &&
1880 StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
1881 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1882 if (StartTok.getString().size() == Len)
1883 // Just add a prefix if this wasn't a complex immediate expression.
1884 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1886 // Otherwise, rewrite the complex expression as a single immediate.
1887 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1890 if (getLexer().isNot(AsmToken::LBrac)) {
1891 // If a directional label (ie. 1f or 2b) was parsed above from
1892 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1893 // to the MCExpr with the directional local symbol and this is a
1894 // memory operand not an immediate operand.
1896 if (isParsingInlineAsm())
1897 return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
1899 /*Scale=*/1, Start, End, Size,
1900 SM.getSymName(), SM.getIdentifierInfo());
1901 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1905 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1906 return X86Operand::CreateImm(ImmExpr, Start, End);
1909 // Only positive immediates are valid.
1911 return ErrorOperand(Start, "expected a positive immediate displacement "
1912 "before bracketed expr.");
1914 return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
// Parses one AT&T-syntax operand, dispatching on the current token kind:
// '%' starts a register (or a segment-prefixed memory reference), '$' an
// immediate, '{' an AVX-512 rounding-mode operand; the visible fallback is a
// memory operand with no segment register.
1917 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1918 MCAsmParser &Parser = getParser();
1919 switch (getLexer().getKind()) {
1921 // Parse a memory operand with no segment register.
1922 return ParseMemOperand(0, Parser.getTok().getLoc());
1923 case AsmToken::Percent: {
1924 // Read the register.
1927 if (ParseRegister(RegNo, Start, End)) return nullptr;
1928 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1929 Error(Start, "%eiz and %riz can only be used as index registers",
1930 SMRange(Start, End));
1933 if (RegNo == X86::RIP) {
1934 Error(Start, "%rip can only be used as a base register",
1935 SMRange(Start, End));
1939 // If this is a segment register followed by a ':', then this is the start
1940 // of a memory reference, otherwise this is a normal register reference.
1941 if (getLexer().isNot(AsmToken::Colon))
1942 return X86Operand::CreateReg(RegNo, Start, End);
// A ':' follows, so the register must belong to the segment register class.
1944 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1945 return ErrorOperand(Start, "invalid segment register");
1947 getParser().Lex(); // Eat the colon.
1948 return ParseMemOperand(RegNo, Start);
1950 case AsmToken::Dollar: {
1951 // $42 -> immediate.
1952 SMLoc Start = Parser.getTok().getLoc(), End;
1955 if (getParser().parseExpression(Val, End))
1957 return X86Operand::CreateImm(Val, Start, End);
1959 case AsmToken::LCurly:{
1960 SMLoc Start = Parser.getTok().getLoc(), End;
// '{' is only meaningful here when AVX-512 is available.
1961 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1962 return ParseRoundingModeOp(Start, End);
1963 return ErrorOperand(Start, "Unexpected '{' in expression");
1968 // true on failure, false otherwise
1969 // If no {z} mark was found - Parser doesn't advance
// On success with a {z} mark present, Z is set to a "{z}" token located at
// StartLoc; otherwise Z is left untouched.
1970 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1971 const SMLoc &StartLoc) {
1972 MCAsmParser &Parser = getParser();
1973 // Assuming we are just past the '{' mark, querying the next token
1974 // Searched for {z}, but none was found. Return false, as no parsing error was
1976 if (!(getLexer().is(AsmToken::Identifier) &&
1977 (getLexer().getTok().getIdentifier() == "z")))
1979 Parser.Lex(); // Eat z
1980 // Query and eat the '}' mark
1981 if (!getLexer().is(AsmToken::RCurly))
1982 return Error(getLexer().getLoc(), "Expected } at this point");
1983 Parser.Lex(); // Eat '}'
1984 // Assign Z with the {z} mark operand
1985 Z = X86Operand::CreateToken("{z}", StartLoc);
1989 // true on failure, false otherwise
// Parses the AVX-512 decorations that may follow an operand - a memory
// broadcast ({1toN}) or an op-mask / zeroing mark ({%k<NUM>}, {z}) - and
// appends the corresponding operands/tokens to Operands. Does nothing
// visible unless the AVX-512 feature bit is set and the next token is '{'.
1990 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1991 const MCParsedAsmOperand &Op) {
1992 MCAsmParser &Parser = getParser();
1993 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1994 if (getLexer().is(AsmToken::LCurly)) {
1995 // Eat "{" and mark the current place.
1996 const SMLoc consumedToken = consumeToken();
1997 // Distinguish {1to<NUM>} from {%k<NUM>}.
1998 if(getLexer().is(AsmToken::Integer)) {
1999 // Parse memory broadcasting ({1to<NUM>}).
2000 if (getLexer().getTok().getIntVal() != 1)
2001 return TokError("Expected 1to<NUM> at this point");
2002 Parser.Lex(); // Eat "1" of 1to8
2003 if (!getLexer().is(AsmToken::Identifier) ||
2004 !getLexer().getTok().getIdentifier().startswith("to"))
2005 return TokError("Expected 1to<NUM> at this point");
2006 // Recognize only reasonable suffixes.
2007 const char *BroadcastPrimitive =
2008 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2009 .Case("to2", "{1to2}")
2010 .Case("to4", "{1to4}")
2011 .Case("to8", "{1to8}")
2012 .Case("to16", "{1to16}")
2014 if (!BroadcastPrimitive)
2015 return TokError("Invalid memory broadcast primitive.");
2016 Parser.Lex(); // Eat "toN" of 1toN
2017 if (!getLexer().is(AsmToken::RCurly))
2018 return TokError("Expected } at this point");
2019 Parser.Lex(); // Eat "}"
2020 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2022 // No AVX512 specific primitives can pass
2023 // after memory broadcasting, so return.
2026 // Parse either {k}{z}, {z}{k}, {k} or {z}
2027 // the last one has no meaning, but GCC accepts it
2028 // Currently, we're just past a '{' mark
2029 std::unique_ptr<X86Operand> Z;
2030 if (ParseZ(Z, consumedToken))
2032 // Reaching here means that parsing of the allegedly '{z}' mark yielded
2034 // Query for the need of further parsing for a {%k<NUM>} mark
2035 if (!Z || getLexer().is(AsmToken::LCurly)) {
// If {z} was already seen, a fresh '{' must be eaten before the mask.
2036 const SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2037 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2039 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2040 if (!getLexer().is(AsmToken::RCurly))
2041 return Error(getLexer().getLoc(), "Expected } at this point");
2042 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2043 Operands.push_back(std::move(Op));
2044 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2046 return Error(getLexer().getLoc(),
2047 "Expected an op-mask register at this point");
2048 // {%k<NUM>} mark is found, inquire for {z}
2049 if (getLexer().is(AsmToken::LCurly) && !Z) {
2050 // If we've found a parsing error, or found no (expected) {z} mark
2051 // - report an error
2052 if (ParseZ(Z, consumeToken()) || !Z)
2056 // '{z}' on its own is meaningless, hence should be ignored.
2057 // on the contrary - had it been accompanied by a K register,
2060 Operands.push_back(std::move(Z));
2068 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2069 /// has already been parsed if present.
/// SegReg is the already-parsed segment register (0 when absent). Failure
/// paths visible here report through Error() and/or return nullptr.
2070 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2073 MCAsmParser &Parser = getParser();
2074 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2075 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2076 // only way to do this without lookahead is to eat the '(' and see what is
2078 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2079 if (getLexer().isNot(AsmToken::LParen)) {
2081 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2083 // After parsing the base expression we could either have a parenthesized
2084 // memory address or not. If not, return now. If so, eat the (.
2085 if (getLexer().isNot(AsmToken::LParen)) {
2086 // Unless we have a segment register, treat this as an immediate.
2088 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2089 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2096 // Okay, we have a '('. We don't know if this is an expression or not,
2097 // so we have to eat the ( to see beyond it.
2098 SMLoc LParenLoc = Parser.getTok().getLoc();
2099 Parser.Lex(); // Eat the '('.
2101 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2102 // Nothing to do here, fall into the code below with the '(' part of the
2103 // memory operand consumed.
2107 // It must be a parenthesized expression, parse it now.
2108 if (getParser().parseParenExpression(Disp, ExprEnd))
2111 // After parsing the base expression we could either have a parenthesized
2112 // memory address or not. If not, return now. If so, eat the (.
2113 if (getLexer().isNot(AsmToken::LParen)) {
2114 // Unless we have a segment register, treat this as an immediate.
2116 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2118 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2127 // If we reached here, then we just ate the ( of the memory operand. Process
2128 // the rest of the memory operand.
2129 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2130 SMLoc IndexLoc, BaseLoc;
2132 if (getLexer().is(AsmToken::Percent)) {
2133 SMLoc StartLoc, EndLoc;
2134 BaseLoc = Parser.getTok().getLoc();
2135 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2136 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2137 Error(StartLoc, "eiz and riz can only be used as index registers",
2138 SMRange(StartLoc, EndLoc));
2143 if (getLexer().is(AsmToken::Comma)) {
2144 Parser.Lex(); // Eat the comma.
2145 IndexLoc = Parser.getTok().getLoc();
2147 // Following the comma we should have either an index register, or a scale
2148 // value. We don't support the latter form, but we want to parse it
2151 // Note that even though it would be completely consistent to support syntax
2152 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2153 if (getLexer().is(AsmToken::Percent)) {
2155 if (ParseRegister(IndexReg, L, L))
2157 if (BaseReg == X86::RIP) {
2158 Error(IndexLoc, "%rip as base register can not have an index register");
2161 if (IndexReg == X86::RIP) {
2162 Error(IndexLoc, "%rip is not allowed as an index register");
2166 if (getLexer().isNot(AsmToken::RParen)) {
2167 // Parse the scale amount:
2168 // ::= ',' [scale-expression]
2169 if (getLexer().isNot(AsmToken::Comma)) {
2170 Error(Parser.getTok().getLoc(),
2171 "expected comma in scale expression");
2174 Parser.Lex(); // Eat the comma.
2176 if (getLexer().isNot(AsmToken::RParen)) {
2177 SMLoc Loc = Parser.getTok().getLoc();
2180 if (getParser().parseAbsoluteExpression(ScaleVal)){
2181 Error(Loc, "expected scale expression");
2185 // Validate the scale amount.
2186 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2188 Error(Loc, "scale factor in 16-bit address must be 1");
2191 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2193 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2196 Scale = (unsigned)ScaleVal;
2199 } else if (getLexer().isNot(AsmToken::RParen)) {
2200 // A scale amount without an index is ignored.
2202 SMLoc Loc = Parser.getTok().getLoc();
2205 if (getParser().parseAbsoluteExpression(Value))
2209 Warning(Loc, "scale factor without index register is ignored");
2214 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2215 if (getLexer().isNot(AsmToken::RParen)) {
2216 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2219 SMLoc MemEnd = Parser.getTok().getEndLoc();
2220 Parser.Lex(); // Eat the ')'.
2222 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2223 // and then only in non-64-bit modes. Except for DX, which is a special case
2224 // because an unofficial form of in/out instructions uses it.
2225 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2226 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2227 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2228 BaseReg != X86::DX) {
2229 Error(BaseLoc, "invalid 16-bit base register");
2233 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2234 Error(IndexLoc, "16-bit memory operand may not include only index register");
2239 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2240 Error(BaseLoc, ErrMsg);
// With no segment, base or index register the operand is displacement-only.
2244 if (SegReg || BaseReg || IndexReg)
2245 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2246 IndexReg, Scale, MemStart, MemEnd);
2247 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
// Parse one instruction statement into Operands.
//
// The mnemonic (possibly rewritten by the hacks below) is pushed as a token
// operand, followed by the parsed operands. Afterwards a series of
// mnemonic-specific fix-ups adjusts the operand list: implicit operands of
// the string instructions, the "(%dx)" forms of in/out, shift-by-one,
// "int $3", "xlat mem8", and so on.
//
// Info     - receives an AOK_EndOfStatement rewrite when a curly brace is
//            treated as the end of the statement (Intel-syntax inline asm).
// Name     - the mnemonic as written in the source.
// NameLoc  - location of the mnemonic; reused for synthesized operands.
// Operands - output operand list.
//
// The error paths visible here return the result of TokError(...) or the
// HadVerifyError flag.
2250 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2251 SMLoc NameLoc, OperandVector &Operands) {
2252 MCAsmParser &Parser = getParser();
2254 StringRef PatchedName = Name;
// Intel-syntax inline asm only: look for a "short" keyword right after "jmp".
2256 if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) {
2257 StringRef NextTok = Parser.getTok().getString();
2258 if (NextTok == "short") {
2260 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2261 // Eat the short keyword
2263 // MS ignores the short keyword, it determines the jmp type based
2264 // on the distance of the label
2265 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2266 NextTok.size() + 1);
2270 // FIXME: Hack to recognize setneb as setne.
// A trailing 'b' is stripped from every set*b name except setb and setnb.
2271 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2272 PatchedName != "setb" && PatchedName != "setnb")
2273 PatchedName = PatchedName.substr(0, Name.size()-1);
2275 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2276 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2277 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2278 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
// The comparison code sits between the "cmp"/"vcmp" prefix (3 or 4
// characters) and the two-character type suffix.
2279 bool IsVCMP = PatchedName[0] == 'v';
2280 unsigned CCIdx = IsVCMP ? 4 : 3;
2281 unsigned ComparisonCode = StringSwitch<unsigned>(
2282 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2284 .Case("eq_oq", 0x00)
2286 .Case("lt_os", 0x01)
2288 .Case("le_os", 0x02)
2289 .Case("unord", 0x03)
2290 .Case("unord_q", 0x03)
2292 .Case("neq_uq", 0x04)
2294 .Case("nlt_us", 0x05)
2296 .Case("nle_us", 0x06)
2298 .Case("ord_q", 0x07)
2299 /* AVX only from here */
2300 .Case("eq_uq", 0x08)
2302 .Case("nge_us", 0x09)
2304 .Case("ngt_us", 0x0A)
2305 .Case("false", 0x0B)
2306 .Case("false_oq", 0x0B)
2307 .Case("neq_oq", 0x0C)
2309 .Case("ge_os", 0x0D)
2311 .Case("gt_os", 0x0E)
2313 .Case("true_uq", 0x0F)
2314 .Case("eq_os", 0x10)
2315 .Case("lt_oq", 0x11)
2316 .Case("le_oq", 0x12)
2317 .Case("unord_s", 0x13)
2318 .Case("neq_us", 0x14)
2319 .Case("nlt_uq", 0x15)
2320 .Case("nle_uq", 0x16)
2321 .Case("ord_s", 0x17)
2322 .Case("eq_us", 0x18)
2323 .Case("nge_uq", 0x19)
2324 .Case("ngt_uq", 0x1A)
2325 .Case("false_os", 0x1B)
2326 .Case("neq_os", 0x1C)
2327 .Case("ge_oq", 0x1D)
2328 .Case("gt_oq", 0x1E)
2329 .Case("true_us", 0x1F)
// ~0U means "no code recognized"; codes 8 and above are accepted only for
// the "v"-prefixed form.
2331 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2333 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2336 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2337 getParser().getContext());
2338 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
// Only the two-character type suffix remains in PatchedName.
2340 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2344 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2345 if (PatchedName.startswith("vpcmp") &&
2346 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2347 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Despite its name, CCIdx is the suffix length here: 2 when the character
// before the last one is 'u' (ub/uw/ud/uq), otherwise 1.
2348 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2349 unsigned ComparisonCode = StringSwitch<unsigned>(
2350 PatchedName.slice(5, PatchedName.size() - CCIdx))
2351 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2354 //.Case("false", 0x3) // Not a documented alias.
2358 //.Case("true", 0x7) // Not a documented alias.
// Code 0 ("eq") is rejected unless the suffix is one of the unsigned forms.
2360 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2361 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2363 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2364 getParser().getContext());
2365 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2367 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2371 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2372 if (PatchedName.startswith("vpcom") &&
2373 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2374 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
// Same suffix-length computation as for vpcmp above.
2375 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2376 unsigned ComparisonCode = StringSwitch<unsigned>(
2377 PatchedName.slice(5, PatchedName.size() - CCIdx))
2387 if (ComparisonCode != ~0U) {
2388 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2390 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2391 getParser().getContext());
2392 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2394 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
// Push whatever is left of the mnemonic as a token operand.
2398 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2400 // Determine whether this is an instruction prefix.
2402 Name == "lock" || Name == "rep" ||
2403 Name == "repe" || Name == "repz" ||
2404 Name == "repne" || Name == "repnz" ||
2405 Name == "rex64" || Name == "data16" || Name == "data32";
2407 bool CurlyAsEndOfStatement = false;
2408 // This does the actual operand parsing. Don't parse any more if we have a
2409 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2410 // just want to parse the "lock" as the first instruction and the "incl" as
2412 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2414 // Parse '*' modifier.
2415 if (getLexer().is(AsmToken::Star))
2416 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2418 // Read the operands.
2420 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2421 Operands.push_back(std::move(Op));
2422 if (HandleAVX512Operand(Operands, *Operands.back()))
2427 // check for comma and eat it
2428 if (getLexer().is(AsmToken::Comma))
2434 // In MS inline asm curly braces mark the beginning/end of a block, therefore
2435 // they should be interpreted as end of statement
2436 CurlyAsEndOfStatement =
2437 isParsingIntelSyntax() && isParsingInlineAsm() &&
2438 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2439 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2440 return TokError("unexpected token in argument list");
2443 // Consume the EndOfStatement or the prefix separator Slash
2444 if (getLexer().is(AsmToken::EndOfStatement) ||
2445 (isPrefix && getLexer().is(AsmToken::Slash)))
2447 else if (CurlyAsEndOfStatement)
2448 // Add an actual EndOfStatement before the curly brace
2449 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2450 getLexer().getTok().getLoc(), 0);
2452 // This is for gas compatibility and cannot be done in td.
2453 // Adding "p" for some floating point with no argument.
2454 // For example: fsub --> fsubp
2456 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
// Operands.size() == 1 means no operand follows the mnemonic token.
2457 if (IsFp && Operands.size() == 1) {
2458 const char *Repl = StringSwitch<const char *>(Name)
2459 .Case("fsub", "fsubp")
2460 .Case("fdiv", "fdivp")
2461 .Case("fsubr", "fsubrp")
2462 .Case("fdivr", "fdivrp");
2463 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2466 // Moving a 32 or 16 bit value into a segment register has the same
2467 // behavior. Modify such instructions to always take shorter form.
2468 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2469 (Operands.size() == 3)) {
2470 X86Operand &Op1 = (X86Operand &)*Operands[1];
2471 X86Operand &Op2 = (X86Operand &)*Operands[2];
2472 SMLoc Loc = Op1.getEndLoc();
2473 if (Op1.isReg() && Op2.isReg() &&
2474 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2476 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2477 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2478 // Change instruction name to match new instruction.
// Only an explicit suffix that disagrees with the current mode is replaced:
// 'l' in 16-bit mode becomes "movw", 'w' in other modes becomes "movl".
2479 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2480 Name = is16BitMode() ? "movw" : "movl";
2481 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2483 // Select the correct equivalent 16-/32-bit source register.
2485 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2486 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2490 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2491 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2492 // documented form in various unofficial manuals, so a lot of code uses it.
2493 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2494 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2495 Operands.size() == 3) {
2496 X86Operand &Op = (X86Operand &)*Operands.back();
// Match exactly "(%dx)": no segment, zero constant displacement, base DX,
// no index register.
2497 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2498 isa<MCConstantExpr>(Op.Mem.Disp) &&
2499 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2500 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2501 SMLoc Loc = Op.getEndLoc();
2502 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2505 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2506 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2507 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2508 Operands.size() == 3) {
2509 X86Operand &Op = (X86Operand &)*Operands[1];
2510 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2511 isa<MCConstantExpr>(Op.Mem.Disp) &&
2512 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2513 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2514 SMLoc Loc = Op.getEndLoc();
2515 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
// TmpOperands collects the default operands for the string instructions
// below; HadVerifyError holds the result of VerifyAndAdjustOperands.
2519 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2520 bool HadVerifyError = false;
2522 // Append default arguments to "ins[bwld]"
2523 if (Name.startswith("ins") &&
2524 (Operands.size() == 1 || Operands.size() == 3) &&
2525 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2528 AddDefaultSrcDestOperands(TmpOperands,
2529 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2530 DefaultMemDIOperand(NameLoc));
2531 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2534 // Append default arguments to "outs[bwld]"
2535 if (Name.startswith("outs") &&
2536 (Operands.size() == 1 || Operands.size() == 3) &&
2537 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2538 Name == "outsd" || Name == "outs")) {
2539 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2540 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2541 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2544 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2545 // values of $SIREG according to the mode. It would be nice if this
2546 // could be achieved with InstAlias in the tables.
2547 if (Name.startswith("lods") &&
2548 (Operands.size() == 1 || Operands.size() == 2) &&
2549 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2550 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2551 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2552 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2555 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2556 // values of $DIREG according to the mode. It would be nice if this
2557 // could be achieved with InstAlias in the tables.
2558 if (Name.startswith("stos") &&
2559 (Operands.size() == 1 || Operands.size() == 2) &&
2560 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2561 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2562 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2563 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2566 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2567 // values of $DIREG according to the mode. It would be nice if this
2568 // could be achieved with InstAlias in the tables.
2569 if (Name.startswith("scas") &&
2570 (Operands.size() == 1 || Operands.size() == 2) &&
2571 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2572 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2573 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2574 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2577 // Add default SI and DI operands to "cmps[bwlq]".
2578 if (Name.startswith("cmps") &&
2579 (Operands.size() == 1 || Operands.size() == 3) &&
2580 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2581 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2582 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2583 DefaultMemSIOperand(NameLoc));
2584 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2587 // Add default SI and DI operands to "movs[bwlq]".
2588 if (((Name.startswith("movs") &&
2589 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2590 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2591 (Name.startswith("smov") &&
2592 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2593 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2594 (Operands.size() == 1 || Operands.size() == 3)) {
// Outside Intel syntax, an operand-less "movsd" has its mnemonic token
// replaced by "movsl".
2595 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2596 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2597 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2598 DefaultMemDIOperand(NameLoc));
2599 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2602 // Check if we encountered an error for one of the string instructions
2603 if (HadVerifyError) {
2604 return HadVerifyError;
2607 // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2609 if ((Name.startswith("shr") || Name.startswith("sar") ||
2610 Name.startswith("shl") || Name.startswith("sal") ||
2611 Name.startswith("rcl") || Name.startswith("rcr") ||
2612 Name.startswith("rol") || Name.startswith("ror")) &&
2613 Operands.size() == 3) {
// In Intel syntax the count is looked up as the last operand (Operands[2])
// and dropped when it is the constant 1.
2614 if (isParsingIntelSyntax()) {
2616 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2617 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2618 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2619 Operands.pop_back();
2621 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2622 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2623 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2624 Operands.erase(Operands.begin() + 1);
2628 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2629 // instalias with an immediate operand yet.
2630 if (Name == "int" && Operands.size() == 2) {
2631 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2633 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2634 if (CE->getValue() == 3) {
2635 Operands.erase(Operands.begin() + 1);
2636 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2640 // Transforms "xlat mem8" into "xlatb"
2641 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2642 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2644 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2645 "size, (R|E)BX will be used for the location");
2646 Operands.pop_back();
2647 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
// Post-processing hook for a matched instruction. The matchers in this file
// call it in a loop for as long as it returns true, so a true result
// presumably means "Inst was changed, run again" (confirm against the body).
2654 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
// Forward declaration; ErrorMissingFeature streams the returned string for
// each feature bit it reports.
2658 static const char *getSubtargetFeatureName(uint64_t Val);
// Emit Inst by forwarding it, together with its parsed operands and the
// current context, to Instrumentation->InstrumentAndEmitInstruction.
2660 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2662 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
// Entry point for matching and emitting one parsed instruction. It only
// dispatches on the active syntax: Intel syntax goes to
// MatchAndEmitIntelInstruction, everything else to MatchAndEmitATTInstruction,
// and the callee's result is returned unchanged.
2666 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2667 OperandVector &Operands,
2668 MCStreamer &Out, uint64_t &ErrorInfo,
2669 bool MatchingInlineAsm) {
2670 if (isParsingIntelSyntax())
2671 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2673 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
// Handle FPU mnemonics that expand to two instructions: a WAIT followed by
// the "fn"-prefixed form of the mnemonic (e.g. finit -> fninit).
//
// The WAIT instruction is emitted here (skipped when MatchingInlineAsm is
// set), and Operands[0] is replaced with a token for the "fn" form so the
// caller matches that form next.
//
// IDLoc    - location used for the replacement mnemonic token.
// Op       - the mnemonic token operand to inspect.
// Operands - operand list whose first entry is rewritten.
// Out      - streamer passed through to EmitInstruction.
2677 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2678 OperandVector &Operands, MCStreamer &Out,
2679 bool MatchingInlineAsm) {
2680 // FIXME: This should be replaced with a real .td file alias mechanism.
2681 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2683 const char *Repl = StringSwitch<const char *>(Op.getToken())
2684 .Case("finit", "fninit")
2685 .Case("fsave", "fnsave")
2686 .Case("fstcw", "fnstcw")
2687 .Case("fstcww", "fnstcw")
2688 .Case("fstenv", "fnstenv")
2689 .Case("fstsw", "fnstsw")
2690 .Case("fstsww", "fnstsw")
2691 .Case("fclex", "fnclex")
2695 Inst.setOpcode(X86::WAIT);
2697 if (!MatchingInlineAsm)
2698 EmitInstruction(Inst, Operands, Out);
2699 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
// Report a "missing subtarget feature" diagnostic at IDLoc.
//
// ErrorInfo is treated as a bit mask of required features (it must be
// non-zero). The message is "instruction requires:" followed by the name of
// every feature whose bit is set. Returns the result of Error(...).
2703 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2704 bool MatchingInlineAsm) {
2705 assert(ErrorInfo && "Unknown missing feature!");
2706 SmallString<126> Msg;
2707 raw_svector_ostream OS(Msg);
2708 OS << "instruction requires:";
// The loop visits sizeof(ErrorInfo)*8-1 bit positions, i.e. one fewer than
// the width of ErrorInfo.
2710 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2711 if (ErrorInfo & Mask)
2712 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2715 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
// Match the parsed operands against the instruction tables using AT&T rules
// and emit the resulting instruction.
//
// Strategy, as far as the visible code shows:
//   1. Expand FPU wait aliases (MatchFPUWaitAlias).
//   2. Try the mnemonic exactly as written.
//   3. Otherwise retry with each size suffix appended ("bwlq", or "slt" for
//      mnemonics starting with 'f') and accept only if exactly one suffixed
//      form matches.
//   4. Otherwise produce the most specific diagnostic available.
//
// IDLoc     - location of the instruction, used for diagnostics.
// Opcode    - set to the opcode of the matched instruction.
// Operands  - parsed operands; Operands[0] must be the mnemonic token.
// Out       - streamer the instruction is emitted to.
// ErrorInfo - matcher error detail: an operand index for invalid-operand
//             failures, or a feature mask for missing-feature failures.
// MatchingInlineAsm - when set, nothing is emitted or post-processed.
2718 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2719 OperandVector &Operands,
2721 uint64_t &ErrorInfo,
2722 bool MatchingInlineAsm) {
2723 assert(!Operands.empty() && "Unexpect empty operand list!");
2724 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2725 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2726 SMRange EmptyRange = None;
2728 // First, handle aliases that expand to multiple instructions.
2729 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
// Remembers whether the unsuffixed attempt failed on an operand rather than
// on the mnemonic; this selects the diagnostic further down.
2731 bool WasOriginallyInvalidOperand = false;
2734 // First, try a direct match.
2735 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2736 isParsingIntelSyntax())) {
2737 default: llvm_unreachable("Unexpected match result!");
2739 // Some instructions need post-processing to, for example, tweak which
2740 // encoding is selected. Loop on it while changes happen so the
2741 // individual transformations can chain off each other.
2742 if (!MatchingInlineAsm)
2743 while (processInstruction(Inst, Operands))
2747 if (!MatchingInlineAsm)
2748 EmitInstruction(Inst, Operands, Out);
2749 Opcode = Inst.getOpcode();
2751 case Match_MissingFeature:
2752 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2753 case Match_InvalidOperand:
2754 WasOriginallyInvalidOperand = true;
2756 case Match_MnemonicFail:
2760 // FIXME: Ideally, we would only attempt suffix matches for things which are
2761 // valid prefixes, and we could just infer the right unambiguous
2762 // type. However, that requires substantially more matcher support than the
2765 // Change the operand to point to a temporary token.
2766 StringRef Base = Op.getToken();
2767 SmallString<16> Tmp;
2770 Op.setTokenValue(Tmp);
2772 // If this instruction starts with an 'f', then it is a floating point stack
2773 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2774 // 80-bit floating point, which use the suffixes s,l,t respectively.
2776 // Otherwise, we assume that this may be an integer instruction, which comes
2777 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2778 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2780 // Check for the various suffix matches.
2781 uint64_t ErrorInfoIgnore;
2782 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
// Try each suffix in turn by overwriting the last character of Tmp.
2785 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2786 Tmp.back() = Suffixes[I];
2787 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2788 MatchingInlineAsm, isParsingIntelSyntax());
2789 // If this returned as a missing feature failure, remember that.
2790 if (Match[I] == Match_MissingFeature)
2791 ErrorInfoMissingFeature = ErrorInfoIgnore;
2794 // Restore the old token.
2795 Op.setTokenValue(Base);
2797 // If exactly one matched, then we treat that as a successful match (and the
2798 // instruction will already have been filled in correctly, since the failing
2799 // matches won't have modified it).
2800 unsigned NumSuccessfulMatches =
2801 std::count(std::begin(Match), std::end(Match), Match_Success);
2802 if (NumSuccessfulMatches == 1) {
2804 if (!MatchingInlineAsm)
2805 EmitInstruction(Inst, Operands, Out);
2806 Opcode = Inst.getOpcode();
2810 // Otherwise, the match failed, try to produce a decent error message.
2812 // If we had multiple suffix matches, then identify this as an ambiguous
2814 if (NumSuccessfulMatches > 1) {
// Collect the suffix characters that matched so they can be listed in the
// diagnostic.
2816 unsigned NumMatches = 0;
2817 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2818 if (Match[I] == Match_Success)
2819 MatchChars[NumMatches++] = Suffixes[I];
2821 SmallString<126> Msg;
2822 raw_svector_ostream OS(Msg);
2823 OS << "ambiguous instructions require an explicit suffix (could be ";
2824 for (unsigned i = 0; i != NumMatches; ++i) {
2827 if (i + 1 == NumMatches)
2829 OS << "'" << Base << MatchChars[i] << "'";
2832 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
2836 // Okay, we know that none of the variants matched successfully.
2838 // If all of the instructions reported an invalid mnemonic, then the original
2839 // mnemonic was invalid.
2840 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2841 if (!WasOriginallyInvalidOperand) {
2842 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2843 Op.getLocRange(), MatchingInlineAsm);
2846 // Recover location info for the operand if we know which was the problem.
// ~0ULL is the "no operand identified" value; otherwise ErrorInfo is used as
// an index into Operands.
2847 if (ErrorInfo != ~0ULL) {
2848 if (ErrorInfo >= Operands.size())
2849 return Error(IDLoc, "too few operands for instruction", EmptyRange,
2852 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2853 if (Operand.getStartLoc().isValid()) {
2854 SMRange OperandRange = Operand.getLocRange();
2855 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2856 OperandRange, MatchingInlineAsm);
2860 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2864 // If one instruction matched with a missing feature, report this as a
2866 if (std::count(std::begin(Match), std::end(Match),
2867 Match_MissingFeature) == 1) {
2868 ErrorInfo = ErrorInfoMissingFeature;
2869 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2873 // If one instruction matched with an invalid operand, report this as an
2875 if (std::count(std::begin(Match), std::end(Match),
2876 Match_InvalidOperand) == 1) {
2877 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
2881 // If all of these were an outright failure, report it in a useless way.
2882 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2883 EmptyRange, MatchingInlineAsm);
// Decide whether an unsized memory operand that matched at Size bits may have
// its size directive adjusted (see the caller's MatchedSize handling).
//
// Size             - memory operand size, in bits, that produced a match.
// UnsizedMemOpNext - the operand following the unsized memory operand, or
//                    null; consulted only for the 64/32-bit case, where it
//                    must be a token starting with "{1to".
//
// The one return visible here yields Size for a qualifying 64/32-bit case and
// 0 otherwise.
2887 unsigned X86AsmParser::AdjustAVX512Mem(unsigned Size,
2888 X86Operand* UnsizedMemOpNext) {
2889 // Check for the existence of an AVX512 platform
2890 if (!getSTI().getFeatureBits()[X86::FeatureAVX512])
2892 // Allow adjusting upon a (x|y|z)mm
2893 if (Size == 512 || Size == 256 || Size == 128)
2895 // This is an allegedly broadcasting mem op adjustment,
2896 // allow some more inquiring to validate it
2897 if (Size == 64 || Size == 32)
2898 return UnsizedMemOpNext && UnsizedMemOpNext->isToken() &&
2899 UnsizedMemOpNext->getToken().substr(0, 4).equals("{1to") ? Size : 0;
2900 // Do not allow any other type of adjustments
// Match the parsed operands against the instruction tables using Intel rules
// and emit the resulting instruction.
//
// In Intel syntax the operand size is carried by the memory operand, not the
// mnemonic. An unsized memory operand is therefore tried with every size in
// MopSizes, and the instruction is accepted only when exactly one attempt
// succeeds. Before that, call/jmp/push get a pointer-sized memory operand,
// and a "push" of a constant immediate is tried with an explicit size suffix
// using the AT&T matcher variant.
//
// IDLoc     - location of the instruction, used for diagnostics.
// Opcode    - set to the opcode of the matched instruction.
// Operands  - parsed operands; Operands[0] must be the mnemonic token.
// Out       - streamer the instruction is emitted to.
// ErrorInfo - matcher error detail; overwritten with the missing-feature mask
//             before ErrorMissingFeature is called.
// MatchingInlineAsm - when set, nothing is emitted or post-processed.
2904 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2905 OperandVector &Operands,
2907 uint64_t &ErrorInfo,
2908 bool MatchingInlineAsm) {
2909 assert(!Operands.empty() && "Unexpect empty operand list!");
2910 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2911 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2912 StringRef Mnemonic = Op.getToken();
2913 SMRange EmptyRange = None;
2914 StringRef Base = Op.getToken();
2916 // First, handle aliases that expand to multiple instructions.
2917 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2921 // Find one unsized memory operand, if present.
2922 X86Operand *UnsizedMemOp = nullptr;
2923 // If unsized memory operand was found - obtain following operand.
2924 // For use in AdjustAVX512Mem
2925 X86Operand *UnsizedMemOpNext = nullptr;
// NOTE: the loop variable below shadows the outer mnemonic operand `Op`.
2926 for (const auto &Op : Operands) {
2927 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2929 UnsizedMemOpNext = X86Op;
2930 // Have we found an unqualified memory operand,
2931 // break. IA allows only one memory operand.
2934 if (X86Op->isMemUnsized())
2935 UnsizedMemOp = X86Op;
2938 // Allow some instructions to have implicitly pointer-sized operands. This is
2939 // compatible with gas.
2941 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2942 for (const char *Instr : PtrSizedInstrs) {
2943 if (Mnemonic == Instr) {
2944 UnsizedMemOp->Mem.Size = getPointerWidth();
// Match collects the result code of every match attempt made below.
2950 SmallVector<unsigned, 8> Match;
2951 uint64_t ErrorInfoMissingFeature = 0;
2953 // If unsized push has immediate operand we should use the default pointer
2954 // size for the size.
2955 if (Mnemonic == "push" && Operands.size() == 2) {
2956 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
2957 if (X86Op->isImm()) {
2958 // If it's not a constant fall through and let remainder take care of it.
2959 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
2960 unsigned Size = getPointerWidth();
2962 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
// Temporarily append a mode-dependent size suffix to the mnemonic token.
2963 SmallString<16> Tmp;
2965 Tmp += (is64BitMode())
2967 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
2968 Op.setTokenValue(Tmp);
2969 // Do match in ATT mode to allow explicit suffix usage.
2970 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
2972 false /*isParsingIntelSyntax()*/));
2973 Op.setTokenValue(Base);
2978 // If an unsized memory operand is present, try to match with each memory
2979 // operand size. In Intel assembly, the size is not part of the instruction
2981 unsigned MatchedSize = 0;
2982 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2983 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2984 for (unsigned Size : MopSizes) {
2985 UnsizedMemOp->Mem.Size = Size;
2986 uint64_t ErrorInfoIgnore;
2987 unsigned LastOpcode = Inst.getOpcode();
2988 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2989 MatchingInlineAsm, isParsingIntelSyntax());
// The condition holds for the first attempt and whenever this attempt changed
// Inst's opcode.
2990 if (Match.empty() || LastOpcode != Inst.getOpcode())
2993 // If this returned as a missing feature failure, remember that.
2994 if (Match.back() == Match_MissingFeature)
2995 ErrorInfoMissingFeature = ErrorInfoIgnore;
2996 if (M == Match_Success)
2997 // MS-compatibility:
2998 // Adjust AVX512 vector/broadcast memory operand,
2999 // when facing the absence of a size qualifier.
3000 // Match GCC behavior on respective cases.
3001 MatchedSize = AdjustAVX512Mem(Size, UnsizedMemOpNext);
3004 // Restore the size of the unsized memory operand if we modified it.
3006 UnsizedMemOp->Mem.Size = 0;
3009 // If we haven't matched anything yet, this is not a basic integer or FPU
3010 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
3011 // matching with the unsized operand.
3012 if (Match.empty()) {
3013 Match.push_back(MatchInstruction(
3014 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
3015 // If this returned as a missing feature failure, remember that.
3016 if (Match.back() == Match_MissingFeature)
3017 ErrorInfoMissingFeature = ErrorInfo;
3020 // Restore the size of the unsized memory operand if we modified it.
3022 UnsizedMemOp->Mem.Size = 0;
3024 // If it's a bad mnemonic, all results will be the same.
3025 if (Match.back() == Match_MnemonicFail) {
3026 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
3027 Op.getLocRange(), MatchingInlineAsm);
3030 // If exactly one matched, then we treat that as a successful match (and the
3031 // instruction will already have been filled in correctly, since the failing
3032 // matches won't have modified it).
3033 unsigned NumSuccessfulMatches =
3034 std::count(std::begin(Match), std::end(Match), Match_Success);
3035 if (NumSuccessfulMatches == 1) {
3036 if (MatchedSize && isParsingInlineAsm() && isParsingIntelSyntax())
3037 // MS compatibility -
3038 // Fix the rewrite according to the matched memory size
3039 // MS inline assembly only
// Only the AOK_SizeDirective rewrite recorded at the start location of the
// unsized memory operand is updated.
3040 for (AsmRewrite &AR : *InstInfo->AsmRewrites)
3041 if ((AR.Loc.getPointer() == UnsizedMemOp->StartLoc.getPointer()) &&
3042 (AR.Kind == AOK_SizeDirective))
3043 AR.Val = MatchedSize;
3044 // Some instructions need post-processing to, for example, tweak which
3045 // encoding is selected. Loop on it while changes happen so the individual
3046 // transformations can chain off each other.
3047 if (!MatchingInlineAsm)
3048 while (processInstruction(Inst, Operands))
3051 if (!MatchingInlineAsm)
3052 EmitInstruction(Inst, Operands, Out);
3053 Opcode = Inst.getOpcode();
3055 } else if (NumSuccessfulMatches > 1) {
3056 assert(UnsizedMemOp &&
3057 "multiple matches only possible with unsized memory operands");
3058 return Error(UnsizedMemOp->getStartLoc(),
3059 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3060 UnsizedMemOp->getLocRange(), MatchingInlineAsm);
3063 // If one instruction matched with a missing feature, report this as a
3065 if (std::count(std::begin(Match), std::end(Match),
3066 Match_MissingFeature) == 1) {
3067 ErrorInfo = ErrorInfoMissingFeature;
3068 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3072 // If one instruction matched with an invalid operand, report this as an
3074 if (std::count(std::begin(Match), std::end(Match),
3075 Match_InvalidOperand) == 1) {
3076 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3080 // If all of these were an outright failure, report it in a useless way.
3081 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
// Returns true when RegNo belongs to the SEGMENT_REG register class, i.e.
// segment registers are the ones left out of clobber lists.
3085 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3086 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
// Handle the X86-specific assembler directives recognized here:
//   .word          - delegated to ParseDirectiveWord with a size of 2
//   .code*         - delegated to ParseDirectiveCode
//   .att_syntax    - selects assembler dialect 0; "noprefix" is rejected
//   .intel_syntax  - selects assembler dialect 1; "prefix" is rejected
//   .even          - delegated to parseDirectiveEven
// Note that .code, .att_syntax and .intel_syntax are matched by prefix
// (startswith), not by exact name.
3089 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3090 MCAsmParser &Parser = getParser();
3091 StringRef IDVal = DirectiveID.getIdentifier();
3092 if (IDVal == ".word")
3093 return ParseDirectiveWord(2, DirectiveID.getLoc());
3094 else if (IDVal.startswith(".code"))
3095 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3096 else if (IDVal.startswith(".att_syntax")) {
3097 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3098 if (Parser.getTok().getString() == "prefix")
3100 else if (Parser.getTok().getString() == "noprefix")
3101 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3102 "supported: registers must have a "
3103 "'%' prefix in .att_syntax");
3105 getParser().setAssemblerDialect(0);
3107 } else if (IDVal.startswith(".intel_syntax")) {
3108 getParser().setAssemblerDialect(1);
3109 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3110 if (Parser.getTok().getString() == "noprefix")
3112 else if (Parser.getTok().getString() == "prefix")
3113 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3114 "supported: registers must not have "
3115 "a '%' prefix in .intel_syntax");
3118 } else if (IDVal == ".even")
3119 return parseDirectiveEven(DirectiveID.getLoc());
3123 /// parseDirectiveEven
/// Handles the ".even" directive, which takes no arguments: anything other
/// than end-of-statement is reported with TokError. The alignment value
/// passed to the streamer is 2, using EmitCodeAlignment or
/// EmitValueToAlignment on the current section.
3125 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3126 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3127 TokError("unexpected token in directive");
3130 const MCSection *Section = getStreamer().getCurrentSectionOnly();
3132 getStreamer().InitSections(false);
3133 Section = getStreamer().getCurrentSectionOnly();
3135 if (Section->UseCodeAlign())
3136 getStreamer().EmitCodeAlignment(2, 0);
3138 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3141 /// ParseDirectiveWord - Parses the operands of a data directive and emits
/// each parsed expression through the streamer as a value of \p Size bytes.
3142 /// ::= .word [ expression (, expression)* ]
/// \param Size  width in bytes of every emitted value; asserted to be at most 8
///              when the operand is a constant.
/// \param L     location used for the unexpected-token diagnostic.
3143 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
3144 MCAsmParser &Parser = getParser();
// An empty operand list is allowed by the grammar above.
3145 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3147 const MCExpr *Value;
// Remember where the expression starts so that the range diagnostic and
// EmitValue can point at it.
3148 SMLoc ExprLoc = getLexer().getLoc();
3149 if (getParser().parseExpression(Value))
// Constant operands are range-checked and emitted directly as integers.
3152 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
3153 assert(Size <= 8 && "Invalid size");
3154 uint64_t IntValue = MCE->getValue();
// The literal is accepted if it fits in 8 * Size bits under either the
// unsigned or the signed interpretation; it is rejected only if both fail.
3155 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3156 return Error(ExprLoc, "literal value out of range for directive");
3157 getStreamer().EmitIntValue(IntValue, Size);
// Non-constant expressions are handed to the streamer unevaluated, together
// with their source location.
3159 getStreamer().EmitValue(Value, Size, ExprLoc);
3162 if (getLexer().is(AsmToken::EndOfStatement))
3165 // FIXME: Improve diagnostic.
// Operands must be separated by commas. The error is reported at the directive
// location L rather than at the offending token.
3166 if (getLexer().isNot(AsmToken::Comma)) {
3167 Error(L, "unexpected token in directive");
3178 /// ParseDirectiveCode - Handles the code-size directives by switching the
/// parser mode and forwarding the matching assembler flag to the streamer.
3179 /// ::= .code16 | .code16gcc | .code32 | .code64
/// \param IDVal  full directive identifier; ParseDirective forwards anything
///               that starts with .code, so unknown variants can reach here.
/// \param L      location used for the unknown-directive diagnostic.
3180 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3181 MCAsmParser &Parser = getParser();
3183 if (IDVal == ".code16") {
// Only switch, and only notify the streamer, when the mode actually changes.
3185 if (!is16BitMode()) {
3186 SwitchMode(X86::Mode16Bit);
3187 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3189 } else if (IDVal == ".code16gcc") {
3190 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
// The mode switch and the streamer flag are the same 16-bit ones used for
// .code16 above.
3193 if (!is16BitMode()) {
3194 SwitchMode(X86::Mode16Bit);
3195 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3197 } else if (IDVal == ".code32") {
3199 if (!is32BitMode()) {
3200 SwitchMode(X86::Mode32Bit);
3201 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3203 } else if (IDVal == ".code64") {
3205 if (!is64BitMode()) {
3206 SwitchMode(X86::Mode64Bit);
3207 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
// Any other identifier beginning with .code is diagnosed by name.
3210 Error(L, "unknown directive " + IDVal);
3217 // Force static initialization.
// Entry point with C linkage. It constructs one RegisterMCAsmParser helper per
// X86 target (32-bit and 64-bit), each instantiated with X86AsmParser;
// presumably the helper constructor performs the registration with the target
// registry (see llvm/Support/TargetRegistry.h; confirm).
3218 extern "C" void LLVMInitializeX86AsmParser() {
3219 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3220 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3223 #define GET_REGISTER_MATCHER
3224 #define GET_MATCHER_IMPLEMENTATION
3225 #define GET_SUBTARGET_FEATURE_NAME
3226 #include "X86GenAsmMatcher.inc"