//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }
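
  // Editor's note (illustrative, not from the original source): for
  // "ds_write_b32 v1, v2 gds", the trailing 'gds' may reach us as a symbol
  // expression rather than a plain token; the check above lets such an
  // expression be matched as if the symbol's name were the token itself.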

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
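//
// Editor's sketch of the intended behavior (illustrative, not from the
// original source):
//   .amdgpu_hsa_kernel my_kernel
//   my_kernel:
//     v_mov_b32 v3, 0
// Parsing v3 calls usesRegister(IS_VGPR, 3, 1), which raises the
// .kernel.vgpr_count symbol to 4 (v0..v3 assumed in use).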
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadata();

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
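      //
      // Editor's note: these symbols are meant to be read back from assembly,
      // e.g. (illustrative, not from the original source):
      //   .if .option.machine_version_major >= 8
      //     s_dcache_wb
      //   .endif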
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
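
// Editor's note (illustrative, not from the original source): 1.0 converts
// to IEEEhalf exactly, and 0.1 converts with mere precision loss, so both
// are accepted for f16 operands; 1.0e10 overflows f16 and is rejected by
// the check above.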

bool AMDGPUOperand::isInlinableImm(MVT type) const {
  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
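
// Editor's note (illustrative, not from the original source): 0.5, -4.0 and
// the integer 64 are inlinable for 32-bit operands, and 0.15915494
// (1/(2*pi)) is inlinable only when the subtarget reports
// hasInv2PiInlineImm(); 1.5 or 100000 must be encoded as literals instead.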

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg();
  else if (AsmParser->isGFX9())
    return isRegKind() || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
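
  // Editor's note (illustrative): for Size == 4, FpSignMask is 0x80000000;
  // an 'abs' modifier is expected to clear this sign bit and a 'neg'
  // modifier to flip it, e.g. -|2.0f| turns 0x40000000 into 0xC0000000.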

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32 bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
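
// Editor's note (illustrative, not from the original source): for a packed
// v2i16 operand, an integer literal 1 is replicated into both halves and
// encoded as 0x00010001; a v2f16 fp literal 1.0 likewise becomes 0x3C003C00.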

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}
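
// Editor's note (illustrative): s[0:3] parses with RegWidth == 4 and maps to
// SGPR_128RegClassID; v[2:3] maps to VReg_64RegClassID; a width with no
// matching class (e.g. 5) yields -1 and the register is rejected.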

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
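        // Editor's note (illustrative): "v[4:7]" yields RegNum == 4 and
        // RegWidth == 4, while "s2" takes the single-register path above
        // with RegNum == 2 and RegWidth == 1.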
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }

  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }
  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
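
// Editor's note (illustrative, not from the original source): after parsing
// v7 (DwordRegIndex 7, width 1), NewMax is 7; if .amdgcn.next_free_vgpr
// previously evaluated to 5, it is raised to 8 so the count covers v0..v7.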

std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    return nullptr;
  }
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().getKind() == AsmToken::Integer ||
       getLexer().getKind() == AsmToken::Real)) {
    // This is a workaround for handling operands like these:
    //     |1.0|
    //     |-1|
    // This syntax is not compatible with the syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;

    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    return !Expr->evaluateAsAbsolute(Val);
  }

  return getParser().parseAbsoluteExpression(Val);
}

OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();
    if (!NextToken.is(AsmToken::Integer) &&
        !NextToken.is(AsmToken::Real)) {
      return MatchOperand_NoMatch;
    }
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    return MatchOperand_NoMatch;
  }
}

OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (auto R = parseRegister()) {
    assert(R->isReg());
    R->Reg.IsForcedVOP3 = isForcedVOP3();
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  auto res = parseImm(Operands, AbsMod);
  if (res != MatchOperand_NoMatch) {
    return res;
  }

  return parseReg(Operands);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind()== AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of a floating-point NEG
    // modifier gives integer literals different meanings in VOP1/2/C
    // and VOP3, for example:
    //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity.
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parenthesis");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parenthesis");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
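
// Editor's note (illustrative): this accepts forms such as "abs(v1)",
// "|v1|", "neg(v2)" and "-|v1|"; "--1" is rejected above in favor of the
// explicit "neg(-1)".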
2069 OperandMatchResultTy
2070 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2074 if (getLexer().getKind() == AsmToken::Identifier &&
2075 Parser.getTok().getString() == "sext") {
2078 if (getLexer().isNot(AsmToken::LParen)) {
2079 Error(Parser.getTok().getLoc(), "expected left paren after sext");
2080 return MatchOperand_ParseFail;
2085 OperandMatchResultTy Res;
2087 Res = parseRegOrImm(Operands);
2089 Res = parseReg(Operands);
2091 if (Res != MatchOperand_Success) {
2095 AMDGPUOperand::Modifiers Mods;
2097 if (getLexer().isNot(AsmToken::RParen)) {
2098 Error(Parser.getTok().getLoc(), "expected closing parentheses");
2099 return MatchOperand_ParseFail;
2105 if (Mods.hasIntModifiers()) {
2106 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2107 Op.setModifiers(Mods);
2110 return MatchOperand_Success;
2113 OperandMatchResultTy
2114 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2115 return parseRegOrImmWithFPInputMods(Operands, false);
2118 OperandMatchResultTy
2119 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2120 return parseRegOrImmWithIntInputMods(Operands, false);
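// The routine below accepts either a 32-bit VGPR or the literal 'off', as
// used e.g. by export instructions: "exp mrt0 v0, v0, off, off" (a sketch).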
2123 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2124 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2126 Operands.push_back(std::move(Reg));
2127 return MatchOperand_Success;
2130 const AsmToken &Tok = Parser.getTok();
2131 if (Tok.getString() == "off") {
2132 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
2133 AMDGPUOperand::ImmTyOff, false));
2135 return MatchOperand_Success;
2138 return MatchOperand_NoMatch;
2141 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2142 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2144 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2145 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2146 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2147 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2148 return Match_InvalidOperand;
2150 if ((TSFlags & SIInstrFlags::VOP3) &&
2151 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2152 getForcedEncodingSize() != 64)
2153 return Match_PreferE32;
2155 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2156 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2157 // v_mac_f32/16 allow only dst_sel == DWORD;
2159 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2160 const auto &Op = Inst.getOperand(OpNum);
2161 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2162 return Match_InvalidOperand;
2166 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
2167 // FIXME: Produces error without correct column reported.
2169 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
2170 const auto &Op = Inst.getOperand(OpNum);
2171 if (Op.getImm() != 0)
2172 return Match_InvalidOperand;
2175 return Match_Success;
2178 // Which asm variants we should check
2179 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2180 if (getForcedEncodingSize() == 32) {
2181 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2182 return makeArrayRef(Variants);
2185 if (isForcedVOP3()) {
2186 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2187 return makeArrayRef(Variants);
2190 if (isForcedSDWA()) {
2191 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2192 AMDGPUAsmVariants::SDWA9};
2193 return makeArrayRef(Variants);
2196 if (isForcedDPP()) {
2197 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2198 return makeArrayRef(Variants);
2201 static const unsigned Variants[] = {
2202 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2203 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2206 return makeArrayRef(Variants);
2209 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2210 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2211 const unsigned Num = Desc.getNumImplicitUses();
2212 for (unsigned i = 0; i < Num; ++i) {
2213 unsigned Reg = Desc.ImplicitUses[i];
2215 case AMDGPU::FLAT_SCR:
2223 return AMDGPU::NoRegister;
2226 // NB: This code is correct only when used to check constant
2227 // bus limitations because GFX7 supports no f16 inline constants.
2228 // Note that there are no cases when a GFX7 opcode violates
2229 // constant bus limitations due to the use of an f16 constant.
2230 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2231 unsigned OpIdx) const {
2232 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2234 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2238 const MCOperand &MO = Inst.getOperand(OpIdx);
2240 int64_t Val = MO.getImm();
2241 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2243 switch (OpSize) { // expected operand size
2245 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2247 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2249 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2250 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2251 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
2252 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2254 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2258 llvm_unreachable("invalid operand size");
2262 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2263 const MCOperand &MO = Inst.getOperand(OpIdx);
2265 return !isInlineConstant(Inst, OpIdx);
2267 return !MO.isReg() ||
2268 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
2271 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2272 const unsigned Opcode = Inst.getOpcode();
2273 const MCInstrDesc &Desc = MII.get(Opcode);
2274 unsigned ConstantBusUseCount = 0;
2277 (SIInstrFlags::VOPC |
2278 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2279 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2280 SIInstrFlags::SDWA)) {
2281 // Check special imm operands (used by madmk, etc)
2282 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2283 ++ConstantBusUseCount;
2286 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2287 if (SGPRUsed != AMDGPU::NoRegister) {
2288 ++ConstantBusUseCount;
2291 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2292 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2293 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2295 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2297 for (int OpIdx : OpIndices) {
2298 if (OpIdx == -1) break;
2300 const MCOperand &MO = Inst.getOperand(OpIdx);
2301 if (usesConstantBus(Inst, OpIdx)) {
2303 const unsigned Reg = mc2PseudoReg(MO.getReg());
2304 // Pairs of registers with a partial intersection like these
2306 // flat_scratch_lo, flat_scratch
2307 // flat_scratch_lo, flat_scratch_hi
2308 // are theoretically valid but they are disabled anyway.
2309 // Note that this code mimics SIInstrInfo::verifyInstruction
2310 if (Reg != SGPRUsed) {
2311 ++ConstantBusUseCount;
2314 } else { // Expression or a literal
2315 ++ConstantBusUseCount;
2321 return ConstantBusUseCount <= 1;
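// Illustrative cases for the constant bus check above (a sketch):
//   v_add_f32_e64 v0, s0, s1   // rejected: reads two distinct SGPRs
//   v_add_f32_e64 v0, s0, s0   // accepted: the same SGPR counts once
//   v_add_f32_e64 v0, s0, 1.0  // accepted: 1.0 is an inline constant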
2324 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2325 const unsigned Opcode = Inst.getOpcode();
2326 const MCInstrDesc &Desc = MII.get(Opcode);
2328 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2330 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2334 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2336 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2337 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2338 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2340 assert(DstIdx != -1);
2341 const MCOperand &Dst = Inst.getOperand(DstIdx);
2342 assert(Dst.isReg());
2343 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2345 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2347 for (int SrcIdx : SrcIndices) {
2348 if (SrcIdx == -1) break;
2349 const MCOperand &Src = Inst.getOperand(SrcIdx);
2351 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2352 if (isRegIntersect(DstReg, SrcReg, TRI)) {
2361 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2363 const unsigned Opc = Inst.getOpcode();
2364 const MCInstrDesc &Desc = MII.get(Opc);
2366 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2367 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2368 assert(ClampIdx != -1);
2369 return Inst.getOperand(ClampIdx).getImm() == 0;
2375 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2377 const unsigned Opc = Inst.getOpcode();
2378 const MCInstrDesc &Desc = MII.get(Opc);
2380 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2383 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2384 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2385 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2387 assert(VDataIdx != -1);
2388 assert(DMaskIdx != -1);
2389 assert(TFEIdx != -1);
2391 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2392 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2393 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2398 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2399 if (hasPackedD16()) {
2400 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2401 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2402 DataSize = (DataSize + 1) / 2;
2405 return (VDataSize / 4) == DataSize + TFESize;
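// E.g. (a sketch): "image_load v[0:3], v[4:7], s[0:7] dmask:0xf" needs four
// vdata registers, one per dmask bit, plus one extra register when tfe is set.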
2408 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
2410 const unsigned Opc = Inst.getOpcode();
2411 const MCInstrDesc &Desc = MII.get(Opc);
2413 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2415 if (!Desc.mayLoad() || !Desc.mayStore())
2416 return true; // Not atomic
2418 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2419 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2421 // This is an incomplete check because image_atomic_cmpswap
2422 // may only use 0x3 and 0xf while other atomic operations
2423 // may use 0x1 and 0x3. However, these limitations are
2424 // verified when we check that dmask matches dst size.
2425 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
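// E.g. (a sketch): image_atomic_add uses dmask:0x1 (0x3 for 64-bit data),
// while image_atomic_cmpswap uses dmask:0x3 (0xf for 64-bit data).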
2428 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
2430 const unsigned Opc = Inst.getOpcode();
2431 const MCInstrDesc &Desc = MII.get(Opc);
2433 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
2436 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2437 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2439 // GATHER4 instructions use dmask in a different fashion compared to
2440 // other MIMG instructions. The only useful DMASK values are
2441 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
2442 // (red,red,red,red) etc.) The ISA document doesn't mention
2443 // other DMASK values.
2444 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
2447 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
2449 const unsigned Opc = Inst.getOpcode();
2450 const MCInstrDesc &Desc = MII.get(Opc);
2452 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2455 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2456 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
2457 if (isCI() || isSI())
2464 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
2465 const SMLoc &IDLoc) {
2466 if (!validateConstantBusLimitations(Inst)) {
2468 "invalid operand (violates constant bus restrictions)");
2471 if (!validateEarlyClobberLimitations(Inst)) {
2473 "destination must be different than all sources");
2476 if (!validateIntClampSupported(Inst)) {
2478 "integer clamping is not supported on this GPU");
2481 // For MUBUF/MTBUF, d16 is a part of the opcode, so there is nothing to validate.
2482 if (!validateMIMGD16(Inst)) {
2484 "d16 modifier is not supported on this GPU");
2487 if (!validateMIMGDataSize(Inst)) {
2489 "image data size does not match dmask and tfe");
2492 if (!validateMIMGAtomicDMask(Inst)) {
2494 "invalid atomic image dmask");
2497 if (!validateMIMGGatherDMask(Inst)) {
2499 "invalid image_gather dmask: only one bit must be set");
2506 static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
2507 unsigned VariantID = 0);
2509 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2510 OperandVector &Operands,
2512 uint64_t &ErrorInfo,
2513 bool MatchingInlineAsm) {
2515 unsigned Result = Match_Success;
2516 for (auto Variant : getMatchedVariants()) {
2518 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
2520 // We order match statuses from least to most specific, and use the most
2521 // specific status as the result:
2522 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
2523 if ((R == Match_Success) ||
2524 (R == Match_PreferE32) ||
2525 (R == Match_MissingFeature && Result != Match_PreferE32) ||
2526 (R == Match_InvalidOperand && Result != Match_MissingFeature
2527 && Result != Match_PreferE32) ||
2528 (R == Match_MnemonicFail && Result != Match_InvalidOperand
2529 && Result != Match_MissingFeature
2530 && Result != Match_PreferE32)) {
2534 if (R == Match_Success)
2541 if (!validateInstruction(Inst, IDLoc)) {
2545 Out.EmitInstruction(Inst, getSTI());
2548 case Match_MissingFeature:
2549 return Error(IDLoc, "instruction not supported on this GPU");
2551 case Match_MnemonicFail: {
2552 uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
2553 std::string Suggestion = AMDGPUMnemonicSpellCheck(
2554 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
2555 return Error(IDLoc, "invalid instruction" + Suggestion,
2556 ((AMDGPUOperand &)*Operands[0]).getLocRange());
2559 case Match_InvalidOperand: {
2560 SMLoc ErrorLoc = IDLoc;
2561 if (ErrorInfo != ~0ULL) {
2562 if (ErrorInfo >= Operands.size()) {
2563 return Error(IDLoc, "too few operands for instruction");
2565 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
2566 if (ErrorLoc == SMLoc())
2569 return Error(ErrorLoc, "invalid operand for instruction");
2572 case Match_PreferE32:
2573 return Error(IDLoc, "internal error: instruction without _e64 suffix "
2574 "should be encoded as e32");
2576 llvm_unreachable("Implement any new match types added!");
2579 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
2581 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
2584 if (getParser().parseAbsoluteExpression(Tmp)) {
2587 Ret = static_cast<uint32_t>(Tmp);
2591 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
2593 if (ParseAsAbsoluteExpression(Major))
2594 return TokError("invalid major version");
2596 if (getLexer().isNot(AsmToken::Comma))
2597 return TokError("minor version number required, comma expected");
2600 if (ParseAsAbsoluteExpression(Minor))
2601 return TokError("invalid minor version");
2606 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
2607 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2608 return TokError("directive only supported for amdgcn architecture");
2612 SMLoc TargetStart = getTok().getLoc();
2613 if (getParser().parseEscapedString(Target))
2615 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
2617 std::string ExpectedTarget;
2618 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
2619 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
2621 if (Target != ExpectedTargetOS.str())
2622 return getParser().Error(TargetRange.Start, "target must match options",
2625 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
2629 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
2630 return getParser().Error(Range.Start, "value out of range", Range);
2633 bool AMDGPUAsmParser::calculateGPRBlocks(
2634 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
2635 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange,
2636 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks,
2637 unsigned &SGPRBlocks) {
2638 // TODO(scott.linder): These calculations are duplicated from
2639 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
2640 IsaVersion Version = getIsaVersion(getSTI().getCPU());
2642 unsigned NumVGPRs = NextFreeVGPR;
2643 unsigned NumSGPRs = NextFreeSGPR;
2644 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
2646 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
2647 NumSGPRs > MaxAddressableNumSGPRs)
2648 return OutOfRangeError(SGPRRange);
2651 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
2653 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
2654 NumSGPRs > MaxAddressableNumSGPRs)
2655 return OutOfRangeError(SGPRRange);
2657 if (Features.test(FeatureSGPRInitBug))
2658 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
2660 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
2661 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
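// A minimal .amdhsa_kernel block accepted by the parser below (a sketch;
// only the two .amdhsa_next_free_* directives are mandatory):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel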
2666 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
2667 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
2668 return TokError("directive only supported for amdgcn architecture");
2670 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
2671 return TokError("directive only supported for amdhsa OS");
2673 StringRef KernelName;
2674 if (getParser().parseIdentifier(KernelName))
2677 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
2681 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
2684 uint64_t NextFreeVGPR = 0;
2686 uint64_t NextFreeSGPR = 0;
2687 unsigned UserSGPRCount = 0;
2688 bool ReserveVCC = true;
2689 bool ReserveFlatScr = true;
2690 bool ReserveXNACK = hasXNACK();
2693 while (getLexer().is(AsmToken::EndOfStatement))
2696 if (getLexer().isNot(AsmToken::Identifier))
2697 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
2699 StringRef ID = getTok().getIdentifier();
2700 SMRange IDRange = getTok().getLocRange();
2703 if (ID == ".end_amdhsa_kernel")
2706 if (Seen.find(ID) != Seen.end())
2707 return TokError(".amdhsa_ directives cannot be repeated");
2710 SMLoc ValStart = getTok().getLoc();
2712 if (getParser().parseAbsoluteExpression(IVal))
2714 SMLoc ValEnd = getTok().getLoc();
2715 SMRange ValRange = SMRange(ValStart, ValEnd);
2718 return OutOfRangeError(ValRange);
2720 uint64_t Val = IVal;
2722 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
2723 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
2724 return OutOfRangeError(RANGE); \
2725 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
2727 if (ID == ".amdhsa_group_segment_fixed_size") {
2728 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
2729 return OutOfRangeError(ValRange);
2730 KD.group_segment_fixed_size = Val;
2731 } else if (ID == ".amdhsa_private_segment_fixed_size") {
2732 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
2733 return OutOfRangeError(ValRange);
2734 KD.private_segment_fixed_size = Val;
2735 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
2736 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2737 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
2740 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
2741 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2742 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
2745 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
2746 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2747 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
2750 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
2751 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2752 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
2755 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
2756 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
2760 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
2761 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2762 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
2765 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
2766 PARSE_BITS_ENTRY(KD.kernel_code_properties,
2767 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
2770 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
2772 KD.compute_pgm_rsrc2,
2773 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
2775 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
2776 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2777 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
2779 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
2780 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2781 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
2783 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
2784 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2785 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
2787 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
2788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2789 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
2791 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
2792 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2793 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
2795 } else if (ID == ".amdhsa_next_free_vgpr") {
2796 VGPRRange = ValRange;
2798 } else if (ID == ".amdhsa_next_free_sgpr") {
2799 SGPRRange = ValRange;
2801 } else if (ID == ".amdhsa_reserve_vcc") {
2802 if (!isUInt<1>(Val))
2803 return OutOfRangeError(ValRange);
2805 } else if (ID == ".amdhsa_reserve_flat_scratch") {
2806 if (IVersion.Major < 7)
2807 return getParser().Error(IDRange.Start, "directive requires gfx7+",
2809 if (!isUInt<1>(Val))
2810 return OutOfRangeError(ValRange);
2811 ReserveFlatScr = Val;
2812 } else if (ID == ".amdhsa_reserve_xnack_mask") {
2813 if (IVersion.Major < 8)
2814 return getParser().Error(IDRange.Start, "directive requires gfx8+",
2816 if (!isUInt<1>(Val))
2817 return OutOfRangeError(ValRange);
2819 } else if (ID == ".amdhsa_float_round_mode_32") {
2820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2821 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
2822 } else if (ID == ".amdhsa_float_round_mode_16_64") {
2823 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2824 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
2825 } else if (ID == ".amdhsa_float_denorm_mode_32") {
2826 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2827 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
2828 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
2829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2830 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
2832 } else if (ID == ".amdhsa_dx10_clamp") {
2833 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
2834 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
2835 } else if (ID == ".amdhsa_ieee_mode") {
2836 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
2838 } else if (ID == ".amdhsa_fp16_overflow") {
2839 if (IVersion.Major < 9)
2840 return getParser().Error(IDRange.Start, "directive requires gfx9+",
2842 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
2844 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
2846 KD.compute_pgm_rsrc2,
2847 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
2849 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
2850 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2851 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
2853 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
2855 KD.compute_pgm_rsrc2,
2856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
2858 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
2859 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2860 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
2862 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
2863 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2864 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
2866 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
2867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2868 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
2870 } else if (ID == ".amdhsa_exception_int_div_zero") {
2871 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
2872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
2875 return getParser().Error(IDRange.Start,
2876 "unknown .amdhsa_kernel directive", IDRange);
2879 #undef PARSE_BITS_ENTRY
2882 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
2883 return TokError(".amdhsa_next_free_vgpr directive is required");
2885 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
2886 return TokError(".amdhsa_next_free_sgpr directive is required");
2888 unsigned VGPRBlocks;
2889 unsigned SGPRBlocks;
2890 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
2891 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
2892 SGPRRange, VGPRBlocks, SGPRBlocks))
2895 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
2897 return OutOfRangeError(VGPRRange);
2898 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2899 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
2901 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
2903 return OutOfRangeError(SGPRRange);
2904 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
2905 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2908 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
2909 return TokError("too many user SGPRs enabled");
2910 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
2913 getTargetStreamer().EmitAmdhsaKernelDescriptor(
2914 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
2915 ReserveFlatScr, ReserveXNACK);
2919 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
2923 if (ParseDirectiveMajorMinor(Major, Minor))
2926 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
2930 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
2934 StringRef VendorName;
2937 // If this directive has no arguments, then use the ISA version for the
2938 // targeted GPU.
2939 if (getLexer().is(AsmToken::EndOfStatement)) {
2940 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
2941 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
2947 if (ParseDirectiveMajorMinor(Major, Minor))
2950 if (getLexer().isNot(AsmToken::Comma))
2951 return TokError("stepping version number required, comma expected");
2954 if (ParseAsAbsoluteExpression(Stepping))
2955 return TokError("invalid stepping version");
2957 if (getLexer().isNot(AsmToken::Comma))
2958 return TokError("vendor name required, comma expected");
2961 if (getLexer().isNot(AsmToken::String))
2962 return TokError("invalid vendor name");
2964 VendorName = getLexer().getTok().getStringContents();
2967 if (getLexer().isNot(AsmToken::Comma))
2968 return TokError("arch name required, comma expected");
2971 if (getLexer().isNot(AsmToken::String))
2972 return TokError("invalid arch name");
2974 ArchName = getLexer().getTok().getStringContents();
2977 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
2978 VendorName, ArchName);
2982 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
2983 amd_kernel_code_t &Header) {
2984 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
2985 // assembly for backwards compatibility.
2986 if (ID == "max_scratch_backing_memory_byte_size") {
2987 Parser.eatToEndOfStatement();
2991 SmallString<40> ErrStr;
2992 raw_svector_ostream Err(ErrStr);
2993 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
2994 return TokError(Err.str());
3000 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
3001 amd_kernel_code_t Header;
3002 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
3005 // Lex EndOfStatement. This is in a while loop, because lexing a comment
3006 // will set the current token to EndOfStatement.
3007 while(getLexer().is(AsmToken::EndOfStatement))
3010 if (getLexer().isNot(AsmToken::Identifier))
3011 return TokError("expected value identifier or .end_amd_kernel_code_t");
3013 StringRef ID = getLexer().getTok().getIdentifier();
3016 if (ID == ".end_amd_kernel_code_t")
3019 if (ParseAMDKernelCodeTValue(ID, Header))
3023 getTargetStreamer().EmitAMDKernelCodeT(Header);
3028 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
3029 if (getLexer().isNot(AsmToken::Identifier))
3030 return TokError("expected symbol name");
3032 StringRef KernelName = Parser.getTok().getString();
3034 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
3035 ELF::STT_AMDGPU_HSA_KERNEL);
3037 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
3038 KernelScope.initialize(getContext());
3042 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
3043 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
3044 return Error(getParser().getTok().getLoc(),
3045 ".amd_amdgpu_isa directive is not available on non-amdgcn "
3049 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
3051 std::string ISAVersionStringFromSTI;
3052 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
3053 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
3055 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
3056 return Error(getParser().getTok().getLoc(),
3057 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
3058 "arguments specified through the command line");
3061 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
3067 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
3068 const char *AssemblerDirectiveBegin;
3069 const char *AssemblerDirectiveEnd;
3070 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
3071 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
3072 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
3073 HSAMD::V3::AssemblerDirectiveEnd)
3074 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
3075 HSAMD::AssemblerDirectiveEnd);
3077 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
3078 return Error(getParser().getTok().getLoc(),
3079 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
3080 "not available on non-amdhsa OSes")).str());
3083 std::string HSAMetadataString;
3084 raw_string_ostream YamlStream(HSAMetadataString);
3086 getLexer().setSkipSpace(false);
3088 bool FoundEnd = false;
3089 while (!getLexer().is(AsmToken::Eof)) {
3090 while (getLexer().is(AsmToken::Space)) {
3091 YamlStream << getLexer().getTok().getString();
3095 if (getLexer().is(AsmToken::Identifier)) {
3096 StringRef ID = getLexer().getTok().getIdentifier();
3097 if (ID == AssemblerDirectiveEnd) {
3104 YamlStream << Parser.parseStringToEndOfStatement()
3105 << getContext().getAsmInfo()->getSeparatorString();
3107 Parser.eatToEndOfStatement();
3110 getLexer().setSkipSpace(true);
3112 if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
3113 return TokError(Twine("expected directive ") +
3114 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found"));
3119 if (IsaInfo::hasCodeObjectV3(&getSTI())) {
3120 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
3121 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3123 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
3124 return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
3130 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
3131 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
3132 return Error(getParser().getTok().getLoc(),
3133 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
3134 "not available on non-amdpal OSes")).str());
3137 PALMD::Metadata PALMetadata;
3140 if (ParseAsAbsoluteExpression(Value)) {
3141 return TokError(Twine("invalid value in ") +
3142 Twine(PALMD::AssemblerDirective));
3144 PALMetadata.push_back(Value);
3145 if (getLexer().isNot(AsmToken::Comma))
3149 getTargetStreamer().EmitPALMetadata(PALMetadata);
3153 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
3154 StringRef IDVal = DirectiveID.getString();
3156 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
3157 if (IDVal == ".amdgcn_target")
3158 return ParseDirectiveAMDGCNTarget();
3160 if (IDVal == ".amdhsa_kernel")
3161 return ParseDirectiveAMDHSAKernel();
3163 // TODO: Restructure/combine with PAL metadata directive.
3164 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
3165 return ParseDirectiveHSAMetadata();
3167 if (IDVal == ".hsa_code_object_version")
3168 return ParseDirectiveHSACodeObjectVersion();
3170 if (IDVal == ".hsa_code_object_isa")
3171 return ParseDirectiveHSACodeObjectISA();
3173 if (IDVal == ".amd_kernel_code_t")
3174 return ParseDirectiveAMDKernelCodeT();
3176 if (IDVal == ".amdgpu_hsa_kernel")
3177 return ParseDirectiveAMDGPUHsaKernel();
3179 if (IDVal == ".amd_amdgpu_isa")
3180 return ParseDirectiveISAVersion();
3182 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
3183 return ParseDirectiveHSAMetadata();
3186 if (IDVal == PALMD::AssemblerDirective)
3187 return ParseDirectivePALMetadata();
3192 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
3193 unsigned RegNo) const {
3195 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
3203 case AMDGPU::TBA_LO:
3204 case AMDGPU::TBA_HI:
3206 case AMDGPU::TMA_LO:
3207 case AMDGPU::TMA_HI:
3209 case AMDGPU::XNACK_MASK:
3210 case AMDGPU::XNACK_MASK_LO:
3211 case AMDGPU::XNACK_MASK_HI:
3212 return !isCI() && !isSI() && hasXNACK();
3223 case AMDGPU::FLAT_SCR:
3224 case AMDGPU::FLAT_SCR_LO:
3225 case AMDGPU::FLAT_SCR_HI:
3232 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
3234 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
3243 OperandMatchResultTy
3244 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
3245 // Try to parse with a custom parser
3246 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
3248 // If we successfully parsed the operand or if there was an error parsing,
3249 // we are done.
3251 // If we are parsing after we reach EndOfStatement then this means we
3252 // are appending default values to the Operands list. This is only done
3253 // by a custom parser, so we shouldn't continue on to the generic parsing.
3254 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
3255 getLexer().is(AsmToken::EndOfStatement))
3258 ResTy = parseRegOrImm(Operands);
3260 if (ResTy == MatchOperand_Success)
3263 const auto &Tok = Parser.getTok();
3264 SMLoc S = Tok.getLoc();
3266 const MCExpr *Expr = nullptr;
3267 if (!Parser.parseExpression(Expr)) {
3268 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3269 return MatchOperand_Success;
3272 // Possibly this is an instruction flag like 'gds'.
3273 if (Tok.getKind() == AsmToken::Identifier) {
3274 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
3276 return MatchOperand_Success;
3279 return MatchOperand_NoMatch;
3282 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
3283 // Clear any forced encodings from the previous instruction.
3284 setForcedEncodingSize(0);
3285 setForcedDPP(false);
3286 setForcedSDWA(false);
3288 if (Name.endswith("_e64")) {
3289 setForcedEncodingSize(64);
3290 return Name.substr(0, Name.size() - 4);
3291 } else if (Name.endswith("_e32")) {
3292 setForcedEncodingSize(32);
3293 return Name.substr(0, Name.size() - 4);
3294 } else if (Name.endswith("_dpp")) {
3296 return Name.substr(0, Name.size() - 4);
3297 } else if (Name.endswith("_sdwa")) {
3298 setForcedSDWA(true);
3299 return Name.substr(0, Name.size() - 5);
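// For example (a sketch): "v_add_f32_e64" is parsed as mnemonic "v_add_f32"
// with a forced 64-bit (VOP3) encoding, and "v_mov_b32_sdwa" as "v_mov_b32"
// with a forced SDWA encoding.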
3304 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
3306 SMLoc NameLoc, OperandVector &Operands) {
3307 // Add the instruction mnemonic
3308 Name = parseMnemonicSuffix(Name);
3309 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
3311 while (!getLexer().is(AsmToken::EndOfStatement)) {
3312 OperandMatchResultTy Res = parseOperand(Operands, Name);
3314 // Eat the comma or space if there is one.
3315 if (getLexer().is(AsmToken::Comma))
3319 case MatchOperand_Success: break;
3320 case MatchOperand_ParseFail:
3321 Error(getLexer().getLoc(), "failed parsing operand.");
3322 while (!getLexer().is(AsmToken::EndOfStatement)) {
3326 case MatchOperand_NoMatch:
3327 Error(getLexer().getLoc(), "not a valid operand.");
3328 while (!getLexer().is(AsmToken::EndOfStatement)) {
3338 //===----------------------------------------------------------------------===//
3339 // Utility functions
3340 //===----------------------------------------------------------------------===//
3342 OperandMatchResultTy
3343 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
3344 switch(getLexer().getKind()) {
3345 default: return MatchOperand_NoMatch;
3346 case AsmToken::Identifier: {
3347 StringRef Name = Parser.getTok().getString();
3348 if (!Name.equals(Prefix)) {
3349 return MatchOperand_NoMatch;
3353 if (getLexer().isNot(AsmToken::Colon))
3354 return MatchOperand_ParseFail;
3358 bool IsMinus = false;
3359 if (getLexer().getKind() == AsmToken::Minus) {
3364 if (getLexer().isNot(AsmToken::Integer))
3365 return MatchOperand_ParseFail;
3367 if (getParser().parseAbsoluteExpression(Int))
3368 return MatchOperand_ParseFail;
3375 return MatchOperand_Success;
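// E.g. (a sketch): with Prefix == "offset", the input "offset:16" succeeds
// and leaves 16 in Int; "offset:-8" takes the IsMinus path and yields -8.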
3378 OperandMatchResultTy
3379 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
3380 AMDGPUOperand::ImmTy ImmTy,
3381 bool (*ConvertResult)(int64_t&)) {
3382 SMLoc S = Parser.getTok().getLoc();
3385 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
3386 if (Res != MatchOperand_Success)
3389 if (ConvertResult && !ConvertResult(Value)) {
3390 return MatchOperand_ParseFail;
3393 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
3394 return MatchOperand_Success;
3397 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
3399 OperandVector &Operands,
3400 AMDGPUOperand::ImmTy ImmTy,
3401 bool (*ConvertResult)(int64_t&)) {
3402 StringRef Name = Parser.getTok().getString();
3403 if (!Name.equals(Prefix))
3404 return MatchOperand_NoMatch;
3407 if (getLexer().isNot(AsmToken::Colon))
3408 return MatchOperand_ParseFail;
3411 if (getLexer().isNot(AsmToken::LBrac))
3412 return MatchOperand_ParseFail;
3416 SMLoc S = Parser.getTok().getLoc();
3418 // FIXME: How to verify the number of elements matches the number of src
3419 // operands?
3420 for (int I = 0; I < 4; ++I) {
3422 if (getLexer().is(AsmToken::RBrac))
3425 if (getLexer().isNot(AsmToken::Comma))
3426 return MatchOperand_ParseFail;
3430 if (getLexer().isNot(AsmToken::Integer))
3431 return MatchOperand_ParseFail;
3434 if (getParser().parseAbsoluteExpression(Op))
3435 return MatchOperand_ParseFail;
3437 if (Op != 0 && Op != 1)
3438 return MatchOperand_ParseFail;
3443 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
3444 return MatchOperand_Success;
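// E.g. (a sketch): with Prefix == "op_sel", the input "op_sel:[0,0,1,1]"
// packs the listed 0/1 elements into one immediate, element i at bit i.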
3447 OperandMatchResultTy
3448 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
3449 AMDGPUOperand::ImmTy ImmTy) {
3451 SMLoc S = Parser.getTok().getLoc();
3453 // We are at the end of the statement, and this is a default argument, so
3454 // use a default value.
3455 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3456 switch(getLexer().getKind()) {
3457 case AsmToken::Identifier: {
3458 StringRef Tok = Parser.getTok().getString();
3460 if (Tok == "r128" && isGFX9())
3461 Error(S, "r128 modifier is not supported on this GPU");
3462 if (Tok == "a16" && !isGFX9())
3463 Error(S, "a16 modifier is not supported on this GPU");
3466 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
3470 return MatchOperand_NoMatch;
3475 return MatchOperand_NoMatch;
3479 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
3480 return MatchOperand_Success;
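// E.g. (a sketch): parseNamedBit("gds", ...) turns "gds" into an immediate 1,
// "nogds" into 0, and creates a default 0 at the end of the statement.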
3483 static void addOptionalImmOperand(
3484 MCInst& Inst, const OperandVector& Operands,
3485 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
3486 AMDGPUOperand::ImmTy ImmT,
3487 int64_t Default = 0) {
3488 auto i = OptionalIdx.find(ImmT);
3489 if (i != OptionalIdx.end()) {
3490 unsigned Idx = i->second;
3491 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
3493 Inst.addOperand(MCOperand::createImm(Default));
3497 OperandMatchResultTy
3498 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
3499 if (getLexer().isNot(AsmToken::Identifier)) {
3500 return MatchOperand_NoMatch;
3502 StringRef Tok = Parser.getTok().getString();
3503 if (Tok != Prefix) {
3504 return MatchOperand_NoMatch;
3508 if (getLexer().isNot(AsmToken::Colon)) {
3509 return MatchOperand_ParseFail;
3513 if (getLexer().isNot(AsmToken::Identifier)) {
3514 return MatchOperand_ParseFail;
3517 Value = Parser.getTok().getString();
3518 return MatchOperand_Success;
3521 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3522 // values to live in a joint format operand in the MCInst encoding.
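// E.g. (a sketch, with placeholder values): both
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:7, nfmt:1, 0
//   tbuffer_load_format_x v0, off, s[0:3], nfmt:1, dfmt:7, 0
// parse to the same ImmTyFORMAT operand (Dfmt | Nfmt << 4).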
3523 OperandMatchResultTy
3524 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3525 SMLoc S = Parser.getTok().getLoc();
3526 int64_t Dfmt = 0, Nfmt = 0;
3527 // dfmt and nfmt can appear in either order, and each is optional.
3528 bool GotDfmt = false, GotNfmt = false;
3529 while (!GotDfmt || !GotNfmt) {
3531 auto Res = parseIntWithPrefix("dfmt", Dfmt);
3532 if (Res != MatchOperand_NoMatch) {
3533 if (Res != MatchOperand_Success)
3536 Error(Parser.getTok().getLoc(), "out of range dfmt");
3537 return MatchOperand_ParseFail;
3545 auto Res = parseIntWithPrefix("nfmt", Nfmt);
3546 if (Res != MatchOperand_NoMatch) {
3547 if (Res != MatchOperand_Success)
3550 Error(Parser.getTok().getLoc(), "out of range nfmt");
3551 return MatchOperand_ParseFail;
3560 if (!GotDfmt && !GotNfmt)
3561 return MatchOperand_NoMatch;
3562 auto Format = Dfmt | Nfmt << 4;
3564 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3565 return MatchOperand_Success;
3568 //===----------------------------------------------------------------------===//
3570 //===----------------------------------------------------------------------===//
3572 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
3573 const OperandVector &Operands) {
3574 OptionalImmIndexMap OptionalIdx;
3576 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3577 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3579 // Add the register arguments
3581 Op.addRegOperands(Inst, 1);
3585 // Handle optional arguments
3586 OptionalIdx[Op.getImmTy()] = i;
3589 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
3590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
3591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3593 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3596 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
3597 bool IsGdsHardcoded) {
3598 OptionalImmIndexMap OptionalIdx;
3600 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3601 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3603 // Add the register arguments
3605 Op.addRegOperands(Inst, 1);
3609 if (Op.isToken() && Op.getToken() == "gds") {
3610 IsGdsHardcoded = true;
3614 // Handle optional arguments
3615 OptionalIdx[Op.getImmTy()] = i;
3618 AMDGPUOperand::ImmTy OffsetType =
3619 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si ||
3620 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
3621 AMDGPUOperand::ImmTyOffset;
3623 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
3625 if (!IsGdsHardcoded) {
3626 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
3628 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
3631 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
3632 OptionalImmIndexMap OptionalIdx;
3634 unsigned OperandIdx[4];
3635 unsigned EnMask = 0;
3638 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3639 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3641 // Add the register arguments
3644 OperandIdx[SrcIdx] = Inst.size();
3645 Op.addRegOperands(Inst, 1);
3652 OperandIdx[SrcIdx] = Inst.size();
3653 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
3658 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
3659 Op.addImmOperands(Inst, 1);
3663 if (Op.isToken() && Op.getToken() == "done")
3666 // Handle optional arguments
3667 OptionalIdx[Op.getImmTy()] = i;
3670 assert(SrcIdx == 4);
3673 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
3675 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
3676 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
3677 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
3680 for (auto i = 0; i < SrcIdx; ++i) {
3681 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
3682 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
3686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
3687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
3689 Inst.addOperand(MCOperand::createImm(EnMask));
3692 //===----------------------------------------------------------------------===//
3694 //===----------------------------------------------------------------------===//
3698 const AMDGPU::IsaVersion ISA,
3702 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
3703 unsigned (*decode)(const IsaVersion &Version, unsigned))
3705 bool Failed = false;
3707 IntVal = encode(ISA, IntVal, CntVal);
3708 if (CntVal != decode(ISA, IntVal)) {
3710 IntVal = encode(ISA, IntVal, -1);
3718 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
3719 StringRef CntName = Parser.getTok().getString();
3723 if (getLexer().isNot(AsmToken::LParen))
3727 if (getLexer().isNot(AsmToken::Integer))
3730 SMLoc ValLoc = Parser.getTok().getLoc();
3731 if (getParser().parseAbsoluteExpression(CntVal))
3734 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3737 bool Sat = CntName.endswith("_sat");
3739 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
3740 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
3741 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
3742 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
3743 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
3744 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
3748 Error(ValLoc, "too large value for " + CntName);
3752 if (getLexer().isNot(AsmToken::RParen)) {
3757 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
3758 const AsmToken NextToken = getLexer().peekTok();
3759 if (NextToken.is(AsmToken::Identifier)) {
3767 OperandMatchResultTy
3768 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
3769 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3770 int64_t Waitcnt = getWaitcntBitMask(ISA);
3771 SMLoc S = Parser.getTok().getLoc();
3773 switch(getLexer().getKind()) {
3774 default: return MatchOperand_ParseFail;
3775 case AsmToken::Integer:
3776 // The operand can be an integer value.
3777 if (getParser().parseAbsoluteExpression(Waitcnt))
3778 return MatchOperand_ParseFail;
3781 case AsmToken::Identifier:
3783 if (parseCnt(Waitcnt))
3784 return MatchOperand_ParseFail;
3785 } while(getLexer().isNot(AsmToken::EndOfStatement));
3788 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
3789 return MatchOperand_Success;
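// Illustrative s_waitcnt forms accepted above (a sketch):
//   s_waitcnt 0                       // raw 16-bit value
//   s_waitcnt vmcnt(0) & lgkmcnt(0)   // named counters, '&' or ',' separated
//   s_waitcnt expcnt(1), vmcnt(2)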
3792 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
3794 using namespace llvm::AMDGPU::Hwreg;
3796 if (Parser.getTok().getString() != "hwreg")
3800 if (getLexer().isNot(AsmToken::LParen))
3804 if (getLexer().is(AsmToken::Identifier)) {
3805 HwReg.IsSymbolic = true;
3806 HwReg.Id = ID_UNKNOWN_;
3807 const StringRef tok = Parser.getTok().getString();
3808 int Last = ID_SYMBOLIC_LAST_;
3809 if (isSI() || isCI() || isVI())
3810 Last = ID_SYMBOLIC_FIRST_GFX9_;
3811 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
3812 if (tok == IdSymbolic[i]) {
3819 HwReg.IsSymbolic = false;
3820 if (getLexer().isNot(AsmToken::Integer))
3822 if (getParser().parseAbsoluteExpression(HwReg.Id))
3826 if (getLexer().is(AsmToken::RParen)) {
3832 if (getLexer().isNot(AsmToken::Comma))
3836 if (getLexer().isNot(AsmToken::Integer))
3838 if (getParser().parseAbsoluteExpression(Offset))
3841 if (getLexer().isNot(AsmToken::Comma))
3845 if (getLexer().isNot(AsmToken::Integer))
3847 if (getParser().parseAbsoluteExpression(Width))
3850 if (getLexer().isNot(AsmToken::RParen))
3857 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
3858 using namespace llvm::AMDGPU::Hwreg;
3860 int64_t Imm16Val = 0;
3861 SMLoc S = Parser.getTok().getLoc();
3863 switch(getLexer().getKind()) {
3864 default: return MatchOperand_NoMatch;
3865 case AsmToken::Integer:
3866 // The operand can be an integer value.
3867 if (getParser().parseAbsoluteExpression(Imm16Val))
3868 return MatchOperand_NoMatch;
3869 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
3870 Error(S, "invalid immediate: only 16-bit values are legal");
3871 // Do not return error code, but create an imm operand anyway and proceed
3872 // to the next operand, if any. That avoids unnecessary error messages.
3876 case AsmToken::Identifier: {
3877 OperandInfoTy HwReg(ID_UNKNOWN_);
3878 int64_t Offset = OFFSET_DEFAULT_;
3879 int64_t Width = WIDTH_M1_DEFAULT_ + 1;
3880 if (parseHwregConstruct(HwReg, Offset, Width))
3881 return MatchOperand_ParseFail;
3882 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
3883 if (HwReg.IsSymbolic)
3884 Error(S, "invalid symbolic name of hardware register");
3886 Error(S, "invalid code of hardware register: only 6-bit values are legal");
3888 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
3889 Error(S, "invalid bit offset: only 5-bit values are legal");
3890 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
3891 Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
3892 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
3896 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
3897 return MatchOperand_Success;
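// Illustrative hwreg spellings handled above (a sketch):
//   s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC)        // whole register
//   s_getreg_b32 s0, hwreg(HW_REG_GPR_ALLOC, 0, 6)  // offset 0, width 6
//   s_getreg_b32 s0, hwreg(5)                       // raw 6-bit register id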
3900 bool AMDGPUOperand::isSWaitCnt() const {
3904 bool AMDGPUOperand::isHwreg() const {
3905 return isImmTy(ImmTyHwreg);
3908 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
3909 using namespace llvm::AMDGPU::SendMsg;
3911 if (Parser.getTok().getString() != "sendmsg")
3915 if (getLexer().isNot(AsmToken::LParen))
3919 if (getLexer().is(AsmToken::Identifier)) {
3920 Msg.IsSymbolic = true;
3921 Msg.Id = ID_UNKNOWN_;
3922 const std::string tok = Parser.getTok().getString();
3923 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
3925 default: continue; // Omit gaps.
3926 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
3928 if (tok == IdSymbolic[i]) {
3935 Msg.IsSymbolic = false;
3936 if (getLexer().isNot(AsmToken::Integer))
3938 if (getParser().parseAbsoluteExpression(Msg.Id))
3940 if (getLexer().is(AsmToken::Integer))
3941 if (getParser().parseAbsoluteExpression(Msg.Id))
3942 Msg.Id = ID_UNKNOWN_;
3944 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
3947 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
3948 if (getLexer().isNot(AsmToken::RParen))
3954 if (getLexer().isNot(AsmToken::Comma))
3958 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
3959 Operation.Id = ID_UNKNOWN_;
3960 if (getLexer().is(AsmToken::Identifier)) {
3961 Operation.IsSymbolic = true;
3962 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
3963 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
3964 const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
3965 const StringRef Tok = Parser.getTok().getString();
3966 for (int i = F; i < L; ++i) {
3974 Operation.IsSymbolic = false;
3975 if (getLexer().isNot(AsmToken::Integer))
3977 if (getParser().parseAbsoluteExpression(Operation.Id))
3981 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
3982 // Stream id is optional.
3983 if (getLexer().is(AsmToken::RParen)) {
3988 if (getLexer().isNot(AsmToken::Comma))
3992 if (getLexer().isNot(AsmToken::Integer))
3994 if (getParser().parseAbsoluteExpression(StreamId))
3998 if (getLexer().isNot(AsmToken::RParen))
4004 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
4005 if (getLexer().getKind() != AsmToken::Identifier)
4006 return MatchOperand_NoMatch;
4008 StringRef Str = Parser.getTok().getString();
4009 int Slot = StringSwitch<int>(Str)
4015 SMLoc S = Parser.getTok().getLoc();
4017 return MatchOperand_ParseFail;
4020 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
4021 AMDGPUOperand::ImmTyInterpSlot));
4022 return MatchOperand_Success;
4025 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
4026 if (getLexer().getKind() != AsmToken::Identifier)
4027 return MatchOperand_NoMatch;
4029 StringRef Str = Parser.getTok().getString();
4030 if (!Str.startswith("attr"))
4031 return MatchOperand_NoMatch;
4033 StringRef Chan = Str.take_back(2);
4034 int AttrChan = StringSwitch<int>(Chan)
4041 return MatchOperand_ParseFail;
4043 Str = Str.drop_back(2).drop_front(4);
4046 if (Str.getAsInteger(10, Attr))
4047 return MatchOperand_ParseFail;
4049 SMLoc S = Parser.getTok().getLoc();
4052 Error(S, "out of bounds attr");
4053 return MatchOperand_Success;
4056 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
4058 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
4059 AMDGPUOperand::ImmTyInterpAttr));
4060 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
4061 AMDGPUOperand::ImmTyAttrChan));
4062 return MatchOperand_Success;
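// E.g. (a sketch): "v_interp_mov_f32 v0, p10, attr0.x" combines an interp
// slot (p10/p20/p0) with an attribute channel (attrN.{x,y,z,w}) parsed above.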
4065 void AMDGPUAsmParser::errorExpTgt() {
4066 Error(Parser.getTok().getLoc(), "invalid exp target");
4069 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
4071 if (Str == "null") {
4073 return MatchOperand_Success;
4076 if (Str.startswith("mrt")) {
4077 Str = Str.drop_front(3);
4078 if (Str == "z") { // == mrtz
4080 return MatchOperand_Success;
4083 if (Str.getAsInteger(10, Val))
4084 return MatchOperand_ParseFail;
4089 return MatchOperand_Success;
4092 if (Str.startswith("pos")) {
4093 Str = Str.drop_front(3);
4094 if (Str.getAsInteger(10, Val))
4095 return MatchOperand_ParseFail;
4101 return MatchOperand_Success;
4104 if (Str.startswith("param")) {
4105 Str = Str.drop_front(5);
4106 if (Str.getAsInteger(10, Val))
4107 return MatchOperand_ParseFail;
4113 return MatchOperand_Success;
4116 if (Str.startswith("invalid_target_")) {
4117 Str = Str.drop_front(15);
4118 if (Str.getAsInteger(10, Val))
4119 return MatchOperand_ParseFail;
4122 return MatchOperand_Success;
4125 return MatchOperand_NoMatch;
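// Exp target spellings recognized above include (a sketch):
//   null, mrt0..mrt7, mrtz, pos0..pos3, param0..param31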
4128 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
4130 StringRef Str = Parser.getTok().getString();
4132 auto Res = parseExpTgtImpl(Str, Val);
4133 if (Res != MatchOperand_Success)
4136 SMLoc S = Parser.getTok().getLoc();
4139 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
4140 AMDGPUOperand::ImmTyExpTgt));
4141 return MatchOperand_Success;
4144 OperandMatchResultTy
4145 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
4146 using namespace llvm::AMDGPU::SendMsg;
4148 int64_t Imm16Val = 0;
4149 SMLoc S = Parser.getTok().getLoc();
4151 switch(getLexer().getKind()) {
4153 return MatchOperand_NoMatch;
4154 case AsmToken::Integer:
4155 // The operand can be an integer value.
4156 if (getParser().parseAbsoluteExpression(Imm16Val))
4157 return MatchOperand_NoMatch;
4158 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
4159 Error(S, "invalid immediate: only 16-bit values are legal");
4160 // Do not return error code, but create an imm operand anyway and proceed
4161 // to the next operand, if any. That avoids unnecessary error messages.
4164 case AsmToken::Identifier: {
4165 OperandInfoTy Msg(ID_UNKNOWN_);
4166 OperandInfoTy Operation(OP_UNKNOWN_);
4167 int64_t StreamId = STREAM_ID_DEFAULT_;
4168 if (parseSendMsgConstruct(Msg, Operation, StreamId))
4169 return MatchOperand_ParseFail;
4171 // Validate and encode message ID.
4172 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
4173 || Msg.Id == ID_SYSMSG)) {
4175 Error(S, "invalid/unsupported symbolic name of message");
4177 Error(S, "invalid/unsupported code of message");
4180 Imm16Val = (Msg.Id << ID_SHIFT_);
4181 // Validate and encode operation ID.
4182 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
4183 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
4184 if (Operation.IsSymbolic)
4185 Error(S, "invalid symbolic name of GS_OP");
4187 Error(S, "invalid code of GS_OP: only 2-bit values are legal");
4190 if (Operation.Id == OP_GS_NOP
4191 && Msg.Id != ID_GS_DONE) {
4192 Error(S, "invalid GS_OP: NOP is for GS_DONE only");
4195 Imm16Val |= (Operation.Id << OP_SHIFT_);
4197 if (Msg.Id == ID_SYSMSG) {
4198 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
4199 if (Operation.IsSymbolic)
4200 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
4202 Error(S, "invalid/unsupported code of SYSMSG_OP");
4205 Imm16Val |= (Operation.Id << OP_SHIFT_);
4207 // Validate and encode stream ID.
4208 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
4209 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
4210 Error(S, "invalid stream id: only 2-bit values are legal");
4213 Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
4219 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
4220 return MatchOperand_Success;
bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
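
// Source-level examples accepted here (illustrative): a symbolic form such as
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// or a raw 16-bit immediate such as
//   s_sendmsg 0x22
// which encodes the same message, assuming the SendMsg field shifts from
// SIDefines.h (message id in the low bits, operation above it).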
//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == Id) {
    Parser.Lex();
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (getLexer().getKind() == Kind) {
    Parser.Lex();
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(Parser.getTok().getLoc(), ErrMsg);
    return false;
  }
  return true;
}
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  SMLoc S = Parser.getTok().getLoc();
  if (getLexer().getKind() == AsmToken::String) {
    Val = Parser.getTok().getStringContents();
    Parser.Lex();
    return true;
  } else {
    Error(S, ErrMsg);
    return false;
  }
}
//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}
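
// Illustrative note (derived from the parse helpers below, with the field
// layout assumed from SIDefines.h): with this encoding each lane reads the
// lane whose id is ((self & AndMask) | OrMask) ^ XorMask. For example,
// parseSwizzleReverse with a group size of 4 produces
// encodeBitmaskPerm(BITMASK_MAX, 0, 3), i.e. "reverse within each group of 4".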
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")) {
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (auto i = 0; i < LANE_NUM; ++i) {
      Imm |= Lane[i] << (LANE_SHIFT * i);
    }
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch (Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
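
// Illustrative reading of the mask (matching the switch above): characters
// are consumed MSB-first, one per lane-id bit; '0' clears the bit, '1' sets
// it, 'p' preserves it, and 'i' inverts it. For example,
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "00ppi")
// clears lane-id bits 4:3, preserves bits 2:1, and inverts bit 0.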
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }
  return false;
}
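
// Accepted macro forms (illustrative, following the dispatch above):
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   offset:swizzle(BITMASK_PERM, "00ppi")
//   offset:swizzle(BROADCAST, 8, 3)   // group size, then lane id
//   offset:swizzle(SWAP, 4)           // swap groups of 4 lanes
//   offset:swizzle(REVERSE, 8)        // reverse within groups of 8 lanes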
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}
bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();

  switch (getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer: {
      int64_t Imm;
      if (getParser().parseAbsoluteExpression(Imm))
        return MatchOperand_ParseFail;
      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
      return MatchOperand_Success;
    }

    case AsmToken::Identifier:
      Operands.push_back(AMDGPUOperand::CreateExpr(this,
          MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                                  Parser.getTok().getString()), getContext()), S));
      Parser.Lex();
      return MatchOperand_Success;
  }
}
//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsAtomicReturn,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  assert(IsAtomicReturn ? IsAtomic : true);

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier = Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have a mandatory 'lds' modifier. However, this modifier follows
  // optional modifiers, and the llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of an opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns.
  if (IsAtomicReturn) {
    MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning.
    Inst.insert(I, *I);
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  if (!IsAtomic) { // glc is hard-coded.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
}
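
// Illustrative effect of the workaround above: a buffer load written without
// a trailing 'lds' modifier may still be matched to the lds opcode variant by
// the autogenerated matcher; getMUBUFNoLdsInst() is then used here to swap in
// the equivalent non-lds opcode, while a source line that does carry 'lds'
// keeps the lds version.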
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8 bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}
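
// Illustrative mapping (following the converters above): the source modifiers
// "mul:1", "mul:2" and "mul:4" encode omod values 0, 1 and 2 (Mul >> 1),
// while "div:2" encodes omod value 3 and "div:1" is an alias for omod 0.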
// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",   AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm",      AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel",    AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo",    AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi",    AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However, the implementation of some instructions
  // violates this rule (see e.g. flat/global atomic which have hardcoded
  // 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}
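
// Illustrative case for the lookahead above: in
//   flat_atomic_swap v0, v[1:2], v3 glc
// the trailing 'glc' is a hardcoded mandatory operand of the returning atomic,
// so after the first optional operand this routine keeps parsing further
// operands itself, preventing the autogenerated optional-operand parser from
// being invoked on the mandatory token.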
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}
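
// e.g. (illustrative): "v_mul_f32 v0, v1, v2 mul:2" or
//      "v_mul_f32 v0, v1, v2 div:2 clamp"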
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
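
// Illustrative example for the repacking above: in
//   v_pk_add_f16 v0, v1, v2 op_sel:[1,0] op_sel_hi:[1,1]
// bit 0 of the parsed op_sel array is set, so SISrcMods::OP_SEL_0 is OR'ed
// into src0_modifiers only, while OP_SEL_1 is OR'ed into both src0_modifiers
// and src1_modifiers.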
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImm() && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
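
// Typical source forms accepted here (illustrative):
//   v_mov_b32 v0, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
//   v_mov_b32 v0, v1 row_shl:1 bound_ctrl:0
//   v_mov_b32 v0, v1 row_bcast:31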
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
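
// e.g. (illustrative):
//   v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
// where each *_sel modifier is parsed by this routine into one of the
// SdwaSel values matched above.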
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}