1 //===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file This pass tries to apply several peephole SDWA patterns.
13 /// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
14 /// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
15 /// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
18 /// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
19 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
21 //===----------------------------------------------------------------------===//
25 #include "AMDGPUSubtarget.h"
26 #include "SIDefines.h"
27 #include "SIInstrInfo.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/CodeGen/MachineFunctionPass.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include <unordered_map>
36 #define DEBUG_TYPE "si-peephole-sdwa"
// Pass-wide statistics, reported when LLVM is run with -stats.
STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
STATISTIC(NumSDWAInstructionsPeepholed,
          "Number of instruction converted to SDWA.");
class SIPeepholeSDWA : public MachineFunctionPass {
  // Cached per-function target/register info, set up in runOnMachineFunction.
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const SIInstrInfo *TII;

  // Maps each matched pattern instruction (shift/and/bfe) to the SDWA operand
  // description derived from it by matchSDWAOperands().
  std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;

  // Try to reduce Op to a compile-time constant: either Op is an immediate
  // itself or it is a register holding the result of a foldable copy of an
  // immediate (see foldToImm definition below).
  Optional<int64_t> foldToImm(const MachineOperand &Op) const;

  typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector;

  SIPeepholeSDWA() : MachineFunctionPass(ID) {
    initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());

  bool runOnMachineFunction(MachineFunction &MF) override;
  // Scan MF and record SDWA src/dst operand patterns into SDWAOperands.
  void matchSDWAOperands(MachineFunction &MF);
  // Rewrite MI into its _sdwa form and apply the matched operand patterns.
  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);

  StringRef getPassName() const override { return "SI Peephole SDWA"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);

  // SDWAOperand members: one matched pattern, described as a pair of operands.
  MachineOperand *Target; // Operand that would be used in converted instruction
  MachineOperand *Replaced; // Operand that would be replaced by Target

  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    // Both operands must be registers; the pattern is a register substitution.
    assert(Target->isReg());
    assert(Replaced->isReg());

  virtual ~SDWAOperand() {}

  // Return the instruction this operand could be folded into, or nullptr.
  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
  // Apply this operand to the already-built SDWA instruction MI.
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;

  MachineOperand *getTargetOperand() const { return Target; }
  MachineOperand *getReplacedOperand() const { return Replaced; }
  MachineInstr *getParentInst() const { return Target->getParent(); }
  // Walk operand -> instruction -> basic block -> function to reach the MRI.
  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
using namespace AMDGPU::SDWA;

// Source-operand pattern: the Replaced use can be rewritten to Target with
// the recorded src_sel / abs / neg / sext modifiers.
class SDWASrcOperand : public SDWAOperand {
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
      : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
        Neg(Neg_), Sext(Sext_) {}

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }

  // Encode Abs/Neg/Sext into a SISrcMods immediate for src*_modifiers.
  uint64_t getSrcMods() const;

// Destination-operand pattern: the Replaced def can be rewritten to Target
// with the recorded dst_sel / dst_unused settings.
class SDWADstOperand : public SDWAOperand {
  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }

} // End anonymous namespace.

INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)

char SIPeepholeSDWA::ID = 0;

char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
// Factory entry point used by the AMDGPU target pass pipeline.
FunctionPass *llvm::createSIPeepholeSDWAPass() {
  return new SIPeepholeSDWA();
// Debug printing helper for SdwaSel values (used by the operand printers).
static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
  case BYTE_0: OS << "BYTE_0"; break;
  case BYTE_1: OS << "BYTE_1"; break;
  case BYTE_2: OS << "BYTE_2"; break;
  case BYTE_3: OS << "BYTE_3"; break;
  case WORD_0: OS << "WORD_0"; break;
  case WORD_1: OS << "WORD_1"; break;
  case DWORD: OS << "DWORD"; break;
// Debug printing helper for DstUnused values.
static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
// Print a matched src pattern: target operand plus sel/abs/neg/sext flags.
static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) {
  OS << "SDWA src: " << *Src.getTargetOperand()
     << " src_sel:" << Src.getSrcSel()
     << " abs:" << Src.getAbs() << " neg:" << Src.getNeg()
     << " sext:" << Src.getSext() << '\n';
// Print a matched dst pattern: target operand plus dst_sel/dst_unused.
static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
  OS << "SDWA dst: " << *Dst.getTargetOperand()
     << " dst_sel:" << Dst.getDstSel()
     << " dst_unused:" << Dst.getDstUnused() << '\n';
// Copy the register number, subregister index and liveness flags from one
// register operand to another.
static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
  assert(To.isReg() && From.isReg());
  To.setReg(From.getReg());
  To.setSubReg(From.getSubReg());
  To.setIsUndef(From.isUndef());
  // NOTE(review): kill flags only apply to uses and dead flags only to defs —
  // presumably these two lines are guarded by a To.isUse() check; verify.
  To.setIsKill(From.isKill());
  To.setIsDead(From.isDead());
// True if LHS and RHS name exactly the same register and subregister index.
static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
  return LHS.isReg() &&
         LHS.getReg() == RHS.getReg() &&
         LHS.getSubReg() == RHS.getSubReg();
// True if SubReg accesses a (possibly improper) subregister of SuperReg: both
// name the same register and SubReg's lane mask is covered by SuperReg's.
static bool isSubregOf(const MachineOperand &SubReg,
                       const MachineOperand &SuperReg,
                       const TargetRegisterInfo *TRI) {
  // Only register operands can be in a subregister relationship.
  if (!SuperReg.isReg() || !SubReg.isReg())

  // Identical reg + subreg is trivially a subreg of itself.
  if (isSameReg(SuperReg, SubReg))

  // Different registers can never be subregs of one another here.
  if (SuperReg.getReg() != SubReg.getReg())

  // Compare lane masks: SubMask must be a subset of SuperMask, i.e.
  // (SuperMask | ~SubMask) has every lane set.
  LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg());
  LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg());
  SuperMask |= ~SubMask;
  return SuperMask.all();
// Encode the recorded Abs/Neg/Sext flags as a SISrcMods immediate suitable
// for a src*_modifiers operand.
uint64_t SDWASrcOperand::getSrcMods() const {
         "Float and integer src modifiers can't be set simulteniously");
  // Abs/Neg are the floating-point modifiers; Sext is the integer modifier.
  Mods |= Abs ? SISrcMods::ABS : 0;
  Mods |= Neg ? SISrcMods::NEG : 0;
    Mods |= SISrcMods::SEXT;
MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA src operand the potential instruction is the unique one that
  // uses the register defined by the parent instruction.
  MachineRegisterInfo *MRI = getMRI();
  MachineOperand *Replaced = getReplacedOperand();
  assert(Replaced->isReg());

  MachineInstr *PotentialMI = nullptr;
  for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) {
    // If this is a use of another subreg of the dst reg then do nothing.
    if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))

    // If there exists a use of a superreg of dst then we should not combine
    // this pattern.
    if (!isSameReg(PotentialMO, *Replaced))

    // Check that PotentialMI is the only instruction that uses the dst reg.
    if (PotentialMI == nullptr) {
      PotentialMI = PotentialMO.getParent();
    } else if (PotentialMI != PotentialMO.getParent()) {
bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Find operand in instruction that matches source operand and replace it
  // with target operand. Set corresponding src_sel and src_modifiers.
  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && Src->isReg());
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it should be src1
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

  assert(Src && Src->isReg());
  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      !isSameReg(*Src, *getReplacedOperand())) {
    // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
    // src2. This is not allowed.

  assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);

  // Substitute the target register and record the byte/word selection.
  copyRegOperand(*Src, *getTargetOperand());
  SrcSel->setImm(getSrcSel());
  SrcMods->setImm(getSrcMods());
  // The target register gains a new use here, so it must not be marked killed.
  getTargetOperand()->setIsKill(false);
MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA dst operand the potential instruction is the one that defines
  // the register that this operand uses.
  MachineRegisterInfo *MRI = getMRI();
  MachineInstr *ParentMI = getParentInst();
  MachineOperand *Replaced = getReplacedOperand();
  assert(Replaced->isReg());

  for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) {
    // Skip defs of unrelated subregs of the replaced register.
    if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))

    if (!isSameReg(*Replaced, PotentialMO))

    // Check that ParentMI is the only instruction that uses the replaced
    // register.
    for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) {
      if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) &&
          UseMO.getParent() != ParentMI) {

    // Due to SSA this should be the only def of the replaced register, so
    // return it.
    return PotentialMO.getParent();
bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Replace the vdst operand in MI with the target operand; set dst_sel and
  // dst_unused.
  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      getDstSel() != AMDGPU::SDWA::DWORD) {
    // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD

  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
             isSameReg(*Operand, *getReplacedOperand()));
  copyRegOperand(*Operand, *getTargetOperand());
  MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
  DstSel->setImm(getDstSel());
  MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
  DstUnused->setImm(getDstUnused());

  // Remove original instruction because it would conflict with our new
  // instruction by register definition.
  getParentInst()->eraseFromParent();
// Try to reduce Op to a compile-time immediate value. Handles the case where
// Op is a register whose defining instruction is a foldable copy of an
// immediate. Returns None when no constant can be derived.
Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
  // If this is not an immediate then it can be a copy of an immediate value,
  // e.g.:
  // %vreg1<def> = S_MOV_B32 255;
  for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
    // Only a def of exactly the same reg/subreg is a candidate.
    if (!isSameReg(Op, Def))

    const MachineInstr *DefInst = Def.getParent();
    if (!TII->isFoldableCopy(*DefInst))

    // A foldable copy has its source as operand 1.
    const MachineOperand &Copied = DefInst->getOperand(1);

    return Copied.getImm();
// Scan every instruction in MF for shift/and/bfe idioms that describe a
// sub-dword access and record each one as an SDWA operand pattern in
// SDWAOperands.
void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      unsigned Opcode = MI.getOpcode();
      case AMDGPU::V_LSHRREV_B32_e32:
      case AMDGPU::V_ASHRREV_I32_e32:
      case AMDGPU::V_LSHLREV_B32_e32: {
        // from: v_lshrrev_b32_e32 v1, 16/24, v0
        // to SDWA src:v0 src_sel:WORD_1/BYTE_3

        // from: v_ashrrev_i32_e32 v1, 16/24, v0
        // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1

        // from: v_lshlrev_b32_e32 v1, 16/24, v0
        // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        // Only a shift by 16 (high word) or 24 (top byte) maps onto a sel.
        if (*Imm != 16 && *Imm != 24)

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
        // Matching is restricted to virtual registers.
        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))

        if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
          // Left shift fills the high part: record a dst pattern.
          auto SDWADst = make_unique<SDWADstOperand>(
              Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
          SDWAOperands[&MI] = std::move(SDWADst);
          ++NumSDWAPatternsFound;
          // Right shifts read the high part: record a src pattern,
          // sign-extending for the arithmetic (ashrrev) form.
          auto SDWASrc = make_unique<SDWASrcOperand>(
              Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
              Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
          SDWAOperands[&MI] = std::move(SDWASrc);
          ++NumSDWAPatternsFound;

      case AMDGPU::V_LSHRREV_B16_e32:
      case AMDGPU::V_ASHRREV_I16_e32:
      case AMDGPU::V_LSHLREV_B16_e32: {
        // from: v_lshrrev_b16_e32 v1, 8, v0
        // to SDWA src:v0 src_sel:BYTE_1

        // from: v_ashrrev_i16_e32 v1, 8, v0
        // to SDWA src:v0 src_sel:BYTE_1 sext:1

        // from: v_lshlrev_b16_e32 v1, 8, v0
        // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        // Only a byte shift in a 16-bit operation is representable.
        if (!Imm || *Imm != 8)

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))

        if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
              make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
          SDWAOperands[&MI] = std::move(SDWADst);
          ++NumSDWAPatternsFound;
          auto SDWASrc = make_unique<SDWASrcOperand>(
              Src1, Dst, BYTE_1, false, false,
              Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
          SDWAOperands[&MI] = std::move(SDWASrc);
          ++NumSDWAPatternsFound;

      case AMDGPU::V_BFE_I32:
      case AMDGPU::V_BFE_U32: {
        // from: v_bfe_u32 v1, v0, 8, 8
        // to SDWA src:v0 src_sel:BYTE_1

        // offset | width | src_sel
        // ------------------------
        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        auto Offset = foldToImm(*Src1);

        MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        auto Width = foldToImm(*Src2);

        // Map the (offset, width) bitfield onto a byte/word select.
        SdwaSel SrcSel = DWORD;

        if (*Offset == 0 && *Width == 8)
        else if (*Offset == 0 && *Width == 16)
        else if (*Offset == 0 && *Width == 32)
        else if (*Offset == 8 && *Width == 8)
        else if (*Offset == 16 && *Width == 8)
        else if (*Offset == 16 && *Width == 16)
        else if (*Offset == 24 && *Width == 8)

        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src0->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))

        // V_BFE_I32 sign-extends the extracted field, hence sext for it.
        auto SDWASrc = make_unique<SDWASrcOperand>(
            Src0, Dst, SrcSel, false, false,
            Opcode == AMDGPU::V_BFE_U32 ? false : true);
        DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
        SDWAOperands[&MI] = std::move(SDWASrc);
        ++NumSDWAPatternsFound;

      case AMDGPU::V_AND_B32_e32: {
        // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
        // to SDWA src:v0 src_sel:WORD_0/BYTE_0
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);

        // Only low-word and low-byte masks are representable as a sel.
        if (*Imm != 0x0000ffff && *Imm != 0x000000ff)

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))

        auto SDWASrc = make_unique<SDWASrcOperand>(
            Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
        DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
        SDWAOperands[&MI] = std::move(SDWASrc);
        ++NumSDWAPatternsFound;
// Build the _sdwa form of MI, copy its operands over, then apply every
// matched SDWA operand pattern to the new instruction. The original MI is
// erased on success; the new instruction is erased if no pattern applies.
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                   const SDWAOperandsVector &SDWAOperands) {
  // Check if this instruction can be converted to SDWA:
  // 1. Does this opcode support SDWA
  if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)

  // 2. Are all operands - VGPRs
  for (const MachineOperand &Operand : MI.explicit_operands()) {
    if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))

  int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
  assert(SDWAOpcode != -1);

  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);

  // Create SDWA version of instruction MI and initialize its operands
  MachineInstrBuilder SDWAInst =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);

  // Copy dst, if it is present in original then should also be present in SDWA
  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
    // Only compare (VOPC) instructions may legitimately lack a vdst.
    assert(TII->isVOPC(MI));

  // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
  // src0_modifiers (except for v_nop_sdwa, but it can't get here)
  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
         AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
         AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);

  // Copy src1 if present, initialize src1_modifiers.
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
         AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
         AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
    // Only single-operand (VOP1) instructions may lack a src1.
    assert(TII->isVOP1(MI));

  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
    // v_mac_f16/32 has additional src2 operand tied to vdst
    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);

  // The SDWA form must carry a clamp operand.
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);

  // Initialize dst_sel and dst_unused if present; patterns overwrite the
  // DWORD/UNUSED_PAD defaults later via SDWADstOperand::convertToSDWA.
         AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
         AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
    SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);

  // Initialize src0_sel
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
  SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

  // Initialize src1_sel if present
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

  // Apply all sdwa operand patterns
  bool Converted = false;
  for (auto &Operand : SDWAOperands) {
    Converted |= Operand->convertToSDWA(*SDWAInst, TII);
    // No pattern applied: discard the freshly built SDWA instruction.
    SDWAInst->eraseFromParent();

  DEBUG(dbgs() << "Convert instruction:" << MI
               << "Into:" << *SDWAInst << '\n');
  ++NumSDWAInstructionsPeepholed;

  // The original instruction is now fully replaced by SDWAInst.
  MI.eraseFromParent();
bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  // Bail out on subtargets without SDWA support.
      !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9

  MRI = &MF.getRegInfo();
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  // Groups all patterns that fold into the same candidate instruction.
  std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;

  // Phase 1: collect patterns into SDWAOperands.
  matchSDWAOperands(MF);

  // Phase 2: for each pattern, find the instruction it could fold into.
  for (auto &OperandPair : SDWAOperands) {
    auto &Operand = OperandPair.second;
    MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
      PotentialMatches[PotentialMI].push_back(std::move(Operand));

  // Phase 3: convert each candidate with all of its applicable patterns.
  for (auto &PotentialPair : PotentialMatches) {
    MachineInstr &PotentialMI = *PotentialPair.first;
    convertToSDWA(PotentialMI, PotentialPair.second);

  // Reset per-function state for the next run.
  SDWAOperands.clear();