contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

   1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
  12 /// will sometimes generate these illegal copies in situations like this:
  13 ///
  14 ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
  15 ///
  16 /// BB0:
  17 ///   %vreg0 <sgpr> = SCALAR_INST
  18 ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
  19 ///    ...
  20 ///    BRANCH %cond BB1, BB2
  21 ///  BB1:
  22 ///    %vreg2 <vgpr> = VECTOR_INST
  23 ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  24 ///  BB2:
   25 ///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  26 ///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
  27 ///
  28 ///
  29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
  30 /// code will look like this:
  31 ///
  32 /// BB0:
  33 ///   %vreg0 <sgpr> = SCALAR_INST
  34 ///    ...
  35 ///    BRANCH %cond BB1, BB2
  36 /// BB1:
  37 ///   %vreg2 <vgpr> = VECTOR_INST
  38 ///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  39 /// BB2:
  40 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  41 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  42 ///
  43 /// Now that the result of the PHI instruction is an SGPR, the register
  44 /// allocator is now forced to constrain the register class of %vreg3 to
  45 /// <sgpr> so we end up with final code like this:
  46 ///
  47 /// BB0:
  48 ///   %vreg0 <sgpr> = SCALAR_INST
  49 ///    ...
  50 ///    BRANCH %cond BB1, BB2
  51 /// BB1:
  52 ///   %vreg2 <vgpr> = VECTOR_INST
  53 ///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
  54 /// BB2:
  55 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
  56 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  57 ///
  58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
  59 ///
  60 /// In order to avoid this problem, this pass searches for PHI instructions
  61 /// which define a <vsrc> register and constrains its definition class to
  62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
  63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
   64 /// will be unable to perform the COPY removal from the above example, which
  65 /// ultimately led to the creation of an illegal COPY.
  66 //===----------------------------------------------------------------------===//
  67
  68 #include "AMDGPU.h"
  69 #include "AMDGPUSubtarget.h"
  70 #include "SIInstrInfo.h"
  71 #include "llvm/ADT/DenseSet.h"
  72 #include "llvm/CodeGen/MachineDominators.h"
  73 #include "llvm/CodeGen/MachineFunctionPass.h"
  74 #include "llvm/CodeGen/MachineInstrBuilder.h"
  75 #include "llvm/CodeGen/MachineRegisterInfo.h"
  76 #include "llvm/Support/Debug.h"
  77 #include "llvm/Support/raw_ostream.h"
  78 #include "llvm/Target/TargetMachine.h"
  79
  80 using namespace llvm;
  81
  82 #define DEBUG_TYPE "si-fix-sgpr-copies"
  83
  84 static cl::opt<bool> EnableM0Merge(
  85   "amdgpu-enable-merge-m0",
  86   cl::desc("Merge and hoist M0 initializations"),
  87   cl::init(false));
  88
  89 namespace {
  90
  91 class SIFixSGPRCopies : public MachineFunctionPass {
  92
  93   MachineDominatorTree *MDT;
  94
  95 public:
  96   static char ID;
  97
  98   SIFixSGPRCopies() : MachineFunctionPass(ID) { }
  99
 100   bool runOnMachineFunction(MachineFunction &MF) override;
 101
 102   StringRef getPassName() const override { return "SI Fix SGPR copies"; }
 103
 104   void getAnalysisUsage(AnalysisUsage &AU) const override {
 105     AU.addRequired<MachineDominatorTree>();
 106     AU.addPreserved<MachineDominatorTree>();
 107     AU.setPreservesCFG();
 108     MachineFunctionPass::getAnalysisUsage(AU);
 109   }
 110 };
 111
 112 } // End anonymous namespace
 113
 114 INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
 115                      "SI Fix SGPR copies", false, false)
 116 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 117 INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
 118                      "SI Fix SGPR copies", false, false)
 119
 120
 121 char SIFixSGPRCopies::ID = 0;
 122
 123 char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
 124
 125 FunctionPass *llvm::createSIFixSGPRCopiesPass() {
 126   return new SIFixSGPRCopies();
 127 }
 128
 129 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
 130   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 131   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 132     if (!MI.getOperand(i).isReg() ||
 133         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
 134       continue;
 135
 136     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
 137       return true;
 138   }
 139   return false;
 140 }
 141
 142 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 143 getCopyRegClasses(const MachineInstr &Copy,
 144                   const SIRegisterInfo &TRI,
 145                   const MachineRegisterInfo &MRI) {
 146   unsigned DstReg = Copy.getOperand(0).getReg();
 147   unsigned SrcReg = Copy.getOperand(1).getReg();
 148
 149   const TargetRegisterClass *SrcRC =
 150     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
 151     MRI.getRegClass(SrcReg) :
 152     TRI.getPhysRegClass(SrcReg);
 153
 154   // We don't really care about the subregister here.
 155   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
 156
 157   const TargetRegisterClass *DstRC =
 158     TargetRegisterInfo::isVirtualRegister(DstReg) ?
 159     MRI.getRegClass(DstReg) :
 160     TRI.getPhysRegClass(DstReg);
 161
 162   return std::make_pair(SrcRC, DstRC);
 163 }
 164
 165 static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
 166                              const TargetRegisterClass *DstRC,
 167                              const SIRegisterInfo &TRI) {
 168   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
 169 }
 170
 171 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
 172                              const TargetRegisterClass *DstRC,
 173                              const SIRegisterInfo &TRI) {
 174   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
 175 }
 176
 177 static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
 178                                       const SIRegisterInfo *TRI,
 179                                       const SIInstrInfo *TII) {
 180   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 181   auto &Src = MI.getOperand(1);
 182   unsigned DstReg = MI.getOperand(0).getReg();
 183   unsigned SrcReg = Src.getReg();
 184   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
 185       !TargetRegisterInfo::isVirtualRegister(DstReg))
 186     return false;
 187
 188   for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
 189     const auto *UseMI = MO.getParent();
 190     if (UseMI == &MI)
 191       continue;
 192     if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
 193         UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
 194         !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
 195       return false;
 196   }
 197   // Change VGPR to SGPR destination.
 198   MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
 199   return true;
 200 }
 201
 202 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
 203 //
 204 // SGPRx = ...
 205 // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 206 // VGPRz = COPY SGPRy
 207 //
 208 // ==>
 209 //
 210 // VGPRx = COPY SGPRx
 211 // VGPRz = REG_SEQUENCE VGPRx, sub0
 212 //
 213 // This exposes immediate folding opportunities when materializing 64-bit
 214 // immediates.
 215 static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
 216                                         const SIRegisterInfo *TRI,
 217                                         const SIInstrInfo *TII,
 218                                         MachineRegisterInfo &MRI) {
 219   assert(MI.isRegSequence());
 220
 221   unsigned DstReg = MI.getOperand(0).getReg();
 222   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
 223     return false;
 224
 225   if (!MRI.hasOneUse(DstReg))
 226     return false;
 227
 228   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
 229   if (!CopyUse.isCopy())
 230     return false;
 231
 232   // It is illegal to have vreg inputs to a physreg defining reg_sequence.
 233   if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
 234     return false;
 235
 236   const TargetRegisterClass *SrcRC, *DstRC;
 237   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
 238
 239   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
 240     return false;
 241
 242   if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
 243     return true;
 244
 245   // TODO: Could have multiple extracts?
 246   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
 247   if (SubReg != AMDGPU::NoSubRegister)
 248     return false;
 249
 250   MRI.setRegClass(DstReg, DstRC);
 251
 252   // SGPRx = ...
 253   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 254   // VGPRz = COPY SGPRy
 255
 256   // =>
 257   // VGPRx = COPY SGPRx
 258   // VGPRz = REG_SEQUENCE VGPRx, sub0
 259
 260   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
 261
 262   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
 263     unsigned SrcReg = MI.getOperand(I).getReg();
 264     unsigned SrcSubReg = MI.getOperand(I).getSubReg();
 265
 266     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
 267     assert(TRI->isSGPRClass(SrcRC) &&
 268            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
 269
 270     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
 271     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
 272
 273     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
 274
 275     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
 276             TmpReg)
 277         .add(MI.getOperand(I));
 278
 279     MI.getOperand(I).setReg(TmpReg);
 280   }
 281
 282   CopyUse.eraseFromParent();
 283   return true;
 284 }
 285
 286 static bool phiHasVGPROperands(const MachineInstr &PHI,
 287                                const MachineRegisterInfo &MRI,
 288                                const SIRegisterInfo *TRI,
 289                                const SIInstrInfo *TII) {
 290
 291   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
 292     unsigned Reg = PHI.getOperand(i).getReg();
 293     if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
 294       return true;
 295   }
 296   return false;
 297 }
 298 static bool phiHasBreakDef(const MachineInstr &PHI,
 299                            const MachineRegisterInfo &MRI,
 300                            SmallSet<unsigned, 8> &Visited) {
 301
 302   for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
 303     unsigned Reg = PHI.getOperand(i).getReg();
 304     if (Visited.count(Reg))
 305       continue;
 306
 307     Visited.insert(Reg);
 308
 309     MachineInstr *DefInstr = MRI.getVRegDef(Reg);
 310     switch (DefInstr->getOpcode()) {
 311     default:
 312       break;
 313     case AMDGPU::SI_BREAK:
 314     case AMDGPU::SI_IF_BREAK:
 315     case AMDGPU::SI_ELSE_BREAK:
 316       return true;
 317     case AMDGPU::PHI:
 318       if (phiHasBreakDef(*DefInstr, MRI, Visited))
 319         return true;
 320     }
 321   }
 322   return false;
 323 }
 324
 325 static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
 326                                           const TargetRegisterInfo &TRI) {
 327   for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
 328        E = MBB.end(); I != E; ++I) {
 329     if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
 330       return true;
 331   }
 332   return false;
 333 }
 334
 335 static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
 336                                     const MachineInstr *MoveImm,
 337                                     const SIInstrInfo *TII,
 338                                     unsigned &SMovOp,
 339                                     int64_t &Imm) {
 340
 341   if (!MoveImm->isMoveImmediate())
 342     return false;
 343
 344   const MachineOperand *ImmOp =
 345       TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
 346   if (!ImmOp->isImm())
 347     return false;
 348
 349   // FIXME: Handle copies with sub-regs.
 350   if (Copy->getOperand(0).getSubReg())
 351     return false;
 352
 353   switch (MoveImm->getOpcode()) {
 354   default:
 355     return false;
 356   case AMDGPU::V_MOV_B32_e32:
 357     SMovOp = AMDGPU::S_MOV_B32;
 358     break;
 359   case AMDGPU::V_MOV_B64_PSEUDO:
 360     SMovOp = AMDGPU::S_MOV_B64;
 361     break;
 362   }
 363   Imm = ImmOp->getImm();
 364   return true;
 365 }
 366
 367 template <class UnaryPredicate>
 368 bool searchPredecessors(const MachineBasicBlock *MBB,
 369                         const MachineBasicBlock *CutOff,
 370                         UnaryPredicate Predicate) {
 371
 372   if (MBB == CutOff)
 373     return false;
 374
 375   DenseSet<const MachineBasicBlock*> Visited;
 376   SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
 377                                               MBB->pred_end());
 378
 379   while (!Worklist.empty()) {
 380     MachineBasicBlock *MBB = Worklist.pop_back_val();
 381
 382     if (!Visited.insert(MBB).second)
 383       continue;
 384     if (MBB == CutOff)
 385       continue;
 386     if (Predicate(MBB))
 387       return true;
 388
 389     Worklist.append(MBB->pred_begin(), MBB->pred_end());
 390   }
 391
 392   return false;
 393 }
 394
 395 static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
 396                                         const TargetRegisterInfo *TRI) {
 397   return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
 398            return hasTerminatorThatModifiesExec(*MBB, *TRI); });
 399 }
 400
 401 // Checks if there is potential path From instruction To instruction.
 402 // If CutOff is specified and it sits in between of that path we ignore
 403 // a higher portion of the path and report it is not reachable.
 404 static bool isReachable(const MachineInstr *From,
 405                         const MachineInstr *To,
 406                         const MachineBasicBlock *CutOff,
 407                         MachineDominatorTree &MDT) {
 408   // If either From block dominates To block or instructions are in the same
 409   // block and From is higher.
 410   if (MDT.dominates(From, To))
 411     return true;
 412
 413   const MachineBasicBlock *MBBFrom = From->getParent();
 414   const MachineBasicBlock *MBBTo = To->getParent();
 415   if (MBBFrom == MBBTo)
 416     return false;
 417
 418   // Instructions are in different blocks, do predecessor search.
 419   // We should almost never get here since we do not usually produce M0 stores
 420   // other than -1.
 421   return searchPredecessors(MBBTo, CutOff, [MBBFrom]
 422            (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
 423 }
 424
 425 // Hoist and merge identical SGPR initializations into a common predecessor.
 426 // This is intended to combine M0 initializations, but can work with any
 427 // SGPR. A VGPR cannot be processed since we cannot guarantee vector
 428 // executioon.
 429 static bool hoistAndMergeSGPRInits(unsigned Reg,
 430                                    const MachineRegisterInfo &MRI,
 431                                    MachineDominatorTree &MDT) {
 432   // List of inits by immediate value.
 433   typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap;
 434   InitListMap Inits;
 435   // List of clobbering instructions.
 436   SmallVector<MachineInstr*, 8> Clobbers;
 437   bool Changed = false;
 438
 439   for (auto &MI : MRI.def_instructions(Reg)) {
 440     MachineOperand *Imm = nullptr;
 441     for (auto &MO: MI.operands()) {
 442       if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
 443           (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
 444         Imm = nullptr;
 445         break;
 446       } else if (MO.isImm())
 447         Imm = &MO;
 448     }
 449     if (Imm)
 450       Inits[Imm->getImm()].push_front(&MI);
 451     else
 452       Clobbers.push_back(&MI);
 453   }
 454
 455   for (auto &Init : Inits) {
 456     auto &Defs = Init.second;
 457
 458     for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
 459       MachineInstr *MI1 = *I1;
 460
 461       for (auto I2 = std::next(I1); I2 != E; ) {
 462         MachineInstr *MI2 = *I2;
 463
 464         // Check any possible interference
 465         auto intereferes = [&](MachineBasicBlock::iterator From,
 466                                MachineBasicBlock::iterator To) -> bool {
 467
 468           assert(MDT.dominates(&*To, &*From));
 469
 470           auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
 471             const MachineBasicBlock *MBBFrom = From->getParent();
 472             const MachineBasicBlock *MBBTo = To->getParent();
 473             bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
 474             bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
 475             if (!MayClobberFrom && !MayClobberTo)
 476               return false;
 477             if ((MayClobberFrom && !MayClobberTo) ||
 478                 (!MayClobberFrom && MayClobberTo))
 479               return true;
 480             // Both can clobber, this is not an interference only if both are
 481             // dominated by Clobber and belong to the same block or if Clobber
 482             // properly dominates To, given that To >> From, so it dominates
 483             // both and located in a common dominator.
 484             return !((MBBFrom == MBBTo &&
 485                       MDT.dominates(Clobber, &*From) &&
 486                       MDT.dominates(Clobber, &*To)) ||
 487                      MDT.properlyDominates(Clobber->getParent(), MBBTo));
 488           };
 489
 490           return (any_of(Clobbers, interferes)) ||
 491                  (any_of(Inits, [&](InitListMap::value_type &C) {
 492                     return C.first != Init.first && any_of(C.second, interferes);
 493                   }));
 494         };
 495
 496         if (MDT.dominates(MI1, MI2)) {
 497           if (!intereferes(MI2, MI1)) {
 498             DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber()
 499                          << " " << *MI2);
 500             MI2->eraseFromParent();
 501             Defs.erase(I2++);
 502             Changed = true;
 503             continue;
 504           }
 505         } else if (MDT.dominates(MI2, MI1)) {
 506           if (!intereferes(MI1, MI2)) {
 507             DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
 508                          << " " << *MI1);
 509             MI1->eraseFromParent();
 510             Defs.erase(I1++);
 511             Changed = true;
 512             break;
 513           }
 514         } else {
 515           auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
 516                                                      MI2->getParent());
 517           if (!MBB) {
 518             ++I2;
 519             continue;
 520           }
 521
 522           MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
 523           if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
 524             DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
 525                          << " " << *MI1 << "and moving from BB#"
 526                          << MI2->getParent()->getNumber() << " to BB#"
 527                          << I->getParent()->getNumber() << " " << *MI2);
 528             I->getParent()->splice(I, MI2->getParent(), MI2);
 529             MI1->eraseFromParent();
 530             Defs.erase(I1++);
 531             Changed = true;
 532             break;
 533           }
 534         }
 535         ++I2;
 536       }
 537       ++I1;
 538     }
 539   }
 540
 541   if (Changed)
 542     MRI.clearKillFlags(Reg);
 543
 544   return Changed;
 545 }
 546
 547 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 548   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 549   MachineRegisterInfo &MRI = MF.getRegInfo();
 550   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 551   const SIInstrInfo *TII = ST.getInstrInfo();
 552   MDT = &getAnalysis<MachineDominatorTree>();
 553
 554   SmallVector<MachineInstr *, 16> Worklist;
 555
 556   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
 557                                                   BI != BE; ++BI) {
 558
 559     MachineBasicBlock &MBB = *BI;
 560     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
 561          I != E; ++I) {
 562       MachineInstr &MI = *I;
 563
 564       switch (MI.getOpcode()) {
 565       default:
 566         continue;
 567       case AMDGPU::COPY: {
 568         // If the destination register is a physical register there isn't really
 569         // much we can do to fix this.
 570         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
 571           continue;
 572
 573         const TargetRegisterClass *SrcRC, *DstRC;
 574         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
 575         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
 576           unsigned SrcReg = MI.getOperand(1).getReg();
 577           if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
 578             TII->moveToVALU(MI);
 579             break;
 580           }
 581
 582           MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
 583           unsigned SMovOp;
 584           int64_t Imm;
 585           // If we are just copying an immediate, we can replace the copy with
 586           // s_mov_b32.
 587           if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
 588             MI.getOperand(1).ChangeToImmediate(Imm);
 589             MI.addImplicitDefUseOperands(MF);
 590             MI.setDesc(TII->get(SMovOp));
 591             break;
 592           }
 593           TII->moveToVALU(MI);
 594         } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
 595           tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
 596         }
 597
 598         break;
 599       }
 600       case AMDGPU::PHI: {
 601         unsigned Reg = MI.getOperand(0).getReg();
 602         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
 603           break;
 604
 605         // We don't need to fix the PHI if the common dominator of the
 606         // two incoming blocks terminates with a uniform branch.
 607         if (MI.getNumExplicitOperands() == 5) {
 608           MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
 609           MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
 610
 611           if (!predsHasDivergentTerminator(MBB0, TRI) &&
 612               !predsHasDivergentTerminator(MBB1, TRI)) {
 613             DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
 614             break;
 615           }
 616         }
 617
 618         // If a PHI node defines an SGPR and any of its operands are VGPRs,
 619         // then we need to move it to the VALU.
 620         //
 621         // Also, if a PHI node defines an SGPR and has all SGPR operands
 622         // we must move it to the VALU, because the SGPR operands will
 623         // all end up being assigned the same register, which means
 624         // there is a potential for a conflict if different threads take
 625         // different control flow paths.
 626         //
 627         // For Example:
 628         //
 629         // sgpr0 = def;
 630         // ...
 631         // sgpr1 = def;
 632         // ...
 633         // sgpr2 = PHI sgpr0, sgpr1
 634         // use sgpr2;
 635         //
 636         // Will Become:
 637         //
 638         // sgpr2 = def;
 639         // ...
 640         // sgpr2 = def;
 641         // ...
 642         // use sgpr2
 643         //
 644         // The one exception to this rule is when one of the operands
 645         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
 646         // instruction.  In this case, there we know the program will
 647         // never enter the second block (the loop) without entering
 648         // the first block (where the condition is computed), so there
 649         // is no chance for values to be over-written.
 650
 651         SmallSet<unsigned, 8> Visited;
 652         if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
 653             !phiHasBreakDef(MI, MRI, Visited)) {
 654           DEBUG(dbgs() << "Fixing PHI: " << MI);
 655           TII->moveToVALU(MI);
 656         }
 657         break;
 658       }
 659       case AMDGPU::REG_SEQUENCE: {
 660         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
 661             !hasVGPROperands(MI, TRI)) {
 662           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
 663           continue;
 664         }
 665
 666         DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
 667
 668         TII->moveToVALU(MI);
 669         break;
 670       }
 671       case AMDGPU::INSERT_SUBREG: {
 672         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
 673         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
 674         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
 675         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
 676         if (TRI->isSGPRClass(DstRC) &&
 677             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
 678           DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
 679           TII->moveToVALU(MI);
 680         }
 681         break;
 682       }
 683       }
 684     }
 685   }
 686
 687   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
 688     hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
 689
 690   return true;
 691 }