contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

   1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// SI implementation of the TargetRegisterInfo class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "SIRegisterInfo.h"
  15 #include "AMDGPURegisterBankInfo.h"
  16 #include "AMDGPUSubtarget.h"
  17 #include "SIInstrInfo.h"
  18 #include "SIMachineFunctionInfo.h"
  19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
  20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  21 #include "llvm/CodeGen/LiveIntervals.h"
  22 #include "llvm/CodeGen/MachineDominators.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineInstrBuilder.h"
  25 #include "llvm/CodeGen/RegisterScavenging.h"
  26 #include "llvm/CodeGen/SlotIndexes.h"
  27 #include "llvm/IR/Function.h"
  28 #include "llvm/IR/LLVMContext.h"
  29
  30 using namespace llvm;
  31
  32 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  33   for (unsigned i = 0; PSets[i] != -1; ++i) {
  34     if (PSets[i] == (int)PSetID)
  35       return true;
  36   }
  37   return false;
  38 }
  39
  40 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
  41                                          BitVector &PressureSets) const {
  42   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
  43     const int *PSets = getRegUnitPressureSets(*U);
  44     if (hasPressureSet(PSets, PSetID)) {
  45       PressureSets.set(PSetID);
  46       break;
  47     }
  48   }
  49 }
  50
  51 static cl::opt<bool> EnableSpillSGPRToSMEM(
  52   "amdgpu-spill-sgpr-to-smem",
  53   cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
  54   cl::init(false));
  55
  56 static cl::opt<bool> EnableSpillSGPRToVGPR(
  57   "amdgpu-spill-sgpr-to-vgpr",
  58   cl::desc("Enable spilling VGPRs to SGPRs"),
  59   cl::ReallyHidden,
  60   cl::init(true));
  61
  62 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
  63   AMDGPURegisterInfo(),
  64   SGPRPressureSets(getNumRegPressureSets()),
  65   VGPRPressureSets(getNumRegPressureSets()),
  66   AGPRPressureSets(getNumRegPressureSets()),
  67   SpillSGPRToVGPR(false),
  68   SpillSGPRToSMEM(false),
  69   isWave32(ST.isWave32()) {
  70   if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
  71     SpillSGPRToSMEM = true;
  72   else if (EnableSpillSGPRToVGPR)
  73     SpillSGPRToVGPR = true;
  74
  75   unsigned NumRegPressureSets = getNumRegPressureSets();
  76
  77   SGPRSetID = NumRegPressureSets;
  78   VGPRSetID = NumRegPressureSets;
  79   AGPRSetID = NumRegPressureSets;
  80
  81   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
  82     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
  83     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  84     classifyPressureSet(i, AMDGPU::AGPR0, AGPRPressureSets);
  85   }
  86
  87   // Determine the number of reg units for each pressure set.
  88   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  89   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
  90     const int *PSets = getRegUnitPressureSets(i);
  91     for (unsigned j = 0; PSets[j] != -1; ++j) {
  92       ++PressureSetRegUnits[PSets[j]];
  93     }
  94   }
  95
  96   unsigned VGPRMax = 0, SGPRMax = 0, AGPRMax = 0;
  97   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
  98     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
  99       VGPRSetID = i;
 100       VGPRMax = PressureSetRegUnits[i];
 101       continue;
 102     }
 103     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
 104       SGPRSetID = i;
 105       SGPRMax = PressureSetRegUnits[i];
 106     }
 107     if (isAGPRPressureSet(i) && PressureSetRegUnits[i] > AGPRMax) {
 108       AGPRSetID = i;
 109       AGPRMax = PressureSetRegUnits[i];
 110       continue;
 111     }
 112   }
 113
 114   assert(SGPRSetID < NumRegPressureSets &&
 115          VGPRSetID < NumRegPressureSets &&
 116          AGPRSetID < NumRegPressureSets);
 117 }
 118
 119 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
 120   const MachineFunction &MF) const {
 121
 122   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 123   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
 124   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
 125   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
 126 }
 127
 128 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
 129   unsigned Reg;
 130
 131   // Try to place it in a hole after PrivateSegmentBufferReg.
 132   if (RegCount & 3) {
 133     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
 134     // alignment constraints, so we have a hole where can put the wave offset.
 135     Reg = RegCount - 1;
 136   } else {
 137     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
 138     // wave offset before it.
 139     Reg = RegCount - 5;
 140   }
 141
 142   return Reg;
 143 }
 144
 145 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
 146   const MachineFunction &MF) const {
 147   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 148   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
 149   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
 150 }
 151
 152 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 153   BitVector Reserved(getNumRegs());
 154
 155   // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
 156   // this seems likely to result in bugs, so I'm marking them as reserved.
 157   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
 158   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
 159
 160   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
 161   reserveRegisterTuples(Reserved, AMDGPU::M0);
 162
 163   // Reserve src_vccz, src_execz, src_scc.
 164   reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
 165   reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
 166   reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
 167
 168   // Reserve the memory aperture registers.
 169   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
 170   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
 171   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
 172   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
 173
 174   // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
 175   reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
 176
 177   // Reserve xnack_mask registers - support is not implemented in Codegen.
 178   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
 179
 180   // Reserve lds_direct register - support is not implemented in Codegen.
 181   reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
 182
 183   // Reserve Trap Handler registers - support is not implemented in Codegen.
 184   reserveRegisterTuples(Reserved, AMDGPU::TBA);
 185   reserveRegisterTuples(Reserved, AMDGPU::TMA);
 186   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
 187   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
 188   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
 189   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
 190   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
 191   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
 192   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
 193   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
 194
 195   // Reserve null register - it shall never be allocated
 196   reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
 197
 198   // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
 199   // will result in bugs.
 200   if (isWave32) {
 201     Reserved.set(AMDGPU::VCC);
 202     Reserved.set(AMDGPU::VCC_HI);
 203   }
 204
 205   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 206
 207   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
 208   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
 209   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
 210     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
 211     reserveRegisterTuples(Reserved, Reg);
 212   }
 213
 214   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
 215   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
 216   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
 217     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
 218     reserveRegisterTuples(Reserved, Reg);
 219     Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
 220     reserveRegisterTuples(Reserved, Reg);
 221   }
 222
 223   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 224
 225   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
 226   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
 227     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
 228     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
 229   }
 230
 231   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
 232   if (ScratchRSrcReg != AMDGPU::NoRegister) {
 233     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
 234     // to spill.
 235     // TODO: May need to reserve a VGPR if doing LDS spilling.
 236     reserveRegisterTuples(Reserved, ScratchRSrcReg);
 237     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
 238   }
 239
 240   // We have to assume the SP is needed in case there are calls in the function,
 241   // which is detected after the function is lowered. If we aren't really going
 242   // to need SP, don't bother reserving it.
 243   unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
 244
 245   if (StackPtrReg != AMDGPU::NoRegister) {
 246     reserveRegisterTuples(Reserved, StackPtrReg);
 247     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
 248   }
 249
 250   unsigned FrameReg = MFI->getFrameOffsetReg();
 251   if (FrameReg != AMDGPU::NoRegister) {
 252     reserveRegisterTuples(Reserved, FrameReg);
 253     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
 254   }
 255
 256   for (unsigned Reg : MFI->WWMReservedRegs) {
 257     reserveRegisterTuples(Reserved, Reg);
 258   }
 259
 260   // FIXME: Stop using reserved registers for this.
 261   for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
 262     reserveRegisterTuples(Reserved, Reg);
 263
 264   for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
 265     reserveRegisterTuples(Reserved, Reg);
 266
 267   return Reserved;
 268 }
 269
 270 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
 271   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 272   // On entry, the base address is 0, so it can't possibly need any more
 273   // alignment.
 274
 275   // FIXME: Should be able to specify the entry frame alignment per calling
 276   // convention instead.
 277   if (Info->isEntryFunction())
 278     return false;
 279
 280   return TargetRegisterInfo::canRealignStack(MF);
 281 }
 282
 283 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
 284   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
 285   if (Info->isEntryFunction()) {
 286     const MachineFrameInfo &MFI = Fn.getFrameInfo();
 287     return MFI.hasStackObjects() || MFI.hasCalls();
 288   }
 289
 290   // May need scavenger for dealing with callee saved registers.
 291   return true;
 292 }
 293
 294 bool SIRegisterInfo::requiresFrameIndexScavenging(
 295   const MachineFunction &MF) const {
 296   const MachineFrameInfo &MFI = MF.getFrameInfo();
 297   if (MFI.hasStackObjects())
 298     return true;
 299
 300   // May need to deal with callee saved registers.
 301   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 302   return !Info->isEntryFunction();
 303 }
 304
 305 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
 306   const MachineFunction &MF) const {
 307   const MachineFrameInfo &MFI = MF.getFrameInfo();
 308   if (!MFI.hasStackObjects())
 309     return false;
 310
 311   // The scavenger is used for large frames which may require finding a free
 312   // register for large offsets.
 313   if (!isUInt<12>(MFI.getStackSize()))
 314     return true;
 315
 316   // If using scalar stores, for spills, m0 is needed for the scalar store
 317   // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
 318   // register for it during frame index elimination, so the scavenger is
 319   // directly needed.
 320   return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
 321          MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
 322 }
 323
 324 bool SIRegisterInfo::requiresVirtualBaseRegisters(
 325   const MachineFunction &) const {
 326   // There are no special dedicated stack or frame pointers.
 327   return true;
 328 }
 329
 330 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
 331   // This helps catch bugs as verifier errors.
 332   return true;
 333 }
 334
 335 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
 336   assert(SIInstrInfo::isMUBUF(*MI));
 337
 338   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
 339                                           AMDGPU::OpName::offset);
 340   return MI->getOperand(OffIdx).getImm();
 341 }
 342
 343 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
 344                                                  int Idx) const {
 345   if (!SIInstrInfo::isMUBUF(*MI))
 346     return 0;
 347
 348   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
 349                                            AMDGPU::OpName::vaddr) &&
 350          "Should never see frame index on non-address operand");
 351
 352   return getMUBUFInstrOffset(MI);
 353 }
 354
 355 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
 356   if (!MI->mayLoadOrStore())
 357     return false;
 358
 359   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
 360
 361   return !isUInt<12>(FullOffset);
 362 }
 363
 364 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 365                                                   unsigned BaseReg,
 366                                                   int FrameIdx,
 367                                                   int64_t Offset) const {
 368   MachineBasicBlock::iterator Ins = MBB->begin();
 369   DebugLoc DL; // Defaults to "unknown"
 370
 371   if (Ins != MBB->end())
 372     DL = Ins->getDebugLoc();
 373
 374   MachineFunction *MF = MBB->getParent();
 375   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
 376   const SIInstrInfo *TII = Subtarget.getInstrInfo();
 377
 378   if (Offset == 0) {
 379     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
 380       .addFrameIndex(FrameIdx);
 381     return;
 382   }
 383
 384   MachineRegisterInfo &MRI = MF->getRegInfo();
 385   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 386
 387   unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 388
 389   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
 390     .addImm(Offset);
 391   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
 392     .addFrameIndex(FrameIdx);
 393
 394   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
 395     .addReg(OffsetReg, RegState::Kill)
 396     .addReg(FIReg)
 397     .addImm(0); // clamp bit
 398 }
 399
 400 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
 401                                        int64_t Offset) const {
 402
 403   MachineBasicBlock *MBB = MI.getParent();
 404   MachineFunction *MF = MBB->getParent();
 405   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
 406   const SIInstrInfo *TII = Subtarget.getInstrInfo();
 407
 408 #ifndef NDEBUG
 409   // FIXME: Is it possible to be storing a frame index to itself?
 410   bool SeenFI = false;
 411   for (const MachineOperand &MO: MI.operands()) {
 412     if (MO.isFI()) {
 413       if (SeenFI)
 414         llvm_unreachable("should not see multiple frame indices");
 415
 416       SeenFI = true;
 417     }
 418   }
 419 #endif
 420
 421   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
 422   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
 423   assert(TII->isMUBUF(MI));
 424   assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
 425          MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() &&
 426          "should only be seeing frame offset relative FrameIndex");
 427
 428
 429   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
 430   int64_t NewOffset = OffsetOp->getImm() + Offset;
 431   assert(isUInt<12>(NewOffset) && "offset should be legal");
 432
 433   FIOp->ChangeToRegister(BaseReg, false);
 434   OffsetOp->setImm(NewOffset);
 435 }
 436
 437 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
 438                                         unsigned BaseReg,
 439                                         int64_t Offset) const {
 440   if (!SIInstrInfo::isMUBUF(*MI))
 441     return false;
 442
 443   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
 444
 445   return isUInt<12>(NewOffset);
 446 }
 447
 448 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
 449   const MachineFunction &MF, unsigned Kind) const {
 450   // This is inaccurate. It depends on the instruction and address space. The
 451   // only place where we should hit this is for dealing with frame indexes /
 452   // private accesses, so this is correct in that case.
 453   return &AMDGPU::VGPR_32RegClass;
 454 }
 455
 456 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
 457
 458   switch (Op) {
 459   case AMDGPU::SI_SPILL_S1024_SAVE:
 460   case AMDGPU::SI_SPILL_S1024_RESTORE:
 461   case AMDGPU::SI_SPILL_V1024_SAVE:
 462   case AMDGPU::SI_SPILL_V1024_RESTORE:
 463   case AMDGPU::SI_SPILL_A1024_SAVE:
 464   case AMDGPU::SI_SPILL_A1024_RESTORE:
 465     return 32;
 466   case AMDGPU::SI_SPILL_S512_SAVE:
 467   case AMDGPU::SI_SPILL_S512_RESTORE:
 468   case AMDGPU::SI_SPILL_V512_SAVE:
 469   case AMDGPU::SI_SPILL_V512_RESTORE:
 470   case AMDGPU::SI_SPILL_A512_SAVE:
 471   case AMDGPU::SI_SPILL_A512_RESTORE:
 472     return 16;
 473   case AMDGPU::SI_SPILL_S256_SAVE:
 474   case AMDGPU::SI_SPILL_S256_RESTORE:
 475   case AMDGPU::SI_SPILL_V256_SAVE:
 476   case AMDGPU::SI_SPILL_V256_RESTORE:
 477     return 8;
 478   case AMDGPU::SI_SPILL_S160_SAVE:
 479   case AMDGPU::SI_SPILL_S160_RESTORE:
 480   case AMDGPU::SI_SPILL_V160_SAVE:
 481   case AMDGPU::SI_SPILL_V160_RESTORE:
 482     return 5;
 483   case AMDGPU::SI_SPILL_S128_SAVE:
 484   case AMDGPU::SI_SPILL_S128_RESTORE:
 485   case AMDGPU::SI_SPILL_V128_SAVE:
 486   case AMDGPU::SI_SPILL_V128_RESTORE:
 487   case AMDGPU::SI_SPILL_A128_SAVE:
 488   case AMDGPU::SI_SPILL_A128_RESTORE:
 489     return 4;
 490   case AMDGPU::SI_SPILL_S96_SAVE:
 491   case AMDGPU::SI_SPILL_S96_RESTORE:
 492   case AMDGPU::SI_SPILL_V96_SAVE:
 493   case AMDGPU::SI_SPILL_V96_RESTORE:
 494     return 3;
 495   case AMDGPU::SI_SPILL_S64_SAVE:
 496   case AMDGPU::SI_SPILL_S64_RESTORE:
 497   case AMDGPU::SI_SPILL_V64_SAVE:
 498   case AMDGPU::SI_SPILL_V64_RESTORE:
 499   case AMDGPU::SI_SPILL_A64_SAVE:
 500   case AMDGPU::SI_SPILL_A64_RESTORE:
 501     return 2;
 502   case AMDGPU::SI_SPILL_S32_SAVE:
 503   case AMDGPU::SI_SPILL_S32_RESTORE:
 504   case AMDGPU::SI_SPILL_V32_SAVE:
 505   case AMDGPU::SI_SPILL_V32_RESTORE:
 506   case AMDGPU::SI_SPILL_A32_SAVE:
 507   case AMDGPU::SI_SPILL_A32_RESTORE:
 508     return 1;
 509   default: llvm_unreachable("Invalid spill opcode");
 510   }
 511 }
 512
 513 static int getOffsetMUBUFStore(unsigned Opc) {
 514   switch (Opc) {
 515   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
 516     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
 517   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
 518     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
 519   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
 520     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
 521   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
 522     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
 523   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
 524     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
 525   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
 526     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
 527   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
 528     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
 529   default:
 530     return -1;
 531   }
 532 }
 533
 534 static int getOffsetMUBUFLoad(unsigned Opc) {
 535   switch (Opc) {
 536   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
 537     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
 538   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
 539     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
 540   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
 541     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
 542   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
 543     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
 544   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
 545     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
 546   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
 547     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
 548   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
 549     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
 550   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
 551     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
 552   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
 553     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
 554   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
 555     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
 556   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
 557     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
 558   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
 559     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
 560   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
 561     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
 562   default:
 563     return -1;
 564   }
 565 }
 566
 567 static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
 568                                            int Index,
 569                                            unsigned Lane,
 570                                            unsigned ValueReg,
 571                                            bool IsKill) {
 572   MachineBasicBlock *MBB = MI->getParent();
 573   MachineFunction *MF = MI->getParent()->getParent();
 574   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 575   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
 576   const SIInstrInfo *TII = ST.getInstrInfo();
 577
 578   MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
 579
 580   if (Reg == AMDGPU::NoRegister)
 581     return MachineInstrBuilder();
 582
 583   bool IsStore = MI->mayStore();
 584   MachineRegisterInfo &MRI = MF->getRegInfo();
 585   auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
 586
 587   unsigned Dst = IsStore ? Reg : ValueReg;
 588   unsigned Src = IsStore ? ValueReg : Reg;
 589   unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
 590                                                    : AMDGPU::V_ACCVGPR_READ_B32;
 591
 592   return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
 593            .addReg(Src, getKillRegState(IsKill));
 594 }
 595
 596 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
 597 // need to handle the case where an SGPR may need to be spilled while spilling.
 598 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
 599                                       MachineFrameInfo &MFI,
 600                                       MachineBasicBlock::iterator MI,
 601                                       int Index,
 602                                       int64_t Offset) {
 603   MachineBasicBlock *MBB = MI->getParent();
 604   const DebugLoc &DL = MI->getDebugLoc();
 605   bool IsStore = MI->mayStore();
 606
 607   unsigned Opc = MI->getOpcode();
 608   int LoadStoreOp = IsStore ?
 609     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
 610   if (LoadStoreOp == -1)
 611     return false;
 612
 613   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
 614   if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
 615     return true;
 616
 617   MachineInstrBuilder NewMI =
 618       BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
 619           .add(*Reg)
 620           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
 621           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
 622           .addImm(Offset)
 623           .addImm(0) // glc
 624           .addImm(0) // slc
 625           .addImm(0) // tfe
 626           .addImm(0) // dlc
 627           .cloneMemRefs(*MI);
 628
 629   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
 630                                                        AMDGPU::OpName::vdata_in);
 631   if (VDataIn)
 632     NewMI.add(*VDataIn);
 633   return true;
 634 }
 635
 636 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
 637                                          unsigned LoadStoreOp,
 638                                          int Index,
 639                                          unsigned ValueReg,
 640                                          bool IsKill,
 641                                          unsigned ScratchRsrcReg,
 642                                          unsigned ScratchOffsetReg,
 643                                          int64_t InstOffset,
 644                                          MachineMemOperand *MMO,
 645                                          RegScavenger *RS) const {
 646   MachineBasicBlock *MBB = MI->getParent();
 647   MachineFunction *MF = MI->getParent()->getParent();
 648   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
 649   const SIInstrInfo *TII = ST.getInstrInfo();
 650   const MachineFrameInfo &MFI = MF->getFrameInfo();
 651
 652   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
 653   const DebugLoc &DL = MI->getDebugLoc();
 654   bool IsStore = Desc.mayStore();
 655
 656   bool Scavenged = false;
 657   unsigned SOffset = ScratchOffsetReg;
 658
 659   const unsigned EltSize = 4;
 660   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
 661   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
 662   unsigned Size = NumSubRegs * EltSize;
 663   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
 664   int64_t ScratchOffsetRegDelta = 0;
 665
 666   unsigned Align = MFI.getObjectAlignment(Index);
 667   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
 668
 669   Register TmpReg =
 670     hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
 671                  : Register();
 672
 673   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
 674
 675   if (!isUInt<12>(Offset + Size - EltSize)) {
 676     SOffset = AMDGPU::NoRegister;
 677
 678     // We currently only support spilling VGPRs to EltSize boundaries, meaning
 679     // we can simplify the adjustment of Offset here to just scale with
 680     // WavefrontSize.
 681     Offset *= ST.getWavefrontSize();
 682
 683     // We don't have access to the register scavenger if this function is called
 684     // during  PEI::scavengeFrameVirtualRegs().
 685     if (RS)
 686       SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
 687
 688     if (SOffset == AMDGPU::NoRegister) {
 689       // There are no free SGPRs, and since we are in the process of spilling
 690       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
 691       // on SI/CI and on VI it is true until we implement spilling using scalar
 692       // stores), we have no way to free up an SGPR.  Our solution here is to
 693       // add the offset directly to the ScratchOffset register, and then
 694       // subtract the offset after the spill to return ScratchOffset to it's
 695       // original value.
 696       SOffset = ScratchOffsetReg;
 697       ScratchOffsetRegDelta = Offset;
 698     } else {
 699       Scavenged = true;
 700     }
 701
 702     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
 703       .addReg(ScratchOffsetReg)
 704       .addImm(Offset);
 705
 706     Offset = 0;
 707   }
 708
 709   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
 710     unsigned SubReg = NumSubRegs == 1 ?
 711       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
 712
 713     unsigned SOffsetRegState = 0;
 714     unsigned SrcDstRegState = getDefRegState(!IsStore);
 715     if (i + 1 == e) {
 716       SOffsetRegState |= getKillRegState(Scavenged);
 717       // The last implicit use carries the "Kill" flag.
 718       SrcDstRegState |= getKillRegState(IsKill);
 719     }
 720
 721     auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
 722
 723     if (!MIB.getInstr()) {
 724       unsigned FinalReg = SubReg;
 725       if (TmpReg != AMDGPU::NoRegister) {
 726         if (IsStore)
 727           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
 728             .addReg(SubReg, getKillRegState(IsKill));
 729         SubReg = TmpReg;
 730       }
 731
 732       MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
 733       MachineMemOperand *NewMMO
 734         = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
 735                                    EltSize, MinAlign(Align, EltSize * i));
 736
 737       MIB = BuildMI(*MBB, MI, DL, Desc)
 738         .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
 739         .addReg(ScratchRsrcReg)
 740         .addReg(SOffset, SOffsetRegState)
 741         .addImm(Offset)
 742         .addImm(0) // glc
 743         .addImm(0) // slc
 744         .addImm(0) // tfe
 745         .addImm(0) // dlc
 746         .addMemOperand(NewMMO);
 747
 748       if (!IsStore && TmpReg != AMDGPU::NoRegister)
 749         MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
 750                       FinalReg)
 751           .addReg(TmpReg, RegState::Kill);
 752     }
 753
 754     if (NumSubRegs > 1)
 755       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
 756   }
 757
 758   if (ScratchOffsetRegDelta != 0) {
 759     // Subtract the offset we added to the ScratchOffset register.
 760     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
 761         .addReg(ScratchOffsetReg)
 762         .addImm(ScratchOffsetRegDelta);
 763   }
 764 }
 765
 766 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
 767                                                      bool Store) {
 768   if (SuperRegSize % 16 == 0) {
 769     return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
 770                          AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
 771   }
 772
 773   if (SuperRegSize % 8 == 0) {
 774     return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
 775                         AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
 776   }
 777
 778   return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
 779                       AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
 780 }
 781
 782 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 783                                int Index,
 784                                RegScavenger *RS,
 785                                bool OnlyToVGPR) const {
 786   MachineBasicBlock *MBB = MI->getParent();
 787   MachineFunction *MF = MBB->getParent();
 788   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 789   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
 790
 791   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
 792     = MFI->getSGPRToVGPRSpills(Index);
 793   bool SpillToVGPR = !VGPRSpills.empty();
 794   if (OnlyToVGPR && !SpillToVGPR)
 795     return false;
 796
 797   MachineRegisterInfo &MRI = MF->getRegInfo();
 798   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
 799   const SIInstrInfo *TII = ST.getInstrInfo();
 800
 801   unsigned SuperReg = MI->getOperand(0).getReg();
 802   bool IsKill = MI->getOperand(0).isKill();
 803   const DebugLoc &DL = MI->getDebugLoc();
 804
 805   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
 806
 807   bool SpillToSMEM = spillSGPRToSMEM();
 808   if (SpillToSMEM && OnlyToVGPR)
 809     return false;
 810
 811   Register FrameReg = getFrameRegister(*MF);
 812
 813   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
 814                          SuperReg != MFI->getFrameOffsetReg() &&
 815                          SuperReg != MFI->getScratchWaveOffsetReg()));
 816
 817   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 818
 819   unsigned OffsetReg = AMDGPU::M0;
 820   unsigned M0CopyReg = AMDGPU::NoRegister;
 821
 822   if (SpillToSMEM) {
 823     if (RS->isRegUsed(AMDGPU::M0)) {
 824       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 825       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
 826         .addReg(AMDGPU::M0);
 827     }
 828   }
 829
 830   unsigned ScalarStoreOp;
 831   unsigned EltSize = 4;
 832   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
 833   if (SpillToSMEM && isSGPRClass(RC)) {
 834     // XXX - if private_element_size is larger than 4 it might be useful to be
 835     // able to spill wider vmem spills.
 836     std::tie(EltSize, ScalarStoreOp) =
 837           getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
 838   }
 839
 840   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
 841   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
 842
 843   // SubReg carries the "Kill" flag when SubReg == SuperReg.
 844   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
 845   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
 846     unsigned SubReg = NumSubRegs == 1 ?
 847       SuperReg : getSubReg(SuperReg, SplitParts[i]);
 848
 849     if (SpillToSMEM) {
 850       int64_t FrOffset = FrameInfo.getObjectOffset(Index);
 851
 852       // The allocated memory size is really the wavefront size * the frame
 853       // index size. The widest register class is 64 bytes, so a 4-byte scratch
 854       // allocation is enough to spill this in a single stack object.
 855       //
 856       // FIXME: Frame size/offsets are computed earlier than this, so the extra
 857       // space is still unnecessarily allocated.
 858
 859       unsigned Align = FrameInfo.getObjectAlignment(Index);
 860       MachinePointerInfo PtrInfo
 861         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 862       MachineMemOperand *MMO
 863         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
 864                                    EltSize, MinAlign(Align, EltSize * i));
 865
 866       // SMEM instructions only support a single offset, so increment the wave
 867       // offset.
 868
 869       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
 870       if (Offset != 0) {
 871         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
 872           .addReg(FrameReg)
 873           .addImm(Offset);
 874       } else {
 875         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
 876           .addReg(FrameReg);
 877       }
 878
 879       BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
 880         .addReg(SubReg, getKillRegState(IsKill)) // sdata
 881         .addReg(MFI->getScratchRSrcReg())        // sbase
 882         .addReg(OffsetReg, RegState::Kill)       // soff
 883         .addImm(0)                               // glc
 884         .addImm(0)                               // dlc
 885         .addMemOperand(MMO);
 886
 887       continue;
 888     }
 889
 890     if (SpillToVGPR) {
 891       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
 892
 893       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
 894       // only circumstance in which we say it is undefined is when it is the
 895       // first spill to this VGPR in the first basic block.
 896       bool VGPRDefined = true;
 897       if (MBB == &MF->front())
 898         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
 899
 900       // Mark the "old value of vgpr" input undef only if this is the first sgpr
 901       // spill to this specific vgpr in the first basic block.
 902       BuildMI(*MBB, MI, DL,
 903               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
 904               Spill.VGPR)
 905         .addReg(SubReg, getKillRegState(IsKill))
 906         .addImm(Spill.Lane)
 907         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
 908
 909       // FIXME: Since this spills to another register instead of an actual
 910       // frame index, we should delete the frame index when all references to
 911       // it are fixed.
 912     } else {
 913       // XXX - Can to VGPR spill fail for some subregisters but not others?
 914       if (OnlyToVGPR)
 915         return false;
 916
 917       // Spill SGPR to a frame index.
 918       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 919       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 920       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 921
 922       MachineInstrBuilder Mov
 923         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
 924         .addReg(SubReg, SubKillState);
 925
 926
 927       // There could be undef components of a spilled super register.
 928       // TODO: Can we detect this and skip the spill?
 929       if (NumSubRegs > 1) {
 930         // The last implicit use of the SuperReg carries the "Kill" flag.
 931         unsigned SuperKillState = 0;
 932         if (i + 1 == e)
 933           SuperKillState |= getKillRegState(IsKill);
 934         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
 935       }
 936
 937       unsigned Align = FrameInfo.getObjectAlignment(Index);
 938       MachinePointerInfo PtrInfo
 939         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
 940       MachineMemOperand *MMO
 941         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
 942                                    EltSize, MinAlign(Align, EltSize * i));
 943       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
 944         .addReg(TmpReg, RegState::Kill)       // src
 945         .addFrameIndex(Index)                 // vaddr
 946         .addReg(MFI->getScratchRSrcReg())     // srrsrc
 947         .addReg(MFI->getStackPtrOffsetReg())  // soffset
 948         .addImm(i * 4)                        // offset
 949         .addMemOperand(MMO);
 950     }
 951   }
 952
 953   if (M0CopyReg != AMDGPU::NoRegister) {
 954     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
 955       .addReg(M0CopyReg, RegState::Kill);
 956   }
 957
 958   MI->eraseFromParent();
 959   MFI->addToSpilledSGPRs(NumSubRegs);
 960   return true;
 961 }
 962
 963 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
 964                                  int Index,
 965                                  RegScavenger *RS,
 966                                  bool OnlyToVGPR) const {
 967   MachineFunction *MF = MI->getParent()->getParent();
 968   MachineRegisterInfo &MRI = MF->getRegInfo();
 969   MachineBasicBlock *MBB = MI->getParent();
 970   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
 971
 972   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
 973     = MFI->getSGPRToVGPRSpills(Index);
 974   bool SpillToVGPR = !VGPRSpills.empty();
 975   if (OnlyToVGPR && !SpillToVGPR)
 976     return false;
 977
 978   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
 979   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
 980   const SIInstrInfo *TII = ST.getInstrInfo();
 981   const DebugLoc &DL = MI->getDebugLoc();
 982
 983   unsigned SuperReg = MI->getOperand(0).getReg();
 984   bool SpillToSMEM = spillSGPRToSMEM();
 985   if (SpillToSMEM && OnlyToVGPR)
 986     return false;
 987
 988   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 989
 990   unsigned OffsetReg = AMDGPU::M0;
 991   unsigned M0CopyReg = AMDGPU::NoRegister;
 992
 993   if (SpillToSMEM) {
 994     if (RS->isRegUsed(AMDGPU::M0)) {
 995       M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 996       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
 997         .addReg(AMDGPU::M0);
 998     }
 999   }
1000
1001   unsigned EltSize = 4;
1002   unsigned ScalarLoadOp;
1003
1004   Register FrameReg = getFrameRegister(*MF);
1005
1006   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
1007   if (SpillToSMEM && isSGPRClass(RC)) {
1008     // XXX - if private_element_size is larger than 4 it might be useful to be
1009     // able to spill wider vmem spills.
1010     std::tie(EltSize, ScalarLoadOp) =
1011           getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
1012   }
1013
1014   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
1015   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
1016
1017   // SubReg carries the "Kill" flag when SubReg == SuperReg.
1018   int64_t FrOffset = FrameInfo.getObjectOffset(Index);
1019
1020   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
1021     unsigned SubReg = NumSubRegs == 1 ?
1022       SuperReg : getSubReg(SuperReg, SplitParts[i]);
1023
1024     if (SpillToSMEM) {
1025       // FIXME: Size may be > 4 but extra bytes wasted.
1026       unsigned Align = FrameInfo.getObjectAlignment(Index);
1027       MachinePointerInfo PtrInfo
1028         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1029       MachineMemOperand *MMO
1030         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
1031                                    EltSize, MinAlign(Align, EltSize * i));
1032
1033       // Add i * 4 offset
1034       int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
1035       if (Offset != 0) {
1036         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
1037           .addReg(FrameReg)
1038           .addImm(Offset);
1039       } else {
1040         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
1041           .addReg(FrameReg);
1042       }
1043
1044       auto MIB =
1045         BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
1046         .addReg(MFI->getScratchRSrcReg())  // sbase
1047         .addReg(OffsetReg, RegState::Kill) // soff
1048         .addImm(0)                         // glc
1049         .addImm(0)                         // dlc
1050         .addMemOperand(MMO);
1051
1052       if (NumSubRegs > 1 && i == 0)
1053         MIB.addReg(SuperReg, RegState::ImplicitDefine);
1054
1055       continue;
1056     }
1057
1058     if (SpillToVGPR) {
1059       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1060       auto MIB =
1061         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
1062                 SubReg)
1063         .addReg(Spill.VGPR)
1064         .addImm(Spill.Lane);
1065
1066       if (NumSubRegs > 1 && i == 0)
1067         MIB.addReg(SuperReg, RegState::ImplicitDefine);
1068     } else {
1069       if (OnlyToVGPR)
1070         return false;
1071
1072       // Restore SGPR from a stack slot.
1073       // FIXME: We should use S_LOAD_DWORD here for VI.
1074       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1075       unsigned Align = FrameInfo.getObjectAlignment(Index);
1076
1077       MachinePointerInfo PtrInfo
1078         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1079
1080       MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
1081         MachineMemOperand::MOLoad, EltSize,
1082         MinAlign(Align, EltSize * i));
1083
1084       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
1085         .addFrameIndex(Index)                 // vaddr
1086         .addReg(MFI->getScratchRSrcReg())     // srsrc
1087         .addReg(MFI->getStackPtrOffsetReg())  // soffset
1088         .addImm(i * 4)                        // offset
1089         .addMemOperand(MMO);
1090
1091       auto MIB =
1092         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
1093         .addReg(TmpReg, RegState::Kill);
1094
1095       if (NumSubRegs > 1)
1096         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
1097     }
1098   }
1099
1100   if (M0CopyReg != AMDGPU::NoRegister) {
1101     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
1102       .addReg(M0CopyReg, RegState::Kill);
1103   }
1104
1105   MI->eraseFromParent();
1106   return true;
1107 }
1108
1109 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1110 /// a VGPR and the stack slot can be safely eliminated when all other users are
1111 /// handled.
1112 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1113   MachineBasicBlock::iterator MI,
1114   int FI,
1115   RegScavenger *RS) const {
1116   switch (MI->getOpcode()) {
1117   case AMDGPU::SI_SPILL_S1024_SAVE:
1118   case AMDGPU::SI_SPILL_S512_SAVE:
1119   case AMDGPU::SI_SPILL_S256_SAVE:
1120   case AMDGPU::SI_SPILL_S160_SAVE:
1121   case AMDGPU::SI_SPILL_S128_SAVE:
1122   case AMDGPU::SI_SPILL_S96_SAVE:
1123   case AMDGPU::SI_SPILL_S64_SAVE:
1124   case AMDGPU::SI_SPILL_S32_SAVE:
1125     return spillSGPR(MI, FI, RS, true);
1126   case AMDGPU::SI_SPILL_S1024_RESTORE:
1127   case AMDGPU::SI_SPILL_S512_RESTORE:
1128   case AMDGPU::SI_SPILL_S256_RESTORE:
1129   case AMDGPU::SI_SPILL_S160_RESTORE:
1130   case AMDGPU::SI_SPILL_S128_RESTORE:
1131   case AMDGPU::SI_SPILL_S96_RESTORE:
1132   case AMDGPU::SI_SPILL_S64_RESTORE:
1133   case AMDGPU::SI_SPILL_S32_RESTORE:
1134     return restoreSGPR(MI, FI, RS, true);
1135   default:
1136     llvm_unreachable("not an SGPR spill instruction");
1137   }
1138 }
1139
1140 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1141                                         int SPAdj, unsigned FIOperandNum,
1142                                         RegScavenger *RS) const {
1143   MachineFunction *MF = MI->getParent()->getParent();
1144   MachineRegisterInfo &MRI = MF->getRegInfo();
1145   MachineBasicBlock *MBB = MI->getParent();
1146   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1147   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1148   const GCNSubtarget &ST =  MF->getSubtarget<GCNSubtarget>();
1149   const SIInstrInfo *TII = ST.getInstrInfo();
1150   DebugLoc DL = MI->getDebugLoc();
1151
1152   assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1153
1154   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1155   int Index = MI->getOperand(FIOperandNum).getIndex();
1156
1157   Register FrameReg = getFrameRegister(*MF);
1158
1159   switch (MI->getOpcode()) {
1160     // SGPR register spill
1161     case AMDGPU::SI_SPILL_S1024_SAVE:
1162     case AMDGPU::SI_SPILL_S512_SAVE:
1163     case AMDGPU::SI_SPILL_S256_SAVE:
1164     case AMDGPU::SI_SPILL_S160_SAVE:
1165     case AMDGPU::SI_SPILL_S128_SAVE:
1166     case AMDGPU::SI_SPILL_S96_SAVE:
1167     case AMDGPU::SI_SPILL_S64_SAVE:
1168     case AMDGPU::SI_SPILL_S32_SAVE: {
1169       spillSGPR(MI, Index, RS);
1170       break;
1171     }
1172
1173     // SGPR register restore
1174     case AMDGPU::SI_SPILL_S1024_RESTORE:
1175     case AMDGPU::SI_SPILL_S512_RESTORE:
1176     case AMDGPU::SI_SPILL_S256_RESTORE:
1177     case AMDGPU::SI_SPILL_S160_RESTORE:
1178     case AMDGPU::SI_SPILL_S128_RESTORE:
1179     case AMDGPU::SI_SPILL_S96_RESTORE:
1180     case AMDGPU::SI_SPILL_S64_RESTORE:
1181     case AMDGPU::SI_SPILL_S32_RESTORE: {
1182       restoreSGPR(MI, Index, RS);
1183       break;
1184     }
1185
1186     // VGPR register spill
1187     case AMDGPU::SI_SPILL_V1024_SAVE:
1188     case AMDGPU::SI_SPILL_V512_SAVE:
1189     case AMDGPU::SI_SPILL_V256_SAVE:
1190     case AMDGPU::SI_SPILL_V160_SAVE:
1191     case AMDGPU::SI_SPILL_V128_SAVE:
1192     case AMDGPU::SI_SPILL_V96_SAVE:
1193     case AMDGPU::SI_SPILL_V64_SAVE:
1194     case AMDGPU::SI_SPILL_V32_SAVE:
1195     case AMDGPU::SI_SPILL_A1024_SAVE:
1196     case AMDGPU::SI_SPILL_A512_SAVE:
1197     case AMDGPU::SI_SPILL_A128_SAVE:
1198     case AMDGPU::SI_SPILL_A64_SAVE:
1199     case AMDGPU::SI_SPILL_A32_SAVE: {
1200       const MachineOperand *VData = TII->getNamedOperand(*MI,
1201                                                          AMDGPU::OpName::vdata);
1202       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1203              MFI->getStackPtrOffsetReg());
1204
1205       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1206             Index,
1207             VData->getReg(), VData->isKill(),
1208             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1209             FrameReg,
1210             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1211             *MI->memoperands_begin(),
1212             RS);
1213       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1214       MI->eraseFromParent();
1215       break;
1216     }
1217     case AMDGPU::SI_SPILL_V32_RESTORE:
1218     case AMDGPU::SI_SPILL_V64_RESTORE:
1219     case AMDGPU::SI_SPILL_V96_RESTORE:
1220     case AMDGPU::SI_SPILL_V128_RESTORE:
1221     case AMDGPU::SI_SPILL_V160_RESTORE:
1222     case AMDGPU::SI_SPILL_V256_RESTORE:
1223     case AMDGPU::SI_SPILL_V512_RESTORE:
1224     case AMDGPU::SI_SPILL_V1024_RESTORE:
1225     case AMDGPU::SI_SPILL_A32_RESTORE:
1226     case AMDGPU::SI_SPILL_A64_RESTORE:
1227     case AMDGPU::SI_SPILL_A128_RESTORE:
1228     case AMDGPU::SI_SPILL_A512_RESTORE:
1229     case AMDGPU::SI_SPILL_A1024_RESTORE: {
1230       const MachineOperand *VData = TII->getNamedOperand(*MI,
1231                                                          AMDGPU::OpName::vdata);
1232       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1233              MFI->getStackPtrOffsetReg());
1234
1235       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1236             Index,
1237             VData->getReg(), VData->isKill(),
1238             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1239             FrameReg,
1240             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1241             *MI->memoperands_begin(),
1242             RS);
1243       MI->eraseFromParent();
1244       break;
1245     }
1246
1247     default: {
1248       const DebugLoc &DL = MI->getDebugLoc();
1249       bool IsMUBUF = TII->isMUBUF(*MI);
1250
1251       if (!IsMUBUF && !MFI->isEntryFunction()) {
1252         // Convert to an absolute stack address by finding the offset from the
1253         // scratch wave base and scaling by the wave size.
1254         //
1255         // In an entry function/kernel the offset is already the absolute
1256         // address relative to the frame register.
1257
1258         unsigned DiffReg
1259           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1260
1261         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1262         Register ResultReg = IsCopy ?
1263           MI->getOperand(0).getReg() :
1264           MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1265
1266         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
1267           .addReg(FrameReg)
1268           .addReg(MFI->getScratchWaveOffsetReg());
1269
1270         int64_t Offset = FrameInfo.getObjectOffset(Index);
1271         if (Offset == 0) {
1272           // XXX - This never happens because of emergency scavenging slot at 0?
1273           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1274             .addImm(Log2_32(ST.getWavefrontSize()))
1275             .addReg(DiffReg);
1276         } else {
1277           unsigned ScaledReg
1278             = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1279
1280           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
1281             .addImm(Log2_32(ST.getWavefrontSize()))
1282             .addReg(DiffReg, RegState::Kill);
1283
1284           // TODO: Fold if use instruction is another add of a constant.
1285           if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1286             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1287               .addImm(Offset)
1288               .addReg(ScaledReg, RegState::Kill)
1289               .addImm(0); // clamp bit
1290           } else {
1291             unsigned ConstOffsetReg
1292               = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
1293
1294             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1295               .addImm(Offset);
1296             TII->getAddNoCarry(*MBB, MI, DL, ResultReg)
1297               .addReg(ConstOffsetReg, RegState::Kill)
1298               .addReg(ScaledReg, RegState::Kill)
1299               .addImm(0); // clamp bit
1300           }
1301         }
1302
1303         // Don't introduce an extra copy if we're just materializing in a mov.
1304         if (IsCopy)
1305           MI->eraseFromParent();
1306         else
1307           FIOp.ChangeToRegister(ResultReg, false, false, true);
1308         return;
1309       }
1310
1311       if (IsMUBUF) {
1312         // Disable offen so we don't need a 0 vgpr base.
1313         assert(static_cast<int>(FIOperandNum) ==
1314                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1315                                           AMDGPU::OpName::vaddr));
1316
1317         assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1318                MFI->getStackPtrOffsetReg());
1319
1320         TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);
1321
1322         int64_t Offset = FrameInfo.getObjectOffset(Index);
1323         int64_t OldImm
1324           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1325         int64_t NewOffset = OldImm + Offset;
1326
1327         if (isUInt<12>(NewOffset) &&
1328             buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
1329           MI->eraseFromParent();
1330           return;
1331         }
1332       }
1333
1334       // If the offset is simply too big, don't convert to a scratch wave offset
1335       // relative index.
1336
1337       int64_t Offset = FrameInfo.getObjectOffset(Index);
1338       FIOp.ChangeToImmediate(Offset);
1339       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1340         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1341         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1342           .addImm(Offset);
1343         FIOp.ChangeToRegister(TmpReg, false, false, true);
1344       }
1345     }
1346   }
1347 }
1348
1349 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1350   const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
1351   unsigned Size = getRegSizeInBits(*RC);
1352   unsigned AltName = AMDGPU::NoRegAltName;
1353
1354   switch (Size) {
1355   case 32:   AltName = AMDGPU::Reg32; break;
1356   case 64:   AltName = AMDGPU::Reg64; break;
1357   case 96:   AltName = AMDGPU::Reg96; break;
1358   case 128:  AltName = AMDGPU::Reg128; break;
1359   case 160:  AltName = AMDGPU::Reg160; break;
1360   case 256:  AltName = AMDGPU::Reg256; break;
1361   case 512:  AltName = AMDGPU::Reg512; break;
1362   case 1024: AltName = AMDGPU::Reg1024; break;
1363   }
1364   return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
1365 }
1366
1367 // FIXME: This is very slow. It might be worth creating a map from physreg to
1368 // register class.
1369 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
1370   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
1371
1372   static const TargetRegisterClass *const BaseClasses[] = {
1373     &AMDGPU::VGPR_32RegClass,
1374     &AMDGPU::SReg_32RegClass,
1375     &AMDGPU::AGPR_32RegClass,
1376     &AMDGPU::VReg_64RegClass,
1377     &AMDGPU::SReg_64RegClass,
1378     &AMDGPU::AReg_64RegClass,
1379     &AMDGPU::VReg_96RegClass,
1380     &AMDGPU::SReg_96RegClass,
1381     &AMDGPU::VReg_128RegClass,
1382     &AMDGPU::SReg_128RegClass,
1383     &AMDGPU::AReg_128RegClass,
1384     &AMDGPU::VReg_160RegClass,
1385     &AMDGPU::SReg_160RegClass,
1386     &AMDGPU::VReg_256RegClass,
1387     &AMDGPU::SReg_256RegClass,
1388     &AMDGPU::VReg_512RegClass,
1389     &AMDGPU::SReg_512RegClass,
1390     &AMDGPU::AReg_512RegClass,
1391     &AMDGPU::SReg_1024RegClass,
1392     &AMDGPU::VReg_1024RegClass,
1393     &AMDGPU::AReg_1024RegClass,
1394     &AMDGPU::SCC_CLASSRegClass,
1395     &AMDGPU::Pseudo_SReg_32RegClass,
1396     &AMDGPU::Pseudo_SReg_128RegClass,
1397   };
1398
1399   for (const TargetRegisterClass *BaseClass : BaseClasses) {
1400     if (BaseClass->contains(Reg)) {
1401       return BaseClass;
1402     }
1403   }
1404   return nullptr;
1405 }
1406
1407 // TODO: It might be helpful to have some target specific flags in
1408 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1409 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1410   unsigned Size = getRegSizeInBits(*RC);
1411   if (Size < 32)
1412     return false;
1413   switch (Size) {
1414   case 32:
1415     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1416   case 64:
1417     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1418   case 96:
1419     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1420   case 128:
1421     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1422   case 160:
1423     return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1424   case 256:
1425     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1426   case 512:
1427     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1428   case 1024:
1429     return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1430   default:
1431     llvm_unreachable("Invalid register class size");
1432   }
1433 }
1434
1435 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
1436   unsigned Size = getRegSizeInBits(*RC);
1437   if (Size < 32)
1438     return false;
1439   switch (Size) {
1440   case 32:
1441     return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1442   case 64:
1443     return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1444   case 96:
1445     return false;
1446   case 128:
1447     return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1448   case 160:
1449   case 256:
1450     return false;
1451   case 512:
1452     return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1453   case 1024:
1454     return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1455   default:
1456     llvm_unreachable("Invalid register class size");
1457   }
1458 }
1459
1460 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1461                                          const TargetRegisterClass *SRC) const {
1462   switch (getRegSizeInBits(*SRC)) {
1463   case 32:
1464     return &AMDGPU::VGPR_32RegClass;
1465   case 64:
1466     return &AMDGPU::VReg_64RegClass;
1467   case 96:
1468     return &AMDGPU::VReg_96RegClass;
1469   case 128:
1470     return &AMDGPU::VReg_128RegClass;
1471   case 160:
1472     return &AMDGPU::VReg_160RegClass;
1473   case 256:
1474     return &AMDGPU::VReg_256RegClass;
1475   case 512:
1476     return &AMDGPU::VReg_512RegClass;
1477   case 1024:
1478     return &AMDGPU::VReg_1024RegClass;
1479   default:
1480     llvm_unreachable("Invalid register class size");
1481   }
1482 }
1483
1484 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
1485                                          const TargetRegisterClass *SRC) const {
1486   switch (getRegSizeInBits(*SRC)) {
1487   case 32:
1488     return &AMDGPU::AGPR_32RegClass;
1489   case 64:
1490     return &AMDGPU::AReg_64RegClass;
1491   case 128:
1492     return &AMDGPU::AReg_128RegClass;
1493   case 512:
1494     return &AMDGPU::AReg_512RegClass;
1495   case 1024:
1496     return &AMDGPU::AReg_1024RegClass;
1497   default:
1498     llvm_unreachable("Invalid register class size");
1499   }
1500 }
1501
1502 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1503                                          const TargetRegisterClass *VRC) const {
1504   switch (getRegSizeInBits(*VRC)) {
1505   case 32:
1506     return &AMDGPU::SGPR_32RegClass;
1507   case 64:
1508     return &AMDGPU::SReg_64RegClass;
1509   case 96:
1510     return &AMDGPU::SReg_96RegClass;
1511   case 128:
1512     return &AMDGPU::SReg_128RegClass;
1513   case 160:
1514     return &AMDGPU::SReg_160RegClass;
1515   case 256:
1516     return &AMDGPU::SReg_256RegClass;
1517   case 512:
1518     return &AMDGPU::SReg_512RegClass;
1519   case 1024:
1520     return &AMDGPU::SReg_1024RegClass;
1521   default:
1522     llvm_unreachable("Invalid register class size");
1523   }
1524 }
1525
1526 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1527                          const TargetRegisterClass *RC, unsigned SubIdx) const {
1528   if (SubIdx == AMDGPU::NoSubRegister)
1529     return RC;
1530
1531   // We can assume that each lane corresponds to one 32-bit register.
1532   unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
1533   if (isSGPRClass(RC)) {
1534     switch (Count) {
1535     case 1:
1536       return &AMDGPU::SGPR_32RegClass;
1537     case 2:
1538       return &AMDGPU::SReg_64RegClass;
1539     case 3:
1540       return &AMDGPU::SReg_96RegClass;
1541     case 4:
1542       return &AMDGPU::SReg_128RegClass;
1543     case 5:
1544       return &AMDGPU::SReg_160RegClass;
1545     case 8:
1546       return &AMDGPU::SReg_256RegClass;
1547     case 16:
1548       return &AMDGPU::SReg_512RegClass;
1549     case 32: /* fall-through */
1550     default:
1551       llvm_unreachable("Invalid sub-register class size");
1552     }
1553   } else if (hasAGPRs(RC)) {
1554     switch (Count) {
1555     case 1:
1556       return &AMDGPU::AGPR_32RegClass;
1557     case 2:
1558       return &AMDGPU::AReg_64RegClass;
1559     case 4:
1560       return &AMDGPU::AReg_128RegClass;
1561     case 16:
1562       return &AMDGPU::AReg_512RegClass;
1563     case 32: /* fall-through */
1564     default:
1565       llvm_unreachable("Invalid sub-register class size");
1566     }
1567   } else {
1568     switch (Count) {
1569     case 1:
1570       return &AMDGPU::VGPR_32RegClass;
1571     case 2:
1572       return &AMDGPU::VReg_64RegClass;
1573     case 3:
1574       return &AMDGPU::VReg_96RegClass;
1575     case 4:
1576       return &AMDGPU::VReg_128RegClass;
1577     case 5:
1578       return &AMDGPU::VReg_160RegClass;
1579     case 8:
1580       return &AMDGPU::VReg_256RegClass;
1581     case 16:
1582       return &AMDGPU::VReg_512RegClass;
1583     case 32: /* fall-through */
1584     default:
1585       llvm_unreachable("Invalid sub-register class size");
1586     }
1587   }
1588 }
1589
1590 bool SIRegisterInfo::shouldRewriteCopySrc(
1591   const TargetRegisterClass *DefRC,
1592   unsigned DefSubReg,
1593   const TargetRegisterClass *SrcRC,
1594   unsigned SrcSubReg) const {
1595   // We want to prefer the smallest register class possible, so we don't want to
1596   // stop and rewrite on anything that looks like a subregister
1597   // extract. Operations mostly don't care about the super register class, so we
1598   // only want to stop on the most basic of copies between the same register
1599   // class.
1600   //
1601   // e.g. if we have something like
1602   // %0 = ...
1603   // %1 = ...
1604   // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1605   // %3 = COPY %2, sub0
1606   //
1607   // We want to look through the COPY to find:
1608   //  => %3 = COPY %0
1609
1610   // Plain copy.
1611   return getCommonSubClass(DefRC, SrcRC) != nullptr;
1612 }
1613
1614 /// Returns a register that is not used at any point in the function.
1615 ///        If all registers are used, then this function will return
1616 //         AMDGPU::NoRegister.
1617 unsigned
1618 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1619                                    const TargetRegisterClass *RC,
1620                                    const MachineFunction &MF) const {
1621
1622   for (unsigned Reg : *RC)
1623     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1624       return Reg;
1625   return AMDGPU::NoRegister;
1626 }
1627
1628 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1629                                                    unsigned EltSize) const {
1630   if (EltSize == 4) {
1631     static const int16_t Sub0_31[] = {
1632       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1633       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1634       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1635       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1636       AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1637       AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1638       AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1639       AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1640     };
1641
1642     static const int16_t Sub0_15[] = {
1643       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1644       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1645       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1646       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1647     };
1648
1649     static const int16_t Sub0_7[] = {
1650       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1651       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1652     };
1653
1654     static const int16_t Sub0_4[] = {
1655       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1656     };
1657
1658     static const int16_t Sub0_3[] = {
1659       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1660     };
1661
1662     static const int16_t Sub0_2[] = {
1663       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1664     };
1665
1666     static const int16_t Sub0_1[] = {
1667       AMDGPU::sub0, AMDGPU::sub1,
1668     };
1669
1670     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1671     case 32:
1672       return {};
1673     case 64:
1674       return makeArrayRef(Sub0_1);
1675     case 96:
1676       return makeArrayRef(Sub0_2);
1677     case 128:
1678       return makeArrayRef(Sub0_3);
1679     case 160:
1680       return makeArrayRef(Sub0_4);
1681     case 256:
1682       return makeArrayRef(Sub0_7);
1683     case 512:
1684       return makeArrayRef(Sub0_15);
1685     case 1024:
1686       return makeArrayRef(Sub0_31);
1687     default:
1688       llvm_unreachable("unhandled register size");
1689     }
1690   }
1691
1692   if (EltSize == 8) {
1693     static const int16_t Sub0_31_64[] = {
1694       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1695       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1696       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1697       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1698       AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1699       AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1700       AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1701       AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1702     };
1703
1704     static const int16_t Sub0_15_64[] = {
1705       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1706       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1707       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1708       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1709     };
1710
1711     static const int16_t Sub0_7_64[] = {
1712       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1713       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1714     };
1715
1716
1717     static const int16_t Sub0_3_64[] = {
1718       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1719     };
1720
1721     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1722     case 64:
1723       return {};
1724     case 128:
1725       return makeArrayRef(Sub0_3_64);
1726     case 256:
1727       return makeArrayRef(Sub0_7_64);
1728     case 512:
1729       return makeArrayRef(Sub0_15_64);
1730     case 1024:
1731       return makeArrayRef(Sub0_31_64);
1732     default:
1733       llvm_unreachable("unhandled register size");
1734     }
1735   }
1736
1737   if (EltSize == 16) {
1738
1739     static const int16_t Sub0_31_128[] = {
1740       AMDGPU::sub0_sub1_sub2_sub3,
1741       AMDGPU::sub4_sub5_sub6_sub7,
1742       AMDGPU::sub8_sub9_sub10_sub11,
1743       AMDGPU::sub12_sub13_sub14_sub15,
1744       AMDGPU::sub16_sub17_sub18_sub19,
1745       AMDGPU::sub20_sub21_sub22_sub23,
1746       AMDGPU::sub24_sub25_sub26_sub27,
1747       AMDGPU::sub28_sub29_sub30_sub31
1748     };
1749
1750     static const int16_t Sub0_15_128[] = {
1751       AMDGPU::sub0_sub1_sub2_sub3,
1752       AMDGPU::sub4_sub5_sub6_sub7,
1753       AMDGPU::sub8_sub9_sub10_sub11,
1754       AMDGPU::sub12_sub13_sub14_sub15
1755     };
1756
1757     static const int16_t Sub0_7_128[] = {
1758       AMDGPU::sub0_sub1_sub2_sub3,
1759       AMDGPU::sub4_sub5_sub6_sub7
1760     };
1761
1762     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1763     case 128:
1764       return {};
1765     case 256:
1766       return makeArrayRef(Sub0_7_128);
1767     case 512:
1768       return makeArrayRef(Sub0_15_128);
1769     case 1024:
1770       return makeArrayRef(Sub0_31_128);
1771     default:
1772       llvm_unreachable("unhandled register size");
1773     }
1774   }
1775
1776   assert(EltSize == 32 && "unhandled elt size");
1777
1778   static const int16_t Sub0_31_256[] = {
1779     AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1780     AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1781     AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1782     AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1783   };
1784
1785   static const int16_t Sub0_15_256[] = {
1786     AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1787     AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1788   };
1789
1790   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1791   case 256:
1792     return {};
1793   case 512:
1794     return makeArrayRef(Sub0_15_256);
1795   case 1024:
1796     return makeArrayRef(Sub0_31_256);
1797   default:
1798     llvm_unreachable("unhandled register size");
1799   }
1800 }
1801
1802 const TargetRegisterClass*
1803 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1804                                   unsigned Reg) const {
1805   if (TargetRegisterInfo::isVirtualRegister(Reg))
1806     return  MRI.getRegClass(Reg);
1807
1808   return getPhysRegClass(Reg);
1809 }
1810
1811 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1812                             unsigned Reg) const {
1813   const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1814   assert(RC && "Register class for the reg not found");
1815   return hasVGPRs(RC);
1816 }
1817
1818 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1819                             unsigned Reg) const {
1820   const TargetRegisterClass * RC = getRegClassForReg(MRI, Reg);
1821   assert(RC && "Register class for the reg not found");
1822   return hasAGPRs(RC);
1823 }
1824
1825 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1826                                     const TargetRegisterClass *SrcRC,
1827                                     unsigned SubReg,
1828                                     const TargetRegisterClass *DstRC,
1829                                     unsigned DstSubReg,
1830                                     const TargetRegisterClass *NewRC,
1831                                     LiveIntervals &LIS) const {
1832   unsigned SrcSize = getRegSizeInBits(*SrcRC);
1833   unsigned DstSize = getRegSizeInBits(*DstRC);
1834   unsigned NewSize = getRegSizeInBits(*NewRC);
1835
1836   // Do not increase size of registers beyond dword, we would need to allocate
1837   // adjacent registers and constraint regalloc more than needed.
1838
1839   // Always allow dword coalescing.
1840   if (SrcSize <= 32 || DstSize <= 32)
1841     return true;
1842
1843   return NewSize <= DstSize || NewSize <= SrcSize;
1844 }
1845
1846 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1847                                              MachineFunction &MF) const {
1848
1849   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1850   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1851
1852   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1853                                                        MF.getFunction());
1854   switch (RC->getID()) {
1855   default:
1856     return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
1857   case AMDGPU::VGPR_32RegClassID:
1858     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1859   case AMDGPU::SGPR_32RegClassID:
1860     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1861   }
1862 }
1863
1864 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1865                                                 unsigned Idx) const {
1866   if (Idx == getVGPRPressureSet() || Idx == getAGPRPressureSet())
1867     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1868                                const_cast<MachineFunction &>(MF));
1869
1870   if (Idx == getSGPRPressureSet())
1871     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1872                                const_cast<MachineFunction &>(MF));
1873
1874   return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
1875 }
1876
1877 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1878   static const int Empty[] = { -1 };
1879
1880   if (hasRegUnit(AMDGPU::M0, RegUnit))
1881     return Empty;
1882   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
1883 }
1884
1885 unsigned SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1886   // Not a callee saved register.
1887   return AMDGPU::SGPR30_SGPR31;
1888 }
1889
1890 const TargetRegisterClass *
1891 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1892                                          const RegisterBank &RB,
1893                                          const MachineRegisterInfo &MRI) const {
1894   switch (Size) {
1895   case 1: {
1896     switch (RB.getID()) {
1897     case AMDGPU::VGPRRegBankID:
1898       return &AMDGPU::VGPR_32RegClass;
1899     case AMDGPU::VCCRegBankID:
1900       return isWave32 ?
1901         &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1902     case AMDGPU::SGPRRegBankID:
1903       return &AMDGPU::SReg_32_XM0RegClass;
1904     case AMDGPU::SCCRegBankID:
1905       // This needs to return an allocatable class, so don't bother returning
1906       // the dummy SCC class.
1907       return &AMDGPU::SReg_32_XM0RegClass;
1908     default:
1909       llvm_unreachable("unknown register bank");
1910     }
1911   }
1912   case 32:
1913     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1914                                                  &AMDGPU::SReg_32_XM0RegClass;
1915   case 64:
1916     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1917                                                  &AMDGPU::SReg_64_XEXECRegClass;
1918   case 96:
1919     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1920                                                  &AMDGPU::SReg_96RegClass;
1921   case 128:
1922     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1923                                                  &AMDGPU::SReg_128RegClass;
1924   case 160:
1925     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1926                                                  &AMDGPU::SReg_160RegClass;
1927   case 256:
1928     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1929                                                  &AMDGPU::SReg_256RegClass;
1930   case 512:
1931     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1932                                                  &AMDGPU::SReg_512RegClass;
1933   default:
1934     if (Size < 32)
1935       return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1936                                                    &AMDGPU::SReg_32_XM0RegClass;
1937     return nullptr;
1938   }
1939 }
1940
1941 const TargetRegisterClass *
1942 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1943                                          const MachineRegisterInfo &MRI) const {
1944   if (const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()))
1945     return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1946   return nullptr;
1947 }
1948
1949 unsigned SIRegisterInfo::getVCC() const {
1950   return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1951 }
1952
1953 const TargetRegisterClass *
1954 SIRegisterInfo::getRegClass(unsigned RCID) const {
1955   switch ((int)RCID) {
1956   case AMDGPU::SReg_1RegClassID:
1957     return getBoolRC();
1958   case AMDGPU::SReg_1_XEXECRegClassID:
1959     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
1960       : &AMDGPU::SReg_64_XEXECRegClass;
1961   case -1:
1962     return nullptr;
1963   default:
1964     return AMDGPURegisterInfo::getRegClass(RCID);
1965   }
1966 }
1967
1968 // Find reaching register definition
1969 MachineInstr *SIRegisterInfo::findReachingDef(unsigned Reg, unsigned SubReg,
1970                                               MachineInstr &Use,
1971                                               MachineRegisterInfo &MRI,
1972                                               LiveIntervals *LIS) const {
1973   auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1974   SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1975   SlotIndex DefIdx;
1976
1977   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1978     if (!LIS->hasInterval(Reg))
1979       return nullptr;
1980     LiveInterval &LI = LIS->getInterval(Reg);
1981     LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1982                                   : MRI.getMaxLaneMaskForVReg(Reg);
1983     VNInfo *V = nullptr;
1984     if (LI.hasSubRanges()) {
1985       for (auto &S : LI.subranges()) {
1986         if ((S.LaneMask & SubLanes) == SubLanes) {
1987           V = S.getVNInfoAt(UseIdx);
1988           break;
1989         }
1990       }
1991     } else {
1992       V = LI.getVNInfoAt(UseIdx);
1993     }
1994     if (!V)
1995       return nullptr;
1996     DefIdx = V->def;
1997   } else {
1998     // Find last def.
1999     for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
2000       LiveRange &LR = LIS->getRegUnit(*Units);
2001       if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
2002         if (!DefIdx.isValid() ||
2003             MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
2004                           LIS->getInstructionFromIndex(V->def)))
2005           DefIdx = V->def;
2006       } else {
2007         return nullptr;
2008       }
2009     }
2010   }
2011
2012   MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
2013
2014   if (!Def || !MDT.dominates(Def, &Use))
2015     return nullptr;
2016
2017   assert(Def->modifiesRegister(Reg, this));
2018
2019   return Def;
2020 }